diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000000..be2be525ba --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,5 @@ +# Security Policy + +## Reporting a Vulnerability + +Please submit CVE reports to https://github.com/taosdata/TDengine/security/advisories. diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index e3e48ac3a1..a963e4497f 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -399,7 +399,7 @@ if(${BUILD_WITH_COS}) INCLUDE_DIRECTORIES($ENV{HOME}/.cos-local.1/include) MESSAGE("$ENV{HOME}/.cos-local.1/include") - set(CMAKE_BUILD_TYPE debug) + set(CMAKE_BUILD_TYPE Release) set(ORIG_CMAKE_PROJECT_NAME ${CMAKE_PROJECT_NAME}) set(CMAKE_PROJECT_NAME cos_c_sdk) diff --git a/deps/arm/dm_static/libdmodule.a b/deps/arm/dm_static/libdmodule.a index 5f0f0e38b3..f71b97dc2b 100644 Binary files a/deps/arm/dm_static/libdmodule.a and b/deps/arm/dm_static/libdmodule.a differ diff --git a/deps/x86/dm_static/libdmodule.a b/deps/x86/dm_static/libdmodule.a index f5548e6988..348568f8d1 100644 Binary files a/deps/x86/dm_static/libdmodule.a and b/deps/x86/dm_static/libdmodule.a differ diff --git a/docs/en/02-intro/index.md b/docs/en/02-intro/index.md index 4d65c86371..f9fe68b47a 100644 --- a/docs/en/02-intro/index.md +++ b/docs/en/02-intro/index.md @@ -4,11 +4,11 @@ description: This document introduces the major features, competitive advantages toc_max_heading_level: 2 --- -TDengine is an [open source](https://tdengine.com/tdengine/open-source-time-series-database/), [high-performance](https://tdengine.com/tdengine/high-performance-time-series-database/), [cloud native](https://tdengine.com/tdengine/cloud-native-time-series-database/) [time-series database](https://tdengine.com/tsdb/) optimized for Internet of Things (IoT), Connected Cars, and Industrial IoT. Its code, including its cluster feature is open source under GNU AGPL v3.0. Besides the database engine, it provides [caching](../develop/cache), [stream processing](../develop/stream), [data subscription](../develop/tmq) and other functionalities to reduce the system complexity and cost of development and operation. +TDengine is a big data platform designed and optimized for IoT (Internet of Things) and the Industrial Internet. It can safely and effectively converge, store, process and distribute the high volumes of data (TB or even PB scale) generated every day by large numbers of devices and data acquisition units, monitor and alert on business operation status in real time, and provide real-time business insight. The core component of TDengine is TDengine OSS, a high-performance, open-source, cloud-native and simplified time-series database. This section introduces the major features, competitive advantages, typical use cases and benchmarks to help you get a high-level overview of TDengine.
-## Major Features +## Major Features of TDengine OSS The major features are listed below: @@ -132,3 +132,9 @@ As a high-performance, scalable and SQL supported time-series database, TDengine - [Introduction to Time-Series Database](https://tdengine.com/tsdb/) - [Introduction to TDengine competitive advantages](https://tdengine.com/tdengine/) + +## Products + +TDengine offers two products: TDengine Enterprise and TDengine Cloud. For details, please refer to +- [TDengine Enterprise](https://www.taosdata.com/tdengine-pro) +- [TDengine Cloud](https://cloud.taosdata.com/?utm_source=menu&utm_medium=webcn) diff --git a/docs/en/07-develop/01-connect/index.md b/docs/en/07-develop/01-connect/index.md index 49c0b11a36..3ca44783c4 100644 --- a/docs/en/07-develop/01-connect/index.md +++ b/docs/en/07-develop/01-connect/index.md @@ -221,7 +221,7 @@ curl -L -o php-tdengine.tar.gz https://github.com/Yurunsoft/php-tdengine/archive && tar -xzf php-tdengine.tar.gz -C php-tdengine --strip-components=1 ``` -> Version number `v1.0.2` is only for example, it can be replaced to any newer version, please check available version from [TDengine PHP Connector Releases](https://github.com/Yurunsoft/php-tdengine/releases). +> The version number `v1.0.2` is only an example; it can be replaced with any newer version. **Non-Swoole Environment: ** diff --git a/docs/en/07-develop/02-model/index.mdx b/docs/en/07-develop/02-model/index.mdx index db5a259cfe..4524a66a41 100644 --- a/docs/en/07-develop/02-model/index.mdx +++ b/docs/en/07-develop/02-model/index.mdx @@ -55,7 +55,7 @@ At most 4096 columns are allowed in a STable. If there are more than 4096 of met ## Create Table -A specific table needs to be created for each data collection point. Similar to RDBMS, table name and schema are required to create a table. Additionally, one or more tags can be created for each table. To create a table, a STable needs to be used as template and the values need to be specified for the tags. For example, for the meters in [Table 1](/tdinternal/arch#model_table1), the table can be created using below SQL statement. +A specific table needs to be created for each data collection point. Similar to RDBMS, table name and schema are required to create a table. Additionally, one or more tags can be created for each table. To create a table, a STable needs to be used as a template and the values need to be specified for the tags. For example, for the smart meters, the table can be created using the SQL statement below. ```sql CREATE TABLE d1001 USING meters TAGS ("California.SanFrancisco", 2); diff --git a/docs/en/07-develop/07-tmq.mdx b/docs/en/07-develop/07-tmq.mdx index f833dbf439..ecadb5a499 100644 --- a/docs/en/07-develop/07-tmq.mdx +++ b/docs/en/07-develop/07-tmq.mdx @@ -352,10 +352,10 @@ You configure the following parameters when creating a consumer: | `td.connect.port` | string | Port of the server side | | | `group.id` | string | Consumer group ID; consumers with the same ID are in the same group | **Required**. Maximum length: 192. Each topic can create up to 100 consumer groups. | | `client.id` | string | Client ID | Maximum length: 192.
| -| `auto.offset.reset` | enum | Initial offset for the consumer group | `earliest`: subscribe from the earliest data, this is the default behavior; `latest`: subscribe from the latest data; or `none`: can't subscribe without committed offset| +| `auto.offset.reset` | enum | Initial offset for the consumer group | `earliest`: subscribe from the earliest data, the default behavior for versions earlier than 3.2.0.0; `latest`: subscribe from the latest data, the default behavior for version 3.2.0.0 and later; or `none`: can't subscribe without a committed offset| | `enable.auto.commit` | boolean | Commit automatically; true: user application doesn't need to explicitly commit; false: user application need to handle commit by itself | Default value is true | | `auto.commit.interval.ms` | integer | Interval for automatic commits, in milliseconds | -| `msg.with.table.name` | boolean | Specify whether to deserialize table names from messages | default value: false +| `msg.with.table.name` | boolean | Specify whether to deserialize table names from messages. Not applicable when subscribing to columns (in column subscriptions, `tbname` can be selected as a column in the subquery statement). (This parameter has been deprecated since version 3.2.0.0 and always remains true) | default value: false The method of specifying these parameters depends on the language used: @@ -458,7 +458,19 @@ from taos.tmq import Consumer # Syntax: `consumer = Consumer(configs)` # # Example: -consumer = Consumer({"group.id": "local", "td.connect.ip": "127.0.0.1"}) +consumer = Consumer( + { + "group.id": "local", + "client.id": "1", + "enable.auto.commit": "true", + "auto.commit.interval.ms": "1000", + "td.connect.ip": "127.0.0.1", + "td.connect.user": "root", + "td.connect.pass": "taosdata", + "auto.offset.reset": "earliest", + "msg.with.table.name": "true", + } +) ``` diff --git a/docs/en/12-taos-sql/16-operators.md b/docs/en/12-taos-sql/16-operators.md index 6b7adb4a3d..ce8ab8a03c 100644 --- a/docs/en/12-taos-sql/16-operators.md +++ b/docs/en/12-taos-sql/16-operators.md @@ -54,6 +54,7 @@ LIKE is used together with wildcards to match strings. Its usage is described as MATCH and NMATCH are used together with regular expressions to match strings. Their usage is described as follows: - Use POSIX regular expression syntax. For more information, see Regular Expressions. +- The `MATCH` operator returns true when the regular expression is matched. The `NMATCH` operator returns true when the regular expression is not matched. - Regular expression can be used against only table names, i.e. `tbname`, and tags/columns of binary/nchar types. - The maximum length of regular expression string is 128 bytes. Configuration parameter `maxRegexStringLen` can be used to set the maximum allowed regular expression. It's a configuration parameter on the client side, and will take effect after restarting the client. diff --git a/docs/en/12-taos-sql/20-keywords.md b/docs/en/12-taos-sql/20-keywords.md index 983d4f63c9..36cbc0948f 100644 --- a/docs/en/12-taos-sql/20-keywords.md +++ b/docs/en/12-taos-sql/20-keywords.md @@ -180,6 +180,7 @@ The following list shows all reserved keywords: - MAX_DELAY - BWLIMIT - MAXROWS +- MAX_SPEED - MERGE - META - MINROWS diff --git a/docs/en/12-taos-sql/22-meta.md b/docs/en/12-taos-sql/22-meta.md index 37304633e7..fad479d9d3 100644 --- a/docs/en/12-taos-sql/22-meta.md +++ b/docs/en/12-taos-sql/22-meta.md @@ -26,75 +26,85 @@ This document introduces the tables of INFORMATION_SCHEMA and their structure.
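+
+As a quick orientation, these tables can be queried with ordinary SELECT statements; for example, the following minimal query (assuming a running cluster) lists the dnodes described in the next section:
+
+```sql
+SELECT * FROM information_schema.ins_dnodes;
+```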
## INS_DNODES -Provides information about dnodes. Similar to SHOW DNODES. +Provides information about dnodes. Similar to SHOW DNODES. Users whose SYSINFO attribute is 0 can't view this table. | # | **Column** | **Data Type** | **Description** | | --- | :------------: | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | | 1 | vnodes | SMALLINT | Current number of vnodes on the dnode. It should be noted that `vnodes` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 2 | support_vnodes | SMALLINT | Maximum number of vnodes on the dnode | -| 3 | status | BINARY(10) | Current status | -| 4 | note | BINARY(256) | Reason for going offline or other information | +| 3 | status | VARCHAR(10) | Current status | +| 4 | note | VARCHAR(256) | Reason for going offline or other information | | 5 | id | SMALLINT | Dnode ID | -| 6 | endpoint | BINARY(134) | Dnode endpoint | +| 6 | endpoint | VARCHAR(134) | Dnode endpoint | | 7 | create | TIMESTAMP | Creation time | ## INS_MNODES -Provides information about mnodes. Similar to SHOW MNODES. +Provides information about mnodes. Similar to SHOW MNODES. Users whose SYSINFO attribute is 0 can't view this table. | # | **Column** | **Data Type** | **Description** | | --- | :---------: | ------------- | ------------------------------------------ | | 1 | id | SMALLINT | Mnode ID | -| 2 | endpoint | BINARY(134) | Mnode endpoint | -| 3 | role | BINARY(10) | Current role | +| 2 | endpoint | VARCHAR(134) | Mnode endpoint | +| 3 | role | VARCHAR(10) | Current role | | 4 | role_time | TIMESTAMP | Time at which the current role was assumed | | 5 | create_time | TIMESTAMP | Creation time | ## INS_QNODES -Provides information about qnodes. Similar to SHOW QNODES. +Provides information about qnodes. Similar to SHOW QNODES. Users whose SYSINFO attribute is 0 can't view this table. | # | **Column** | **Data Type** | **Description** | | --- | :---------: | ------------- | --------------- | | 1 | id | SMALLINT | Qnode ID | -| 2 | endpoint | BINARY(134) | Qnode endpoint | +| 2 | endpoint | VARCHAR(134) | Qnode endpoint | +| 3 | create_time | TIMESTAMP | Creation time | + +## INS_SNODES + +Provides information about snodes. Similar to SHOW SNODES. Users whose SYSINFO attribute is 0 can't view this table. + +| # | **Column** | **Data Type** | **Description** | +| --- | :---------: | ------------- | --------------- | +| 1 | id | SMALLINT | Snode ID | +| 2 | endpoint | VARCHAR(134) | Snode endpoint | | 3 | create_time | TIMESTAMP | Creation time | ## INS_CLUSTER -Provides information about the cluster. +Provides information about the cluster. Users whose SYSINFO attribute is 0 can't view this table. | # | **Column** | **Data Type** | **Description** | | --- | :---------: | ------------- | --------------- | | 1 | id | BIGINT | Cluster ID | -| 2 | name | BINARY(134) | Cluster name | +| 2 | name | VARCHAR(134) | Cluster name | | 3 | create_time | TIMESTAMP | Creation time | ## INS_DATABASES Provides information about user-created databases. Similar to SHOW DATABASES. 
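+
+For instance, a minimal query against this table might look like the sketch below; note that columns such as `replica` and `keep` (listed in the table that follows) are TDengine keywords and are therefore escaped with backticks:
+
+```sql
+SELECT name, ntables, `replica`, `keep` FROM information_schema.ins_databases;
+```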
-| # | **Column** | **Data Type** | **Description** | +| # | **Column** | **Data Type** | **Description** | | --- | :------------------: | ---------------- | ------------------------------------------------ | -| 1| name| BINARY(32)| Database name | +| 1 | name | VARCHAR(64) | Database name | | 2 | create_time | TIMESTAMP | Creation time | | 3 | ntables | INT | Number of standard tables and subtables (not including supertables) | | 4 | vgroups | INT | Number of vgroups. It should be noted that `vnodes` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 6 | replica | INT | Number of replicas. It should be noted that `replica` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 7 | strict | BINARY(4) | Obsoleted | -| 8 | duration | INT | Duration for storage of single files. It should be noted that `duration` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 9 | keep | INT | Data retention period. It should be noted that `keep` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 7 | strict | VARCHAR(4) | Obsoleted | +| 8 | duration | VARCHAR(10) | Duration for storage of single files. It should be noted that `duration` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 9 | keep | VARCHAR(32) | Data retention period. It should be noted that `keep` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 10 | buffer | INT | Write cache size per vnode, in MB. It should be noted that `buffer` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 11 | pagesize | INT | Page size for vnode metadata storage engine, in KB. It should be noted that `pagesize` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 12 | pages | INT | Number of pages per vnode metadata storage engine. It should be noted that `pages` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 13 | minrows | INT | Maximum number of records per file block. It should be noted that `minrows` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 14 | maxrows | INT | Minimum number of records per file block. It should be noted that `maxrows` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 15 | comp | INT | Compression method. It should be noted that `comp` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 16 | precision | BINARY(2) | Time precision. It should be noted that `precision` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 17 | status | BINARY(10) | Current database status | -| 18 | retentions | BINARY (60) | Aggregation interval and retention period. It should be noted that `retentions` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 16 | precision | VARCHAR(2) | Time precision. It should be noted that `precision` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 17 | status | VARCHAR(10) | Current database status | +| 18 | retentions | VARCHAR(60) | Aggregation interval and retention period. It should be noted that `retentions` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 19 | single_stable | BOOL | Whether the database can contain multiple supertables. 
It should be noted that `single_stable` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 20 | cachemodel | BINARY(60) | Caching method for the newest data. It should be noted that `cachemodel` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 20 | cachemodel | VARCHAR(60) | Caching method for the newest data. It should be noted that `cachemodel` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 21 | cachesize | INT | Memory per vnode used for caching the newest data. It should be noted that `cachesize` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 22 | wal_level | INT | WAL level. It should be noted that `wal_level` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 23 | wal_fsync_period | INT | Interval at which WAL is written to disk. It should be noted that `wal_fsync_period` is a TDengine keyword and needs to be escaped with ` when used as a column name. | @@ -111,15 +121,15 @@ Provides information about user-defined functions. | # | **Column** | **Data Type** | **Description** | | --- | :-----------: | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| 1 | name | BINARY(64) | Function name | -| 2 | comment | BINARY(255) | Function description. It should be noted that `comment` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 1 | name | VARCHAR(64) | Function name | +| 2 | comment | VARCHAR(255) | Function description. It should be noted that `comment` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 3 | aggregate | INT | Whether the UDF is an aggregate function. It should be noted that `aggregate` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 4 | output_type | BINARY(31) | Output data type | +| 4 | output_type | VARCHAR(31) | Output data type | | 5 | create_time | TIMESTAMP | Creation time | | 6 | code_len | INT | Length of the source code | | 7 | bufsize | INT | Buffer size | -| 8 | func_language | BINARY(31) | UDF programming language | -| 9 | func_body | BINARY(16384) | UDF function body | +| 8 | func_language | VARCHAR(31) | UDF programming language | +| 9 | func_body | VARCHAR(16384) | UDF function body | | 10 | func_version | INT | UDF function version. starting from 0. Increasing by 1 each time it is updated | ## INS_INDEXES @@ -128,12 +138,12 @@ Provides information about user-created indices. Similar to SHOW INDEX. 
| # | **Column** | **Data Type** | **Description** | | --- | :--------------: | ------------- | --------------------------------------------------------------------- | -| 1 | db_name | BINARY(32) | Database containing the table with the specified index | -| 2 | table_name | BINARY(192) | Table containing the specified index | -| 3 | index_name | BINARY(192) | Index name | -| 4 | db_name | BINARY(64) | Index column | -| 5 | index_type | BINARY(10) | SMA or tag index | -| 6 | index_extensions | BINARY(256) | Other information For SMA/tag indices, this shows a list of functions | +| 1 | db_name | VARCHAR(32) | Database containing the table with the specified index | +| 2 | table_name | VARCHAR(192) | Table containing the specified index | +| 3 | index_name | VARCHAR(192) | Index name | +| 4 | db_name | VARCHAR(64) | Index column | +| 5 | index_type | VARCHAR(10) | SMA or tag index | +| 6 | index_extensions | VARCHAR(256) | Other information For SMA/tag indices, this shows a list of functions | ## INS_STABLES @@ -141,16 +151,16 @@ Provides information about supertables. | # | **Column** | **Data Type** | **Description** | | --- | :-----------: | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| 1 | stable_name | BINARY(192) | Supertable name | -| 2 | db_name | BINARY(64) | All databases in the supertable | +| 1 | stable_name | VARCHAR(192) | Supertable name | +| 2 | db_name | VARCHAR(64) | All databases in the supertable | | 3 | create_time | TIMESTAMP | Creation time | | 4 | columns | INT | Number of columns | | 5 | tags | INT | Number of tags. It should be noted that `tags` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 6 | last_update | TIMESTAMP | Last updated time | -| 7 | table_comment | BINARY(1024) | Table description | -| 8 | watermark | BINARY(64) | Window closing time. It should be noted that `watermark` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 9 | max_delay | BINARY(64) | Maximum delay for pushing stream processing results. It should be noted that `max_delay` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 10 | rollup | BINARY(128) | Rollup aggregate function. It should be noted that `rollup` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 7 | table_comment | VARCHAR(1024) | Table description | +| 8 | watermark | VARCHAR(64) | Window closing time. It should be noted that `watermark` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 9 | max_delay | VARCHAR(64) | Maximum delay for pushing stream processing results. It should be noted that `max_delay` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 10 | rollup | VARCHAR(128) | Rollup aggregate function. It should be noted that `rollup` is a TDengine keyword and needs to be escaped with ` when used as a column name. | ## INS_TABLES @@ -158,37 +168,37 @@ Provides information about standard tables and subtables. 
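+
+As a small illustrative sketch (the database name `test` is only a placeholder), subtables of a database can be listed together with their supertable and vgroup using the columns described below:
+
+```sql
+SELECT table_name, stable_name, vgroup_id
+FROM information_schema.ins_tables
+WHERE db_name = 'test'
+LIMIT 10;
+```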
| # | **Column** | **Data Type** | **Description** | | --- | :-----------: | ------------- | ---------------------------------------------------------------------------------------------------------------------------------- | -| 1 | table_name | BINARY(192) | Table name | -| 2 | db_name | BINARY(64) | Database name | +| 1 | table_name | VARCHAR(192) | Table name | +| 2 | db_name | VARCHAR(64) | Database name | | 3 | create_time | TIMESTAMP | Creation time | | 4 | columns | INT | Number of columns | -| 5 | stable_name | BINARY(192) | Supertable name | +| 5 | stable_name | VARCHAR(192) | Supertable name | | 6 | uid | BIGINT | Table ID | | 7 | vgroup_id | INT | Vgroup ID | | 8 | ttl | INT | Table time-to-live. It should be noted that `ttl` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 9 | table_comment | BINARY(1024) | Table description | -| 10 | type | BINARY(20) | Table type | +| 9 | table_comment | VARCHAR(1024) | Table description | +| 10 | type | VARCHAR(20) | Table type | ## INS_TAGS | # | **Column** | **Data Type** | **Description** | | --- | :---------: | ------------- | --------------- | -| 1 | table_name | BINARY(192) | Table name | -| 2 | db_name | BINARY(64) | Database name | -| 3 | stable_name | BINARY(192) | Supertable name | -| 4 | tag_name | BINARY(64) | Tag name | -| 5 | tag_type | BINARY(64) | Tag type | -| 6 | tag_value | BINARY(16384) | Tag value | +| 1 | table_name | VARCHAR(192) | Table name | +| 2 | db_name | VARCHAR(64) | Database name | +| 3 | stable_name | VARCHAR(192) | Supertable name | +| 4 | tag_name | VARCHAR(64) | Tag name | +| 5 | tag_type | VARCHAR(64) | Tag type | +| 6 | tag_value | VARCHAR(16384) | Tag value | ## INS_COLUMNS | # | **Column** | **Data Type** | **Description** | | --- | :-----------: | ------------- | ---------------- | -| 1 | table_name | BINARY(192) | Table name | -| 2 | db_name | BINARY(64) | Database name | -| 3 | table_type | BINARY(21) | Table type | -| 4 | col_name | BINARY(64) | Column name | -| 5 | col_type | BINARY(32) | Column type | +| 1 | table_name | VARCHAR(192) | Table name | +| 2 | db_name | VARCHAR(64) | Database name | +| 3 | table_type | VARCHAR(21) | Table type | +| 4 | col_name | VARCHAR(64) | Column name | +| 5 | col_type | VARCHAR(32) | Column type | | 6 | col_length | INT | Column length | | 7 | col_precision | INT | Column precision | | 8 | col_scale | INT | Column scale | @@ -196,51 +206,51 @@ Provides information about standard tables and subtables. ## INS_USERS -Provides information about TDengine users. +Provides information about TDengine users. Users whose SYSINFO attribute is 0 can't view this table. | # | **Column** | **Data Type** | **Description** | | --- | :---------: | ------------- | ---------------- | -| 1 | user_name | BINARY(23) | User name | -| 2 | privilege | BINARY(256) | User permissions | +| 1 | user_name | VARCHAR(23) | User name | +| 2 | privilege | VARCHAR(256) | User permissions | | 3 | create_time | TIMESTAMP | Creation time | ## INS_GRANTS -Provides information about TDengine Enterprise Edition permissions. +Provides information about TDengine Enterprise Edition permissions. Users whose SYSINFO attribute is 0 can't view this table. 
| # | **Column** | **Data Type** | **Description** | | --- | :---------: | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| 1 | version | BINARY(9) | Whether the deployment is a licensed or trial version | -| 2 | cpu_cores | BINARY(9) | CPU cores included in license | -| 3 | dnodes | BINARY(10) | Dnodes included in license. It should be noted that `dnodes` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 4 | streams | BINARY(10) | Streams included in license. It should be noted that `streams` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 5 | users | BINARY(10) | Users included in license. It should be noted that `users` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 6 | accounts | BINARY(10) | Accounts included in license. It should be noted that `accounts` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 7 | storage | BINARY(21) | Storage space included in license. It should be noted that `storage` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 8 | connections | BINARY(21) | Client connections included in license. It should be noted that `connections` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 9 | databases | BINARY(11) | Databases included in license. It should be noted that `databases` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 10 | speed | BINARY(9) | Write speed specified in license (data points per second) | -| 11 | querytime | BINARY(9) | Total query time specified in license | -| 12 | timeseries | BINARY(21) | Number of metrics included in license | -| 13 | expired | BINARY(5) | Whether the license has expired | -| 14 | expire_time | BINARY(19) | When the trial period expires | +| 1 | version | VARCHAR(9) | Whether the deployment is a licensed or trial version | +| 2 | cpu_cores | VARCHAR(9) | CPU cores included in license | +| 3 | dnodes | VARCHAR(10) | Dnodes included in license. It should be noted that `dnodes` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 4 | streams | VARCHAR(10) | Streams included in license. It should be noted that `streams` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 5 | users | VARCHAR(10) | Users included in license. It should be noted that `users` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 6 | accounts | VARCHAR(10) | Accounts included in license. It should be noted that `accounts` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 7 | storage | VARCHAR(21) | Storage space included in license. It should be noted that `storage` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 8 | connections | VARCHAR(21) | Client connections included in license. It should be noted that `connections` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 9 | databases | VARCHAR(11) | Databases included in license. It should be noted that `databases` is a TDengine keyword and needs to be escaped with ` when used as a column name. 
| +| 10 | speed | VARCHAR(9) | Write speed specified in license (data points per second) | +| 11 | querytime | VARCHAR(9) | Total query time specified in license | +| 12 | timeseries | VARCHAR(21) | Number of metrics included in license | +| 13 | expired | VARCHAR(5) | Whether the license has expired | +| 14 | expire_time | VARCHAR(19) | When the trial period expires | ## INS_VGROUPS -Provides information about vgroups. +Provides information about vgroups. Users whose SYSINFO attribute is 0 can't view this table. | # | **Column** | **Data Type** | **Description** | | --- | :--------: | ------------- | ----------------------------------------------------------------------------------------------------------------------------------- | | 1 | vgroup_id | INT | Vgroup ID | -| 2 | db_name | BINARY(32) | Database name | +| 2 | db_name | VARCHAR(32) | Database name | | 3 | tables | INT | Tables in vgroup. It should be noted that `tables` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 4 | status | BINARY(10) | Vgroup status | +| 4 | status | VARCHAR(10) | Vgroup status | | 5 | v1_dnode | INT | Dnode ID of first vgroup member | -| 6 | v1_status | BINARY(10) | Status of first vgroup member | +| 6 | v1_status | VARCHAR(10) | Status of first vgroup member | | 7 | v2_dnode | INT | Dnode ID of second vgroup member | -| 8 | v2_status | BINARY(10) | Status of second vgroup member | +| 8 | v2_status | VARCHAR(10) | Status of second vgroup member | | 9 | v3_dnode | INT | Dnode ID of third vgroup member | -| 10 | v3_status | BINARY(10) | Status of third vgroup member | +| 10 | v3_status | VARCHAR(10) | Status of third vgroup member | | 11 | nfiles | INT | Number of data and metadata files in the vgroup | | 12 | file_size | INT | Size of the data and metadata files in the vgroup | | 13 | tsma | TINYINT | Whether time-range-wise SMA is enabled. 1 means enabled; 0 means disabled. | @@ -251,55 +261,57 @@ Provides system configuration information. | # | **Column** | **Data Type** | **Description** | | --- | :--------: | ------------- | ----------------------------------------------------------------------------------------------------------------------- | -| 1 | name | BINARY(32) | Parameter | -| 2 | value | BINARY(64) | Value. It should be noted that `value` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 1 | name | VARCHAR(32) | Parameter | +| 2 | value | VARCHAR(64) | Value. It should be noted that `value` is a TDengine keyword and needs to be escaped with ` when used as a column name. | ## INS_DNODE_VARIABLES -Provides dnode configuration information. +Provides dnode configuration information. Users whose SYSINFO attribute is 0 can't view this table. | # | **Column** | **Data Type** | **Description** | | --- | :--------: | ------------- | ----------------------------------------------------------------------------------------------------------------------- | | 1 | dnode_id | INT | Dnode ID | -| 2 | name | BINARY(32) | Parameter | -| 3 | value | BINARY(64) | Value. It should be noted that `value` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 2 | name | VARCHAR(32) | Parameter | +| 3 | value | VARCHAR(64) | Value. It should be noted that `value` is a TDengine keyword and needs to be escaped with ` when used as a column name. 
| ## INS_TOPICS | # | **Column** | **Data Type** | **Description** | | --- | :---------: | ------------- | -------------------------------------- | -| 1 | topic_name | BINARY(192) | Topic name | -| 2 | db_name | BINARY(64) | Database for the topic | +| 1 | topic_name | VARCHAR(192) | Topic name | +| 2 | db_name | VARCHAR(64) | Database for the topic | | 3 | create_time | TIMESTAMP | Creation time | -| 4 | sql | BINARY(1024) | SQL statement used to create the topic | +| 4 | sql | VARCHAR(1024) | SQL statement used to create the topic | ## INS_SUBSCRIPTIONS | # | **Column** | **Data Type** | **Description** | | --- | :------------: | ------------- | --------------------------- | -| 1 | topic_name | BINARY(204) | Subscribed topic | -| 2 | consumer_group | BINARY(193) | Subscribed consumer group | +| 1 | topic_name | VARCHAR(204) | Subscribed topic | +| 2 | consumer_group | VARCHAR(193) | Subscribed consumer group | | 3 | vgroup_id | INT | Vgroup ID for the consumer | | 4 | consumer_id | BIGINT | Consumer ID | -| 5 | offset | BINARY(64) | Consumption progress | +| 5 | offset | VARCHAR(64) | Consumption progress | | 6 | rows | BIGINT | Number of consumption items | ## INS_STREAMS | # | **Column** | **Data Type** | **Description** | | --- | :----------: | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| 1 | stream_name | BINARY(64) | Stream name | +| 1 | stream_name | VARCHAR(64) | Stream name | | 2 | create_time | TIMESTAMP | Creation time | -| 3 | sql | BINARY(1024) | SQL statement used to create the stream | -| 4 | status | BINARY(20) | Current status | -| 5 | source_db | BINARY(64) | Source database | -| 6 | target_db | BINARY(64) | Target database | -| 7 | target_table | BINARY(192) | Target table | +| 3 | sql | VARCHAR(1024) | SQL statement used to create the stream | +| 4 | status | VARCHAR(20) | Current status | +| 5 | source_db | VARCHAR(64) | Source database | +| 6 | target_db | VARCHAR(64) | Target database | +| 7 | target_table | VARCHAR(192) | Target table | | 8 | watermark | BIGINT | Watermark (see stream processing documentation). It should be noted that `watermark` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 9 | trigger | INT | Method of triggering the result push (see stream processing documentation). It should be noted that `trigger` is a TDengine keyword and needs to be escaped with ` when used as a column name. | ## INS_USER_PRIVILEGES +Users whose SYSINFO attribute is 0 can't view this table. + | # | **Column** | **Data Type** | **Description** |** | | --- | :----------: | ------------ | -------------------------------------------| | 1 | user_name | VARCHAR(24) | Username | diff --git a/docs/en/12-taos-sql/24-show.md b/docs/en/12-taos-sql/24-show.md index 9e2897160c..2a3975f9a2 100644 --- a/docs/en/12-taos-sql/24-show.md +++ b/docs/en/12-taos-sql/24-show.md @@ -73,10 +73,10 @@ Shows the SQL statement used to create the specified table. This statement can b ## SHOW DATABASES ```sql -SHOW DATABASES; +SHOW [USER | SYSTEM] DATABASES; ``` -Shows all user-created databases. +Shows all databases. The `USER` qualifier specifies only user-created databases. The `SYSTEM` qualifier specifies only system databases. ## SHOW DNODES @@ -183,10 +183,10 @@ Shows all subscriptions in the system. 
## SHOW TABLES ```sql -SHOW [db_name.]TABLES [LIKE 'pattern']; +SHOW [NORMAL | CHILD] [db_name.]TABLES [LIKE 'pattern']; ``` -Shows all standard tables and subtables in the current database. You can use LIKE for fuzzy matching. +Shows all standard tables and subtables in the current database. You can use LIKE for fuzzy matching. The `Normal` qualifier specifies standard tables. The `CHILD` qualifier specifies subtables. ## SHOW TABLE DISTRIBUTED diff --git a/docs/en/13-operation/01-pkg-install.md b/docs/en/13-operation/01-pkg-install.md deleted file mode 100644 index 5610139471..0000000000 --- a/docs/en/13-operation/01-pkg-install.md +++ /dev/null @@ -1,178 +0,0 @@ ---- -title: Install and Uninstall -description: This document describes how to install, upgrade, and uninstall TDengine. ---- - -import Tabs from "@theme/Tabs"; -import TabItem from "@theme/TabItem"; - -This document gives more information about installing, uninstalling, and upgrading TDengine. - -## Install - -About details of installing TDenine, please refer to [Installation Guide](../../get-started/package/). - -## Uninstall - - - - -Uninstall package of TDengine by apt-get can be uninstalled as below: - -```bash -$ sudo apt-get remove tdengine -Reading package lists... Done -Building dependency tree -Reading state information... Done -The following packages will be REMOVED: - tdengine -0 upgraded, 0 newly installed, 1 to remove and 18 not upgraded. -After this operation, 68.3 MB disk space will be freed. -Do you want to continue? [Y/n] y -(Reading database ... 135625 files and directories currently installed.) -Removing tdengine (3.0.0.0) ... -TDengine is removed successfully! - -``` - -If you have installed taos-tools, please uninstall it first before uninstall TDengine. The command of uninstall is following: - -``` -$ sudo apt remove taostools -Reading package lists... Done -Building dependency tree -Reading state information... Done -The following packages will be REMOVED: - taostools -0 upgraded, 0 newly installed, 1 to remove and 0 not upgraded. -After this operation, 68.3 MB disk space will be freed. -Do you want to continue? [Y/n] -(Reading database ... 147973 files and directories currently installed.) -Removing taostools (2.1.2) ... -``` - - - - -Deb package of TDengine can be uninstalled as below: - -``` -$ sudo dpkg -r tdengine -(Reading database ... 137504 files and directories currently installed.) -Removing tdengine (3.0.0.0) ... -TDengine is removed successfully! - -``` - -Deb package of taosTools can be uninstalled as below: - -``` -$ sudo dpkg -r taostools -(Reading database ... 147973 files and directories currently installed.) -Removing taostools (2.1.2) ... -``` - - - - - -RPM package of TDengine can be uninstalled as below: - -``` -$ sudo rpm -e tdengine -TDengine is removed successfully! -``` - -RPM package of taosTools can be uninstalled as below: - -``` -sudo rpm -e taostools -taosToole is removed successfully! -``` - - - - - -tar.gz package of TDengine can be uninstalled as below: - -``` -$ rmtaos -TDengine is removed successfully! -``` - -tar.gz package of taosTools can be uninstalled as below: - -``` -$ rmtaostools -Start to uninstall taos tools ... - -taos tools is uninstalled successfully! -``` - - - - -Run C:\TDengine\unins000.exe to uninstall TDengine on a Windows system. - - - - -TDengine can be uninstalled as below: - -``` -$ rmtaos -TDengine is removed successfully! -``` - - - - -:::info - -- We strongly recommend not to use multiple kinds of installation packages on a single host TDengine. 
The packages may affect each other and cause errors. - -- After deb package is installed, if the installation directory is removed manually, uninstall or reinstall will not work. This issue can be resolved by using the command below which cleans up TDengine package information. - - ``` - $ sudo rm -f /var/lib/dpkg/info/tdengine* - ``` - -You can then reinstall if needed. - -- After rpm package is installed, if the installation directory is removed manually, uninstall or reinstall will not work. This issue can be resolved by using the command below which cleans up TDengine package information. - - ``` - $ sudo rpm -e --noscripts tdengine - ``` - -You can then reinstall if needed. - -::: - -Uninstalling and Modifying Files - -- When TDengine is uninstalled, the configuration /etc/taos/taos.cfg, data directory /var/lib/taos, log directory /var/log/taos are kept. They can be deleted manually with caution, because data can't be recovered. Please follow data integrity, security, backup or relevant SOPs before deleting any data. - -- When reinstalling TDengine, if the default configuration file /etc/taos/taos.cfg exists, it will be kept and the configuration file in the installation package will be renamed to taos.cfg.orig and stored at /usr/local/taos/cfg to be used as configuration sample. Otherwise the configuration file in the installation package will be installed to /etc/taos/taos.cfg and used. - - -## Upgrade -There are two aspects in upgrade operation: upgrade installation package and upgrade a running server. - -To upgrade a package, follow the steps mentioned previously to first uninstall the old version then install the new version. - -Upgrading a running server is much more complex. First please check the version number of the old version and the new version. The version number of TDengine consists of 4 sections, only if the first 2 sections match can the old version be upgraded to the new version. The steps of upgrading a running server are as below: -- Stop inserting data -- Make sure all data is persisted to disk, please use command `flush database` -- Stop the cluster of TDengine -- Uninstall old version and install new version -- Start the cluster of TDengine -- Execute simple queries, such as the ones executed prior to installing the new package, to make sure there is no data loss -- Run some simple data insertion statements to make sure the cluster works well -- Restore business services - -:::warning -TDengine doesn't guarantee any lower version is compatible with the data generated by a higher version, so it's never recommended to downgrade the version. - -::: diff --git a/docs/en/13-operation/10-monitor.md b/docs/en/13-operation/10-monitor.md index c1c6ac3c4c..2a667c132f 100644 --- a/docs/en/13-operation/10-monitor.md +++ b/docs/en/13-operation/10-monitor.md @@ -106,22 +106,22 @@ The data of tdinsight dashboard is stored in `log` database (default. You can ch |field|type|is\_tag|comment| |:----|:---|:-----|:------| |ts|TIMESTAMP||timestamp| -|uptime|FLOAT||dnode uptime| +|uptime|FLOAT||dnode uptime in `days`| |cpu\_engine|FLOAT||cpu usage of tdengine. read from `/proc//stat`| |cpu\_system|FLOAT||cpu usage of server. read from `/proc/stat`| |cpu\_cores|FLOAT||cpu cores of server| |mem\_engine|INT||memory usage of tdengine. 
read from `/proc//status`| -|mem\_system|INT||available memory on the server| +|mem\_system|INT||available memory on the server in `KB`| |mem\_total|INT||total memory of server in `KB`| |disk\_engine|INT||| |disk\_used|BIGINT||usage of data dir in `bytes`| |disk\_total|BIGINT||the capacity of data dir in `bytes`| -|net\_in|FLOAT||network throughput rate in kb/s. read from `/proc/net/dev`| -|net\_out|FLOAT||network throughput rate in kb/s. read from `/proc/net/dev`| -|io\_read|FLOAT||io throughput rate in kb/s. read from `/proc//io`| -|io\_write|FLOAT||io throughput rate in kb/s. read from `/proc//io`| -|io\_read\_disk|FLOAT||io throughput rate of disk in kb/s. read from `/proc//io`| -|io\_write\_disk|FLOAT||io throughput rate of disk in kb/s. read from `/proc//io`| +|net\_in|FLOAT||network throughput rate in byte/s. read from `/proc/net/dev`| +|net\_out|FLOAT||network throughput rate in byte/s. read from `/proc/net/dev`| +|io\_read|FLOAT||io throughput rate in byte/s. read from `/proc//io`| +|io\_write|FLOAT||io throughput rate in byte/s. read from `/proc//io`| +|io\_read\_disk|FLOAT||io throughput rate of disk in byte/s. read from `/proc//io`| +|io\_write\_disk|FLOAT||io throughput rate of disk in byte/s. read from `/proc//io`| |req\_select|INT||number of select queries received per dnode| |req\_select\_rate|FLOAT||number of select queries received per dnode divided by monitor interval.| |req\_insert|INT||number of insert queries received per dnode| @@ -150,9 +150,9 @@ The data of tdinsight dashboard is stored in `log` database (default. You can ch |ts|TIMESTAMP||timestamp| |name|NCHAR||data directory. default is `/var/lib/taos`| |level|INT||level for multi-level storage| -|avail|BIGINT||available space for data directory| -|used|BIGINT||used space for data directory| -|total|BIGINT||total space for data directory| +|avail|BIGINT||available space for data directory in `bytes`| +|used|BIGINT||used space for data directory in `bytes`| +|total|BIGINT||total space for data directory in `bytes`| |dnode\_id|INT|TAG|dnode id| |dnode\_ep|NCHAR|TAG|dnode endpoint| |cluster\_id|NCHAR|TAG|cluster id| @@ -165,9 +165,9 @@ The data of tdinsight dashboard is stored in `log` database (default. You can ch |:----|:---|:-----|:------| |ts|TIMESTAMP||timestamp| |name|NCHAR||log directory. default is `/var/log/taos/`| -|avail|BIGINT||available space for log directory| -|used|BIGINT||used space for data directory| -|total|BIGINT||total space for data directory| +|avail|BIGINT||available space for log directory in `bytes`| +|used|BIGINT||used space for data directory in `bytes`| +|total|BIGINT||total space for data directory in `bytes`| |dnode\_id|INT|TAG|dnode id| |dnode\_ep|NCHAR|TAG|dnode endpoint| |cluster\_id|NCHAR|TAG|cluster id| @@ -180,9 +180,9 @@ The data of tdinsight dashboard is stored in `log` database (default. You can ch |:----|:---|:-----|:------| |ts|TIMESTAMP||timestamp| |name|NCHAR||temp directory. 
default is `/tmp/`| -|avail|BIGINT||available space for temp directory| -|used|BIGINT||used space for temp directory| -|total|BIGINT||total space for temp directory| +|avail|BIGINT||available space for temp directory in `bytes`| +|used|BIGINT||used space for temp directory in `bytes`| +|total|BIGINT||total space for temp directory in `bytes`| |dnode\_id|INT|TAG|dnode id| |dnode\_ep|NCHAR|TAG|dnode endpoint| |cluster\_id|NCHAR|TAG|cluster id| diff --git a/docs/en/14-reference/03-connector/07-python.mdx b/docs/en/14-reference/03-connector/07-python.mdx index 5067c33e2d..02c176ee3d 100644 --- a/docs/en/14-reference/03-connector/07-python.mdx +++ b/docs/en/14-reference/03-connector/07-python.mdx @@ -31,11 +31,13 @@ We recommend using the latest version of `taospy`, regardless of the version of |Python Connector Version|major changes| |:-------------------:|:----:| +|2.7.12|1. added support for `varbinary` type (STMT does not yet support)
2. improved query performance (thanks to contributor [hadrianl](https://github.com/taosdata/taos-connector-python/pull/209))| |2.7.9|support for getting assignment and seek function on subscription| |2.7.8|add `execute_many` method| |Python Websocket Connector Version|major changes| |:----------------------------:|:-----:| +|0.2.9|bugs fixes| |0.2.5|1. support for getting assignment and seek function on subscription
2. support schemaless
3. support STMT| |0.2.4|support `unsubscribe` on subscription| @@ -1023,10 +1025,6 @@ Due to the current imperfection of Python's nanosecond support (see link below), 1. https://stackoverflow.com/questions/10611328/parsing-datetime-strings-containing-nanoseconds 2. https://www.python.org/dev/peps/pep-0564/ -## Important Update - -[**Release Notes**] (https://github.com/taosdata/taos-connector-python/releases) - ## API Reference - [taos](https://docs.taosdata.com/api/taospy/taos/) diff --git a/docs/en/14-reference/03-connector/80-php.mdx b/docs/en/14-reference/03-connector/80-php.mdx index b6a31b6de3..b3c2065b6e 100644 --- a/docs/en/14-reference/03-connector/80-php.mdx +++ b/docs/en/14-reference/03-connector/80-php.mdx @@ -52,8 +52,6 @@ curl -L -o php-tdengine.tar.gz https://github.com/Yurunsoft/php-tdengine/archive && tar -xzf php-tdengine.tar.gz -C php-tdengine --strip-components=1 ``` -> Version number `v1.0.2` is only for example, it can be replaced to any newer version, please find available versions in [TDengine PHP Connector Releases](https://github.com/Yurunsoft/php-tdengine/releases). - **Non-Swoole Environment: ** ```shell diff --git a/docs/en/14-reference/03-connector/_linux_install.mdx b/docs/en/14-reference/03-connector/_linux_install.mdx index 398593cfe6..d637c2cb69 100644 --- a/docs/en/14-reference/03-connector/_linux_install.mdx +++ b/docs/en/14-reference/03-connector/_linux_install.mdx @@ -4,7 +4,6 @@ import PkgListV3 from "/components/PkgListV3"; - [All Downloads](../../releases/tdengine) 2. Unzip diff --git a/docs/en/14-reference/03-connector/_macos_install.mdx b/docs/en/14-reference/03-connector/_macos_install.mdx index effabbbebe..31ceae68b6 100644 --- a/docs/en/14-reference/03-connector/_macos_install.mdx +++ b/docs/en/14-reference/03-connector/_macos_install.mdx @@ -4,8 +4,6 @@ import PkgListV3 from "/components/PkgListV3"; - [All Downloads](../../releases/tdengine) - 2. Execute the installer, select the default value as prompted, and complete the installation. If the installation is blocked, you can right-click or ctrl-click on the installation package and select `Open`. 3. configure taos.cfg diff --git a/docs/en/14-reference/03-connector/_windows_install.mdx b/docs/en/14-reference/03-connector/_windows_install.mdx index 723f685b5d..a6e03f30fb 100644 --- a/docs/en/14-reference/03-connector/_windows_install.mdx +++ b/docs/en/14-reference/03-connector/_windows_install.mdx @@ -3,8 +3,6 @@ import PkgListV3 from "/components/PkgListV3"; 1. Download the client installation package - - [All Downloads](../../releases/tdengine) 2. Execute the installer, select the default value as prompted, and complete the installation 3. Installation path diff --git a/docs/en/14-reference/04-taosadapter.md b/docs/en/14-reference/04-taosadapter.md index 6bc49768c6..c75598b0df 100644 --- a/docs/en/14-reference/04-taosadapter.md +++ b/docs/en/14-reference/04-taosadapter.md @@ -31,7 +31,7 @@ taosAdapter provides the following features. ### Install taosAdapter -If you use the TDengine server, you don't need additional steps to install taosAdapter. You can download taosAdapter from [TDengine 3.0 released versions](../../releases/tdengine) to download the TDengine server installation package. If you need to deploy taosAdapter separately on another server other than the TDengine server, you should install the full TDengine server package on that server to install taosAdapter. 
If you need to build taosAdapter from source code, you can refer to the [Building taosAdapter]( https://github.com/taosdata/taosadapter/blob/3.0/BUILD.md) documentation. +If you use the TDengine server, you don't need additional steps to install taosAdapter. If you need to deploy taosAdapter separately on another server other than the TDengine server, you should install the full TDengine server package on that server to install taosAdapter. If you need to build taosAdapter from source code, you can refer to the [Building taosAdapter]( https://github.com/taosdata/taosadapter/blob/3.0/BUILD.md) documentation. ### Start/Stop taosAdapter @@ -180,7 +180,7 @@ See [example/config/taosadapter.toml](https://github.com/taosdata/taosadapter/bl node_export is an exporter for machine metrics. Please visit [https://github.com/prometheus/node_exporter](https://github.com/prometheus/node_exporter) for more information. - Support for Prometheus remote_read and remote_write remote_read and remote_write are interfaces for Prometheus data read and write from/to other data storage solution. Please visit [https://prometheus.io/blog/2019/10/10/remote-read-meets-streaming/#remote-apis](https://prometheus.io/blog/2019/10/10/remote-read-meets-streaming/#remote-apis) for more information. -- Get table's VGroup ID. For more information about VGroup, please refer to [primary-logic-unit](/tdinternal/arch/#primary-logic-unit). +- Get table's VGroup ID. ## Interfaces @@ -246,7 +246,7 @@ node_export is an exporter of hardware and OS metrics exposed by the \*NIX kerne ### Get table's VGroup ID -You can call `http://:6041/rest/vgid?db=&table=` to get table's VGroup ID. For more information about VGroup, please refer to [primary-logic-unit](/tdinternal/arch/#primary-logic-unit). +You can call `http://:6041/rest/vgid?db=&table=
` to get table's VGroup ID. ## Memory usage optimization methods diff --git a/docs/en/14-reference/05-taosbenchmark.md b/docs/en/14-reference/05-taosbenchmark.md index e65046f65d..e052c0d02b 100644 --- a/docs/en/14-reference/05-taosbenchmark.md +++ b/docs/en/14-reference/05-taosbenchmark.md @@ -397,6 +397,7 @@ The configuration parameters for specifying super table tag columns and data col ### Query scenario configuration parameters `filetype` must be set to `query` in the query scenario. +`query_times` specifies the number of times each query is executed. To control the query scenario by setting `kill_slow_query_threshold` and `kill_slow_query_interval` parameters to kill the execution of slow query statements. Threshold controls exec_usec of query command will be killed by taosBenchmark after the specified time, in seconds; interval controls sleep time to avoid continuous querying of slow queries consuming CPU in seconds. diff --git a/docs/en/14-reference/06-taosdump.md b/docs/en/14-reference/06-taosdump.md index baf07d6b9e..c07465a97c 100644 --- a/docs/en/14-reference/06-taosdump.md +++ b/docs/en/14-reference/06-taosdump.md @@ -103,7 +103,7 @@ Usage: taosdump [OPTION...] dbname [tbname ...] use letter and number only. Default is NOT. -n, --no-escape No escape char '`'. Default is using it. -Q, --dot-replace Repalce dot character with underline character in - the table name. + the table name. (Version 2.5.3) -T, --thread-num=THREAD_NUM Number of thread for dump in file. Default is 8. -C, --cloud=CLOUD_DSN specify a DSN to access TDengine cloud service @@ -113,6 +113,10 @@ Usage: taosdump [OPTION...] dbname [tbname ...] -?, --help Give this help list --usage Give a short usage message -V, --version Print program version + -W, --rename=RENAME-LIST Rename database name with new name during + importing data. RENAME-LIST: + "db1=newDB1|db2=newDB2" means rename db1 to newDB1 + and rename db2 to newDB2 (Version 2.5.4) Mandatory or optional arguments to long options are also mandatory or optional for any corresponding short options. diff --git a/docs/en/20-third-party/70-seeq.md b/docs/en/20-third-party/70-seeq.md index e7ad5c8173..e42204dd5d 100644 --- a/docs/en/20-third-party/70-seeq.md +++ b/docs/en/20-third-party/70-seeq.md @@ -10,76 +10,60 @@ description: How to use Seeq and TDengine to perform time series data analysis Seeq is an advanced analytics software for the manufacturing industry and the Industrial Internet of Things (IIoT). Seeq supports the use of machine learning innovations within process manufacturing organizations. These capabilities enable organizations to deploy their own or third-party machine learning algorithms into advanced analytics applications used by frontline process engineers and subject matter experts, thus extending the efforts of a single data scientist to many frontline workers. -With the TDengine Java connector, Seeq effortlessly supports querying time series data provided by TDengine and offers functionalities such as data visualization, analysis, and forecasting. +TDengine can be added as a data source to Seeq via its JDBC connector. Once the data source is configured, Seeq can read data from TDengine and offer functionalities such as data visualization, analysis, and forecasting. -### Install Seeq +## Prerequisites -Please download Seeq Server and Seeq Data Lab software installation package from the [Seeq official website](https://www.seeq.com/customer-download). +1. Install Seeq Server and Seeq Data Lab software +2.
Install TDengine or register for the TDengine Cloud service -### Install and start Seeq Server - -``` -tar xvzf seeq-server-xxx.tar.gz -cd seeq-server-installer -sudo ./install - -sudo seeq service enable -sudo seeq start -``` - -### Install and start Seeq Data Lab Server - -Seeq Data Lab needs to be installed on a separate server from Seeq Server and connected to Seeq Server through configuration. For detailed installation and configuration instructions, please refer to [the official documentation](https://support.seeq.com/space/KB/1034059842). - -``` -tar xvf seeq-data-lab--64bit-linux.tar.gz -sudo seeq-data-lab-installer/install -f /opt/seeq/seeq-data-lab -g /var/opt/seeq -u seeq -sudo seeq config set Network/DataLab/Hostname localhost -sudo seeq config set Network/DataLab/Port 34231 # the port of the Data Lab server (usually 34231) -sudo seeq config set Network/Hostname # the host IP or URL of the main Seeq Server - -# If the main Seeq server is configured to listen over HTTPS -sudo seeq config set Network/Webserver/SecurePort 443 # the secure port of the main Seeq Server (usually 443) - -# If the main Seeq server is NOT configured to listen over HTTPS -sudo seeq config set Network/Webserver/Port - -#On the main Seeq server, open a Seeq Command Prompt and set the hostname of the Data Lab server: -sudo seeq config set Network/DataLab/Hostname # the host IP (not URL) of the Data Lab server -sudo seeq config set Network/DataLab/Port 34231 # the port of the Data Lab server (usually 34231 -``` - -### Install TDengine on-premise instance - -See [Quick Install from Package](../../get-started). - -### Or use TDengine Cloud - -Register for a [TDengine Cloud](https://cloud.tdengine.com) account and log in to your account. - -## Make Seeq be able to access TDengine - -1. Get data location configuration +## Install TDengine JDBC connector +1. Get Seeq data location configuration ``` sudo seeq config get Folders/Data ``` - -2. Download TDengine Java connector from maven.org. Please use the latest version (Current is 3.2.5, https://repo1.maven.org/maven2/com/taosdata/jdbc/taos-jdbcdriver/3.2.5/taos-jdbcdriver-3.2.5-dist.jar). - +2. Download the latest TDengine Java connector from maven.org (the current version is [3.2.5](https://repo1.maven.org/maven2/com/taosdata/jdbc/taos-jdbcdriver/3.2.5/taos-jdbcdriver-3.2.5-dist.jar)), and copy the JAR file into the_directory_found_in_step_1/plugins/lib/ 3. Restart Seeq server - ``` sudo seeq restart ``` -4. Input License +## Add TDengine as a data source in Seeq +1. Open Seeq, log in as admin, go to Administration, click "Add Data Source" +2. For connector, choose SQL connector v2 +3. Inside the "Additional Configuration" input box, copy and paste the following -Use a browser to access ip:34216 and input the license according to the guide. +``` +{ + "QueryDefinitions": [], "Type": "GENERIC", "Hostname": null, "Port": 0, "DatabaseName": null, "Username": null, "Password": null, "InitialSql": null, "TimeZone": null, "PrintRows": false, "UseWindowsAuth": false, "SqlFetchBatchSize": 100000, "UseSSL": false, "JdbcProperties": null, "GenericDatabaseConfig": { "DatabaseJdbcUrl": "jdbc:TAOS-RS://localhost:6030/?user=root&password=taosdata", "SqlDriverClassName": "com.taosdata.jdbc.rs.RestfulDriver", "ResolutionInNanoseconds": 1000, "ZonedColumnTypes": [] } } ``` -## How to use Seeq to analyze time-series data that TDengine serves +Note: You need to replace `DatabaseJdbcUrl` with your own setting.
Please login TDengine cloud or open taosExplorer for enterprise edition, click programming -> Java to find yours. For the "QueryDefintions", please follow the examples below to write your own. -This chapter demonstrates how to use Seeq software in conjunction with TDengine for time series data analysis. +## Use Seeq to analyze time-series data stored inside TDengine + +This chapter demonstrates how to use Seeq with TDengine for time series data analysis. ### Scenario Overview @@ -150,8 +134,8 @@ Please login with Seeq administrator and create a few data sources as following. "Hostname": null, "Port": 0, "DatabaseName": null, - "Username": "root", - "Password": "taosdata", + "Username": null, + "Password": null, "InitialSql": null, "TimeZone": null, "PrintRows": false, @@ -210,8 +194,8 @@ Please login with Seeq administrator and create a few data sources as following. "Hostname": null, "Port": 0, "DatabaseName": null, - "Username": "root", - "Password": "taosdata", + "Username": null, + "Password": null, "InitialSql": null, "TimeZone": null, "PrintRows": false, @@ -269,8 +253,8 @@ Please login with Seeq administrator and create a few data sources as following. "Hostname": null, "Port": 0, "DatabaseName": null, - "Username": "root", - "Password": "taosdata", + "Username": null, + "Password": null, "InitialSql": null, "TimeZone": null, "PrintRows": false, @@ -289,13 +273,13 @@ Please login with Seeq administrator and create a few data sources as following. #### Launch Seeq Workbench -Please login to Seeq server with IP:port and create a new Seeq Workbench, then select data sources and choose the correct tools to do data visualization and analysis. Please refer to [the official documentation](https://support.seeq.com/space/KB/146440193/Seeq+Workbench) for the details. +Please login to Seeq server and create a new Seeq Workbench, then select data sources and choose the correct tools to do data visualization and analysis. Please refer to [the official documentation](https://support.seeq.com/space/KB/146440193/Seeq+Workbench) for the details. ![Seeq Workbench](./seeq/seeq-demo-workbench.webp) #### Use Seeq Data Lab Server for advanced data analysis -Please login to the Seeq service with IP:port and create a new Seeq Data Lab. Then you can use advanced tools including Python environment and machine learning add-ons for more complex analysis. +Please login to the Seeq service and create a new Seeq Data Lab. Then you can use advanced tools including Python environment and machine learning add-ons for more complex analysis. ```Python from seeq import spy @@ -370,13 +354,15 @@ Please note that when using TDengine Cloud, you need to specify the database nam #### The data source of TDengine Cloud example +This data source contains the data from a smart meter in public database smartmeters. 
+ ``` { "QueryDefinitions": [ { "Name": "CloudVoltage", "Type": "SIGNAL", - "Sql": "SELECT ts, voltage FROM test.meters", + "Sql": "SELECT ts, voltage FROM smartmeters.d1000", "Enabled": true, "TestMode": false, "TestQueriesDuringSync": true, @@ -409,8 +395,8 @@ Please note that when using TDengine Cloud, you need to specify the database nam "Hostname": null, "Port": 0, "DatabaseName": null, - "Username": "root", - "Password": "taosdata", + "Username": null, + "Password": null, "InitialSql": null, "TimeZone": null, "PrintRows": false, @@ -419,7 +405,7 @@ Please note that when using TDengine Cloud, you need to specify the database nam "UseSSL": false, "JdbcProperties": null, "GenericDatabaseConfig": { - "DatabaseJdbcUrl": "jdbc:TAOS-RS://gw.cloud.taosdata.com?useSSL=true&token=41ac9d61d641b6b334e8b76f45f5a8XXXXXXXXXX", + "DatabaseJdbcUrl": "jdbc:TAOS-RS://gw.us-west-2.aws.cloud.tdengine.com?useSSL=true&token=42b874395452d36f38dd6bf4317757611b213683", "SqlDriverClassName": "com.taosdata.jdbc.rs.RestfulDriver", "ResolutionInNanoseconds": 1000, "ZonedColumnTypes": [] @@ -433,8 +419,8 @@ Please note that when using TDengine Cloud, you need to specify the database nam ## Conclusion -By integrating Seeq and TDengine, it is possible to leverage the efficient storage and querying performance of TDengine while also benefiting from Seeq's powerful data visualization and analysis capabilities provided to users. +By integrating Seeq and TDengine, you can leverage the efficient storage and querying performance of TDengine while also benefiting from Seeq's powerful data visualization and analysis capabilities provided to users. -This integration allows users to take advantage of TDengine's high-performance time-series data storage and retrieval, ensuring efficient handling of large volumes of data. At the same time, Seeq provides advanced analytics features such as data visualization, anomaly detection, correlation analysis, and predictive modeling, enabling users to gain valuable insights and make data-driven decisions. +This integration allows users to take advantage of TDengine's high-performance time-series data storage and query, ensuring efficient handling of large volumes of data. At the same time, Seeq provides advanced analytics features such as data visualization, anomaly detection, correlation analysis, and predictive modeling, enabling users to gain valuable insights and make data-driven decisions. Together, Seeq and TDengine provide a comprehensive solution for time series data analysis in diverse industries such as manufacturing, IIoT, and power systems. The combination of efficient data storage and advanced analytics empowers users to unlock the full potential of their time series data, driving operational improvements, and enabling predictive and prescriptive analytics applications. 
diff --git a/docs/zh/02-intro.md b/docs/zh/02-intro.md index bb989f27da..93d650ddd6 100644 --- a/docs/zh/02-intro.md +++ b/docs/zh/02-intro.md @@ -4,20 +4,14 @@ description: 简要介绍 TDengine 的主要功能 toc_max_heading_level: 2 --- -TDengine 是一款开源、高性能、云原生的[时序数据库](https://tdengine.com/tsdb/),且针对物联网、车联网、工业互联网、金融、IT 运维等场景进行了优化。TDengine 的代码,包括集群功能,都在 GNU AGPL v3.0 下开源。除核心的时序数据库功能外,TDengine 还提供[缓存](../develop/cache/)、[数据订阅](../develop/tmq)、[流式计算](../develop/stream)等其它功能以降低系统复杂度及研发和运维成本。 +TDengine 是一款专为物联网、工业互联网等场景设计并优化的大数据平台,它能安全高效地将大量设备、数据采集器每天产生的高达 TB 甚至 PB 级的数据进行汇聚、存储、分析和分发,对业务运行状态进行实时监测、预警,提供实时的商业洞察。其核心模块是高性能、集群开源、云原生、极简的时序数据库 TDengine OSS。 -本章节介绍 TDengine 的主要产品和功能、竞争优势、适用场景、与其他数据库的对比测试等等,让大家对 TDengine 有个整体的了解。 -## 主要产品 - -TDengine 有三个主要产品:TDengine Enterprise (即 TDengine 企业版),TDengine Cloud,和 TDengine OSS,关于它们的具体定义请参考 -- [TDengine 企业版](https://www.taosdata.com/tdengine-pro) -- [TDengine 云服务](https://cloud.taosdata.com/?utm_source=menu&utm_medium=webcn) -- [TDengine 开源版](https://www.taosdata.com/tdengine-oss) +本节介绍 TDengine OSS 的主要产品和功能、竞争优势、适用场景、与其他数据库的对比测试等等,让大家对 TDengine OSS 有个整体了解 ## 主要功能 -TDengine 的主要功能如下: +TDengine OSS 的主要功能如下: 1. 写入数据,支持 - [SQL 写入](../develop/insert-data/sql-writing) @@ -150,3 +144,10 @@ TDengine 的主要功能如下: - [TDengine VS InfluxDB ,写入性能大 PK !](https://www.taosdata.com/2021/11/05/3248.html) - [TDengine 和 InfluxDB 查询性能对比测试报告](https://www.taosdata.com/2022/02/22/5969.html) - [TDengine 与 InfluxDB、OpenTSDB、Cassandra、MySQL、ClickHouse 等数据库的对比测试报告](https://www.taosdata.com/downloads/TDengine_Testing_Report_cn.pdf) + + +## 主要产品 + +TDengine 有两个主要产品:TDengine Enterprise (即 TDengine 企业版)和 TDengine Cloud,关于它们的具体定义请参考 +- [TDengine 企业版](https://www.taosdata.com/tdengine-pro) +- [TDengine 云服务](https://cloud.taosdata.com/?utm_source=menu&utm_medium=webcn) diff --git a/docs/zh/05-get-started/index.md b/docs/zh/05-get-started/index.md index 16172277b5..0b7ca02b9f 100644 --- a/docs/zh/05-get-started/index.md +++ b/docs/zh/05-get-started/index.md @@ -4,7 +4,7 @@ description: '快速设置 TDengine 环境并体验其高效写入和查询' --- import xiaot from './xiaot.webp' -import xiaot_new from './xiaot-03.webp' +import xiaot_new from './xiaot-20231007.png' import channel from './channel.webp' import official_account from './official-account.webp' diff --git a/docs/zh/05-get-started/xiaot-20231007.png b/docs/zh/05-get-started/xiaot-20231007.png new file mode 100644 index 0000000000..553bcbd090 Binary files /dev/null and b/docs/zh/05-get-started/xiaot-20231007.png differ diff --git a/docs/zh/07-develop/07-tmq.mdx b/docs/zh/07-develop/07-tmq.md similarity index 96% rename from docs/zh/07-develop/07-tmq.mdx rename to docs/zh/07-develop/07-tmq.md index 927d762829..8e43631c9a 100644 --- a/docs/zh/07-develop/07-tmq.mdx +++ b/docs/zh/07-develop/07-tmq.md @@ -63,17 +63,17 @@ import CDemo from "./_sub_c.mdx"; typedef void(tmq_commit_cb(tmq_t *tmq, int32_t code, void *param)); typedef enum tmq_conf_res_t { - TMQ_CONF_UNKNOWN = -2, - TMQ_CONF_INVALID = -1, - TMQ_CONF_OK = 0, -} tmq_conf_res_t; + TMQ_CONF_UNKNOWN = -2, + TMQ_CONF_INVALID = -1, + TMQ_CONF_OK = 0, + } tmq_conf_res_t; typedef struct tmq_topic_assignment { - int32_t vgId; - int64_t currentOffset; - int64_t begin; - int64_t end; -} tmq_topic_assignment; + int32_t vgId; + int64_t currentOffset; + int64_t begin; + int64_t end; + } tmq_topic_assignment; DLL_EXPORT tmq_conf_t *tmq_conf_new(); DLL_EXPORT tmq_conf_res_t tmq_conf_set(tmq_conf_t *conf, const char *key, const char *value); @@ -106,7 +106,7 @@ import CDemo from "./_sub_c.mdx"; DLL_EXPORT const char *tmq_get_db_name(TAOS_RES 
*res); DLL_EXPORT int32_t tmq_get_vgroup_id(TAOS_RES *res); DLL_EXPORT int64_t tmq_get_vgroup_offset(TAOS_RES* res); - DLL_EXPORT const char *tmq_err2str(int32_t code);DLL_EXPORT void tmq_conf_set_auto_commit_cb(tmq_conf_t *conf, tmq_commit_cb *cb, void *param); + DLL_EXPORT const char *tmq_err2str(int32_t code); ``` 下面介绍一下它们的具体用法(超级表和子表结构请参考“数据建模”一节),完整的示例代码请见下面 C 语言的示例代码。 @@ -351,10 +351,10 @@ CREATE TOPIC topic_name [with meta] AS DATABASE db_name; | `td.connect.port` | integer | 服务端的端口号 | | | `group.id` | string | 消费组 ID,同一消费组共享消费进度 |
**必填项**。最大长度:192。
每个 topic 最多可建立 100 个 consumer group | | `client.id` | string | 客户端 ID | 最大长度:192。 | -| `auto.offset.reset` | enum | 消费组订阅的初始位置 |
`earliest`: default;从头开始订阅;
`latest`: 仅从最新数据开始订阅;
`none`: 没有提交的 offset 无法订阅 | +| `auto.offset.reset` | enum | 消费组订阅的初始位置 |
`earliest`: default(version < 3.2.0.0);从头开始订阅;
`latest`: default(version >= 3.2.0.0);仅从最新数据开始订阅;
`none`: 没有提交的 offset 无法订阅 | | `enable.auto.commit` | boolean | 是否启用消费位点自动提交,true: 自动提交,客户端应用无需commit;false:客户端应用需要自行commit | 默认值为 true | | `auto.commit.interval.ms` | integer | 消费记录自动提交消费位点时间间隔,单位为毫秒 | 默认值为 5000 | -| `msg.with.table.name` | boolean | 是否允许从消息中解析表名, 不适用于列订阅(列订阅时可将 tbname 作为列写入 subquery 语句) |默认关闭 | +| `msg.with.table.name` | boolean | 是否允许从消息中解析表名, 不适用于列订阅(列订阅时可将 tbname 作为列写入 subquery 语句)(从3.2.0.0版本该参数废弃,恒为true) |默认关闭 | 对于不同编程语言,其设置方式如下: @@ -459,7 +459,19 @@ from taos.tmq import Consumer # Syntax: `consumer = Consumer(configs)` # # Example: -consumer = Consumer({"group.id": "local", "td.connect.ip": "127.0.0.1"}) +consumer = Consumer( + { + "group.id": "local", + "client.id": "1", + "enable.auto.commit": "true", + "auto.commit.interval.ms": "1000", + "td.connect.ip": "127.0.0.1", + "td.connect.user": "root", + "td.connect.pass": "taosdata", + "auto.offset.reset": "earliest", + "msg.with.table.name": "true", + } +) ``` diff --git a/docs/zh/08-connector/30-python.mdx b/docs/zh/08-connector/30-python.mdx index ab98b5b8de..1526c0da6e 100644 --- a/docs/zh/08-connector/30-python.mdx +++ b/docs/zh/08-connector/30-python.mdx @@ -33,11 +33,13 @@ Python 连接器的源码托管在 [GitHub](https://github.com/taosdata/taos-con |Python Connector 版本|主要变化| |:-------------------:|:----:| +|2.7.12|1. 新增 varbinary 类型支持(STMT暂不支持 varbinary )
2. query 性能提升(感谢贡献者[hadrianl](https://github.com/taosdata/taos-connector-python/pull/209))| |2.7.9|数据订阅支持获取消费进度和重置消费进度| |2.7.8|新增 `execute_many`| |Python Websocket Connector 版本|主要变化| |:----------------------------:|:-----:| +|0.2.9|已知问题修复| |0.2.5|1. 数据订阅支持获取消费进度和重置消费进度
2. 支持 schemaless
3. 支持 STMT| |0.2.4|数据订阅新增取消订阅方法| diff --git a/docs/zh/12-taos-sql/16-operators.md b/docs/zh/12-taos-sql/16-operators.md index 0636121edd..c2f0cae9c4 100644 --- a/docs/zh/12-taos-sql/16-operators.md +++ b/docs/zh/12-taos-sql/16-operators.md @@ -54,6 +54,7 @@ LIKE 条件使用通配符字符串进行匹配检查,规则如下: MATCH 条件和 NMATCH 条件使用正则表达式进行匹配,规则如下: - 支持符合 POSIX 规范的正则表达式,具体规范内容可参见 Regular Expressions。 +- MATCH 和正则表达式匹配时, 返回 TURE. NMATCH 和正则表达式不匹配时, 返回 TRUE. - 只能针对子表名(即 tbname)、字符串类型的标签值进行正则表达式过滤,不支持普通列的过滤。 - 正则匹配字符串长度不能超过 128 字节。可以通过参数 maxRegexStringLen 设置和调整最大允许的正则匹配字符串,该参数是客户端配置参数,需要重启客户端才能生效 diff --git a/docs/zh/12-taos-sql/20-keywords.md b/docs/zh/12-taos-sql/20-keywords.md index e7e926d0b7..f59eda1689 100644 --- a/docs/zh/12-taos-sql/20-keywords.md +++ b/docs/zh/12-taos-sql/20-keywords.md @@ -180,6 +180,7 @@ description: TDengine 保留关键字的详细列表 - MAX_DELAY - BWLIMIT - MAXROWS +- MAX_SPEED - MERGE - META - MINROWS diff --git a/docs/zh/12-taos-sql/22-meta.md b/docs/zh/12-taos-sql/22-meta.md index 35794ec269..db53dd462b 100644 --- a/docs/zh/12-taos-sql/22-meta.md +++ b/docs/zh/12-taos-sql/22-meta.md @@ -26,7 +26,7 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 ## INS_DNODES -提供 dnode 的相关信息。也可以使用 SHOW DNODES 来查询这些信息。 +提供 dnode 的相关信息。也可以使用 SHOW DNODES 来查询这些信息。 SYSINFO 为 0 的用户不能查看此表。 | # | **列名** | **数据类型** | **说明** | | --- | :------------: | ------------ | ----------------------------------------------------------------------------------------------------- | @@ -40,7 +40,7 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 ## INS_MNODES -提供 mnode 的相关信息。也可以使用 SHOW MNODES 来查询这些信息。 +提供 mnode 的相关信息。也可以使用 SHOW MNODES 来查询这些信息。 SYSINFO 为 0 的用户不能查看此表。 | # | **列名** | **数据类型** | **说明** | | --- | :---------: | ------------ | ------------------ | @@ -52,22 +52,33 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 ## INS_QNODES -当前系统中 QNODE 的信息。也可以使用 SHOW QNODES 来查询这些信息。 +当前系统中 QNODE 的信息。也可以使用 SHOW QNODES 来查询这些信息。SYSINFO 属性为 0 的用户不能查看此表。 | # | **列名** | **数据类型** | **说明** | | --- | :---------: | ------------ | ------------ | | 1 | id | SMALLINT | qnode id | -| 2 | endpoint | BINARY(134) | qnode 的地址 | +| 2 | endpoint | VARCHAR(134) | qnode 的地址 | | 3 | create_time | TIMESTAMP | 创建时间 | +## INS_SNODES + +当前系统中 SNODE 的信息。也可以使用 SHOW SNODES 来查询这些信息。SYSINFO 属性为 0 的用户不能查看此表。 + +| # | **列名** | **数据类型** | **说明** | +| --- | :---------: | ------------ | ------------ | +| 1 | id | SMALLINT | snode id | +| 2 | endpoint | VARCHAR(134) | snode 的地址 | +| 3 | create_time | TIMESTAMP | 创建时间 | + + ## INS_CLUSTER -存储集群相关信息。 +存储集群相关信息。 SYSINFO 属性为 0 的用户不能查看此表。 | # | **列名** | **数据类型** | **说明** | | --- | :---------: | ------------ | ---------- | | 1 | id | BIGINT | cluster id | -| 2 | name | BINARY(134) | 集群名称 | +| 2 | name | VARCHAR(134) | 集群名称 | | 3 | create_time | TIMESTAMP | 创建时间 | ## INS_DATABASES @@ -76,25 +87,25 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 | # | **列名** | **数据类型** | **说明** | | --- | :------------------: | ---------------- | ------------------------------------------------ | -| 1 | name | BINARY(32) | 数据库名 | +| 1 | name | VARCHAR(64) | 数据库名 | | 2 | create_time | TIMESTAMP | 创建时间 | | 3 | ntables | INT | 数据库中表的数量,包含子表和普通表但不包含超级表 | | 4 | vgroups | INT | 数据库中有多少个 vgroup。需要注意,`vgroups` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 6 | replica | INT | 副本数。需要注意,`replica` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 7 | strict | BINARY(4) | 废弃参数 | -| 8 | duration | INT | 单文件存储数据的时间跨度。需要注意,`duration` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 9 | keep | INT | 数据保留时长。需要注意,`keep` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 7 | strict | VARCHAR(4) | 废弃参数 | +| 8 
| duration | VARCHAR(10) | 单文件存储数据的时间跨度。需要注意,`duration` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 9 | keep | VARCHAR(32) | 数据保留时长。需要注意,`keep` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 10 | buffer | INT | 每个 vnode 写缓存的内存块大小,单位 MB。需要注意,`buffer` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 11 | pagesize | INT | 每个 VNODE 中元数据存储引擎的页大小,单位为 KB。需要注意,`pagesize` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 12 | pages | INT | 每个 vnode 元数据存储引擎的缓存页个数。需要注意,`pages` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 13 | minrows | INT | 文件块中记录的最大条数。需要注意,`minrows` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 14 | maxrows | INT | 文件块中记录的最小条数。需要注意,`maxrows` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 15 | comp | INT | 数据压缩方式。需要注意,`comp` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 16 | precision | BINARY(2) | 时间分辨率。需要注意,`precision` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 17 | status | BINARY(10) | 数据库状态 | -| 18 | retentions | BINARY (60) | 数据的聚合周期和保存时长。需要注意,`retentions` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 16 | precision | VARCHAR(2) | 时间分辨率。需要注意,`precision` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 17 | status | VARCHAR(10) | 数据库状态 | +| 18 | retentions | VARCHAR(60) | 数据的聚合周期和保存时长。需要注意,`retentions` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 19 | single_stable | BOOL | 表示此数据库中是否只可以创建一个超级表。需要注意,`single_stable` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 20 | cachemodel | BINARY(60) | 表示是否在内存中缓存子表的最近数据。需要注意,`cachemodel` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 20 | cachemodel | VARCHAR(60) | 表示是否在内存中缓存子表的最近数据。需要注意,`cachemodel` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 21 | cachesize | INT | 表示每个 vnode 中用于缓存子表最近数据的内存大小。需要注意,`cachesize` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 22 | wal_level | INT | WAL 级别。需要注意,`wal_level` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 23 | wal_fsync_period | INT | 数据落盘周期。需要注意,`wal_fsync_period` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | @@ -111,15 +122,15 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 | # | **列名** | **数据类型** | **说明** | | --- | :-----------: | ------------- | --------------------------------------------------------------------------------------------- | -| 1 | name | BINARY(64) | 函数名 | -| 2 | comment | BINARY(255) | 补充说明。需要注意,`comment` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 1 | name | VARCHAR(64) | 函数名 | +| 2 | comment | VARCHAR(255) | 补充说明。需要注意,`comment` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 3 | aggregate | INT | 是否为聚合函数。需要注意,`aggregate` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 4 | output_type | BINARY(31) | 输出类型 | +| 4 | output_type | VARCHAR(31) | 输出类型 | | 5 | create_time | TIMESTAMP | 创建时间 | | 6 | code_len | INT | 代码长度 | | 7 | bufsize | INT | buffer 大小 | -| 8 | func_language | BINARY(31) | 自定义函数编程语言 | -| 9 | func_body | BINARY(16384) | 函数体定义 | +| 8 | func_language | VARCHAR(31) | 自定义函数编程语言 | +| 9 | func_body | VARCHAR(16384) | 函数体定义 | | 10 | func_version | INT | 函数版本号。初始版本为0,每次替换更新,版本号加1。 | @@ -129,12 +140,12 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 | # | **列名** | **数据类型** | **说明** | | --- | :--------------: | ------------ | ------------------------------------------------------- | -| 1 | db_name | BINARY(32) | 包含此索引的表所在的数据库名 | -| 2 | table_name | BINARY(192) | 包含此索引的表的名称 | -| 3 | index_name | BINARY(192) | 索引名 | -| 4 | column_name | BINARY(64) | 建索引的列的列名 | -| 5 | index_type | BINARY(10) | 目前有 SMA 和 tag | -| 6 | index_extensions | BINARY(256) | 索引的额外信息。对 SMA/tag 类型的索引,是函数名的列表。 | +| 1 | db_name | VARCHAR(32) | 包含此索引的表所在的数据库名 | +| 2 | table_name | VARCHAR(192) | 包含此索引的表的名称 | +| 3 | index_name | VARCHAR(192) | 索引名 | +| 4 | column_name | VARCHAR(64) | 建索引的列的列名 | +| 5 | index_type | VARCHAR(10) | 目前有 SMA 和 tag | +| 6 | 
index_extensions | VARCHAR(256) | 索引的额外信息。对 SMA/tag 类型的索引,是函数名的列表。 | ## INS_STABLES @@ -142,16 +153,16 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 | # | **列名** | **数据类型** | **说明** | | --- | :-----------: | ------------ | ----------------------------------------------------------------------------------------------------- | -| 1 | stable_name | BINARY(192) | 超级表表名 | -| 2 | db_name | BINARY(64) | 超级表所在的数据库的名称 | +| 1 | stable_name | VARCHAR(192) | 超级表表名 | +| 2 | db_name | VARCHAR(64) | 超级表所在的数据库的名称 | | 3 | create_time | TIMESTAMP | 创建时间 | | 4 | columns | INT | 列数目 | | 5 | tags | INT | 标签数目。需要注意,`tags` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 6 | last_update | TIMESTAMP | 最后更新时间 | -| 7 | table_comment | BINARY(1024) | 表注释 | -| 8 | watermark | BINARY(64) | 窗口的关闭时间。需要注意,`watermark` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 9 | max_delay | BINARY(64) | 推送计算结果的最大延迟。需要注意,`max_delay` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 10 | rollup | BINARY(128) | rollup 聚合函数。需要注意,`rollup` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 7 | table_comment | VARCHAR(1024) | 表注释 | +| 8 | watermark | VARCHAR(64) | 窗口的关闭时间。需要注意,`watermark` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 9 | max_delay | VARCHAR(64) | 推送计算结果的最大延迟。需要注意,`max_delay` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 10 | rollup | VARCHAR(128) | rollup 聚合函数。需要注意,`rollup` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | ## INS_TABLES @@ -159,37 +170,37 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 | # | **列名** | **数据类型** | **说明** | | --- | :-----------: | ------------ | ------------------------------------------------------------------------------------- | -| 1 | table_name | BINARY(192) | 表名 | -| 2 | db_name | BINARY(64) | 数据库名 | +| 1 | table_name | VARCHAR(192) | 表名 | +| 2 | db_name | VARCHAR(64) | 数据库名 | | 3 | create_time | TIMESTAMP | 创建时间 | | 4 | columns | INT | 列数目 | -| 5 | stable_name | BINARY(192) | 所属的超级表表名 | +| 5 | stable_name | VARCHAR(192) | 所属的超级表表名 | | 6 | uid | BIGINT | 表 id | | 7 | vgroup_id | INT | vgroup id | | 8 | ttl | INT | 表的生命周期。需要注意,`ttl` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 9 | table_comment | BINARY(1024) | 表注释 | -| 10 | type | BINARY(21) | 表类型 | +| 9 | table_comment | VARCHAR(1024) | 表注释 | +| 10 | type | VARCHAR(21) | 表类型 | ## INS_TAGS | # | **列名** | **数据类型** | **说明** | | --- | :---------: | ------------- | ---------------------- | -| 1 | table_name | BINARY(192) | 表名 | -| 2 | db_name | BINARY(64) | 该表所在的数据库的名称 | -| 3 | stable_name | BINARY(192) | 所属的超级表表名 | -| 4 | tag_name | BINARY(64) | tag 的名称 | -| 5 | tag_type | BINARY(64) | tag 的类型 | -| 6 | tag_value | BINARY(16384) | tag 的值 | +| 1 | table_name | VARCHAR(192) | 表名 | +| 2 | db_name | VARCHAR(64) | 该表所在的数据库的名称 | +| 3 | stable_name | VARCHAR(192) | 所属的超级表表名 | +| 4 | tag_name | VARCHAR(64) | tag 的名称 | +| 5 | tag_type | VARCHAR(64) | tag 的类型 | +| 6 | tag_value | VARCHAR(16384) | tag 的值 | ## INS_COLUMNS | # | **列名** | **数据类型** | **说明** | | --- | :-----------: | ------------ | ---------------------- | -| 1 | table_name | BINARY(192) | 表名 | -| 2 | db_name | BINARY(64) | 该表所在的数据库的名称 | -| 3 | table_type | BINARY(21) | 表类型 | -| 4 | col_name | BINARY(64) | 列 的名称 | -| 5 | col_type | BINARY(32) | 列 的类型 | +| 1 | table_name | VARCHAR(192) | 表名 | +| 2 | db_name | VARCHAR(64) | 该表所在的数据库的名称 | +| 3 | table_type | VARCHAR(21) | 表类型 | +| 4 | col_name | VARCHAR(64) | 列 的名称 | +| 5 | col_type | VARCHAR(32) | 列 的类型 | | 6 | col_length | INT | 列 的长度 | | 7 | col_precision | INT | 列 的精度 | | 8 | col_scale | INT | 列 的比例 | @@ -197,51 +208,51 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 ## INS_USERS 
-提供系统中创建的用户的相关信息。 +提供系统中创建的用户的相关信息. SYSINFO 属性为0 的用户不能查看此表。 | # | **列名** | **数据类型** | **说明** | | --- | :---------: | ------------ | -------- | -| 1 | user_name | BINARY(23) | 用户名 | -| 2 | privilege | BINARY(256) | 权限 | +| 1 | user_name | VARCHAR(23) | 用户名 | +| 2 | privilege | VARCHAR(256) | 权限 | | 3 | create_time | TIMESTAMP | 创建时间 | ## INS_GRANTS -提供企业版授权的相关信息。 +提供企业版授权的相关信息。SYSINFO 属性为 0 的用户不能查看此表。 | # | **列名** | **数据类型** | **说明** | | --- | :---------: | ------------ | --------------------------------------------------------------------------------------------------------- | -| 1 | version | BINARY(9) | 企业版授权说明:official(官方授权的)/trial(试用的) | -| 2 | cpu_cores | BINARY(9) | 授权使用的 CPU 核心数量 | -| 3 | dnodes | BINARY(10) | 授权使用的 dnode 节点数量。需要注意,`dnodes` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 4 | streams | BINARY(10) | 授权创建的流数量。需要注意,`streams` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 5 | users | BINARY(10) | 授权创建的用户数量。需要注意,`users` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 6 | accounts | BINARY(10) | 授权创建的帐户数量。需要注意,`accounts` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 7 | storage | BINARY(21) | 授权使用的存储空间大小。需要注意,`storage` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 8 | connections | BINARY(21) | 授权使用的客户端连接数量。需要注意,`connections` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 9 | databases | BINARY(11) | 授权使用的数据库数量。需要注意,`databases` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 10 | speed | BINARY(9) | 授权使用的数据点每秒写入数量 | -| 11 | querytime | BINARY(9) | 授权使用的查询总时长 | -| 12 | timeseries | BINARY(21) | 授权使用的测点数量 | -| 13 | expired | BINARY(5) | 是否到期,true:到期,false:未到期 | -| 14 | expire_time | BINARY(19) | 试用期到期时间 | +| 1 | version | VARCHAR(9) | 企业版授权说明:official(官方授权的)/trial(试用的) | +| 2 | cpu_cores | VARCHAR(9) | 授权使用的 CPU 核心数量 | +| 3 | dnodes | VARCHAR(10) | 授权使用的 dnode 节点数量。需要注意,`dnodes` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 4 | streams | VARCHAR(10) | 授权创建的流数量。需要注意,`streams` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 5 | users | VARCHAR(10) | 授权创建的用户数量。需要注意,`users` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 6 | accounts | VARCHAR(10) | 授权创建的帐户数量。需要注意,`accounts` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 7 | storage | VARCHAR(21) | 授权使用的存储空间大小。需要注意,`storage` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 8 | connections | VARCHAR(21) | 授权使用的客户端连接数量。需要注意,`connections` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 9 | databases | VARCHAR(11) | 授权使用的数据库数量。需要注意,`databases` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 10 | speed | VARCHAR(9) | 授权使用的数据点每秒写入数量 | +| 11 | querytime | VARCHAR(9) | 授权使用的查询总时长 | +| 12 | timeseries | VARCHAR(21) | 授权使用的测点数量 | +| 13 | expired | VARCHAR(5) | 是否到期,true:到期,false:未到期 | +| 14 | expire_time | VARCHAR(19) | 试用期到期时间 | ## INS_VGROUPS -系统中所有 vgroups 的信息。 +系统中所有 vgroups 的信息。SYSINFO 属性为 0 的用户不能查看此表。 | # | **列名** | **数据类型** | **说明** | | --- | :-------: | ------------ | ------------------------------------------------------------------------------------------------ | | 1 | vgroup_id | INT | vgroup id | -| 2 | db_name | BINARY(32) | 数据库名 | +| 2 | db_name | VARCHAR(32) | 数据库名 | | 3 | tables | INT | 此 vgroup 内有多少表。需要注意,`tables` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 4 | status | BINARY(10) | 此 vgroup 的状态 | +| 4 | status | VARCHAR(10) | 此 vgroup 的状态 | | 5 | v1_dnode | INT | 第一个成员所在的 dnode 的 id | -| 6 | v1_status | BINARY(10) | 第一个成员的状态 | +| 6 | v1_status | VARCHAR(10) | 第一个成员的状态 | | 7 | v2_dnode | INT | 第二个成员所在的 dnode 的 id | -| 8 | v2_status | BINARY(10) | 第二个成员的状态 | +| 8 | v2_status | VARCHAR(10) | 第二个成员的状态 | | 9 | v3_dnode | INT | 第三个成员所在的 dnode 的 id | -| 10 | v3_status | BINARY(10) | 第三个成员的状态 | +| 10 | v3_status | VARCHAR(10) | 
第三个成员的状态 | | 11 | nfiles | INT | 此 vgroup 中数据/元数据文件的数量 | | 12 | file_size | INT | 此 vgroup 中数据/元数据文件的大小 | | 13 | tsma | TINYINT | 此 vgroup 是否专用于 Time-range-wise SMA,1: 是, 0: 否 | @@ -252,55 +263,57 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 | # | **列名** | **数据类型** | **说明** | | --- | :------: | ------------ | --------------------------------------------------------------------------------------- | -| 1 | name | BINARY(32) | 配置项名称 | -| 2 | value | BINARY(64) | 该配置项的值。需要注意,`value` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 1 | name | VARCHAR(32) | 配置项名称 | +| 2 | value | VARCHAR(64) | 该配置项的值。需要注意,`value` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | ## INS_DNODE_VARIABLES -系统中每个 dnode 的配置参数。 +系统中每个 dnode 的配置参数。SYSINFO 属性 为 0 的用户不能查看此表。 | # | **列名** | **数据类型** | **说明** | | --- | :------: | ------------ | --------------------------------------------------------------------------------------- | | 1 | dnode_id | INT | dnode 的 ID | -| 2 | name | BINARY(32) | 配置项名称 | -| 3 | value | BINARY(64) | 该配置项的值。需要注意,`value` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 2 | name | VARCHAR(32) | 配置项名称 | +| 3 | value | VARCHAR(64) | 该配置项的值。需要注意,`value` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | ## INS_TOPICS | # | **列名** | **数据类型** | **说明** | | --- | :---------: | ------------ | ------------------------------ | -| 1 | topic_name | BINARY(192) | topic 名称 | -| 2 | db_name | BINARY(64) | topic 相关的 DB | +| 1 | topic_name | VARCHAR(192) | topic 名称 | +| 2 | db_name | VARCHAR(64) | topic 相关的 DB | | 3 | create_time | TIMESTAMP | topic 的 创建时间 | -| 4 | sql | BINARY(1024) | 创建该 topic 时所用的 SQL 语句 | +| 4 | sql | VARCHAR(1024) | 创建该 topic 时所用的 SQL 语句 | ## INS_SUBSCRIPTIONS | # | **列名** | **数据类型** | **说明** | | --- | :------------: | ------------ | ------------------------ | -| 1 | topic_name | BINARY(204) | 被订阅的 topic | -| 2 | consumer_group | BINARY(193) | 订阅者的消费者组 | +| 1 | topic_name | VARCHAR(204) | 被订阅的 topic | +| 2 | consumer_group | VARCHAR(193) | 订阅者的消费者组 | | 3 | vgroup_id | INT | 消费者被分配的 vgroup id | | 4 | consumer_id | BIGINT | 消费者的唯一 id | -| 5 | offset | BINARY(64) | 消费者的消费进度 | +| 5 | offset | VARCHAR(64) | 消费者的消费进度 | | 6 | rows | BIGINT | 消费者的消费的数据条数 | ## INS_STREAMS | # | **列名** | **数据类型** | **说明** | | --- | :----------: | ------------ | -------------------------------------------------------------------------------------------------------------------- | -| 1 | stream_name | BINARY(64) | 流计算名称 | +| 1 | stream_name | VARCHAR(64) | 流计算名称 | | 2 | create_time | TIMESTAMP | 创建时间 | -| 3 | sql | BINARY(1024) | 创建流计算时提供的 SQL 语句 | -| 4 | status | BINARY(20) | 流当前状态 | -| 5 | source_db | BINARY(64) | 源数据库 | -| 6 | target_db | BINARY(64) | 目的数据库 | -| 7 | target_table | BINARY(192) | 流计算写入的目标表 | +| 3 | sql | VARCHAR(1024) | 创建流计算时提供的 SQL 语句 | +| 4 | status | VARCHAR(20) | 流当前状态 | +| 5 | source_db | VARCHAR(64) | 源数据库 | +| 6 | target_db | VARCHAR(64) | 目的数据库 | +| 7 | target_table | VARCHAR(192) | 流计算写入的目标表 | | 8 | watermark | BIGINT | watermark,详见 SQL 手册流式计算。需要注意,`watermark` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 9 | trigger | INT | 计算结果推送模式,详见 SQL 手册流式计算。需要注意,`trigger` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | ## INS_USER_PRIVILEGES +注:SYSINFO 属性为 0 的用户不能查看此表。 + | # | **列名** | **数据类型** | **说明** | | --- | :----------: | ------------ | -------------------------------------------------------------------------------------------------------------------- | | 1 | user_name | VARCHAR(24) | 用户名 diff --git a/docs/zh/12-taos-sql/24-show.md b/docs/zh/12-taos-sql/24-show.md index 197a7c78d6..09333dd0b7 100644 --- a/docs/zh/12-taos-sql/24-show.md +++ 
b/docs/zh/12-taos-sql/24-show.md @@ -73,10 +73,10 @@ SHOW CREATE TABLE [db_name.]tb_name ## SHOW DATABASES ```sql -SHOW DATABASES; +SHOW [USER | SYSTEM] DATABASES; ``` -显示用户定义的所有数据库。 +显示定义的所有数据库。SYSTEM 指定只显示系统数据库。USER 指定只显示用户创建的数据库。 ## SHOW DNODES @@ -183,10 +183,10 @@ SHOW SUBSCRIPTIONS; ## SHOW TABLES ```sql -SHOW [db_name.]TABLES [LIKE 'pattern']; +SHOW [NORMAL | CHILD] [db_name.]TABLES [LIKE 'pattern']; ``` -显示当前数据库下的所有普通表和子表的信息。可以使用 LIKE 对表名进行模糊匹配。 +显示当前数据库下的所有普通表和子表的信息。可以使用 LIKE 对表名进行模糊匹配。NORMAL 指定只显示普通表信息, CHILD 指定只显示子表信息。 ## SHOW TABLE DISTRIBUTED diff --git a/docs/zh/14-reference/05-taosbenchmark.md b/docs/zh/14-reference/05-taosbenchmark.md index cc9f467138..597c188c11 100644 --- a/docs/zh/14-reference/05-taosbenchmark.md +++ b/docs/zh/14-reference/05-taosbenchmark.md @@ -395,6 +395,7 @@ taosBenchmark -A INT,DOUBLE,NCHAR,BINARY\(16\) ### 查询场景配置参数 查询场景下 `filetype` 必须设置为 `query`。 +`query_times` 指定运行查询的次数,数值类型 查询场景可以通过设置 `kill_slow_query_threshold` 和 `kill_slow_query_interval` 参数来控制杀掉慢查询语句的执行,threshold 控制如果 exec_usec 超过指定时间的查询将被 taosBenchmark 杀掉,单位为秒;interval 控制休眠时间,避免持续查询慢查询消耗 CPU ,单位为秒。 diff --git a/docs/zh/14-reference/06-taosdump.md b/docs/zh/14-reference/06-taosdump.md index 9fe3c5af7a..8972e587b0 100644 --- a/docs/zh/14-reference/06-taosdump.md +++ b/docs/zh/14-reference/06-taosdump.md @@ -106,7 +106,7 @@ Usage: taosdump [OPTION...] dbname [tbname ...] use letter and number only. Default is NOT. -n, --no-escape No escape char '`'. Default is using it. -Q, --dot-replace Repalce dot character with underline character in - the table name. + the table name.(Version 2.5.3) -T, --thread-num=THREAD_NUM Number of thread for dump in file. Default is 8. -C, --cloud=CLOUD_DSN specify a DSN to access TDengine cloud service @@ -116,6 +116,10 @@ Usage: taosdump [OPTION...] dbname [tbname ...] -?, --help Give this help list --usage Give a short usage message -V, --version Print program version + -W, --rename=RENAME-LIST Rename database name with new name during + importing data. RENAME-LIST: + "db1=newDB1|db2=newDB2" means rename db1 to newDB1 + and rename db2 to newDB2 (Version 2.5.4) Mandatory or optional arguments to long options are also mandatory or optional for any corresponding short options. 
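To make the newly documented `-W/--rename` option concrete, the following sketch dumps two databases and then re-imports them under new names. The dump directory `./dump`, the database names, and the connection parameters are placeholders for illustration only, and taosdump 2.5.4 or later is assumed since earlier versions do not recognize `-W`.

```
# Export db1 and db2 into a local directory (placeholder names and path)
taosdump -h localhost -P 6030 -o ./dump db1 db2

# Re-import the dump, renaming db1 to newDB1 and db2 to newDB2
taosdump -h localhost -P 6030 -i ./dump -W "db1=newDB1|db2=newDB2"
```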
diff --git a/docs/zh/17-operation/06-monitor.md b/docs/zh/17-operation/06-monitor.md index 4f8dccc78d..563a7fc6f7 100644 --- a/docs/zh/17-operation/06-monitor.md +++ b/docs/zh/17-operation/06-monitor.md @@ -102,22 +102,22 @@ TDinsight dashboard 数据来源于 log 库(存放监控数据的默认db, |field|type|is\_tag|comment| |:----|:---|:-----|:------| |ts|TIMESTAMP||timestamp| -|uptime|FLOAT||dnode uptime| +|uptime|FLOAT||dnode uptime,单位:天| |cpu\_engine|FLOAT||taosd cpu 使用率,从 `/proc//stat` 读取| |cpu\_system|FLOAT||服务器 cpu 使用率,从 `/proc/stat` 读取| |cpu\_cores|FLOAT||服务器 cpu 核数| |mem\_engine|INT||taosd 内存使用率,从 `/proc//status` 读取| -|mem\_system|INT||服务器可用内存| +|mem\_system|INT||服务器可用内存,单位 KB| |mem\_total|INT||服务器内存总量,单位 KB| -|disk\_engine|INT||| +|disk\_engine|INT||单位 bytes| |disk\_used|BIGINT||data dir 挂载的磁盘使用量,单位 bytes| |disk\_total|BIGINT||data dir 挂载的磁盘总容量,单位 bytes| -|net\_in|FLOAT||网络吞吐率,从 `/proc/net/dev` 中读取的 received bytes。单位 kb/s| -|net\_out|FLOAT||网络吞吐率,从 `/proc/net/dev` 中读取的 transmit bytes。单位 kb/s| -|io\_read|FLOAT||io 吞吐率,从 `/proc//io` 中读取的 rchar 与上次数值计算之后,计算得到速度。单位 kb/s| -|io\_write|FLOAT||io 吞吐率,从 `/proc//io` 中读取的 wchar 与上次数值计算之后,计算得到速度。单位 kb/s| -|io\_read\_disk|FLOAT||磁盘 io 吞吐率,从 `/proc//io` 中读取的 read_bytes。单位 kb/s| -|io\_write\_disk|FLOAT||磁盘 io 吞吐率,从 `/proc//io` 中读取的 write_bytes。单位 kb/s| +|net\_in|FLOAT||网络吞吐率,从 `/proc/net/dev` 中读取的 received bytes。单位 byte/s| +|net\_out|FLOAT||网络吞吐率,从 `/proc/net/dev` 中读取的 transmit bytes。单位 byte/s| +|io\_read|FLOAT||io 吞吐率,从 `/proc//io` 中读取的 rchar 与上次数值计算之后,计算得到速度。单位 byte/s| +|io\_write|FLOAT||io 吞吐率,从 `/proc//io` 中读取的 wchar 与上次数值计算之后,计算得到速度。单位 byte/s| +|io\_read\_disk|FLOAT||磁盘 io 吞吐率,从 `/proc//io` 中读取的 read_bytes。单位 byte/s| +|io\_write\_disk|FLOAT||磁盘 io 吞吐率,从 `/proc//io` 中读取的 write_bytes。单位 byte/s| |req\_select|INT||两个间隔内发生的查询请求数目| |req\_select\_rate|FLOAT||两个间隔内的查询请求速度 = `req_select / monitorInterval`| |req\_insert|INT||两个间隔内发生的写入请求,包含的单条数据数目| @@ -146,9 +146,9 @@ TDinsight dashboard 数据来源于 log 库(存放监控数据的默认db, |ts|TIMESTAMP||timestamp| |name|NCHAR||data 目录,一般为 `/var/lib/taos`| |level|INT||0、1、2 多级存储级别| -|avail|BIGINT||data 目录可用空间| -|used|BIGINT||data 目录已使用空间| -|total|BIGINT||data 目录空间| +|avail|BIGINT||data 目录可用空间。单位 byte| +|used|BIGINT||data 目录已使用空间。单位 byte| +|total|BIGINT||data 目录空间。单位 byte| |dnode\_id|INT|TAG|dnode id| |dnode\_ep|NCHAR|TAG|dnode endpoint| |cluster\_id|NCHAR|TAG|cluster id| @@ -161,9 +161,9 @@ TDinsight dashboard 数据来源于 log 库(存放监控数据的默认db, |:----|:---|:-----|:------| |ts|TIMESTAMP||timestamp| |name|NCHAR||log 目录名,一般为 `/var/log/taos/`| -|avail|BIGINT||log 目录可用空间| -|used|BIGINT||log 目录已使用空间| -|total|BIGINT||log 目录空间| +|avail|BIGINT||log 目录可用空间。单位 byte| +|used|BIGINT||log 目录已使用空间。单位 byte| +|total|BIGINT||log 目录空间。单位 byte| |dnode\_id|INT|TAG|dnode id| |dnode\_ep|NCHAR|TAG|dnode endpoint| |cluster\_id|NCHAR|TAG|cluster id| @@ -176,9 +176,9 @@ TDinsight dashboard 数据来源于 log 库(存放监控数据的默认db, |:----|:---|:-----|:------| |ts|TIMESTAMP||timestamp| |name|NCHAR||temp 目录名,一般为 `/tmp/`| -|avail|BIGINT||temp 目录可用空间| -|used|BIGINT||temp 目录已使用空间| -|total|BIGINT||temp 目录空间| +|avail|BIGINT||temp 目录可用空间。单位 byte| +|used|BIGINT||temp 目录已使用空间。单位 byte| +|total|BIGINT||temp 目录空间。单位 byte| |dnode\_id|INT|TAG|dnode id| |dnode\_ep|NCHAR|TAG|dnode endpoint| |cluster\_id|NCHAR|TAG|cluster id| diff --git a/docs/zh/20-third-party/70-seeq.md b/docs/zh/20-third-party/70-seeq.md index d5b7463925..f1e11b1b98 100644 --- a/docs/zh/20-third-party/70-seeq.md +++ b/docs/zh/20-third-party/70-seeq.md @@ -14,40 +14,7 @@ Seeq 是制造业和工业互联网(IIOT)高级分析软件。Seeq 支持在 ### Seeq 安装方法 -从 [Seeq 
官网](https://www.seeq.com/customer-download)下载相关软件,例如 Seeq Server 和 Seeq Data Lab 等。 - -### Seeq Server 安装和启动 - -``` -tar xvzf seeq-server-xxx.tar.gz -cd seeq-server-installer -sudo ./install - -sudo seeq service enable -sudo seeq start -``` - -### Seeq Data Lab Server 安装和启动 - -Seeq Data Lab 需要安装在和 Seeq Server 不同的服务器上,并通过配置和 Seeq Server 互联。详细安装配置指令参见[Seeq 官方文档](https://support.seeq.com/space/KB/1034059842)。 - -``` -tar xvf seeq-data-lab--64bit-linux.tar.gz -sudo seeq-data-lab-installer/install -f /opt/seeq/seeq-data-lab -g /var/opt/seeq -u seeq -sudo seeq config set Network/DataLab/Hostname localhost -sudo seeq config set Network/DataLab/Port 34231 # the port of the Data Lab server (usually 34231) -sudo seeq config set Network/Hostname # the host IP or URL of the main Seeq Server - -# If the main Seeq server is configured to listen over HTTPS -sudo seeq config set Network/Webserver/SecurePort 443 # the secure port of the main Seeq Server (usually 443) - -# If the main Seeq server is NOT configured to listen over HTTPS -sudo seeq config set Network/Webserver/Port - -#On the main Seeq server, open a Seeq Command Prompt and set the hostname of the Data Lab server: -sudo seeq config set Network/DataLab/Hostname # the host IP (not URL) of the Data Lab server -sudo seeq config set Network/DataLab/Port 34231 # the port of the Data Lab server (usually 34231 -``` +从 [Seeq 官网](https://www.seeq.com/customer-download)下载相关软件,例如 Seeq Server 和 Seeq Data Lab 等。Seeq Data Lab 需要安装在和 Seeq Server 不同的服务器上,并通过配置和 Seeq Server 互联。详细安装配置指令参见[Seeq 知识库]( https://support.seeq.com/kb/latest/cloud/)。 ## TDengine 本地实例安装方法 diff --git a/examples/JDBC/JDBCDemo/README-jdbc-windows.md b/examples/JDBC/JDBCDemo/README-jdbc-windows.md index 5a781f40f7..e91a953cd1 100644 --- a/examples/JDBC/JDBCDemo/README-jdbc-windows.md +++ b/examples/JDBC/JDBCDemo/README-jdbc-windows.md @@ -44,17 +44,17 @@ OS name: "windows 10", version: "10.0", arch: "amd64", family: "windows" - + D:\apache-maven-localRepository - - alimaven - aliyun maven - http://maven.aliyun.com/nexus/content/groups/public/ - central - + + alimaven + aliyun maven + http://maven.aliyun.com/nexus/content/groups/public/ + central + @@ -126,7 +126,7 @@ https://www.taosdata.com/cn/all-downloads/ 修改client的hosts文件(C:\Windows\System32\drivers\etc\hosts),将server的hostname和ip配置到client的hosts文件中 ``` -192.168.236.136 td01 +192.168.236.136 td01 ``` 配置完成后,在命令行内使用TDengine CLI连接server端 diff --git a/examples/go/BUILD.md b/examples/go/BUILD.md new file mode 100644 index 0000000000..dd607001cc --- /dev/null +++ b/examples/go/BUILD.md @@ -0,0 +1,3 @@ +go mod init demo +go mod tidy +go build diff --git a/include/common/tdataformat.h b/include/common/tdataformat.h index e04bdd1b07..aed1d03fc1 100644 --- a/include/common/tdataformat.h +++ b/include/common/tdataformat.h @@ -108,7 +108,7 @@ int32_t tBufferReserve(SBuffer *pBuffer, int64_t nData, void **ppData); int32_t tRowBuild(SArray *aColVal, const STSchema *pTSchema, SRow **ppRow); int32_t tRowGet(SRow *pRow, STSchema *pTSchema, int32_t iCol, SColVal *pColVal); void tRowDestroy(SRow *pRow); -void tRowSort(SArray *aRowP); +int32_t tRowSort(SArray *aRowP); int32_t tRowMerge(SArray *aRowP, STSchema *pTSchema, int8_t flag); int32_t tRowUpsertColData(SRow *pRow, STSchema *pTSchema, SColData *aColData, int32_t nColData, int32_t flag); diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 18a0d119f8..d08b424e9c 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -768,6 +768,8 @@ typedef struct { char* pAst2; int64_t 
deleteMark1; int64_t deleteMark2; + int32_t sqlLen; + char* sql; } SMCreateStbReq; int32_t tSerializeSMCreateStbReq(void* buf, int32_t bufLen, SMCreateStbReq* pReq); @@ -788,10 +790,13 @@ typedef struct { int8_t source; // 1-taosX or 0-taosClient int8_t reserved[6]; tb_uid_t suid; + int32_t sqlLen; + char* sql; } SMDropStbReq; int32_t tSerializeSMDropStbReq(void* buf, int32_t bufLen, SMDropStbReq* pReq); int32_t tDeserializeSMDropStbReq(void* buf, int32_t bufLen, SMDropStbReq* pReq); +void tFreeSMDropStbReq(SMDropStbReq *pReq); typedef struct { char name[TSDB_TABLE_FNAME_LEN]; @@ -801,6 +806,8 @@ typedef struct { int32_t ttl; int32_t commentLen; char* comment; + int32_t sqlLen; + char* sql; } SMAlterStbReq; int32_t tSerializeSMAlterStbReq(void* buf, int32_t bufLen, SMAlterStbReq* pReq); @@ -871,10 +878,13 @@ int32_t tDeserializeSCreateAcctReq(void* buf, int32_t bufLen, SCreateAcctReq* pR typedef struct { char user[TSDB_USER_LEN]; + int32_t sqlLen; + char* sql; } SDropUserReq, SDropAcctReq; int32_t tSerializeSDropUserReq(void* buf, int32_t bufLen, SDropUserReq* pReq); int32_t tDeserializeSDropUserReq(void* buf, int32_t bufLen, SDropUserReq* pReq); +void tFreeSDropUserReq(SDropUserReq *pReq); typedef struct SIpV4Range{ uint32_t ip; @@ -888,19 +898,21 @@ typedef struct { SIpWhiteList* cloneIpWhiteList(SIpWhiteList* pIpWhiteList); typedef struct { - int8_t createType; - int8_t superUser; // denote if it is a super user or not - int8_t sysInfo; - int8_t enable; - char user[TSDB_USER_LEN]; - char pass[TSDB_USET_PASSWORD_LEN]; + int8_t createType; + int8_t superUser; // denote if it is a super user or not + int8_t sysInfo; + int8_t enable; + char user[TSDB_USER_LEN]; + char pass[TSDB_USET_PASSWORD_LEN]; int32_t numIpRanges; SIpV4Range* pIpRanges; + int32_t sqlLen; + char* sql; } SCreateUserReq; int32_t tSerializeSCreateUserReq(void* buf, int32_t bufLen, SCreateUserReq* pReq); int32_t tDeserializeSCreateUserReq(void* buf, int32_t bufLen, SCreateUserReq* pReq); -void tFreeSCreateUserReq(SCreateUserReq* pReq); +void tFreeSCreateUserReq(SCreateUserReq *pReq); typedef struct { int64_t ver; @@ -927,18 +939,20 @@ int32_t tSerializeRetrieveIpWhite(void* buf, int32_t bufLen, SRetrieveIpWhiteReq int32_t tDeserializeRetrieveIpWhite(void* buf, int32_t bufLen, SRetrieveIpWhiteReq* pReq); typedef struct { - int8_t alterType; - int8_t superUser; - int8_t sysInfo; - int8_t enable; - char user[TSDB_USER_LEN]; - char pass[TSDB_USET_PASSWORD_LEN]; - char objname[TSDB_DB_FNAME_LEN]; // db or topic - char tabName[TSDB_TABLE_NAME_LEN]; - char* tagCond; - int32_t tagCondLen; + int8_t alterType; + int8_t superUser; + int8_t sysInfo; + int8_t enable; + char user[TSDB_USER_LEN]; + char pass[TSDB_USET_PASSWORD_LEN]; + char objname[TSDB_DB_FNAME_LEN]; // db or topic + char tabName[TSDB_TABLE_NAME_LEN]; + char* tagCond; + int32_t tagCondLen; int32_t numIpRanges; SIpV4Range* pIpRanges; + int32_t sqlLen; + char* sql; } SAlterUserReq; int32_t tSerializeSAlterUserReq(void* buf, int32_t bufLen, SAlterUserReq* pReq); @@ -1118,6 +1132,8 @@ typedef struct { int16_t hashPrefix; int16_t hashSuffix; int32_t tsdbPageSize; + int32_t sqlLen; + char* sql; } SCreateDbReq; int32_t tSerializeSCreateDbReq(void* buf, int32_t bufLen, SCreateDbReq* pReq); @@ -1144,18 +1160,24 @@ typedef struct { int32_t minRows; int32_t walRetentionPeriod; int32_t walRetentionSize; + int32_t sqlLen; + char* sql; } SAlterDbReq; int32_t tSerializeSAlterDbReq(void* buf, int32_t bufLen, SAlterDbReq* pReq); int32_t tDeserializeSAlterDbReq(void* buf, int32_t bufLen, 
SAlterDbReq* pReq); +void tFreeSAlterDbReq(SAlterDbReq* pReq); typedef struct { char db[TSDB_DB_FNAME_LEN]; int8_t ignoreNotExists; + int32_t sqlLen; + char* sql; } SDropDbReq; int32_t tSerializeSDropDbReq(void* buf, int32_t bufLen, SDropDbReq* pReq); int32_t tDeserializeSDropDbReq(void* buf, int32_t bufLen, SDropDbReq* pReq); +void tFreeSDropDbReq(SDropDbReq* pReq); typedef struct { char db[TSDB_DB_FNAME_LEN]; @@ -1350,10 +1372,13 @@ void tFreeSUserAuthBatchRsp(SUserAuthBatchRsp* pRsp); typedef struct { char db[TSDB_DB_FNAME_LEN]; STimeWindow timeRange; + int32_t sqlLen; + char* sql; } SCompactDbReq; int32_t tSerializeSCompactDbReq(void* buf, int32_t bufLen, SCompactDbReq* pReq); int32_t tDeserializeSCompactDbReq(void* buf, int32_t bufLen, SCompactDbReq* pReq); +void tFreeSCompactDbReq(SCompactDbReq *pReq); typedef struct { char name[TSDB_FUNC_NAME_LEN]; @@ -1933,10 +1958,13 @@ void tFreeSExplainRsp(SExplainRsp* pRsp); typedef struct { char fqdn[TSDB_FQDN_LEN]; // end point, hostname:port int32_t port; + int32_t sqlLen; + char* sql; } SCreateDnodeReq; int32_t tSerializeSCreateDnodeReq(void* buf, int32_t bufLen, SCreateDnodeReq* pReq); int32_t tDeserializeSCreateDnodeReq(void* buf, int32_t bufLen, SCreateDnodeReq* pReq); +void tFreeSCreateDnodeReq(SCreateDnodeReq* pReq); typedef struct { int32_t dnodeId; @@ -1944,10 +1972,13 @@ typedef struct { int32_t port; int8_t force; int8_t unsafe; + int32_t sqlLen; + char* sql; } SDropDnodeReq; int32_t tSerializeSDropDnodeReq(void* buf, int32_t bufLen, SDropDnodeReq* pReq); int32_t tDeserializeSDropDnodeReq(void* buf, int32_t bufLen, SDropDnodeReq* pReq); +void tFreeSDropDnodeReq(SDropDnodeReq* pReq); enum { RESTORE_TYPE__ALL = 1, @@ -1959,19 +1990,25 @@ enum { typedef struct { int32_t dnodeId; int8_t restoreType; + int32_t sqlLen; + char* sql; } SRestoreDnodeReq; int32_t tSerializeSRestoreDnodeReq(void* buf, int32_t bufLen, SRestoreDnodeReq* pReq); int32_t tDeserializeSRestoreDnodeReq(void* buf, int32_t bufLen, SRestoreDnodeReq* pReq); +void tFreeSRestoreDnodeReq(SRestoreDnodeReq *pReq); typedef struct { int32_t dnodeId; char config[TSDB_DNODE_CONFIG_LEN]; char value[TSDB_DNODE_VALUE_LEN]; + int32_t sqlLen; + char* sql; } SMCfgDnodeReq; int32_t tSerializeSMCfgDnodeReq(void* buf, int32_t bufLen, SMCfgDnodeReq* pReq); int32_t tDeserializeSMCfgDnodeReq(void* buf, int32_t bufLen, SMCfgDnodeReq* pReq); +void tFreeSMCfgDnodeReq(SMCfgDnodeReq *pReq); typedef struct { char config[TSDB_DNODE_CONFIG_LEN]; @@ -1983,12 +2020,15 @@ int32_t tDeserializeSDCfgDnodeReq(void* buf, int32_t bufLen, SDCfgDnodeReq* pReq typedef struct { int32_t dnodeId; + int32_t sqlLen; + char* sql; } SMCreateMnodeReq, SMDropMnodeReq, SDDropMnodeReq, SMCreateQnodeReq, SMDropQnodeReq, SDCreateQnodeReq, SDDropQnodeReq, SMCreateSnodeReq, SMDropSnodeReq, SDCreateSnodeReq, SDDropSnodeReq; int32_t tSerializeSCreateDropMQSNodeReq(void* buf, int32_t bufLen, SMCreateQnodeReq* pReq); int32_t tDeserializeSCreateDropMQSNodeReq(void* buf, int32_t bufLen, SMCreateQnodeReq* pReq); - +void tFreeSMCreateQnodeReq(SMCreateQnodeReq *pReq); +void tFreeSDDropQnodeReq(SDDropQnodeReq* pReq); typedef struct { int8_t replica; SReplica replicas[TSDB_MAX_REPLICA]; @@ -2023,10 +2063,13 @@ int32_t tDeserializeSKillTransReq(void* buf, int32_t bufLen, SKillTransReq* pReq typedef struct { int32_t useless; // useless + int32_t sqlLen; + char* sql; } SBalanceVgroupReq; int32_t tSerializeSBalanceVgroupReq(void* buf, int32_t bufLen, SBalanceVgroupReq* pReq); int32_t tDeserializeSBalanceVgroupReq(void* buf, int32_t bufLen, 
SBalanceVgroupReq* pReq); +void tFreeSBalanceVgroupReq(SBalanceVgroupReq *pReq); typedef struct { int32_t vgId1; @@ -2041,18 +2084,24 @@ typedef struct { int32_t dnodeId1; int32_t dnodeId2; int32_t dnodeId3; + int32_t sqlLen; + char* sql; } SRedistributeVgroupReq; int32_t tSerializeSRedistributeVgroupReq(void* buf, int32_t bufLen, SRedistributeVgroupReq* pReq); int32_t tDeserializeSRedistributeVgroupReq(void* buf, int32_t bufLen, SRedistributeVgroupReq* pReq); +void tFreeSRedistributeVgroupReq(SRedistributeVgroupReq *pReq); typedef struct { int32_t useless; int32_t vgId; + int32_t sqlLen; + char* sql; } SBalanceVgroupLeaderReq; int32_t tSerializeSBalanceVgroupLeaderReq(void* buf, int32_t bufLen, SBalanceVgroupLeaderReq* pReq); int32_t tDeserializeSBalanceVgroupLeaderReq(void* buf, int32_t bufLen, SBalanceVgroupLeaderReq* pReq); +void tFreeSBalanceVgroupLeaderReq(SBalanceVgroupLeaderReq *pReq); typedef struct { int32_t vgId; @@ -2526,10 +2575,13 @@ typedef struct { typedef struct { char name[TSDB_TOPIC_FNAME_LEN]; int8_t igNotExists; + int32_t sqlLen; + char* sql; } SMDropTopicReq; int32_t tSerializeSMDropTopicReq(void* buf, int32_t bufLen, SMDropTopicReq* pReq); int32_t tDeserializeSMDropTopicReq(void* buf, int32_t bufLen, SMDropTopicReq* pReq); +void tFreeSMDropTopicReq(SMDropTopicReq *pReq); typedef struct { char topic[TSDB_TOPIC_FNAME_LEN]; @@ -2625,6 +2677,8 @@ typedef struct SVCreateTbReq { SSchemaWrapper schemaRow; } ntb; }; + int32_t sqlLen; + char* sql; } SVCreateTbReq; int tEncodeSVCreateTbReq(SEncoder* pCoder, const SVCreateTbReq* pReq); @@ -2636,6 +2690,7 @@ static FORCE_INLINE void tdDestroySVCreateTbReq(SVCreateTbReq* req) { return; } + taosMemoryFreeClear(req->sql); taosMemoryFreeClear(req->name); taosMemoryFreeClear(req->comment); if (req->type == TSDB_CHILD_TABLE) { @@ -3099,6 +3154,8 @@ typedef struct { typedef struct { char name[TSDB_STREAM_FNAME_LEN]; int8_t igNotExists; + int32_t sqlLen; + char* sql; } SMDropStreamReq; typedef struct { @@ -3112,12 +3169,20 @@ typedef struct { int32_t taskId; } SVDropStreamTaskReq; +typedef struct { + SMsgHead head; + int64_t streamId; + int32_t taskId; + int64_t dataVer; +} SVStreamTaskVerUpdateReq; + typedef struct { int8_t reserved; } SVDropStreamTaskRsp; int32_t tSerializeSMDropStreamReq(void* buf, int32_t bufLen, const SMDropStreamReq* pReq); int32_t tDeserializeSMDropStreamReq(void* buf, int32_t bufLen, SMDropStreamReq* pReq); +void tFreeSMDropStreamReq(SMDropStreamReq* pReq); typedef struct { char name[TSDB_STREAM_FNAME_LEN]; @@ -3276,7 +3341,7 @@ typedef struct { SMsgHead head; int64_t streamId; int32_t taskId; -} SVPauseStreamTaskReq; +} SVPauseStreamTaskReq, SVResetStreamTaskReq; typedef struct { int8_t reserved; diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 7e030bef3f..4a2ae18765 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -305,11 +305,11 @@ enum { // WARN: new msg should be appended to segment tail TD_DEF_MSG_TYPE(TDMT_SYNC_FORCE_FOLLOWER, "sync-force-become-follower", NULL, NULL) TD_NEW_MSG_SEG(TDMT_VND_STREAM_MSG) -// TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TRIGGER, "vnode-stream-trigger", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_SCAN_HISTORY, "vnode-stream-scan-history", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_SCAN_HISTORY_FINISH, "vnode-stream-scan-history-finish", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_CHECK_POINT_SOURCE, "vnode-stream-checkpoint-source", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TASK_UPDATE, "vnode-stream-update", NULL, NULL) + 
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TASK_RESET, "vnode-stream-reset", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TASK_CHECK, "vnode-stream-task-check", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_MAX_MSG, "vnd-stream-max", NULL, NULL) diff --git a/include/libs/audit/audit.h b/include/libs/audit/audit.h index 8465ec510e..85d462b96b 100644 --- a/include/libs/audit/audit.h +++ b/include/libs/audit/audit.h @@ -29,7 +29,7 @@ extern "C" { #endif -#define AUDIT_DETAIL_MAX 16000 +#define AUDIT_DETAIL_MAX 65472 typedef struct { const char *server; @@ -39,7 +39,8 @@ typedef struct { int32_t auditInit(const SAuditCfg *pCfg); void auditSend(SJson *pJson); -void auditRecord(SRpcMsg *pReq, int64_t clusterId, char *operation, char *target1, char *target2, char *detail); +void auditRecord(SRpcMsg *pReq, int64_t clusterId, char *operation, char *target1, char *target2, + char *detail, int32_t len); #ifdef __cplusplus } diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 3eb624f932..629efa00b3 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -29,7 +29,23 @@ extern "C" { #ifndef _STREAM_H_ #define _STREAM_H_ -typedef struct SStreamTask SStreamTask; +#define ONE_MiB_F (1048576.0) +#define ONE_KiB_F (1024.0) +#define SIZE_IN_MiB(_v) ((_v) / ONE_MiB_F) +#define SIZE_IN_KiB(_v) ((_v) / ONE_KiB_F) + + +#define TASK_DOWNSTREAM_READY 0x0 +#define TASK_DOWNSTREAM_NOT_READY 0x1 +#define TASK_DOWNSTREAM_NOT_LEADER 0x2 +#define TASK_SELF_NEW_STAGE 0x3 + +#define NODE_ROLE_UNINIT 0x1 +#define NODE_ROLE_LEADER 0x2 +#define NODE_ROLE_FOLLOWER 0x3 + +typedef struct SStreamTask SStreamTask; +typedef struct SStreamQueue SStreamQueue; #define SSTREAM_TASK_VER 2 enum { @@ -64,6 +80,7 @@ enum { TASK_INPUT_STATUS__NORMAL = 1, TASK_INPUT_STATUS__BLOCKED, TASK_INPUT_STATUS__FAILED, + TASK_INPUT_STATUS__REFUSED, }; enum { @@ -106,6 +123,7 @@ typedef struct { } SStreamQueueItem; typedef void FTbSink(SStreamTask* pTask, void* vnode, void* data); +typedef void FSmaSink(void* vnode, int64_t smaId, const SArray* data); typedef int32_t FTaskExpand(void* ahandle, SStreamTask* pTask, int64_t ver); typedef struct { @@ -154,8 +172,6 @@ typedef struct { int64_t size; } SStreamQueueRes; -void streamFreeQitem(SStreamQueueItem* data); - #if 0 bool streamQueueResEmpty(const SStreamQueueRes* pRes); int64_t streamQueueResSize(const SStreamQueueRes* pRes); @@ -175,22 +191,9 @@ int32_t streamQueuePush(SStreamQueue1* pQueue, SStreamQueueItem* pItem); SStreamQueueRes streamQueueGetRes(SStreamQueue1* pQueue); #endif -typedef struct { - STaosQueue* pQueue; - STaosQall* qall; - void* qItem; - int8_t status; -} SStreamQueue; - int32_t streamInit(); void streamCleanUp(); -SStreamQueue* streamQueueOpen(int64_t cap); -void streamQueueClose(SStreamQueue* pQueue, int32_t taskId); -void streamQueueProcessSuccess(SStreamQueue* queue); -void streamQueueProcessFail(SStreamQueue* queue); -void* streamQueueNextItem(SStreamQueue* pQueue); - SStreamDataSubmit* streamDataSubmitNew(SPackedData* pData, int32_t type); void streamDataSubmitDestroy(SStreamDataSubmit* pDataSubmit); @@ -204,7 +207,7 @@ typedef struct { int32_t taskId; int32_t nodeId; SEpSet epSet; -} STaskDispatcherFixedEp; +} STaskDispatcherFixed; typedef struct { char stbFullName[TSDB_TABLE_FNAME_LEN]; @@ -222,8 +225,6 @@ typedef struct { SSHashObj* pTblInfo; } STaskSinkTb; -typedef void FSmaSink(void* vnode, int64_t smaId, const SArray* data); - typedef struct { int64_t smaId; // following are not applicable to encoder and decoder @@ -244,10 +245,10 
@@ typedef struct SStreamChildEpInfo { int64_t stage; // upstream task stage value, to denote if the upstream node has restart/replica changed/transfer } SStreamChildEpInfo; -typedef struct SStreamTaskKey { +typedef struct STaskId { int64_t streamId; - int32_t taskId; -} SStreamTaskKey; + int64_t taskId; +} STaskId; typedef struct SStreamTaskId { int64_t streamId; @@ -256,19 +257,22 @@ typedef struct SStreamTaskId { } SStreamTaskId; typedef struct SCheckpointInfo { + int64_t startTs; int64_t checkpointId; - int64_t checkpointVer; // latest checkpointId version + int64_t checkpointVer; // latest checkpointId version int64_t nextProcessVer; // current offset in WAL, not serialize it + int64_t failedId; // record the latest failed checkpoint id } SCheckpointInfo; typedef struct SStreamStatus { - int8_t taskStatus; - int8_t downstreamReady; // downstream tasks are all ready now, if this flag is set - int8_t schedStatus; - int8_t keepTaskStatus; - bool appendTranstateBlock; // has append the transfer state data block already, todo: remove it - int8_t timerActive; // timer is active - int8_t pauseAllowed; // allowed task status to be set to be paused + int8_t taskStatus; + int8_t downstreamReady; // downstream tasks are all ready now, if this flag is set + int8_t schedStatus; + int8_t keepTaskStatus; + bool appendTranstateBlock; // has append the transfer state data block already, todo: remove it + int8_t pauseAllowed; // allowed task status to be set to be paused + int32_t timerActive; // timer is active + int32_t inScanHistorySentinel; } SStreamStatus; typedef struct SDataRange { @@ -287,21 +291,27 @@ typedef struct SSTaskBasicInfo { int64_t triggerParam; // in msec } SSTaskBasicInfo; +typedef struct SStreamDispatchReq SStreamDispatchReq; +typedef struct STokenBucket STokenBucket; +typedef struct SMetaHbInfo SMetaHbInfo; + typedef struct SDispatchMsgInfo { - void* pData; // current dispatch data + SStreamDispatchReq* pData; // current dispatch data + int8_t dispatchMsgType; int16_t msgType; // dispatch msg type int32_t retryCount; // retry send data count - int64_t blockingTs; // output blocking timestamp + int64_t startTs; // dispatch start time, record total elapsed time for dispatch + SArray* pRetryList; // current dispatch successfully completed node of downstream + void* pTimer; // used to dispatch data after a given time duration } SDispatchMsgInfo; -typedef struct STaskOutputInfo { - int8_t type; +typedef struct STaskOutputQueue { int8_t status; SStreamQueue* queue; -} STaskOutputInfo; +} STaskOutputQueue; typedef struct STaskInputInfo { - int8_t status; + int8_t status; SStreamQueue* queue; } STaskInputInfo; @@ -310,62 +320,76 @@ typedef struct STaskSchedInfo { void* pTimer; } STaskSchedInfo; -typedef struct SSinkTaskRecorder { +typedef struct SSinkRecorder { int64_t numOfSubmit; int64_t numOfBlocks; int64_t numOfRows; -} SSinkTaskRecorder; + int64_t dataSize; +} SSinkRecorder; -typedef struct { - int64_t created; - int64_t init; - int64_t step1Start; - int64_t step2Start; - int64_t sinkStart; -} STaskTimestamp; +typedef struct STaskExecStatisInfo { + int64_t created; + int64_t init; + int64_t start; + int64_t step1Start; + int64_t step2Start; + int32_t updateCount; + int64_t latestUpdateTs; + int32_t processDataBlocks; + int64_t processDataSize; + int32_t dispatch; + int64_t dispatchDataSize; + int32_t checkpoint; + SSinkRecorder sink; +} STaskExecStatisInfo; -typedef struct STokenBucket { - int32_t capacity; // total capacity - int64_t fillTimestamp;// fill timestamp - int32_t 
numOfToken; // total available tokens - int32_t rate; // number of token per second -} STokenBucket; +typedef struct SHistoryTaskInfo { + STaskId id; + void* pTimer; + int32_t tickCount; + int32_t retryTimes; + int32_t waitInterval; +} SHistoryTaskInfo; -struct SStreamTask { - int64_t ver; - SStreamTaskId id; - SSTaskBasicInfo info; - STaskOutputInfo outputInfo; - STaskInputInfo inputInfo; - STaskSchedInfo schedInfo; - SDispatchMsgInfo msgInfo; - SStreamStatus status; - SCheckpointInfo chkInfo; - STaskExec exec; - SDataRange dataRange; - SStreamTaskId historyTaskId; - SStreamTaskId streamTaskId; - STaskTimestamp tsInfo; - SArray* pReadyMsgList; // SArray - TdThreadMutex lock; // secure the operation of set task status and puting data into inputQ - SArray* pUpstreamInfoList; - - // output +typedef struct STaskOutputInfo { union { - STaskDispatcherFixedEp fixedEpDispatcher; + STaskDispatcherFixed fixedDispatcher; STaskDispatcherShuffle shuffleDispatcher; STaskSinkTb tbSink; STaskSinkSma smaSink; STaskSinkFetch fetchSink; }; - SSinkTaskRecorder sinkRecorder; - STokenBucket tokenBucket; + int8_t type; + STokenBucket* pTokenBucket; +} STaskOutputInfo; - void* launchTaskTimer; - SMsgCb* pMsgCb; // msg handle - SStreamState* pState; // state backend - SArray* pRspMsgList; +typedef struct SUpstreamInfo { + SArray* pList; + int32_t numOfClosed; +} SUpstreamInfo; +struct SStreamTask { + int64_t ver; + SStreamTaskId id; + SSTaskBasicInfo info; + STaskOutputQueue outputq; + STaskInputInfo inputInfo; + STaskSchedInfo schedInfo; + STaskOutputInfo outputInfo; + SDispatchMsgInfo msgInfo; + SStreamStatus status; + SCheckpointInfo chkInfo; + STaskExec exec; + SDataRange dataRange; + SHistoryTaskInfo hTaskInfo; + STaskId streamTaskId; + STaskExecStatisInfo execInfo; + SArray* pReadyMsgList; // SArray + TdThreadMutex lock; // secure the operation of set task status and puting data into inputQ + SMsgCb* pMsgCb; // msg handle + SStreamState* pState; // state backend + SArray* pRspMsgList; + SUpstreamInfo upstreamInfo; // the followings attributes don't be serialized int32_t notReadyTasks; int32_t numOfWaitingUpstream; @@ -381,11 +405,13 @@ struct SStreamTask { char reserve[256]; }; -typedef struct SMetaHbInfo { - tmr_h hbTmr; - int32_t stopFlag; - int32_t tickCounter; -} SMetaHbInfo; +typedef struct STaskStartInfo { + int64_t startTs; + int64_t readyTs; + int32_t startedAfterNodeUpdate; + SHashObj* pReadyTaskSet; // tasks that are all ready for running stream processing + int32_t elapsedTime; +} STaskStartInfo; // meta typedef struct SStreamMeta { @@ -393,22 +419,25 @@ typedef struct SStreamMeta { TDB* db; TTB* pTaskDb; TTB* pCheckpointDb; - SHashObj* pTasks; - SArray* pTaskList; // SArray + SHashObj* pTasksMap; + SArray* pTaskList; // SArray void* ahandle; TXN* txn; FTaskExpand* expandFunc; int32_t vgId; int64_t stage; + int32_t role; + STaskStartInfo startInfo; SRWLatch lock; int32_t walScanCounter; void* streamBackend; int64_t streamBackendRid; SHashObj* pTaskBackendUnique; TdThreadMutex backendMutex; - SMetaHbInfo hbInfo; - int32_t closedTask; - int32_t totalTasks; // this value should be increased when a new task is added into the meta + SMetaHbInfo* pHbInfo; + SHashObj* pUpdateTaskSet; + int32_t numOfStreamTasks; // this value should be increased when a new task is added into the meta + int32_t numOfPausedTasks; int32_t chkptNotReadyTasks; int64_t rid; @@ -417,26 +446,25 @@ typedef struct SStreamMeta { SArray* chkpInUse; int32_t chkpCap; SRWLatch chkpDirLock; - int32_t pauseTaskNum; } SStreamMeta; int32_t 
tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo); int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamChildEpInfo* pInfo); -SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, int8_t fillHistory, int64_t triggerParam, - SArray* pTaskList); +SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, bool fillHistory, int64_t triggerParam, + SArray* pTaskList, bool hasFillhistory); int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask); int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask); void tFreeStreamTask(SStreamTask* pTask); int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, int64_t ver); int32_t tDecodeStreamTaskChkInfo(SDecoder* pDecoder, SCheckpointInfo* pChkpInfo); -int32_t tDecodeStreamTaskId(SDecoder* pDecoder, SStreamTaskId* pTaskId); +int32_t tDecodeStreamTaskId(SDecoder* pDecoder, STaskId* pTaskId); int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem); int32_t streamTaskPutDataIntoOutputQ(SStreamTask* pTask, SStreamDataBlock* pBlock); int32_t streamTaskPutTranstateIntoInputQ(SStreamTask* pTask); -bool streamQueueIsFull(const STaosQueue* pQueue, bool inputQ); +bool streamQueueIsFull(const SStreamQueue* pQueue); typedef struct { SMsgHead head; @@ -444,11 +472,12 @@ typedef struct { int32_t taskId; } SStreamTaskRunReq; -typedef struct { +struct SStreamDispatchReq { int32_t type; int64_t stage; // nodeId from upstream task int64_t streamId; int32_t taskId; + int32_t msgId; // msg id to identify if the incoming msg from the same sender int32_t srcVgId; int32_t upstreamTaskId; int32_t upstreamChildId; @@ -457,7 +486,7 @@ typedef struct { int64_t totalLen; SArray* dataLen; // SArray SArray* data; // SArray -} SStreamDispatchReq; +}; typedef struct { int64_t streamId; @@ -465,7 +494,9 @@ typedef struct { int32_t upstreamTaskId; int32_t downstreamNodeId; int32_t downstreamTaskId; + int32_t msgId; int8_t inputStatus; + int64_t stage; } SStreamDispatchRsp; typedef struct { @@ -522,7 +553,7 @@ typedef struct { int32_t downstreamTaskId; int32_t upstreamNodeId; int32_t childId; -} SStreamScanHistoryFinishReq, SStreamTransferReq; +} SStreamScanHistoryFinishReq; int32_t tEncodeStreamScanHistoryFinishReq(SEncoder* pEncoder, const SStreamScanHistoryFinishReq* pReq); int32_t tDecodeStreamScanHistoryFinishReq(SDecoder* pDecoder, SStreamScanHistoryFinishReq* pReq); @@ -568,9 +599,19 @@ int32_t tEncodeStreamCheckpointReadyMsg(SEncoder* pEncoder, const SStreamCheckpo int32_t tDecodeStreamCheckpointReadyMsg(SDecoder* pDecoder, SStreamCheckpointReadyMsg* pRsp); typedef struct STaskStatusEntry { - int64_t streamId; - int32_t taskId; + STaskId id; int32_t status; + int32_t stage; + int32_t nodeId; + int64_t verStart; // start version in WAL, only valid for source task + int64_t verEnd; // end version in WAL, only valid for source task + int64_t processedVer; // only valid for source task + int64_t activeCheckpointId; // current active checkpoint id + bool checkpointFailed; // denote if the checkpoint is failed or not + double inputQUsed; // in MiB + double inputRate; + double sinkQuota; // existed quota size for sink task + double sinkDataSize; // sink to dest data size } STaskStatusEntry; typedef struct SStreamHbMsg { @@ -636,15 +677,14 @@ void tDeleteStreamDispatchReq(SStreamDispatchReq* pReq); int32_t streamSetupScheduleTrigger(SStreamTask* pTask); -int32_t streamProcessRunReq(SStreamTask* pTask); -int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* 
pReq, SRpcMsg* pMsg, bool exec); +int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pMsg); int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code); int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pMsg); SStreamChildEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId); void streamTaskInputFail(SStreamTask* pTask); -int32_t streamTryExec(SStreamTask* pTask); +int32_t streamExecTask(SStreamTask* pTask); int32_t streamSchedExec(SStreamTask* pTask); bool streamTaskShouldStop(const SStreamStatus* pStatus); bool streamTaskShouldPause(const SStreamStatus* pStatus); @@ -656,10 +696,14 @@ char* createStreamTaskIdStr(int64_t streamId, int32_t taskId); // recover and fill history void streamTaskCheckDownstream(SStreamTask* pTask); -int32_t streamTaskLaunchScanHistory(SStreamTask* pTask); +int32_t streamTaskStartScanHistory(SStreamTask* pTask); int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage); int32_t streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList); void streamTaskResetUpstreamStageInfo(SStreamTask* pTask); +bool streamTaskAllUpstreamClosed(SStreamTask* pTask); +bool streamTaskSetSchedStatusWait(SStreamTask* pTask); +int8_t streamTaskSetSchedStatusActive(SStreamTask* pTask); +int8_t streamTaskSetSchedStatusInActive(SStreamTask* pTask); int32_t streamTaskStop(SStreamTask* pTask); int32_t streamSendCheckRsp(const SStreamMeta* pMeta, const SStreamTaskCheckReq* pReq, SStreamTaskCheckRsp* pRsp, @@ -670,14 +714,15 @@ int32_t streamTaskScanHistoryDataComplete(SStreamTask* pTask); int32_t streamStartScanHistoryAsync(SStreamTask* pTask, int8_t igUntreated); bool streamHistoryTaskSetVerRangeStep2(SStreamTask* pTask, int64_t latestVer); int32_t streamQueueGetNumOfItems(const SStreamQueue* pQueue); +int32_t streamQueueGetAvailableSpace(const SStreamQueue* pQueue, int32_t* availNum, double* availSize); // common int32_t streamRestoreParam(SStreamTask* pTask); int32_t streamSetStatusNormal(SStreamTask* pTask); +int32_t streamSetStatusUnint(SStreamTask* pTask); const char* streamGetTaskStatusStr(int32_t status); void streamTaskPause(SStreamTask* pTask, SStreamMeta* pMeta); void streamTaskResume(SStreamTask* pTask, SStreamMeta* pMeta); -void streamTaskHalt(SStreamTask* pTask); void streamTaskResumeFromHalt(SStreamTask* pTask); void streamTaskDisablePause(SStreamTask* pTask); void streamTaskEnablePause(SStreamTask* pTask); @@ -690,6 +735,9 @@ int32_t streamTaskReloadState(SStreamTask* pTask); void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId); void streamTaskOpenAllUpstreamInput(SStreamTask* pTask); +void streamTaskStatusInit(STaskStatusEntry* pEntry, const SStreamTask* pTask); +void streamTaskStatusCopy(STaskStatusEntry* pDst, const STaskStatusEntry* pSrc); + // source level int32_t streamSetParamForStreamScannerStep1(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow); int32_t streamSetParamForStreamScannerStep2(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow); @@ -707,24 +755,27 @@ void streamMetaCleanup(); SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId, int64_t stage); void streamMetaClose(SStreamMeta* streamMeta); int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask); // save to stream meta store -int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int64_t* pKey); +int32_t 
streamMetaRemoveTask(SStreamMeta* pMeta, STaskId* pKey); int32_t streamMetaRegisterTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask, bool* pAdded); int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId); int32_t streamMetaGetNumOfTasks(SStreamMeta* pMeta); int32_t streamMetaGetNumOfStreamTasks(SStreamMeta* pMeta); SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId); void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask); -int32_t streamMetaReopen(SStreamMeta* pMeta, int64_t chkpId); +int32_t streamMetaReopen(SStreamMeta* pMeta); int32_t streamMetaCommit(SStreamMeta* pMeta); int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta); void streamMetaNotifyClose(SStreamMeta* pMeta); +void streamMetaStartHb(SStreamMeta* pMeta); +void streamMetaInitForSnode(SStreamMeta* pMeta); // checkpoint int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask); +void streamTaskClearCheckInfo(SStreamTask* pTask); int32_t streamAlignTransferState(SStreamTask* pTask); - +int32_t streamBuildAndSendDropTaskMsg(SMsgCb* pMsgCb, int32_t vgId, SStreamTaskId* pTaskId); int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SStreamTask* pTask, int8_t isSucceed); int32_t buildCheckpointSourceRsp(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SRpcMsg* pMsg, diff --git a/include/util/talgo.h b/include/util/talgo.h index 7c92c0fe87..b065ea3705 100644 --- a/include/util/talgo.h +++ b/include/util/talgo.h @@ -54,6 +54,17 @@ typedef int32_t (*__ext_compar_fn_t)(const void *p1, const void *p2, const void */ void taosqsort(void *src, int64_t numOfElem, int64_t size, const void *param, __ext_compar_fn_t comparFn); +/** + * merge sort, with the compare function requiring additional parameters support + * + * @param src + * @param numOfElem + * @param size + * @param comparFn + * @return int32_t 0 for success, other for failure. 
+ */ +int32_t taosMergeSort(void *src, int64_t numOfElem, int64_t size, __compar_fn_t comparFn); + /** * binary search, with range support * diff --git a/include/util/tarray.h b/include/util/tarray.h index 4d9c930521..e494f78f48 100644 --- a/include/util/tarray.h +++ b/include/util/tarray.h @@ -214,12 +214,19 @@ void taosArrayDestroyEx(SArray* pArray, FDelete fp); void taosArraySwap(SArray* a, SArray* b); /** - * sort the array + * sort the array use qsort * @param pArray * @param compar */ void taosArraySort(SArray* pArray, __compar_fn_t comparFn); +/** + * sort the array use merge sort + * @param pArray + * @param compar + */ +int32_t taosArrayMSort(SArray* pArray, __compar_fn_t comparFn); + /** * search the array * @param pArray diff --git a/include/util/tarray2.h b/include/util/tarray2.h index cd49e64789..2e9b0c7cb5 100644 --- a/include/util/tarray2.h +++ b/include/util/tarray2.h @@ -165,6 +165,13 @@ static FORCE_INLINE int32_t tarray2SortInsert(void *arr, const void *elePtr, int #define TARRAY2_FOREACH_PTR_REVERSE(a, ep) \ for (int32_t __i = (a)->size - 1; __i >= 0 && ((ep) = &(a)->data[__i], 1); __i--) +#define TARRAY2_SORT(a, cmp) \ + do { \ + if ((a)->size > 1) { \ + taosSort((a)->data, (a)->size, sizeof((a)->data[0]), (__compar_fn_t)cmp); \ + } \ + } while (0) + #ifdef __cplusplus } #endif diff --git a/include/util/tlog.h b/include/util/tlog.h index 5a421033c9..a6d146a79e 100644 --- a/include/util/tlog.h +++ b/include/util/tlog.h @@ -55,6 +55,7 @@ extern int32_t tmrDebugFlag; extern int32_t uDebugFlag; extern int32_t rpcDebugFlag; extern int32_t qDebugFlag; +extern int32_t stDebugFlag; extern int32_t wDebugFlag; extern int32_t sDebugFlag; extern int32_t tsdbDebugFlag; diff --git a/packaging/cfg/taos.cfg b/packaging/cfg/taos.cfg index 2159899aa2..743edb6ca3 100644 --- a/packaging/cfg/taos.cfg +++ b/packaging/cfg/taos.cfg @@ -98,6 +98,9 @@ # enable/disable system monitor # monitor 1 +# enable/disable audit log +# audit 1 + # The following parameter is used to limit the maximum number of lines in log files. 
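> Note: the talgo.h/tarray.h hunks above add `taosMergeSort()` and `taosArrayMSort()`, which keep the familiar qsort-style compare callback but return an error code instead of sorting unconditionally (the tdataformat.c change further below makes `tRowSort()` check that code). The following is a minimal, hypothetical caller sketch, not part of this change set; it assumes only the public `tarray.h` API shown in this diff.

```c
#include "tarray.h"  // SArray, taosArrayInit/Push/MSort/Destroy (include/util/tarray.h)

// qsort-style compare callback for int32_t elements
static int32_t cmpInt32(const void *a, const void *b) {
  int32_t x = *(const int32_t *)a;
  int32_t y = *(const int32_t *)b;
  return (x < y) ? -1 : ((x > y) ? 1 : 0);
}

// Sort a small array with the new merge-sort variant and propagate failure,
// mirroring how tRowSort() handles the return code in this change set.
static int32_t sortSmallArray(void) {
  SArray *pArr = taosArrayInit(4, sizeof(int32_t));
  if (pArr == NULL) return -1;

  for (int32_t v = 3; v >= 0; --v) {
    taosArrayPush(pArr, &v);
  }

  int32_t code = taosArrayMSort(pArr, cmpInt32);  // 0 on success, non-zero on failure
  taosArrayDestroy(pArr);
  return code;
}
```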
# max number of lines per log filters # numOfLogLines 10000000 diff --git a/packaging/cfg/taosd.service b/packaging/cfg/taosd.service index 52c4b1d1e2..bfa330f6cb 100644 --- a/packaging/cfg/taosd.service +++ b/packaging/cfg/taosd.service @@ -8,7 +8,7 @@ Type=simple ExecStart=/usr/bin/taosd ExecStartPre=/usr/local/taos/bin/startPre.sh TimeoutStopSec=1000000s -LimitNOFILE=infinity +LimitNOFILE=1048576 LimitNPROC=infinity LimitCORE=infinity TimeoutStartSec=0 diff --git a/packaging/tools/makepkg.sh b/packaging/tools/makepkg.sh index 9e70a6bbf1..42465b8783 100755 --- a/packaging/tools/makepkg.sh +++ b/packaging/tools/makepkg.sh @@ -89,7 +89,7 @@ else ${build_dir}/bin/taosBenchmark \ ${build_dir}/bin/TDinsight.sh \ ${build_dir}/bin/tdengine-datasource.zip \ - ${build_dir}/bin/tdengine-datasource.zip.md5sum" + ${build_dir}/bin/tdengine-datasource.zip.md5" fi [ -f ${build_dir}/bin/taosx ] && taosx_bin="${build_dir}/bin/taosx" diff --git a/source/client/src/clientRawBlockWrite.c b/source/client/src/clientRawBlockWrite.c index b7e92d2e65..e7ba30d78c 100644 --- a/source/client/src/clientRawBlockWrite.c +++ b/source/client/src/clientRawBlockWrite.c @@ -377,6 +377,7 @@ _exit: for (int32_t iReq = 0; iReq < req.nReqs; iReq++) { pCreateReq = req.pReqs + iReq; taosMemoryFreeClear(pCreateReq->comment); + taosMemoryFreeClear(pCreateReq->sql); if (pCreateReq->type == TSDB_CHILD_TABLE) { taosArrayDestroy(pCreateReq->ctb.tagName); } diff --git a/source/client/src/clientSml.c b/source/client/src/clientSml.c index 10f8b89f4d..91c21fe344 100644 --- a/source/client/src/clientSml.c +++ b/source/client/src/clientSml.c @@ -683,7 +683,7 @@ static int32_t smlCheckMeta(SSchema *schema, int32_t length, SArray *cols, bool SSmlKv *kv = (SSmlKv *)taosArrayGet(cols, i); if (taosHashGet(hashTmp, kv->key, kv->keyLen) == NULL) { taosHashCleanup(hashTmp); - return -1; + return TSDB_CODE_SML_INVALID_DATA; } } taosHashCleanup(hashTmp); diff --git a/source/client/src/clientSmlJson.c b/source/client/src/clientSmlJson.c index f9076112c4..167638ab69 100644 --- a/source/client/src/clientSmlJson.c +++ b/source/client/src/clientSmlJson.c @@ -256,7 +256,8 @@ int smlJsonParseObjFirst(char **start, SSmlLineInfo *element, int8_t *offset) { } if (unlikely(index >= OTD_JSON_FIELDS_NUM)) { - uError("index >= %d, %s", OTD_JSON_FIELDS_NUM, *start) return -1; + uError("index >= %d, %s", OTD_JSON_FIELDS_NUM, *start); + return TSDB_CODE_TSC_INVALID_JSON; } char *sTmp = *start; @@ -367,7 +368,8 @@ int smlJsonParseObjFirst(char **start, SSmlLineInfo *element, int8_t *offset) { if (unlikely(index != OTD_JSON_FIELDS_NUM) || element->tags == NULL || element->cols == NULL || element->measure == NULL || element->timestamp == NULL) { - uError("elements != %d or element parse null", OTD_JSON_FIELDS_NUM) return -1; + uError("elements != %d or element parse null", OTD_JSON_FIELDS_NUM); + return TSDB_CODE_TSC_INVALID_JSON; } return 0; } @@ -381,7 +383,8 @@ int smlJsonParseObj(char **start, SSmlLineInfo *element, int8_t *offset) { } if (unlikely(index >= OTD_JSON_FIELDS_NUM)) { - uError("index >= %d, %s", OTD_JSON_FIELDS_NUM, *start) return -1; + uError("index >= %d, %s", OTD_JSON_FIELDS_NUM, *start); + return TSDB_CODE_TSC_INVALID_JSON; } if ((*start)[1] == 'm') { @@ -448,7 +451,8 @@ int smlJsonParseObj(char **start, SSmlLineInfo *element, int8_t *offset) { } if (unlikely(index != 0 && index != OTD_JSON_FIELDS_NUM)) { - uError("elements != %d", OTD_JSON_FIELDS_NUM) return -1; + uError("elements != %d", OTD_JSON_FIELDS_NUM); + return TSDB_CODE_TSC_INVALID_JSON; } 
return 0; } @@ -477,7 +481,7 @@ static int32_t smlGetJsonElements(cJSON *root, cJSON ***marks) { } if (*marks[i] == NULL) { uError("smlGetJsonElements error, not find mark:%d:%s", i, jsonName[i]); - return -1; + return TSDB_CODE_TSC_INVALID_JSON; } } return TSDB_CODE_SUCCESS; @@ -816,25 +820,25 @@ static int64_t smlParseTSFromJSONObj(SSmlHandle *info, cJSON *root, int32_t toPr int32_t size = cJSON_GetArraySize(root); if (unlikely(size != OTD_JSON_SUB_FIELDS_NUM)) { smlBuildInvalidDataMsg(&info->msgBuf, "invalidate json", NULL); - return -1; + return TSDB_CODE_TSC_INVALID_JSON; } cJSON *value = cJSON_GetObjectItem(root, "value"); if (unlikely(!cJSON_IsNumber(value))) { smlBuildInvalidDataMsg(&info->msgBuf, "invalidate json", NULL); - return -1; + return TSDB_CODE_TSC_INVALID_JSON; } cJSON *type = cJSON_GetObjectItem(root, "type"); if (unlikely(!cJSON_IsString(type))) { smlBuildInvalidDataMsg(&info->msgBuf, "invalidate json", NULL); - return -1; + return TSDB_CODE_TSC_INVALID_JSON; } double timeDouble = value->valuedouble; if (unlikely(smlDoubleToInt64OverFlow(timeDouble))) { smlBuildInvalidDataMsg(&info->msgBuf, "timestamp is too large", NULL); - return -1; + return TSDB_CODE_TSC_VALUE_OUT_OF_RANGE; } if (timeDouble == 0) { @@ -849,32 +853,29 @@ static int64_t smlParseTSFromJSONObj(SSmlHandle *info, cJSON *root, int32_t toPr size_t typeLen = strlen(type->valuestring); if (typeLen == 1 && (type->valuestring[0] == 's' || type->valuestring[0] == 'S')) { // seconds - int8_t fromPrecision = TSDB_TIME_PRECISION_SECONDS; +// int8_t fromPrecision = TSDB_TIME_PRECISION_SECONDS; if (smlFactorS[toPrecision] < INT64_MAX / tsInt64) { return tsInt64 * smlFactorS[toPrecision]; } - return -1; + return TSDB_CODE_TSC_VALUE_OUT_OF_RANGE; } else if (typeLen == 2 && (type->valuestring[1] == 's' || type->valuestring[1] == 'S')) { switch (type->valuestring[0]) { case 'm': case 'M': // milliseconds return convertTimePrecision(tsInt64, TSDB_TIME_PRECISION_MILLI, toPrecision); - break; case 'u': case 'U': // microseconds return convertTimePrecision(tsInt64, TSDB_TIME_PRECISION_MICRO, toPrecision); - break; case 'n': case 'N': return convertTimePrecision(tsInt64, TSDB_TIME_PRECISION_NANO, toPrecision); - break; default: - return -1; + return TSDB_CODE_TSC_INVALID_JSON_TYPE; } } else { - return -1; + return TSDB_CODE_TSC_INVALID_JSON_TYPE; } } @@ -895,7 +896,7 @@ static int64_t smlParseTSFromJSON(SSmlHandle *info, cJSON *timestamp) { double timeDouble = timestamp->valuedouble; if (unlikely(smlDoubleToInt64OverFlow(timeDouble))) { smlBuildInvalidDataMsg(&info->msgBuf, "timestamp is too large", NULL); - return -1; + return TSDB_CODE_TSC_VALUE_OUT_OF_RANGE; } if (unlikely(timeDouble < 0)) { @@ -911,14 +912,14 @@ static int64_t smlParseTSFromJSON(SSmlHandle *info, cJSON *timestamp) { if (unlikely(fromPrecision == -1)) { smlBuildInvalidDataMsg(&info->msgBuf, "timestamp precision can only be seconds(10 digits) or milli seconds(13 digits)", NULL); - return -1; + return TSDB_CODE_SML_INVALID_DATA; } int64_t tsInt64 = timeDouble; if (fromPrecision == TSDB_TIME_PRECISION_SECONDS) { if (smlFactorS[toPrecision] < INT64_MAX / tsInt64) { return tsInt64 * smlFactorS[toPrecision]; } - return -1; + return TSDB_CODE_TSC_VALUE_OUT_OF_RANGE; } else { return convertTimePrecision(timeDouble, fromPrecision, toPrecision); } @@ -926,7 +927,7 @@ static int64_t smlParseTSFromJSON(SSmlHandle *info, cJSON *timestamp) { return smlParseTSFromJSONObj(info, timestamp, toPrecision); } else { smlBuildInvalidDataMsg(&info->msgBuf, "invalidate json", 
NULL); - return -1; + return TSDB_CODE_TSC_INVALID_JSON; } } diff --git a/source/client/src/clientSmlLine.c b/source/client/src/clientSmlLine.c index a565fb1a21..006475654a 100644 --- a/source/client/src/clientSmlLine.c +++ b/source/client/src/clientSmlLine.c @@ -70,7 +70,7 @@ static int64_t smlParseInfluxTime(SSmlHandle *info, const char *data, int32_t le int64_t ts = smlGetTimeValue(data, len, fromPrecision, toPrecision); if (unlikely(ts == -1)) { smlBuildInvalidDataMsg(&info->msgBuf, "invalid timestamp", data); - return -1; + return TSDB_CODE_SML_INVALID_DATA; } return ts; } diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 6ee5508048..781b362674 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -26,8 +26,7 @@ #define EMPTY_BLOCK_POLL_IDLE_DURATION 10 #define DEFAULT_AUTO_COMMIT_INTERVAL 5000 - -#define OFFSET_IS_RESET_OFFSET(_of) ((_of) < 0) +#define DEFAULT_HEARTBEAT_INTERVAL 3000 struct SMqMgmt { int8_t inited; @@ -63,8 +62,7 @@ struct tmq_conf_t { int8_t resetOffset; int8_t withTbName; int8_t snapEnable; - int32_t snapBatchSize; - bool hbBgEnable; +// int32_t snapBatchSize; uint16_t port; int32_t autoCommitInterval; char* ip; @@ -84,7 +82,6 @@ struct tmq_t { int32_t autoCommitInterval; int8_t resetOffsetCfg; uint64_t consumerId; - bool hbBgEnable; tmq_commit_cb* commitCb; void* commitCbUserParam; @@ -269,8 +266,7 @@ tmq_conf_t* tmq_conf_new() { conf->withTbName = false; conf->autoCommit = true; conf->autoCommitInterval = DEFAULT_AUTO_COMMIT_INTERVAL; - conf->resetOffset = TMQ_OFFSET__RESET_EARLIEST; - conf->hbBgEnable = true; + conf->resetOffset = TMQ_OFFSET__RESET_LATEST; return conf; } @@ -360,10 +356,10 @@ tmq_conf_res_t tmq_conf_set(tmq_conf_t* conf, const char* key, const char* value } } - if (strcasecmp(key, "experimental.snapshot.batch.size") == 0) { - conf->snapBatchSize = taosStr2int64(value); - return TMQ_CONF_OK; - } +// if (strcasecmp(key, "experimental.snapshot.batch.size") == 0) { +// conf->snapBatchSize = taosStr2int64(value); +// return TMQ_CONF_OK; +// } // if (strcasecmp(key, "enable.heartbeat.background") == 0) { // if (strcasecmp(value, "true") == 0) { @@ -822,7 +818,7 @@ void tmqSendHbReq(void* param, void* tmrId) { OVER: tDeatroySMqHbReq(&req); - taosTmrReset(tmqSendHbReq, 1000, param, tmqMgmt.timer, &tmq->hbLiveTimer); + taosTmrReset(tmqSendHbReq, DEFAULT_HEARTBEAT_INTERVAL, param, tmqMgmt.timer, &tmq->hbLiveTimer); taosReleaseRef(tmqMgmt.rsetId, refId); } @@ -1077,8 +1073,6 @@ tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) { pTmq->resetOffsetCfg = conf->resetOffset; taosInitRWLatch(&pTmq->lock); - pTmq->hbBgEnable = conf->hbBgEnable; - // assign consumerId pTmq->consumerId = tGenIdPI64(); @@ -1102,19 +1096,16 @@ tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) { goto _failed; } - if (pTmq->hbBgEnable) { - int64_t* pRefId = taosMemoryMalloc(sizeof(int64_t)); - *pRefId = pTmq->refId; - pTmq->hbLiveTimer = taosTmrStart(tmqSendHbReq, 1000, pRefId, tmqMgmt.timer); - } + int64_t* pRefId = taosMemoryMalloc(sizeof(int64_t)); + *pRefId = pTmq->refId; + pTmq->hbLiveTimer = taosTmrStart(tmqSendHbReq, DEFAULT_HEARTBEAT_INTERVAL, pRefId, tmqMgmt.timer); char buf[TSDB_OFFSET_LEN] = {0}; STqOffsetVal offset = {.type = pTmq->resetOffsetCfg}; tFormatOffset(buf, tListLen(buf), &offset); tscInfo("consumer:0x%" PRIx64 " is setup, refId:%" PRId64 - ", groupId:%s, snapshot:%d, autoCommit:%d, commitInterval:%dms, offset:%s, backgroudHB:%d", - pTmq->consumerId, 
pTmq->refId, pTmq->groupId, pTmq->useSnapshot, pTmq->autoCommit, pTmq->autoCommitInterval, - buf, pTmq->hbBgEnable); + ", groupId:%s, snapshot:%d, autoCommit:%d, commitInterval:%dms, offset:%s", + pTmq->consumerId, pTmq->refId, pTmq->groupId, pTmq->useSnapshot, pTmq->autoCommit, pTmq->autoCommitInterval, buf); return pTmq; diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index a1c8690dfc..addf0aa629 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -47,7 +47,8 @@ void printSubResults(void* pRes, int32_t* totalRows) { int32_t precision = taos_result_precision(pRes); taos_print_row(buf, row, fields, numOfFields); *totalRows += 1; - printf("vgId: %d, offset: %lld, precision: %d, row content: %s\n", vgId, offset, precision, buf); + std::cout << "vgId:" << vgId << ", offset:" << offset << ", precision:" << precision << ", row content:" << buf + << std::endl; } // taos_free_result(pRes); @@ -832,7 +833,7 @@ TEST(clientCase, projection_query_tables) { for(int32_t i = 0; i < 1000000; ++i) { char t[512] = {0}; - sprintf(t, "insert into t1 values(%ld, %ld)", start + i, i); + sprintf(t, "insert into t1 values(now, %d)", i); while(1) { void* p = taos_query(pConn, t); code = taos_errno(p); @@ -1167,16 +1168,19 @@ TEST(clientCase, tmq_commit) { } for(int i = 0; i < numOfAssign; i++){ - printf("assign i:%d, vgId:%d, offset:%lld, start:%lld, end:%lld\n", i, pAssign[i].vgId, pAssign[i].currentOffset, pAssign[i].begin, pAssign[i].end); + tmq_topic_assignment* pa = &pAssign[i]; + std::cout << "assign i:" << i << ", vgId:" << pa->vgId << ", offset:" << pa->currentOffset << ", start:%" + << pa->begin << ", end:%" << pa->end << std::endl; - int64_t committed = tmq_committed(tmq, topicName, pAssign[i].vgId); - printf("committed vgId:%d, committed:%lld\n", pAssign[i].vgId, committed); + int64_t committed = tmq_committed(tmq, topicName, pa->vgId); + std::cout << "committed vgId:" << pa->vgId << " committed:" << committed << std::endl; - int64_t position = tmq_position(tmq, topicName, pAssign[i].vgId); - printf("position vgId:%d, position:%lld\n", pAssign[i].vgId, position); - tmq_offset_seek(tmq, topicName, pAssign[i].vgId, 1); - position = tmq_position(tmq, topicName, pAssign[i].vgId); - printf("after seek 1, position vgId:%d, position:%lld\n", pAssign[i].vgId, position); + int64_t position = tmq_position(tmq, topicName, pa->vgId); + std::cout << "position vgId:" << pa->vgId << ", position:" << position << std::endl; + + tmq_offset_seek(tmq, topicName, pa->vgId, 1); + position = tmq_position(tmq, topicName, pa->vgId); + std::cout << "after seek 1, position vgId:" << pa->vgId << " position:" << position << std::endl; } while (1) { @@ -1191,12 +1195,14 @@ TEST(clientCase, tmq_commit) { tmq_commit_sync(tmq, pRes); for(int i = 0; i < numOfAssign; i++) { int64_t committed = tmq_committed(tmq, topicName, pAssign[i].vgId); - printf("committed vgId:%d, committed:%lld\n", pAssign[i].vgId, committed); + std::cout << "committed vgId:" << pAssign[i].vgId << " , committed:" << committed << std::endl; if(committed > 0){ int32_t code = tmq_commit_offset_sync(tmq, topicName, pAssign[i].vgId, 4); printf("tmq_commit_offset_sync vgId:%d, offset:4, code:%d\n", pAssign[i].vgId, code); int64_t committed = tmq_committed(tmq, topicName, pAssign[i].vgId); - printf("after tmq_commit_offset_sync, committed vgId:%d, committed:%lld\n", pAssign[i].vgId, committed); + + std::cout << "after tmq_commit_offset_sync, committed vgId:" << pAssign[i].vgId << ", committed:" 
<< committed + << std::endl; } } if (pRes != NULL) { @@ -1212,7 +1218,12 @@ TEST(clientCase, tmq_commit) { taos_close(pConn); fprintf(stderr, "%d msg consumed, include %d rows\n", msgCnt, totalRows); } - +namespace { +void doPrintInfo(tmq_topic_assignment* pa, int32_t index) { + std::cout << "assign i:" << index << ", vgId:" << pa->vgId << ", offset:%" << pa->currentOffset << ", start:%" + << pa->begin << ", end:%" << pa->end << std::endl; +} +} TEST(clientCase, td_25129) { // taos_options(TSDB_OPTION_CONFIGDIR, "~/first/cfg"); @@ -1264,7 +1275,7 @@ TEST(clientCase, td_25129) { } for(int i = 0; i < numOfAssign; i++){ - printf("assign i:%d, vgId:%d, offset:%lld, start:%lld, end:%lld\n", i, pAssign[i].vgId, pAssign[i].currentOffset, pAssign[i].begin, pAssign[i].end); + doPrintInfo(&pAssign[i], i); } // tmq_offset_seek(tmq, "tp", pAssign[0].vgId, 4); @@ -1281,7 +1292,7 @@ TEST(clientCase, td_25129) { } for(int i = 0; i < numOfAssign; i++){ - printf("assign i:%d, vgId:%d, offset:%lld, start:%lld, end:%lld\n", i, pAssign[i].vgId, pAssign[i].currentOffset, pAssign[i].begin, pAssign[i].end); + doPrintInfo(&pAssign[i], i); } tmq_free_assignment(pAssign); @@ -1298,7 +1309,7 @@ TEST(clientCase, td_25129) { for(int i = 0; i < numOfAssign; i++){ int64_t committed = tmq_committed(tmq, topicName, pAssign[i].vgId); - printf("assign i:%d, vgId:%d, committed:%lld, offset:%lld, start:%lld, end:%lld\n", i, pAssign[i].vgId, committed, pAssign[i].currentOffset, pAssign[i].begin, pAssign[i].end); + doPrintInfo(&pAssign[i], i); } while (1) { @@ -1328,7 +1339,7 @@ TEST(clientCase, td_25129) { } for(int i = 0; i < numOfAssign; i++){ - printf("assign i:%d, vgId:%d, offset:%lld, start:%lld, end:%lld\n", i, pAssign[i].vgId, pAssign[i].currentOffset, pAssign[i].begin, pAssign[i].end); + doPrintInfo(&pAssign[i], i); } } else { for(int i = 0; i < numOfAssign; i++) { @@ -1364,7 +1375,7 @@ TEST(clientCase, td_25129) { } for(int i = 0; i < numOfAssign; i++){ - printf("assign i:%d, vgId:%d, offset:%lld, start:%lld, end:%lld\n", i, pAssign[i].vgId, pAssign[i].currentOffset, pAssign[i].begin, pAssign[i].end); + doPrintInfo(&pAssign[i], i); } tmq_free_assignment(pAssign); diff --git a/source/common/src/systable.c b/source/common/src/systable.c index a81059f95b..dc584437bf 100644 --- a/source/common/src/systable.c +++ b/source/common/src/systable.c @@ -159,11 +159,15 @@ static const SSysDbTableSchema streamSchema[] = { static const SSysDbTableSchema streamTaskSchema[] = { {.name = "stream_name", .bytes = SYSTABLE_SCH_DB_NAME_LEN, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, - {.name = "task_id", .bytes = 32, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, - {.name = "node_type", .bytes = SYSTABLE_SCH_DB_NAME_LEN, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "task_id", .bytes = 16 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "node_type", .bytes = 10 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "node_id", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = false}, - {.name = "level", .bytes = 20 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, - {.name = "status", .bytes = 20 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "level", .bytes = 10 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "status", .bytes = 15 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "stage", .bytes = 4, .type = 
TSDB_DATA_TYPE_INT, .sysInfo = false}, + {.name = "in_queue", .bytes = 20, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, +// {.name = "out_queue", .bytes = 20, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "info", .bytes = 25, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, }; static const SSysDbTableSchema userTblsSchema[] = { diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 330eb4ae30..bf21b2eda0 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -2360,27 +2360,26 @@ void trimDataBlock(SSDataBlock* pBlock, int32_t totalRows, const bool* pBoolList int32_t maxRows = 0; size_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); - for (int32_t i = 0; i < numOfCols; ++i) { - SColumnInfoData* pDst = taosArrayGet(pBlock->pDataBlock, i); - // it is a reserved column for scalar function, and no data in this column yet. - if (pDst->pData == NULL) { - continue; - } + if (!pBoolList) { + for (int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pDst = taosArrayGet(pBlock->pDataBlock, i); + // it is a reserved column for scalar function, and no data in this column yet. + if (pDst->pData == NULL) { + continue; + } - int32_t numOfRows = 0; - if (IS_VAR_DATA_TYPE(pDst->info.type)) { - pDst->varmeta.length = 0; + int32_t numOfRows = 0; + if (IS_VAR_DATA_TYPE(pDst->info.type)) { + pDst->varmeta.length = 0; + } } - } - - if (NULL == pBoolList) { return; } - + for (int32_t i = 0; i < numOfCols; ++i) { SColumnInfoData* pDst = taosArrayGet(pBlock->pDataBlock, i); // it is a reserved column for scalar function, and no data in this column yet. - if (pDst->pData == NULL) { + if (pDst->pData == NULL || (IS_VAR_DATA_TYPE(pDst->info.type) && pDst->varmeta.length == 0)) { continue; } diff --git a/source/common/src/tdataformat.c b/source/common/src/tdataformat.c index e04ba1515f..d220da0d84 100644 --- a/source/common/src/tdataformat.c +++ b/source/common/src/tdataformat.c @@ -610,9 +610,13 @@ _exit: return code; } -void tRowSort(SArray *aRowP) { - if (TARRAY_SIZE(aRowP) <= 1) return; - taosArraySort(aRowP, tRowPCmprFn); +int32_t tRowSort(SArray *aRowP) { + if (TARRAY_SIZE(aRowP) <= 1) return 0; + int32_t code = taosArrayMSort(aRowP, tRowPCmprFn); + if (code != TSDB_CODE_SUCCESS) { + uError("taosArrayMSort failed caused by %d", code); + } + return code; } int32_t tRowMerge(SArray *aRowP, STSchema *pTSchema, int8_t flag) { @@ -3590,5 +3594,5 @@ void (*tColDataCalcSMA[])(SColData *pColData, int64_t *sum, int64_t *max, int64_ NULL, // TSDB_DATA_TYPE_DECIMAL NULL, // TSDB_DATA_TYPE_BLOB NULL, // TSDB_DATA_TYPE_MEDIUMBLOB - NULL // TSDB_DATA_TYPE_GEOMETRY + tColDataCalcSMAVarType // TSDB_DATA_TYPE_GEOMETRY }; diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 92b5ff2828..3d7b38161a 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -244,8 +244,8 @@ int32_t tsTtlBatchDropNum = 10000; // number of tables dropped per batch // internal int32_t tsTransPullupInterval = 2; int32_t tsMqRebalanceInterval = 2; -int32_t tsStreamCheckpointTickInterval = 600; -int32_t tsStreamNodeCheckInterval = 10; +int32_t tsStreamCheckpointTickInterval = 300; +int32_t tsStreamNodeCheckInterval = 30; int32_t tsTtlUnit = 86400; int32_t tsTtlPushIntervalSec = 10; int32_t tsTrimVDbIntervalSec = 60 * 60; // interval of trimming db in all vgroups @@ -269,7 +269,7 @@ int8_t tsS3Enabled = false; int32_t tsS3BlockSize = 4096; // number of tsdb pages int32_t tsS3BlockCacheSize = 16; // number of blocks -int32_t 
tsCheckpointInterval = 20; +int32_t tsCheckpointInterval = 300; #ifndef _STORAGE int32_t taosSetTfsCfg(SConfig *pCfg) { @@ -411,6 +411,7 @@ static int32_t taosAddServerLogCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "idxDebugFlag", idxDebugFlag, 0, 255, CFG_SCOPE_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "tdbDebugFlag", tdbDebugFlag, 0, 255, CFG_SCOPE_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "metaDebugFlag", metaDebugFlag, 0, 255, 0) != CFG_SCOPE_SERVER) return -1; + if (cfgAddInt32(pCfg, "stDebugFlag", stDebugFlag, 0, 255, CFG_SCOPE_SERVER) != 0) return -1; return 0; } @@ -649,7 +650,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddBool(pCfg, "disableStream", tsDisableStream, CFG_SCOPE_SERVER) != 0) return -1; if (cfgAddInt64(pCfg, "streamBufferSize", tsStreamBufferSize, 0, INT64_MAX, CFG_SCOPE_SERVER) != 0) return -1; - if (cfgAddInt64(pCfg, "checkpointInterval", tsCheckpointInterval, 0, INT64_MAX, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt64(pCfg, "checkpointInterval", tsStreamCheckpointTickInterval, 60, 1200, CFG_SCOPE_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "cacheLazyLoadThreshold", tsCacheLazyLoadThreshold, 0, 100000, CFG_SCOPE_SERVER) != 0) return -1; @@ -863,6 +864,7 @@ static void taosSetServerLogCfg(SConfig *pCfg) { idxDebugFlag = cfgGetItem(pCfg, "idxDebugFlag")->i32; tdbDebugFlag = cfgGetItem(pCfg, "tdbDebugFlag")->i32; metaDebugFlag = cfgGetItem(pCfg, "metaDebugFlag")->i32; + stDebugFlag = cfgGetItem(pCfg, "stDebugFlag")->i32; } static int32_t taosSetSlowLogScope(char *pScope) { @@ -1709,15 +1711,22 @@ void taosCfgDynamicOptions(const char *option, const char *value) { return; } + if (strcasecmp(option, "asynclog") == 0) { + int32_t newAsynclog = atoi(value); + uInfo("asynclog set from %d to %d", tsAsyncLog, newAsynclog); + tsAsyncLog = newAsynclog; + return; + } + const char *options[] = { "dDebugFlag", "vDebugFlag", "mDebugFlag", "wDebugFlag", "sDebugFlag", "tsdbDebugFlag", "tqDebugFlag", "fsDebugFlag", "udfDebugFlag", "smaDebugFlag", "idxDebugFlag", "tdbDebugFlag", "tmrDebugFlag", "uDebugFlag", - "smaDebugFlag", "rpcDebugFlag", "qDebugFlag", "metaDebugFlag", "jniDebugFlag", + "smaDebugFlag", "rpcDebugFlag", "qDebugFlag", "metaDebugFlag", "jniDebugFlag", "stDebugFlag", }; int32_t *optionVars[] = { &dDebugFlag, &vDebugFlag, &mDebugFlag, &wDebugFlag, &sDebugFlag, &tsdbDebugFlag, &tqDebugFlag, &fsDebugFlag, &udfDebugFlag, &smaDebugFlag, &idxDebugFlag, &tdbDebugFlag, &tmrDebugFlag, &uDebugFlag, - &smaDebugFlag, &rpcDebugFlag, &qDebugFlag, &metaDebugFlag, &jniDebugFlag, + &smaDebugFlag, &rpcDebugFlag, &qDebugFlag, &metaDebugFlag, &jniDebugFlag, &stDebugFlag, }; int32_t optionSize = tListLen(options); @@ -1770,6 +1779,7 @@ void taosSetAllDebugFlag(int32_t flag, bool rewrite) { taosSetDebugFlag(&idxDebugFlag, "idxDebugFlag", flag, rewrite); taosSetDebugFlag(&tdbDebugFlag, "tdbDebugFlag", flag, rewrite); taosSetDebugFlag(&metaDebugFlag, "metaDebugFlag", flag, rewrite); + taosSetDebugFlag(&stDebugFlag, "stDebugFlag", flag, rewrite); uInfo("all debug flag are set to %d", flag); } diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 0ba9539124..9b66bd1fb3 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -30,6 +30,32 @@ #include "tlog.h" +#define DECODESQL() \ + do { \ + if(!tDecodeIsEnd(&decoder)){ \ + if(tDecodeI32(&decoder, &pReq->sqlLen) < 0) return -1; \ + if(pReq->sqlLen > 0){ \ + if (tDecodeBinaryAlloc(&decoder, (void **)&pReq->sql, NULL) < 0) return -1; \ + } \ + } \ + } while (0) + +#define 
ENCODESQL() \ + do { \ + if (pReq->sqlLen > 0 && pReq->sql != NULL){ \ + if (tEncodeI32(&encoder, pReq->sqlLen) < 0) return -1; \ + if (tEncodeBinary(&encoder, pReq->sql, pReq->sqlLen) < 0) return -1; \ + } \ + } while (0) + +#define FREESQL() \ + do { \ + if(pReq->sql != NULL){ \ + taosMemoryFree(pReq->sql); \ + } \ + pReq->sql = NULL; \ + } while (0) + static int32_t tDecodeSVAlterTbReqCommon(SDecoder *pDecoder, SVAlterTbReq *pReq); static int32_t tDecodeSBatchDeleteReqCommon(SDecoder *pDecoder, SBatchDeleteReq *pReq); @@ -561,6 +587,8 @@ int32_t tSerializeSMCreateStbReq(void *buf, int32_t bufLen, SMCreateStbReq *pReq if (tEncodeI64(&encoder, pReq->deleteMark1) < 0) return -1; if (tEncodeI64(&encoder, pReq->deleteMark2) < 0) return -1; + ENCODESQL(); + tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -656,6 +684,8 @@ int32_t tDeserializeSMCreateStbReq(void *buf, int32_t bufLen, SMCreateStbReq *pR if (tDecodeI64(&decoder, &pReq->deleteMark1) < 0) return -1; if (tDecodeI64(&decoder, &pReq->deleteMark2) < 0) return -1; + DECODESQL(); + tEndDecode(&decoder); tDecoderClear(&decoder); return 0; @@ -668,6 +698,7 @@ void tFreeSMCreateStbReq(SMCreateStbReq *pReq) { taosMemoryFreeClear(pReq->pComment); taosMemoryFreeClear(pReq->pAst1); taosMemoryFreeClear(pReq->pAst2); + FREESQL(); } int32_t tSerializeSMDropStbReq(void *buf, int32_t bufLen, SMDropStbReq *pReq) { @@ -682,6 +713,7 @@ int32_t tSerializeSMDropStbReq(void *buf, int32_t bufLen, SMDropStbReq *pReq) { if (tEncodeI8(&encoder, pReq->reserved[i]) < 0) return -1; } if (tEncodeI64(&encoder, pReq->suid) < 0) return -1; + ENCODESQL(); tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -702,12 +734,18 @@ int32_t tDeserializeSMDropStbReq(void *buf, int32_t bufLen, SMDropStbReq *pReq) } if (tDecodeI64(&decoder, &pReq->suid) < 0) return -1; + DECODESQL(); + tEndDecode(&decoder); tDecoderClear(&decoder); return 0; } +void tFreeSMDropStbReq(SMDropStbReq *pReq) { + FREESQL(); +} + int32_t tSerializeSMAlterStbReq(void *buf, int32_t bufLen, SMAlterStbReq *pReq) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); @@ -727,6 +765,7 @@ int32_t tSerializeSMAlterStbReq(void *buf, int32_t bufLen, SMAlterStbReq *pReq) if (pReq->commentLen > 0) { if (tEncodeCStr(&encoder, pReq->comment) < 0) return -1; } + ENCODESQL(); tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -767,6 +806,8 @@ int32_t tDeserializeSMAlterStbReq(void *buf, int32_t bufLen, SMAlterStbReq *pReq if (tDecodeCStrTo(&decoder, pReq->comment) < 0) return -1; } + DECODESQL(); + tEndDecode(&decoder); tDecoderClear(&decoder); return 0; @@ -776,6 +817,7 @@ void tFreeSMAltertbReq(SMAlterStbReq *pReq) { taosArrayDestroy(pReq->pFields); pReq->pFields = NULL; taosMemoryFreeClear(pReq->comment); + FREESQL(); } int32_t tSerializeSEpSet(void *buf, int32_t bufLen, const SEpSet *pEpset) { @@ -1084,7 +1126,7 @@ int32_t tDeserializeSNotifyReq(void *buf, int32_t bufLen, SNotifyReq *pReq) { } code = 0; - + _exit: tEndDecode(&decoder); tDecoderClear(&decoder); @@ -1426,6 +1468,7 @@ int32_t tSerializeSDropUserReq(void *buf, int32_t bufLen, SDropUserReq *pReq) { if (tStartEncode(&encoder) < 0) return -1; if (tEncodeCStr(&encoder, pReq->user) < 0) return -1; + ENCODESQL(); tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -1439,12 +1482,17 @@ int32_t tDeserializeSDropUserReq(void *buf, int32_t bufLen, SDropUserReq *pReq) if (tStartDecode(&decoder) < 0) return -1; if (tDecodeCStrTo(&decoder, pReq->user) < 0) return -1; + DECODESQL(); tEndDecode(&decoder); tDecoderClear(&decoder); return 0; } +void 
tFreeSDropUserReq(SDropUserReq *pReq) { + FREESQL(); +} + SIpWhiteList *cloneIpWhiteList(SIpWhiteList *pIpWhiteList) { if (pIpWhiteList == NULL) return NULL; @@ -1470,6 +1518,8 @@ int32_t tSerializeSCreateUserReq(void *buf, int32_t bufLen, SCreateUserReq *pReq if (tEncodeU32(&encoder, pReq->pIpRanges[i].ip) < 0) return -1; if (tEncodeU32(&encoder, pReq->pIpRanges[i].mask) < 0) return -1; } + + ENCODESQL(); tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -1495,15 +1545,13 @@ int32_t tDeserializeSCreateUserReq(void *buf, int32_t bufLen, SCreateUserReq *pR if (tDecodeU32(&decoder, &(pReq->pIpRanges[i].ip)) < 0) return -1; if (tDecodeU32(&decoder, &(pReq->pIpRanges[i].mask)) < 0) return -1; } + + DECODESQL(); tEndDecode(&decoder); - - tDecoderClear(&decoder); return 0; } -void tFreeSCreateUserReq(SCreateUserReq *pReq) { taosMemoryFree(pReq->pIpRanges); } - int32_t tSerializeSUpdateIpWhite(void *buf, int32_t bufLen, SUpdateIpWhite *pReq) { // impl later SEncoder encoder = {0}; @@ -1602,6 +1650,7 @@ int32_t tSerializeRetrieveIpWhite(void *buf, int32_t bufLen, SRetrieveIpWhiteReq tEncoderClear(&encoder); return tlen; } + int32_t tDeserializeRetrieveIpWhite(void *buf, int32_t bufLen, SRetrieveIpWhiteReq *pReq) { SDecoder decoder = {0}; tDecoderInit(&decoder, buf, bufLen); @@ -1614,6 +1663,11 @@ int32_t tDeserializeRetrieveIpWhite(void *buf, int32_t bufLen, SRetrieveIpWhiteR return 0; } +void tFreeSCreateUserReq(SCreateUserReq *pReq) { + FREESQL(); + taosMemoryFreeClear(pReq->pIpRanges); +} + int32_t tSerializeSAlterUserReq(void *buf, int32_t bufLen, SAlterUserReq *pReq) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); @@ -1637,6 +1691,7 @@ int32_t tSerializeSAlterUserReq(void *buf, int32_t bufLen, SAlterUserReq *pReq) if (tEncodeU32(&encoder, pReq->pIpRanges[i].ip) < 0) return -1; if (tEncodeU32(&encoder, pReq->pIpRanges[i].mask) < 0) return -1; } + ENCODESQL(); tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -1673,6 +1728,7 @@ int32_t tDeserializeSAlterUserReq(void *buf, int32_t bufLen, SAlterUserReq *pReq if (tDecodeU32(&decoder, &(pReq->pIpRanges[i].ip)) < 0) return -1; if (tDecodeU32(&decoder, &(pReq->pIpRanges[i].mask)) < 0) return -1; } + DECODESQL(); tEndDecode(&decoder); tDecoderClear(&decoder); @@ -1682,6 +1738,7 @@ int32_t tDeserializeSAlterUserReq(void *buf, int32_t bufLen, SAlterUserReq *pReq void tFreeSAlterUserReq(SAlterUserReq *pReq) { taosMemoryFreeClear(pReq->tagCond); taosMemoryFree(pReq->pIpRanges); + FREESQL(); } int32_t tSerializeSGetUserAuthReq(void *buf, int32_t bufLen, SGetUserAuthReq *pReq) { @@ -2041,6 +2098,7 @@ int32_t tSerializeSCreateDropMQSNodeReq(void *buf, int32_t bufLen, SMCreateQnode if (tStartEncode(&encoder) < 0) return -1; if (tEncodeI32(&encoder, pReq->dnodeId) < 0) return -1; + ENCODESQL(); tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -2054,12 +2112,21 @@ int32_t tDeserializeSCreateDropMQSNodeReq(void *buf, int32_t bufLen, SMCreateQno if (tStartDecode(&decoder) < 0) return -1; if (tDecodeI32(&decoder, &pReq->dnodeId) < 0) return -1; + DECODESQL(); tEndDecode(&decoder); tDecoderClear(&decoder); return 0; } +void tFreeSMCreateQnodeReq(SMCreateQnodeReq *pReq){ + FREESQL(); +} + +void tFreeSDDropQnodeReq(SDDropQnodeReq* pReq) { + FREESQL(); +} + int32_t tSerializeSDropDnodeReq(void *buf, int32_t bufLen, SDropDnodeReq *pReq) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); @@ -2070,6 +2137,7 @@ int32_t tSerializeSDropDnodeReq(void *buf, int32_t bufLen, SDropDnodeReq *pReq) if (tEncodeI32(&encoder, pReq->port) < 0) 
return -1; if (tEncodeI8(&encoder, pReq->force) < 0) return -1; if (tEncodeI8(&encoder, pReq->unsafe) < 0) return -1; + ENCODESQL(); tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -2092,12 +2160,17 @@ int32_t tDeserializeSDropDnodeReq(void *buf, int32_t bufLen, SDropDnodeReq *pReq pReq->unsafe = false; } + DECODESQL(); tEndDecode(&decoder); tDecoderClear(&decoder); return 0; } +void tFreeSDropDnodeReq(SDropDnodeReq *pReq) { + FREESQL(); +} + int32_t tSerializeSRestoreDnodeReq(void *buf, int32_t bufLen, SRestoreDnodeReq *pReq) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); @@ -2105,6 +2178,7 @@ int32_t tSerializeSRestoreDnodeReq(void *buf, int32_t bufLen, SRestoreDnodeReq * if (tStartEncode(&encoder) < 0) return -1; if (tEncodeI32(&encoder, pReq->dnodeId) < 0) return -1; if (tEncodeI8(&encoder, pReq->restoreType) < 0) return -1; + ENCODESQL(); tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -2119,12 +2193,17 @@ int32_t tDeserializeSRestoreDnodeReq(void *buf, int32_t bufLen, SRestoreDnodeReq if (tStartDecode(&decoder) < 0) return -1; if (tDecodeI32(&decoder, &pReq->dnodeId) < 0) return -1; if (tDecodeI8(&decoder, &pReq->restoreType) < 0) return -1; + DECODESQL(); tEndDecode(&decoder); tDecoderClear(&decoder); return 0; } +void tFreeSRestoreDnodeReq(SRestoreDnodeReq *pReq) { + FREESQL(); +} + int32_t tSerializeSMCfgDnodeReq(void *buf, int32_t bufLen, SMCfgDnodeReq *pReq) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); @@ -2133,6 +2212,7 @@ int32_t tSerializeSMCfgDnodeReq(void *buf, int32_t bufLen, SMCfgDnodeReq *pReq) if (tEncodeI32(&encoder, pReq->dnodeId) < 0) return -1; if (tEncodeCStr(&encoder, pReq->config) < 0) return -1; if (tEncodeCStr(&encoder, pReq->value) < 0) return -1; + ENCODESQL(); tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -2148,12 +2228,17 @@ int32_t tDeserializeSMCfgDnodeReq(void *buf, int32_t bufLen, SMCfgDnodeReq *pReq if (tDecodeI32(&decoder, &pReq->dnodeId) < 0) return -1; if (tDecodeCStrTo(&decoder, pReq->config) < 0) return -1; if (tDecodeCStrTo(&decoder, pReq->value) < 0) return -1; + DECODESQL(); tEndDecode(&decoder); tDecoderClear(&decoder); return 0; } +void tFreeSMCfgDnodeReq(SMCfgDnodeReq *pReq) { + FREESQL(); +} + int32_t tSerializeSDCfgDnodeReq(void *buf, int32_t bufLen, SDCfgDnodeReq *pReq) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); @@ -2188,6 +2273,7 @@ int32_t tSerializeSCreateDnodeReq(void *buf, int32_t bufLen, SCreateDnodeReq *pR if (tStartEncode(&encoder) < 0) return -1; if (tEncodeCStr(&encoder, pReq->fqdn) < 0) return -1; if (tEncodeI32(&encoder, pReq->port) < 0) return -1; + ENCODESQL(); tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -2202,12 +2288,17 @@ int32_t tDeserializeSCreateDnodeReq(void *buf, int32_t bufLen, SCreateDnodeReq * if (tStartDecode(&decoder) < 0) return -1; if (tDecodeCStrTo(&decoder, pReq->fqdn) < 0) return -1; if (tDecodeI32(&decoder, &pReq->port) < 0) return -1; + DECODESQL(); tEndDecode(&decoder); tDecoderClear(&decoder); return 0; } +void tFreeSCreateDnodeReq(SCreateDnodeReq *pReq) { + FREESQL(); +} + int32_t tSerializeSCreateFuncReq(void *buf, int32_t bufLen, SCreateFuncReq *pReq) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); @@ -2695,6 +2786,8 @@ int32_t tSerializeSCreateDbReq(void *buf, int32_t bufLen, SCreateDbReq *pReq) { } if (tEncodeI32(&encoder, pReq->tsdbPageSize) < 0) return -1; if (tEncodeI32(&encoder, pReq->keepTimeOffset) < 0) return -1; + + ENCODESQL(); tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -2762,6 +2855,8 
@@ int32_t tDeserializeSCreateDbReq(void *buf, int32_t bufLen, SCreateDbReq *pReq) if (tDecodeI32(&decoder, &pReq->keepTimeOffset) < 0) return -1; } + DECODESQL(); + tEndDecode(&decoder); tDecoderClear(&decoder); @@ -2771,6 +2866,7 @@ int32_t tDeserializeSCreateDbReq(void *buf, int32_t bufLen, SCreateDbReq *pReq) void tFreeSCreateDbReq(SCreateDbReq *pReq) { taosArrayDestroy(pReq->pRetensions); pReq->pRetensions = NULL; + FREESQL(); } int32_t tSerializeSAlterDbReq(void *buf, int32_t bufLen, SAlterDbReq *pReq) { @@ -2800,6 +2896,7 @@ int32_t tSerializeSAlterDbReq(void *buf, int32_t bufLen, SAlterDbReq *pReq) { if (tEncodeI32(&encoder, pReq->walRetentionPeriod) < 0) return -1; if (tEncodeI32(&encoder, pReq->walRetentionSize) < 0) return -1; if (tEncodeI32(&encoder, pReq->keepTimeOffset) < 0) return -1; + ENCODESQL(); tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -2847,12 +2944,18 @@ int32_t tDeserializeSAlterDbReq(void *buf, int32_t bufLen, SAlterDbReq *pReq) { if (!tDecodeIsEnd(&decoder)) { if (tDecodeI32(&decoder, &pReq->keepTimeOffset) < 0) return -1; } + + DECODESQL(); tEndDecode(&decoder); tDecoderClear(&decoder); return 0; } +void tFreeSAlterDbReq(SAlterDbReq *pReq) { + FREESQL(); +} + int32_t tSerializeSDropDbReq(void *buf, int32_t bufLen, SDropDbReq *pReq) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); @@ -2860,6 +2963,7 @@ int32_t tSerializeSDropDbReq(void *buf, int32_t bufLen, SDropDbReq *pReq) { if (tStartEncode(&encoder) < 0) return -1; if (tEncodeCStr(&encoder, pReq->db) < 0) return -1; if (tEncodeI8(&encoder, pReq->ignoreNotExists) < 0) return -1; + ENCODESQL(); tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -2874,12 +2978,17 @@ int32_t tDeserializeSDropDbReq(void *buf, int32_t bufLen, SDropDbReq *pReq) { if (tStartDecode(&decoder) < 0) return -1; if (tDecodeCStrTo(&decoder, pReq->db) < 0) return -1; if (tDecodeI8(&decoder, &pReq->ignoreNotExists) < 0) return -1; + DECODESQL(); tEndDecode(&decoder); tDecoderClear(&decoder); return 0; } +void tFreeSDropDbReq(SDropDbReq *pReq) { + FREESQL(); +} + int32_t tSerializeSDropDbRsp(void *buf, int32_t bufLen, SDropDbRsp *pRsp) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); @@ -3134,6 +3243,7 @@ int32_t tSerializeSCompactDbReq(void *buf, int32_t bufLen, SCompactDbReq *pReq) if (tEncodeCStr(&encoder, pReq->db) < 0) return -1; if (tEncodeI64(&encoder, pReq->timeRange.skey) < 0) return -1; if (tEncodeI64(&encoder, pReq->timeRange.ekey) < 0) return -1; + ENCODESQL(); tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -3149,12 +3259,17 @@ int32_t tDeserializeSCompactDbReq(void *buf, int32_t bufLen, SCompactDbReq *pReq if (tDecodeCStrTo(&decoder, pReq->db) < 0) return -1; if (tDecodeI64(&decoder, &pReq->timeRange.skey) < 0) return -1; if (tDecodeI64(&decoder, &pReq->timeRange.ekey) < 0) return -1; + DECODESQL(); tEndDecode(&decoder); tDecoderClear(&decoder); return 0; } +void tFreeSCompactDbReq(SCompactDbReq *pReq) { + FREESQL(); +} + int32_t tSerializeSUseDbRspImp(SEncoder *pEncoder, const SUseDbRsp *pRsp) { if (tEncodeCStr(pEncoder, pRsp->db) < 0) return -1; if (tEncodeI64(pEncoder, pRsp->uid) < 0) return -1; @@ -4305,6 +4420,7 @@ int32_t tSerializeSMDropTopicReq(void *buf, int32_t bufLen, SMDropTopicReq *pReq if (tStartEncode(&encoder) < 0) return -1; if (tEncodeCStr(&encoder, pReq->name) < 0) return -1; if (tEncodeI8(&encoder, pReq->igNotExists) < 0) return -1; + ENCODESQL(); tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -4319,12 +4435,17 @@ int32_t tDeserializeSMDropTopicReq(void *buf, 
int32_t bufLen, SMDropTopicReq *pR if (tStartDecode(&decoder) < 0) return -1; if (tDecodeCStrTo(&decoder, pReq->name) < 0) return -1; if (tDecodeI8(&decoder, &pReq->igNotExists) < 0) return -1; + DECODESQL(); tEndDecode(&decoder); tDecoderClear(&decoder); return 0; } +void tFreeSMDropTopicReq(SMDropTopicReq *pReq) { + FREESQL(); +} + int32_t tSerializeSMDropCgroupReq(void *buf, int32_t bufLen, SMDropCgroupReq *pReq) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); @@ -5215,6 +5336,7 @@ int32_t tSerializeSBalanceVgroupReq(void *buf, int32_t bufLen, SBalanceVgroupReq if (tStartEncode(&encoder) < 0) return -1; if (tEncodeI32(&encoder, pReq->useless) < 0) return -1; + ENCODESQL(); tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -5228,12 +5350,17 @@ int32_t tDeserializeSBalanceVgroupReq(void *buf, int32_t bufLen, SBalanceVgroupR if (tStartDecode(&decoder) < 0) return -1; if (tDecodeI32(&decoder, &pReq->useless) < 0) return -1; + DECODESQL(); tEndDecode(&decoder); tDecoderClear(&decoder); return 0; } +void tFreeSBalanceVgroupReq(SBalanceVgroupReq *pReq) { + FREESQL(); +} + int32_t tSerializeSBalanceVgroupLeaderReq(void *buf, int32_t bufLen, SBalanceVgroupLeaderReq *pReq) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); @@ -5241,6 +5368,7 @@ int32_t tSerializeSBalanceVgroupLeaderReq(void *buf, int32_t bufLen, SBalanceVgr if (tStartEncode(&encoder) < 0) return -1; if (tEncodeI32(&encoder, pReq->useless) < 0) return -1; if (tEncodeI32(&encoder, pReq->vgId) < 0) return -1; + ENCODESQL(); tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -5257,12 +5385,18 @@ int32_t tDeserializeSBalanceVgroupLeaderReq(void *buf, int32_t bufLen, SBalanceV if(!tDecodeIsEnd(&decoder)){ if (tDecodeI32(&decoder, &pReq->vgId) < 0) return -1; } + + DECODESQL(); tEndDecode(&decoder); tDecoderClear(&decoder); return 0; } +void tFreeSBalanceVgroupLeaderReq(SBalanceVgroupLeaderReq *pReq) { + FREESQL(); +} + int32_t tSerializeSMergeVgroupReq(void *buf, int32_t bufLen, SMergeVgroupReq *pReq) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); @@ -5299,6 +5433,7 @@ int32_t tSerializeSRedistributeVgroupReq(void *buf, int32_t bufLen, SRedistribut if (tEncodeI32(&encoder, pReq->dnodeId1) < 0) return -1; if (tEncodeI32(&encoder, pReq->dnodeId2) < 0) return -1; if (tEncodeI32(&encoder, pReq->dnodeId3) < 0) return -1; + ENCODESQL(); tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -5315,12 +5450,17 @@ int32_t tDeserializeSRedistributeVgroupReq(void *buf, int32_t bufLen, SRedistrib if (tDecodeI32(&decoder, &pReq->dnodeId1) < 0) return -1; if (tDecodeI32(&decoder, &pReq->dnodeId2) < 0) return -1; if (tDecodeI32(&decoder, &pReq->dnodeId3) < 0) return -1; + DECODESQL(); tEndDecode(&decoder); tDecoderClear(&decoder); return 0; } +void tFreeSRedistributeVgroupReq(SRedistributeVgroupReq *pReq) { + FREESQL(); +} + int32_t tSerializeSSplitVgroupReq(void *buf, int32_t bufLen, SSplitVgroupReq *pReq) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); @@ -5943,6 +6083,7 @@ int32_t tDeserializeSOperatorParam(SDecoder *pDecoder, SOperatorParam *pOpParam) if (uidNum > 0) { pScan->pUidList = taosArrayInit(uidNum, sizeof(int64_t)); if (NULL == pScan->pUidList) return -1; + for (int32_t m = 0; m < uidNum; ++m) { if (tDecodeI64(pDecoder, &uid) < 0) return -1; taosArrayPush(pScan->pUidList, &uid); @@ -5959,6 +6100,7 @@ int32_t tDeserializeSOperatorParam(SDecoder *pDecoder, SOperatorParam *pOpParam) int32_t childrenNum = 0; if (tDecodeI32(pDecoder, &childrenNum) < 0) return -1; + if (childrenNum 
> 0) { pOpParam->pChildren = taosArrayInit(childrenNum, POINTER_BYTES); if (NULL == pOpParam->pChildren) return -1; @@ -6836,6 +6978,8 @@ int32_t tSerializeSMDropStreamReq(void *buf, int32_t bufLen, const SMDropStreamR if (tEncodeCStr(&encoder, pReq->name) < 0) return -1; if (tEncodeI8(&encoder, pReq->igNotExists) < 0) return -1; + ENCODESQL(); + tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -6851,12 +6995,18 @@ int32_t tDeserializeSMDropStreamReq(void *buf, int32_t bufLen, SMDropStreamReq * if (tDecodeCStrTo(&decoder, pReq->name) < 0) return -1; if (tDecodeI8(&decoder, &pReq->igNotExists) < 0) return -1; + DECODESQL(); + tEndDecode(&decoder); tDecoderClear(&decoder); return 0; } +void tFreeSMDropStreamReq(SMDropStreamReq *pReq) { + FREESQL(); +} + int32_t tSerializeSMRecoverStreamReq(void *buf, int32_t bufLen, const SMRecoverStreamReq *pReq) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); @@ -6998,6 +7148,11 @@ int tEncodeSVCreateTbReq(SEncoder *pCoder, const SVCreateTbReq *pReq) { } else { ASSERT(0); } + //ENCODESQL + if(pReq->sqlLen > 0 && pReq->sql != NULL) { + if (tEncodeI32(pCoder, pReq->sqlLen) < 0) return -1; + if (tEncodeBinary(pCoder, pReq->sql, pReq->sqlLen) < 0) return -1; + } tEndEncode(pCoder); return 0; @@ -7041,6 +7196,14 @@ int tDecodeSVCreateTbReq(SDecoder *pCoder, SVCreateTbReq *pReq) { ASSERT(0); } + //DECODESQL + if(!tDecodeIsEnd(pCoder)){ + if(tDecodeI32(pCoder, &pReq->sqlLen) < 0) return -1; + if(pReq->sqlLen > 0){ + if (tDecodeBinaryAlloc(pCoder, (void**)&pReq->sql, NULL) < 0) return -1; + } + } + tEndDecode(pCoder); return 0; } @@ -7062,6 +7225,11 @@ void tDestroySVCreateTbReq(SVCreateTbReq *pReq, int32_t flags) { if (pReq->ntb.schemaRow.pSchema) taosMemoryFree(pReq->ntb.schemaRow.pSchema); } } + + if(pReq->sql != NULL){ + taosMemoryFree(pReq->sql); + } + pReq->sql = NULL; } int tEncodeSVCreateTbBatchReq(SEncoder *pCoder, const SVCreateTbBatchReq *pReq) { diff --git a/source/common/src/ttime.c b/source/common/src/ttime.c index 7d65ac424f..425218f0e1 100644 --- a/source/common/src/ttime.c +++ b/source/common/src/ttime.c @@ -756,7 +756,8 @@ int32_t taosTimeCountIntervalForFill(int64_t skey, int64_t ekey, int64_t interva } int64_t taosTimeTruncate(int64_t ts, const SInterval* pInterval) { - if (pInterval->sliding == 0 && pInterval->interval == 0) { + if (pInterval->sliding == 0) { + ASSERT(pInterval->interval == 0); return ts; } diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c index 3fc3ca4cea..24b5b2566c 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c @@ -80,15 +80,18 @@ int32_t mmProcessDropReq(const SMgmtInputOpt *pInput, SRpcMsg *pMsg) { if (pInput->pData->dnodeId != 0 && dropReq.dnodeId != pInput->pData->dnodeId) { terrno = TSDB_CODE_INVALID_OPTION; dGError("failed to drop mnode since %s", terrstr()); + tFreeSMCreateQnodeReq(&dropReq); return -1; } SMnodeOpt option = {.deploy = false}; if (mmWriteFile(pInput->path, &option) != 0) { dGError("failed to write mnode file since %s", terrstr()); + tFreeSMCreateQnodeReq(&dropReq); return -1; } + tFreeSMCreateQnodeReq(&dropReq); return 0; } @@ -213,6 +216,7 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_STOP_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_UPDATE_RSP, mmPutMsgToWriteQueue, 0) == 
NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_RESET_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_HEARTBEAT, mmPutMsgToReadQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIG_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/mgmt_qnode/src/qmHandle.c b/source/dnode/mgmt/mgmt_qnode/src/qmHandle.c index 86bc11c616..82876d6886 100644 --- a/source/dnode/mgmt/mgmt_qnode/src/qmHandle.c +++ b/source/dnode/mgmt/mgmt_qnode/src/qmHandle.c @@ -39,15 +39,18 @@ int32_t qmProcessCreateReq(const SMgmtInputOpt *pInput, SRpcMsg *pMsg) { if (pInput->pData->dnodeId != 0 && createReq.dnodeId != pInput->pData->dnodeId) { terrno = TSDB_CODE_INVALID_OPTION; dError("failed to create qnode since %s", terrstr()); + tFreeSMCreateQnodeReq(&createReq); return -1; } bool deployed = true; if (dmWriteFile(pInput->path, pInput->name, deployed) != 0) { dError("failed to write qnode file since %s", terrstr()); + tFreeSMCreateQnodeReq(&createReq); return -1; } + tFreeSMCreateQnodeReq(&createReq); return 0; } @@ -61,15 +64,18 @@ int32_t qmProcessDropReq(const SMgmtInputOpt *pInput, SRpcMsg *pMsg) { if (pInput->pData->dnodeId != 0 && dropReq.dnodeId != pInput->pData->dnodeId) { terrno = TSDB_CODE_INVALID_OPTION; dError("failed to drop qnode since %s", terrstr()); + tFreeSMCreateQnodeReq(&dropReq); return -1; } bool deployed = false; if (dmWriteFile(pInput->path, pInput->name, deployed) != 0) { dError("failed to write qnode file since %s", terrstr()); + tFreeSMCreateQnodeReq(&dropReq); return -1; } + tFreeSMCreateQnodeReq(&dropReq); return 0; } diff --git a/source/dnode/mgmt/mgmt_snode/src/smHandle.c b/source/dnode/mgmt/mgmt_snode/src/smHandle.c index 13b81231d4..b29c5c1eb4 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smHandle.c +++ b/source/dnode/mgmt/mgmt_snode/src/smHandle.c @@ -28,15 +28,18 @@ int32_t smProcessCreateReq(const SMgmtInputOpt *pInput, SRpcMsg *pMsg) { if (pInput->pData->dnodeId != 0 && createReq.dnodeId != pInput->pData->dnodeId) { terrno = TSDB_CODE_INVALID_OPTION; dError("failed to create snode since %s", terrstr()); + tFreeSMCreateQnodeReq(&createReq); return -1; } bool deployed = true; if (dmWriteFile(pInput->path, pInput->name, deployed) != 0) { dError("failed to write snode file since %s", terrstr()); + tFreeSMCreateQnodeReq(&createReq); return -1; } + tFreeSMCreateQnodeReq(&createReq); return 0; } @@ -50,15 +53,18 @@ int32_t smProcessDropReq(const SMgmtInputOpt *pInput, SRpcMsg *pMsg) { if (pInput->pData->dnodeId != 0 && dropReq.dnodeId != pInput->pData->dnodeId) { terrno = TSDB_CODE_INVALID_OPTION; dError("failed to drop snode since %s", terrstr()); + tFreeSMCreateQnodeReq(&dropReq); return -1; } bool deployed = false; if (dmWriteFile(pInput->path, pInput->name, deployed) != 0) { dError("failed to write snode file since %s", terrstr()); + tFreeSMCreateQnodeReq(&dropReq); return -1; } + tFreeSMCreateQnodeReq(&dropReq); return 0; } diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 84294d9357..0e17d2b75f 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -817,13 +817,13 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, 
TDMT_VND_STREAM_TASK_CHECK_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; -// if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TRIGGER, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_STOP, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT_READY, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_UPDATE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_RESET, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_REPLICA, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIG, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index e31237fc67..f4236964ca 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -672,6 +672,7 @@ typedef struct { char name[TSDB_STREAM_FNAME_LEN]; // ctl SRWLatch lock; + // create info int64_t createTime; int64_t updateTime; diff --git a/source/dnode/mnode/impl/inc/mndUser.h b/source/dnode/mnode/impl/inc/mndUser.h index 1aa01fd59d..c01741b350 100644 --- a/source/dnode/mnode/impl/inc/mndUser.h +++ b/source/dnode/mnode/impl/inc/mndUser.h @@ -40,6 +40,7 @@ SHashObj *mndDupTopicHash(SHashObj *pOld); int32_t mndValidateUserAuthInfo(SMnode *pMnode, SUserAuthVersion *pUsers, int32_t numOfUses, void **ppRsp, int32_t *pRspLen); int32_t mndUserRemoveDb(SMnode *pMnode, STrans *pTrans, char *db); +int32_t mndUserRemoveStb(SMnode *pMnode, STrans *pTrans, char *stb); int32_t mndUserRemoveTopic(SMnode *pMnode, STrans *pTrans, char *topic); int32_t mndUserDupObj(SUserObj *pUser, SUserObj *pNew); diff --git a/source/dnode/mnode/impl/src/mndDb.c b/source/dnode/mnode/impl/src/mndDb.c index a27de37daf..58c8a271bf 100644 --- a/source/dnode/mnode/impl/src/mndDb.c +++ b/source/dnode/mnode/impl/src/mndDb.c @@ -759,45 +759,10 @@ static int32_t mndProcessCreateDbReq(SRpcMsg *pReq) { code = mndCreateDb(pMnode, pReq, &createReq, pUser); if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; - char detail[3000] = {0}; - char tmp[100] = {0}; - - mndBuildAuditDetailInt32(detail, tmp, "buffer:%d", createReq.buffer); - mndBuildAuditDetailInt32(detail, tmp, "cacheLast:%d", createReq.cacheLast); - mndBuildAuditDetailInt32(detail, tmp, "cacheLastSize:%d", createReq.cacheLastSize); - mndBuildAuditDetailInt32(detail, tmp, "compression:%d", createReq.compression); - mndBuildAuditDetailInt32(detail, tmp, "daysPerFile:%d", createReq.daysPerFile); - mndBuildAuditDetailInt32(detail, tmp, "daysToKeep0:%d", createReq.daysToKeep0); - mndBuildAuditDetailInt32(detail, tmp, "daysToKeep1:%d", createReq.daysToKeep1); - mndBuildAuditDetailInt32(detail, tmp, "daysToKeep2:%d", createReq.daysToKeep2); - mndBuildAuditDetailInt32(detail, tmp, "keepTimeOffset:%d", createReq.keepTimeOffset); - mndBuildAuditDetailInt32(detail, tmp, "hashPrefix:%d", createReq.hashPrefix); - mndBuildAuditDetailInt32(detail, tmp, "hashSuffix:%d", createReq.hashSuffix); - mndBuildAuditDetailInt32(detail, tmp, "ignoreExist:%d", createReq.ignoreExist); - 
mndBuildAuditDetailInt32(detail, tmp, "maxRows:%d", createReq.maxRows); - mndBuildAuditDetailInt32(detail, tmp, "minRows:%d", createReq.minRows); - mndBuildAuditDetailInt32(detail, tmp, "numOfRetensions:%d", createReq.numOfRetensions); - mndBuildAuditDetailInt32(detail, tmp, "numOfStables:%d", createReq.numOfStables); - mndBuildAuditDetailInt32(detail, tmp, "numOfVgroups:%d", createReq.numOfVgroups); - mndBuildAuditDetailInt32(detail, tmp, "pages:%d", createReq.pages); - mndBuildAuditDetailInt32(detail, tmp, "pageSize:%d", createReq.pageSize); - mndBuildAuditDetailInt32(detail, tmp, "precision:%d", createReq.precision); - mndBuildAuditDetailInt32(detail, tmp, "replications:%d", createReq.replications); - mndBuildAuditDetailInt32(detail, tmp, "schemaless:%d", createReq.schemaless); - mndBuildAuditDetailInt32(detail, tmp, "sstTrigger:%d", createReq.sstTrigger); - mndBuildAuditDetailInt32(detail, tmp, "strict:%d", createReq.strict); - mndBuildAuditDetailInt32(detail, tmp, "tsdbPageSize:%d", createReq.tsdbPageSize); - mndBuildAuditDetailInt32(detail, tmp, "walFsyncPeriod:%d", createReq.walFsyncPeriod); - mndBuildAuditDetailInt32(detail, tmp, "walLevel:%d", createReq.walLevel); - mndBuildAuditDetailInt32(detail, tmp, "walRetentionPeriod:%d", createReq.walRetentionPeriod); - mndBuildAuditDetailInt32(detail, tmp, "walRetentionSize:%" PRId64, createReq.walRetentionSize); - mndBuildAuditDetailInt32(detail, tmp, "walRollPeriod:%d", createReq.walRollPeriod); - mndBuildAuditDetailInt32(detail, tmp, "walSegmentSize:%" PRId64, createReq.walSegmentSize); - SName name = {0}; tNameFromString(&name, createReq.db, T_NAME_ACCT | T_NAME_DB); - auditRecord(pReq, pMnode->clusterId, "createDB", name.dbname, "", detail); + auditRecord(pReq, pMnode->clusterId, "createDB", name.dbname, "", createReq.sql, createReq.sqlLen); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -1049,30 +1014,10 @@ static int32_t mndProcessAlterDbReq(SRpcMsg *pReq) { if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; } - char detail[3000] = {0}; - char tmp[100] = {0}; - - mndBuildAuditDetailInt32(detail, tmp, "buffer:%d", alterReq.buffer); - mndBuildAuditDetailInt32(detail, tmp, "cacheLast:%d", alterReq.cacheLast); - mndBuildAuditDetailInt32(detail, tmp, "cacheLastSize:%d", alterReq.cacheLastSize); - mndBuildAuditDetailInt32(detail, tmp, "daysPerFile:%d", alterReq.daysPerFile); - mndBuildAuditDetailInt32(detail, tmp, "daysToKeep0:%d", alterReq.daysToKeep0); - mndBuildAuditDetailInt32(detail, tmp, "daysToKeep1:%d", alterReq.daysToKeep1); - mndBuildAuditDetailInt32(detail, tmp, "daysToKeep2:%d", alterReq.daysToKeep2); - mndBuildAuditDetailInt32(detail, tmp, "keepTimeOffset:%d", alterReq.keepTimeOffset); - mndBuildAuditDetailInt32(detail, tmp, "minRows:%d", alterReq.minRows); - mndBuildAuditDetailInt32(detail, tmp, "pages:%d", alterReq.pages); - mndBuildAuditDetailInt32(detail, tmp, "pageSize:%d", alterReq.pageSize); - mndBuildAuditDetailInt32(detail, tmp, "replications:%d", alterReq.replications); - mndBuildAuditDetailInt32(detail, tmp, "sstTrigger:%d", alterReq.sstTrigger); - mndBuildAuditDetailInt32(detail, tmp, "strict:%d", alterReq.strict); - mndBuildAuditDetailInt32(detail, tmp, "walFsyncPeriod:%d", alterReq.walFsyncPeriod); - mndBuildAuditDetailInt32(detail, tmp, "walRetentionSize:%d", alterReq.walRetentionSize); - SName name = {0}; tNameFromString(&name, alterReq.db, T_NAME_ACCT | T_NAME_DB); - auditRecord(pReq, pMnode->clusterId, "alterDB", name.dbname, "", detail); + auditRecord(pReq, pMnode->clusterId, "alterDB", 
name.dbname, "", alterReq.sql, alterReq.sqlLen); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -1082,6 +1027,7 @@ _OVER: mndReleaseDb(pMnode, pDb); taosArrayDestroy(dbObj.cfg.pRetensions); + tFreeSAlterDbReq(&alterReq); terrno = code; return code; @@ -1364,13 +1310,10 @@ static int32_t mndProcessDropDbReq(SRpcMsg *pReq) { code = TSDB_CODE_ACTION_IN_PROGRESS; } - char detail[1000] = {0}; - sprintf(detail, "ignoreNotExists:%d", dropReq.ignoreNotExists); - SName name = {0}; tNameFromString(&name, dropReq.db, T_NAME_ACCT | T_NAME_DB); - auditRecord(pReq, pMnode->clusterId, "dropDB", name.dbname, "", detail); + auditRecord(pReq, pMnode->clusterId, "dropDB", name.dbname, "", dropReq.sql, dropReq.sqlLen); _OVER: if (code != TSDB_CODE_SUCCESS && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -1378,6 +1321,7 @@ _OVER: } mndReleaseDb(pMnode, pDb); + tFreeSDropDbReq(&dropReq); return code; } diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index f1a1bb8102..b53dee7bff 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -1025,7 +1025,7 @@ static int32_t mndProcessCreateDnodeReq(SRpcMsg *pReq) { char obj[200] = {0}; sprintf(obj, "%s:%d", createReq.fqdn, createReq.port); - auditRecord(pReq, pMnode->clusterId, "createDnode", obj, "", ""); + auditRecord(pReq, pMnode->clusterId, "createDnode", obj, "", createReq.sql, createReq.sqlLen); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -1033,6 +1033,7 @@ _OVER: } mndReleaseDnode(pMnode, pDnode); + tFreeSCreateDnodeReq(&createReq); return code; } @@ -1173,13 +1174,7 @@ static int32_t mndProcessDropDnodeReq(SRpcMsg *pReq) { char obj1[30] = {0}; sprintf(obj1, "%d", dropReq.dnodeId); - // char obj2[150] = {0}; - // sprintf(obj2, "%s:%d", dropReq.fqdn, dropReq.port); - - char detail[100] = {0}; - sprintf(detail, "force:%d, unsafe:%d", dropReq.force, dropReq.unsafe); - - auditRecord(pReq, pMnode->clusterId, "dropDnode", obj1, "", detail); + auditRecord(pReq, pMnode->clusterId, "dropDnode", obj1, "", dropReq.sql, dropReq.sqlLen); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -1190,6 +1185,7 @@ _OVER: mndReleaseMnode(pMnode, pMObj); mndReleaseQnode(pMnode, pQObj); mndReleaseSnode(pMnode, pSObj); + tFreeSDropDnodeReq(&dropReq); return code; } @@ -1198,7 +1194,7 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { const char *options[] = { "debugFlag", "dDebugFlag", "vDebugFlag", "mDebugFlag", "wDebugFlag", "sDebugFlag", "tsdbDebugFlag", "tqDebugFlag", "fsDebugFlag", "udfDebugFlag", "smaDebugFlag", "idxDebugFlag", "tdbDebugFlag", "tmrDebugFlag", - "uDebugFlag", "smaDebugFlag", "rpcDebugFlag", "qDebugFlag", "metaDebugFlag", + "uDebugFlag", "smaDebugFlag", "rpcDebugFlag", "qDebugFlag", "metaDebugFlag", "stDebugFlag", }; int32_t optionSize = tListLen(options); @@ -1210,6 +1206,7 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { mInfo("dnode:%d, start to config, option:%s, value:%s", cfgReq.dnodeId, cfgReq.config, cfgReq.value); if (mndCheckOperPrivilege(pMnode, pReq->info.conn.user, MND_OPER_CONFIG_DNODE) != 0) { + tFreeSMCfgDnodeReq(&cfgReq); return -1; } @@ -1220,6 +1217,7 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { if (' ' != cfgReq.config[7] && 0 != cfgReq.config[7]) { mError("dnode:%d, failed to config monitor since invalid conf:%s", cfgReq.dnodeId, cfgReq.config); terrno = TSDB_CODE_INVALID_CFG; + tFreeSMCfgDnodeReq(&cfgReq); return -1; } @@ -1231,6 +1229,7 @@ static int32_t 
mndProcessConfigDnodeReq(SRpcMsg *pReq) { if (flag < 0 || flag > 2) { mError("dnode:%d, failed to config monitor since value:%d", cfgReq.dnodeId, flag); terrno = TSDB_CODE_INVALID_CFG; + tFreeSMCfgDnodeReq(&cfgReq); return -1; } @@ -1246,6 +1245,7 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { mError("dnode:%d, failed to config ttlPushInterval since value:%d. Valid range: [0, 100000]", cfgReq.dnodeId, flag); terrno = TSDB_CODE_INVALID_CFG; + tFreeSMCfgDnodeReq(&cfgReq); return -1; } @@ -1261,11 +1261,27 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { mError("dnode:%d, failed to config ttlBatchDropNum since value:%d. Valid range: [0, %d]", cfgReq.dnodeId, flag, INT32_MAX); terrno = TSDB_CODE_INVALID_CFG; + tFreeSMCfgDnodeReq(&cfgReq); return -1; } strcpy(dcfgReq.config, "ttlbatchdropnum"); snprintf(dcfgReq.value, TSDB_DNODE_VALUE_LEN, "%d", flag); + } else if (strncasecmp(cfgReq.config, "asynclog", 8) == 0) { + int32_t optLen = strlen("asynclog"); + int32_t flag = -1; + int32_t code = mndMCfgGetValInt32(&cfgReq, optLen, &flag); + if (code < 0) return code; + + if (flag < 0 || flag > 1) { + mError("dnode:%d, failed to config asynclog since value:%d. Valid range: [0, 1]", cfgReq.dnodeId, flag); + terrno = TSDB_CODE_INVALID_CFG; + tFreeSMCfgDnodeReq(&cfgReq); + return -1; + } + + strcpy(dcfgReq.config, "asynclog"); + snprintf(dcfgReq.value, TSDB_DNODE_VALUE_LEN, "%d", flag); #ifdef TD_ENTERPRISE } else if (strncasecmp(cfgReq.config, "supportvnodes", 13) == 0) { int32_t optLen = strlen("supportvnodes"); @@ -1276,6 +1292,7 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { if (flag < 0 || flag > 4096) { mError("dnode:%d, failed to config supportVnodes since value:%d. Valid range: [0, 4096]", cfgReq.dnodeId, flag); terrno = TSDB_CODE_INVALID_CFG; + tFreeSMCfgDnodeReq(&cfgReq); return -1; } if (flag == 0) { @@ -1291,6 +1308,7 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { if (' ' != cfgReq.config[index] && 0 != cfgReq.config[index]) { mError("dnode:%d, failed to config activeCode since invalid conf:%s", cfgReq.dnodeId, cfgReq.config); terrno = TSDB_CODE_INVALID_CFG; + tFreeSMCfgDnodeReq(&cfgReq); return -1; } int32_t vlen = strlen(cfgReq.value); @@ -1300,6 +1318,7 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { mError("dnode:%d, failed to config activeCode since invalid vlen:%d. conf:%s, val:%s", cfgReq.dnodeId, vlen, cfgReq.config, cfgReq.value); terrno = TSDB_CODE_INVALID_OPTION; + tFreeSMCfgDnodeReq(&cfgReq); return -1; } @@ -1307,10 +1326,11 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { snprintf(dcfgReq.value, TSDB_DNODE_VALUE_LEN, "%s", cfgReq.value); if (mndConfigDnode(pMnode, pReq, &cfgReq, opt) != 0) { - mError("dnode:%d, failed to config activeCode since %s. 
conf:%s, val:%s", cfgReq.dnodeId, terrstr(), - cfgReq.config, cfgReq.value); + mError("dnode:%d, failed to config activeCode since %s", cfgReq.dnodeId, terrstr()); + tFreeSMCfgDnodeReq(&cfgReq); return -1; } + tFreeSMCfgDnodeReq(&cfgReq); return 0; #endif } else { @@ -1323,6 +1343,7 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { if (' ' != cfgReq.config[optLen] && 0 != cfgReq.config[optLen]) { mError("dnode:%d, failed to config since invalid conf:%s", cfgReq.dnodeId, cfgReq.config); terrno = TSDB_CODE_INVALID_CFG; + tFreeSMCfgDnodeReq(&cfgReq); return -1; } @@ -1334,6 +1355,7 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { if (flag < 0 || flag > 255) { mError("dnode:%d, failed to config %s since value:%d", cfgReq.dnodeId, optName, flag); terrno = TSDB_CODE_INVALID_CFG; + tFreeSMCfgDnodeReq(&cfgReq); return -1; } @@ -1345,6 +1367,7 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { if (!findOpt) { terrno = TSDB_CODE_INVALID_CFG; mError("dnode:%d, failed to config since %s", cfgReq.dnodeId, terrstr()); + tFreeSMCfgDnodeReq(&cfgReq); return -1; } } @@ -1352,10 +1375,9 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { char obj[50] = {0}; sprintf(obj, "%d", cfgReq.dnodeId); - char detail[500] = {0}; - sprintf(detail, "config:%s, value:%s", cfgReq.config, cfgReq.value); + auditRecord(pReq, pMnode->clusterId, "alterDnode", obj, "", cfgReq.sql, cfgReq.sqlLen); - auditRecord(pReq, pMnode->clusterId, "alterDnode", obj, "", detail); + tFreeSMCfgDnodeReq(&cfgReq); int32_t code = -1; SSdb *pSdb = pMnode->pSdb; diff --git a/source/dnode/mnode/impl/src/mndIndex.c b/source/dnode/mnode/impl/src/mndIndex.c index 2e78116a86..041cc664e5 100644 --- a/source/dnode/mnode/impl/src/mndIndex.c +++ b/source/dnode/mnode/impl/src/mndIndex.c @@ -439,7 +439,7 @@ static int32_t mndProcessCreateIdxReq(SRpcMsg *pReq) { pDb = mndAcquireDbByStb(pMnode, createReq.stbName); if (pDb == NULL) { - terrno = TSDB_CODE_MND_INVALID_DB; + terrno = TSDB_CODE_MND_DB_NOT_EXIST; goto _OVER; } diff --git a/source/dnode/mnode/impl/src/mndMnode.c b/source/dnode/mnode/impl/src/mndMnode.c index 5827a30b43..22b2fec857 100644 --- a/source/dnode/mnode/impl/src/mndMnode.c +++ b/source/dnode/mnode/impl/src/mndMnode.c @@ -656,7 +656,7 @@ static int32_t mndProcessCreateMnodeReq(SRpcMsg *pReq) { char obj[40] = {0}; sprintf(obj, "%d", createReq.dnodeId); - auditRecord(pReq, pMnode->clusterId, "createMnode", obj, "", ""); + auditRecord(pReq, pMnode->clusterId, "createMnode", obj, "", createReq.sql, createReq.sqlLen); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -665,6 +665,7 @@ _OVER: mndReleaseMnode(pMnode, pObj); mndReleaseDnode(pMnode, pDnode); + tFreeSMCreateQnodeReq(&createReq); return code; } @@ -797,7 +798,7 @@ static int32_t mndProcessDropMnodeReq(SRpcMsg *pReq) { char obj[40] = {0}; sprintf(obj, "%d", dropReq.dnodeId); - auditRecord(pReq, pMnode->clusterId, "dropMnode", obj, "", ""); + auditRecord(pReq, pMnode->clusterId, "dropMnode", obj, "", dropReq.sql, dropReq.sqlLen); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -805,6 +806,7 @@ _OVER: } mndReleaseMnode(pMnode, pObj); + tFreeSMCreateQnodeReq(&dropReq); return code; } diff --git a/source/dnode/mnode/impl/src/mndProfile.c b/source/dnode/mnode/impl/src/mndProfile.c index 6f67778615..1f8c3b161b 100644 --- a/source/dnode/mnode/impl/src/mndProfile.c +++ b/source/dnode/mnode/impl/src/mndProfile.c @@ -259,7 +259,7 @@ static int32_t mndProcessConnectReq(SRpcMsg *pReq) { if (pDb == NULL) { if (0 != 
strcmp(connReq.db, TSDB_INFORMATION_SCHEMA_DB) && (0 != strcmp(connReq.db, TSDB_PERFORMANCE_SCHEMA_DB))) { - terrno = TSDB_CODE_MND_INVALID_DB; + terrno = TSDB_CODE_MND_DB_NOT_EXIST; mGError("user:%s, failed to login from %s while use db:%s since %s", pReq->info.conn.user, ip, connReq.db, terrstr()); goto _OVER; @@ -314,10 +314,10 @@ _CONNECT: sprintf(obj, "%s:%d", ip, pConn->port); char detail[1000] = {0}; - sprintf(detail, "connType:%d, db:%s, pid:%d, startTime:%" PRId64 ", sVer:%s, app:%s", + sprintf(detail, "connType:%d, db:%s, pid:%d, startTime:%" PRId64 ", sVer:%s, app:%s", connReq.connType, connReq.db, connReq.pid, connReq.startTime, connReq.sVer, connReq.app); - auditRecord(pReq, pMnode->clusterId, "login", connReq.user, obj, detail); + auditRecord(pReq, pMnode->clusterId, "login", connReq.user, obj, detail, strlen(detail)); _OVER: diff --git a/source/dnode/mnode/impl/src/mndQnode.c b/source/dnode/mnode/impl/src/mndQnode.c index 767e06a8d4..af11476d64 100644 --- a/source/dnode/mnode/impl/src/mndQnode.c +++ b/source/dnode/mnode/impl/src/mndQnode.c @@ -310,7 +310,7 @@ static int32_t mndProcessCreateQnodeReq(SRpcMsg *pReq) { char obj[33] = {0}; sprintf(obj, "%d", createReq.dnodeId); - auditRecord(pReq, pMnode->clusterId, "createQnode", obj, "", ""); + auditRecord(pReq, pMnode->clusterId, "createQnode", obj, "", createReq.sql, createReq.sqlLen); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { mError("qnode:%d, failed to create since %s", createReq.dnodeId, terrstr()); @@ -318,6 +318,7 @@ _OVER: mndReleaseQnode(pMnode, pObj); mndReleaseDnode(pMnode, pDnode); + tFreeSMCreateQnodeReq(&createReq); return code; } @@ -423,7 +424,7 @@ static int32_t mndProcessDropQnodeReq(SRpcMsg *pReq) { char obj[33] = {0}; sprintf(obj, "%d", dropReq.dnodeId); - auditRecord(pReq, pMnode->clusterId, "dropQnode", obj, "", ""); + auditRecord(pReq, pMnode->clusterId, "dropQnode", obj, "", dropReq.sql, dropReq.sqlLen); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -431,6 +432,7 @@ _OVER: } mndReleaseQnode(pMnode, pObj); + tFreeSMCreateQnodeReq(&dropReq); return code; } diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index 1d7d391acf..2931f6be6b 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -27,8 +27,8 @@ #define SINK_NODE_LEVEL (0) extern bool tsDeployOnSnode; -static int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId, - SVgObj* pVgroup, SEpSet* pEpset, int32_t fillHistory); +static int32_t doAddSinkTask(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId, SVgObj* pVgroup, + SEpSet* pEpset, bool isFillhistory); int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64_t uid, int8_t triggerType, int64_t watermark, int64_t deleteMark) { @@ -87,15 +87,17 @@ END: } int32_t mndSetSinkTaskInfo(SStreamObj* pStream, SStreamTask* pTask) { + STaskOutputInfo* pInfo = &pTask->outputInfo; + if (pStream->smaId != 0) { - pTask->outputInfo.type = TASK_OUTPUT__SMA; - pTask->smaSink.smaId = pStream->smaId; + pInfo->type = TASK_OUTPUT__SMA; + pInfo->smaSink.smaId = pStream->smaId; } else { - pTask->outputInfo.type = TASK_OUTPUT__TABLE; - pTask->tbSink.stbUid = pStream->targetStbUid; - memcpy(pTask->tbSink.stbFullName, pStream->targetSTbName, TSDB_TABLE_FNAME_LEN); - pTask->tbSink.pSchemaWrapper = tCloneSSchemaWrapper(&pStream->outputSchema); - if (pTask->tbSink.pSchemaWrapper == NULL) { + 
pInfo->type = TASK_OUTPUT__TABLE; + pInfo->tbSink.stbUid = pStream->targetStbUid; + memcpy(pInfo->tbSink.stbFullName, pStream->targetSTbName, TSDB_TABLE_FNAME_LEN); + pInfo->tbSink.pSchemaWrapper = tCloneSSchemaWrapper(&pStream->outputSchema); + if (pInfo->tbSink.pSchemaWrapper == NULL) { return TSDB_CODE_OUT_OF_MEMORY; } } @@ -113,7 +115,7 @@ int32_t mndAddDispatcherForInternalTask(SMnode* pMnode, SStreamObj* pStream, SAr isShuffle = true; pTask->outputInfo.type = TASK_OUTPUT__SHUFFLE_DISPATCH; pTask->msgInfo.msgType = TDMT_STREAM_TASK_DISPATCH; - if (mndExtractDbInfo(pMnode, pDb, &pTask->shuffleDispatcher.dbInfo, NULL) < 0) { + if (mndExtractDbInfo(pMnode, pDb, &pTask->outputInfo.shuffleDispatcher.dbInfo, NULL) < 0) { return -1; } } @@ -124,8 +126,8 @@ int32_t mndAddDispatcherForInternalTask(SMnode* pMnode, SStreamObj* pStream, SAr int32_t numOfSinkNodes = taosArrayGetSize(pSinkNodeList); if (isShuffle) { - memcpy(pTask->shuffleDispatcher.stbFullName, pStream->targetSTbName, TSDB_TABLE_FNAME_LEN); - SArray* pVgs = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; + memcpy(pTask->outputInfo.shuffleDispatcher.stbFullName, pStream->targetSTbName, TSDB_TABLE_FNAME_LEN); + SArray* pVgs = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos; int32_t numOfVgroups = taosArrayGetSize(pVgs); for (int32_t i = 0; i < numOfVgroups; i++) { @@ -207,8 +209,7 @@ SVgObj* mndSchedFetchOneVg(SMnode* pMnode, int64_t dbUid) { } // create sink node for each vgroup. -int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SArray* pTaskList, SStreamObj* pStream, SEpSet* pEpset, - int32_t fillHistory) { +int32_t doAddShuffleSinkTask(SMnode* pMnode, SArray* pTaskList, SStreamObj* pStream, SEpSet* pEpset, bool fillHistory) { SSdb* pSdb = pMnode->pSdb; void* pIter = NULL; @@ -224,17 +225,17 @@ int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SArray* pTaskList, SStrea continue; } - mndAddSinkTaskToStream(pStream, pTaskList, pMnode, pVgroup->vgId, pVgroup, pEpset, fillHistory); + doAddSinkTask(pStream, pTaskList, pMnode, pVgroup->vgId, pVgroup, pEpset, fillHistory); sdbRelease(pSdb, pVgroup); } return 0; } -int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId, SVgObj* pVgroup, - SEpSet* pEpset, int32_t fillHistory) { - int64_t uid = (fillHistory == 0)? pStream->uid:pStream->hTaskUid; - SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SINK, fillHistory, 0, pTaskList); +int32_t doAddSinkTask(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId, SVgObj* pVgroup, + SEpSet* pEpset, bool isFillhistory) { + int64_t uid = (isFillhistory)? 
pStream->hTaskUid:pStream->uid; + SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SINK, isFillhistory, 0, pTaskList, pStream->conf.fillHistory); if (pTask == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; @@ -248,17 +249,16 @@ int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* p return 0; } -static int32_t addSourceStreamTask(SMnode* pMnode, SVgObj* pVgroup, SArray* pTaskList, SArray* pSinkTaskList, - SStreamObj* pStream, SSubplan* plan, uint64_t uid, SEpSet* pEpset, - int8_t fillHistory, bool hasExtraSink, int64_t firstWindowSkey) { - SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SOURCE, fillHistory, pStream->conf.triggerParam, pTaskList); +static int32_t addSourceTask(SMnode* pMnode, SVgObj* pVgroup, SArray* pTaskList, SArray* pSinkTaskList, + SStreamObj* pStream, SSubplan* plan, uint64_t uid, SEpSet* pEpset, bool fillHistory, + bool hasExtraSink, int64_t firstWindowSkey, bool hasFillHistory) { + SStreamTask* pTask = + tNewStreamTask(uid, TASK_LEVEL__SOURCE, fillHistory, pStream->conf.triggerParam, pTaskList, hasFillHistory); if (pTask == NULL) { return terrno; } epsetAssign(&pTask->info.mnodeEpset, pEpset); - - // todo set the correct ts, which should be last key of queried table. STimeWindow* pWindow = &pTask->dataRange.window; pWindow->skey = INT64_MIN; @@ -296,8 +296,8 @@ static void setHTasksId(SArray* pTaskList, const SArray* pHTaskList) { SStreamTask** pStreamTask = taosArrayGet(pTaskList, i); SStreamTask** pHTask = taosArrayGet(pHTaskList, i); - (*pStreamTask)->historyTaskId.taskId = (*pHTask)->id.taskId; - (*pStreamTask)->historyTaskId.streamId = (*pHTask)->id.streamId; + (*pStreamTask)->hTaskInfo.id.taskId = (*pHTask)->id.taskId; + (*pStreamTask)->hTaskInfo.id.streamId = (*pHTask)->id.streamId; (*pHTask)->streamTaskId.taskId = (*pStreamTask)->id.taskId; (*pHTask)->streamTaskId.streamId = (*pStreamTask)->id.streamId; @@ -345,8 +345,8 @@ static int32_t addSourceTasksForOneLevelStream(SMnode* pMnode, const SQueryPlan* // new stream task SArray** pSinkTaskList = taosArrayGet(pStream->tasks, SINK_NODE_LEVEL); - int32_t code = addSourceStreamTask(pMnode, pVgroup, pTaskList, *pSinkTaskList, pStream, plan, pStream->uid, pEpset, - 0, hasExtraSink, nextWindowSkey); + int32_t code = addSourceTask(pMnode, pVgroup, pTaskList, *pSinkTaskList, pStream, plan, pStream->uid, pEpset, + false, hasExtraSink, nextWindowSkey, pStream->conf.fillHistory); if (code != TSDB_CODE_SUCCESS) { sdbRelease(pSdb, pVgroup); return -1; @@ -354,8 +354,8 @@ static int32_t addSourceTasksForOneLevelStream(SMnode* pMnode, const SQueryPlan* if (pStream->conf.fillHistory) { SArray** pHSinkTaskList = taosArrayGet(pStream->pHTasksList, SINK_NODE_LEVEL); - code = addSourceStreamTask(pMnode, pVgroup, pHTaskList, *pHSinkTaskList, pStream, plan, pStream->hTaskUid, - pEpset, 1, hasExtraSink, nextWindowSkey); + code = addSourceTask(pMnode, pVgroup, pHTaskList, *pHSinkTaskList, pStream, plan, pStream->hTaskUid, + pEpset, true, hasExtraSink, nextWindowSkey, true); } sdbRelease(pSdb, pVgroup); @@ -371,10 +371,10 @@ static int32_t addSourceTasksForOneLevelStream(SMnode* pMnode, const SQueryPlan* return TSDB_CODE_SUCCESS; } -static int32_t doAddSourceTask(SArray* pTaskList, int8_t fillHistory, int64_t uid, SStreamTask* pDownstreamTask, +static int32_t doAddSourceTask(SArray* pTaskList, bool isFillhistory, int64_t uid, SStreamTask* pDownstreamTask, SMnode* pMnode, SSubplan* pPlan, SVgObj* pVgroup, SEpSet* pEpset, - int64_t nextWindowSkey) { - SStreamTask* pTask = tNewStreamTask(uid, 
TASK_LEVEL__SOURCE, fillHistory, 0, pTaskList); + int64_t nextWindowSkey, bool hasFillHistory) { + SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SOURCE, isFillhistory, 0, pTaskList, hasFillHistory); if (pTask == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; @@ -400,8 +400,8 @@ static int32_t doAddSourceTask(SArray* pTaskList, int8_t fillHistory, int64_t ui } static int32_t doAddAggTask(uint64_t uid, SArray* pTaskList, SArray* pSinkNodeList, SMnode* pMnode, SStreamObj* pStream, - SEpSet* pEpset, int32_t fillHistory, SStreamTask** pAggTask) { - *pAggTask = tNewStreamTask(uid, TASK_LEVEL__AGG, fillHistory, pStream->conf.triggerParam, pTaskList); + SEpSet* pEpset, bool fillHistory, SStreamTask** pAggTask, bool hasFillhistory) { + *pAggTask = tNewStreamTask(uid, TASK_LEVEL__AGG, fillHistory, pStream->conf.triggerParam, pTaskList, hasFillhistory); if (*pAggTask == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; @@ -432,7 +432,8 @@ static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan *pAggTask = NULL; SArray* pSinkNodeList = taosArrayGetP(pStream->tasks, SINK_NODE_LEVEL); - int32_t code = doAddAggTask(pStream->uid, pAggTaskList, pSinkNodeList, pMnode, pStream, pEpset, 0, pAggTask); + int32_t code = doAddAggTask(pStream->uid, pAggTaskList, pSinkNodeList, pMnode, pStream, pEpset, false, pAggTask, + pStream->conf.fillHistory); if (code != TSDB_CODE_SUCCESS) { return -1; } @@ -461,7 +462,7 @@ static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan *pHAggTask = NULL; code = doAddAggTask(pStream->hTaskUid, pHAggTaskList, pHSinkNodeList, pMnode, pStream, pEpset, pStream->conf.fillHistory, - pHAggTask); + pHAggTask, pStream->conf.fillHistory); if (code != TSDB_CODE_SUCCESS) { if (pSnode != NULL) { sdbRelease(pSdb, pSnode); @@ -520,8 +521,8 @@ static int32_t addSourceTasksForMultiLevelStream(SMnode* pMnode, SQueryPlan* pPl continue; } - int32_t code = - doAddSourceTask(pSourceTaskList, 0, pStream->uid, pDownstreamTask, pMnode, plan, pVgroup, pEpset, nextWindowSkey); + int32_t code = doAddSourceTask(pSourceTaskList, false, pStream->uid, pDownstreamTask, pMnode, plan, pVgroup, pEpset, + nextWindowSkey, pStream->conf.fillHistory); if (code != TSDB_CODE_SUCCESS) { sdbRelease(pSdb, pVgroup); terrno = code; @@ -529,8 +530,8 @@ static int32_t addSourceTasksForMultiLevelStream(SMnode* pMnode, SQueryPlan* pPl } if (pStream->conf.fillHistory) { - code = doAddSourceTask(pHSourceTaskList, 1, pStream->hTaskUid, pHDownstreamTask, pMnode, plan, pVgroup, - pEpset, nextWindowSkey); + code = doAddSourceTask(pHSourceTaskList, true, pStream->hTaskUid, pHDownstreamTask, pMnode, plan, pVgroup, pEpset, + nextWindowSkey, pStream->conf.fillHistory); if (code != TSDB_CODE_SUCCESS) { sdbRelease(pSdb, pVgroup); return code; @@ -548,16 +549,16 @@ static int32_t addSourceTasksForMultiLevelStream(SMnode* pMnode, SQueryPlan* pPl } static int32_t addSinkTasks(SArray* pTasksList, SMnode* pMnode, SStreamObj* pStream, SArray** pCreatedTaskList, - SEpSet* pEpset, int32_t fillHistory) { + SEpSet* pEpset, bool fillHistory) { SArray* pSinkTaskList = addNewTaskList(pTasksList); if (pStream->fixedSinkVgId == 0) { - if (mndAddShuffleSinkTasksToStream(pMnode, pSinkTaskList, pStream, pEpset, fillHistory) < 0) { + if (doAddShuffleSinkTask(pMnode, pSinkTaskList, pStream, pEpset, fillHistory) < 0) { // TODO free return -1; } } else { - if (mndAddSinkTaskToStream(pStream, pSinkTaskList, pMnode, pStream->fixedSinkVgId, &pStream->fixedSinkVg, - pEpset, fillHistory) < 0) { + if 
(doAddSinkTask(pStream, pSinkTaskList, pMnode, pStream->fixedSinkVgId, &pStream->fixedSinkVg, pEpset, + fillHistory) < 0) { // TODO free return -1; } diff --git a/source/dnode/mnode/impl/src/mndSnode.c b/source/dnode/mnode/impl/src/mndSnode.c index 5e98380a08..f4f9cbb535 100644 --- a/source/dnode/mnode/impl/src/mndSnode.c +++ b/source/dnode/mnode/impl/src/mndSnode.c @@ -316,6 +316,7 @@ _OVER: mndReleaseSnode(pMnode, pObj); mndReleaseDnode(pMnode, pDnode); + tFreeSMCreateQnodeReq(&createReq); return code; } @@ -425,6 +426,7 @@ _OVER: } mndReleaseSnode(pMnode, pObj); + tFreeSMCreateQnodeReq(&dropReq); return code; } diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index c47c4994b7..eaf74a96cb 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -859,18 +859,23 @@ int32_t mndBuildStbFromReq(SMnode *pMnode, SStbObj *pDst, SMCreateStbReq *pCreat return 0; } static int32_t mndGenIdxNameForFirstTag(char *fullname, char *dbname, char *tagname) { - char randStr[24] = {0}; + char randStr[TSDB_COL_NAME_LEN] = {0}; + int32_t left = TSDB_COL_NAME_LEN - strlen(tagname) - 1; + if (left <= 1) { + sprintf(fullname, "%s.%s", dbname, tagname); + } else { + int8_t start = left < 8 ? 0 : 8; + int8_t end = left >= 24 ? 24 : left - 1; + // gen rand str len [base:end] + // note: ignore rand performance issues + int64_t len = taosRand() % (end - start + 1) + start; + taosRandStr2(randStr, len); + sprintf(fullname, "%s.%s_%s", dbname, tagname, randStr); + } - int8_t start = 8; - int8_t end = sizeof(randStr) - 1; - // gen rand str len [base:end] - // note: ignore rand performance issues - int64_t len = taosRand() % (end - start + 1) + start; - - taosRandStr2(randStr, len); - sprintf(fullname, "%s.%s_%s", dbname, tagname, randStr); return 0; } + static int32_t mndCreateStb(SMnode *pMnode, SRpcMsg *pReq, SMCreateStbReq *pCreate, SDbObj *pDb) { SStbObj stbObj = {0}; int32_t code = -1; @@ -1075,80 +1080,6 @@ static int32_t mndBuildStbFromAlter(SStbObj *pStb, SStbObj *pDst, SMCreateStbReq return TSDB_CODE_SUCCESS; } -static char *mndAuditFieldTypeStr(int32_t type) { - switch (type) { - case TSDB_DATA_TYPE_NULL: - return "null"; - case TSDB_DATA_TYPE_BOOL: - return "bool"; - case TSDB_DATA_TYPE_TINYINT: - return "tinyint"; - case TSDB_DATA_TYPE_SMALLINT: - return "smallint"; - case TSDB_DATA_TYPE_INT: - return "int"; - case TSDB_DATA_TYPE_BIGINT: - return "bigint"; - case TSDB_DATA_TYPE_FLOAT: - return "float"; - case TSDB_DATA_TYPE_DOUBLE: - return "double"; - case TSDB_DATA_TYPE_VARCHAR: - return "varchar"; - case TSDB_DATA_TYPE_TIMESTAMP: - return "timestamp"; - case TSDB_DATA_TYPE_NCHAR: - return "nchar"; - case TSDB_DATA_TYPE_UTINYINT: - return "utinyint"; - case TSDB_DATA_TYPE_USMALLINT: - return "usmallint"; - case TSDB_DATA_TYPE_UINT: - return "uint"; - case TSDB_DATA_TYPE_UBIGINT: - return "ubigint"; - case TSDB_DATA_TYPE_JSON: - return "json"; - case TSDB_DATA_TYPE_VARBINARY: - return "varbinary"; - case TSDB_DATA_TYPE_DECIMAL: - return "decimal"; - case TSDB_DATA_TYPE_BLOB: - return "blob"; - case TSDB_DATA_TYPE_MEDIUMBLOB: - return "mediumblob"; - case TSDB_DATA_TYPE_GEOMETRY: - return "geometry"; - - default: - return "error"; - } -} - -static void mndAuditFieldStr(char *detail, SArray *arr, int32_t len, int32_t max) { - int32_t detialLen = strlen(detail); - int32_t fieldLen = 0; - for (int32_t i = 0; i < len; ++i) { - SField *pField = taosArrayGet(arr, i); - char field[TSDB_COL_NAME_LEN + 20] = {0}; - fieldLen = strlen(", "); 
- if (detialLen > 0 && detialLen < max - fieldLen - 1) { - strcat(detail, ", "); - detialLen += fieldLen; - } else { - break; - } - sprintf(field, "%s:%s", pField->name, mndAuditFieldTypeStr(pField->type)); - fieldLen = strlen(field); - if (detialLen < max - fieldLen - 1) { - strcat(detail, field); - detialLen += fieldLen; - } else { - break; - } - } -} - static int32_t mndProcessCreateStbReq(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; int32_t code = -1; @@ -1257,26 +1188,10 @@ static int32_t mndProcessCreateStbReq(SRpcMsg *pReq) { } if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; - char detail[AUDIT_DETAIL_MAX] = {0}; - sprintf(detail, - "colVer:%d, delay1:%" PRId64 ", delay2:%" PRId64 ", deleteMark1:%" PRId64 - ", " - "deleteMark2:%" PRId64 - ", igExists:%d, numOfColumns:%d, numOfFuncs:%d, numOfTags:%d, " - "source:%d, suid:%" PRId64 - ", tagVer:%d, ttl:%d, " - "watermark1:%" PRId64 ", watermark2:%" PRId64, - createReq.colVer, createReq.delay1, createReq.delay2, createReq.deleteMark1, createReq.deleteMark2, - createReq.igExists, createReq.numOfColumns, createReq.numOfFuncs, createReq.numOfTags, createReq.source, - createReq.suid, createReq.tagVer, createReq.ttl, createReq.watermark1, createReq.watermark2); - - mndAuditFieldStr(detail, createReq.pColumns, createReq.numOfColumns, AUDIT_DETAIL_MAX); - mndAuditFieldStr(detail, createReq.pTags, createReq.numOfTags, AUDIT_DETAIL_MAX); - SName name = {0}; tNameFromString(&name, createReq.name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); - auditRecord(pReq, pMnode->clusterId, "createStb", name.dbname, name.tname, detail); + auditRecord(pReq, pMnode->clusterId, "createStb", name.dbname, name.tname, createReq.sql, createReq.sqlLen); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -2333,7 +2248,7 @@ static int32_t mndProcessAlterStbReq(SRpcMsg *pReq) { pDb = mndAcquireDbByStb(pMnode, alterReq.name); if (pDb == NULL) { - terrno = TSDB_CODE_MND_INVALID_DB; + terrno = TSDB_CODE_MND_DB_NOT_EXIST; goto _OVER; } @@ -2350,13 +2265,10 @@ static int32_t mndProcessAlterStbReq(SRpcMsg *pReq) { code = mndAlterStb(pMnode, pReq, &alterReq, pDb, pStb); if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; - char detail[2000] = {0}; - sprintf(detail, "alterType:%d, numOfFields:%d, ttl:%d", alterReq.alterType, alterReq.numOfFields, alterReq.ttl); - SName name = {0}; tNameFromString(&name, alterReq.name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); - auditRecord(pReq, pMnode->clusterId, "alterStb", name.dbname, name.tname, detail); + auditRecord(pReq, pMnode->clusterId, "alterStb", name.dbname, name.tname, alterReq.sql, alterReq.sqlLen); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -2448,6 +2360,7 @@ static int32_t mndDropStb(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SStbObj *p if (mndSetDropStbRedoActions(pMnode, pTrans, pDb, pStb) != 0) goto _OVER; if (mndDropIdxsByStb(pMnode, pTrans, pDb, pStb) != 0) goto _OVER; if (mndDropSmasByStb(pMnode, pTrans, pDb, pStb) != 0) goto _OVER; + if (mndUserRemoveStb(pMnode, pTrans, pStb->name) != 0) goto _OVER; if (mndTransPrepare(pMnode, pTrans) != 0) goto _OVER; code = 0; @@ -2619,13 +2532,10 @@ static int32_t mndProcessDropStbReq(SRpcMsg *pReq) { code = mndDropStb(pMnode, pReq, pDb, pStb); if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; - char detail[2000] = {0}; - sprintf(detail, "igNotExists:%d, source:%d", dropReq.igNotExists, dropReq.source); - SName name = {0}; tNameFromString(&name, dropReq.name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); - auditRecord(pReq, 
pMnode->clusterId, "dropStb", name.dbname, name.tname, detail); + auditRecord(pReq, pMnode->clusterId, "dropStb", name.dbname, name.tname, dropReq.sql, dropReq.sqlLen); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -2634,6 +2544,7 @@ _OVER: mndReleaseDb(pMnode, pDb); mndReleaseStb(pMnode, pStb); + tFreeSMDropStbReq(&dropReq); return code; } @@ -3627,7 +3538,7 @@ static int32_t mndProcessCreateIndexReq(SRpcMsg *pReq) { pDb = mndAcquireDbByStb(pMnode, tagIdxReq.dbFName); if (pDb == NULL) { - terrno = TSDB_CODE_MND_INVALID_DB; + terrno = TSDB_CODE_MND_DB_NOT_EXIST; goto _OVER; } diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 049b4e737a..be7e6f1fdf 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -37,17 +37,19 @@ typedef struct SNodeEntry { int32_t nodeId; + bool stageUpdated; // the stage has been updated due to the leader/follower change or node reboot. SEpSet epset; // compare the epset to identify the vgroup tranferring between different dnodes. int64_t hbTimestamp; // second } SNodeEntry; -typedef struct SStreamVnodeRevertIndex { +typedef struct SStreamExecNodeInfo { SArray *pNodeEntryList; - int64_t ts; // snapshot ts + int64_t ts; // snapshot ts + int64_t activeCheckpoint; // active check point id SHashObj *pTaskMap; SArray *pTaskList; TdThreadMutex lock; -} SStreamVnodeRevertIndex; +} SStreamExecNodeInfo; typedef struct SVgroupChangeInfo { SHashObj *pDBMap; @@ -55,7 +57,7 @@ typedef struct SVgroupChangeInfo { } SVgroupChangeInfo; static int32_t mndNodeCheckSentinel = 0; -static SStreamVnodeRevertIndex execNodeList; +static SStreamExecNodeInfo execNodeList; static int32_t mndStreamActionInsert(SSdb *pSdb, SStreamObj *pStream); static int32_t mndStreamActionDelete(SSdb *pSdb, SStreamObj *pStream); @@ -65,9 +67,6 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq); static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq); static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq); static int32_t mndProcessStreamHb(SRpcMsg *pReq); -static int32_t mndProcessRecoverStreamReq(SRpcMsg *pReq); -static int32_t mndProcessStreamMetaReq(SRpcMsg *pReq); -static int32_t mndGetStreamMeta(SRpcMsg *pReq, SShowObj *pShow, STableMetaRsp *pMeta); static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows); static void mndCancelGetNextStream(SMnode *pMnode, void *pIter); static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows); @@ -78,13 +77,19 @@ static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, in int64_t streamId, int32_t taskId); static int32_t mndProcessNodeCheck(SRpcMsg *pReq); static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg); -static void keepStreamTasksInBuf(SStreamObj *pStream, SStreamVnodeRevertIndex *pExecNode); -static SArray *doExtractNodeListFromStream(SMnode *pMnode); +static SArray *extractNodeListFromStream(SMnode *pMnode); static SArray *mndTakeVgroupSnapshot(SMnode *pMnode); static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList); -static int32_t mndPersistTransLog(SStreamObj *pStream, STrans *pTrans); + +static STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, const char *name); +static int32_t mndPersistTransLog(SStreamObj *pStream, STrans *pTrans); static void initTransAction(STransAction *pAction, void *pCont, int32_t contLen, int32_t msgType, const SEpSet 
*pEpset); +static int32_t createStreamUpdateTrans(SMnode *pMnode, SStreamObj *pStream, SVgroupChangeInfo *pInfo); + +static void removeStreamTasksInBuf(SStreamObj* pStream, SStreamExecNodeInfo * pExecNode); +static void keepStreamTasksInBuf(SStreamObj *pStream, SStreamExecNodeInfo *pExecNode); +static int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot); int32_t mndInitStream(SMnode *pMnode) { SSdbTable table = { @@ -107,6 +112,7 @@ int32_t mndInitStream(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_RESUME_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_STOP_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_VND_STREAM_TASK_UPDATE_RSP, mndTransProcessRsp); + mndSetMsgHandle(pMnode, TDMT_VND_STREAM_TASK_RESET_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_VND_STREAM_CHECK_POINT_SOURCE_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHECKPOINT_TIMER, mndProcessStreamCheckpointTmr); @@ -125,7 +131,7 @@ int32_t mndInitStream(SMnode *pMnode) { taosThreadMutexInit(&execNodeList.lock, NULL); execNodeList.pTaskMap = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR), true, HASH_NO_LOCK); - execNodeList.pTaskList = taosArrayInit(4, sizeof(STaskStatusEntry)); + execNodeList.pTaskList = taosArrayInit(4, sizeof(STaskId)); return sdbSetTable(pMnode->pSdb, table); } @@ -579,21 +585,6 @@ int32_t mndPersistDropStreamLog(SMnode *pMnode, STrans *pTrans, SStreamObj *pStr return 0; } -static int32_t mndSetStreamRecover(SMnode *pMnode, STrans *pTrans, const SStreamObj *pStream) { - SStreamObj streamObj = {0}; - memcpy(streamObj.name, pStream->name, TSDB_STREAM_FNAME_LEN); - streamObj.status = STREAM_STATUS__RECOVER; - - SSdbRaw *pCommitRaw = mndStreamActionEncode(&streamObj); - if (pCommitRaw == NULL) return -1; - if (mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { - mError("stream trans:%d, failed to append commit log since %s", pTrans->id, terrstr()); - return -1; - } - (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); - return 0; -} - static int32_t mndCreateStbForStream(SMnode *pMnode, STrans *pTrans, const SStreamObj *pStream, const char *user) { SStbObj *pStb = NULL; SDbObj *pDb = NULL; @@ -802,17 +793,6 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { } } - // pDb = mndAcquireDb(pMnode, streamObj.sourceDb); - // if (pDb->cfg.replications != 1) { - // mError("stream source db must have only 1 replica, but %s has %d", pDb->name, pDb->cfg.replications); - // terrno = TSDB_CODE_MND_MULTI_REPLICA_SOURCE_DB; - // mndReleaseDb(pMnode, pDb); - // pDb = NULL; - // goto _OVER; - // } - - // mndReleaseDb(pMnode, pDb); - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_DB_INSIDE, pReq, "create-stream"); if (pTrans == NULL) { mError("stream:%s, failed to create since %s", createStreamReq.name, terrstr()); @@ -874,22 +854,15 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { code = TSDB_CODE_ACTION_IN_PROGRESS; - char detail[2000] = {0}; - sprintf(detail, - "checkpointFreq:%" PRId64 ", createStb:%d, deleteMark:%" PRId64 - ", fillHistory:%d, igExists:%d, igExpired:%d, igUpdate:%d, lastTs:%" PRId64 ", maxDelay:%" PRId64 - ", numOfTags:%d, sourceDB:%s, targetStbFullName:%s, triggerType:%d, watermark:%" PRId64, - createStreamReq.checkpointFreq, createStreamReq.createStb, createStreamReq.deleteMark, - createStreamReq.fillHistory, createStreamReq.igExists, createStreamReq.igExpired, createStreamReq.igUpdate, - createStreamReq.lastTs, createStreamReq.maxDelay, 
createStreamReq.numOfTags, createStreamReq.sourceDB, - createStreamReq.targetStbFullName, createStreamReq.triggerType, createStreamReq.watermark); - SName name = {0}; tNameFromString(&name, createStreamReq.name, T_NAME_ACCT | T_NAME_DB); //reuse this function for stream - - auditRecord(pReq, pMnode->clusterId, "createStream", name.dbname, "", detail); - + + //TODO + if (createStreamReq.sql != NULL) { + auditRecord(pReq, pMnode->clusterId, "createStream", name.dbname, "", + createStreamReq.sql, strlen(createStreamReq.sql)); + } _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { mError("stream:%s, failed to create since %s", createStreamReq.name, terrstr()); @@ -1063,8 +1036,7 @@ static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, in // return -1; // } -static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream, SMnode *pMnode, - int64_t checkpointId) { +static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream, SMnode *pMnode, int64_t chkptId) { taosWLockLatch(&pStream->lock); int32_t totLevel = taosArrayGetSize(pStream->tasks); @@ -1088,7 +1060,7 @@ static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream void *buf; int32_t tlen; - if (mndBuildStreamCheckpointSourceReq2(&buf, &tlen, pTask->info.nodeId, checkpointId, pTask->id.streamId, + if (mndBuildStreamCheckpointSourceReq2(&buf, &tlen, pTask->info.nodeId, chkptId, pTask->id.streamId, pTask->id.taskId) < 0) { mndReleaseVgroup(pMnode, pVgObj); taosWUnLockLatch(&pStream->lock); @@ -1109,9 +1081,9 @@ static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream } } - pStream->checkpointId = checkpointId; + pStream->checkpointId = chkptId; pStream->checkpointFreq = taosGetTimestampMs(); - atomic_store_64(&pStream->currentTick, 0); + pStream->currentTick = 0; // 3. 
commit log: stream checkpoint info pStream->version = pStream->version + 1; @@ -1166,16 +1138,23 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { execNodeList.pNodeEntryList = taosArrayDestroy(execNodeList.pNodeEntryList); } - execNodeList.pNodeEntryList = doExtractNodeListFromStream(pMnode); + execNodeList.pNodeEntryList = extractNodeListFromStream(pMnode); } if (taosArrayGetSize(execNodeList.pNodeEntryList) == 0) { - mDebug("end to do stream task node change checking, no vgroup exists, do nothing"); + mDebug("stream task node change checking done, no vgroups exist, do nothing"); execNodeList.ts = ts; - atomic_store_32(&mndNodeCheckSentinel, 0); return 0; } + for(int32_t i = 0; i < taosArrayGetSize(execNodeList.pNodeEntryList); ++i) { + SNodeEntry* pNodeEntry = taosArrayGet(execNodeList.pNodeEntryList, i); + if (pNodeEntry->stageUpdated) { + mDebug("stream task not ready due to node update detected, checkpoint not issued"); + return 0; + } + } + SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode); SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execNodeList.pNodeEntryList, pNodeSnapshot); @@ -1185,7 +1164,7 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { taosArrayDestroy(pNodeSnapshot); if (nodeUpdated) { - mDebug("stream task not ready due to node update, not generate checkpoint"); + mDebug("stream task not ready due to node update, checkpoint not issued"); return 0; } } @@ -1195,10 +1174,15 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { taosThreadMutexLock(&execNodeList.lock); for (int32_t i = 0; i < taosArrayGetSize(execNodeList.pTaskList); ++i) { - STaskStatusEntry *p = taosArrayGet(execNodeList.pTaskList, i); - if (p->status != TASK_STATUS__NORMAL) { - mDebug("s-task:0x%" PRIx64 "-0x%x (nodeId:%d) status:%s not ready, create checkpoint msg not issued", - p->streamId, p->taskId, 0, streamGetTaskStatusStr(p->status)); + STaskId *p = taosArrayGet(execNodeList.pTaskList, i); + STaskStatusEntry* pEntry = taosHashGet(execNodeList.pTaskMap, p, sizeof(*p)); + if (pEntry == NULL) { + continue; + } + + if (pEntry->status != TASK_STATUS__NORMAL) { + mDebug("s-task:0x%" PRIx64 "-0x%x (nodeId:%d) status:%s not ready, checkpoint msg not issued", + pEntry->id.streamId, (int32_t)pEntry->id.taskId, 0, streamGetTaskStatusStr(pEntry->status)); ready = false; break; } @@ -1268,15 +1252,18 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { if (dropReq.igNotExists) { mInfo("stream:%s, not exist, ignore not exist is set", dropReq.name); sdbRelease(pMnode->pSdb, pStream); + tFreeSMDropStreamReq(&dropReq); return 0; } else { terrno = TSDB_CODE_MND_STREAM_NOT_EXIST; + tFreeSMDropStreamReq(&dropReq); return -1; } } if (mndCheckDbPrivilegeByName(pMnode, pReq->info.conn.user, MND_OPER_WRITE_DB, pStream->targetDb) != 0) { sdbRelease(pMnode->pSdb, pStream); + tFreeSMDropStreamReq(&dropReq); return -1; } @@ -1284,6 +1271,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { if (pTrans == NULL) { mError("stream:%s, failed to drop since %s", dropReq.name, terrstr()); sdbRelease(pMnode->pSdb, pStream); + tFreeSMDropStreamReq(&dropReq); return -1; } @@ -1293,15 +1281,16 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { if (mndTransCheckConflict(pMnode, pTrans) != 0) { sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); + tFreeSMDropStreamReq(&dropReq); return -1; } - // mndTransSetSerial(pTrans); // drop all tasks if (mndDropStreamTasks(pMnode, pTrans, pStream) < 0) { mError("stream:%s, failed to drop task since %s", 
dropReq.name, terrstr()); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); + tFreeSMDropStreamReq(&dropReq); return -1; } @@ -1309,6 +1298,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { if (mndPersistDropStreamLog(pMnode, pTrans, pStream) < 0) { sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); + tFreeSMDropStreamReq(&dropReq); return -1; } @@ -1316,20 +1306,21 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { mError("trans:%d, failed to prepare drop stream trans since %s", pTrans->id, terrstr()); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); + tFreeSMDropStreamReq(&dropReq); return -1; } - char detail[100] = {0}; - sprintf(detail, "igNotExists:%d", dropReq.igNotExists); + removeStreamTasksInBuf(pStream, &execNodeList); SName name = {0}; tNameFromString(&name, dropReq.name, T_NAME_ACCT | T_NAME_DB); //reuse this function for stream - auditRecord(pReq, pMnode->clusterId, "dropStream", name.dbname, "", detail); + auditRecord(pReq, pMnode->clusterId, "dropStream", name.dbname, "", dropReq.sql, dropReq.sqlLen); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); + tFreeSMDropStreamReq(&dropReq); return TSDB_CODE_ACTION_IN_PROGRESS; } @@ -1564,35 +1555,58 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock } pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetVal(pColInfo, numOfRows, (const char *)&level, false); + colDataSetVal(pColInfo, numOfRows, (const char *)level, false); // status - char status[20 + VARSTR_HEADER_SIZE] = {0}; - int8_t taskStatus = atomic_load_8(&pTask->status.taskStatus); - if (taskStatus == TASK_STATUS__NORMAL) { - memcpy(varDataVal(status), "normal", 6); - varDataSetLen(status, 6); - } else if (taskStatus == TASK_STATUS__DROPPING) { - memcpy(varDataVal(status), "dropping", 8); - varDataSetLen(status, 8); - } else if (taskStatus == TASK_STATUS__UNINIT) { - memcpy(varDataVal(status), "uninit", 6); - varDataSetLen(status, 4); - } else if (taskStatus == TASK_STATUS__STOP) { - memcpy(varDataVal(status), "stop", 4); - varDataSetLen(status, 4); - } else if (taskStatus == TASK_STATUS__SCAN_HISTORY) { - memcpy(varDataVal(status), "history", 7); - varDataSetLen(status, 7); - } else if (taskStatus == TASK_STATUS__HALT) { - memcpy(varDataVal(status), "halt", 4); - varDataSetLen(status, 4); - } else if (taskStatus == TASK_STATUS__PAUSE) { - memcpy(varDataVal(status), "pause", 5); - varDataSetLen(status, 5); + char status[20 + VARSTR_HEADER_SIZE] = {0}; + + STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; + STaskStatusEntry* pe = taosHashGet(execNodeList.pTaskMap, &id, sizeof(id)); + if (pe == NULL) { + continue; } + + const char* pStatus = streamGetTaskStatusStr(pe->status); + STR_TO_VARSTR(status, pStatus); + + // status pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetVal(pColInfo, numOfRows, (const char *)&status, false); + colDataSetVal(pColInfo, numOfRows, (const char *)status, false); + + // stage + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataSetVal(pColInfo, numOfRows, (const char *)&pe->stage, false); + + // input queue + char vbuf[30] = {0}; + char buf[25] = {0}; + const char* queueInfoStr = "%4.2fMiB (%5.2f%)"; + sprintf(buf, queueInfoStr, pe->inputQUsed, pe->inputRate); + STR_TO_VARSTR(vbuf, buf); + + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataSetVal(pColInfo, numOfRows, (const char*)vbuf, false); + + // output queue +// sprintf(buf, queueInfoStr, pe->outputQUsed, pe->outputRate); +// 
STR_TO_VARSTR(vbuf, buf); + +// pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); +// colDataSetVal(pColInfo, numOfRows, (const char*)vbuf, false); + + if (pTask->info.taskLevel == TASK_LEVEL__SINK) { + const char* sinkStr = "%.2fMiB"; + sprintf(buf, sinkStr, pe->sinkDataSize); + } else if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + // offset info + const char *offsetStr = "%" PRId64 " [%" PRId64 ", %" PRId64 "]"; + sprintf(buf, offsetStr, pe->processedVer, pe->verStart, pe->verEnd); + } + + STR_TO_VARSTR(vbuf, buf); + + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataSetVal(pColInfo, numOfRows, (const char*)vbuf, false); numOfRows++; } @@ -1635,7 +1649,9 @@ static int32_t mndPauseStreamTask(STrans *pTrans, SStreamTask *pTask) { return 0; } -int32_t mndPauseAllStreamTaskImpl(STrans *pTrans, SArray *tasks) { +int32_t mndPauseAllStreamTasks(STrans *pTrans, SStreamObj *pStream) { + SArray* tasks = pStream->tasks; + int32_t size = taosArrayGetSize(tasks); for (int32_t i = 0; i < size; i++) { SArray *pTasks = taosArrayGetP(tasks, i); @@ -1655,16 +1671,6 @@ int32_t mndPauseAllStreamTaskImpl(STrans *pTrans, SArray *tasks) { return 0; } -int32_t mndPauseAllStreamTasks(STrans *pTrans, SStreamObj *pStream) { - int32_t code = mndPauseAllStreamTaskImpl(pTrans, pStream->tasks); - if (code != 0) { - return code; - } - // pStream->pHTasksList is null - // code = mndPauseAllStreamTaskImpl(pTrans, pStream->pHTasksList); - return code; -} - static int32_t mndPersistStreamLog(STrans *pTrans, const SStreamObj *pStream, int8_t status) { SStreamObj streamObj = {0}; memcpy(streamObj.name, pStream->name, TSDB_STREAM_FNAME_LEN); @@ -1718,6 +1724,7 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { sdbRelease(pMnode->pSdb, pStream); return -1; } + mInfo("trans:%d, used to pause stream:%s", pTrans->id, pauseReq.name); mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); @@ -1729,7 +1736,7 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { // pause all tasks if (mndPauseAllStreamTasks(pTrans, pStream) < 0) { - mError("stream:%s, failed to drop task since %s", pauseReq.name, terrstr()); + mError("stream:%s, failed to pause task since %s", pauseReq.name, terrstr()); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); return -1; @@ -1890,6 +1897,7 @@ static int32_t doBuildStreamTaskUpdateMsg(void **pBuf, int32_t *pLen, SVgroupCha tEncodeSize(tEncodeStreamTaskUpdateMsg, &req, blen, code); if (code < 0) { terrno = TSDB_CODE_OUT_OF_MEMORY; + taosArrayDestroy(req.pNodeList); return -1; } @@ -1898,6 +1906,7 @@ static int32_t doBuildStreamTaskUpdateMsg(void **pBuf, int32_t *pLen, SVgroupCha void *buf = taosMemoryMalloc(tlen); if (buf == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; + taosArrayDestroy(req.pNodeList); return -1; } @@ -1915,6 +1924,7 @@ static int32_t doBuildStreamTaskUpdateMsg(void **pBuf, int32_t *pLen, SVgroupCha *pBuf = buf; *pLen = tlen; + taosArrayDestroy(req.pNodeList); return TSDB_CODE_SUCCESS; } @@ -1953,20 +1963,9 @@ void initTransAction(STransAction *pAction, void *pCont, int32_t contLen, int32_ // todo extract method: traverse stream tasks // build trans to update the epset static int32_t createStreamUpdateTrans(SMnode *pMnode, SStreamObj *pStream, SVgroupChangeInfo *pInfo) { - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB, NULL, "stream-task-update"); + STrans* pTrans = doCreateTrans(pMnode, pStream, "stream-task-update"); if (pTrans == NULL) { - mError("failed to build stream task DAG update, reason: %s", 
tstrerror(TSDB_CODE_OUT_OF_MEMORY)); - return -1; - } - - mDebug("start to build stream:0x%" PRIx64 " task DAG update", pStream->uid); - - mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); - if (mndTransCheckConflict(pMnode, pTrans) != 0) { - mError("failed to build stream:0x%" PRIx64 " task DAG update, code:%s", pStream->uid, - tstrerror(TSDB_CODE_MND_TRANS_CONFLICT)); - mndTransDrop(pTrans); - return -1; + return terrno; } taosWLockLatch(&pStream->lock); @@ -2046,7 +2045,7 @@ static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pP SNodeEntry *pCurrent = taosArrayGet(pNodeList, j); if (pCurrent->nodeId == pPrevEntry->nodeId) { - if (isNodeEpsetChanged(&pPrevEntry->epset, &pCurrent->epset)) { + if (pPrevEntry->stageUpdated || isNodeEpsetChanged(&pPrevEntry->epset, &pCurrent->epset)) { const SEp *pPrevEp = GET_ACTIVE_EP(&pPrevEntry->epset); char buf[256] = {0}; @@ -2127,7 +2126,7 @@ static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChange return 0; } -static SArray *doExtractNodeListFromStream(SMnode *pMnode) { +static SArray *extractNodeListFromStream(SMnode *pMnode) { SSdb *pSdb = pMnode->pSdb; SStreamObj *pStream = NULL; void *pIter = NULL; @@ -2148,11 +2147,9 @@ static SArray *doExtractNodeListFromStream(SMnode *pMnode) { int32_t numOfTasks = taosArrayGetSize(pLevel); for (int32_t k = 0; k < numOfTasks; ++k) { SStreamTask *pTask = taosArrayGetP(pLevel, k); - SNodeEntry entry = {0}; - epsetAssign(&entry.epset, &pTask->info.epSet); - entry.nodeId = pTask->info.nodeId; - entry.hbTimestamp = -1; + SNodeEntry entry = {.hbTimestamp = -1, .nodeId = pTask->info.nodeId}; + epsetAssign(&entry.epset, &pTask->info.epSet); taosHashPut(pHash, &entry.nodeId, sizeof(entry.nodeId), &entry, sizeof(entry)); } } @@ -2190,6 +2187,81 @@ static void doExtractTasksFromStream(SMnode *pMnode) { } } +static int32_t doRemoveFromTask(SStreamExecNodeInfo* pExecNode, STaskId* pRemovedId) { + void *p = taosHashGet(pExecNode->pTaskMap, pRemovedId, sizeof(*pRemovedId)); + + if (p != NULL) { + taosHashRemove(pExecNode->pTaskMap, pRemovedId, sizeof(*pRemovedId)); + + for(int32_t k = 0; k < taosArrayGetSize(pExecNode->pTaskList); ++k) { + STaskId* pId = taosArrayGet(pExecNode->pTaskList, k); + if (pId->taskId == pRemovedId->taskId && pId->streamId == pRemovedId->streamId) { + taosArrayRemove(pExecNode->pTaskList, k); + mInfo("s-task:0x%x removed from buffer, remain:%d", (int32_t) pRemovedId->taskId, + (int32_t)taosArrayGetSize(pExecNode->pTaskList)); + break; + } + } + } + return 0; +} + +static bool taskNodeExists(SArray* pList, int32_t nodeId) { + size_t num = taosArrayGetSize(pList); + + for(int32_t i = 0; i < num; ++i) { + SNodeEntry* pEntry = taosArrayGet(pList, i); + if (pEntry->nodeId == nodeId) { + return true; + } + } + + return false; +} + +int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { + SArray* pRemoveTaskList = taosArrayInit(4, sizeof(STaskId)); + + int32_t numOfTask = taosArrayGetSize(execNodeList.pTaskList); + for(int32_t i = 0; i < numOfTask; ++i) { + STaskId* pId = taosArrayGet(execNodeList.pTaskList, i); + STaskStatusEntry* pEntry = taosHashGet(execNodeList.pTaskMap, pId, sizeof(*pId)); + + bool existed = taskNodeExists(pNodeSnapshot, pEntry->nodeId); + if (!existed) { + taosArrayPush(pRemoveTaskList, pId); + } + } + + for(int32_t i = 0; i < taosArrayGetSize(pRemoveTaskList); ++i) { + STaskId* pId = taosArrayGet(pRemoveTaskList, i); + doRemoveFromTask(&execNodeList, pId); + } + + mDebug("remove invalid stream tasks:%d, 
remain:%d", (int32_t)taosArrayGetSize(pRemoveTaskList), + (int32_t) taosArrayGetSize(execNodeList.pTaskList)); + + int32_t size = taosArrayGetSize(pNodeSnapshot); + SArray* pValidNodeEntryList = taosArrayInit(4, sizeof(SNodeEntry)); + for(int32_t i = 0; i < taosArrayGetSize(execNodeList.pNodeEntryList); ++i) { + SNodeEntry* p = taosArrayGet(execNodeList.pNodeEntryList, i); + + for(int32_t j = 0; j < size; ++j) { + SNodeEntry* pEntry = taosArrayGet(pNodeSnapshot, j); + if (pEntry->nodeId == p->nodeId) { + taosArrayPush(pValidNodeEntryList, p); + break; + } + } + } + + execNodeList.pNodeEntryList = taosArrayDestroy(execNodeList.pNodeEntryList); + execNodeList.pNodeEntryList = pValidNodeEntryList; + + taosArrayDestroy(pRemoveTaskList); + return 0; +} + // this function runs by only one thread, so it is not multi-thread safe static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { int32_t code = 0; @@ -2208,7 +2280,7 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { execNodeList.pNodeEntryList = taosArrayDestroy(execNodeList.pNodeEntryList); } - execNodeList.pNodeEntryList = doExtractNodeListFromStream(pMnode); + execNodeList.pNodeEntryList = extractNodeListFromStream(pMnode); } if (taosArrayGetSize(execNodeList.pNodeEntryList) == 0) { @@ -2220,6 +2292,9 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode); + taosThreadMutexLock(&execNodeList.lock); + removeExpirednodeEntryAndTask(pNodeSnapshot); + SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execNodeList.pNodeEntryList, pNodeSnapshot); if (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0) { code = mndProcessVgroupChange(pMnode, &changeInfo); @@ -2236,6 +2311,7 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { taosArrayDestroy(pNodeSnapshot); } + taosThreadMutexUnlock(&execNodeList.lock); taosArrayDestroy(changeInfo.pUpdateNodeList); taosHashCleanup(changeInfo.pDBMap); @@ -2245,9 +2321,13 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { } typedef struct SMStreamNodeCheckMsg { - int8_t holder; // // to fix windows compile error, define place holder + int8_t placeHolder; // // to fix windows compile error, define place holder } SMStreamNodeCheckMsg; +typedef struct SMStreamTaskResetMsg { + int8_t placeHolder; +} SMStreamTaskResetMsg; + static int32_t mndProcessNodeCheck(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; SSdb *pSdb = pMnode->pSdb; @@ -2256,13 +2336,39 @@ static int32_t mndProcessNodeCheck(SRpcMsg *pReq) { } SMStreamNodeCheckMsg *pMsg = rpcMallocCont(sizeof(SMStreamNodeCheckMsg)); - SRpcMsg rpcMsg = { - .msgType = TDMT_MND_STREAM_NODECHANGE_CHECK, .pCont = pMsg, .contLen = sizeof(SMStreamNodeCheckMsg)}; + + SRpcMsg rpcMsg = { + .msgType = TDMT_MND_STREAM_NODECHANGE_CHECK, .pCont = pMsg, .contLen = sizeof(SMStreamNodeCheckMsg)}; tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); return 0; } -static void keepStreamTasksInBuf(SStreamObj *pStream, SStreamVnodeRevertIndex *pExecNode) { +void keepStreamTasksInBuf(SStreamObj *pStream, SStreamExecNodeInfo *pExecNode) { + int32_t level = taosArrayGetSize(pStream->tasks); + + for (int32_t i = 0; i < level; i++) { + SArray *pLevel = taosArrayGetP(pStream->tasks, i); + + int32_t numOfTasks = taosArrayGetSize(pLevel); + for (int32_t j = 0; j < numOfTasks; j++) { + SStreamTask *pTask = taosArrayGetP(pLevel, j); + + STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; + void *p = taosHashGet(pExecNode->pTaskMap, &id, sizeof(id)); + if (p == NULL) { + STaskStatusEntry entry 
= {0}; + streamTaskStatusInit(&entry, pTask); + + taosHashPut(pExecNode->pTaskMap, &id, sizeof(id), &entry, sizeof(entry)); + taosArrayPush(pExecNode->pTaskList, &id); + mInfo("s-task:0x%x add into task buffer, total:%d", (int32_t)entry.id.taskId, + (int32_t)taosArrayGetSize(pExecNode->pTaskList)); + } + } + } +} + +void removeStreamTasksInBuf(SStreamObj* pStream, SStreamExecNodeInfo * pExecNode) { int32_t level = taosArrayGetSize(pStream->tasks); for (int32_t i = 0; i < level; i++) { SArray *pLevel = taosArrayGetP(pStream->tasks, i); @@ -2270,26 +2376,172 @@ static void keepStreamTasksInBuf(SStreamObj *pStream, SStreamVnodeRevertIndex *p int32_t numOfTasks = taosArrayGetSize(pLevel); for (int32_t j = 0; j < numOfTasks; j++) { SStreamTask *pTask = taosArrayGetP(pLevel, j); - int64_t keys[2] = {pTask->id.streamId, pTask->id.taskId}; - void *p = taosHashGet(pExecNode->pTaskMap, keys, sizeof(keys)); - if (p == NULL) { - STaskStatusEntry entry = { - .streamId = pTask->id.streamId, .taskId = pTask->id.taskId, .status = TASK_STATUS__STOP}; - taosArrayPush(pExecNode->pTaskList, &entry); + STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; + void *p = taosHashGet(pExecNode->pTaskMap, &id, sizeof(id)); + if (p != NULL) { + taosHashRemove(pExecNode->pTaskMap, &id, sizeof(id)); + + for(int32_t k = 0; k < taosArrayGetSize(pExecNode->pTaskList); ++k) { + STaskId* pId = taosArrayGet(pExecNode->pTaskList, k); + if (pId->taskId == id.taskId && pId->streamId == id.streamId) { + taosArrayRemove(pExecNode->pTaskList, k); + mInfo("s-task:0x%x removed from buffer, remain:%d", (int32_t)id.taskId, + (int32_t)taosArrayGetSize(pExecNode->pTaskList)); + break; + } + } - int32_t ordinal = taosArrayGetSize(pExecNode->pTaskList) - 1; - taosHashPut(pExecNode->pTaskMap, keys, sizeof(keys), &ordinal, sizeof(ordinal)); } } } + + ASSERT(taosHashGetSize(pExecNode->pTaskMap) == taosArrayGetSize(pExecNode->pTaskList)); +} + +static STrans* doCreateTrans(SMnode* pMnode, SStreamObj* pStream, const char* name) { + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, NULL, name); + if (pTrans == NULL) { + mError("failed to build trans:%s, reason: %s", name, tstrerror(TSDB_CODE_OUT_OF_MEMORY)); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + + mDebug("start to build stream:0x%" PRIx64 " task DAG update", pStream->uid); + + mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); + if (mndTransCheckConflict(pMnode, pTrans) != 0) { + terrno = TSDB_CODE_MND_TRANS_CONFLICT; + mError("failed to build trans:%s for stream:0x%" PRIx64 " code:%s", name, pStream->uid, tstrerror(terrno)); + mndTransDrop(pTrans); + return NULL; + } + + terrno = 0; + return pTrans; +} + +int32_t createStreamResetStatusTrans(SMnode* pMnode, SStreamObj* pStream) { + STrans *pTrans = doCreateTrans(pMnode, pStream, "stream-task-reset"); + if (pTrans == NULL) { + return terrno; + } + + taosWLockLatch(&pStream->lock); + int32_t numOfLevels = taosArrayGetSize(pStream->tasks); + + for (int32_t j = 0; j < numOfLevels; ++j) { + SArray *pLevel = taosArrayGetP(pStream->tasks, j); + + int32_t numOfTasks = taosArrayGetSize(pLevel); + for (int32_t k = 0; k < numOfTasks; ++k) { + SStreamTask *pTask = taosArrayGetP(pLevel, k); + + // todo extract method, with pause stream task + SVResetStreamTaskReq* pReq = taosMemoryCalloc(1, sizeof(SVResetStreamTaskReq)); + if (pReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + mError("failed to malloc in reset stream, size:%" PRIzu ", code:%s", 
sizeof(SVResetStreamTaskReq), + tstrerror(TSDB_CODE_OUT_OF_MEMORY)); + return terrno; + } + + pReq->head.vgId = htonl(pTask->info.nodeId); + pReq->taskId = pTask->id.taskId; + pReq->streamId = pTask->id.streamId; + + STransAction action = {0}; + initTransAction(&action, pReq, sizeof(SVResetStreamTaskReq), TDMT_VND_STREAM_TASK_RESET, &pTask->info.epSet); + if (mndTransAppendRedoAction(pTrans, &action) != 0) { + taosMemoryFree(pReq); + taosWUnLockLatch(&pStream->lock); + mndTransDrop(pTrans); + return terrno; + } + } + } + + taosWUnLockLatch(&pStream->lock); + + int32_t code = mndPersistTransLog(pStream, pTrans); + if (code != TSDB_CODE_SUCCESS) { + sdbRelease(pMnode->pSdb, pStream); + return -1; + } + + if (mndTransPrepare(pMnode, pTrans) != 0) { + mError("trans:%d, failed to prepare update stream trans since %s", pTrans->id, terrstr()); + sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); + return -1; + } + + sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); + + return TSDB_CODE_ACTION_IN_PROGRESS; +} + +int32_t mndResetFromCheckpoint(SMnode* pMnode) { + // find the checkpoint trans id + int32_t transId = 0; + + { + SSdb *pSdb = pMnode->pSdb; + STrans *pTrans = NULL; + void* pIter = NULL; + while (1) { + pIter = sdbFetch(pSdb, SDB_TRANS, pIter, (void **)&pTrans); + if (pIter == NULL) { + break; + } + + if (strncmp(pTrans->opername, MND_STREAM_CHECKPOINT_NAME, tListLen(pTrans->opername) - 1) == 0) { + transId = pTrans->id; + sdbRelease(pSdb, pTrans); + sdbCancelFetch(pSdb, pIter); + break; + } + + sdbRelease(pSdb, pTrans); + } + } + + if (transId == 0) { + mError("failed to find the checkpoint trans, reset not executed"); + return TSDB_CODE_SUCCESS; + } + + STrans* pTrans = mndAcquireTrans(pMnode, transId); + mndKillTrans(pMnode, pTrans); + + // set all tasks status to be normal, refactor later to be stream level, instead of vnode level. 
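/* Editor's note (added, not part of the original patch): the reset path in this
 * function is two-phased -- it first scans SDB_TRANS for the in-flight checkpoint
 * transaction (matched by the MND_STREAM_CHECKPOINT_NAME prefix) and kills it with
 * mndKillTrans(), and the loop that follows then walks every stream object in the
 * sdb and issues a "stream-task-reset" transaction via createStreamResetStatusTrans()
 * so the affected tasks can be moved back to the normal state. */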
+ SSdb *pSdb = pMnode->pSdb; + SStreamObj *pStream = NULL; + void *pIter = NULL; + while (1) { + pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); + if (pIter == NULL) { + break; + } + + mDebug("stream:%s (0x%" PRIx64 ") reset checkpoint procedure, create reset trans", pStream->name, pStream->uid); + int32_t code = createStreamResetStatusTrans(pMnode, pStream); + if (code != TSDB_CODE_SUCCESS) { + sdbCancelFetch(pSdb, pIter); + return code; + } + } + + return 0; } -// todo: this process should be executed by the write queue worker of the mnode int32_t mndProcessStreamHb(SRpcMsg *pReq) { - SMnode *pMnode = pReq->info.node; + SMnode *pMnode = pReq->info.node; SStreamHbMsg req = {0}; - int32_t code = TSDB_CODE_SUCCESS; + + bool checkpointFailed = false; + int64_t activeCheckpointId = 0; SDecoder decoder = {0}; tDecoderInit(&decoder, pReq->pCont, pReq->contLen); @@ -2301,7 +2553,6 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { } tDecoderClear(&decoder); - // int64_t now = taosGetTimestampSec(); mTrace("receive stream-meta hb from vgId:%d, active numOfTasks:%d", req.vgId, req.numOfTasks); taosThreadMutexLock(&execNodeList.lock); @@ -2312,80 +2563,60 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { for (int32_t i = 0; i < req.numOfTasks; ++i) { STaskStatusEntry *p = taosArrayGet(req.pTaskStatus, i); - int64_t k[2] = {p->streamId, p->taskId}; - int32_t *index = taosHashGet(execNodeList.pTaskMap, &k, sizeof(k)); - if (index == NULL) { + STaskStatusEntry *pEntry = taosHashGet(execNodeList.pTaskMap, &p->id, sizeof(p->id)); + if (pEntry == NULL) { + mError("s-task:0x%" PRIx64 " not found in mnode task list", p->id.taskId); continue; } - STaskStatusEntry *pStatusEntry = taosArrayGet(execNodeList.pTaskList, *index); - pStatusEntry->status = p->status; + if (p->stage != pEntry->stage && pEntry->stage != -1) { + int32_t numOfNodes = taosArrayGetSize(execNodeList.pNodeEntryList); + for(int32_t j = 0; j < numOfNodes; ++j) { + SNodeEntry* pNodeEntry = taosArrayGet(execNodeList.pNodeEntryList, j); + if (pNodeEntry->nodeId == pEntry->nodeId) { + mInfo("vgId:%d stage updated, from %d to %d, nodeUpdate trigger by s-task:0x%" PRIx64, + pEntry->nodeId, pEntry->stage, p->stage, pEntry->id.taskId); + + pNodeEntry->stageUpdated = true; + pEntry->stage = p->stage; + break; + } + } + } else { + streamTaskStatusCopy(pEntry, p); + if (p->activeCheckpointId != 0) { + if (activeCheckpointId != 0) { + ASSERT(activeCheckpointId == p->activeCheckpointId); + } else { + activeCheckpointId = p->activeCheckpointId; + } + + if (p->checkpointFailed) { + checkpointFailed = p->checkpointFailed; + } + } + } + + pEntry->status = p->status; if (p->status != TASK_STATUS__NORMAL) { - mDebug("received s-task:0x%x not in ready status:%s", p->taskId, streamGetTaskStatusStr(p->status)); + mDebug("received s-task:0x%"PRIx64" not in ready status:%s", p->id.taskId, streamGetTaskStatusStr(p->status)); } } + + // current checkpoint is failed, rollback from the checkpoint trans + // kill the checkpoint trans and then set all tasks status to be normal + if (checkpointFailed && activeCheckpointId != 0) { + if (execNodeList.activeCheckpoint != activeCheckpointId) { + mInfo("checkpointId:%"PRId64" failed, issue task-reset trans to reset all tasks status", activeCheckpointId); + execNodeList.activeCheckpoint = activeCheckpointId; + mndResetFromCheckpoint(pMnode); + } else { + mDebug("checkpoint:%"PRId64" reset has issued already, ignore it", activeCheckpointId); + } + } + taosThreadMutexUnlock(&execNodeList.lock); 
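/* Editor's illustration (added, not part of the original patch): in the heartbeat
 * handler above, a reported stage that differs from the recorded one is treated as
 * evidence that the hosting vnode restarted, so the node entry is flagged via
 * stageUpdated instead of copying the reported task status. Below is a minimal,
 * self-contained sketch of that decision using plain C types only; NodeRecord and
 * onHeartbeat are hypothetical names for illustration, not TDengine symbols. */

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
  int64_t stage;        // last stage recorded for this node (-1 means "unknown yet")
  bool    stageUpdated; // set once a restart is detected
} NodeRecord;

// Returns true when the reported stage differs from the recorded one, which in the
// hunk above triggers nodeUpdate handling instead of a plain status copy.
static bool onHeartbeat(NodeRecord *rec, int64_t reportedStage) {
  if (rec->stage != -1 && reportedStage != rec->stage) {
    rec->stageUpdated = true;  // node restarted: defer status copy, mark for update
    rec->stage = reportedStage;
    return true;
  }
  return false;                // stage unchanged: safe to copy the task status
}

int main(void) {
  NodeRecord rec = {.stage = 3, .stageUpdated = false};
  printf("restart detected: %d\n", onHeartbeat(&rec, 4));  // prints 1
  return 0;
}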
taosArrayDestroy(req.pTaskStatus); - - // bool nodeChanged = false; - // SArray* pList = taosArrayInit(4, sizeof(int32_t)); - /* - // record the timeout node - for(int32_t i = 0; i < taosArrayGetSize(execNodeList.pNodeEntryList); ++i) { - SNodeEntry* pEntry = taosArrayGet(execNodeList.pNodeEntryList, i); - int64_t duration = now - pEntry->hbTimestamp; - if (duration > MND_STREAM_HB_INTERVAL) { // execNode timeout, try next - taosArrayPush(pList, &pEntry); - mWarn("nodeId:%d stream node timeout, since last hb:%"PRId64"s", pEntry->nodeId, duration); - continue; - } - - if (pEntry->nodeId != req.vgId) { - continue; - } - - pEntry->hbTimestamp = now; - - // check epset to identify whether the node has been transferred to other dnodes. - // node the epset is changed, which means the node transfer has occurred for this node. - // if (!isEpsetEqual(&pEntry->epset, &req.epset)) { - // nodeChanged = true; - // break; - // } - } - - // todo handle the node timeout case. Once the vnode is off-line, we should check the dnode status from mnode, - // to identify whether the dnode is truely offline or not. - - // handle the node changed case - if (!nodeChanged) { - return TSDB_CODE_SUCCESS; - } - - int32_t nodeId = req.vgId; - - {// check all streams that involved this vnode should update the epset info - SStreamObj *pStream = NULL; - void *pIter = NULL; - while (1) { - pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); - if (pIter == NULL) { - break; - } - - // update the related upstream and downstream tasks, todo remove this, no need this function - taosWLockLatch(&pStream->lock); - // streamTaskUpdateEpInfo(pStream->tasks, req.vgId, &req.epset); - // streamTaskUpdateEpInfo(pStream->pHTasksList, req.vgId, &req.epset); - taosWUnLockLatch(&pStream->lock); - - // code = createStreamUpdateTrans(pMnode, pStream, nodeId, ); - // if (code != TSDB_CODE_SUCCESS) { - // todo - //// } - // } - } - */ return TSDB_CODE_SUCCESS; } diff --git a/source/dnode/mnode/impl/src/mndTopic.c b/source/dnode/mnode/impl/src/mndTopic.c index 94fd6027c0..e96acfef86 100644 --- a/source/dnode/mnode/impl/src/mndTopic.c +++ b/source/dnode/mnode/impl/src/mndTopic.c @@ -629,16 +629,6 @@ static int32_t mndProcessCreateTopicReq(SRpcMsg *pReq) { code = TSDB_CODE_ACTION_IN_PROGRESS; } - char detail[4000] = {0}; - char sql[3000] = {0}; - strncpy(sql, createTopicReq.sql, 2999); - - SName tableName = {0}; - tNameFromString(&tableName, createTopicReq.subStbName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); - - sprintf(detail, "igExists:%d, subStbName:%s, subType:%d, withMeta:%d, sql:%s", - createTopicReq.igExists, tableName.tname, createTopicReq.subType, createTopicReq.withMeta, sql); - SName dbname = {0}; tNameFromString(&dbname, createTopicReq.subDbName, T_NAME_ACCT | T_NAME_DB); @@ -646,7 +636,8 @@ static int32_t mndProcessCreateTopicReq(SRpcMsg *pReq) { tNameFromString(&topicName, createTopicReq.name, T_NAME_ACCT | T_NAME_DB); //reuse this function for topic - auditRecord(pReq, pMnode->clusterId, "createTopic", topicName.dbname, dbname.dbname, detail); + auditRecord(pReq, pMnode->clusterId, "createTopic", topicName.dbname, dbname.dbname, + createTopicReq.sql, strlen(createTopicReq.sql)); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -697,10 +688,12 @@ static int32_t mndProcessDropTopicReq(SRpcMsg *pReq) { if (pTopic == NULL) { if (dropReq.igNotExists) { mInfo("topic:%s, not exist, ignore not exist is set", dropReq.name); + tFreeSMDropTopicReq(&dropReq); return 0; } else { terrno = TSDB_CODE_MND_TOPIC_NOT_EXIST; 
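/* Editor's note (added, not part of the original patch): as in the drop-stream
 * handler earlier in this patch, the decoded drop request now appears to own
 * heap-allocated fields (its sql text is passed to auditRecord below), so
 * tFreeSMDropTopicReq() is added on every early-return path to avoid leaking the
 * request contents. */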
mError("topic:%s, failed to drop since %s", dropReq.name, terrstr()); + tFreeSMDropTopicReq(&dropReq); return -1; } } @@ -841,17 +834,17 @@ end: mndTransDrop(pTrans); if (code != 0) { mError("topic:%s, failed to drop since %s", dropReq.name, terrstr()); + tFreeSMDropTopicReq(&dropReq); return code; } - char detail[100] = {0}; - sprintf(detail, "igNotExists:%d", dropReq.igNotExists); - SName name = {0}; tNameFromString(&name, dropReq.name, T_NAME_ACCT | T_NAME_DB); //reuse this function for topic - auditRecord(pReq, pMnode->clusterId, "dropTopic", name.dbname, "", detail); + auditRecord(pReq, pMnode->clusterId, "dropTopic", name.dbname, "", dropReq.sql, dropReq.sqlLen); + + tFreeSMDropTopicReq(&dropReq); return TSDB_CODE_ACTION_IN_PROGRESS; } diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 3717530481..1d8dd5e345 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -1699,7 +1699,6 @@ static int32_t mndRetrieveTrans(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBl int32_t numOfRows = 0; STrans *pTrans = NULL; int32_t cols = 0; - char *pWrite; while (numOfRows < rows) { pShow->pIter = sdbFetch(pSdb, SDB_TRANS, pShow->pIter, (void **)&pTrans); diff --git a/source/dnode/mnode/impl/src/mndUser.c b/source/dnode/mnode/impl/src/mndUser.c index f38f825302..2789f5a9d4 100644 --- a/source/dnode/mnode/impl/src/mndUser.c +++ b/source/dnode/mnode/impl/src/mndUser.c @@ -1275,11 +1275,7 @@ static int32_t mndProcessCreateUserReq(SRpcMsg *pReq) { code = mndCreateUser(pMnode, pOperUser->acct, &createReq, pReq); if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; - char detail[1000] = {0}; - sprintf(detail, "createType:%d, enable:%d, superUser:%d, sysInfo:%d", createReq.createType, createReq.enable, - createReq.superUser, createReq.sysInfo); - - auditRecord(pReq, pMnode->clusterId, "createUser", createReq.user, "", detail); + auditRecord(pReq, pMnode->clusterId, "createUser", createReq.user, "", createReq.sql, createReq.sqlLen); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -1289,6 +1285,7 @@ _OVER: mndReleaseUser(pMnode, pUser); mndReleaseUser(pMnode, pOperUser); tFreeSCreateUserReq(&createReq); + return code; } @@ -1818,41 +1815,51 @@ static int32_t mndProcessAlterUserReq(SRpcMsg *pReq) { code = mndAlterUser(pMnode, pUser, &newUser, pReq); if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; - char detail[1000] = {0}; - sprintf(detail, "alterType:%s, enable:%d, superUser:%d, sysInfo:%d, tabName:%s, password:", - mndUserAuditTypeStr(alterReq.alterType), alterReq.enable, alterReq.superUser, alterReq.sysInfo, - alterReq.tabName); - - if (alterReq.alterType == TSDB_ALTER_USER_PASSWD) { + if(alterReq.alterType == TSDB_ALTER_USER_PASSWD){ + char detail[1000] = {0}; sprintf(detail, "alterType:%s, enable:%d, superUser:%d, sysInfo:%d, tabName:%s, password:xxx", mndUserAuditTypeStr(alterReq.alterType), alterReq.enable, alterReq.superUser, alterReq.sysInfo, alterReq.tabName); - auditRecord(pReq, pMnode->clusterId, "alterUser", alterReq.user, "", detail); - } else if (alterReq.alterType == TSDB_ALTER_USER_SUPERUSER || alterReq.alterType == TSDB_ALTER_USER_ENABLE || - alterReq.alterType == TSDB_ALTER_USER_SYSINFO) { - auditRecord(pReq, pMnode->clusterId, "alterUser", alterReq.user, "", detail); - } else if (alterReq.alterType == TSDB_ALTER_USER_ADD_READ_DB || alterReq.alterType == TSDB_ALTER_USER_ADD_WRITE_DB || - alterReq.alterType == TSDB_ALTER_USER_ADD_ALL_DB || alterReq.alterType == 
TSDB_ALTER_USER_ADD_READ_TABLE || - alterReq.alterType == TSDB_ALTER_USER_ADD_WRITE_TABLE || - alterReq.alterType == TSDB_ALTER_USER_ADD_ALL_TABLE) { - if (strcmp(alterReq.objname, "1.*") != 0) { + auditRecord(pReq, pMnode->clusterId, "alterUser", alterReq.user, "", detail, strlen(detail)); + } + else if(alterReq.alterType == TSDB_ALTER_USER_SUPERUSER || + alterReq.alterType == TSDB_ALTER_USER_ENABLE || + alterReq.alterType == TSDB_ALTER_USER_SYSINFO){ + auditRecord(pReq, pMnode->clusterId, "alterUser", alterReq.user, "", alterReq.sql, alterReq.sqlLen); + } + else if(alterReq.alterType == TSDB_ALTER_USER_ADD_READ_DB|| + alterReq.alterType == TSDB_ALTER_USER_ADD_WRITE_DB|| + alterReq.alterType == TSDB_ALTER_USER_ADD_ALL_DB|| + alterReq.alterType == TSDB_ALTER_USER_ADD_READ_TABLE|| + alterReq.alterType == TSDB_ALTER_USER_ADD_WRITE_TABLE|| + alterReq.alterType == TSDB_ALTER_USER_ADD_ALL_TABLE){ + if (strcmp(alterReq.objname, "1.*") != 0){ SName name = {0}; tNameFromString(&name, alterReq.objname, T_NAME_ACCT | T_NAME_DB); - auditRecord(pReq, pMnode->clusterId, "GrantPrivileges", alterReq.user, name.dbname, detail); - } else { - auditRecord(pReq, pMnode->clusterId, "GrantPrivileges", alterReq.user, "*", detail); + auditRecord(pReq, pMnode->clusterId, "GrantPrivileges", alterReq.user, name.dbname, + alterReq.sql, alterReq.sqlLen); + }else{ + auditRecord(pReq, pMnode->clusterId, "GrantPrivileges", alterReq.user, "*", + alterReq.sql, alterReq.sqlLen); } - } else if (alterReq.alterType == TSDB_ALTER_USER_ADD_SUBSCRIBE_TOPIC) { - auditRecord(pReq, pMnode->clusterId, "GrantPrivileges", alterReq.user, alterReq.objname, detail); - } else if (alterReq.alterType == TSDB_ALTER_USER_REMOVE_SUBSCRIBE_TOPIC) { - auditRecord(pReq, pMnode->clusterId, "RevokePrivileges", alterReq.user, alterReq.objname, detail); - } else { - if (strcmp(alterReq.objname, "1.*") != 0) { + } + else if(alterReq.alterType == TSDB_ALTER_USER_ADD_SUBSCRIBE_TOPIC){ + auditRecord(pReq, pMnode->clusterId, "GrantPrivileges", alterReq.user, alterReq.objname, + alterReq.sql, alterReq.sqlLen); + } + else if(alterReq.alterType == TSDB_ALTER_USER_REMOVE_SUBSCRIBE_TOPIC){ + auditRecord(pReq, pMnode->clusterId, "RevokePrivileges", alterReq.user, alterReq.objname, + alterReq.sql, alterReq.sqlLen); + } + else{ + if (strcmp(alterReq.objname, "1.*") != 0){ SName name = {0}; tNameFromString(&name, alterReq.objname, T_NAME_ACCT | T_NAME_DB); - auditRecord(pReq, pMnode->clusterId, "RevokePrivileges", alterReq.user, name.dbname, detail); - } else { - auditRecord(pReq, pMnode->clusterId, "RevokePrivileges", alterReq.user, "*", detail); + auditRecord(pReq, pMnode->clusterId, "RevokePrivileges", alterReq.user, name.dbname, + alterReq.sql, alterReq.sqlLen); + }else{ + auditRecord(pReq, pMnode->clusterId, "RevokePrivileges", alterReq.user, "*", + alterReq.sql, alterReq.sqlLen); } } @@ -1926,7 +1933,7 @@ static int32_t mndProcessDropUserReq(SRpcMsg *pReq) { code = mndDropUser(pMnode, pReq, pUser); if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; - auditRecord(pReq, pMnode->clusterId, "dropUser", dropReq.user, "", ""); + auditRecord(pReq, pMnode->clusterId, "dropUser", dropReq.user, "", dropReq.sql, dropReq.sqlLen); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -1934,6 +1941,7 @@ _OVER: } mndReleaseUser(pMnode, pUser); + tFreeSDropUserReq(&dropReq); return code; } @@ -2423,6 +2431,47 @@ int32_t mndUserRemoveDb(SMnode *pMnode, STrans *pTrans, char *db) { return code; } +int32_t mndUserRemoveStb(SMnode *pMnode, STrans *pTrans, char *stb) 
{ + int32_t code = 0; + SSdb *pSdb = pMnode->pSdb; + int32_t len = strlen(stb) + 1; + void *pIter = NULL; + SUserObj *pUser = NULL; + SUserObj newUser = {0}; + + while (1) { + pIter = sdbFetch(pSdb, SDB_USER, pIter, (void **)&pUser); + if (pIter == NULL) break; + + code = -1; + if (mndUserDupObj(pUser, &newUser) != 0) { + break; + } + + bool inRead = (taosHashGet(newUser.readTbs, stb, len) != NULL); + bool inWrite = (taosHashGet(newUser.writeTbs, stb, len) != NULL); + if (inRead || inWrite) { + (void)taosHashRemove(newUser.readTbs, stb, len); + (void)taosHashRemove(newUser.writeTbs, stb, len); + + SSdbRaw *pCommitRaw = mndUserActionEncode(&newUser); + if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { + break; + } + (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); + } + + mndUserFreeObj(&newUser); + sdbRelease(pSdb, pUser); + code = 0; + } + + if (pUser != NULL) sdbRelease(pSdb, pUser); + if (pIter != NULL) sdbCancelFetch(pSdb, pIter); + mndUserFreeObj(&newUser); + return code; +} + int32_t mndUserRemoveTopic(SMnode *pMnode, STrans *pTrans, char *topic) { int32_t code = 0; SSdb *pSdb = pMnode->pSdb; diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index 9e9397a927..417dcb0ce0 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -2177,11 +2177,7 @@ static int32_t mndProcessRedistributeVgroupMsg(SRpcMsg *pReq) { char obj[33] = {0}; sprintf(obj, "%d", req.vgId); - char detail[1000] = {0}; - sprintf(detail, "dnodeId1:%d, dnodeId2:%d, dnodeId3:%d", - req.dnodeId1, req.dnodeId2, req.dnodeId3); - - auditRecord(pReq, pMnode->clusterId, "RedistributeVgroup", obj, "", detail); + auditRecord(pReq, pMnode->clusterId, "RedistributeVgroup", obj, "", req.sql, req.sqlLen); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -2197,6 +2193,7 @@ _OVER: mndReleaseDnode(pMnode, pOld3); mndReleaseVgroup(pMnode, pVgroup); mndReleaseDb(pMnode, pDb); + tFreeSRedistributeVgroupReq(&req); return code; } @@ -2993,7 +2990,7 @@ static int32_t mndProcessBalanceVgroupMsg(SRpcMsg *pReq) { code = mndBalanceVgroup(pMnode, pReq, pArray); } - auditRecord(pReq, pMnode->clusterId, "balanceVgroup", "", "", ""); + auditRecord(pReq, pMnode->clusterId, "balanceVgroup", "", "", req.sql, req.sqlLen); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { @@ -3001,6 +2998,7 @@ _OVER: } taosArrayDestroy(pArray); + tFreeSBalanceVgroupReq(&req); return code; } diff --git a/source/dnode/mnode/impl/test/profile/profile.cpp b/source/dnode/mnode/impl/test/profile/profile.cpp index 6ab6d364cb..b1b94c65fb 100644 --- a/source/dnode/mnode/impl/test/profile/profile.cpp +++ b/source/dnode/mnode/impl/test/profile/profile.cpp @@ -65,7 +65,7 @@ TEST_F(MndTestProfile, 01_ConnectMsg) { connId = connectRsp.connId; } -TEST_F(MndTestProfile, 02_ConnectMsg_InvalidDB) { +TEST_F(MndTestProfile, 02_ConnectMsg_NotExistDB) { char passwd[] = "taosdata"; char secretEncrypt[TSDB_PASSWORD_LEN + 1] = {0}; taosEncryptPass_c((uint8_t*)passwd, strlen(passwd), secretEncrypt); @@ -73,7 +73,7 @@ TEST_F(MndTestProfile, 02_ConnectMsg_InvalidDB) { SConnectReq connectReq = {0}; connectReq.pid = 1234; strcpy(connectReq.app, "mnode_test_profile"); - strcpy(connectReq.db, "invalid_db"); + strcpy(connectReq.db, "not_exist_db"); strcpy(connectReq.user, "root"); strcpy(connectReq.passwd, secretEncrypt); strcpy(connectReq.sVer, version); @@ -84,7 +84,7 @@ TEST_F(MndTestProfile, 02_ConnectMsg_InvalidDB) { SRpcMsg* pRsp = 
test.SendReq(TDMT_MND_CONNECT, pReq, contLen); ASSERT_NE(pRsp, nullptr); - ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_DB); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_DB_NOT_EXIST); ASSERT_EQ(pRsp->contLen, 0); } diff --git a/source/dnode/mnode/impl/test/stb/stb.cpp b/source/dnode/mnode/impl/test/stb/stb.cpp index dd03917fc2..1adbb87e19 100644 --- a/source/dnode/mnode/impl/test/stb/stb.cpp +++ b/source/dnode/mnode/impl/test/stb/stb.cpp @@ -448,7 +448,7 @@ TEST_F(MndTestStb, 02_Alter_Stb_AddTag) { { void* pReq = BuildAlterStbAddTagReq("1.d3.stb", "tag4", &contLen); SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); - ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_DB); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_DB_NOT_EXIST); } { @@ -665,7 +665,7 @@ TEST_F(MndTestStb, 06_Alter_Stb_AddColumn) { { void* pReq = BuildAlterStbAddColumnReq("1.d7.stb", "tag4", &contLen); SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); - ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_DB); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_DB_NOT_EXIST); } { diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 2b1885fb0e..6451dba2da 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -37,11 +37,8 @@ void sndEnqueueStreamDispatch(SSnode *pSnode, SRpcMsg *pMsg) { SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, req.taskId); if (pTask) { - SRpcMsg rsp = { - .info = pMsg->info, - .code = 0, - }; - streamProcessDispatchMsg(pTask, &req, &rsp, false); + SRpcMsg rsp = { .info = pMsg->info, .code = 0 }; + streamProcessDispatchMsg(pTask, &req, &rsp); streamMetaReleaseTask(pSnode->pMeta, pTask); rpcFreeCont(pMsg->pCont); taosFreeQitem(pMsg); @@ -56,9 +53,9 @@ FAIL: taosFreeQitem(pMsg); } -int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) { - ASSERT(pTask->info.taskLevel == TASK_LEVEL__AGG && taosArrayGetSize(pTask->pUpstreamInfoList) != 0); - int32_t code = streamTaskInit(pTask, pSnode->pMeta, &pSnode->msgCb, ver); +int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer) { + ASSERT(pTask->info.taskLevel == TASK_LEVEL__AGG && taosArrayGetSize(pTask->upstreamInfo.pList) != 0); + int32_t code = streamTaskInit(pTask, pSnode->pMeta, &pSnode->msgCb, nextProcessVer); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -73,7 +70,7 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) { qDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState); } - int32_t numOfChildEp = taosArrayGetSize(pTask->pUpstreamInfoList); + int32_t numOfChildEp = taosArrayGetSize(pTask->upstreamInfo.pList); SReadHandle handle = { .vnode = NULL, .numOfVgroups = numOfChildEp, .pStateBackend = pTask->pState, .fillHistory = pTask->info.fillHistory }; initStreamStateAPI(&handle.api); @@ -115,18 +112,16 @@ SSnode *sndOpen(const char *path, const SSnodeOpt *pOption) { terrno = TSDB_CODE_OUT_OF_MEMORY; goto FAIL; } - pSnode->msgCb = pOption->msgCb; + pSnode->msgCb = pOption->msgCb; pSnode->pMeta = streamMetaOpen(path, pSnode, (FTaskExpand *)sndExpandTask, SNODE_HANDLE, -1); if (pSnode->pMeta == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; goto FAIL; } - // todo fix it: send msg to mnode to rollback to an existed checkpoint, and broadcast the rollback msg to all other - // computing nodes. 
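/* Editor's note (added, not part of the original patch): the removed lines below
 * hard-coded the snode's stream-meta stage to 0; the replacement delegates that
 * setup to streamMetaInitForSnode(), so stage initialization presumably lives in
 * the stream module itself, with the remaining rollback work still marked as a todo. */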
- pSnode->pMeta->stage = 0; - + // todo fix it: send msg to mnode to rollback to an existed checkpoint + streamMetaInitForSnode(pSnode->pMeta); return pSnode; FAIL: @@ -189,15 +184,17 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) { int32_t sndProcessTaskDropReq(SSnode *pSnode, char *msg, int32_t msgLen) { SVDropStreamTaskReq *pReq = (SVDropStreamTaskReq *)msg; qDebug("snode:%d receive msg to drop stream task:0x%x", pSnode->pMeta->vgId, pReq->taskId); - - SStreamTask* pTask = streamMetaAcquireTask(pSnode->pMeta, pReq->streamId, pReq->taskId); - if (pTask == NULL) { - qError("vgId:%d failed to acquire s-task:0x%x when dropping it", pSnode->pMeta->vgId, pReq->taskId); - return 0; - } - streamMetaUnregisterTask(pSnode->pMeta, pReq->streamId, pReq->taskId); - streamMetaReleaseTask(pSnode->pMeta, pTask); + + // commit the update + taosWLockLatch(&pSnode->pMeta->lock); + int32_t numOfTasks = streamMetaGetNumOfTasks(pSnode->pMeta); + qDebug("vgId:%d task:0x%x dropped, remain tasks:%d", pSnode->pMeta->vgId, pReq->taskId, numOfTasks); + + if (streamMetaCommit(pSnode->pMeta) < 0) { + // persist to disk + } + taosWUnLockLatch(&pSnode->pMeta->lock); return 0; } @@ -206,7 +203,7 @@ int32_t sndProcessTaskRunReq(SSnode *pSnode, SRpcMsg *pMsg) { SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, pReq->streamId, pReq->taskId); if (pTask) { - streamProcessRunReq(pTask); + streamExecTask(pTask); streamMetaReleaseTask(pSnode->pMeta, pTask); return 0; } else { @@ -226,7 +223,7 @@ int32_t sndProcessTaskDispatchReq(SSnode *pSnode, SRpcMsg *pMsg, bool exec) { SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, req.taskId); if (pTask) { SRpcMsg rsp = {.info = pMsg->info, .code = 0}; - streamProcessDispatchMsg(pTask, &req, &rsp, exec); + streamProcessDispatchMsg(pTask, &req, &rsp); streamMetaReleaseTask(pSnode->pMeta, pTask); return 0; } else { @@ -259,10 +256,11 @@ int32_t sndProcessTaskRetrieveReq(SSnode *pSnode, SRpcMsg *pMsg) { int32_t sndProcessTaskDispatchRsp(SSnode *pSnode, SRpcMsg *pMsg) { SStreamDispatchRsp *pRsp = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); - int32_t taskId = htonl(pRsp->upstreamTaskId); - int64_t streamId = htobe64(pRsp->streamId); + pRsp->upstreamTaskId = htonl(pRsp->upstreamTaskId); + pRsp->streamId = htobe64(pRsp->streamId); + pRsp->msgId = htonl(pRsp->msgId); - SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, streamId, taskId); + SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, pRsp->streamId, pRsp->upstreamTaskId); if (pTask) { streamProcessDispatchRsp(pTask, pRsp, pMsg->code); streamMetaReleaseTask(pSnode->pMeta, pTask); @@ -359,7 +357,7 @@ int32_t sndProcessStreamTaskCheckReq(SSnode *pSnode, SRpcMsg *pMsg) { qDebug("s-task:%s status:%s, recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), ready:%d", pTask->id.idStr, pStatus, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); } else { - rsp.status = 0; + rsp.status = TASK_DOWNSTREAM_NOT_READY; qDebug("recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64 ") from task:0x%x (vgId:%d), rsp status %d", taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); } diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 76ed0f4ed0..773591e6ad 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -189,7 +189,7 @@ int64_t tsdbGetNumOfRowsInMemTable2(STsdbReader *pHandle); void *tsdbGetIdx2(SMeta *pMeta); void *tsdbGetIvtIdx2(SMeta *pMeta); uint64_t 
tsdbGetReaderMaxVersion2(STsdbReader *pReader); -void tsdbReaderSetCloseFlag2(STsdbReader *pReader); +void tsdbReaderSetCloseFlag(STsdbReader *pReader); int64_t tsdbGetLastTimestamp2(SVnode *pVnode, void *pTableList, int32_t numOfTables, const char *pIdStr); //====================================================================================================================== diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index ee96e602d8..96274ec102 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -39,7 +39,6 @@ extern "C" { #define tqInfo(...) do { if (tqDebugFlag & DEBUG_INFO) { taosPrintLog("TQ ", DEBUG_INFO, 255, __VA_ARGS__); }} while(0) #define tqDebug(...) do { if (tqDebugFlag & DEBUG_DEBUG) { taosPrintLog("TQ ", DEBUG_DEBUG, tqDebugFlag, __VA_ARGS__); }} while(0) #define tqTrace(...) do { if (tqDebugFlag & DEBUG_TRACE) { taosPrintLog("TQ ", DEBUG_TRACE, tqDebugFlag, __VA_ARGS__); }} while(0) - // clang-format on typedef struct STqOffsetStore STqOffsetStore; @@ -170,7 +169,7 @@ int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequ int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, int64_t consumerId, int32_t type, int64_t sver, int64_t ever); int32_t tqInitDataRsp(SMqDataRsp* pRsp, STqOffsetVal pOffset); -void tqUpdateNodeStage(STQ* pTq); +void tqUpdateNodeStage(STQ* pTq, bool isLeader); #ifdef __cplusplus } diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 66e7b99d55..826d8bb6e8 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -302,12 +302,11 @@ int32_t tsdbDelFReaderClose(SDelFReader **ppReader); int32_t tsdbReadDelDatav1(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData, int64_t maxVer); int32_t tsdbReadDelData(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData); int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx); -// tsdbRead.c ============================================================================================== -int32_t tsdbTakeReadSnap(STsdbReader *pReader, _query_reseek_func_t reseek, STsdbReadSnap **ppSnap); -void tsdbUntakeReadSnap(STsdbReader *pReader, STsdbReadSnap *pSnap, bool proactive); +// tsdbRead.c ============================================================================================== int32_t tsdbTakeReadSnap2(STsdbReader *pReader, _query_reseek_func_t reseek, STsdbReadSnap **ppSnap); void tsdbUntakeReadSnap2(STsdbReader *pReader, STsdbReadSnap *pSnap, bool proactive); + // tsdbMerge.c ============================================================================================== int32_t tsdbMerge(void *arg); @@ -838,7 +837,6 @@ bool tMergeTreeNext(SMergeTree *pMTree); bool tMergeTreeIgnoreEarlierTs(SMergeTree *pMTree); void tMergeTreeClose(SMergeTree *pMTree); -SSttBlockLoadInfo *tCreateLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols, int32_t numOfStt); SSttBlockLoadInfo *tCreateOneLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols); void resetLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo); void getSttBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo, SSttBlockLoadCostInfo *pLoadCost); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 2fadccdf2d..823e9d57f6 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -227,9 +227,10 @@ int tqPushMsg(STQ*, tmsg_t msgType); int 
tqRegisterPushHandle(STQ* pTq, void* handle, SRpcMsg* pMsg); int tqUnregisterPushHandle(STQ* pTq, void* pHandle); int tqScanWalAsync(STQ* pTq, bool ckPause); -int32_t tqProcessStreamCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg); -int32_t tqProcessStreamTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp); +int32_t tqProcessTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqCheckAndRunStreamTaskAsync(STQ* pTq); int tqCommit(STQ*); @@ -249,11 +250,11 @@ int32_t tqProcessVgCommittedInfoReq(STQ* pTq, SRpcMsg* pMsg); // tq-stream int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen); -int32_t tqProcessTaskDropReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen); +int32_t tqProcessTaskDropReq(STQ* pTq, char* msg, int32_t msgLen); int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen); int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen); -int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg); -int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessTaskCheckReq(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessTaskCheckRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec); int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg); @@ -262,7 +263,6 @@ int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskScanHistoryFinishReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskScanHistoryFinishRsp(STQ* pTq, SRpcMsg* pMsg); -int32_t tqCheckLogInWal(STQ* pTq, int64_t version); // sma int32_t smaInit(); diff --git a/source/dnode/vnode/src/meta/metaCommit.c b/source/dnode/vnode/src/meta/metaCommit.c index 1494325657..f5572e68dd 100644 --- a/source/dnode/vnode/src/meta/metaCommit.c +++ b/source/dnode/vnode/src/meta/metaCommit.c @@ -52,7 +52,9 @@ int metaFinishCommit(SMeta *pMeta, TXN *txn) { return tdbPostCommit(pMeta->pEnv int metaPrepareAsyncCommit(SMeta *pMeta) { // return tdbPrepareAsyncCommit(pMeta->pEnv, pMeta->txn); int code = 0; + metaWLock(pMeta); code = ttlMgrFlush(pMeta->pTtlMgr, pMeta->txn); + metaULock(pMeta); code = tdbCommit(pMeta->pEnv, pMeta->txn); return code; diff --git a/source/dnode/vnode/src/meta/metaTable.c b/source/dnode/vnode/src/meta/metaTable.c index 442a739076..f600925a0b 100644 --- a/source/dnode/vnode/src/meta/metaTable.c +++ b/source/dnode/vnode/src/meta/metaTable.c @@ -1016,21 +1016,16 @@ end: } int metaTtlFindExpired(SMeta *pMeta, int64_t timePointMs, SArray *tbUids, int32_t ttlDropMaxCount) { - metaWLock(pMeta); - int ret = ttlMgrFlush(pMeta->pTtlMgr, pMeta->txn); - if (ret != 0) { - metaError("ttl failed to flush, ret:%d", ret); - goto _err; - } + metaRLock(pMeta); + + int ret = ttlMgrFindExpired(pMeta->pTtlMgr, timePointMs, tbUids, ttlDropMaxCount); + + metaULock(pMeta); - ret = ttlMgrFindExpired(pMeta->pTtlMgr, timePointMs, tbUids, ttlDropMaxCount); if (ret != 0) { metaError("ttl failed to find expired table, ret:%d", ret); - goto _err; } -_err: - metaULock(pMeta); return ret; } diff --git a/source/dnode/vnode/src/meta/metaTtl.c b/source/dnode/vnode/src/meta/metaTtl.c index f920296b4a..58ecf54512 100644 --- a/source/dnode/vnode/src/meta/metaTtl.c +++ 
b/source/dnode/vnode/src/meta/metaTtl.c @@ -299,7 +299,7 @@ int ttlMgrInsertTtl(STtlManger *pTtlMgr, const STtlUpdTtlCtx *updCtx) { ret = 0; _out: - metaDebug("%s, ttl mgr insert ttl, uid: %" PRId64 ", ctime: %" PRId64 ", ttlDays: %" PRId64, pTtlMgr->logPrefix, + metaTrace("%s, ttl mgr insert ttl, uid: %" PRId64 ", ctime: %" PRId64 ", ttlDays: %" PRId64, pTtlMgr->logPrefix, updCtx->uid, updCtx->changeTimeMs, updCtx->ttlDays); return ret; @@ -323,7 +323,7 @@ int ttlMgrDeleteTtl(STtlManger *pTtlMgr, const STtlDelTtlCtx *delCtx) { ret = 0; _out: - metaDebug("%s, ttl mgr delete ttl, uid: %" PRId64, pTtlMgr->logPrefix, delCtx->uid); + metaTrace("%s, ttl mgr delete ttl, uid: %" PRId64, pTtlMgr->logPrefix, delCtx->uid); return ret; } @@ -363,17 +363,37 @@ int ttlMgrUpdateChangeTime(STtlManger *pTtlMgr, const STtlUpdCtimeCtx *pUpdCtime ret = 0; _out: - metaDebug("%s, ttl mgr update ctime, uid: %" PRId64 ", ctime: %" PRId64, pTtlMgr->logPrefix, pUpdCtimeCtx->uid, + metaTrace("%s, ttl mgr update ctime, uid: %" PRId64 ", ctime: %" PRId64, pTtlMgr->logPrefix, pUpdCtimeCtx->uid, pUpdCtimeCtx->changeTimeMs); return ret; } int ttlMgrFindExpired(STtlManger *pTtlMgr, int64_t timePointMs, SArray *pTbUids, int32_t ttlDropMaxCount) { + int ret = -1; + STtlIdxKeyV1 ttlKey = {.deleteTimeMs = timePointMs, .uid = INT64_MAX}; STtlExpiredCtx expiredCtx = { .ttlDropMaxCount = ttlDropMaxCount, .count = 0, .expiredKey = ttlKey, .pTbUids = pTbUids}; - return tdbTbTraversal(pTtlMgr->pTtlIdx, &expiredCtx, ttlMgrFindExpiredOneEntry); + ret = tdbTbTraversal(pTtlMgr->pTtlIdx, &expiredCtx, ttlMgrFindExpiredOneEntry); + if (ret) { + goto _out; + } + + size_t vIdx = 0; + for (size_t i = 0; i < pTbUids->size; i++) { + tb_uid_t *pUid = taosArrayGet(pTbUids, i); + if (taosHashGet(pTtlMgr->pDirtyUids, pUid, sizeof(tb_uid_t)) == NULL) { + // not in dirty && expired in tdb => must be expired + taosArraySet(pTbUids, vIdx, pUid); + vIdx++; + } + } + + taosArrayPopTailBatch(pTbUids, pTbUids->size - vIdx); + +_out: + return ret; } static bool ttlMgrNeedFlush(STtlManger *pTtlMgr) { diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index adf3abe4d9..8f3661dffa 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -712,7 +712,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg continue; } if (pHandle->consumerId == req.newConsumerId) { // do nothing - tqInfo("vgId:%d no switch consumer:0x%" PRIx64 " remains", req.vgId, req.newConsumerId); + tqInfo("vgId:%d no switch consumer:0x%" PRIx64 " remains, because redo wal log", req.vgId, req.newConsumerId); } else { tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId, req.newConsumerId); @@ -733,11 +733,11 @@ end: void freePtr(void* ptr) { taosMemoryFree(*(void**)ptr); } -int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { +int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) { int32_t vgId = TD_VID(pTq->pVnode); tqDebug("s-task:0x%x start to expand task", pTask->id.taskId); - int32_t code = streamTaskInit(pTask, pTq->pStreamMeta, &pTq->pVnode->msgCb, ver); + int32_t code = streamTaskInit(pTask, pTq->pStreamMeta, &pTq->pVnode->msgCb, nextProcessVer); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -748,7 +748,8 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { SStreamTask* pStateTask = pTask; SStreamTask task = {0}; if (pTask->info.fillHistory) { - task.id = pTask->streamTaskId; + task.id.streamId = 
pTask->streamTaskId.streamId; + task.id.taskId = pTask->streamTaskId.taskId; task.pMeta = pTask->pMeta; pStateTask = &task; } @@ -782,7 +783,8 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { SStreamTask* pSateTask = pTask; SStreamTask task = {0}; if (pTask->info.fillHistory) { - task.id = pTask->streamTaskId; + task.id.streamId = pTask->streamTaskId.streamId; + task.id.taskId = pTask->streamTaskId.taskId; task.pMeta = pTask->pMeta; pSateTask = &task; } @@ -795,7 +797,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { tqDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState); } - int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->pUpstreamInfoList); + int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->upstreamInfo.pList); SReadHandle handle = { .checkpointId = pTask->chkInfo.checkpointId, .vnode = NULL, @@ -816,27 +818,27 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { // sink if (pTask->outputInfo.type == TASK_OUTPUT__SMA) { - pTask->smaSink.vnode = pTq->pVnode; - pTask->smaSink.smaSink = smaHandleRes; + pTask->outputInfo.smaSink.vnode = pTq->pVnode; + pTask->outputInfo.smaSink.smaSink = smaHandleRes; } else if (pTask->outputInfo.type == TASK_OUTPUT__TABLE) { - pTask->tbSink.vnode = pTq->pVnode; - pTask->tbSink.tbSinkFunc = tqSinkDataIntoDstTable; + pTask->outputInfo.tbSink.vnode = pTq->pVnode; + pTask->outputInfo.tbSink.tbSinkFunc = tqSinkDataIntoDstTable; int32_t ver1 = 1; SMetaInfo info = {0}; - code = metaGetInfo(pTq->pVnode->pMeta, pTask->tbSink.stbUid, &info, NULL); + code = metaGetInfo(pTq->pVnode->pMeta, pTask->outputInfo.tbSink.stbUid, &info, NULL); if (code == TSDB_CODE_SUCCESS) { ver1 = info.skmVer; } - SSchemaWrapper* pschemaWrapper = pTask->tbSink.pSchemaWrapper; - pTask->tbSink.pTSchema = tBuildTSchema(pschemaWrapper->pSchema, pschemaWrapper->nCols, ver1); - if (pTask->tbSink.pTSchema == NULL) { + SSchemaWrapper* pschemaWrapper = pTask->outputInfo.tbSink.pSchemaWrapper; + pTask->outputInfo.tbSink.pTSchema = tBuildTSchema(pschemaWrapper->pSchema, pschemaWrapper->nCols, ver1); + if (pTask->outputInfo.tbSink.pTSchema == NULL) { return -1; } - pTask->tbSink.pTblInfo = tSimpleHashInit(10240, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); - tSimpleHashSetFreeFp(pTask->tbSink.pTblInfo, freePtr); + pTask->outputInfo.tbSink.pTblInfo = tSimpleHashInit(10240, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); + tSimpleHashSetFreeFp(pTask->outputInfo.tbSink.pTblInfo, freePtr); } if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { @@ -861,19 +863,30 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer); } - tqInfo("vgId:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " nextProcessVer:%" PRId64 - " child id:%d, level:%d, status:%s fill-history:%d, trigger:%" PRId64 " ms", - vgId, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, - pTask->info.selfChildId, pTask->info.taskLevel, streamGetTaskStatusStr(pTask->status.taskStatus), - pTask->info.fillHistory, pTask->info.triggerParam); + if (pTask->info.fillHistory) { + tqInfo("vgId:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 + " nextProcessVer:%" PRId64 + " child id:%d, level:%d, status:%s fill-history:%d, related stream task:0x%x trigger:%" PRId64 " ms", + vgId, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, + 
pTask->info.selfChildId, pTask->info.taskLevel, streamGetTaskStatusStr(pTask->status.taskStatus), + pTask->info.fillHistory, (int32_t)pTask->streamTaskId.taskId, pTask->info.triggerParam); + } else { + tqInfo("vgId:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 + " nextProcessVer:%" PRId64 + " child id:%d, level:%d, status:%s fill-history:%d, related fill-task:0x%x trigger:%" PRId64 " ms", + vgId, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, + pTask->info.selfChildId, pTask->info.taskLevel, streamGetTaskStatusStr(pTask->status.taskStatus), + pTask->info.fillHistory, (int32_t)pTask->hTaskInfo.id.taskId, pTask->info.triggerParam); + } return 0; } -int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { +int32_t tqProcessTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { char* msgStr = pMsg->pCont; char* msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); + SStreamMeta* pMeta = pTq->pStreamMeta; SStreamTaskCheckReq req; SDecoder decoder; @@ -894,25 +907,32 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { .upstreamTaskId = req.upstreamTaskId, }; - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, taskId); - if (pTask != NULL) { - rsp.status = streamTaskCheckStatus(pTask, req.upstreamTaskId, req.upstreamNodeId, req.stage); - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - - const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); - tqDebug("s-task:%s status:%s, stage:%d recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), ready:%d", - pTask->id.idStr, pStatus, rsp.oldStage, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); + // only the leader node handle the check request + if (pMeta->role == NODE_ROLE_FOLLOWER) { + tqError("s-task:0x%x invalid check msg from upstream:0x%x(vgId:%d), vgId:%d is follower, not handle check status msg", + taskId, req.upstreamTaskId, req.upstreamNodeId, pMeta->vgId); + rsp.status = TASK_DOWNSTREAM_NOT_LEADER; } else { - rsp.status = 0; - tqDebug("tq recv task check(taskId:0x%" PRIx64 "-0x%x not built yet) req(reqId:0x%" PRIx64 - ") from task:0x%x (vgId:%d), rsp status %d", - req.streamId, taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); + SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, taskId); + if (pTask != NULL) { + rsp.status = streamTaskCheckStatus(pTask, req.upstreamTaskId, req.upstreamNodeId, req.stage); + streamMetaReleaseTask(pMeta, pTask); + + const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); + tqDebug("s-task:%s status:%s, stage:%d recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), check_status:%d", + pTask->id.idStr, pStatus, rsp.oldStage, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); + } else { + rsp.status = TASK_DOWNSTREAM_NOT_READY; + tqDebug("tq recv task check(taskId:0x%" PRIx64 "-0x%x not built yet) req(reqId:0x%" PRIx64 + ") from task:0x%x (vgId:%d), rsp check_status %d", + req.streamId, taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); + } } - return streamSendCheckRsp(pTq->pStreamMeta, &req, &rsp, &pMsg->info, taskId); + return streamSendCheckRsp(pMeta, &req, &rsp, &pMsg->info, taskId); } -int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, SRpcMsg* pMsg) { +int32_t tqProcessTaskCheckRsp(STQ* pTq, SRpcMsg* pMsg) { char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t len = pMsg->contLen - sizeof(SMsgHead); int32_t vgId 
= pTq->pStreamMeta->vgId; @@ -936,7 +956,7 @@ int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, SRpcMsg* pMsg) { SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, rsp.streamId, rsp.upstreamTaskId); if (pTask == NULL) { - tqError("tq failed to locate the stream task:0x%" PRIx64 "-0x%x (vgId:%d), it may have been destroyed", + tqError("tq failed to locate the stream task:0x%" PRIx64 "-0x%x (vgId:%d), it may have been destroyed or stopped", rsp.streamId, rsp.upstreamTaskId, pTq->pStreamMeta->vgId); terrno = TSDB_CODE_STREAM_TASK_NOT_EXIST; return -1; @@ -953,18 +973,17 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms if (tsDisableStream) { tqInfo("vgId:%d stream disabled, not deploy stream tasks", vgId); - return 0; + return code; } tqDebug("vgId:%d receive new stream task deploy msg, start to build stream task", vgId); // 1.deserialize msg and build task - SStreamTask* pTask = taosMemoryCalloc(1, sizeof(SStreamTask)); + int32_t size = sizeof(SStreamTask); + SStreamTask* pTask = taosMemoryCalloc(1, size); if (pTask == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - tqError("vgId:%d failed to create stream task due to out of memory, alloc size:%d", vgId, - (int32_t)sizeof(SStreamTask)); - return -1; + tqError("vgId:%d failed to create stream task due to out of memory, alloc size:%d", vgId, size); + return TSDB_CODE_OUT_OF_MEMORY; } SDecoder decoder; @@ -972,9 +991,9 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms code = tDecodeStreamTask(&decoder, pTask); tDecoderClear(&decoder); - if (code < 0) { + if (code != TSDB_CODE_SUCCESS) { taosMemoryFree(pTask); - return -1; + return TSDB_CODE_INVALID_MSG; } SStreamMeta* pStreamMeta = pTq->pStreamMeta; @@ -990,9 +1009,9 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms taosWUnLockLatch(&pStreamMeta->lock); if (code < 0) { - tqError("vgId:%d failed to add s-task:0x%x, total:%d, code:%s", vgId, taskId, numOfTasks, tstrerror(code)); + tqError("failed to add s-task:0x%x into vgId:%d meta, total:%d, code:%s", taskId, vgId, numOfTasks, tstrerror(code)); tFreeStreamTask(pTask); - return -1; + return code; } // added into meta store, pTask cannot be reference since it may have been destroyed by other threads already now if @@ -1005,8 +1024,8 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms bool restored = pTq->pVnode->restored; if (p != NULL && restored) { - p->tsInfo.init = taosGetTimestampMs(); - tqDebug("s-task:%s set the init ts:%"PRId64, p->id.idStr, p->tsInfo.init); + p->execInfo.init = taosGetTimestampMs(); + tqDebug("s-task:%s set the init ts:%"PRId64, p->id.idStr, p->execInfo.init); streamTaskCheckDownstream(p); } else if (!restored) { @@ -1024,17 +1043,18 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms tFreeStreamTask(pTask); } - return 0; + return code; } +// this function should be executed by only one thread int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { SStreamScanHistoryReq* pReq = (SStreamScanHistoryReq*)pMsg->pCont; SStreamMeta* pMeta = pTq->pStreamMeta; + int32_t code = TSDB_CODE_SUCCESS; - int32_t code = TSDB_CODE_SUCCESS; SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); if (pTask == NULL) { - tqError("vgId:%d failed to acquire stream task:0x%x during stream recover, task may have been destroyed", + tqError("vgId:%d failed to acquire stream task:0x%x during scan history data, task may have been destroyed", pMeta->vgId,
pReq->taskId); return -1; } @@ -1042,26 +1062,47 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { // do recovery step1 const char* id = pTask->id.idStr; const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); - tqDebug("s-task:%s start scan-history stage(step 1), status:%s", id, pStatus); - if (pTask->tsInfo.step1Start == 0) { + // avoid multi-thread exec + while(1) { + int32_t sentinel = atomic_val_compare_exchange_32(&pTask->status.inScanHistorySentinel, 0, 1); + if (sentinel != 0) { + tqDebug("s-task:%s already in scan-history func, wait for 100ms, and try again", id); + taosMsleep(100); + } else { + break; + } + } + + if (pTask->execInfo.step1Start == 0) { ASSERT(pTask->status.pauseAllowed == false); - pTask->tsInfo.step1Start = taosGetTimestampMs(); + int64_t ts = taosGetTimestampMs(); + + pTask->execInfo.step1Start = ts; + tqDebug("s-task:%s start scan-history stage(step 1), status:%s, step1 startTs:%" PRId64, id, pStatus, ts); + + // NOTE: in case of stream task, scan-history data in wal is not allowed to pause if (pTask->info.fillHistory == 1) { streamTaskEnablePause(pTask); } } else { - tqDebug("s-task:%s resume from paused, start ts:%" PRId64, pTask->id.idStr, pTask->tsInfo.step1Start); + if (pTask->execInfo.step2Start == 0) { + tqDebug("s-task:%s resume from paused, original step1 startTs:%" PRId64, id, pTask->execInfo.step1Start); + } else { + tqDebug("s-task:%s already in step2, no need to scan-history data, step2 startTs:%"PRId64, id, pTask->execInfo.step2Start); + atomic_store_32(&pTask->status.inScanHistorySentinel, 0); + streamMetaReleaseTask(pMeta, pTask); + return 0; + } } // we have to continue retrying to successfully execute the scan history task. - int8_t schedStatus = atomic_val_compare_exchange_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE, - TASK_SCHED_STATUS__WAITING); - if (schedStatus != TASK_SCHED_STATUS__INACTIVE) { + if (!streamTaskSetSchedStatusWait(pTask)) { tqError( "s-task:%s failed to start scan-history in first stream time window since already started, unexpected " "sched-status:%d", - id, schedStatus); + id, pTask->status.schedStatus); + atomic_store_32(&pTask->status.inScanHistorySentinel, 0); streamMetaReleaseTask(pMeta, pTask); return 0; } @@ -1071,17 +1112,18 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { } streamScanHistoryData(pTask); + + double el = (taosGetTimestampMs() - pTask->execInfo.step1Start) / 1000.0; if (pTask->status.taskStatus == TASK_STATUS__PAUSE) { - double el = (taosGetTimestampMs() - pTask->tsInfo.step1Start) / 1000.0; - tqDebug("s-task:%s is paused in the step1, elapsed time:%.2fs, sched-status:%d", pTask->id.idStr, el, - TASK_SCHED_STATUS__INACTIVE); - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + int8_t status = streamTaskSetSchedStatusInActive(pTask); + tqDebug("s-task:%s is paused in the step1, elapsed time:%.2fs, sched-status:%d", pTask->id.idStr, el, status); + + atomic_store_32(&pTask->status.inScanHistorySentinel, 0); streamMetaReleaseTask(pMeta, pTask); return 0; } // the following procedure should be executed, no matter status is stop/pause or not - double el = (taosGetTimestampMs() - pTask->tsInfo.step1Start) / 1000.0; tqDebug("s-task:%s scan-history stage(step 1) ended, elapsed time:%.2fs", id, el); if (pTask->info.fillHistory) { @@ -1093,12 +1135,14 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { pStreamTask = streamMetaAcquireTask(pMeta, pTask->streamTaskId.streamId, pTask->streamTaskId.taskId); if (pStreamTask ==
NULL) { // todo delete this task, if the related stream task is dropped - qError("failed to find s-task:0x%x, it may have been destroyed, drop fill-history task:%s", + qError("failed to find s-task:0x%"PRIx64", it may have been destroyed, drop fill-history task:%s", pTask->streamTaskId.taskId, pTask->id.idStr); tqDebug("s-task:%s fill-history task set status to be dropping", id); - streamMetaUnregisterTask(pMeta, pTask->id.streamId, pTask->id.taskId); + streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &pTask->id); + + atomic_store_32(&pTask->status.inScanHistorySentinel, 0); streamMetaReleaseTask(pMeta, pTask); return -1; } @@ -1115,24 +1159,69 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { } // now we can stop the stream task execution + int64_t nextProcessedVer = 0; - int64_t latestVer = 0; - taosThreadMutexLock(&pStreamTask->lock); - streamTaskHalt(pStreamTask); - tqDebug("s-task:%s level:%d sched-status:%d is halt by fill-history task:%s", pStreamTask->id.idStr, - pStreamTask->info.taskLevel, pStreamTask->status.schedStatus, id); - latestVer = walReaderGetCurrentVer(pStreamTask->exec.pWalReader); - taosThreadMutexUnlock(&pStreamTask->lock); + while (1) { + taosThreadMutexLock(&pStreamTask->lock); + int8_t status = pStreamTask->status.taskStatus; + if (status == TASK_STATUS__DROPPING || status == TASK_STATUS__STOP) { + // return; do nothing + } + + if (status == TASK_STATUS__HALT) { +// tqDebug("s-task:%s level:%d sched-status:%d is halt by fill-history task:%s", pStreamTask->id.idStr, +// pStreamTask->info.taskLevel, pStreamTask->status.schedStatus, id); +// latestVer = walReaderGetCurrentVer(pStreamTask->exec.pWalReader); +// +// taosThreadMutexUnlock(&pStreamTask->lock); +// break; + } + + if (pStreamTask->status.taskStatus == TASK_STATUS__CK) { + qDebug("s-task:%s status:%s during generating checkpoint, wait for 1sec and retry set status:halt", + pStreamTask->id.idStr, streamGetTaskStatusStr(TASK_STATUS__CK)); + taosThreadMutexUnlock(&pStreamTask->lock); + taosMsleep(1000); + continue; + } + + // upgrade to halt status + if (status == TASK_STATUS__PAUSE) { + qDebug("s-task:%s upgrade status to %s from %s", pStreamTask->id.idStr, streamGetTaskStatusStr(TASK_STATUS__HALT), + streamGetTaskStatusStr(TASK_STATUS__PAUSE)); + } else { + qDebug("s-task:%s halt task, prev status:%s", pStreamTask->id.idStr, streamGetTaskStatusStr(status)); + } + + pStreamTask->status.keepTaskStatus = status; + pStreamTask->status.taskStatus = TASK_STATUS__HALT; + + nextProcessedVer = walReaderGetCurrentVer(pStreamTask->exec.pWalReader); + + tqDebug("s-task:%s level:%d nextProcessedVer:%" PRId64 ", sched-status:%d is halt by fill-history task:%s", + pStreamTask->id.idStr, pStreamTask->info.taskLevel, nextProcessedVer, pStreamTask->status.schedStatus, + id); + + taosThreadMutexUnlock(&pStreamTask->lock); + break; + } // if it's an source task, extract the last version in wal. 
pRange = &pTask->dataRange.range; - done = streamHistoryTaskSetVerRangeStep2(pTask, latestVer); + done = streamHistoryTaskSetVerRangeStep2(pTask, nextProcessedVer); + pTask->execInfo.step2Start = taosGetTimestampMs(); if (done) { - pTask->tsInfo.step2Start = taosGetTimestampMs(); qDebug("s-task:%s scan-history from WAL stage(step 2) ended, elapsed time:%.2fs", id, 0.0); streamTaskPutTranstateIntoInputQ(pTask); - streamTryExec(pTask); // exec directly + + if (pTask->status.taskStatus == TASK_STATUS__PAUSE) { + pTask->status.keepTaskStatus = TASK_STATUS__NORMAL; + qDebug("s-task:%s prev status is %s, update the kept status to be:%s after step 2", id, + streamGetTaskStatusStr(TASK_STATUS__PAUSE), streamGetTaskStatusStr(pTask->status.keepTaskStatus)); + } + + streamExecTask(pTask); // exec directly } else { STimeWindow* pWindow = &pTask->dataRange.window; tqDebug("s-task:%s level:%d verRange:%" PRId64 " - %" PRId64 " window:%" PRId64 "-%" PRId64 @@ -1141,31 +1230,30 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { pStreamTask->id.idStr); ASSERT(pTask->status.schedStatus == TASK_SCHED_STATUS__WAITING); - pTask->tsInfo.step2Start = taosGetTimestampMs(); streamSetParamForStreamScannerStep2(pTask, pRange, pWindow); int64_t dstVer = pTask->dataRange.range.minVer; pTask->chkInfo.nextProcessVer = dstVer; + walReaderSetSkipToVersion(pTask->exec.pWalReader, dstVer); tqDebug("s-task:%s wal reader start scan WAL verRange:%" PRId64 "-%" PRId64 ", set sched-status:%d", id, dstVer, pTask->dataRange.range.maxVer, TASK_SCHED_STATUS__INACTIVE); - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + /*int8_t status = */streamTaskSetSchedStatusInActive(pTask); - // set the fill-history task to be normal + // the fill-history task starts to process data in wal, let's set its status to be normal now if (pTask->info.fillHistory == 1 && !streamTaskShouldStop(&pTask->status)) { streamSetStatusNormal(pTask); } tqScanWalAsync(pTq, false); } - - streamMetaReleaseTask(pMeta, pTask); streamMetaReleaseTask(pMeta, pStreamTask); + } else { STimeWindow* pWindow = &pTask->dataRange.window; - if (pTask->historyTaskId.taskId == 0) { + if (pTask->hTaskInfo.id.taskId == 0) { *pWindow = (STimeWindow){INT64_MIN, INT64_MAX}; tqDebug( "s-task:%s scan-history in stream time window completed, no related fill-history task, reset the time " @@ -1182,52 +1270,11 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { } code = streamTaskScanHistoryDataComplete(pTask); - streamMetaReleaseTask(pMeta, pTask); - - // when all source task complete to scan history data in stream time window, they are allowed to handle stream data - // at the same time. - return code; } - return 0; -} - -// notify the downstream tasks to transfer executor state after handle all history blocks. -int32_t tqProcessTaskTransferStateReq(STQ* pTq, SRpcMsg* pMsg) { - char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); - int32_t len = pMsg->contLen - sizeof(SMsgHead); - - SStreamTransferReq req = {0}; - - SDecoder decoder; - tDecoderInit(&decoder, (uint8_t*)pReq, len); - int32_t code = tDecodeStreamScanHistoryFinishReq(&decoder, &req); - tDecoderClear(&decoder); - - tqDebug("vgId:%d start to process transfer state msg, from s-task:0x%x", pTq->pStreamMeta->vgId, - req.downstreamTaskId); - - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, req.downstreamTaskId); - if (pTask == NULL) { - tqError("failed to find task:0x%x, it may have been dropped already.
process transfer state failed", - req.downstreamTaskId); - return -1; - } - - int32_t remain = streamAlignTransferState(pTask); - if (remain > 0) { - tqDebug("s-task:%s receive upstream transfer state msg, remain:%d", pTask->id.idStr, remain); - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - return 0; - } - - // transfer the ownership of executor state - tqDebug("s-task:%s all upstream tasks send transfer msg, open transfer state flag", pTask->id.idStr); - ASSERT(pTask->streamTaskId.taskId != 0 && pTask->info.fillHistory == 1); - - streamSchedExec(pTask); - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - return 0; + atomic_store_32(&pTask->status.inScanHistorySentinel, 0); + streamMetaReleaseTask(pMeta, pTask); + return code; } // only the agg tasks and the sink tasks will receive this message from upstream tasks @@ -1315,11 +1362,11 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { if (st == TASK_STATUS__NORMAL || st == TASK_STATUS__SCAN_HISTORY || st == TASK_STATUS__CK) { tqDebug("vgId:%d s-task:%s start to process block from inputQ, next checked ver:%" PRId64, vgId, pTask->id.idStr, pTask->chkInfo.nextProcessVer); - streamProcessRunReq(pTask); + streamExecTask(pTask); } else { - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + int8_t status = streamTaskSetSchedStatusInActive(pTask); tqDebug("vgId:%d s-task:%s ignore run req since not in ready state, status:%s, sched-status:%d", vgId, - pTask->id.idStr, streamGetTaskStatusStr(st), pTask->status.schedStatus); + pTask->id.idStr, streamGetTaskStatusStr(st), status); } streamMetaReleaseTask(pTq->pStreamMeta, pTask); @@ -1344,10 +1391,12 @@ int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec) { tDecodeStreamDispatchReq(&decoder, &req); tDecoderClear(&decoder); + tqDebug("s-task:0x%x recv dispatch msg from 0x%x(vgId:%d)", req.taskId, req.upstreamTaskId, req.upstreamNodeId); + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, req.taskId); if (pTask) { SRpcMsg rsp = {.info = pMsg->info, .code = 0}; - streamProcessDispatchMsg(pTask, &req, &rsp, exec); + streamProcessDispatchMsg(pTask, &req, &rsp); streamMetaReleaseTask(pTq->pStreamMeta, pTask); return 0; } else { @@ -1366,6 +1415,8 @@ int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) { pRsp->streamId = htobe64(pRsp->streamId); pRsp->downstreamTaskId = htonl(pRsp->downstreamTaskId); pRsp->downstreamNodeId = htonl(pRsp->downstreamNodeId); + pRsp->stage = htobe64(pRsp->stage); + pRsp->msgId = htonl(pRsp->msgId); SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pRsp->streamId, pRsp->upstreamTaskId); if (pTask) { @@ -1379,17 +1430,37 @@ int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) { } } -int32_t tqProcessTaskDropReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { +int32_t tqProcessTaskDropReq(STQ* pTq, char* msg, int32_t msgLen) { SVDropStreamTaskReq* pReq = (SVDropStreamTaskReq*)msg; - tqDebug("vgId:%d receive msg to drop stream task:0x%x", TD_VID(pTq->pVnode), pReq->taskId); - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->streamId, pReq->taskId); - if (pTask == NULL) { - tqError("vgId:%d failed to acquire s-task:0x%x when dropping it", pTq->pStreamMeta->vgId, pReq->taskId); - return 0; + + int32_t vgId = TD_VID(pTq->pVnode); + SStreamMeta* pMeta = pTq->pStreamMeta; + tqDebug("vgId:%d receive msg to drop s-task:0x%x", vgId, pReq->taskId); + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); + if (pTask != NULL) { + // drop the 
related fill-history task firstly + if (pTask->hTaskInfo.id.taskId != 0) { + STaskId* pHTaskId = &pTask->hTaskInfo.id; + streamMetaUnregisterTask(pMeta, pHTaskId->streamId, pHTaskId->taskId); + tqDebug("vgId:%d related fill-history task:0x%x dropped firstly", vgId, (int32_t)pHTaskId->taskId); + } + streamMetaReleaseTask(pMeta, pTask); } - streamMetaUnregisterTask(pTq->pStreamMeta, pReq->streamId, pReq->taskId); - streamMetaReleaseTask(pTq->pStreamMeta, pTask); + // drop the stream task now + streamMetaUnregisterTask(pMeta, pReq->streamId, pReq->taskId); + + // commit the update + taosWLockLatch(&pMeta->lock); + int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); + tqDebug("vgId:%d task:0x%x dropped, remain tasks:%d", vgId, pReq->taskId, numOfTasks); + + if (streamMetaCommit(pMeta) < 0) { + // persist to disk + } + taosWUnLockLatch(&pMeta->lock); + return 0; } @@ -1409,11 +1480,12 @@ int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg streamTaskPause(pTask, pMeta); SStreamTask* pHistoryTask = NULL; - if (pTask->historyTaskId.taskId != 0) { - pHistoryTask = streamMetaAcquireTask(pMeta, pTask->historyTaskId.streamId, pTask->historyTaskId.taskId); + if (pTask->hTaskInfo.id.taskId != 0) { + pHistoryTask = streamMetaAcquireTask(pMeta, pTask->hTaskInfo.id.streamId, pTask->hTaskInfo.id.taskId); if (pHistoryTask == NULL) { - tqError("vgId:%d process pause req, failed to acquire fill-history task:0x%x, it may have been dropped already", - pMeta->vgId, pTask->historyTaskId.taskId); + tqError("vgId:%d process pause req, failed to acquire fill-history task:0x%" PRIx64 + ", it may have been dropped already", + pMeta->vgId, pTask->hTaskInfo.id.taskId); streamMetaReleaseTask(pMeta, pTask); // since task is in [STOP|DROPPING] state, it is safe to assume the pause is active @@ -1462,7 +1534,7 @@ int32_t tqProcessTaskResumeImpl(STQ* pTq, SStreamTask* pTask, int64_t sversion, if (level == TASK_LEVEL__SOURCE && pTask->info.fillHistory && pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) { streamStartScanHistoryAsync(pTask, igUntreated); - } else if (level == TASK_LEVEL__SOURCE && (taosQueueItemSize(pTask->inputInfo.queue->pQueue) == 0)) { + } else if (level == TASK_LEVEL__SOURCE && (streamQueueGetNumOfItems(pTask->inputInfo.queue) == 0)) { tqScanWalAsync(pTq, false); } else { streamSchedExec(pTask); @@ -1475,14 +1547,15 @@ int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { SVResumeStreamTaskReq* pReq = (SVResumeStreamTaskReq*)msg; - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->streamId, pReq->taskId); - int32_t code = tqProcessTaskResumeImpl(pTq, pTask, sversion, pReq->igUntreated); + + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->streamId, pReq->taskId); + int32_t code = tqProcessTaskResumeImpl(pTq, pTask, sversion, pReq->igUntreated); if (code != 0) { return code; } - SStreamTask* pHistoryTask = - streamMetaAcquireTask(pTq->pStreamMeta, pTask->historyTaskId.streamId, pTask->historyTaskId.taskId); + STaskId* pHTaskId = &pTask->hTaskInfo.id; + SStreamTask* pHistoryTask = streamMetaAcquireTask(pTq->pStreamMeta, pHTaskId->streamId, pHTaskId->taskId); if (pHistoryTask) { code = tqProcessTaskResumeImpl(pTq, pHistoryTask, sversion, pReq->igUntreated); } @@ -1501,9 +1574,11 @@ int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg) { tDecodeStreamRetrieveReq(&decoder, &req); tDecoderClear(&decoder); + int32_t vgId =
pTq->pStreamMeta->vgId; SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, req.dstTaskId); if (pTask == NULL) { - // tDeleteStreamDispatchReq(&req); + tqError("vgId:%d process retrieve req, failed to acquire task:0x%x, it may have been dropped already", vgId, + req.dstTaskId); return -1; } @@ -1543,10 +1618,16 @@ int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) { int32_t taskId = req.taskId; tqDebug("vgId:%d receive dispatch msg to s-task:0x%" PRIx64 "-0x%x", vgId, req.streamId, taskId); + // for test purpose +// if (req.type == STREAM_INPUT__CHECKPOINT_TRIGGER) { +// code = TSDB_CODE_STREAM_TASK_NOT_EXIST; +// goto FAIL; +// } + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, taskId); if (pTask != NULL) { SRpcMsg rsp = {.info = pMsg->info, .code = 0}; - streamProcessDispatchMsg(pTask, &req, &rsp, false); + streamProcessDispatchMsg(pTask, &req, &rsp); streamMetaReleaseTask(pTq->pStreamMeta, pTask); rpcFreeCont(pMsg->pCont); taosFreeQitem(pMsg); @@ -1574,12 +1655,16 @@ FAIL: } pRspHead->vgId = htonl(req.upstreamNodeId); + ASSERT(pRspHead->vgId != 0); + SStreamDispatchRsp* pRsp = POINTER_SHIFT(pRspHead, sizeof(SMsgHead)); pRsp->streamId = htobe64(req.streamId); pRsp->upstreamTaskId = htonl(req.upstreamTaskId); pRsp->upstreamNodeId = htonl(req.upstreamNodeId); pRsp->downstreamNodeId = htonl(pVnode->config.vgId); pRsp->downstreamTaskId = htonl(req.taskId); + pRsp->msgId = htonl(req.msgId); + pRsp->stage = htobe64(req.stage); pRsp->inputStatus = TASK_OUTPUT_STATUS__NORMAL; int32_t len = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp); @@ -1592,17 +1677,34 @@ FAIL: return -1; } -int32_t tqCheckLogInWal(STQ* pTq, int64_t sversion) { return sversion <= pTq->walLogLastVer; } - // todo error code cannot be return, since this is invoked by an mnode-launched transaction. 
-int32_t tqProcessStreamCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg) { +int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) { int32_t vgId = TD_VID(pTq->pVnode); SStreamMeta* pMeta = pTq->pStreamMeta; char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t len = pMsg->contLen - sizeof(SMsgHead); int32_t code = 0; + // disable auto rsp to mnode + pRsp->info.handle = NULL; + + // todo: add counter to make sure other tasks would not be trapped in checkpoint state SStreamCheckpointSourceReq req = {0}; + if (!vnodeIsRoleLeader(pTq->pVnode)) { + tqDebug("vgId:%d not leader, ignore checkpoint-source msg", vgId); + SRpcMsg rsp = {0}; + buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); + tmsgSendRsp(&rsp); // error occurs + return TSDB_CODE_SUCCESS; + } + + if (!pTq->pVnode->restored) { + tqDebug("vgId:%d checkpoint-source msg received during restoring, ignore it", vgId); + SRpcMsg rsp = {0}; + buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); + tmsgSendRsp(&rsp); // error occurs + return TSDB_CODE_SUCCESS; + } SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)msg, len); @@ -1610,6 +1712,9 @@ int32_t tqProcessStreamCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg) { code = TSDB_CODE_MSG_DECODE_ERROR; tDecoderClear(&decoder); tqError("vgId:%d failed to decode checkpoint-source msg, code:%s", vgId, tstrerror(code)); + SRpcMsg rsp = {0}; + buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); + tmsgSendRsp(&rsp); // error occurs return code; } tDecoderClear(&decoder); @@ -1618,11 +1723,17 @@ int32_t tqProcessStreamCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg) { if (pTask == NULL) { tqError("vgId:%d failed to find s-task:0x%x, ignore checkpoint msg. it may have been destroyed already", vgId, req.taskId); + SRpcMsg rsp = {0}; + buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); + tmsgSendRsp(&rsp); // error occurs return TSDB_CODE_SUCCESS; } // downstream not ready, current the stream tasks are not all ready. Ignore this checkpoint req. 
if (pTask->status.downstreamReady != 1) { + pTask->chkInfo.failedId = req.checkpointId; // record the latest failed checkpoint id + pTask->checkpointingId = req.checkpointId; + qError("s-task:%s not ready for checkpoint, since downstream not ready, ignore this checkpoint:%" PRId64 ", set it failure", pTask->id.idStr, req.checkpointId); streamMetaReleaseTask(pMeta, pTask); @@ -1633,36 +1744,51 @@ int32_t tqProcessStreamCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg) { return TSDB_CODE_SUCCESS; } + taosThreadMutexLock(&pTask->lock); + if (pTask->status.taskStatus == TASK_STATUS__HALT) { + qError("s-task:%s not ready for checkpoint, since it is halt, ignore this checkpoint:%" PRId64 ", set it failure", + pTask->id.idStr, req.checkpointId); + taosThreadMutexUnlock(&pTask->lock); + + streamMetaReleaseTask(pMeta, pTask); + + SRpcMsg rsp = {0}; + buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); + tmsgSendRsp(&rsp); // error occurs + return TSDB_CODE_SUCCESS; + } + streamProcessCheckpointSourceReq(pTask, &req); + taosThreadMutexUnlock(&pTask->lock); + int32_t total = 0; taosWLockLatch(&pMeta->lock); // set the initial value for generating check point // set the mgmt epset info according to the checkout source msg from mnode, todo update mgmt epset if needed if (pMeta->chkptNotReadyTasks == 0) { - pMeta->chkptNotReadyTasks = streamMetaGetNumOfStreamTasks(pMeta); - pMeta->totalTasks = pMeta->chkptNotReadyTasks; + pMeta->chkptNotReadyTasks = pMeta->numOfStreamTasks; } - total = taosArrayGetSize(pMeta->pTaskList); + total = pMeta->numOfStreamTasks; taosWUnLockLatch(&pMeta->lock); - qDebug("s-task:%s (vgId:%d) level:%d receive checkpoint-source msg, chkpt:%" PRId64 ", total checkpoint req:%d", + qInfo("s-task:%s (vgId:%d) level:%d receive checkpoint-source msg chkpt:%" PRId64 ", total checkpoint reqs:%d", pTask->id.idStr, vgId, pTask->info.taskLevel, req.checkpointId, total); code = streamAddCheckpointSourceRspMsg(&req, &pMsg->info, pTask, 1); if (code != TSDB_CODE_SUCCESS) { + SRpcMsg rsp = {0}; + buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); + tmsgSendRsp(&rsp); // error occurs return code; } - // todo: when generating checkpoint, no new tasks are allowed to add into current Vnode - // todo: when generating checkpoint, leader of mnode has transfer to other DNode? - streamProcessCheckpointSourceReq(pTask, &req); streamMetaReleaseTask(pMeta, pTask); return code; } // downstream task has complete the stream task checkpoint procedure, let's start the handle the rsp by execute task -int32_t tqProcessStreamTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg) { +int32_t tqProcessTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg) { int32_t vgId = TD_VID(pTq->pVnode); SStreamMeta* pMeta = pTq->pStreamMeta; char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); @@ -1709,36 +1835,36 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) { if (tDecodeStreamTaskUpdateMsg(&decoder, &req) < 0) { rsp.code = TSDB_CODE_MSG_DECODE_ERROR; tqError("vgId:%d failed to decode task update msg, code:%s", vgId, tstrerror(rsp.code)); - goto _end; + tDecoderClear(&decoder); + return rsp.code; } + tDecoderClear(&decoder); + // update the nodeEpset when it exists taosWLockLatch(&pMeta->lock); - // when replay the WAL, we should update the task epset one again and again, the task may be in stop status. 
- int64_t keys[2] = {req.streamId, req.taskId}; - SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); - + // the task epset may be updated again and again, when replaying the WAL, the task may be in stop status. + STaskId id = {.streamId = req.streamId, .taskId = req.taskId}; + SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (ppTask == NULL || *ppTask == NULL) { tqError("vgId:%d failed to acquire task:0x%x when handling update, it may have been dropped already", pMeta->vgId, req.taskId); rsp.code = TSDB_CODE_SUCCESS; taosWUnLockLatch(&pMeta->lock); - goto _end; + taosArrayDestroy(req.pNodeList); + return rsp.code; } SStreamTask* pTask = *ppTask; + tqDebug("s-task:%s receive nodeEp update msg from mnode", pTask->id.idStr); - tqDebug("s-task:%s receive task nodeEp update msg from mnode", pTask->id.idStr); streamTaskUpdateEpsetInfo(pTask, req.pNodeList); streamSetStatusNormal(pTask); SStreamTask** ppHTask = NULL; - if (pTask->historyTaskId.taskId != 0) { - keys[0] = pTask->historyTaskId.streamId; - keys[1] = pTask->historyTaskId.taskId; - - ppHTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + if (pTask->hTaskInfo.id.taskId != 0) { + ppHTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &pTask->hTaskInfo.id, sizeof(pTask->hTaskInfo.id)); if (ppHTask == NULL || *ppHTask == NULL) { tqError("vgId:%d failed to acquire fill-history task:0x%x when handling update, it may have been dropped already", pMeta->vgId, req.taskId); @@ -1760,62 +1886,90 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) { } streamTaskStop(pTask); + taosHashPut(pMeta->pUpdateTaskSet, &pTask->id, sizeof(pTask->id), NULL, 0); + if (ppHTask != NULL) { streamTaskStop(*ppHTask); + tqDebug("s-task:%s task nodeEp update completed, streamTask and related fill-history task closed", pTask->id.idStr); + taosHashPut(pMeta->pUpdateTaskSet, &(*ppHTask)->id, sizeof(pTask->id), NULL, 0); + } else { + tqDebug("s-task:%s task nodeEp update completed, streamTask closed", pTask->id.idStr); } - tqDebug("s-task:%s task nodeEp update completed", pTask->id.idStr); - - pMeta->closedTask += 1; - if (ppHTask != NULL) { - pMeta->closedTask += 1; - } + rsp.code = 0; // possibly only handle the stream task. 
int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); - allStopped = (pMeta->closedTask == numOfTasks); - if (allStopped) { - pMeta->closedTask = 0; + int32_t updateTasks = taosHashGetSize(pMeta->pUpdateTaskSet); + + pMeta->startInfo.startedAfterNodeUpdate = 1; + + if (updateTasks < numOfTasks) { + tqDebug("vgId:%d closed tasks:%d, unclosed:%d, all tasks will be started when nodeEp update completed", vgId, + updateTasks, (numOfTasks - updateTasks)); + taosWUnLockLatch(&pMeta->lock); } else { - tqDebug("vgId:%d closed tasks:%d, not closed:%d", vgId, pMeta->closedTask, (numOfTasks - pMeta->closedTask)); - } - - taosWUnLockLatch(&pMeta->lock); - -_end: - tDecoderClear(&decoder); - - if (allStopped) { + taosHashClear(pMeta->pUpdateTaskSet); if (!pTq->pVnode->restored) { - tqDebug("vgId:%d vnode restore not completed, not restart the tasks", vgId); + tqDebug("vgId:%d vnode restore not completed, not restart the tasks, clear the start after nodeUpdate flag", vgId); + pMeta->startInfo.startedAfterNodeUpdate = 0; + taosWUnLockLatch(&pMeta->lock); } else { - tqDebug("vgId:%d all tasks are stopped, restart them", vgId); - taosWLockLatch(&pMeta->lock); + tqDebug("vgId:%d tasks are all updated and stopped, restart them", vgId); terrno = 0; - int32_t code = streamMetaReopen(pMeta, 0); + int32_t code = streamMetaReopen(pMeta); if (code != 0) { tqError("vgId:%d failed to reopen stream meta", vgId); taosWUnLockLatch(&pMeta->lock); + taosArrayDestroy(req.pNodeList); return -1; } if (streamMetaLoadAllTasks(pTq->pStreamMeta) < 0) { tqError("vgId:%d failed to load stream tasks", vgId); taosWUnLockLatch(&pMeta->lock); + taosArrayDestroy(req.pNodeList); return -1; } - taosWUnLockLatch(&pMeta->lock); if (vnodeIsRoleLeader(pTq->pVnode) && !tsDisableStream) { - vInfo("vgId:%d, restart all stream tasks", vgId); + vInfo("vgId:%d restart all stream tasks after all tasks being updated", vgId); tqStartStreamTasks(pTq); tqCheckAndRunStreamTaskAsync(pTq); + } else { + vInfo("vgId:%d, follower node not start stream tasks", vgId); } + + taosWUnLockLatch(&pMeta->lock); } } + taosArrayDestroy(req.pNodeList); return rsp.code; } +int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg) { + SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*) pMsg->pCont; + + SStreamMeta* pMeta = pTq->pStreamMeta; + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); + if (pTask == NULL) { + tqError("vgId:%d process task-reset req, failed to acquire task:0x%x, it may have been dropped already", pMeta->vgId, + pReq->taskId); + return TSDB_CODE_SUCCESS; + } + + tqDebug("s-task:%s receive task-reset msg from mnode, reset status and ready for data processing", pTask->id.idStr); + + // clear flag set during do checkpoint, and open inputQ for all upstream tasks + if (pTask->status.taskStatus == TASK_STATUS__CK) { + streamTaskClearCheckInfo(pTask); + taosArrayClear(pTask->pReadyMsgList); + streamSetStatusNormal(pTask); + } + + streamMetaReleaseTask(pMeta, pTask); + return TSDB_CODE_SUCCESS; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index 4d470ee5b6..2a56cd3847 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -102,6 +102,7 @@ bool isValValidForTable(STqHandle* pHandle, SWalCont* pHead) { for (int32_t iReq = 0; iReq < req.nReqs; iReq++) { pCreateReq = req.pReqs + iReq; taosMemoryFreeClear(pCreateReq->comment); + taosMemoryFreeClear(pCreateReq->sql); if (pCreateReq->type == TSDB_CHILD_TABLE) { 
taosArrayDestroy(pCreateReq->ctb.tagName); } @@ -268,6 +269,8 @@ STqReader* tqReaderOpen(SVnode* pVnode) { } void tqReaderClose(STqReader* pReader) { + if (pReader == NULL) return; + // close wal reader if (pReader->pWalReader) { walCloseReader(pReader->pWalReader); @@ -308,7 +311,7 @@ int32_t extractMsgFromWal(SWalReader* pReader, void** pItem, int64_t maxVer, con SWalCont* pCont = &pReader->pHead->head; int64_t ver = pCont->version; if (ver > maxVer) { - tqDebug("maxVer in WAL:%" PRId64 " reached current:%" PRId64 ", do not scan wal anymore, %s", maxVer, ver, id); + tqDebug("maxVer in WAL:%" PRId64 " reached, current:%" PRId64 ", do not scan wal anymore, %s", maxVer, ver, id); return TSDB_CODE_SUCCESS; } @@ -1118,6 +1121,7 @@ int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) { taosArrayDestroy(list); taosHashCancelIterate(pTq->pHandle, pIter); taosWUnLockLatch(&pTq->lock); + return ret; } tqReaderSetTbUidList(pTqHandle->execHandle.pTqReader, list, NULL); @@ -1128,10 +1132,11 @@ int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) { } } taosWUnLockLatch(&pTq->lock); + // update the table list handle for each stream scanner/wal reader taosWLockLatch(&pTq->pStreamMeta->lock); while (1) { - pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter); + pIter = taosHashIterate(pTq->pStreamMeta->pTasksMap, pIter); if (pIter == NULL) { break; } diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index 0925573248..7d1c754005 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -24,23 +24,23 @@ typedef struct STableSinkInfo { tstr name; } STableSinkInfo; +static bool hasOnlySubmitData(const SArray* pBlocks, int32_t numOfBlocks); +static int32_t tsAscendingSortFn(const void* p1, const void* p2); static int32_t setDstTableDataUid(SVnode* pVnode, SStreamTask* pTask, SSDataBlock* pDataBlock, char* stbFullName, SSubmitTbData* pTableData); static int32_t setDstTableDataPayload(SStreamTask* pTask, int32_t blockIndex, SSDataBlock* pDataBlock, SSubmitTbData* pTableData); static int32_t doBuildAndSendDeleteMsg(SVnode* pVnode, char* stbFullName, SSDataBlock* pDataBlock, SStreamTask* pTask, int64_t suid); -static int32_t tqBuildSubmitReq(SSubmitReq2* pSubmitReq, int32_t vgId, void** pMsg, int32_t* msgLen); -static int32_t tsAscendingSortFn(const void* p1, const void* p2); +static int32_t doBuildAndSendSubmitMsg(SVnode* pVnode, SStreamTask* pTask, SSubmitReq2* pReq, int32_t numOfBlocks); +static int32_t buildSubmitMsgImpl(SSubmitReq2* pSubmitReq, int32_t vgId, void** pMsg, int32_t* msgLen); static int32_t doConvertRows(SSubmitTbData* pTableData, STSchema* pTSchema, SSDataBlock* pDataBlock, const char* id); static int32_t doWaitForDstTableCreated(SVnode* pVnode, SStreamTask* pTask, STableSinkInfo* pTableSinkInfo, const char* dstTableName, int64_t* uid); static int32_t doPutIntoCache(SSHashObj* pSinkTableMap, STableSinkInfo* pTableSinkInfo, uint64_t groupId, const char* id); -static SVCreateTbReq* buildAutoCreateTableReq(char* stbFullName, int64_t suid, int32_t numOfCols, - SSDataBlock* pDataBlock); -static bool isValidDstChildTable(SMetaReader* pReader, int32_t vgId, const char* ctbName, int64_t suid); -static int32_t doMergeExistedRows(SSubmitTbData* pExisted, const SSubmitTbData* pNew, const char* id); -static int32_t doBuildAndSendSubmitMsg(SVnode* pVnode, SStreamTask* pTask, SSubmitReq2* pReq, int32_t numOfBlocks); +static int32_t doMergeExistedRows(SSubmitTbData* pExisted, const SSubmitTbData* 
pNew, const char* id); +static bool isValidDstChildTable(SMetaReader* pReader, int32_t vgId, const char* ctbName, int64_t suid); +static SVCreateTbReq* buildAutoCreateTableReq(char* stbFullName, int64_t suid, int32_t numOfCols, SSDataBlock* pDataBlock); int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq, const char* pIdStr) { @@ -142,7 +142,7 @@ static int32_t doBuildAndSendCreateTableMsg(SVnode* pVnode, char* stbFullName, S int64_t suid) { tqDebug("s-task:%s build create table msg", pTask->id.idStr); - STSchema* pTSchema = pTask->tbSink.pTSchema; + STSchema* pTSchema = pTask->outputInfo.tbSink.pTSchema; int32_t rows = pDataBlock->info.rows; SArray* tagArray = NULL; int32_t code = 0; @@ -255,7 +255,7 @@ int32_t doBuildAndSendSubmitMsg(SVnode* pVnode, SStreamTask* pTask, SSubmitReq2* void* pBuf = NULL; int32_t numOfFinalBlocks = taosArrayGetSize(pReq->aSubmitTbData); - int32_t code = tqBuildSubmitReq(pReq, vgId, &pBuf, &len); + int32_t code = buildSubmitMsgImpl(pReq, vgId, &pBuf, &len); if (code != TSDB_CODE_SUCCESS) { tqError("s-task:%s build submit msg failed, vgId:%d, code:%s", id, vgId, tstrerror(code)); return code; @@ -270,14 +270,15 @@ int32_t doBuildAndSendSubmitMsg(SVnode* pVnode, SStreamTask* pTask, SSubmitReq2* tqError("s-task:%s failed to put into write-queue since %s", id, terrstr()); } - pTask->sinkRecorder.numOfSubmit += 1; + SSinkRecorder* pRec = &pTask->execInfo.sink; - if ((pTask->sinkRecorder.numOfSubmit % 5000) == 0) { - SSinkTaskRecorder* pRec = &pTask->sinkRecorder; - double el = (taosGetTimestampMs() - pTask->tsInfo.sinkStart) / 1000.0; + pRec->numOfSubmit += 1; + if ((pRec->numOfSubmit % 1000) == 0) { + double el = (taosGetTimestampMs() - pTask->execInfo.start) / 1000.0; tqInfo("s-task:%s vgId:%d write %" PRId64 " blocks (%" PRId64 " rows) in %" PRId64 - " submit into dst table, duration:%.2f Sec.", - pTask->id.idStr, vgId, pRec->numOfBlocks, pRec->numOfRows, pRec->numOfSubmit, el); + " submit into dst table, %.2fMiB duration:%.2f Sec.", + pTask->id.idStr, vgId, pRec->numOfBlocks, pRec->numOfRows, pRec->numOfSubmit, SIZE_IN_MiB(pRec->dataSize), + el); } return TSDB_CODE_SUCCESS; @@ -327,7 +328,7 @@ int32_t doMergeExistedRows(SSubmitTbData* pExisted, const SSubmitTbData* pNew, c taosArrayDestroy(pExisted->aRowP); pExisted->aRowP = pFinal; - tqDebug("s-task:%s rows merged, final rows:%d, uid:%" PRId64 ", existed auto-create table:%d, new-block:%d", id, + tqTrace("s-task:%s rows merged, final rows:%d, uid:%" PRId64 ", existed auto-create table:%d, new-block:%d", id, (int32_t)taosArrayGetSize(pFinal), pExisted->uid, (pExisted->pCreateTbReq != NULL), (pNew->pCreateTbReq != NULL)); return TSDB_CODE_SUCCESS; } @@ -462,7 +463,7 @@ int32_t doPutIntoCache(SSHashObj* pSinkTableMap, STableSinkInfo* pTableSinkInfo, return code; } -int32_t tqBuildSubmitReq(SSubmitReq2* pSubmitReq, int32_t vgId, void** pMsg, int32_t* msgLen) { +int32_t buildSubmitMsgImpl(SSubmitReq2* pSubmitReq, int32_t vgId, void** pMsg, int32_t* msgLen) { int32_t code = 0; void* pBuf = NULL; *msgLen = 0; @@ -537,7 +538,7 @@ int32_t doConvertRows(SSubmitTbData* pTableData, STSchema* pTSchema, SSDataBlock if (k == 0) { SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, dataIndex); void* colData = colDataGetData(pColData, j); - tqDebug("s-task:%s sink row %d, col %d ts %" PRId64, id, j, k, *(int64_t*)colData); + tqTrace("s-task:%s sink row %d, col %d ts %" PRId64, id, j, k, *(int64_t*)colData); } if (IS_SET_NULL(pCol)) { @@ -587,7 +588,7 @@ 
int32_t doConvertRows(SSubmitTbData* pTableData, STSchema* pTSchema, SSDataBlock int32_t doWaitForDstTableCreated(SVnode* pVnode, SStreamTask* pTask, STableSinkInfo* pTableSinkInfo, const char* dstTableName, int64_t* uid) { int32_t vgId = TD_VID(pVnode); - int64_t suid = pTask->tbSink.stbUid; + int64_t suid = pTask->outputInfo.tbSink.stbUid; const char* id = pTask->id.idStr; while (pTableSinkInfo->uid == 0) { @@ -630,12 +631,12 @@ int32_t setDstTableDataUid(SVnode* pVnode, SStreamTask* pTask, SSDataBlock* pDat char* dstTableName = pDataBlock->info.parTbName; int32_t numOfRows = pDataBlock->info.rows; const char* id = pTask->id.idStr; - int64_t suid = pTask->tbSink.stbUid; - STSchema* pTSchema = pTask->tbSink.pTSchema; + int64_t suid = pTask->outputInfo.tbSink.stbUid; + STSchema* pTSchema = pTask->outputInfo.tbSink.pTSchema; int32_t vgId = TD_VID(pVnode); STableSinkInfo* pTableSinkInfo = NULL; - bool alreadyCached = tqGetTableInfo(pTask->tbSink.pTblInfo, groupId, &pTableSinkInfo); + bool alreadyCached = tqGetTableInfo(pTask->outputInfo.tbSink.pTblInfo, groupId, &pTableSinkInfo); if (alreadyCached) { if (dstTableName[0] == 0) { // data block does not set the destination table name @@ -672,10 +673,10 @@ int32_t setDstTableDataUid(SVnode* pVnode, SStreamTask* pTask, SSDataBlock* pDat pTableData->uid = pTableSinkInfo->uid; if (pTableData->uid == 0) { - tqDebug("s-task:%s cached tableInfo uid is invalid, acquire it from meta", id); + tqTrace("s-task:%s cached tableInfo uid is invalid, acquire it from meta", id); return doWaitForDstTableCreated(pVnode, pTask, pTableSinkInfo, dstTableName, &pTableData->uid); } else { - tqDebug("s-task:%s set the dstTable uid from cache:%"PRId64, id, pTableData->uid); + tqTrace("s-task:%s set the dstTable uid from cache:%"PRId64, id, pTableData->uid); } } else { // The auto-create option will always set to be open for those submit messages, which arrive during the period @@ -701,7 +702,7 @@ int32_t setDstTableDataUid(SVnode* pVnode, SStreamTask* pTask, SSDataBlock* pDat } pTableSinkInfo->uid = 0; - doPutIntoCache(pTask->tbSink.pTblInfo, pTableSinkInfo, groupId, id); + doPutIntoCache(pTask->outputInfo.tbSink.pTblInfo, pTableSinkInfo, groupId, id); } else { bool isValid = isValidDstChildTable(&mr, vgId, dstTableName, suid); if (!isValid) { @@ -715,7 +716,7 @@ int32_t setDstTableDataUid(SVnode* pVnode, SStreamTask* pTask, SSDataBlock* pDat pTableSinkInfo->uid = mr.me.uid; metaReaderClear(&mr); - doPutIntoCache(pTask->tbSink.pTblInfo, pTableSinkInfo, groupId, id); + doPutIntoCache(pTask->outputInfo.tbSink.pTblInfo, pTableSinkInfo, groupId, id); } } } @@ -729,45 +730,44 @@ int32_t setDstTableDataPayload(SStreamTask* pTask, int32_t blockIndex, SSDataBlo const char* id = pTask->id.idStr; tqDebug("s-task:%s sink data pipeline, build submit msg from %dth resBlock, including %d rows, dst suid:%" PRId64, - id, blockIndex + 1, numOfRows, pTask->tbSink.stbUid); + id, blockIndex + 1, numOfRows, pTask->outputInfo.tbSink.stbUid); char* dstTableName = pDataBlock->info.parTbName; // convert all rows - int32_t code = doConvertRows(pTableData, pTask->tbSink.pTSchema, pDataBlock, id); + int32_t code = doConvertRows(pTableData, pTask->outputInfo.tbSink.pTSchema, pDataBlock, id); if (code != TSDB_CODE_SUCCESS) { tqError("s-task:%s failed to convert rows from result block, code:%s", id, tstrerror(terrno)); return code; } taosArraySort(pTableData->aRowP, tsAscendingSortFn); - tqDebug("s-task:%s build submit msg for dstTable:%s, numOfRows:%d", id, dstTableName, numOfRows); + 
tqTrace("s-task:%s build submit msg for dstTable:%s, numOfRows:%d", id, dstTableName, numOfRows); return code; } +bool hasOnlySubmitData(const SArray* pBlocks, int32_t numOfBlocks) { + for(int32_t i = 0; i < numOfBlocks; ++i) { + SSDataBlock* p = taosArrayGet(pBlocks, i); + if (p->info.type == STREAM_DELETE_RESULT || p->info.type == STREAM_CREATE_CHILD_TABLE) { + return false; + } + } + + return true; +} + void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { const SArray* pBlocks = (const SArray*)data; SVnode* pVnode = (SVnode*)vnode; - int64_t suid = pTask->tbSink.stbUid; - char* stbFullName = pTask->tbSink.stbFullName; - STSchema* pTSchema = pTask->tbSink.pTSchema; + int64_t suid = pTask->outputInfo.tbSink.stbUid; + char* stbFullName = pTask->outputInfo.tbSink.stbFullName; + STSchema* pTSchema = pTask->outputInfo.tbSink.pTSchema; int32_t vgId = TD_VID(pVnode); int32_t numOfBlocks = taosArrayGetSize(pBlocks); int32_t code = TSDB_CODE_SUCCESS; const char* id = pTask->id.idStr; - if (pTask->tsInfo.sinkStart == 0) { - pTask->tsInfo.sinkStart = taosGetTimestampMs(); - } - - bool onlySubmitData = true; - for(int32_t i = 0; i < numOfBlocks; ++i) { - SSDataBlock* p = taosArrayGet(pBlocks, i); - if (p->info.type == STREAM_DELETE_RESULT || p->info.type == STREAM_CREATE_CHILD_TABLE) { - onlySubmitData = false; - break; - } - } - + bool onlySubmitData = hasOnlySubmitData(pBlocks, numOfBlocks); if (!onlySubmitData) { tqDebug("vgId:%d, s-task:%s write %d stream resBlock(s) into table, has delete block, submit one-by-one", vgId, id, numOfBlocks); @@ -785,7 +785,7 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { } else if (pDataBlock->info.type == STREAM_CHECKPOINT) { continue; } else { - pTask->sinkRecorder.numOfBlocks += 1; + pTask->execInfo.sink.numOfBlocks += 1; SSubmitReq2 submitReq = {.aSubmitTbData = taosArrayInit(1, sizeof(SSubmitTbData))}; if (submitReq.aSubmitTbData == NULL) { @@ -824,6 +824,8 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { bool hasSubmit = false; for (int32_t i = 0; i < numOfBlocks; i++) { if (streamTaskShouldStop(&pTask->status)) { + taosHashCleanup(pTableIndexMap); + tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); return; } @@ -833,7 +835,7 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { } hasSubmit = true; - pTask->sinkRecorder.numOfBlocks += 1; + pTask->execInfo.sink.numOfBlocks += 1; uint64_t groupId = pDataBlock->info.id.groupId; SSubmitTbData tbData = {.suid = suid, .uid = 0, .sver = pTSchema->version}; @@ -867,7 +869,7 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { } } - pTask->sinkRecorder.numOfRows += pDataBlock->info.rows; + pTask->execInfo.sink.numOfRows += pDataBlock->info.rows; } taosHashCleanup(pTableIndexMap); diff --git a/source/dnode/vnode/src/tq/tqStreamStateSnap.c b/source/dnode/vnode/src/tq/tqStreamStateSnap.c index c4ddaa9e54..41392ba27b 100644 --- a/source/dnode/vnode/src/tq/tqStreamStateSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamStateSnap.c @@ -91,7 +91,7 @@ int32_t streamStateSnapRead(SStreamStateReader* pReader, uint8_t** ppData) { uint8_t* rowData = NULL; int64_t len; code = streamSnapRead(pReader->pReaderImpl, &rowData, &len); - if (rowData == NULL || len == 0) { + if (code != 0 || rowData == NULL || len == 0) { return code; } *ppData = taosMemoryMalloc(sizeof(SSnapDataHdr) + len); @@ -104,8 +104,8 @@ int32_t streamStateSnapRead(SStreamStateReader* pReader, uint8_t** ppData) { pHdr->type = 
SNAP_DATA_STREAM_STATE_BACKEND; pHdr->size = len; memcpy(pHdr->data, rowData, len); - taosMemoryFree(rowData); tqDebug("vgId:%d, vnode stream-state snapshot read data success", TD_VID(pReader->pTq->pVnode)); + taosMemoryFree(rowData); return code; _err: @@ -169,7 +169,7 @@ int32_t streamStateSnapWriterClose(SStreamStateWriter* pWriter, int8_t rollback) } int32_t streamStateRebuildFromSnap(SStreamStateWriter* pWriter, int64_t chkpId) { tqDebug("vgId:%d, vnode %s start to rebuild stream-state", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); - int32_t code = streamMetaReopen(pWriter->pTq->pStreamMeta, chkpId); + int32_t code = streamMetaReopen(pWriter->pTq->pStreamMeta); if (code == 0) { code = streamStateLoadTasks(pWriter); } diff --git a/source/dnode/vnode/src/tq/tqStreamTask.c b/source/dnode/vnode/src/tq/tqStreamTask.c index 1ac2ddb9cb..b9cb22e7a4 100644 --- a/source/dnode/vnode/src/tq/tqStreamTask.c +++ b/source/dnode/vnode/src/tq/tqStreamTask.c @@ -16,9 +16,12 @@ #include "tq.h" #include "vnd.h" +#define MAX_REPEAT_SCAN_THRESHOLD 3 +#define SCAN_WAL_IDLE_DURATION 100 + static int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta, bool* pScanIdle); static int32_t setWalReaderStartOffset(SStreamTask* pTask, int32_t vgId); -static void handleFillhistoryScanComplete(SStreamTask* pTask, int64_t ver); +static bool handleFillhistoryScanComplete(SStreamTask* pTask, int64_t ver); // extract data blocks(submit/delete) from WAL, and add them into the input queue for all the sources tasks. int32_t tqScanWal(STQ* pTq) { @@ -34,12 +37,10 @@ int32_t tqScanWal(STQ* pTq) { bool shouldIdle = true; doScanWalForAllTasks(pTq->pStreamMeta, &shouldIdle); - int32_t times = 0; - if (shouldIdle) { taosWLockLatch(&pMeta->lock); - times = (--pMeta->walScanCounter); + int32_t times = (--pMeta->walScanCounter); ASSERT(pMeta->walScanCounter >= 0); if (pMeta->walScanCounter <= 0) { @@ -48,8 +49,10 @@ int32_t tqScanWal(STQ* pTq) { } taosWUnLockLatch(&pMeta->lock); - tqDebug("vgId:%d scan wal for stream tasks for %d times", vgId, times); + tqDebug("vgId:%d scan wal for stream tasks for %d times in %dms", vgId, times, SCAN_WAL_IDLE_DURATION); } + + taosMsleep(SCAN_WAL_IDLE_DURATION); } int64_t el = (taosGetTimestampMs() - st); @@ -70,6 +73,8 @@ int32_t tqCheckAndRunStreamTask(STQ* pTq) { SArray* pTaskList = NULL; taosWLockLatch(&pMeta->lock); pTaskList = taosArrayDup(pMeta->pTaskList, NULL); + taosHashClear(pMeta->startInfo.pReadyTaskSet); + pMeta->startInfo.startTs = taosGetTimestampMs(); taosWUnLockLatch(&pMeta->lock); // broadcast the check downstream tasks msg @@ -94,8 +99,8 @@ int32_t tqCheckAndRunStreamTask(STQ* pTq) { continue; } - pTask->tsInfo.init = taosGetTimestampMs(); - tqDebug("s-task:%s set the init ts:%"PRId64, pTask->id.idStr, pTask->tsInfo.init); + pTask->execInfo.init = taosGetTimestampMs(); + tqDebug("s-task:%s start check downstream tasks, set the init ts:%"PRId64, pTask->id.idStr, pTask->execInfo.init); streamSetStatusNormal(pTask); streamTaskCheckDownstream(pTask); @@ -111,12 +116,9 @@ int32_t tqCheckAndRunStreamTaskAsync(STQ* pTq) { int32_t vgId = TD_VID(pTq->pVnode); SStreamMeta* pMeta = pTq->pStreamMeta; - taosWLockLatch(&pMeta->lock); - int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); if (numOfTasks == 0) { tqDebug("vgId:%d no stream tasks existed to run", vgId); - taosWUnLockLatch(&pMeta->lock); return 0; } @@ -124,7 +126,6 @@ int32_t tqCheckAndRunStreamTaskAsync(STQ* pTq) { if (pRunReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; tqError("vgId:%d failed to create msg to start 
wal scanning to launch stream tasks, code:%s", vgId, terrstr()); - taosWUnLockLatch(&pMeta->lock); return -1; } @@ -135,8 +136,6 @@ int32_t tqCheckAndRunStreamTaskAsync(STQ* pTq) { SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg); - taosWUnLockLatch(&pMeta->lock); - return 0; } @@ -159,6 +158,9 @@ int32_t tqScanWalAsync(STQ* pTq, bool ckPause) { } pMeta->walScanCounter += 1; + if (pMeta->walScanCounter > MAX_REPEAT_SCAN_THRESHOLD) { + pMeta->walScanCounter = MAX_REPEAT_SCAN_THRESHOLD; + } if (pMeta->walScanCounter > 1) { tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->walScanCounter); @@ -166,7 +168,7 @@ int32_t tqScanWalAsync(STQ* pTq, bool ckPause) { return 0; } - int32_t numOfPauseTasks = pTq->pStreamMeta->pauseTaskNum; + int32_t numOfPauseTasks = pTq->pStreamMeta->numOfPausedTasks; if (ckPause && numOfTasks == numOfPauseTasks) { tqDebug("vgId:%d ignore all submit, all streams had been paused, reset the walScanCounter", vgId); @@ -201,8 +203,7 @@ int32_t tqStopStreamTasks(STQ* pTq) { int32_t vgId = TD_VID(pTq->pVnode); int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); - tqDebug("vgId:%d start to stop all %d stream task(s)", vgId, numOfTasks); - + tqDebug("vgId:%d stop all %d stream task(s)", vgId, numOfTasks); if (numOfTasks == 0) { return TSDB_CODE_SUCCESS; } @@ -232,27 +233,23 @@ int32_t tqStartStreamTasks(STQ* pTq) { int32_t vgId = TD_VID(pTq->pVnode); int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); - tqDebug("vgId:%d start to stop all %d stream task(s)", vgId, numOfTasks); - + tqDebug("vgId:%d start all %d stream task(s)", vgId, numOfTasks); if (numOfTasks == 0) { return TSDB_CODE_SUCCESS; } - taosWLockLatch(&pMeta->lock); - for (int32_t i = 0; i < numOfTasks; ++i) { SStreamTaskId* pTaskId = taosArrayGet(pMeta->pTaskList, i); - int64_t key[2] = {pTaskId->streamId, pTaskId->taskId}; - SStreamTask** pTask = taosHashGet(pMeta->pTasks, key, sizeof(key)); + STaskId id = {.streamId = pTaskId->streamId, .taskId = pTaskId->taskId}; + SStreamTask** pTask = taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); int8_t status = (*pTask)->status.taskStatus; - if (status == TASK_STATUS__STOP) { + if (status == TASK_STATUS__STOP && (*pTask)->info.fillHistory != 1) { streamSetStatusNormal(*pTask); } } - taosWUnLockLatch(&pMeta->lock); return 0; } @@ -301,7 +298,7 @@ int32_t setWalReaderStartOffset(SStreamTask* pTask, int32_t vgId) { } // todo handle memory error -void handleFillhistoryScanComplete(SStreamTask* pTask, int64_t ver) { +bool handleFillhistoryScanComplete(SStreamTask* pTask, int64_t ver) { const char* id = pTask->id.idStr; int64_t maxVer = pTask->dataRange.range.maxVer; @@ -311,15 +308,98 @@ void handleFillhistoryScanComplete(SStreamTask* pTask, int64_t ver) { ", not scan wal anymore, add transfer-state block into inputQ", id, ver, maxVer); - double el = (taosGetTimestampMs() - pTask->tsInfo.step2Start) / 1000.0; + double el = (taosGetTimestampMs() - pTask->execInfo.step2Start) / 1000.0; qDebug("s-task:%s scan-history from WAL stage(step 2) ended, elapsed time:%.2fs", id, el); /*int32_t code = */streamTaskPutTranstateIntoInputQ(pTask); - /*int32_t code = */ streamSchedExec(pTask); + return true; } else { qWarn("s-task:%s fill-history scan WAL, nextProcessVer:%" PRId64 " out of the maximum ver:%" PRId64 ", not scan wal", id, ver, maxVer); } } + + return false; +} + +static bool taskReadyForDataFromWal(SStreamTask* pTask) { + // 
non-source or fill-history tasks don't need to respond to the WAL scan action. + if ((pTask->info.taskLevel != TASK_LEVEL__SOURCE) || (pTask->status.downstreamReady == 0)) { + return false; + } + + // not in ready state, do not handle the data from wal + int32_t status = pTask->status.taskStatus; + if (status != TASK_STATUS__NORMAL) { + tqTrace("s-task:%s not ready for submit block in wal, status:%s", pTask->id.idStr, streamGetTaskStatusStr(status)); + return false; + } + + // fill-history task has entered into the last phase, no need to do anything + if ((pTask->info.fillHistory == 1) && pTask->status.appendTranstateBlock) { + ASSERT(status == TASK_STATUS__NORMAL); + // the maximum version of data in the WAL has reached already, the step2 is done + tqDebug("s-task:%s fill-history reach the maximum ver:%" PRId64 ", not scan wal anymore", pTask->id.idStr, + pTask->dataRange.range.maxVer); + return false; + } + + // check if input queue is full or not + if (streamQueueIsFull(pTask->inputInfo.queue)) { + tqTrace("s-task:%s input queue is full, do nothing", pTask->id.idStr); + return false; + } + + // the input queue of downstream task is full, so the output is blocked, stopped for a while + if (pTask->inputInfo.status == TASK_INPUT_STATUS__BLOCKED) { + tqDebug("s-task:%s inputQ is blocked, do nothing", pTask->id.idStr); + return false; + } + + return true; +} + +static bool doPutDataIntoInputQFromWal(SStreamTask* pTask, int64_t maxVer, int32_t* numOfItems) { + const char* id = pTask->id.idStr; + int32_t numOfNewItems = 0; + + while(1) { + if ((pTask->info.fillHistory == 1) && pTask->status.appendTranstateBlock) { + *numOfItems += numOfNewItems; + return numOfNewItems > 0; + } + + SStreamQueueItem* pItem = NULL; + int32_t code = extractMsgFromWal(pTask->exec.pWalReader, (void**)&pItem, maxVer, id); + if (code != TSDB_CODE_SUCCESS || pItem == NULL) { // failed, continue + int64_t currentVer = walReaderGetCurrentVer(pTask->exec.pWalReader); + bool itemInFillhistory = handleFillhistoryScanComplete(pTask, currentVer); + if (itemInFillhistory) { + numOfNewItems += 1; + } + break; + } + + if (pItem != NULL) { + code = streamTaskPutDataIntoInputQ(pTask, pItem); + if (code == TSDB_CODE_SUCCESS) { + numOfNewItems += 1; + int64_t ver = walReaderGetCurrentVer(pTask->exec.pWalReader); + pTask->chkInfo.nextProcessVer = ver; + tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", id, ver); + + bool itemInFillhistory = handleFillhistoryScanComplete(pTask, ver); + if (itemInFillhistory) { + break; + } + } else { + tqError("s-task:%s append input queue failed, code: too many items, ver:%" PRId64, id, pTask->chkInfo.nextProcessVer); + break; + } + } + } + + *numOfItems += numOfNewItems; + return numOfNewItems > 0; } int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta, bool* pScanIdle) { @@ -344,45 +424,13 @@ int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta, bool* pScanIdle) { numOfTasks = taosArrayGetSize(pTaskList); for (int32_t i = 0; i < numOfTasks; ++i) { - SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i); + STaskId* pTaskId = taosArrayGet(pTaskList, i); SStreamTask* pTask = streamMetaAcquireTask(pStreamMeta, pTaskId->streamId, pTaskId->taskId); if (pTask == NULL) { continue; } - int32_t status = pTask->status.taskStatus; - - // non-source or fill-history tasks don't need to response the WAL scan action.
- if ((pTask->info.taskLevel != TASK_LEVEL__SOURCE) || (pTask->status.downstreamReady == 0)) { - streamMetaReleaseTask(pStreamMeta, pTask); - continue; - } - - const char* pStatus = streamGetTaskStatusStr(status); - if (status != TASK_STATUS__NORMAL) { - tqDebug("s-task:%s not ready for new submit block from wal, status:%s", pTask->id.idStr, pStatus); - streamMetaReleaseTask(pStreamMeta, pTask); - continue; - } - - if ((pTask->info.fillHistory == 1) && pTask->status.appendTranstateBlock) { - ASSERT(status == TASK_STATUS__NORMAL); - // the maximum version of data in the WAL has reached already, the step2 is done - tqDebug("s-task:%s fill-history reach the maximum ver:%" PRId64 ", not scan wal anymore", pTask->id.idStr, - pTask->dataRange.range.maxVer); - streamMetaReleaseTask(pStreamMeta, pTask); - continue; - } - - if (streamQueueIsFull(pTask->inputInfo.queue->pQueue, true)) { - tqTrace("s-task:%s input queue is full, do nothing", pTask->id.idStr); - streamMetaReleaseTask(pStreamMeta, pTask); - continue; - } - - // downstream task has blocked the output, stopped for a while - if (pTask->inputInfo.status == TASK_INPUT_STATUS__BLOCKED) { - tqDebug("s-task:%s inputQ is blocked, do nothing", pTask->id.idStr); + if (!taskReadyForDataFromWal(pTask)) { streamMetaReleaseTask(pStreamMeta, pTask); continue; } @@ -401,7 +449,7 @@ int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta, bool* pScanIdle) { taosThreadMutexLock(&pTask->lock); - pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); + const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); if (pTask->status.taskStatus != TASK_STATUS__NORMAL) { tqDebug("s-task:%s not ready for submit block from wal, status:%s", pTask->id.idStr, pStatus); taosThreadMutexUnlock(&pTask->lock); @@ -409,33 +457,11 @@ int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta, bool* pScanIdle) { continue; } - SStreamQueueItem* pItem = NULL; - code = extractMsgFromWal(pTask->exec.pWalReader, (void**)&pItem, maxVer, pTask->id.idStr); - - if ((code != TSDB_CODE_SUCCESS || pItem == NULL) && (numOfItems == 0)) { // failed, continue - handleFillhistoryScanComplete(pTask, walReaderGetCurrentVer(pTask->exec.pWalReader)); - streamMetaReleaseTask(pStreamMeta, pTask); - taosThreadMutexUnlock(&pTask->lock); - continue; - } - - if (pItem != NULL) { - noDataInWal = false; - code = streamTaskPutDataIntoInputQ(pTask, pItem); - if (code == TSDB_CODE_SUCCESS) { - int64_t ver = walReaderGetCurrentVer(pTask->exec.pWalReader); - pTask->chkInfo.nextProcessVer = ver; - handleFillhistoryScanComplete(pTask, ver); - tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", pTask->id.idStr, ver); - } else { - tqError("s-task:%s append input queue failed, too many in inputQ, ver:%" PRId64, pTask->id.idStr, - pTask->chkInfo.nextProcessVer); - } - } - + bool hasNewData = doPutDataIntoInputQFromWal(pTask, maxVer, &numOfItems); taosThreadMutexUnlock(&pTask->lock); - if ((code == TSDB_CODE_SUCCESS) || (numOfItems > 0)) { + if ((numOfItems > 0) || hasNewData) { + noDataInWal = false; code = streamSchedExec(pTask); if (code != TSDB_CODE_SUCCESS) { streamMetaReleaseTask(pStreamMeta, pTask); diff --git a/source/dnode/vnode/src/tq/tqStreamTaskSnap.c b/source/dnode/vnode/src/tq/tqStreamTaskSnap.c index 7b3f1aac6d..09fffa1f74 100644 --- a/source/dnode/vnode/src/tq/tqStreamTaskSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamTaskSnap.c @@ -198,6 +198,8 @@ int32_t streamTaskSnapWriterClose(SStreamTaskWriter* pWriter, int8_t rollback) { 
taosWLockLatch(&pTq->pStreamMeta->lock); tqDebug("vgId:%d, vnode stream-task snapshot writer closed", TD_VID(pTq->pVnode)); + + taosWLockLatch(&pTq->pStreamMeta->lock); if (rollback) { tdbAbort(pTq->pStreamMeta->db, pTq->pStreamMeta->txn); } else { @@ -206,6 +208,12 @@ int32_t streamTaskSnapWriterClose(SStreamTaskWriter* pWriter, int8_t rollback) { code = tdbPostCommit(pTq->pStreamMeta->db, pTq->pStreamMeta->txn); if (code) goto _err; } + if (tdbBegin(pTq->pStreamMeta->db, &pTq->pStreamMeta->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) { + code = -1; + goto _err; + } + + taosWUnLockLatch(&pTq->pStreamMeta->lock); if (tdbBegin(pTq->pStreamMeta->db, &pTq->pStreamMeta->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) { code = -1; @@ -228,15 +236,13 @@ _err: int32_t streamTaskSnapWrite(SStreamTaskWriter* pWriter, uint8_t* pData, uint32_t nData) { int32_t code = 0; STQ* pTq = pWriter->pTq; - STqHandle handle; SSnapDataHdr* pHdr = (SSnapDataHdr*)pData; if (pHdr->type == SNAP_DATA_STREAM_TASK) { - SStreamTaskId task = {0}; + STaskId taskId = {0}; SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr)); - - code = tDecodeStreamTaskId(&decoder, &task); + code = tDecodeStreamTaskId(&decoder, &taskId); if (code < 0) { tDecoderClear(&decoder); goto _err; @@ -244,9 +250,7 @@ int32_t streamTaskSnapWrite(SStreamTaskWriter* pWriter, uint8_t* pData, uint32_t tDecoderClear(&decoder); // tdbTbInsert(TTB *pTb, const void *pKey, int keyLen, const void *pVal, int valLen, TXN *pTxn) - taosWLockLatch(&pTq->pStreamMeta->lock); - int64_t key[2] = {task.streamId, task.taskId}; - + int64_t key[2] = {taskId.streamId, taskId.taskId}; taosWLockLatch(&pTq->pStreamMeta->lock); if (tdbTbUpsert(pTq->pStreamMeta->pTaskDb, key, sizeof(int64_t) << 1, (uint8_t*)pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr), pTq->pStreamMeta->txn) < 0) { diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index 04695c1f63..897e3f1e2e 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -36,10 +36,21 @@ int32_t tqInitDataRsp(SMqDataRsp* pRsp, STqOffsetVal pOffset) { return 0; } -void tqUpdateNodeStage(STQ* pTq) { - SSyncState state = syncGetState(pTq->pVnode->sync); - pTq->pStreamMeta->stage = state.term; - tqDebug("vgId:%d update the meta stage to be:%"PRId64, pTq->pStreamMeta->vgId, pTq->pStreamMeta->stage); +void tqUpdateNodeStage(STQ* pTq, bool isLeader) { + SSyncState state = syncGetState(pTq->pVnode->sync); + SStreamMeta* pMeta = pTq->pStreamMeta; + int64_t stage = pMeta->stage; + + pMeta->stage = state.term; + pMeta->role = (isLeader)? 
NODE_ROLE_LEADER:NODE_ROLE_FOLLOWER; + if (isLeader) { + tqInfo("vgId:%d update meta stage:%" PRId64 ", prev:%" PRId64 " leader:%d, start to send Hb", pMeta->vgId, + state.term, stage, isLeader); + streamMetaStartHb(pMeta); + } else { + tqInfo("vgId:%d update meta stage:%" PRId64 " prev:%" PRId64 " leader:%d", pMeta->vgId, state.term, stage, + isLeader); + } } static int32_t tqInitTaosxRsp(STaosxRsp* pRsp, STqOffsetVal pOffset) { @@ -97,7 +108,6 @@ static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHand if (pRequest->useSnapshot) { tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey:%s, vgId:%d, (earliest) set offset to be snapshot", consumerId, pHandle->subKey, vgId); - if (pHandle->fetchMeta) { tqOffsetResetToMeta(pOffsetVal, 0); } else { diff --git a/source/dnode/vnode/src/tsdb/tsdbCacheRead.c b/source/dnode/vnode/src/tsdb/tsdbCacheRead.c index caf88f55fc..e9e848f1b0 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCacheRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbCacheRead.c @@ -415,6 +415,9 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32 _end: tsdbUntakeReadSnap2((STsdbReader*)pr, pr->pReadSnap, true); + if (pr->pCurFileSet) { + pr->pCurFileSet = NULL; + } taosThreadMutexUnlock(&pr->readerMutex); diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit2.c b/source/dnode/vnode/src/tsdb/tsdbCommit2.c index d4cb63fb7b..79964c5636 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit2.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit2.c @@ -185,29 +185,22 @@ static int32_t tsdbCommitTombData(SCommitter2 *committer) { } if (record->ekey < committer->ctx->minKey) { - goto _next; + // do nothing } else if (record->skey > committer->ctx->maxKey) { - committer->ctx->maxKey = TMIN(record->skey, committer->ctx->maxKey); - goto _next; + committer->ctx->nextKey = TMIN(record->skey, committer->ctx->nextKey); + } else { + if (record->ekey > committer->ctx->maxKey) { + committer->ctx->nextKey = TMIN(committer->ctx->nextKey, committer->ctx->maxKey + 1); + } + + record->skey = TMAX(record->skey, committer->ctx->minKey); + record->ekey = TMIN(record->ekey, committer->ctx->maxKey); + + numRecord++; + code = tsdbFSetWriteTombRecord(committer->writer, record); + TSDB_CHECK_CODE(code, lino, _exit); } - TSKEY maxKey = committer->ctx->maxKey; - if (record->ekey > committer->ctx->maxKey) { - maxKey = committer->ctx->maxKey + 1; - } - - if (record->ekey > committer->ctx->maxKey && committer->ctx->nextKey > maxKey) { - committer->ctx->nextKey = maxKey; - } - - record->skey = TMAX(record->skey, committer->ctx->minKey); - record->ekey = TMIN(record->ekey, maxKey); - - numRecord++; - code = tsdbFSetWriteTombRecord(committer->writer, record); - TSDB_CHECK_CODE(code, lino, _exit); - - _next: code = tsdbIterMergerNext(committer->tombIterMerger); TSDB_CHECK_CODE(code, lino, _exit); } @@ -569,6 +562,8 @@ int32_t tsdbCommitBegin(STsdb *tsdb, SCommitInfo *info) { } else { SCommitter2 committer[1]; + tsdbFSCheckCommit(tsdb->pFS); + code = tsdbOpenCommitter(tsdb, info, committer); TSDB_CHECK_CODE(code, lino, _exit); diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index f43bb52d05..afe6ef6e1a 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -18,6 +18,8 @@ #include "vnd.h" #include "vndCos.h" +#define BLOCK_COMMIT_FACTOR 3 + extern int vnodeScheduleTask(int (*execute)(void *), void *arg); extern int vnodeScheduleTaskEx(int tpid, int (*execute)(void *), void *arg); extern void 
remove_file(const char *fname); @@ -65,11 +67,17 @@ static int32_t create_fs(STsdb *pTsdb, STFileSystem **fs) { fs[0]->bgTaskQueue->next = fs[0]->bgTaskQueue; fs[0]->bgTaskQueue->prev = fs[0]->bgTaskQueue; + taosThreadMutexInit(&fs[0]->commitMutex, NULL); + taosThreadCondInit(&fs[0]->canCommit, NULL); + fs[0]->blockCommit = false; + return 0; } static int32_t destroy_fs(STFileSystem **fs) { if (fs[0] == NULL) return 0; + taosThreadMutexDestroy(&fs[0]->commitMutex); + taosThreadCondDestroy(&fs[0]->canCommit); taosThreadMutexDestroy(fs[0]->mutex); ASSERT(fs[0]->bgTaskNum == 0); @@ -236,6 +244,7 @@ static int32_t load_fs(STsdb *pTsdb, const char *fname, TFileSetArray *arr) { code = TARRAY2_APPEND(arr, fset); TSDB_CHECK_CODE(code, lino, _exit); } + TARRAY2_SORT(arr, tsdbTFileSetCmprFn); } else { code = TSDB_CODE_FILE_CORRUPTED; TSDB_CHECK_CODE(code, lino, _exit); @@ -828,6 +837,27 @@ _exit: return code; } +static int32_t tsdbFSSetBlockCommit(STFileSystem *fs, bool block) { + taosThreadMutexLock(&fs->commitMutex); + if (block) { + fs->blockCommit = true; + } else { + fs->blockCommit = false; + taosThreadCondSignal(&fs->canCommit); + } + taosThreadMutexUnlock(&fs->commitMutex); + return 0; +} + +int32_t tsdbFSCheckCommit(STFileSystem *fs) { + taosThreadMutexLock(&fs->commitMutex); + while (fs->blockCommit) { + taosThreadCondWait(&fs->canCommit, &fs->commitMutex); + } + taosThreadMutexUnlock(&fs->commitMutex); + return 0; +} + int32_t tsdbFSEditCommit(STFileSystem *fs) { int32_t code = 0; int32_t lino = 0; @@ -837,19 +867,36 @@ int32_t tsdbFSEditCommit(STFileSystem *fs) { TSDB_CHECK_CODE(code, lino, _exit); // schedule merge - if (fs->tsdb->pVnode->config.sttTrigger != 1) { + if (fs->tsdb->pVnode->config.sttTrigger > 1) { STFileSet *fset; + int32_t sttTrigger = fs->tsdb->pVnode->config.sttTrigger; + bool schedMerge = false; + bool blockCommit = false; + TARRAY2_FOREACH_REVERSE(fs->fSetArr, fset) { if (TARRAY2_SIZE(fset->lvlArr) == 0) continue; SSttLvl *lvl = TARRAY2_FIRST(fset->lvlArr); - if (lvl->level != 0 || TARRAY2_SIZE(lvl->fobjArr) < fs->tsdb->pVnode->config.sttTrigger) continue; + if (lvl->level != 0) continue; + int32_t numFile = TARRAY2_SIZE(lvl->fobjArr); + if (numFile >= sttTrigger) { + schedMerge = true; + } + + if (numFile >= sttTrigger * BLOCK_COMMIT_FACTOR) { + blockCommit = true; + } + + if (schedMerge && blockCommit) break; + } + + if (schedMerge) { code = tsdbFSScheduleBgTask(fs, TSDB_BG_TASK_MERGER, tsdbMerge, NULL, fs->tsdb, NULL); TSDB_CHECK_CODE(code, lino, _exit); - - break; } + + tsdbFSSetBlockCommit(fs, blockCommit); } _exit: @@ -920,7 +967,6 @@ int32_t tsdbFSCreateRefSnapshot(STFileSystem *fs, TFileSetArray **fsetArr) { fsetArr[0] = taosMemoryCalloc(1, sizeof(*fsetArr[0])); if (fsetArr[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; - taosThreadRwlockRdlock(&fs->tsdb->rwLock); TARRAY2_FOREACH(fs->fSetArr, fset) { code = tsdbTFileSetInitRef(fs->tsdb, fset, &fset1); if (code) break; @@ -928,7 +974,6 @@ int32_t tsdbFSCreateRefSnapshot(STFileSystem *fs, TFileSetArray **fsetArr) { code = TARRAY2_APPEND(fsetArr[0], fset1); if (code) break; } - taosThreadRwlockUnlock(&fs->tsdb->rwLock); if (code) { TARRAY2_DESTROY(fsetArr[0], tsdbTFileSetClear); @@ -1103,4 +1148,4 @@ int32_t tsdbFSEnableBgTask(STFileSystem *fs) { fs->stop = false; taosThreadMutexUnlock(fs->mutex); return 0; -} +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.h b/source/dnode/vnode/src/tsdb/tsdbFS2.h index e814ab2fff..b0f42a0c48 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.h +++ 
b/source/dnode/vnode/src/tsdb/tsdbFS2.h @@ -67,6 +67,7 @@ int32_t tsdbFSDisableBgTask(STFileSystem *fs); int32_t tsdbFSEnableBgTask(STFileSystem *fs); // other int32_t tsdbFSGetFSet(STFileSystem *fs, int32_t fid, STFileSet **fset); +int32_t tsdbFSCheckCommit(STFileSystem *fs); struct STFSBgTask { EFSBgTaskT type; @@ -103,6 +104,11 @@ struct STFileSystem { int32_t bgTaskNum; STFSBgTask bgTaskQueue[1]; STFSBgTask *bgTaskRunning; + + // block commit variables + TdThreadMutex commitMutex; + TdThreadCond canCommit; + bool blockCommit; }; #ifdef __cplusplus diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.c b/source/dnode/vnode/src/tsdb/tsdbFSet2.c index 37c7e2ffc1..cd47a54973 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSet2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.c @@ -189,6 +189,7 @@ static int32_t tsdbJsonToSttLvl(STsdb *pTsdb, const cJSON *json, SSttLvl **lvl) code = TARRAY2_APPEND(lvl[0]->fobjArr, fobj); if (code) return code; } + TARRAY2_SORT(lvl[0]->fobjArr, tsdbTFileObjCmpr); return 0; } @@ -268,6 +269,7 @@ int32_t tsdbJsonToTFileSet(STsdb *pTsdb, const cJSON *json, STFileSet **fset) { code = TARRAY2_APPEND((*fset)->lvlArr, lvl); if (code) return code; } + TARRAY2_SORT((*fset)->lvlArr, tsdbSttLvlCmprFn); } else { return TSDB_CODE_FILE_CORRUPTED; } diff --git a/source/dnode/vnode/src/tsdb/tsdbMerge.c b/source/dnode/vnode/src/tsdb/tsdbMerge.c index ec0ea3c60f..42a8b5bb3f 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMerge.c +++ b/source/dnode/vnode/src/tsdb/tsdbMerge.c @@ -15,6 +15,8 @@ #include "tsdbMerge.h" +#define TSDB_MAX_LEVEL 6 // means max level is 7 + typedef struct { STsdb *tsdb; TFileSetArray *fsetArr; @@ -34,7 +36,6 @@ typedef struct { STFileSet *fset; bool toData; int32_t level; - SSttLvl *lvl; TABLEID tbid[1]; } ctx[1]; @@ -68,18 +69,6 @@ static int32_t tsdbMergerClose(SMerger *merger) { int32_t lino = 0; SVnode *pVnode = merger->tsdb->pVnode; - // edit file system - code = tsdbFSEditBegin(merger->tsdb->pFS, merger->fopArr, TSDB_FEDIT_MERGE); - TSDB_CHECK_CODE(code, lino, _exit); - - taosThreadRwlockWrlock(&merger->tsdb->rwLock); - code = tsdbFSEditCommit(merger->tsdb->pFS); - if (code) { - taosThreadRwlockUnlock(&merger->tsdb->rwLock); - TSDB_CHECK_CODE(code, lino, _exit); - } - taosThreadRwlockUnlock(&merger->tsdb->rwLock); - ASSERT(merger->writer == NULL); ASSERT(merger->dataIterMerger == NULL); ASSERT(merger->tombIterMerger == NULL); @@ -101,58 +90,142 @@ _exit: } static int32_t tsdbMergeFileSetBeginOpenReader(SMerger *merger) { - int32_t code = 0; - int32_t lino = 0; + int32_t code = 0; + int32_t lino = 0; + SSttLvl *lvl; - merger->ctx->toData = true; - merger->ctx->level = 0; - - // TODO: optimize merge strategy - for (int32_t i = 0;; ++i) { - if (i >= TARRAY2_SIZE(merger->ctx->fset->lvlArr)) { - merger->ctx->lvl = NULL; + bool hasLevelLargerThanMax = false; + TARRAY2_FOREACH_REVERSE(merger->ctx->fset->lvlArr, lvl) { + if (lvl->level <= TSDB_MAX_LEVEL) { + break; + } else if (TARRAY2_SIZE(lvl->fobjArr) > 0) { + hasLevelLargerThanMax = true; break; } + } - merger->ctx->lvl = TARRAY2_GET(merger->ctx->fset->lvlArr, i); - if (merger->ctx->lvl->level != merger->ctx->level || - TARRAY2_SIZE(merger->ctx->lvl->fobjArr) + 1 < merger->sttTrigger) { - merger->ctx->toData = false; - merger->ctx->lvl = NULL; - break; + if (hasLevelLargerThanMax) { + // merge all stt files + merger->ctx->toData = true; + merger->ctx->level = TSDB_MAX_LEVEL; + + TARRAY2_FOREACH(merger->ctx->fset->lvlArr, lvl) { + int32_t numMergeFile = TARRAY2_SIZE(lvl->fobjArr); + + for (int32_t i = 0; i < 
numMergeFile; ++i) { + STFileObj *fobj = TARRAY2_GET(lvl->fobjArr, i); + + STFileOp op = { + .optype = TSDB_FOP_REMOVE, + .fid = merger->ctx->fset->fid, + .of = fobj->f[0], + }; + code = TARRAY2_APPEND(merger->fopArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + + SSttFileReader *reader; + SSttFileReaderConfig config = { + .tsdb = merger->tsdb, + .szPage = merger->szPage, + .file[0] = fobj->f[0], + }; + + code = tsdbSttFileReaderOpen(fobj->fname, &config, &reader); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(merger->sttReaderArr, reader); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + } else { + // do regular merge + merger->ctx->toData = true; + merger->ctx->level = 0; + + // find the highest level that can be merged to + for (int32_t i = 0, numCarry = 0;;) { + int32_t numFile = numCarry; + if (i < TARRAY2_SIZE(merger->ctx->fset->lvlArr) && + merger->ctx->level == TARRAY2_GET(merger->ctx->fset->lvlArr, i)->level) { + numFile += TARRAY2_SIZE(TARRAY2_GET(merger->ctx->fset->lvlArr, i)->fobjArr); + i++; + } + + numCarry = numFile / merger->sttTrigger; + if (numCarry == 0) { + break; + } else { + merger->ctx->level++; + } } - merger->ctx->level++; + ASSERT(merger->ctx->level > 0); - STFileObj *fobj; - int32_t numFile = 0; - TARRAY2_FOREACH(merger->ctx->lvl->fobjArr, fobj) { - if (numFile == merger->sttTrigger) { + if (merger->ctx->level <= TSDB_MAX_LEVEL) { + TARRAY2_FOREACH_REVERSE(merger->ctx->fset->lvlArr, lvl) { + if (TARRAY2_SIZE(lvl->fobjArr) == 0) { + continue; + } + + if (lvl->level >= merger->ctx->level) { + merger->ctx->toData = false; + } + break; + } + } + + // get number of level-0 files to merge + int32_t numFile = pow(merger->sttTrigger, merger->ctx->level); + TARRAY2_FOREACH(merger->ctx->fset->lvlArr, lvl) { + if (lvl->level == 0) continue; + if (lvl->level >= merger->ctx->level) break; + + numFile = numFile - TARRAY2_SIZE(lvl->fobjArr) * pow(merger->sttTrigger, lvl->level); + } + + ASSERT(numFile >= 0); + + // get file system operations + TARRAY2_FOREACH(merger->ctx->fset->lvlArr, lvl) { + if (lvl->level >= merger->ctx->level) { break; } - STFileOp op = { - .optype = TSDB_FOP_REMOVE, - .fid = merger->ctx->fset->fid, - .of = fobj->f[0], - }; - code = TARRAY2_APPEND(merger->fopArr, op); - TSDB_CHECK_CODE(code, lino, _exit); + int32_t numMergeFile; + if (lvl->level == 0) { + numMergeFile = numFile; + } else { + numMergeFile = TARRAY2_SIZE(lvl->fobjArr); + } - SSttFileReader *reader; - SSttFileReaderConfig config = { - .tsdb = merger->tsdb, - .szPage = merger->szPage, - .file[0] = fobj->f[0], - }; + for (int32_t i = 0; i < numMergeFile; ++i) { + STFileObj *fobj = TARRAY2_GET(lvl->fobjArr, i); - code = tsdbSttFileReaderOpen(fobj->fname, &config, &reader); - TSDB_CHECK_CODE(code, lino, _exit); + STFileOp op = { + .optype = TSDB_FOP_REMOVE, + .fid = merger->ctx->fset->fid, + .of = fobj->f[0], + }; + code = TARRAY2_APPEND(merger->fopArr, op); + TSDB_CHECK_CODE(code, lino, _exit); - code = TARRAY2_APPEND(merger->sttReaderArr, reader); - TSDB_CHECK_CODE(code, lino, _exit); + SSttFileReader *reader; + SSttFileReaderConfig config = { + .tsdb = merger->tsdb, + .szPage = merger->szPage, + .file[0] = fobj->f[0], + }; - numFile++; + code = tsdbSttFileReaderOpen(fobj->fname, &config, &reader); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(merger->sttReaderArr, reader); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + + if (merger->ctx->level > TSDB_MAX_LEVEL) { + merger->ctx->level = TSDB_MAX_LEVEL; } } @@ -265,6 +338,8 @@ static int32_t 
tsdbMergeFileSetBegin(SMerger *merger) { ASSERT(merger->dataIterMerger == NULL); ASSERT(merger->writer == NULL); + TARRAY2_CLEAR(merger->fopArr, NULL); + merger->ctx->tbid->suid = 0; merger->ctx->tbid->uid = 0; @@ -317,6 +392,18 @@ static int32_t tsdbMergeFileSetEnd(SMerger *merger) { code = tsdbMergeFileSetEndCloseReader(merger); TSDB_CHECK_CODE(code, lino, _exit); + // edit file system + code = tsdbFSEditBegin(merger->tsdb->pFS, merger->fopArr, TSDB_FEDIT_MERGE); + TSDB_CHECK_CODE(code, lino, _exit); + + taosThreadRwlockWrlock(&merger->tsdb->rwLock); + code = tsdbFSEditCommit(merger->tsdb->pFS); + if (code) { + taosThreadRwlockUnlock(&merger->tsdb->rwLock); + TSDB_CHECK_CODE(code, lino, _exit); + } + taosThreadRwlockUnlock(&merger->tsdb->rwLock); + _exit: if (code) { TSDB_ERROR_LOG(TD_VID(merger->tsdb->pVnode), lino, code); @@ -434,7 +521,9 @@ int32_t tsdbMerge(void *arg) { .sttTrigger = tsdb->pVnode->config.sttTrigger, }}; - ASSERT(merger->sttTrigger > 1); + if (merger->sttTrigger <= 1) { + return 0; + } code = tsdbFSCreateCopySnapshot(tsdb->pFS, &merger->fsetArr); TSDB_CHECK_CODE(code, lino, _exit); @@ -447,6 +536,9 @@ int32_t tsdbMerge(void *arg) { _exit: if (code) { TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + tsdbFatal("vgId:%d, failed to merge stt files since %s. code:%d", TD_VID(tsdb->pVnode), terrstr(), code); + taosMsleep(100); + exit(EXIT_FAILURE); } else if (merger->ctx->opened) { tsdbDebug("vgId:%d %s done", TD_VID(tsdb->pVnode), __func__); } diff --git a/source/dnode/vnode/src/tsdb/tsdbMergeTree.c b/source/dnode/vnode/src/tsdb/tsdbMergeTree.c index 4927b1539b..260f4d8b2d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMergeTree.c +++ b/source/dnode/vnode/src/tsdb/tsdbMergeTree.c @@ -22,38 +22,6 @@ static void tLDataIterClose2(SLDataIter *pIter); // SLDataIter ================================================= -SSttBlockLoadInfo *tCreateLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols, - int32_t numOfSttTrigger) { - SSttBlockLoadInfo *pLoadInfo = taosMemoryCalloc(numOfSttTrigger, sizeof(SSttBlockLoadInfo)); - if (pLoadInfo == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; - } - - for (int32_t i = 0; i < numOfSttTrigger; ++i) { - pLoadInfo[i].blockIndex[0] = -1; - pLoadInfo[i].blockIndex[1] = -1; - pLoadInfo[i].currentLoadBlockIndex = 1; - - int32_t code = tBlockDataCreate(&pLoadInfo[i].blockData[0]); - if (code) { - terrno = code; - } - - code = tBlockDataCreate(&pLoadInfo[i].blockData[1]); - if (code) { - terrno = code; - } - - pLoadInfo[i].aSttBlk = taosArrayInit(4, sizeof(SSttBlk)); - pLoadInfo[i].pSchema = pSchema; - pLoadInfo[i].colIds = colList; - pLoadInfo[i].numOfCols = numOfCols; - } - - return pLoadInfo; -} - SSttBlockLoadInfo *tCreateOneLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols) { SSttBlockLoadInfo *pLoadInfo = taosMemoryCalloc(1, sizeof(SSttBlockLoadInfo)); if (pLoadInfo == NULL) { @@ -83,25 +51,6 @@ SSttBlockLoadInfo *tCreateOneLastBlockLoadInfo(STSchema *pSchema, int16_t *colLi return pLoadInfo; } -void resetLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo) { - for (int32_t i = 0; i < 1; ++i) { - pLoadInfo[i].currentLoadBlockIndex = 1; - pLoadInfo[i].blockIndex[0] = -1; - pLoadInfo[i].blockIndex[1] = -1; - - taosArrayClear(pLoadInfo[i].aSttBlk); - - pLoadInfo[i].cost.loadBlocks = 0; - pLoadInfo[i].cost.blockElapsedTime = 0; - pLoadInfo[i].cost.statisElapsedTime = 0; - pLoadInfo[i].cost.loadStatisBlocks = 0; - pLoadInfo[i].statisBlockIndex = -1; - tStatisBlockDestroy(pLoadInfo[i].statisBlock); - 
- pLoadInfo[i].sttBlockLoaded = false; - } -} - void getSttBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo, SSttBlockLoadCostInfo* pLoadCost) { for (int32_t i = 0; i < 1; ++i) { pLoadCost->blockElapsedTime += pLoadInfo[i].cost.blockElapsedTime; @@ -309,12 +258,6 @@ static int32_t binarySearchForStartRowIndex(uint64_t *uidList, int32_t num, uint } } -int32_t tLDataIterOpen(struct SLDataIter *pIter, SDataFReader *pReader, int32_t iStt, int8_t backward, uint64_t suid, - uint64_t uid, STimeWindow *pTimeWindow, SVersionRange *pRange, SSttBlockLoadInfo *pBlockLoadInfo, - const char *idStr, bool strictTimeRange) { - return 0; -} - static int32_t extractSttBlockInfo(SLDataIter *pIter, const TSttBlkArray *pArray, SSttBlockLoadInfo *pBlockLoadInfo, uint64_t suid) { if (TARRAY2_SIZE(pArray) <= 0) { @@ -767,48 +710,23 @@ static FORCE_INLINE int32_t tLDataIterDescCmprFn(const SRBTreeNode *p1, const SR return -1 * tLDataIterCmprFn(p1, p2); } -int32_t tMergeTreeOpen(SMergeTree *pMTree, int8_t backward, SDataFReader *pFReader, uint64_t suid, uint64_t uid, - STimeWindow *pTimeWindow, SVersionRange *pVerRange, SSttBlockLoadInfo *pBlockLoadInfo, - bool destroyLoadInfo, const char *idStr, bool strictTimeRange, SLDataIter *pLDataIter) { - int32_t code = TSDB_CODE_SUCCESS; +static void adjustValidLDataIters(SArray *pLDIterList, int32_t numOfFileObj) { + int32_t size = taosArrayGetSize(pLDIterList); - pMTree->backward = backward; - pMTree->pIter = NULL; - pMTree->idStr = idStr; - - if (!pMTree->backward) { // asc - tRBTreeCreate(&pMTree->rbt, tLDataIterCmprFn); - } else { // desc - tRBTreeCreate(&pMTree->rbt, tLDataIterDescCmprFn); - } - - pMTree->pLoadInfo = pBlockLoadInfo; - pMTree->destroyLoadInfo = destroyLoadInfo; - pMTree->ignoreEarlierTs = false; - - for (int32_t i = 0; i < pFReader->pSet->nSttF; ++i) { // open all last file - memset(&pLDataIter[i], 0, sizeof(SLDataIter)); - code = tLDataIterOpen(&pLDataIter[i], pFReader, i, pMTree->backward, suid, uid, pTimeWindow, pVerRange, - &pMTree->pLoadInfo[i], pMTree->idStr, strictTimeRange); - if (code != TSDB_CODE_SUCCESS) { - goto _end; + if (size < numOfFileObj) { + int32_t inc = numOfFileObj - size; + for (int32_t k = 0; k < inc; ++k) { + SLDataIter *pIter = taosMemoryCalloc(1, sizeof(SLDataIter)); + taosArrayPush(pLDIterList, &pIter); } + } else if (size > numOfFileObj) { // remove unused LDataIter + int32_t inc = size - numOfFileObj; - bool hasVal = tLDataIterNextRow(&pLDataIter[i], pMTree->idStr); - if (hasVal) { - tMergeTreeAddIter(pMTree, &pLDataIter[i]); - } else { - if (!pMTree->ignoreEarlierTs) { - pMTree->ignoreEarlierTs = pLDataIter[i].ignoreEarlierTs; - } + for (int i = 0; i < inc; ++i) { + SLDataIter *pIter = taosArrayPop(pLDIterList); + destroyLDataIter(pIter); } } - - return code; - -_end: - tMergeTreeClose(pMTree); - return code; } int32_t tMergeTreeOpen2(SMergeTree *pMTree, SMergeTreeConf *pConf) { @@ -826,45 +744,33 @@ int32_t tMergeTreeOpen2(SMergeTree *pMTree, SMergeTreeConf *pConf) { pMTree->ignoreEarlierTs = false; - int32_t size = ((STFileSet *)pConf->pCurrentFileset)->lvlArr->size; - if (size == 0) { + // no data exists, go to end + int32_t numOfLevels = ((STFileSet *)pConf->pCurrentFileset)->lvlArr->size; + if (numOfLevels == 0) { goto _end; } // add the list/iter placeholder - while (taosArrayGetSize(pConf->pSttFileBlockIterArray) < size) { + while (taosArrayGetSize(pConf->pSttFileBlockIterArray) < numOfLevels) { SArray *pList = taosArrayInit(4, POINTER_BYTES); taosArrayPush(pConf->pSttFileBlockIterArray, &pList); } - for (int32_t j = 0; j 
< size; ++j) { + for (int32_t j = 0; j < numOfLevels; ++j) { SSttLvl *pSttLevel = ((STFileSet *)pConf->pCurrentFileset)->lvlArr->data[j]; - ASSERT(pSttLevel->level == j); + SArray *pList = taosArrayGetP(pConf->pSttFileBlockIterArray, j); - SArray *pList = taosArrayGetP(pConf->pSttFileBlockIterArray, j); - int32_t numOfIter = taosArrayGetSize(pList); + int32_t numOfFileObj = TARRAY2_SIZE(pSttLevel->fobjArr); + adjustValidLDataIters(pList, numOfFileObj); - if (numOfIter < TARRAY2_SIZE(pSttLevel->fobjArr)) { - int32_t inc = TARRAY2_SIZE(pSttLevel->fobjArr) - numOfIter; - for (int32_t k = 0; k < inc; ++k) { - SLDataIter *pIter = taosMemoryCalloc(1, sizeof(SLDataIter)); - taosArrayPush(pList, &pIter); - } - } else if (numOfIter > TARRAY2_SIZE(pSttLevel->fobjArr)){ - int32_t inc = numOfIter - TARRAY2_SIZE(pSttLevel->fobjArr); - for (int i = 0; i < inc; ++i) { - SLDataIter *pIter = taosArrayPop(pList); - destroyLDataIter(pIter); - } - } - - for (int32_t i = 0; i < TARRAY2_SIZE(pSttLevel->fobjArr); ++i) { // open all last file + for (int32_t i = 0; i < numOfFileObj; ++i) { // open all last file SLDataIter *pIter = taosArrayGetP(pList, i); SSttFileReader *pSttFileReader = pIter->pReader; SSttBlockLoadInfo *pLoadInfo = pIter->pBlockLoadInfo; - // open stt file reader if not + // open stt file reader if not opened yet + // if failed to open this stt file, ignore the error and try next one if (pSttFileReader == NULL) { SSttFileReaderConfig conf = {.tsdb = pConf->pTsdb, .szPage = pConf->pTsdb->pVnode->config.tsdbPageSize}; conf.file[0] = *pSttLevel->fobjArr->data[i]->f; diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c deleted file mode 100644 index c02cff3aa9..0000000000 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ /dev/null @@ -1,5611 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "osDef.h" -#include "tsdb.h" -#include "tsimplehash.h" - -#define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC) -#define getCurrentKeyInLastBlock(_r) ((_r)->currentKey) - -typedef enum { - READER_STATUS_SUSPEND = 0x1, - READER_STATUS_NORMAL = 0x2, -} EReaderStatus; - -typedef enum { - EXTERNAL_ROWS_PREV = 0x1, - EXTERNAL_ROWS_MAIN = 0x2, - EXTERNAL_ROWS_NEXT = 0x3, -} EContentData; -/* -typedef enum { - READ_MODE_COUNT_ONLY = 0x1, - READ_MODE_ALL, -} EReadMode; -*/ -typedef struct { - STbDataIter* iter; - int32_t index; - bool hasVal; -} SIterInfo; - -typedef struct { - int32_t numOfBlocks; - int32_t numOfLastFiles; -} SBlockNumber; - -typedef struct SBlockIndex { - int32_t ordinalIndex; - int64_t inFileOffset; - STimeWindow window; // todo replace it with overlap flag. 
-} SBlockIndex; - -typedef struct STableBlockScanInfo { - uint64_t uid; - TSKEY lastKey; - TSKEY lastKeyInStt; // last accessed key in stt - SMapData mapData; // block info (compressed) - SArray* pBlockList; // block data index list, SArray - SIterInfo iter; // mem buffer skip list iterator - SIterInfo iiter; // imem buffer skip list iterator - SArray* delSkyline; // delete info for this table - int32_t fileDelIndex; // file block delete index - int32_t sttBlockDelIndex; // delete index for last block - bool iterInit; // whether to initialize the in-memory skip list iterator or not -} STableBlockScanInfo; - -typedef struct SBlockOrderWrapper { - int64_t uid; - int64_t offset; -} SBlockOrderWrapper; - -typedef struct SBlockOrderSupporter { - SBlockOrderWrapper** pDataBlockInfo; - int32_t* indexPerTable; - int32_t* numOfBlocksPerTable; - int32_t numOfTables; -} SBlockOrderSupporter; - -typedef struct SIOCostSummary { - int64_t numOfBlocks; - double blockLoadTime; - double buildmemBlock; - int64_t headFileLoad; - double headFileLoadTime; - int64_t smaDataLoad; - double smaLoadTime; - int64_t sttStatisBlockLoad; - int64_t sttBlockLoad; - double sttBlockLoadTime; - int64_t composedBlocks; - double buildComposedBlockTime; - double createScanInfoList; - SSttBlockLoadCostInfo sttCost; - double initDelSkylineIterTime; -} SIOCostSummary; - -typedef struct SBlockLoadSuppInfo { - SArray* pColAgg; - SColumnDataAgg tsColAgg; - int16_t* colId; - int16_t* slotId; - int32_t numOfCols; - char** buildBuf; // build string tmp buffer, todo remove it later after all string format being updated. - bool smaValid; // the sma on all queried columns are activated -} SBlockLoadSuppInfo; - -typedef struct SLastBlockReader { - STimeWindow window; - SVersionRange verRange; - int32_t order; - uint64_t uid; - SMergeTree mergeTree; - SSttBlockLoadInfo* pInfo; - int64_t currentKey; -} SLastBlockReader; - -typedef struct SFilesetIter { - int32_t numOfFiles; // number of total files - int32_t index; // current accessed index in the list - SArray* pFileList; // data file list - int32_t order; - SLastBlockReader* pLastBlockReader; // last file block reader -} SFilesetIter; - -typedef struct SFileDataBlockInfo { - // index position in STableBlockScanInfo in order to check whether neighbor block overlaps with it - uint64_t uid; - int32_t tbBlockIdx; -} SFileDataBlockInfo; - -typedef struct SDataBlockIter { - int32_t numOfBlocks; - int32_t index; - SArray* blockList; // SArray - int32_t order; - SDataBlk block; // current SDataBlk data - SSHashObj* pTableMap; -} SDataBlockIter; - -typedef struct SFileBlockDumpInfo { - int32_t totalRows; - int32_t rowIndex; - int64_t lastKey; - bool allDumped; -} SFileBlockDumpInfo; - -typedef struct STableUidList { - uint64_t* tableUidList; // access table uid list in uid ascending order list - int32_t currentIndex; // index in table uid list -} STableUidList; - -typedef struct SReaderStatus { - bool loadFromFile; // check file stage - bool composedDataBlock; // the returned data block is a composed block or not - bool mapDataCleaned; // mapData has been cleaned up alreay or not - SSHashObj* pTableMap; // SHash - STableBlockScanInfo** pTableIter; // table iterator used in building in-memory buffer data blocks. - STableUidList uidList; // check tables in uid order, to avoid the repeatly load of blocks in STT. 
- SFileBlockDumpInfo fBlockDumpInfo; - SDFileSet* pCurrentFileset; // current opened file set - SBlockData fileBlockData; - SFilesetIter fileIter; - SDataBlockIter blockIter; - SLDataIter* pLDataIter; - SRowMerger merger; - SColumnInfoData* pPrimaryTsCol; // primary time stamp output col info data -} SReaderStatus; - -typedef struct SBlockInfoBuf { - int32_t currentIndex; - SArray* pData; - int32_t numPerBucket; - int32_t numOfTables; -} SBlockInfoBuf; - -typedef struct STsdbReaderAttr { - STSchema* pSchema; - EReadMode readMode; - uint64_t rowsNum; - STimeWindow window; - bool freeBlock; - SVersionRange verRange; - int16_t order; -} STsdbReaderAttr; - -typedef struct SResultBlockInfo { - SSDataBlock* pResBlock; - bool freeBlock; - int64_t capacity; -} SResultBlockInfo; - -struct STsdbReader { - STsdb* pTsdb; - SVersionRange verRange; - TdThreadMutex readerMutex; - EReaderStatus flag; - int32_t code; - uint64_t suid; - int16_t order; - EReadMode readMode; - uint64_t rowsNum; - STimeWindow window; // the primary query time window that applies to all queries - SResultBlockInfo resBlockInfo; - SReaderStatus status; - char* idStr; // query info handle, for debug purpose - int32_t type; // query type: 1. retrieve all data blocks, 2. retrieve direct prev|next rows - SBlockLoadSuppInfo suppInfo; - STsdbReadSnap* pReadSnap; - SIOCostSummary cost; - SHashObj** pIgnoreTables; - STSchema* pSchema; // the newest version schema - SSHashObj* pSchemaMap; // keep the retrieved schema info, to avoid the overhead by repeatly load schema - SDataFReader* pFileReader; // the file reader - SDelFReader* pDelFReader; // the del file reader - SArray* pDelIdx; // del file block index; - SBlockInfoBuf blockInfoBuf; - EContentData step; - STsdbReader* innerReader[2]; -}; - -static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter); -static int buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t endKey, int32_t capacity, - STsdbReader* pReader); -static TSDBROW* getValidMemRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pReader); -static int32_t doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pScanInfo, STsdbReader* pReader); -static int32_t doMergeRowsInLastBlock(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pScanInfo, int64_t ts, - SRowMerger* pMerger, SVersionRange* pVerRange, const char* id); -static int32_t doMergeRowsInBuf(SIterInfo* pIter, uint64_t uid, int64_t ts, SArray* pDelList, STsdbReader* pReader); -static int32_t doAppendRowFromTSRow(SSDataBlock* pBlock, STsdbReader* pReader, SRow* pTSRow, - STableBlockScanInfo* pScanInfo); -static int32_t doAppendRowFromFileBlock(SSDataBlock* pResBlock, STsdbReader* pReader, SBlockData* pBlockData, - int32_t rowIndex); -static void setComposedBlockFlag(STsdbReader* pReader, bool composed); -static bool hasBeenDropped(const SArray* pDelList, int32_t* index, int64_t key, int64_t ver, int32_t order, - SVersionRange* pVerRange); - -static int32_t doMergeMemTableMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, SArray* pDelList, - TSDBROW* pResRow, STsdbReader* pReader, bool* freeTSRow); -static int32_t doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* pBlockScanInfo, - STsdbReader* pReader, SRow** pTSRow); -static int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBlockScanInfo, int64_t key, - STsdbReader* pReader); - -static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, STbData* pMemTbData, - 
STbData* piMemTbData); -static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr, - int8_t* pLevel); -static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, const char* id); -static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader); -static int32_t doBuildDataBlock(STsdbReader* pReader); -static TSDBKEY getCurrentKeyInBuf(STableBlockScanInfo* pScanInfo, STsdbReader* pReader); -static bool hasDataInFileBlock(const SBlockData* pBlockData, const SFileBlockDumpInfo* pDumpInfo); -static void initBlockDumpInfo(STsdbReader* pReader, SDataBlockIter* pBlockIter); -static int32_t getInitialDelIndex(const SArray* pDelSkyline, int32_t order); - -static STableBlockScanInfo* getTableBlockScanInfo(SSHashObj* pTableMap, uint64_t uid, const char* id); - -static bool outOfTimeWindow(int64_t ts, STimeWindow* pWindow) { return (ts > pWindow->ekey) || (ts < pWindow->skey); } - -static int32_t setColumnIdSlotList(SBlockLoadSuppInfo* pSupInfo, SColumnInfo* pCols, const int32_t* pSlotIdList, - int32_t numOfCols) { - pSupInfo->smaValid = true; - pSupInfo->numOfCols = numOfCols; - pSupInfo->colId = taosMemoryMalloc(numOfCols * (sizeof(int16_t) * 2 + POINTER_BYTES)); - if (pSupInfo->colId == NULL) { - taosMemoryFree(pSupInfo->colId); - return TSDB_CODE_OUT_OF_MEMORY; - } - - pSupInfo->slotId = (int16_t*)((char*)pSupInfo->colId + (sizeof(int16_t) * numOfCols)); - pSupInfo->buildBuf = (char**)((char*)pSupInfo->slotId + (sizeof(int16_t) * numOfCols)); - for (int32_t i = 0; i < numOfCols; ++i) { - pSupInfo->colId[i] = pCols[i].colId; - pSupInfo->slotId[i] = pSlotIdList[i]; - - if (IS_VAR_DATA_TYPE(pCols[i].type)) { - pSupInfo->buildBuf[i] = taosMemoryMalloc(pCols[i].bytes); - } else { - pSupInfo->buildBuf[i] = NULL; - } - } - - return TSDB_CODE_SUCCESS; -} - -static int32_t updateBlockSMAInfo(STSchema* pSchema, SBlockLoadSuppInfo* pSupInfo) { - int32_t i = 0, j = 0; - - if (j < pSupInfo->numOfCols && PRIMARYKEY_TIMESTAMP_COL_ID == pSupInfo->colId[j]) { - j += 1; - } - - while (i < pSchema->numOfCols && j < pSupInfo->numOfCols) { - STColumn* pTCol = &pSchema->columns[i]; - if (pTCol->colId == pSupInfo->colId[j]) { - if (!IS_BSMA_ON(pTCol)) { - pSupInfo->smaValid = false; - return TSDB_CODE_SUCCESS; - } - - i += 1; - j += 1; - } else if (pTCol->colId < pSupInfo->colId[j]) { - // do nothing - i += 1; - } else { - return TSDB_CODE_INVALID_PARA; - } - } - - return TSDB_CODE_SUCCESS; -} - -static int32_t initBlockScanInfoBuf(SBlockInfoBuf* pBuf, int32_t numOfTables) { - int32_t num = numOfTables / pBuf->numPerBucket; - int32_t remainder = numOfTables % pBuf->numPerBucket; - if (pBuf->pData == NULL) { - pBuf->pData = taosArrayInit(num + 1, POINTER_BYTES); - } - - for (int32_t i = 0; i < num; ++i) { - char* p = taosMemoryCalloc(pBuf->numPerBucket, sizeof(STableBlockScanInfo)); - if (p == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - taosArrayPush(pBuf->pData, &p); - } - - if (remainder > 0) { - char* p = taosMemoryCalloc(remainder, sizeof(STableBlockScanInfo)); - if (p == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - taosArrayPush(pBuf->pData, &p); - } - - pBuf->numOfTables = numOfTables; - - return TSDB_CODE_SUCCESS; -} - -static int32_t ensureBlockScanInfoBuf(SBlockInfoBuf* pBuf, int32_t numOfTables) { - if (numOfTables <= pBuf->numOfTables) { - return TSDB_CODE_SUCCESS; - } - - if (pBuf->numOfTables > 0) { - STableBlockScanInfo** p = (STableBlockScanInfo**)taosArrayPop(pBuf->pData); - taosMemoryFree(*p); - pBuf->numOfTables /= 
pBuf->numPerBucket; - } - - int32_t num = (numOfTables - pBuf->numOfTables) / pBuf->numPerBucket; - int32_t remainder = (numOfTables - pBuf->numOfTables) % pBuf->numPerBucket; - if (pBuf->pData == NULL) { - pBuf->pData = taosArrayInit(num + 1, POINTER_BYTES); - } - - for (int32_t i = 0; i < num; ++i) { - char* p = taosMemoryCalloc(pBuf->numPerBucket, sizeof(STableBlockScanInfo)); - if (p == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - taosArrayPush(pBuf->pData, &p); - } - - if (remainder > 0) { - char* p = taosMemoryCalloc(remainder, sizeof(STableBlockScanInfo)); - if (p == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - taosArrayPush(pBuf->pData, &p); - } - - pBuf->numOfTables = numOfTables; - - return TSDB_CODE_SUCCESS; -} - -static void clearBlockScanInfoBuf(SBlockInfoBuf* pBuf) { - size_t num = taosArrayGetSize(pBuf->pData); - for (int32_t i = 0; i < num; ++i) { - char** p = taosArrayGet(pBuf->pData, i); - taosMemoryFree(*p); - } - - taosArrayDestroy(pBuf->pData); -} - -static void* getPosInBlockInfoBuf(SBlockInfoBuf* pBuf, int32_t index) { - int32_t bucketIndex = index / pBuf->numPerBucket; - char** pBucket = taosArrayGet(pBuf->pData, bucketIndex); - return (*pBucket) + (index % pBuf->numPerBucket) * sizeof(STableBlockScanInfo); -} - -static int32_t uidComparFunc(const void* p1, const void* p2) { - uint64_t pu1 = *(uint64_t*)p1; - uint64_t pu2 = *(uint64_t*)p2; - if (pu1 == pu2) { - return 0; - } else { - return (pu1 < pu2) ? -1 : 1; - } -} - -// NOTE: speedup the whole processing by preparing the buffer for STableBlockScanInfo in batch model -static SSHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, SBlockInfoBuf* pBuf, const STableKeyInfo* idList, - STableUidList* pUidList, int32_t numOfTables) { - // allocate buffer in order to load data blocks from file - // todo use simple hash instead, optimize the memory consumption - SSHashObj* pTableMap = tSimpleHashInit(numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); - if (pTableMap == NULL) { - return NULL; - } - - int64_t st = taosGetTimestampUs(); - initBlockScanInfoBuf(pBuf, numOfTables); - - pUidList->tableUidList = taosMemoryMalloc(numOfTables * sizeof(uint64_t)); - if (pUidList->tableUidList == NULL) { - tSimpleHashCleanup(pTableMap); - return NULL; - } - - pUidList->currentIndex = 0; - - for (int32_t j = 0; j < numOfTables; ++j) { - STableBlockScanInfo* pScanInfo = getPosInBlockInfoBuf(pBuf, j); - - pScanInfo->uid = idList[j].uid; - pUidList->tableUidList[j] = idList[j].uid; - - if (ASCENDING_TRAVERSE(pTsdbReader->order)) { - int64_t skey = pTsdbReader->window.skey; - pScanInfo->lastKey = (skey > INT64_MIN) ? (skey - 1) : skey; - pScanInfo->lastKeyInStt = skey; - } else { - int64_t ekey = pTsdbReader->window.ekey; - pScanInfo->lastKey = (ekey < INT64_MAX) ? 
(ekey + 1) : ekey; - pScanInfo->lastKeyInStt = ekey; - } - - tSimpleHashPut(pTableMap, &pScanInfo->uid, sizeof(uint64_t), &pScanInfo, POINTER_BYTES); - tsdbTrace("%p check table uid:%" PRId64 " from lastKey:%" PRId64 " %s", pTsdbReader, pScanInfo->uid, - pScanInfo->lastKey, pTsdbReader->idStr); - } - - taosSort(pUidList->tableUidList, numOfTables, sizeof(uint64_t), uidComparFunc); - - pTsdbReader->cost.createScanInfoList = (taosGetTimestampUs() - st) / 1000.0; - tsdbDebug("%p create %d tables scan-info, size:%.2f Kb, elapsed time:%.2f ms, %s", pTsdbReader, numOfTables, - (sizeof(STableBlockScanInfo) * numOfTables) / 1024.0, pTsdbReader->cost.createScanInfoList, - pTsdbReader->idStr); - - return pTableMap; -} - -static void resetAllDataBlockScanInfo(SSHashObj* pTableMap, int64_t ts, int32_t step) { - void* p = NULL; - int32_t iter = 0; - - while ((p = tSimpleHashIterate(pTableMap, p, &iter)) != NULL) { - STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; - - pInfo->iterInit = false; - pInfo->iter.hasVal = false; - pInfo->iiter.hasVal = false; - - if (pInfo->iter.iter != NULL) { - pInfo->iter.iter = tsdbTbDataIterDestroy(pInfo->iter.iter); - } - - if (pInfo->iiter.iter != NULL) { - pInfo->iiter.iter = tsdbTbDataIterDestroy(pInfo->iiter.iter); - } - - pInfo->delSkyline = taosArrayDestroy(pInfo->delSkyline); - pInfo->lastKey = ts; - pInfo->lastKeyInStt = ts + step; - } -} - -static void clearBlockScanInfo(STableBlockScanInfo* p) { - p->iterInit = false; - - p->iter.hasVal = false; - p->iiter.hasVal = false; - - if (p->iter.iter != NULL) { - p->iter.iter = tsdbTbDataIterDestroy(p->iter.iter); - } - - if (p->iiter.iter != NULL) { - p->iiter.iter = tsdbTbDataIterDestroy(p->iiter.iter); - } - - p->delSkyline = taosArrayDestroy(p->delSkyline); - p->pBlockList = taosArrayDestroy(p->pBlockList); - tMapDataClear(&p->mapData); -} - -static void destroyAllBlockScanInfo(SSHashObj* pTableMap) { - void* p = NULL; - int32_t iter = 0; - - while ((p = tSimpleHashIterate(pTableMap, p, &iter)) != NULL) { - clearBlockScanInfo(*(STableBlockScanInfo**)p); - } - - tSimpleHashCleanup(pTableMap); -} - -static bool isEmptyQueryTimeWindow(STimeWindow* pWindow) { return pWindow->skey > pWindow->ekey; } - -// Update the query time window according to the data time to live(TTL) information, in order to avoid to return -// the expired data to client, even it is queried already. -static STimeWindow updateQueryTimeWindow(STsdb* pTsdb, STimeWindow* pWindow) { - STsdbKeepCfg* pCfg = &pTsdb->keepCfg; - - int64_t now = taosGetTimestamp(pCfg->precision); - int64_t earilyTs = now - (tsTickPerMin[pCfg->precision] * pCfg->keep2) + 1; // needs to add one tick - - STimeWindow win = *pWindow; - if (win.skey < earilyTs) { - win.skey = earilyTs; - } - - return win; -} - -// init file iterator -static int32_t initFilesetIterator(SFilesetIter* pIter, SArray* aDFileSet, STsdbReader* pReader) { - size_t numOfFileset = taosArrayGetSize(aDFileSet); - - pIter->index = ASCENDING_TRAVERSE(pReader->order) ? 
-1 : numOfFileset; - pIter->order = pReader->order; - pIter->pFileList = aDFileSet; - pIter->numOfFiles = numOfFileset; - - if (pIter->pLastBlockReader == NULL) { - pIter->pLastBlockReader = taosMemoryCalloc(1, sizeof(struct SLastBlockReader)); - if (pIter->pLastBlockReader == NULL) { - int32_t code = TSDB_CODE_OUT_OF_MEMORY; - tsdbError("failed to prepare the last block iterator, since:%s %s", tstrerror(code), pReader->idStr); - return code; - } - } - - SLastBlockReader* pLReader = pIter->pLastBlockReader; - pLReader->order = pReader->order; - pLReader->window = pReader->window; - pLReader->verRange = pReader->verRange; - - pLReader->uid = 0; - tMergeTreeClose(&pLReader->mergeTree); - - if (pLReader->pInfo == NULL) { - // here we ignore the first column, which is always be the primary timestamp column - SBlockLoadSuppInfo* pInfo = &pReader->suppInfo; - - int32_t numOfStt = pReader->pTsdb->pVnode->config.sttTrigger; - pLReader->pInfo = tCreateLastBlockLoadInfo(pReader->pSchema, &pInfo->colId[1], pInfo->numOfCols - 1, numOfStt); - if (pLReader->pInfo == NULL) { - tsdbDebug("init fileset iterator failed, code:%s %s", tstrerror(terrno), pReader->idStr); - return terrno; - } - } - - tsdbDebug("init fileset iterator, total files:%d %s", pIter->numOfFiles, pReader->idStr); - return TSDB_CODE_SUCCESS; -} - -static int32_t filesetIteratorNext(SFilesetIter* pIter, STsdbReader* pReader, bool* hasNext) { - bool asc = ASCENDING_TRAVERSE(pIter->order); - int32_t step = asc ? 1 : -1; - pIter->index += step; - int32_t code = 0; - - if ((asc && pIter->index >= pIter->numOfFiles) || ((!asc) && pIter->index < 0)) { - *hasNext = false; - return TSDB_CODE_SUCCESS; - } - - SIOCostSummary* pCost = &pReader->cost; - getSttBlockLoadInfo(pIter->pLastBlockReader->pInfo, &pCost->sttCost); - - pIter->pLastBlockReader->uid = 0; - tMergeTreeClose(&pIter->pLastBlockReader->mergeTree); - resetLastBlockLoadInfo(pIter->pLastBlockReader->pInfo); - - // check file the time range of coverage - STimeWindow win = {0}; - - while (1) { - if (pReader->pFileReader != NULL) { - tsdbDataFReaderClose(&pReader->pFileReader); - } - - pReader->status.pCurrentFileset = (SDFileSet*)taosArrayGet(pIter->pFileList, pIter->index); - - code = tsdbDataFReaderOpen(&pReader->pFileReader, pReader->pTsdb, pReader->status.pCurrentFileset); - if (code != TSDB_CODE_SUCCESS) { - goto _err; - } - - pReader->cost.headFileLoad += 1; - - int32_t fid = pReader->status.pCurrentFileset->fid; - tsdbFidKeyRange(fid, pReader->pTsdb->keepCfg.days, pReader->pTsdb->keepCfg.precision, &win.skey, &win.ekey); - - // current file are no longer overlapped with query time window, ignore remain files - if ((asc && win.skey > pReader->window.ekey) || (!asc && win.ekey < pReader->window.skey)) { - tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, %s", pReader, - pReader->window.skey, pReader->window.ekey, pReader->idStr); - *hasNext = false; - return TSDB_CODE_SUCCESS; - } - - if ((asc && (win.ekey < pReader->window.skey)) || ((!asc) && (win.skey > pReader->window.ekey))) { - pIter->index += step; - if ((asc && pIter->index >= pIter->numOfFiles) || ((!asc) && pIter->index < 0)) { - *hasNext = false; - return TSDB_CODE_SUCCESS; - } - continue; - } - - tsdbDebug("%p file found fid:%d for qrange:%" PRId64 "-%" PRId64 ", %s", pReader, fid, pReader->window.skey, - pReader->window.ekey, pReader->idStr); - *hasNext = true; - return TSDB_CODE_SUCCESS; - } - -_err: - *hasNext = false; - return code; -} - -static void 
resetDataBlockIterator(SDataBlockIter* pIter, int32_t order) { - pIter->order = order; - pIter->index = -1; - pIter->numOfBlocks = 0; - if (pIter->blockList == NULL) { - pIter->blockList = taosArrayInit(4, sizeof(SFileDataBlockInfo)); - } else { - taosArrayClear(pIter->blockList); - } -} - -static void cleanupDataBlockIterator(SDataBlockIter* pIter) { taosArrayDestroy(pIter->blockList); } - -static void initReaderStatus(SReaderStatus* pStatus) { - pStatus->pTableIter = NULL; - pStatus->loadFromFile = true; -} - -static SSDataBlock* createResBlock(SQueryTableDataCond* pCond, int32_t capacity) { - SSDataBlock* pResBlock = createDataBlock(); - if (pResBlock == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; - } - - for (int32_t i = 0; i < pCond->numOfCols; ++i) { - SColumnInfoData colInfo = {0}; - colInfo.info = pCond->colList[i]; - blockDataAppendColInfo(pResBlock, &colInfo); - } - - int32_t code = blockDataEnsureCapacity(pResBlock, capacity); - if (code != TSDB_CODE_SUCCESS) { - terrno = code; - taosMemoryFree(pResBlock); - return NULL; - } - return pResBlock; -} - -static int32_t tsdbInitReaderLock(STsdbReader* pReader) { - int32_t code = -1; - qTrace("tsdb/read: %p, pre-init read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - code = taosThreadMutexInit(&pReader->readerMutex, NULL); - - qTrace("tsdb/read: %p, post-init read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - return code; -} - -static int32_t tsdbUninitReaderLock(STsdbReader* pReader) { - int32_t code = -1; - qTrace("tsdb/read: %p, pre-uninit read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - code = taosThreadMutexDestroy(&pReader->readerMutex); - - qTrace("tsdb/read: %p, post-uninit read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - return code; -} - -static int32_t tsdbAcquireReader(STsdbReader* pReader) { - int32_t code = -1; - qTrace("tsdb/read: %p, pre-take read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - code = taosThreadMutexLock(&pReader->readerMutex); - - qTrace("tsdb/read: %p, post-take read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - return code; -} - -static int32_t tsdbTryAcquireReader(STsdbReader* pReader) { - int32_t code = -1; - qTrace("tsdb/read: %p, pre-trytake read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - code = taosThreadMutexTryLock(&pReader->readerMutex); - - qTrace("tsdb/read: %p, post-trytake read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - return code; -} - -static int32_t tsdbReleaseReader(STsdbReader* pReader) { - int32_t code = -1; - qTrace("tsdb/read: %p, pre-untake read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - code = taosThreadMutexUnlock(&pReader->readerMutex); - - qTrace("tsdb/read: %p, post-untake read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - return code; -} - -void tsdbReleaseDataBlock(STsdbReader* pReader) { - SReaderStatus* pStatus = &pReader->status; - if (!pStatus->composedDataBlock) { - tsdbReleaseReader(pReader); - } -} - -static int32_t initResBlockInfo(SResultBlockInfo* pResBlockInfo, int64_t capacity, SSDataBlock* pResBlock, - SQueryTableDataCond* pCond) { - pResBlockInfo->capacity = capacity; - pResBlockInfo->pResBlock = pResBlock; - terrno = 0; - - if (pResBlockInfo->pResBlock == NULL) { - pResBlockInfo->freeBlock = true; - pResBlockInfo->pResBlock = createResBlock(pCond, pResBlockInfo->capacity); - } else { - pResBlockInfo->freeBlock = false; - } - - return terrno; -} 
- -static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, void** ppReader, int32_t capacity, - SSDataBlock* pResBlock, const char* idstr) { - int32_t code = 0; - int8_t level = 0; - STsdbReader* pReader = (STsdbReader*)taosMemoryCalloc(1, sizeof(*pReader)); - if (pReader == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _end; - } - - if (VND_IS_TSMA(pVnode)) { - tsdbDebug("vgId:%d, tsma is selected to query, %s", TD_VID(pVnode), idstr); - } - - initReaderStatus(&pReader->status); - - pReader->pTsdb = getTsdbByRetentions(pVnode, pCond->twindows.skey, pVnode->config.tsdbCfg.retentions, idstr, &level); - pReader->suid = pCond->suid; - pReader->order = pCond->order; - - pReader->idStr = (idstr != NULL) ? taosStrdup(idstr) : NULL; - pReader->verRange = getQueryVerRange(pVnode, pCond, idstr); - pReader->type = pCond->type; - pReader->window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows); - pReader->blockInfoBuf.numPerBucket = 1000; // 1000 tables per bucket - - code = initResBlockInfo(&pReader->resBlockInfo, capacity, pResBlock, pCond); - if (code != TSDB_CODE_SUCCESS) { - goto _end; - } - - if (pCond->numOfCols <= 0) { - tsdbError("vgId:%d, invalid column number %d in query cond, %s", TD_VID(pVnode), pCond->numOfCols, idstr); - code = TSDB_CODE_INVALID_PARA; - goto _end; - } - - // allocate buffer in order to load data blocks from file - SBlockLoadSuppInfo* pSup = &pReader->suppInfo; - pSup->pColAgg = taosArrayInit(pCond->numOfCols, sizeof(SColumnDataAgg)); - if (pSup->pColAgg == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _end; - } - - pSup->tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID; - setColumnIdSlotList(pSup, pCond->colList, pCond->pSlotList, pCond->numOfCols); - - code = tBlockDataCreate(&pReader->status.fileBlockData); - if (code != TSDB_CODE_SUCCESS) { - terrno = code; - goto _end; - } - - if (pReader->suppInfo.colId[0] != PRIMARYKEY_TIMESTAMP_COL_ID) { - tsdbError("the first column isn't primary timestamp, %d, %s", pReader->suppInfo.colId[0], pReader->idStr); - code = TSDB_CODE_INVALID_PARA; - goto _end; - } - - pReader->status.pPrimaryTsCol = taosArrayGet(pReader->resBlockInfo.pResBlock->pDataBlock, pSup->slotId[0]); - int32_t type = pReader->status.pPrimaryTsCol->info.type; - if (type != TSDB_DATA_TYPE_TIMESTAMP) { - tsdbError("the first column isn't primary timestamp in result block, actual: %s, %s", tDataTypes[type].name, - pReader->idStr); - code = TSDB_CODE_INVALID_PARA; - goto _end; - } - - tsdbInitReaderLock(pReader); - - *ppReader = pReader; - return code; - -_end: - tsdbReaderClose(pReader); - *ppReader = NULL; - return code; -} - -static int32_t doLoadBlockIndex(STsdbReader* pReader, SDataFReader* pFileReader, SArray* pIndexList) { - int64_t st = taosGetTimestampUs(); - LRUHandle* handle = NULL; - int32_t code = tsdbCacheGetBlockIdx(pFileReader->pTsdb->biCache, pFileReader, &handle); - if (code != TSDB_CODE_SUCCESS || handle == NULL) { - goto _end; - } - - int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); - - SArray* aBlockIdx = (SArray*)taosLRUCacheValue(pFileReader->pTsdb->biCache, handle); - size_t num = taosArrayGetSize(aBlockIdx); - if (num == 0) { - tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); - return TSDB_CODE_SUCCESS; - } - - // todo binary search to the start position - int64_t et1 = taosGetTimestampUs(); - - SBlockIdx* pBlockIdx = NULL; - STableUidList* pList = &pReader->status.uidList; - - int32_t i = 0, j = 0; - while (i < num && j < numOfTables) { - pBlockIdx = 
(SBlockIdx*)taosArrayGet(aBlockIdx, i); - if (pBlockIdx->suid != pReader->suid) { - i += 1; - continue; - } - - if (pBlockIdx->uid < pList->tableUidList[j]) { - i += 1; - continue; - } - - if (pBlockIdx->uid > pList->tableUidList[j]) { - j += 1; - continue; - } - - if (pBlockIdx->uid == pList->tableUidList[j]) { - // this block belongs to a table that is not queried. - STableBlockScanInfo* pScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, pBlockIdx->uid, pReader->idStr); - if (pScanInfo == NULL) { - tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); - return terrno; - } - - if (pScanInfo->pBlockList == NULL) { - pScanInfo->pBlockList = taosArrayInit(4, sizeof(SBlockIndex)); - } - - taosArrayPush(pIndexList, pBlockIdx); - - i += 1; - j += 1; - } - } - - int64_t et2 = taosGetTimestampUs(); - tsdbDebug("load block index for %d/%d tables completed, elapsed time:%.2f ms, set blockIdx:%.2f ms, size:%.2f Kb %s", - numOfTables, (int32_t)num, (et1 - st) / 1000.0, (et2 - et1) / 1000.0, num * sizeof(SBlockIdx) / 1024.0, - pReader->idStr); - - pReader->cost.headFileLoadTime += (et1 - st) / 1000.0; - -_end: - tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); - return code; -} - -static void doCleanupTableScanInfo(STableBlockScanInfo* pScanInfo) { - // reset the index in last block when handing a new file - tMapDataClear(&pScanInfo->mapData); - taosArrayClear(pScanInfo->pBlockList); -} - -static void cleanupTableScanInfo(SReaderStatus* pStatus) { - if (pStatus->mapDataCleaned) { - return; - } - - SSHashObj* pTableMap = pStatus->pTableMap; - STableBlockScanInfo** px = NULL; - int32_t iter = 0; - - while (1) { - px = tSimpleHashIterate(pTableMap, px, &iter); - if (px == NULL) { - break; - } - - doCleanupTableScanInfo(*px); - } - - pStatus->mapDataCleaned = true; -} - -static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, SBlockNumber* pBlockNum, - SArray* pTableScanInfoList) { - size_t sizeInDisk = 0; - size_t numOfTables = taosArrayGetSize(pIndexList); - - int64_t st = taosGetTimestampUs(); - cleanupTableScanInfo(&pReader->status); - - // set the flag for the new file - pReader->status.mapDataCleaned = false; - for (int32_t i = 0; i < numOfTables; ++i) { - SBlockIdx* pBlockIdx = taosArrayGet(pIndexList, i); - STableBlockScanInfo* pScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, pBlockIdx->uid, pReader->idStr); - if (pScanInfo == NULL) { - return terrno; - } - - tMapDataReset(&pScanInfo->mapData); - tsdbReadDataBlk(pReader->pFileReader, pBlockIdx, &pScanInfo->mapData); - taosArrayEnsureCap(pScanInfo->pBlockList, pScanInfo->mapData.nItem); - - sizeInDisk += pScanInfo->mapData.nData; - - int32_t step = ASCENDING_TRAVERSE(pReader->order) ? 1 : -1; - STimeWindow w = pReader->window; - if (ASCENDING_TRAVERSE(pReader->order)) { - w.skey = pScanInfo->lastKey + step; - } else { - w.ekey = pScanInfo->lastKey + step; - } - - if (isEmptyQueryTimeWindow(&w)) { - continue; - } - - SDataBlk block = {0}; - for (int32_t j = 0; j < pScanInfo->mapData.nItem; ++j) { - tGetDataBlk(pScanInfo->mapData.pData + pScanInfo->mapData.aOffset[j], &block); - - // 1. time range check - // if (block.minKey.ts > pReader->window.ekey || block.maxKey.ts < pReader->window.skey) { - if (block.minKey.ts > w.ekey || block.maxKey.ts < w.skey) { - continue; - } - - // 2. 
version range check - if (block.minVer > pReader->verRange.maxVer || block.maxVer < pReader->verRange.minVer) { - continue; - } - - SBlockIndex bIndex = {.ordinalIndex = j, .inFileOffset = block.aSubBlock->offset}; - bIndex.window = (STimeWindow){.skey = block.minKey.ts, .ekey = block.maxKey.ts}; - - void* p1 = taosArrayPush(pScanInfo->pBlockList, &bIndex); - if (p1 == NULL) { - tMapDataClear(&pScanInfo->mapData); - return TSDB_CODE_OUT_OF_MEMORY; - } - - pBlockNum->numOfBlocks += 1; - } - - if (taosArrayGetSize(pScanInfo->pBlockList) > 0) { - taosArrayPush(pTableScanInfoList, &pScanInfo); - } - } - - pBlockNum->numOfLastFiles = pReader->pFileReader->pSet->nSttF; - int32_t total = pBlockNum->numOfLastFiles + pBlockNum->numOfBlocks; - - double el = (taosGetTimestampUs() - st) / 1000.0; - tsdbDebug( - "load block of %ld tables completed, blocks:%d in %d tables, last-files:%d, block-info-size:%.2f Kb, elapsed " - "time:%.2f ms %s", - numOfTables, pBlockNum->numOfBlocks, (int32_t)taosArrayGetSize(pTableScanInfoList), pBlockNum->numOfLastFiles, - sizeInDisk / 1000.0, el, pReader->idStr); - - pReader->cost.numOfBlocks += total; - pReader->cost.headFileLoadTime += el; - - return TSDB_CODE_SUCCESS; -} - -static void setBlockAllDumped(SFileBlockDumpInfo* pDumpInfo, int64_t maxKey, int32_t order) { - int32_t step = ASCENDING_TRAVERSE(order) ? 1 : -1; - pDumpInfo->allDumped = true; - pDumpInfo->lastKey = maxKey + step; -} - -static int32_t doCopyColVal(SColumnInfoData* pColInfoData, int32_t rowIndex, int32_t colIndex, SColVal* pColVal, - SBlockLoadSuppInfo* pSup) { - if (IS_VAR_DATA_TYPE(pColVal->type)) { - if (!COL_VAL_IS_VALUE(pColVal)) { - colDataSetNULL(pColInfoData, rowIndex); - } else { - varDataSetLen(pSup->buildBuf[colIndex], pColVal->value.nData); - if (pColVal->value.nData > pColInfoData->info.bytes) { - tsdbWarn("column cid:%d actual data len %d is bigger than schema len %d", pColVal->cid, pColVal->value.nData, - pColInfoData->info.bytes); - return TSDB_CODE_TDB_INVALID_TABLE_SCHEMA_VER; - } - if (pColVal->value.nData > 0) { // pData may be null, if nData is 0 - memcpy(varDataVal(pSup->buildBuf[colIndex]), pColVal->value.pData, pColVal->value.nData); - } - - colDataSetVal(pColInfoData, rowIndex, pSup->buildBuf[colIndex], false); - } - } else { - colDataSetVal(pColInfoData, rowIndex, (const char*)&pColVal->value, !COL_VAL_IS_VALUE(pColVal)); - } - - return TSDB_CODE_SUCCESS; -} - -static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter) { - size_t num = taosArrayGetSize(pBlockIter->blockList); - if (num == 0) { - ASSERT(pBlockIter->numOfBlocks == num); - return NULL; - } - - SFileDataBlockInfo* pBlockInfo = taosArrayGet(pBlockIter->blockList, pBlockIter->index); - return pBlockInfo; -} - -static SDataBlk* getCurrentBlock(SDataBlockIter* pBlockIter) { return &pBlockIter->block; } - -static int doBinarySearchKey(TSKEY* keyList, int num, int pos, TSKEY key, int order) { - // start end position - int s, e; - s = pos; - - // check - ASSERT(pos >= 0 && pos < num && num > 0); - if (order == TSDB_ORDER_ASC) { - // find the first position which is smaller than the key - e = num - 1; - if (key < keyList[pos]) return -1; - while (1) { - // check can return - if (key >= keyList[e]) return e; - if (key <= keyList[s]) return s; - if (e - s <= 1) return s; - - // change start or end position - int mid = s + (e - s + 1) / 2; - if (keyList[mid] > key) - e = mid; - else if (keyList[mid] < key) - s = mid; - else - return mid; - } - } else { // DESC - // find the first position which is 
bigger than the key - e = 0; - if (key > keyList[pos]) return -1; - while (1) { - // check can return - if (key <= keyList[e]) return e; - if (key >= keyList[s]) return s; - if (s - e <= 1) return s; - - // change start or end position - int mid = s - (s - e + 1) / 2; - if (keyList[mid] < key) - e = mid; - else if (keyList[mid] > key) - s = mid; - else - return mid; - } - } -} - -static int32_t getEndPosInDataBlock(STsdbReader* pReader, SBlockData* pBlockData, SDataBlk* pBlock, int32_t pos) { - // NOTE: reverse the order to find the end position in data block - int32_t endPos = -1; - bool asc = ASCENDING_TRAVERSE(pReader->order); - - if (asc && pReader->window.ekey >= pBlock->maxKey.ts) { - endPos = pBlock->nRow - 1; - } else if (!asc && pReader->window.skey <= pBlock->minKey.ts) { - endPos = 0; - } else { - int64_t key = asc ? pReader->window.ekey : pReader->window.skey; - endPos = doBinarySearchKey(pBlockData->aTSKEY, pBlock->nRow, pos, key, pReader->order); - } - - if ((pReader->verRange.maxVer >= pBlock->minVer && pReader->verRange.maxVer < pBlock->maxVer) || - (pReader->verRange.minVer <= pBlock->maxVer && pReader->verRange.minVer > pBlock->minVer)) { - int32_t i = endPos; - - if (asc) { - for (; i >= 0; --i) { - if (pBlockData->aVersion[i] <= pReader->verRange.maxVer) { - break; - } - } - } else { - for (; i < pBlock->nRow; ++i) { - if (pBlockData->aVersion[i] >= pReader->verRange.minVer) { - break; - } - } - } - - endPos = i; - } - - return endPos; -} - -static void copyPrimaryTsCol(const SBlockData* pBlockData, SFileBlockDumpInfo* pDumpInfo, SColumnInfoData* pColData, - int32_t dumpedRows, bool asc) { - if (asc) { - memcpy(pColData->pData, &pBlockData->aTSKEY[pDumpInfo->rowIndex], dumpedRows * sizeof(int64_t)); - } else { - int32_t startIndex = pDumpInfo->rowIndex - dumpedRows + 1; - memcpy(pColData->pData, &pBlockData->aTSKEY[startIndex], dumpedRows * sizeof(int64_t)); - - // todo: opt perf by extract the loop - // reverse the array list - int32_t mid = dumpedRows >> 1u; - int64_t* pts = (int64_t*)pColData->pData; - for (int32_t j = 0; j < mid; ++j) { - int64_t t = pts[j]; - pts[j] = pts[dumpedRows - j - 1]; - pts[dumpedRows - j - 1] = t; - } - } -} - -// a faster version of copy procedure. -static void copyNumericCols(const SColData* pData, SFileBlockDumpInfo* pDumpInfo, SColumnInfoData* pColData, - int32_t dumpedRows, bool asc) { - uint8_t* p = NULL; - if (asc) { - p = pData->pData + tDataTypes[pData->type].bytes * pDumpInfo->rowIndex; - } else { - int32_t startIndex = pDumpInfo->rowIndex - dumpedRows + 1; - p = pData->pData + tDataTypes[pData->type].bytes * startIndex; - } - - int32_t step = asc ? 1 : -1; - - // make sure it is aligned to 8bit, the allocated memory address is aligned to 256bit - // ASSERT((((uint64_t)pColData->pData) & (0x8 - 1)) == 0); - - // 1. copy data in a batch model - memcpy(pColData->pData, p, dumpedRows * tDataTypes[pData->type].bytes); - - // 2. 
reverse the array list in case of descending order scan data block - if (!asc) { - switch (pColData->info.type) { - case TSDB_DATA_TYPE_TIMESTAMP: - case TSDB_DATA_TYPE_DOUBLE: - case TSDB_DATA_TYPE_BIGINT: - case TSDB_DATA_TYPE_UBIGINT: { - int32_t mid = dumpedRows >> 1u; - int64_t* pts = (int64_t*)pColData->pData; - for (int32_t j = 0; j < mid; ++j) { - int64_t t = pts[j]; - pts[j] = pts[dumpedRows - j - 1]; - pts[dumpedRows - j - 1] = t; - } - break; - } - - case TSDB_DATA_TYPE_BOOL: - case TSDB_DATA_TYPE_TINYINT: - case TSDB_DATA_TYPE_UTINYINT: { - int32_t mid = dumpedRows >> 1u; - int8_t* pts = (int8_t*)pColData->pData; - for (int32_t j = 0; j < mid; ++j) { - int8_t t = pts[j]; - pts[j] = pts[dumpedRows - j - 1]; - pts[dumpedRows - j - 1] = t; - } - break; - } - - case TSDB_DATA_TYPE_SMALLINT: - case TSDB_DATA_TYPE_USMALLINT: { - int32_t mid = dumpedRows >> 1u; - int16_t* pts = (int16_t*)pColData->pData; - for (int32_t j = 0; j < mid; ++j) { - int64_t t = pts[j]; - pts[j] = pts[dumpedRows - j - 1]; - pts[dumpedRows - j - 1] = t; - } - break; - } - - case TSDB_DATA_TYPE_FLOAT: - case TSDB_DATA_TYPE_INT: - case TSDB_DATA_TYPE_UINT: { - int32_t mid = dumpedRows >> 1u; - int32_t* pts = (int32_t*)pColData->pData; - for (int32_t j = 0; j < mid; ++j) { - int32_t t = pts[j]; - pts[j] = pts[dumpedRows - j - 1]; - pts[dumpedRows - j - 1] = t; - } - break; - } - } - } - - // 3. if the null value exists, check items one-by-one - if (pData->flag != HAS_VALUE) { - int32_t rowIndex = 0; - - for (int32_t j = pDumpInfo->rowIndex; rowIndex < dumpedRows; j += step, rowIndex++) { - uint8_t v = tColDataGetBitValue(pData, j); - if (v == 0 || v == 1) { - colDataSetNull_f(pColData->nullbitmap, rowIndex); - pColData->hasNull = true; - } - } - } -} - -static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader) { - SReaderStatus* pStatus = &pReader->status; - SDataBlockIter* pBlockIter = &pStatus->blockIter; - SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - - SBlockData* pBlockData = &pStatus->fileBlockData; - SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); - SDataBlk* pBlock = getCurrentBlock(pBlockIter); - SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; - int32_t numOfOutputCols = pSupInfo->numOfCols; - int32_t code = TSDB_CODE_SUCCESS; - - SColVal cv = {0}; - int64_t st = taosGetTimestampUs(); - bool asc = ASCENDING_TRAVERSE(pReader->order); - int32_t step = asc ? 1 : -1; - - // no data exists, return directly. - if (pBlockData->nRow == 0 || pBlockData->aTSKEY == 0) { - tsdbWarn("%p no need to copy since no data in blockData, table uid:%" PRIu64 " has been dropped, %s", pReader, - pBlockInfo->uid, pReader->idStr); - pResBlock->info.rows = 0; - return 0; - } - - // row index of dump info remain the initial position, let's find the appropriate start position. - if ((pDumpInfo->rowIndex == 0 && asc) || (pDumpInfo->rowIndex == pBlock->nRow - 1 && (!asc))) { - if (asc && pReader->window.skey <= pBlock->minKey.ts && pReader->verRange.minVer <= pBlock->minVer) { - // pDumpInfo->rowIndex = 0; - } else if (!asc && pReader->window.ekey >= pBlock->maxKey.ts && pReader->verRange.maxVer >= pBlock->maxVer) { - // pDumpInfo->rowIndex = pBlock->nRow - 1; - } else { // find the appropriate the start position in current block, and set it to be the current rowIndex - int32_t pos = asc ? pBlock->nRow - 1 : 0; - int32_t order = asc ? TSDB_ORDER_DESC : TSDB_ORDER_ASC; - int64_t key = asc ? 
pReader->window.skey : pReader->window.ekey; - pDumpInfo->rowIndex = doBinarySearchKey(pBlockData->aTSKEY, pBlock->nRow, pos, key, order); - - if (pDumpInfo->rowIndex < 0) { - tsdbError( - "%p failed to locate the start position in current block, global index:%d, table index:%d, brange:%" PRId64 - "-%" PRId64 ", minVer:%" PRId64 ", maxVer:%" PRId64 " %s", - pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, pBlock->minVer, - pBlock->maxVer, pReader->idStr); - return TSDB_CODE_INVALID_PARA; - } - - ASSERT(pReader->verRange.minVer <= pBlock->maxVer && pReader->verRange.maxVer >= pBlock->minVer); - - // find the appropriate start position that satisfies the version requirement. - if ((pReader->verRange.maxVer >= pBlock->minVer && pReader->verRange.maxVer < pBlock->maxVer) || - (pReader->verRange.minVer <= pBlock->maxVer && pReader->verRange.minVer > pBlock->minVer)) { - int32_t i = pDumpInfo->rowIndex; - if (asc) { - for (; i < pBlock->nRow; ++i) { - if (pBlockData->aVersion[i] >= pReader->verRange.minVer) { - break; - } - } - } else { - for (; i >= 0; --i) { - if (pBlockData->aVersion[i] <= pReader->verRange.maxVer) { - break; - } - } - } - - pDumpInfo->rowIndex = i; - } - } - } - - // time window check - int32_t endIndex = getEndPosInDataBlock(pReader, pBlockData, pBlock, pDumpInfo->rowIndex); - if (endIndex == -1) { - setBlockAllDumped(pDumpInfo, pReader->window.ekey, pReader->order); - return TSDB_CODE_SUCCESS; - } - - endIndex += step; - int32_t dumpedRows = asc ? (endIndex - pDumpInfo->rowIndex) : (pDumpInfo->rowIndex - endIndex); - if (dumpedRows > pReader->resBlockInfo.capacity) { // output buffer check - dumpedRows = pReader->resBlockInfo.capacity; - } else if (dumpedRows <= 0) { // no qualified rows in current data block, abort directly. 
- setBlockAllDumped(pDumpInfo, pReader->window.ekey, pReader->order); - return TSDB_CODE_SUCCESS; - } - - int32_t i = 0; - int32_t rowIndex = 0; - - SColumnInfoData* pColData = taosArrayGet(pResBlock->pDataBlock, pSupInfo->slotId[i]); - if (pSupInfo->colId[i] == PRIMARYKEY_TIMESTAMP_COL_ID) { - copyPrimaryTsCol(pBlockData, pDumpInfo, pColData, dumpedRows, asc); - i += 1; - } - - int32_t colIndex = 0; - int32_t num = pBlockData->nColData; - while (i < numOfOutputCols && colIndex < num) { - rowIndex = 0; - - SColData* pData = tBlockDataGetColDataByIdx(pBlockData, colIndex); - if (pData->cid < pSupInfo->colId[i]) { - colIndex += 1; - } else if (pData->cid == pSupInfo->colId[i]) { - pColData = taosArrayGet(pResBlock->pDataBlock, pSupInfo->slotId[i]); - - if (pData->flag == HAS_NONE || pData->flag == HAS_NULL || pData->flag == (HAS_NULL | HAS_NONE)) { - colDataSetNNULL(pColData, 0, dumpedRows); - } else { - if (IS_MATHABLE_TYPE(pColData->info.type)) { - copyNumericCols(pData, pDumpInfo, pColData, dumpedRows, asc); - } else { // varchar/nchar type - for (int32_t j = pDumpInfo->rowIndex; rowIndex < dumpedRows; j += step) { - tColDataGetValue(pData, j, &cv); - code = doCopyColVal(pColData, rowIndex++, i, &cv, pSupInfo); - if (code) { - return code; - } - } - } - } - - colIndex += 1; - i += 1; - } else { // the specified column does not exist in file block, fill with null data - pColData = taosArrayGet(pResBlock->pDataBlock, pSupInfo->slotId[i]); - colDataSetNNULL(pColData, 0, dumpedRows); - i += 1; - } - } - - // fill the mis-matched columns with null value - while (i < numOfOutputCols) { - pColData = taosArrayGet(pResBlock->pDataBlock, pSupInfo->slotId[i]); - colDataSetNNULL(pColData, 0, dumpedRows); - i += 1; - } - - pResBlock->info.dataLoad = 1; - pResBlock->info.rows = dumpedRows; - pDumpInfo->rowIndex += step * dumpedRows; - - // check if current block are all handled - if (pDumpInfo->rowIndex >= 0 && pDumpInfo->rowIndex < pBlock->nRow) { - int64_t ts = pBlockData->aTSKEY[pDumpInfo->rowIndex]; - if (outOfTimeWindow(ts, &pReader->window)) { // the remain data has out of query time window, ignore current block - setBlockAllDumped(pDumpInfo, ts, pReader->order); - } - } else { - int64_t ts = asc ? pBlock->maxKey.ts : pBlock->minKey.ts; - setBlockAllDumped(pDumpInfo, ts, pReader->order); - } - - double elapsedTime = (taosGetTimestampUs() - st) / 1000.0; - pReader->cost.blockLoadTime += elapsedTime; - - int32_t unDumpedRows = asc ? 
pBlock->nRow - pDumpInfo->rowIndex : pDumpInfo->rowIndex + 1; - tsdbDebug("%p copy file block to sdatablock, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64 - ", rows:%d, remain:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", uid:%" PRIu64 " elapsed time:%.2f ms, %s", - pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, dumpedRows, - unDumpedRows, pBlock->minVer, pBlock->maxVer, pBlockInfo->uid, elapsedTime, pReader->idStr); - - return TSDB_CODE_SUCCESS; -} - -static FORCE_INLINE STSchema* getTableSchemaImpl(STsdbReader* pReader, uint64_t uid) { - ASSERT(pReader->pSchema == NULL); - - int32_t code = metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->suid, uid, -1, &pReader->pSchema); - if (code != TSDB_CODE_SUCCESS || pReader->pSchema == NULL) { - terrno = code; - tsdbError("failed to get table schema, uid:%" PRIu64 ", it may have been dropped, ver:-1, %s", uid, pReader->idStr); - return NULL; - } - - code = tsdbRowMergerInit(&pReader->status.merger, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - terrno = code; - tsdbError("failed to init merger, code:%s, %s", tstrerror(code), pReader->idStr); - return NULL; - } - - return pReader->pSchema; -} - -static int32_t doLoadFileBlockData(STsdbReader* pReader, SDataBlockIter* pBlockIter, SBlockData* pBlockData, - uint64_t uid) { - int32_t code = 0; - STSchema* pSchema = pReader->pSchema; - int64_t st = taosGetTimestampUs(); - - tBlockDataReset(pBlockData); - - if (pReader->pSchema == NULL) { - pSchema = getTableSchemaImpl(pReader, uid); - if (pSchema == NULL) { - tsdbDebug("%p table uid:%" PRIu64 " has been dropped, no data existed, %s", pReader, uid, pReader->idStr); - return code; - } - } - - SBlockLoadSuppInfo* pSup = &pReader->suppInfo; - TABLEID tid = {.suid = pReader->suid, .uid = uid}; - code = tBlockDataInit(pBlockData, &tid, pSchema, &pSup->colId[1], pSup->numOfCols - 1); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - - SDataBlk* pBlock = getCurrentBlock(pBlockIter); - code = tsdbReadDataBlock(pReader->pFileReader, pBlock, pBlockData); - if (code != TSDB_CODE_SUCCESS) { - tsdbError("%p error occurs in loading file block, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64 - ", rows:%d, code:%s %s", - pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, pBlock->nRow, - tstrerror(code), pReader->idStr); - return code; - } - - double elapsedTime = (taosGetTimestampUs() - st) / 1000.0; - - tsdbDebug("%p load file block into buffer, global index:%d, index in table block list:%d, brange:%" PRId64 "-%" PRId64 - ", rows:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", elapsed time:%.2f ms, %s", - pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, pBlock->nRow, - pBlock->minVer, pBlock->maxVer, elapsedTime, pReader->idStr); - - pReader->cost.blockLoadTime += elapsedTime; - pDumpInfo->allDumped = false; - - return TSDB_CODE_SUCCESS; -} - -static void cleanupBlockOrderSupporter(SBlockOrderSupporter* pSup) { - taosMemoryFreeClear(pSup->numOfBlocksPerTable); - taosMemoryFreeClear(pSup->indexPerTable); - - for (int32_t i = 0; i < pSup->numOfTables; ++i) { - SBlockOrderWrapper* pBlockInfo = pSup->pDataBlockInfo[i]; - taosMemoryFreeClear(pBlockInfo); - } - - taosMemoryFreeClear(pSup->pDataBlockInfo); -} - -static int32_t initBlockOrderSupporter(SBlockOrderSupporter* 
pSup, int32_t numOfTables) { - pSup->numOfBlocksPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables); - pSup->indexPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables); - pSup->pDataBlockInfo = taosMemoryCalloc(1, POINTER_BYTES * numOfTables); - - if (pSup->numOfBlocksPerTable == NULL || pSup->indexPerTable == NULL || pSup->pDataBlockInfo == NULL) { - cleanupBlockOrderSupporter(pSup); - return TSDB_CODE_OUT_OF_MEMORY; - } - - return TSDB_CODE_SUCCESS; -} - -static int32_t fileDataBlockOrderCompar(const void* pLeft, const void* pRight, void* param) { - int32_t leftIndex = *(int32_t*)pLeft; - int32_t rightIndex = *(int32_t*)pRight; - - SBlockOrderSupporter* pSupporter = (SBlockOrderSupporter*)param; - - int32_t leftTableBlockIndex = pSupporter->indexPerTable[leftIndex]; - int32_t rightTableBlockIndex = pSupporter->indexPerTable[rightIndex]; - - if (leftTableBlockIndex > pSupporter->numOfBlocksPerTable[leftIndex]) { - /* left block is empty */ - return 1; - } else if (rightTableBlockIndex > pSupporter->numOfBlocksPerTable[rightIndex]) { - /* right block is empty */ - return -1; - } - - SBlockOrderWrapper* pLeftBlock = &pSupporter->pDataBlockInfo[leftIndex][leftTableBlockIndex]; - SBlockOrderWrapper* pRightBlock = &pSupporter->pDataBlockInfo[rightIndex][rightTableBlockIndex]; - - return pLeftBlock->offset > pRightBlock->offset ? 1 : -1; -} - -static int32_t doSetCurrentBlock(SDataBlockIter* pBlockIter, const char* idStr) { - SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); - if (pBlockInfo != NULL) { - STableBlockScanInfo* pScanInfo = getTableBlockScanInfo(pBlockIter->pTableMap, pBlockInfo->uid, idStr); - if (pScanInfo == NULL) { - return terrno; - } - - SBlockIndex* pIndex = taosArrayGet(pScanInfo->pBlockList, pBlockInfo->tbBlockIdx); - tMapDataGetItemByIdx(&pScanInfo->mapData, pIndex->ordinalIndex, &pBlockIter->block, tGetDataBlk); - } - -#if 0 - qDebug("check file block, table uid:%"PRIu64" index:%d offset:%"PRId64", ", pScanInfo->uid, *mapDataIndex, pBlockIter->block.aSubBlock[0].offset); -#endif - - return TSDB_CODE_SUCCESS; -} - -static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t numOfBlocks, - SArray* pTableList) { - bool asc = ASCENDING_TRAVERSE(pReader->order); - - SBlockOrderSupporter sup = {0}; - pBlockIter->numOfBlocks = numOfBlocks; - taosArrayClear(pBlockIter->blockList); - pBlockIter->pTableMap = pReader->status.pTableMap; - - // access data blocks according to the offset of each block in asc/desc order. 
- int32_t numOfTables = taosArrayGetSize(pTableList); - - int64_t st = taosGetTimestampUs(); - int32_t code = initBlockOrderSupporter(&sup, numOfTables); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - int32_t cnt = 0; - - for (int32_t i = 0; i < numOfTables; ++i) { - STableBlockScanInfo* pTableScanInfo = taosArrayGetP(pTableList, i); - ASSERT(pTableScanInfo->pBlockList != NULL && taosArrayGetSize(pTableScanInfo->pBlockList) > 0); - - size_t num = taosArrayGetSize(pTableScanInfo->pBlockList); - sup.numOfBlocksPerTable[sup.numOfTables] = num; - - char* buf = taosMemoryMalloc(sizeof(SBlockOrderWrapper) * num); - if (buf == NULL) { - cleanupBlockOrderSupporter(&sup); - return TSDB_CODE_OUT_OF_MEMORY; - } - - sup.pDataBlockInfo[sup.numOfTables] = (SBlockOrderWrapper*)buf; - - for (int32_t k = 0; k < num; ++k) { - SBlockIndex* pIndex = taosArrayGet(pTableScanInfo->pBlockList, k); - sup.pDataBlockInfo[sup.numOfTables][k] = - (SBlockOrderWrapper){.uid = pTableScanInfo->uid, .offset = pIndex->inFileOffset}; - cnt++; - } - - sup.numOfTables += 1; - } - - if (numOfBlocks != cnt && sup.numOfTables != numOfTables) { - cleanupBlockOrderSupporter(&sup); - return TSDB_CODE_INVALID_PARA; - } - - // since there is only one table qualified, blocks are not sorted - if (sup.numOfTables == 1) { - for (int32_t i = 0; i < numOfBlocks; ++i) { - SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[0][i].uid, .tbBlockIdx = i}; - taosArrayPush(pBlockIter->blockList, &blockInfo); - } - - int64_t et = taosGetTimestampUs(); - tsdbDebug("%p create blocks info struct completed for one table, %d blocks not sorted, elapsed time:%.2f ms %s", - pReader, numOfBlocks, (et - st) / 1000.0, pReader->idStr); - - pBlockIter->index = asc ? 0 : (numOfBlocks - 1); - cleanupBlockOrderSupporter(&sup); - doSetCurrentBlock(pBlockIter, pReader->idStr); - return TSDB_CODE_SUCCESS; - } - - tsdbDebug("%p create data blocks info struct completed, %d blocks in %d tables %s", pReader, cnt, sup.numOfTables, - pReader->idStr); - - SMultiwayMergeTreeInfo* pTree = NULL; - - uint8_t ret = tMergeTreeCreate(&pTree, sup.numOfTables, &sup, fileDataBlockOrderCompar); - if (ret != TSDB_CODE_SUCCESS) { - cleanupBlockOrderSupporter(&sup); - return TSDB_CODE_OUT_OF_MEMORY; - } - - int32_t numOfTotal = 0; - while (numOfTotal < cnt) { - int32_t pos = tMergeTreeGetChosenIndex(pTree); - int32_t index = sup.indexPerTable[pos]++; - - SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[pos][index].uid, .tbBlockIdx = index}; - taosArrayPush(pBlockIter->blockList, &blockInfo); - - // set data block index overflow, in order to disable the offset comparator - if (sup.indexPerTable[pos] >= sup.numOfBlocksPerTable[pos]) { - sup.indexPerTable[pos] = sup.numOfBlocksPerTable[pos] + 1; - } - - numOfTotal += 1; - tMergeTreeAdjust(pTree, tMergeTreeGetAdjustIndex(pTree)); - } - - int64_t et = taosGetTimestampUs(); - tsdbDebug("%p %d data blocks access order completed, elapsed time:%.2f ms %s", pReader, numOfBlocks, - (et - st) / 1000.0, pReader->idStr); - cleanupBlockOrderSupporter(&sup); - taosMemoryFree(pTree); - - pBlockIter->index = asc ? 0 : (numOfBlocks - 1); - doSetCurrentBlock(pBlockIter, pReader->idStr); - - return TSDB_CODE_SUCCESS; -} - -static bool blockIteratorNext(SDataBlockIter* pBlockIter, const char* idStr) { - bool asc = ASCENDING_TRAVERSE(pBlockIter->order); - - int32_t step = asc ? 
1 : -1; - if ((pBlockIter->index >= pBlockIter->numOfBlocks - 1 && asc) || (pBlockIter->index <= 0 && (!asc))) { - return false; - } - - pBlockIter->index += step; - doSetCurrentBlock(pBlockIter, idStr); - - return true; -} - -/** - * This is an two rectangles overlap cases. - */ -static int32_t dataBlockPartiallyRequired(STimeWindow* pWindow, SVersionRange* pVerRange, SDataBlk* pBlock) { - return (pWindow->ekey < pBlock->maxKey.ts && pWindow->ekey >= pBlock->minKey.ts) || - (pWindow->skey > pBlock->minKey.ts && pWindow->skey <= pBlock->maxKey.ts) || - (pVerRange->minVer > pBlock->minVer && pVerRange->minVer <= pBlock->maxVer) || - (pVerRange->maxVer < pBlock->maxVer && pVerRange->maxVer >= pBlock->minVer); -} - -static bool getNeighborBlockOfSameTable(SFileDataBlockInfo* pBlockInfo, STableBlockScanInfo* pTableBlockScanInfo, - int32_t* nextIndex, int32_t order, SBlockIndex* pBlockIndex) { - bool asc = ASCENDING_TRAVERSE(order); - if (asc && pBlockInfo->tbBlockIdx >= taosArrayGetSize(pTableBlockScanInfo->pBlockList) - 1) { - return false; - } - - if (!asc && pBlockInfo->tbBlockIdx == 0) { - return false; - } - - int32_t step = asc ? 1 : -1; - *nextIndex = pBlockInfo->tbBlockIdx + step; - *pBlockIndex = *(SBlockIndex*)taosArrayGet(pTableBlockScanInfo->pBlockList, *nextIndex); - // tMapDataGetItemByIdx(&pTableBlockScanInfo->mapData, pIndex->ordinalIndex, pBlock, tGetDataBlk); - return true; -} - -static int32_t findFileBlockInfoIndex(SDataBlockIter* pBlockIter, SFileDataBlockInfo* pFBlockInfo) { - int32_t step = ASCENDING_TRAVERSE(pBlockIter->order) ? 1 : -1; - int32_t index = pBlockIter->index; - - while (index < pBlockIter->numOfBlocks && index >= 0) { - SFileDataBlockInfo* pFBlock = taosArrayGet(pBlockIter->blockList, index); - if (pFBlock->uid == pFBlockInfo->uid && pFBlock->tbBlockIdx == pFBlockInfo->tbBlockIdx) { - return index; - } - - index += step; - } - - return -1; -} - -static int32_t setFileBlockActiveInBlockIter(SDataBlockIter* pBlockIter, int32_t index, int32_t step) { - if (index < 0 || index >= pBlockIter->numOfBlocks) { - return -1; - } - - SFileDataBlockInfo fblock = *(SFileDataBlockInfo*)taosArrayGet(pBlockIter->blockList, index); - pBlockIter->index += step; - - if (index != pBlockIter->index) { - taosArrayRemove(pBlockIter->blockList, index); - taosArrayInsert(pBlockIter->blockList, pBlockIter->index, &fblock); - - SFileDataBlockInfo* pBlockInfo = taosArrayGet(pBlockIter->blockList, pBlockIter->index); - ASSERT(pBlockInfo->uid == fblock.uid && pBlockInfo->tbBlockIdx == fblock.tbBlockIdx); - } - - doSetCurrentBlock(pBlockIter, ""); - return TSDB_CODE_SUCCESS; -} - -// todo: this attribute could be acquired during extractin the global ordered block list. -static bool overlapWithNeighborBlock(SDataBlk* pBlock, SBlockIndex* pNeighborBlockIndex, int32_t order) { - // it is the last block in current file, no chance to overlap with neighbor blocks. 
- if (ASCENDING_TRAVERSE(order)) { - return pBlock->maxKey.ts == pNeighborBlockIndex->window.skey; - } else { - return pBlock->minKey.ts == pNeighborBlockIndex->window.ekey; - } -} - -static bool bufferDataInFileBlockGap(int32_t order, TSDBKEY key, SDataBlk* pBlock) { - bool ascScan = ASCENDING_TRAVERSE(order); - - return (ascScan && (key.ts != TSKEY_INITIAL_VAL && key.ts <= pBlock->minKey.ts)) || - (!ascScan && (key.ts != TSKEY_INITIAL_VAL && key.ts >= pBlock->maxKey.ts)); -} - -static bool keyOverlapFileBlock(TSDBKEY key, SDataBlk* pBlock, SVersionRange* pVerRange) { - return (key.ts >= pBlock->minKey.ts && key.ts <= pBlock->maxKey.ts) && (pBlock->maxVer >= pVerRange->minVer) && - (pBlock->minVer <= pVerRange->maxVer); -} - -static bool doCheckforDatablockOverlap(STableBlockScanInfo* pBlockScanInfo, const SDataBlk* pBlock, - int32_t startIndex) { - size_t num = taosArrayGetSize(pBlockScanInfo->delSkyline); - - for (int32_t i = startIndex; i < num; i += 1) { - TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, i); - if (p->ts >= pBlock->minKey.ts && p->ts <= pBlock->maxKey.ts) { - if (p->version >= pBlock->minVer) { - return true; - } - } else if (p->ts < pBlock->minKey.ts) { // p->ts < pBlock->minKey.ts - if (p->version >= pBlock->minVer) { - if (i < num - 1) { - TSDBKEY* pnext = taosArrayGet(pBlockScanInfo->delSkyline, i + 1); - if (pnext->ts >= pBlock->minKey.ts) { - return true; - } - } else { // it must be the last point - ASSERT(p->version == 0); - } - } - } else { // (p->ts > pBlock->maxKey.ts) { - return false; - } - } - - return false; -} - -static bool overlapWithDelSkyline(STableBlockScanInfo* pBlockScanInfo, const SDataBlk* pBlock, int32_t order) { - if (pBlockScanInfo->delSkyline == NULL) { - return false; - } - - // ts is not overlap - TSDBKEY* pFirst = taosArrayGet(pBlockScanInfo->delSkyline, 0); - TSDBKEY* pLast = taosArrayGetLast(pBlockScanInfo->delSkyline); - if (pBlock->minKey.ts > pLast->ts || pBlock->maxKey.ts < pFirst->ts) { - return false; - } - - // version is not overlap - if (ASCENDING_TRAVERSE(order)) { - return doCheckforDatablockOverlap(pBlockScanInfo, pBlock, pBlockScanInfo->fileDelIndex); - } else { - int32_t index = pBlockScanInfo->fileDelIndex; - while (1) { - TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, index); - if (p->ts > pBlock->minKey.ts && index > 0) { - index -= 1; - } else { // find the first point that is smaller than the minKey.ts of dataBlock. - if (p->ts == pBlock->minKey.ts && p->version < pBlock->maxVer && index > 0) { - index -= 1; - } - break; - } - } - - return doCheckforDatablockOverlap(pBlockScanInfo, pBlock, index); - } -} - -typedef struct { - bool overlapWithNeighborBlock; - bool hasDupTs; - bool overlapWithDelInfo; - bool overlapWithLastBlock; - bool overlapWithKeyInBuf; - bool partiallyRequired; - bool moreThanCapcity; -} SDataBlockToLoadInfo; - -static void getBlockToLoadInfo(SDataBlockToLoadInfo* pInfo, SFileDataBlockInfo* pBlockInfo, SDataBlk* pBlock, - STableBlockScanInfo* pScanInfo, TSDBKEY keyInBuf, SLastBlockReader* pLastBlockReader, - STsdbReader* pReader) { - int32_t neighborIndex = 0; - SBlockIndex bIndex = {0}; - - bool hasNeighbor = getNeighborBlockOfSameTable(pBlockInfo, pScanInfo, &neighborIndex, pReader->order, &bIndex); - - // overlap with neighbor - if (hasNeighbor) { - pInfo->overlapWithNeighborBlock = overlapWithNeighborBlock(pBlock, &bIndex, pReader->order); - } - - // has duplicated ts of different version in this block - pInfo->hasDupTs = (pBlock->nSubBlock == 1) ? 
pBlock->hasDup : true; - pInfo->overlapWithDelInfo = overlapWithDelSkyline(pScanInfo, pBlock, pReader->order); - - if (hasDataInLastBlock(pLastBlockReader)) { - int64_t tsLast = getCurrentKeyInLastBlock(pLastBlockReader); - pInfo->overlapWithLastBlock = !(pBlock->maxKey.ts < tsLast || pBlock->minKey.ts > tsLast); - } - - pInfo->moreThanCapcity = pBlock->nRow > pReader->resBlockInfo.capacity; - pInfo->partiallyRequired = dataBlockPartiallyRequired(&pReader->window, &pReader->verRange, pBlock); - pInfo->overlapWithKeyInBuf = keyOverlapFileBlock(keyInBuf, pBlock, &pReader->verRange); -} - -// 1. the version of all rows should be less than the endVersion -// 2. current block should not overlap with next neighbor block -// 3. current timestamp should not be overlap with each other -// 4. output buffer should be large enough to hold all rows in current block -// 5. delete info should not overlap with current block data -// 6. current block should not contain the duplicated ts -static bool fileBlockShouldLoad(STsdbReader* pReader, SFileDataBlockInfo* pBlockInfo, SDataBlk* pBlock, - STableBlockScanInfo* pScanInfo, TSDBKEY keyInBuf, SLastBlockReader* pLastBlockReader) { - SDataBlockToLoadInfo info = {0}; - getBlockToLoadInfo(&info, pBlockInfo, pBlock, pScanInfo, keyInBuf, pLastBlockReader, pReader); - - bool loadDataBlock = - (info.overlapWithNeighborBlock || info.hasDupTs || info.partiallyRequired || info.overlapWithKeyInBuf || - info.moreThanCapcity || info.overlapWithDelInfo || info.overlapWithLastBlock); - - // log the reason why load the datablock for profile - if (loadDataBlock) { - tsdbDebug("%p uid:%" PRIu64 - " need to load the datablock, overlapneighbor:%d, hasDup:%d, partiallyRequired:%d, " - "overlapWithKey:%d, greaterThanBuf:%d, overlapWithDel:%d, overlapWithlastBlock:%d, %s", - pReader, pBlockInfo->uid, info.overlapWithNeighborBlock, info.hasDupTs, info.partiallyRequired, - info.overlapWithKeyInBuf, info.moreThanCapcity, info.overlapWithDelInfo, info.overlapWithLastBlock, - pReader->idStr); - } - - return loadDataBlock; -} - -static bool isCleanFileDataBlock(STsdbReader* pReader, SFileDataBlockInfo* pBlockInfo, SDataBlk* pBlock, - STableBlockScanInfo* pScanInfo, TSDBKEY keyInBuf, SLastBlockReader* pLastBlockReader) { - SDataBlockToLoadInfo info = {0}; - getBlockToLoadInfo(&info, pBlockInfo, pBlock, pScanInfo, keyInBuf, pLastBlockReader, pReader); - bool isCleanFileBlock = !(info.overlapWithNeighborBlock || info.hasDupTs || info.overlapWithKeyInBuf || - info.overlapWithDelInfo || info.overlapWithLastBlock); - return isCleanFileBlock; -} - -static int32_t buildDataBlockFromBuf(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, int64_t endKey) { - if (!(pBlockScanInfo->iiter.hasVal || pBlockScanInfo->iter.hasVal)) { - return TSDB_CODE_SUCCESS; - } - - SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; - - int64_t st = taosGetTimestampUs(); - int32_t code = buildDataBlockFromBufImpl(pBlockScanInfo, endKey, pReader->resBlockInfo.capacity, pReader); - - blockDataUpdateTsWindow(pBlock, pReader->suppInfo.slotId[0]); - pBlock->info.id.uid = pBlockScanInfo->uid; - - setComposedBlockFlag(pReader, true); - - double elapsedTime = (taosGetTimestampUs() - st) / 1000.0; - tsdbDebug("%p build data block from cache completed, elapsed time:%.2f ms, numOfRows:%" PRId64 ", brange:%" PRId64 - " - %" PRId64 ", uid:%" PRIu64 ", %s", - pReader, elapsedTime, pBlock->info.rows, pBlock->info.window.skey, pBlock->info.window.ekey, - pBlockScanInfo->uid, pReader->idStr); - - pReader->cost.buildmemBlock 
+= elapsedTime; - return code; -} - -static bool tryCopyDistinctRowFromFileBlock(STsdbReader* pReader, SBlockData* pBlockData, int64_t key, - SFileBlockDumpInfo* pDumpInfo, bool* copied) { - // opt version - // 1. it is not a border point - // 2. the direct next point is not an duplicated timestamp - int32_t code = TSDB_CODE_SUCCESS; - - *copied = false; - bool asc = (pReader->order == TSDB_ORDER_ASC); - if ((pDumpInfo->rowIndex < pDumpInfo->totalRows - 1 && asc) || (pDumpInfo->rowIndex > 0 && (!asc))) { - int32_t step = pReader->order == TSDB_ORDER_ASC ? 1 : -1; - - int64_t nextKey = pBlockData->aTSKEY[pDumpInfo->rowIndex + step]; - if (nextKey != key) { // merge is not needed - code = doAppendRowFromFileBlock(pReader->resBlockInfo.pResBlock, pReader, pBlockData, pDumpInfo->rowIndex); - if (code) { - return code; - } - pDumpInfo->rowIndex += step; - *copied = true; - } - } - - return code; -} - -static bool nextRowFromLastBlocks(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pScanInfo, - SVersionRange* pVerRange) { - int32_t step = ASCENDING_TRAVERSE(pLastBlockReader->order) ? 1 : -1; - - while (1) { - bool hasVal = tMergeTreeNext(&pLastBlockReader->mergeTree); - if (!hasVal) { // the next value will be the accessed key in stt - pScanInfo->lastKeyInStt += step; - return false; - } - - TSDBROW* pRow = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - int64_t key = pRow->pBlockData->aTSKEY[pRow->iRow]; - int64_t ver = pRow->pBlockData->aVersion[pRow->iRow]; - - pLastBlockReader->currentKey = key; - pScanInfo->lastKeyInStt = key; - - if (!hasBeenDropped(pScanInfo->delSkyline, &pScanInfo->sttBlockDelIndex, key, ver, pLastBlockReader->order, - pVerRange)) { - return true; - } - } -} - -static bool tryCopyDistinctRowFromSttBlock(TSDBROW* fRow, SLastBlockReader* pLastBlockReader, - STableBlockScanInfo* pScanInfo, int64_t ts, STsdbReader* pReader, - bool* copied) { - int32_t code = TSDB_CODE_SUCCESS; - - *copied = false; - - bool hasVal = nextRowFromLastBlocks(pLastBlockReader, pScanInfo, &pReader->verRange); - if (hasVal) { - int64_t next1 = getCurrentKeyInLastBlock(pLastBlockReader); - if (next1 != ts) { - code = doAppendRowFromFileBlock(pReader->resBlockInfo.pResBlock, pReader, fRow->pBlockData, fRow->iRow); - if (code) { - return code; - } - - *copied = true; - return code; - } - } else { - code = doAppendRowFromFileBlock(pReader->resBlockInfo.pResBlock, pReader, fRow->pBlockData, fRow->iRow); - if (code) { - return code; - } - - *copied = true; - return code; - } - - return code; -} - -static FORCE_INLINE STSchema* doGetSchemaForTSRow(int32_t sversion, STsdbReader* pReader, uint64_t uid) { - // always set the newest schema version in pReader->pSchema - if (pReader->pSchema == NULL) { - STSchema* ps = getTableSchemaImpl(pReader, uid); - if (ps == NULL) { - return NULL; - } - } - - if (pReader->pSchema && sversion == pReader->pSchema->version) { - return pReader->pSchema; - } - - void** p = tSimpleHashGet(pReader->pSchemaMap, &sversion, sizeof(sversion)); - if (p != NULL) { - return *(STSchema**)p; - } - - STSchema* ptr = NULL; - int32_t code = metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->suid, uid, sversion, &ptr); - if (code != TSDB_CODE_SUCCESS) { - terrno = code; - return NULL; - } else { - code = tSimpleHashPut(pReader->pSchemaMap, &sversion, sizeof(sversion), &ptr, POINTER_BYTES); - if (code != TSDB_CODE_SUCCESS) { - terrno = code; - return NULL; - } - return ptr; - } -} - -static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* 
pBlockScanInfo, TSDBROW* pRow, - SIterInfo* pIter, int64_t key, SLastBlockReader* pLastBlockReader) { - SRowMerger* pMerger = &pReader->status.merger; - SRow* pTSRow = NULL; - SBlockData* pBlockData = &pReader->status.fileBlockData; - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - - int64_t tsLast = INT64_MIN; - if (hasDataInLastBlock(pLastBlockReader)) { - tsLast = getCurrentKeyInLastBlock(pLastBlockReader); - } - - TSDBKEY k = TSDBROW_KEY(pRow); - TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); - - // merge is not initialized yet, due to the fact that the pReader->pSchema is not initialized - if (pMerger->pArray == NULL) { - ASSERT(pReader->pSchema == NULL); - STSchema* ps = getTableSchemaImpl(pReader, pBlockScanInfo->uid); - if (ps == NULL) { - return terrno; - } - } - - int64_t minKey = 0; - if (pReader->order == TSDB_ORDER_ASC) { - minKey = INT64_MAX; // chosen the minimum value - if (minKey > tsLast && hasDataInLastBlock(pLastBlockReader)) { - minKey = tsLast; - } - - if (minKey > k.ts) { - minKey = k.ts; - } - - if (minKey > key && hasDataInFileBlock(pBlockData, pDumpInfo)) { - minKey = key; - } - } else { - minKey = INT64_MIN; - if (minKey < tsLast && hasDataInLastBlock(pLastBlockReader)) { - minKey = tsLast; - } - - if (minKey < k.ts) { - minKey = k.ts; - } - - if (minKey < key && hasDataInFileBlock(pBlockData, pDumpInfo)) { - minKey = key; - } - } - - // todo remove init - bool init = false; - - // ASC: file block ---> last block -----> imem -----> mem - // DESC: mem -----> imem -----> last block -----> file block - if (pReader->order == TSDB_ORDER_ASC) { - if (minKey == key) { - init = true; - int32_t code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); - } - - if (minKey == tsLast) { - TSDBROW* fRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - if (init) { - tsdbRowMergerAdd(pMerger, fRow1, NULL); - } else { - init = true; - int32_t code = tsdbRowMergerAdd(pMerger, fRow1, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->verRange, pReader->idStr); - } - - if (minKey == k.ts) { - STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); - if (pSchema == NULL) { - return terrno; - } - if (init) { - tsdbRowMergerAdd(pMerger, pRow, pSchema); - } else { - init = true; - int32_t code = tsdbRowMergerAdd(pMerger, pRow, pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - int32_t code = doMergeRowsInBuf(pIter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - } else { - if (minKey == k.ts) { - init = true; - STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); - if (pSchema == NULL) { - return terrno; - } - - int32_t code = tsdbRowMergerAdd(pMerger, pRow, pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doMergeRowsInBuf(pIter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); - if (code != TSDB_CODE_SUCCESS || pMerger->pTSchema == NULL) { - return code; - } - } - - if (minKey == tsLast) { - TSDBROW* fRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - if (init) { - tsdbRowMergerAdd(pMerger, fRow1, NULL); - } else { - init = true; - int32_t code = tsdbRowMergerAdd(pMerger, fRow1, 
pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->verRange, pReader->idStr); - } - - if (minKey == key) { - if (init) { - tsdbRowMergerAdd(pMerger, &fRow, NULL); - } else { - init = true; - int32_t code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); - } - } - - int32_t code = tsdbRowMergerGetRow(pMerger, &pTSRow); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); - - taosMemoryFree(pTSRow); - tsdbRowMergerClear(pMerger); - - return code; -} - -static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, STsdbReader* pReader, - STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData, - bool mergeBlockData) { - SRowMerger* pMerger = &pReader->status.merger; - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - - int64_t tsLastBlock = getCurrentKeyInLastBlock(pLastBlockReader); - bool copied = false; - int32_t code = TSDB_CODE_SUCCESS; - SRow* pTSRow = NULL; - TSDBROW* pRow = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - - // create local variable to hold the row value - TSDBROW fRow = {.iRow = pRow->iRow, .type = TSDBROW_COL_FMT, .pBlockData = pRow->pBlockData}; - - tsdbTrace("fRow ptr:%p, %d, uid:%" PRIu64 ", %s", pRow->pBlockData, pRow->iRow, pLastBlockReader->uid, - pReader->idStr); - - // only last block exists - if ((!mergeBlockData) || (tsLastBlock != pBlockData->aTSKEY[pDumpInfo->rowIndex])) { - code = tryCopyDistinctRowFromSttBlock(&fRow, pLastBlockReader, pBlockScanInfo, tsLastBlock, pReader, &copied); - if (code) { - return code; - } - - if (copied) { - pBlockScanInfo->lastKey = tsLastBlock; - return TSDB_CODE_SUCCESS; - } else { - code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - TSDBROW* pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - tsdbRowMergerAdd(pMerger, pRow1, NULL); - doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLastBlock, pMerger, &pReader->verRange, - pReader->idStr); - - code = tsdbRowMergerGetRow(pMerger, &pTSRow); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); - - taosMemoryFree(pTSRow); - tsdbRowMergerClear(pMerger); - - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - } else { // not merge block data - code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLastBlock, pMerger, &pReader->verRange, pReader->idStr); - - // merge with block data if ts == key - if (tsLastBlock == pBlockData->aTSKEY[pDumpInfo->rowIndex]) { - doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); - } - - code = tsdbRowMergerGetRow(pMerger, &pTSRow); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); - - taosMemoryFree(pTSRow); - tsdbRowMergerClear(pMerger); - - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - - return TSDB_CODE_SUCCESS; -} - -static int32_t mergeFileBlockAndLastBlock(STsdbReader* pReader, SLastBlockReader* pLastBlockReader, int64_t key, - 
STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData) { - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - SRowMerger* pMerger = &pReader->status.merger; - - // merge is not initialized yet, due to the fact that the pReader->pSchema is not initialized - if (pMerger->pArray == NULL) { - ASSERT(pReader->pSchema == NULL); - STSchema* ps = getTableSchemaImpl(pReader, pBlockScanInfo->uid); - if (ps == NULL) { - return terrno; - } - } - - if (hasDataInFileBlock(pBlockData, pDumpInfo)) { - // no last block available, only data block exists - if (!hasDataInLastBlock(pLastBlockReader)) { - return mergeRowsInFileBlocks(pBlockData, pBlockScanInfo, key, pReader); - } - - // row in last file block - TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); - int64_t ts = getCurrentKeyInLastBlock(pLastBlockReader); - ASSERT(ts >= key); - - if (ASCENDING_TRAVERSE(pReader->order)) { - if (key < ts) { // imem, mem are all empty, file blocks (data blocks and last block) exist - return mergeRowsInFileBlocks(pBlockData, pBlockScanInfo, key, pReader); - } else if (key == ts) { - SRow* pTSRow = NULL; - int32_t code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); - - TSDBROW* pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - tsdbRowMergerAdd(pMerger, pRow1, NULL); - - doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, ts, pMerger, &pReader->verRange, pReader->idStr); - - code = tsdbRowMergerGetRow(pMerger, &pTSRow); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); - - taosMemoryFree(pTSRow); - tsdbRowMergerClear(pMerger); - return code; - } else { - return TSDB_CODE_SUCCESS; - } - } else { // desc order - return doMergeFileBlockAndLastBlock(pLastBlockReader, pReader, pBlockScanInfo, pBlockData, true); - } - } else { // only last block exists - return doMergeFileBlockAndLastBlock(pLastBlockReader, pReader, pBlockScanInfo, NULL, false); - } -} - -static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData, - SLastBlockReader* pLastBlockReader) { - SRowMerger* pMerger = &pReader->status.merger; - SRow* pTSRow = NULL; - int32_t code = TSDB_CODE_SUCCESS; - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - SArray* pDelList = pBlockScanInfo->delSkyline; - - TSDBROW* pRow = getValidMemRow(&pBlockScanInfo->iter, pDelList, pReader); - TSDBROW* piRow = getValidMemRow(&pBlockScanInfo->iiter, pDelList, pReader); - - int64_t tsLast = INT64_MIN; - if (hasDataInLastBlock(pLastBlockReader)) { - tsLast = getCurrentKeyInLastBlock(pLastBlockReader); - } - - int64_t key = hasDataInFileBlock(pBlockData, pDumpInfo) ? 
pBlockData->aTSKEY[pDumpInfo->rowIndex] : INT64_MIN; - - TSDBKEY k = TSDBROW_KEY(pRow); - TSDBKEY ik = TSDBROW_KEY(piRow); - STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); - if (pSchema == NULL) { - return code; - } - - STSchema* piSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(piRow), pReader, pBlockScanInfo->uid); - if (piSchema == NULL) { - return code; - } - - // merge is not initialized yet, due to the fact that the pReader->pSchema is not initialized - if (pMerger->pArray == NULL) { - ASSERT(pReader->pSchema == NULL); - STSchema* ps = getTableSchemaImpl(pReader, pBlockScanInfo->uid); - if (ps == NULL) { - return terrno; - } - } - - int64_t minKey = 0; - if (ASCENDING_TRAVERSE(pReader->order)) { - minKey = INT64_MAX; // let's find the minimum - if (minKey > k.ts) { - minKey = k.ts; - } - - if (minKey > ik.ts) { - minKey = ik.ts; - } - - if (minKey > key && hasDataInFileBlock(pBlockData, pDumpInfo)) { - minKey = key; - } - - if (minKey > tsLast && hasDataInLastBlock(pLastBlockReader)) { - minKey = tsLast; - } - } else { - minKey = INT64_MIN; // let find the maximum ts value - if (minKey < k.ts) { - minKey = k.ts; - } - - if (minKey < ik.ts) { - minKey = ik.ts; - } - - if (minKey < key && hasDataInFileBlock(pBlockData, pDumpInfo)) { - minKey = key; - } - - if (minKey < tsLast && hasDataInLastBlock(pLastBlockReader)) { - minKey = tsLast; - } - } - - bool init = false; - - // ASC: file block -----> last block -----> imem -----> mem - // DESC: mem -----> imem -----> last block -----> file block - if (ASCENDING_TRAVERSE(pReader->order)) { - if (minKey == key) { - init = true; - TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); - code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); - } - - if (minKey == tsLast) { - TSDBROW* pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - if (init) { - tsdbRowMergerAdd(pMerger, pRow1, NULL); - } else { - init = true; - code = tsdbRowMergerAdd(pMerger, pRow1, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - - doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->verRange, pReader->idStr); - } - - if (minKey == ik.ts) { - if (init) { - tsdbRowMergerAdd(pMerger, piRow, piSchema); - } else { - init = true; - code = tsdbRowMergerAdd(pMerger, piRow, piSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - - code = doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - - if (minKey == k.ts) { - if (init) { - tsdbRowMergerAdd(pMerger, pRow, pSchema); - } else { - // STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); - code = tsdbRowMergerAdd(pMerger, pRow, pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - code = doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - } else { - if (minKey == k.ts) { - init = true; - code = tsdbRowMergerAdd(pMerger, pRow, pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - - if (minKey == ik.ts) { - 
if (init) { - tsdbRowMergerAdd(pMerger, piRow, piSchema); - } else { - init = true; - code = tsdbRowMergerAdd(pMerger, piRow, piSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - code = doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - - if (minKey == tsLast) { - TSDBROW* pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - if (init) { - tsdbRowMergerAdd(pMerger, pRow1, NULL); - } else { - init = true; - code = tsdbRowMergerAdd(pMerger, pRow1, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->verRange, pReader->idStr); - } - - if (minKey == key) { - TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); - if (!init) { - code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } else { - tsdbRowMergerAdd(pMerger, &fRow, NULL); - } - doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); - } - } - - code = tsdbRowMergerGetRow(pMerger, &pTSRow); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); - - taosMemoryFree(pTSRow); - tsdbRowMergerClear(pMerger); - return code; -} - -static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader) { - if (pBlockScanInfo->iterInit) { - return TSDB_CODE_SUCCESS; - } - - int32_t code = TSDB_CODE_SUCCESS; - - TSDBKEY startKey = {0}; - if (ASCENDING_TRAVERSE(pReader->order)) { - // startKey = (TSDBKEY){.ts = pReader->window.skey, .version = pReader->verRange.minVer}; - startKey = (TSDBKEY){.ts = pBlockScanInfo->lastKey + 1, .version = pReader->verRange.minVer}; - } else { - // startKey = (TSDBKEY){.ts = pReader->window.ekey, .version = pReader->verRange.maxVer}; - startKey = (TSDBKEY){.ts = pBlockScanInfo->lastKey - 1, .version = pReader->verRange.maxVer}; - } - - int32_t backward = (!ASCENDING_TRAVERSE(pReader->order)); - int64_t st = 0; - - STbData* d = NULL; - if (pReader->pReadSnap->pMem != NULL) { - d = tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->suid, pBlockScanInfo->uid); - if (d != NULL) { - code = tsdbTbDataIterCreate(d, &startKey, backward, &pBlockScanInfo->iter.iter); - if (code == TSDB_CODE_SUCCESS) { - pBlockScanInfo->iter.hasVal = (tsdbTbDataIterGet(pBlockScanInfo->iter.iter) != NULL); - - tsdbDebug("%p uid:%" PRIu64 ", check data in mem from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64 - "-%" PRId64 " %s", - pReader, pBlockScanInfo->uid, startKey.ts, pReader->order, d->minKey, d->maxKey, pReader->idStr); - } else { - tsdbError("%p uid:%" PRIu64 ", failed to create iterator for imem, code:%s, %s", pReader, pBlockScanInfo->uid, - tstrerror(code), pReader->idStr); - return code; - } - } - } else { - tsdbDebug("%p uid:%" PRIu64 ", no data in mem, %s", pReader, pBlockScanInfo->uid, pReader->idStr); - } - - STbData* di = NULL; - if (pReader->pReadSnap->pIMem != NULL) { - di = tsdbGetTbDataFromMemTable(pReader->pReadSnap->pIMem, pReader->suid, pBlockScanInfo->uid); - if (di != NULL) { - code = tsdbTbDataIterCreate(di, &startKey, backward, &pBlockScanInfo->iiter.iter); - if (code == TSDB_CODE_SUCCESS) { - pBlockScanInfo->iiter.hasVal = (tsdbTbDataIterGet(pBlockScanInfo->iiter.iter) != NULL); - - tsdbDebug("%p uid:%" PRIu64 ", check data in imem from skey:%" PRId64 ", 
order:%d, ts range in buf:%" PRId64 - "-%" PRId64 " %s", - pReader, pBlockScanInfo->uid, startKey.ts, pReader->order, di->minKey, di->maxKey, pReader->idStr); - } else { - tsdbError("%p uid:%" PRIu64 ", failed to create iterator for mem, code:%s, %s", pReader, pBlockScanInfo->uid, - tstrerror(code), pReader->idStr); - return code; - } - } - } else { - tsdbDebug("%p uid:%" PRIu64 ", no data in imem, %s", pReader, pBlockScanInfo->uid, pReader->idStr); - } - - st = taosGetTimestampUs(); - initDelSkylineIterator(pBlockScanInfo, pReader, d, di); - pReader->cost.initDelSkylineIterTime += (taosGetTimestampUs() - st) / 1000.0; - - pBlockScanInfo->iterInit = true; - return TSDB_CODE_SUCCESS; -} - -static bool isValidFileBlockRow(SBlockData* pBlockData, SFileBlockDumpInfo* pDumpInfo, - STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader) { - // it is an multi-table data block - if (pBlockData->aUid != NULL) { - uint64_t uid = pBlockData->aUid[pDumpInfo->rowIndex]; - if (uid != pBlockScanInfo->uid) { // move to next row - return false; - } - } - - // check for version and time range - int64_t ver = pBlockData->aVersion[pDumpInfo->rowIndex]; - if (ver > pReader->verRange.maxVer || ver < pReader->verRange.minVer) { - return false; - } - - int64_t ts = pBlockData->aTSKEY[pDumpInfo->rowIndex]; - if (ts > pReader->window.ekey || ts < pReader->window.skey) { - return false; - } - - if (hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->fileDelIndex, ts, ver, pReader->order, - &pReader->verRange)) { - return false; - } - - return true; -} - -static bool initLastBlockReader(SLastBlockReader* pLBlockReader, STableBlockScanInfo* pScanInfo, STsdbReader* pReader) { - // the last block reader has been initialized for this table. - if (pLBlockReader->uid == pScanInfo->uid) { - return hasDataInLastBlock(pLBlockReader); - } - - if (pLBlockReader->uid != 0) { - tMergeTreeClose(&pLBlockReader->mergeTree); - } - - initMemDataIterator(pScanInfo, pReader); - pLBlockReader->uid = pScanInfo->uid; - - STimeWindow w = pLBlockReader->window; - if (ASCENDING_TRAVERSE(pLBlockReader->order)) { - w.skey = pScanInfo->lastKeyInStt; - } else { - w.ekey = pScanInfo->lastKeyInStt; - } - - tsdbDebug("init last block reader, window:%" PRId64 "-%" PRId64 ", uid:%" PRIu64 ", %s", w.skey, w.ekey, - pScanInfo->uid, pReader->idStr); - int32_t code = tMergeTreeOpen(&pLBlockReader->mergeTree, (pLBlockReader->order == TSDB_ORDER_DESC), - pReader->pFileReader, pReader->suid, pScanInfo->uid, &w, &pLBlockReader->verRange, - pLBlockReader->pInfo, false, pReader->idStr, false, pReader->status.pLDataIter); - if (code != TSDB_CODE_SUCCESS) { - return false; - } - - return nextRowFromLastBlocks(pLBlockReader, pScanInfo, &pReader->verRange); -} - -static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader) { return pLastBlockReader->mergeTree.pIter != NULL; } - -bool hasDataInFileBlock(const SBlockData* pBlockData, const SFileBlockDumpInfo* pDumpInfo) { - if ((pBlockData->nRow > 0) && (pBlockData->nRow != pDumpInfo->totalRows)) { - return false; // this is an invalid result. 
- } - return pBlockData->nRow > 0 && (!pDumpInfo->allDumped); -} - -int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBlockScanInfo, int64_t key, - STsdbReader* pReader) { - SRowMerger* pMerger = &pReader->status.merger; - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - bool copied = false; - int32_t code = tryCopyDistinctRowFromFileBlock(pReader, pBlockData, key, pDumpInfo, &copied); - if (code) { - return code; - } - - // merge is not initialized yet, due to the fact that the pReader->pSchema is not initialized - if (pMerger->pArray == NULL) { - ASSERT(pReader->pSchema == NULL); - STSchema* ps = getTableSchemaImpl(pReader, pBlockScanInfo->uid); - if (ps == NULL) { - return terrno; - } - } - - if (copied) { - pBlockScanInfo->lastKey = key; - return TSDB_CODE_SUCCESS; - } else { - TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); - - SRow* pTSRow = NULL; - code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); - code = tsdbRowMergerGetRow(pMerger, &pTSRow); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); - - taosMemoryFree(pTSRow); - tsdbRowMergerClear(pMerger); - return code; - } -} - -static int32_t buildComposedDataBlockImpl(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, - SBlockData* pBlockData, SLastBlockReader* pLastBlockReader) { - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - - TSDBROW *pRow = NULL, *piRow = NULL; - int64_t key = (pBlockData->nRow > 0 && (!pDumpInfo->allDumped)) ? pBlockData->aTSKEY[pDumpInfo->rowIndex] : INT64_MIN; - if (pBlockScanInfo->iter.hasVal) { - pRow = getValidMemRow(&pBlockScanInfo->iter, pBlockScanInfo->delSkyline, pReader); - } - - if (pBlockScanInfo->iiter.hasVal) { - piRow = getValidMemRow(&pBlockScanInfo->iiter, pBlockScanInfo->delSkyline, pReader); - } - - // two levels of mem-table does contain the valid rows - if (pRow != NULL && piRow != NULL) { - return doMergeMultiLevelRows(pReader, pBlockScanInfo, pBlockData, pLastBlockReader); - } - - // imem + file + last block - if (pBlockScanInfo->iiter.hasVal) { - return doMergeBufAndFileRows(pReader, pBlockScanInfo, piRow, &pBlockScanInfo->iiter, key, pLastBlockReader); - } - - // mem + file + last block - if (pBlockScanInfo->iter.hasVal) { - return doMergeBufAndFileRows(pReader, pBlockScanInfo, pRow, &pBlockScanInfo->iter, key, pLastBlockReader); - } - - // files data blocks + last block - return mergeFileBlockAndLastBlock(pReader, pLastBlockReader, key, pBlockScanInfo, pBlockData); -} - -static int32_t loadNeighborIfOverlap(SFileDataBlockInfo* pBlockInfo, STableBlockScanInfo* pBlockScanInfo, - STsdbReader* pReader, bool* loadNeighbor) { - int32_t code = TSDB_CODE_SUCCESS; - int32_t step = ASCENDING_TRAVERSE(pReader->order) ? 1 : -1; - int32_t nextIndex = -1; - SBlockIndex nxtBIndex = {0}; - - *loadNeighbor = false; - SDataBlk* pBlock = getCurrentBlock(&pReader->status.blockIter); - - bool hasNeighbor = getNeighborBlockOfSameTable(pBlockInfo, pBlockScanInfo, &nextIndex, pReader->order, &nxtBIndex); - if (!hasNeighbor) { // do nothing - return code; - } - - if (overlapWithNeighborBlock(pBlock, &nxtBIndex, pReader->order)) { // load next block - SReaderStatus* pStatus = &pReader->status; - SDataBlockIter* pBlockIter = &pStatus->blockIter; - - // 1. 
find the next neighbor block in the scan block list - SFileDataBlockInfo fb = {.uid = pBlockInfo->uid, .tbBlockIdx = nextIndex}; - int32_t neighborIndex = findFileBlockInfoIndex(pBlockIter, &fb); - - // 2. remove it from the scan block list - setFileBlockActiveInBlockIter(pBlockIter, neighborIndex, step); - - // 3. load the neighbor block, and set it to be the currently accessed file data block - code = doLoadFileBlockData(pReader, pBlockIter, &pStatus->fileBlockData, pBlockInfo->uid); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - // 4. check the data values - initBlockDumpInfo(pReader, pBlockIter); - *loadNeighbor = true; - } - - return code; -} - -static void updateComposedBlockInfo(STsdbReader* pReader, double el, STableBlockScanInfo* pBlockScanInfo) { - SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; - - pResBlock->info.id.uid = (pBlockScanInfo != NULL) ? pBlockScanInfo->uid : 0; - pResBlock->info.dataLoad = 1; - blockDataUpdateTsWindow(pResBlock, pReader->suppInfo.slotId[0]); - - setComposedBlockFlag(pReader, true); - - pReader->cost.composedBlocks += 1; - pReader->cost.buildComposedBlockTime += el; -} - -static int32_t buildComposedDataBlock(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - - SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; - - SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); - SLastBlockReader* pLastBlockReader = pReader->status.fileIter.pLastBlockReader; - - bool asc = ASCENDING_TRAVERSE(pReader->order); - int64_t st = taosGetTimestampUs(); - int32_t step = asc ? 1 : -1; - double el = 0; - SDataBlk* pBlock = getCurrentBlock(&pReader->status.blockIter); - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - - STableBlockScanInfo* pBlockScanInfo = NULL; - if (pBlockInfo != NULL) { - if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pBlockInfo->uid, sizeof(pBlockInfo->uid))) { - setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->order); - return code; - } - - pBlockScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, pBlockInfo->uid, pReader->idStr); - if (pBlockScanInfo == NULL) { - goto _end; - } - - TSDBKEY keyInBuf = getCurrentKeyInBuf(pBlockScanInfo, pReader); - - // it is a clean block, load it directly - if (isCleanFileDataBlock(pReader, pBlockInfo, pBlock, pBlockScanInfo, keyInBuf, pLastBlockReader) && - pBlock->nRow <= pReader->resBlockInfo.capacity) { - if (asc || (!hasDataInLastBlock(pLastBlockReader) && (pBlock->maxKey.ts > keyInBuf.ts))) { - code = copyBlockDataToSDataBlock(pReader); - if (code) { - goto _end; - } - - // record the last key value - pBlockScanInfo->lastKey = asc ? 
pBlock->maxKey.ts : pBlock->minKey.ts; - goto _end; - } - } - } else { // file blocks not exist - pBlockScanInfo = *pReader->status.pTableIter; - if (pReader->pIgnoreTables && - taosHashGet(*pReader->pIgnoreTables, &pBlockScanInfo->uid, sizeof(pBlockScanInfo->uid))) { - setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->order); - return code; - } - } - - SBlockData* pBlockData = &pReader->status.fileBlockData; - - while (1) { - bool hasBlockData = false; - { - while (pBlockData->nRow > 0 && - pBlockData->uid == pBlockScanInfo->uid) { // find the first qualified row in data block - if (isValidFileBlockRow(pBlockData, pDumpInfo, pBlockScanInfo, pReader)) { - hasBlockData = true; - break; - } - - pDumpInfo->rowIndex += step; - - pBlock = getCurrentBlock(&pReader->status.blockIter); - if (pDumpInfo->rowIndex >= pBlock->nRow || pDumpInfo->rowIndex < 0) { - pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); // NOTE: get the new block info - - // continue check for the next file block if the last ts in the current block - // is overlapped with the next neighbor block - bool loadNeighbor = false; - code = loadNeighborIfOverlap(pBlockInfo, pBlockScanInfo, pReader, &loadNeighbor); - if ((!loadNeighbor) || (code != 0)) { - setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->order); - break; - } - } - } - } - - // no data in last block and block, no need to proceed. - if (hasBlockData == false) { - break; - } - - code = buildComposedDataBlockImpl(pReader, pBlockScanInfo, pBlockData, pLastBlockReader); - if (code) { - goto _end; - } - - // currently loaded file data block is consumed - if ((pBlockData->nRow > 0) && (pDumpInfo->rowIndex >= pBlockData->nRow || pDumpInfo->rowIndex < 0)) { - pBlock = getCurrentBlock(&pReader->status.blockIter); - setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->order); - break; - } - - if (pResBlock->info.rows >= pReader->resBlockInfo.capacity) { - break; - } - } - -_end: - el = (taosGetTimestampUs() - st) / 1000.0; - updateComposedBlockInfo(pReader, el, pBlockScanInfo); - - if (pResBlock->info.rows > 0) { - tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 " rows:%" PRId64 - ", elapsed time:%.2f ms %s", - pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, - pResBlock->info.rows, el, pReader->idStr); - } - - return code; -} - -void setComposedBlockFlag(STsdbReader* pReader, bool composed) { pReader->status.composedDataBlock = composed; } - -int32_t getInitialDelIndex(const SArray* pDelSkyline, int32_t order) { - if (pDelSkyline == NULL) { - return 0; - } - - return ASCENDING_TRAVERSE(order) ? 
0 : taosArrayGetSize(pDelSkyline) - 1; -} - -int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, STbData* pMemTbData, - STbData* piMemTbData) { - if (pBlockScanInfo->delSkyline != NULL) { - return TSDB_CODE_SUCCESS; - } - - int32_t code = 0; - SArray* pDelData = taosArrayInit(4, sizeof(SDelData)); - - SDelFile* pDelFile = pReader->pReadSnap->fs.pDelFile; - if (pDelFile && taosArrayGetSize(pReader->pDelIdx) > 0) { - SDelIdx idx = {.suid = pReader->suid, .uid = pBlockScanInfo->uid}; - SDelIdx* pIdx = taosArraySearch(pReader->pDelIdx, &idx, tCmprDelIdx, TD_EQ); - - if (pIdx != NULL) { - code = tsdbReadDelDatav1(pReader->pDelFReader, pIdx, pDelData, pReader->verRange.maxVer); - } - if (code != TSDB_CODE_SUCCESS) { - goto _err; - } - } - - SDelData* p = NULL; - if (pMemTbData != NULL) { - p = pMemTbData->pHead; - while (p) { - if (p->version <= pReader->verRange.maxVer) { - taosArrayPush(pDelData, p); - } - - p = p->pNext; - } - } - - if (piMemTbData != NULL) { - p = piMemTbData->pHead; - while (p) { - if (p->version <= pReader->verRange.maxVer) { - taosArrayPush(pDelData, p); - } - p = p->pNext; - } - } - - if (taosArrayGetSize(pDelData) > 0) { - pBlockScanInfo->delSkyline = taosArrayInit(4, sizeof(TSDBKEY)); - code = tsdbBuildDeleteSkyline(pDelData, 0, (int32_t)(taosArrayGetSize(pDelData) - 1), pBlockScanInfo->delSkyline); - } - - taosArrayDestroy(pDelData); - int32_t index = getInitialDelIndex(pBlockScanInfo->delSkyline, pReader->order); - - pBlockScanInfo->iter.index = index; - pBlockScanInfo->iiter.index = index; - pBlockScanInfo->fileDelIndex = index; - pBlockScanInfo->sttBlockDelIndex = index; - - return code; - -_err: - taosArrayDestroy(pDelData); - return code; -} - -TSDBKEY getCurrentKeyInBuf(STableBlockScanInfo* pScanInfo, STsdbReader* pReader) { - bool asc = ASCENDING_TRAVERSE(pReader->order); - // TSKEY initialVal = asc? TSKEY_MIN:TSKEY_MAX; - - TSDBKEY key = {.ts = TSKEY_INITIAL_VAL}, ikey = {.ts = TSKEY_INITIAL_VAL}; - - bool hasKey = false, hasIKey = false; - TSDBROW* pRow = getValidMemRow(&pScanInfo->iter, pScanInfo->delSkyline, pReader); - if (pRow != NULL) { - hasKey = true; - key = TSDBROW_KEY(pRow); - } - - TSDBROW* pIRow = getValidMemRow(&pScanInfo->iiter, pScanInfo->delSkyline, pReader); - if (pIRow != NULL) { - hasIKey = true; - ikey = TSDBROW_KEY(pIRow); - } - - if (hasKey) { - if (hasIKey) { // has data in mem & imem - if (asc) { - return key.ts <= ikey.ts ? key : ikey; - } else { - return key.ts <= ikey.ts ? ikey : key; - } - } else { // no data in imem - return key; - } - } else { - // no data in mem & imem, return the initial value - // only imem has data, return ikey - return ikey; - } -} - -static int32_t moveToNextFile(STsdbReader* pReader, SBlockNumber* pBlockNum, SArray* pTableList) { - SReaderStatus* pStatus = &pReader->status; - pBlockNum->numOfBlocks = 0; - pBlockNum->numOfLastFiles = 0; - - size_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); - SArray* pIndexList = taosArrayInit(numOfTables, sizeof(SBlockIdx)); - - while (1) { - // only check here, since the iterate data in memory is very fast. 
- if (pReader->code != TSDB_CODE_SUCCESS) { - tsdbWarn("tsdb reader is stopped ASAP, code:%s, %s", strerror(pReader->code), pReader->idStr); - taosArrayDestroy(pIndexList); - return pReader->code; - } - - bool hasNext = false; - int32_t code = filesetIteratorNext(&pStatus->fileIter, pReader, &hasNext); - if (code != TSDB_CODE_SUCCESS) { - taosArrayDestroy(pIndexList); - return code; - } - - if (!hasNext) { // no data files on disk - break; - } - - taosArrayClear(pIndexList); - code = doLoadBlockIndex(pReader, pReader->pFileReader, pIndexList); - if (code != TSDB_CODE_SUCCESS) { - taosArrayDestroy(pIndexList); - return code; - } - - if (taosArrayGetSize(pIndexList) > 0 || pReader->pFileReader->pSet->nSttF > 0) { - code = doLoadFileBlock(pReader, pIndexList, pBlockNum, pTableList); - if (code != TSDB_CODE_SUCCESS) { - taosArrayDestroy(pIndexList); - return code; - } - - if (pBlockNum->numOfBlocks + pBlockNum->numOfLastFiles > 0) { - break; - } - } - - // no blocks in current file, try next files - } - - taosArrayDestroy(pIndexList); - - if (pReader->pReadSnap != NULL) { - SDelFile* pDelFile = pReader->pReadSnap->fs.pDelFile; - if (pReader->pDelFReader == NULL && pDelFile != NULL) { - int32_t code = tsdbDelFReaderOpen(&pReader->pDelFReader, pDelFile, pReader->pTsdb); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - pReader->pDelIdx = taosArrayInit(4, sizeof(SDelIdx)); - if (pReader->pDelIdx == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - return code; - } - - code = tsdbReadDelIdx(pReader->pDelFReader, pReader->pDelIdx); - if (code != TSDB_CODE_SUCCESS) { - taosArrayDestroy(pReader->pDelIdx); - return code; - } - } - } - - return TSDB_CODE_SUCCESS; -} - -static void resetTableListIndex(SReaderStatus* pStatus) { - STableUidList* pList = &pStatus->uidList; - - pList->currentIndex = 0; - uint64_t uid = pList->tableUidList[0]; - pStatus->pTableIter = tSimpleHashGet(pStatus->pTableMap, &uid, sizeof(uid)); -} - -static bool moveToNextTable(STableUidList* pOrderedCheckInfo, SReaderStatus* pStatus) { - pOrderedCheckInfo->currentIndex += 1; - if (pOrderedCheckInfo->currentIndex >= tSimpleHashGetSize(pStatus->pTableMap)) { - pStatus->pTableIter = NULL; - return false; - } - - uint64_t uid = pOrderedCheckInfo->tableUidList[pOrderedCheckInfo->currentIndex]; - pStatus->pTableIter = tSimpleHashGet(pStatus->pTableMap, &uid, sizeof(uid)); - return (pStatus->pTableIter != NULL); -} - -static int32_t doLoadLastBlockSequentially(STsdbReader* pReader) { - SReaderStatus* pStatus = &pReader->status; - SLastBlockReader* pLastBlockReader = pStatus->fileIter.pLastBlockReader; - STableUidList* pUidList = &pStatus->uidList; - int32_t code = TSDB_CODE_SUCCESS; - - if (tSimpleHashGetSize(pStatus->pTableMap) == 0) { - return TSDB_CODE_SUCCESS; - } - - SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; - - while (1) { - if (pReader->code != TSDB_CODE_SUCCESS) { - tsdbWarn("tsdb reader is stopped ASAP, code:%s, %s", strerror(pReader->code), pReader->idStr); - return pReader->code; - } - - // load the last data block of current table - STableBlockScanInfo* pScanInfo = *(STableBlockScanInfo**)pStatus->pTableIter; - if (pScanInfo == NULL) { - tsdbError("table Iter is null, invalid pScanInfo, try next table %s", pReader->idStr); - bool hasNexTable = moveToNextTable(pUidList, pStatus); - if (!hasNexTable) { - return TSDB_CODE_SUCCESS; - } - - continue; - } - - if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pScanInfo->uid, sizeof(pScanInfo->uid))) { - // reset the index in last block when 
handing a new file - doCleanupTableScanInfo(pScanInfo); - pStatus->mapDataCleaned = true; - - bool hasNexTable = moveToNextTable(pUidList, pStatus); - if (!hasNexTable) { - return TSDB_CODE_SUCCESS; - } - - continue; - } - - // reset the index in last block when handing a new file - doCleanupTableScanInfo(pScanInfo); - pStatus->mapDataCleaned = true; - - bool hasDataInLastFile = initLastBlockReader(pLastBlockReader, pScanInfo, pReader); - if (!hasDataInLastFile) { - bool hasNexTable = moveToNextTable(pUidList, pStatus); - if (!hasNexTable) { - return TSDB_CODE_SUCCESS; - } - - continue; - } - - int64_t st = taosGetTimestampUs(); - while (1) { - bool hasBlockLData = hasDataInLastBlock(pLastBlockReader); - - // no data in last block and block, no need to proceed. - if (hasBlockLData == false) { - break; - } - - code = buildComposedDataBlockImpl(pReader, pScanInfo, &pReader->status.fileBlockData, pLastBlockReader); - if (code) { - return code; - } - - if (pResBlock->info.rows >= pReader->resBlockInfo.capacity) { - break; - } - } - - double el = (taosGetTimestampUs() - st) / 1000.0; - updateComposedBlockInfo(pReader, el, pScanInfo); - - if (pResBlock->info.rows > 0) { - tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 " rows:%" PRId64 - ", elapsed time:%.2f ms %s", - pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, - pResBlock->info.rows, el, pReader->idStr); - return TSDB_CODE_SUCCESS; - } - - // current table is exhausted, let's try next table - bool hasNexTable = moveToNextTable(pUidList, pStatus); - if (!hasNexTable) { - return TSDB_CODE_SUCCESS; - } - } -} - -static int32_t doBuildDataBlock(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - - SReaderStatus* pStatus = &pReader->status; - SDataBlockIter* pBlockIter = &pStatus->blockIter; - STableBlockScanInfo* pScanInfo = NULL; - SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); - SLastBlockReader* pLastBlockReader = pReader->status.fileIter.pLastBlockReader; - SDataBlk* pBlock = getCurrentBlock(pBlockIter); - - if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pBlockInfo->uid, sizeof(pBlockInfo->uid))) { - setBlockAllDumped(&pStatus->fBlockDumpInfo, pBlock->maxKey.ts, pReader->order); - return code; - } - - if (pReader->code != TSDB_CODE_SUCCESS) { - return pReader->code; - } - - pScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, pBlockInfo->uid, pReader->idStr); - if (pScanInfo == NULL) { - return terrno; - } - - initLastBlockReader(pLastBlockReader, pScanInfo, pReader); - TSDBKEY keyInBuf = getCurrentKeyInBuf(pScanInfo, pReader); - - if (fileBlockShouldLoad(pReader, pBlockInfo, pBlock, pScanInfo, keyInBuf, pLastBlockReader)) { - code = doLoadFileBlockData(pReader, pBlockIter, &pStatus->fileBlockData, pScanInfo->uid); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - // build composed data block - code = buildComposedDataBlock(pReader); - } else if (bufferDataInFileBlockGap(pReader->order, keyInBuf, pBlock)) { - // data in memory that are earlier than current file block - // rows in buffer should be less than the file block in asc, greater than file block in desc - int64_t endKey = (ASCENDING_TRAVERSE(pReader->order)) ? 
pBlock->minKey.ts : pBlock->maxKey.ts; - code = buildDataBlockFromBuf(pReader, pScanInfo, endKey); - } else { - if (hasDataInLastBlock(pLastBlockReader) && !ASCENDING_TRAVERSE(pReader->order)) { - // only return the rows in last block - int64_t tsLast = getCurrentKeyInLastBlock(pLastBlockReader); - ASSERT(tsLast >= pBlock->maxKey.ts); - - SBlockData* pBData = &pReader->status.fileBlockData; - tBlockDataReset(pBData); - - SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; - tsdbDebug("load data in last block firstly, due to desc scan data, %s", pReader->idStr); - - int64_t st = taosGetTimestampUs(); - - while (1) { - bool hasBlockLData = hasDataInLastBlock(pLastBlockReader); - - // no data in last block and block, no need to proceed. - if (hasBlockLData == false) { - break; - } - - code = buildComposedDataBlockImpl(pReader, pScanInfo, &pReader->status.fileBlockData, pLastBlockReader); - if (code) { - return code; - } - - if (pResBlock->info.rows >= pReader->resBlockInfo.capacity) { - break; - } - } - - double el = (taosGetTimestampUs() - st) / 1000.0; - updateComposedBlockInfo(pReader, el, pScanInfo); - - if (pResBlock->info.rows > 0) { - tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 " rows:%" PRId64 - ", elapsed time:%.2f ms %s", - pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, - pResBlock->info.rows, el, pReader->idStr); - } - } else { // whole block is required, return it directly - SDataBlockInfo* pInfo = &pReader->resBlockInfo.pResBlock->info; - pInfo->rows = pBlock->nRow; - pInfo->id.uid = pScanInfo->uid; - pInfo->dataLoad = 0; - pInfo->window = (STimeWindow){.skey = pBlock->minKey.ts, .ekey = pBlock->maxKey.ts}; - setComposedBlockFlag(pReader, false); - setBlockAllDumped(&pStatus->fBlockDumpInfo, pBlock->maxKey.ts, pReader->order); - - // update the last key for the corresponding table - pScanInfo->lastKey = ASCENDING_TRAVERSE(pReader->order) ? pInfo->window.ekey : pInfo->window.skey; - tsdbDebug("%p uid:%" PRIu64 - " clean file block retrieved from file, global index:%d, " - "table index:%d, rows:%d, brange:%" PRId64 "-%" PRId64 ", %s", - pReader, pScanInfo->uid, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlock->nRow, pBlock->minKey.ts, - pBlock->maxKey.ts, pReader->idStr); - } - } - - return (pReader->code != TSDB_CODE_SUCCESS) ? 
pReader->code : code; -} - -static int32_t doSumFileBlockRows(STsdbReader* pReader, SDataFReader* pFileReader) { - int64_t st = taosGetTimestampUs(); - LRUHandle* handle = NULL; - int32_t code = tsdbCacheGetBlockIdx(pFileReader->pTsdb->biCache, pFileReader, &handle); - if (code != TSDB_CODE_SUCCESS || handle == NULL) { - goto _end; - } - - int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); - - SArray* aBlockIdx = (SArray*)taosLRUCacheValue(pFileReader->pTsdb->biCache, handle); - size_t num = taosArrayGetSize(aBlockIdx); - if (num == 0) { - tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); - return TSDB_CODE_SUCCESS; - } - - SBlockIdx* pBlockIdx = NULL; - for (int32_t i = 0; i < num; ++i) { - pBlockIdx = (SBlockIdx*)taosArrayGet(aBlockIdx, i); - if (pBlockIdx->suid != pReader->suid) { - continue; - } - - STableBlockScanInfo** p = tSimpleHashGet(pReader->status.pTableMap, &pBlockIdx->uid, sizeof(pBlockIdx->uid)); - if (p == NULL) { - continue; - } - - STableBlockScanInfo* pScanInfo = *p; - tMapDataReset(&pScanInfo->mapData); - tsdbReadDataBlk(pReader->pFileReader, pBlockIdx, &pScanInfo->mapData); - - SDataBlk block = {0}; - for (int32_t j = 0; j < pScanInfo->mapData.nItem; ++j) { - tGetDataBlk(pScanInfo->mapData.pData + pScanInfo->mapData.aOffset[j], &block); - pReader->rowsNum += block.nRow; - } - } - -_end: - tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); - return code; -} - -static int32_t doSumSttBlockRows(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - SLastBlockReader* pLastBlockReader = pReader->status.fileIter.pLastBlockReader; - SSttBlockLoadInfo* pBlockLoadInfo = NULL; - - for (int32_t i = 0; i < pReader->pFileReader->pSet->nSttF; ++i) { // open all last file - pBlockLoadInfo = &pLastBlockReader->pInfo[i]; - - code = tsdbReadSttBlk(pReader->pFileReader, i, pBlockLoadInfo->aSttBlk); - if (code) { - return code; - } - - size_t size = taosArrayGetSize(pBlockLoadInfo->aSttBlk); - if (size >= 1) { - SSttBlk* pStart = taosArrayGet(pBlockLoadInfo->aSttBlk, 0); - SSttBlk* pEnd = taosArrayGet(pBlockLoadInfo->aSttBlk, size - 1); - - // all identical - if (pStart->suid == pEnd->suid) { - if (pStart->suid != pReader->suid) { - // no qualified stt block existed - taosArrayClear(pBlockLoadInfo->aSttBlk); - continue; - } - for (int32_t j = 0; j < size; ++j) { - SSttBlk* p = taosArrayGet(pBlockLoadInfo->aSttBlk, j); - pReader->rowsNum += p->nRow; - } - } else { - for (int32_t j = 0; j < size; ++j) { - SSttBlk* p = taosArrayGet(pBlockLoadInfo->aSttBlk, j); - uint64_t s = p->suid; - if (s < pReader->suid) { - continue; - } - - if (s == pReader->suid) { - pReader->rowsNum += p->nRow; - } else if (s > pReader->suid) { - break; - } - } - } - } - } - - return code; -} - -static int32_t readRowsCountFromFiles(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - - while (1) { - bool hasNext = false; - code = filesetIteratorNext(&pReader->status.fileIter, pReader, &hasNext); - if (code) { - return code; - } - - if (!hasNext) { // no data files on disk - break; - } - - code = doSumFileBlockRows(pReader, pReader->pFileReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doSumSttBlockRows(pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - - pReader->status.loadFromFile = false; - - return code; -} - -static int32_t readRowsCountFromMem(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - int64_t memNum = 0, imemNum = 0; - if (pReader->pReadSnap->pMem != NULL) { - 
tsdbMemTableCountRows(pReader->pReadSnap->pMem, pReader->status.pTableMap, &memNum); - } - - if (pReader->pReadSnap->pIMem != NULL) { - tsdbMemTableCountRows(pReader->pReadSnap->pIMem, pReader->status.pTableMap, &imemNum); - } - - pReader->rowsNum += memNum + imemNum; - - return code; -} - -static int32_t buildBlockFromBufferSequentially(STsdbReader* pReader) { - SReaderStatus* pStatus = &pReader->status; - STableUidList* pUidList = &pStatus->uidList; - - while (1) { - if (pReader->code != TSDB_CODE_SUCCESS) { - tsdbWarn("tsdb reader is stopped ASAP, code:%s, %s", strerror(pReader->code), pReader->idStr); - return pReader->code; - } - - STableBlockScanInfo** pBlockScanInfo = pStatus->pTableIter; - if (pReader->pIgnoreTables && - taosHashGet(*pReader->pIgnoreTables, &(*pBlockScanInfo)->uid, sizeof((*pBlockScanInfo)->uid))) { - bool hasNexTable = moveToNextTable(pUidList, pStatus); - if (!hasNexTable) { - return TSDB_CODE_SUCCESS; - } - pBlockScanInfo = pStatus->pTableIter; - } - - initMemDataIterator(*pBlockScanInfo, pReader); - - int64_t endKey = (ASCENDING_TRAVERSE(pReader->order)) ? INT64_MAX : INT64_MIN; - int32_t code = buildDataBlockFromBuf(pReader, *pBlockScanInfo, endKey); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - if (pReader->resBlockInfo.pResBlock->info.rows > 0) { - return TSDB_CODE_SUCCESS; - } - - // current table is exhausted, let's try next table - bool hasNexTable = moveToNextTable(pUidList, pStatus); - if (!hasNexTable) { - return TSDB_CODE_SUCCESS; - } - } -} - -// set the correct start position in case of the first/last file block, according to the query time window -static void initBlockDumpInfo(STsdbReader* pReader, SDataBlockIter* pBlockIter) { - int64_t lastKey = ASCENDING_TRAVERSE(pReader->order) ? INT64_MIN : INT64_MAX; - SDataBlk* pBlock = getCurrentBlock(pBlockIter); - SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); - if (pBlockInfo) { - STableBlockScanInfo* pScanInfo = tSimpleHashGet(pBlockIter->pTableMap, &pBlockInfo->uid, sizeof(pBlockInfo->uid)); - if (pScanInfo) { - lastKey = pScanInfo->lastKey; - } - } - SReaderStatus* pStatus = &pReader->status; - - SFileBlockDumpInfo* pDumpInfo = &pStatus->fBlockDumpInfo; - - pDumpInfo->totalRows = pBlock->nRow; - pDumpInfo->allDumped = false; - pDumpInfo->rowIndex = ASCENDING_TRAVERSE(pReader->order) ? 
0 : pBlock->nRow - 1; - pDumpInfo->lastKey = lastKey; -} - -static int32_t initForFirstBlockInFile(STsdbReader* pReader, SDataBlockIter* pBlockIter) { - SBlockNumber num = {0}; - SArray* pTableList = taosArrayInit(40, POINTER_BYTES); - - int32_t code = moveToNextFile(pReader, &num, pTableList); - if (code != TSDB_CODE_SUCCESS) { - taosArrayDestroy(pTableList); - return code; - } - - // all data files are consumed, try data in buffer - if (num.numOfBlocks + num.numOfLastFiles == 0) { - pReader->status.loadFromFile = false; - taosArrayDestroy(pTableList); - return code; - } - - // initialize the block iterator for a new fileset - if (num.numOfBlocks > 0) { - code = initBlockIterator(pReader, pBlockIter, num.numOfBlocks, pTableList); - } else { // no block data, only last block exists - tBlockDataReset(&pReader->status.fileBlockData); - resetDataBlockIterator(pBlockIter, pReader->order); - resetTableListIndex(&pReader->status); - } - - // set the correct start position according to the query time window - initBlockDumpInfo(pReader, pBlockIter); - taosArrayDestroy(pTableList); - return code; -} - -static bool fileBlockPartiallyRead(SFileBlockDumpInfo* pDumpInfo, bool asc) { - return (!pDumpInfo->allDumped) && - ((pDumpInfo->rowIndex > 0 && asc) || (pDumpInfo->rowIndex < (pDumpInfo->totalRows - 1) && (!asc))); -} - -typedef enum { - TSDB_READ_RETURN = 0x1, - TSDB_READ_CONTINUE = 0x2, -} ERetrieveType; - -static ERetrieveType doReadDataFromLastFiles(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; - SDataBlockIter* pBlockIter = &pReader->status.blockIter; - - while (1) { - terrno = 0; - - code = doLoadLastBlockSequentially(pReader); - if (code != TSDB_CODE_SUCCESS) { - terrno = code; - return TSDB_READ_RETURN; - } - - if (pResBlock->info.rows > 0) { - return TSDB_READ_RETURN; - } - - // all data blocks are checked in this last block file, now let's try the next file - ASSERT(pReader->status.pTableIter == NULL); - code = initForFirstBlockInFile(pReader, pBlockIter); - - // error happens or all the data files are completely checked - if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) { - terrno = code; - return TSDB_READ_RETURN; - } - - if (pBlockIter->numOfBlocks > 0) { // there are data blocks existed. - return TSDB_READ_CONTINUE; - } else { // all blocks in data file are checked, let's check the data in last files - resetTableListIndex(&pReader->status); - } - } -} - -static int32_t buildBlockFromFiles(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - bool asc = ASCENDING_TRAVERSE(pReader->order); - - SDataBlockIter* pBlockIter = &pReader->status.blockIter; - SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; - - if (pBlockIter->numOfBlocks == 0) { - // let's try to extract data from stt files. 
- ERetrieveType type = doReadDataFromLastFiles(pReader); - if (type == TSDB_READ_RETURN) { - return terrno; - } - - code = doBuildDataBlock(pReader); - if (code != TSDB_CODE_SUCCESS || pResBlock->info.rows > 0) { - return code; - } - } - - while (1) { - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - - if (fileBlockPartiallyRead(pDumpInfo, asc)) { // file data block is partially loaded - code = buildComposedDataBlock(pReader); - } else { - // current block are exhausted, try the next file block - if (pDumpInfo->allDumped) { - // try next data block in current file - bool hasNext = blockIteratorNext(&pReader->status.blockIter, pReader->idStr); - if (hasNext) { // check for the next block in the block accessed order list - initBlockDumpInfo(pReader, pBlockIter); - } else { - // all data blocks in files are checked, let's check the data in last files. - ASSERT(pReader->status.pCurrentFileset->nSttF > 0); - - // data blocks in current file are exhausted, let's try the next file now - SBlockData* pBlockData = &pReader->status.fileBlockData; - if (pBlockData->uid != 0) { - tBlockDataClear(pBlockData); - } - - tBlockDataReset(pBlockData); - resetDataBlockIterator(pBlockIter, pReader->order); - resetTableListIndex(&pReader->status); - - ERetrieveType type = doReadDataFromLastFiles(pReader); - if (type == TSDB_READ_RETURN) { - return terrno; - } - } - } - - code = doBuildDataBlock(pReader); - } - - if (code != TSDB_CODE_SUCCESS || pResBlock->info.rows > 0) { - return code; - } - } -} - -static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idStr, - int8_t* pLevel) { - if (VND_IS_RSMA(pVnode)) { - int8_t level = 0; - int8_t precision = pVnode->config.tsdbCfg.precision; - int64_t now = taosGetTimestamp(precision); - int64_t offset = tsQueryRsmaTolerance * ((precision == TSDB_TIME_PRECISION_MILLI) ? 1L - : (precision == TSDB_TIME_PRECISION_MICRO) ? 1000L - : 1000000L); - - for (int8_t i = 0; i < TSDB_RETENTION_MAX; ++i) { - SRetention* pRetention = retentions + level; - if (pRetention->keep <= 0) { - if (level > 0) { - --level; - } - break; - } - if ((now - pRetention->keep) <= (winSKey + offset)) { - break; - } - ++level; - } - - const char* str = (idStr != NULL) ? idStr : ""; - - if (level == TSDB_RETENTION_L0) { - *pLevel = TSDB_RETENTION_L0; - tsdbDebug("vgId:%d, rsma level %d is selected to query %s", TD_VID(pVnode), TSDB_RETENTION_L0, str); - return VND_RSMA0(pVnode); - } else if (level == TSDB_RETENTION_L1) { - *pLevel = TSDB_RETENTION_L1; - tsdbDebug("vgId:%d, rsma level %d is selected to query %s", TD_VID(pVnode), TSDB_RETENTION_L1, str); - return VND_RSMA1(pVnode); - } else { - *pLevel = TSDB_RETENTION_L2; - tsdbDebug("vgId:%d, rsma level %d is selected to query %s", TD_VID(pVnode), TSDB_RETENTION_L2, str); - return VND_RSMA2(pVnode); - } - } - - return VND_TSDB(pVnode); -} - -SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, const char* id) { - int64_t startVer = (pCond->startVersion == -1) ? 0 : pCond->startVersion; - - int64_t endVer = 0; - if (pCond->endVersion == -1) { - // user not specified end version, set current maximum version of vnode as the endVersion - endVer = pVnode->state.applied; - } else { - endVer = (pCond->endVersion > pVnode->state.applied) ? 
pVnode->state.applied : pCond->endVersion; - } - - tsdbDebug("queried verRange:%" PRId64 "-%" PRId64 ", revised query verRange:%" PRId64 "-%" PRId64 ", %s", - pCond->startVersion, pCond->endVersion, startVer, endVer, id); - - return (SVersionRange){.minVer = startVer, .maxVer = endVer}; -} - -bool hasBeenDropped(const SArray* pDelList, int32_t* index, int64_t key, int64_t ver, int32_t order, - SVersionRange* pVerRange) { - if (pDelList == NULL) { - return false; - } - - size_t num = taosArrayGetSize(pDelList); - bool asc = ASCENDING_TRAVERSE(order); - int32_t step = asc ? 1 : -1; - - if (asc) { - if (*index >= num - 1) { - TSDBKEY* last = taosArrayGetLast(pDelList); - ASSERT(key >= last->ts); - - if (key > last->ts) { - return false; - } else if (key == last->ts) { - TSDBKEY* prev = taosArrayGet(pDelList, num - 2); - return (prev->version >= ver && prev->version <= pVerRange->maxVer && prev->version >= pVerRange->minVer); - } - } else { - TSDBKEY* pCurrent = taosArrayGet(pDelList, *index); - TSDBKEY* pNext = taosArrayGet(pDelList, (*index) + 1); - - if (key < pCurrent->ts) { - return false; - } - - if (pCurrent->ts <= key && pNext->ts >= key && pCurrent->version >= ver && - pVerRange->maxVer >= pCurrent->version) { - return true; - } - - while (pNext->ts <= key && (*index) < num - 1) { - (*index) += 1; - - if ((*index) < num - 1) { - pCurrent = taosArrayGet(pDelList, *index); - pNext = taosArrayGet(pDelList, (*index) + 1); - - // it is not a consecutive deletion range, ignore it - if (pCurrent->version == 0 && pNext->version > 0) { - continue; - } - - if (pCurrent->ts <= key && pNext->ts >= key && pCurrent->version >= ver && - pVerRange->maxVer >= pCurrent->version) { - return true; - } - } - } - - return false; - } - } else { - if (*index <= 0) { - TSDBKEY* pFirst = taosArrayGet(pDelList, 0); - - if (key < pFirst->ts) { - return false; - } else if (key == pFirst->ts) { - return pFirst->version >= ver; - } else { - ASSERT(0); - } - } else { - TSDBKEY* pCurrent = taosArrayGet(pDelList, *index); - TSDBKEY* pPrev = taosArrayGet(pDelList, (*index) - 1); - - if (key > pCurrent->ts) { - return false; - } - - if (pPrev->ts <= key && pCurrent->ts >= key && pPrev->version >= ver) { - return true; - } - - while (pPrev->ts >= key && (*index) > 1) { - (*index) += step; - - if ((*index) >= 1) { - pCurrent = taosArrayGet(pDelList, *index); - pPrev = taosArrayGet(pDelList, (*index) - 1); - - // it is not a consecutive deletion range, ignore it - if (pCurrent->version > 0 && pPrev->version == 0) { - continue; - } - - if (pPrev->ts <= key && pCurrent->ts >= key && pPrev->version >= ver) { - return true; - } - } - } - - return false; - } - } - - return false; -} - -TSDBROW* getValidMemRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pReader) { - if (!pIter->hasVal) { - return NULL; - } - - TSDBROW* pRow = tsdbTbDataIterGet(pIter->iter); - TSDBKEY key = TSDBROW_KEY(pRow); - - if (outOfTimeWindow(key.ts, &pReader->window)) { - pIter->hasVal = false; - return NULL; - } - - // it is a valid data version - if ((key.version <= pReader->verRange.maxVer && key.version >= pReader->verRange.minVer) && - (!hasBeenDropped(pDelList, &pIter->index, key.ts, key.version, pReader->order, &pReader->verRange))) { - return pRow; - } - - while (1) { - pIter->hasVal = tsdbTbDataIterNext(pIter->iter); - if (!pIter->hasVal) { - return NULL; - } - - pRow = tsdbTbDataIterGet(pIter->iter); - - key = TSDBROW_KEY(pRow); - if (outOfTimeWindow(key.ts, &pReader->window)) { - pIter->hasVal = false; - return NULL; - } - - if 
(key.version <= pReader->verRange.maxVer && key.version >= pReader->verRange.minVer && - (!hasBeenDropped(pDelList, &pIter->index, key.ts, key.version, pReader->order, &pReader->verRange))) { - return pRow; - } - } -} - -int32_t doMergeRowsInBuf(SIterInfo* pIter, uint64_t uid, int64_t ts, SArray* pDelList, STsdbReader* pReader) { - SRowMerger* pMerger = &pReader->status.merger; - - while (1) { - pIter->hasVal = tsdbTbDataIterNext(pIter->iter); - if (!pIter->hasVal) { - break; - } - - // data exists but not valid - TSDBROW* pRow = getValidMemRow(pIter, pDelList, pReader); - if (pRow == NULL) { - break; - } - - // ts is not identical, quit - TSDBKEY k = TSDBROW_KEY(pRow); - if (k.ts != ts) { - break; - } - - if (pRow->type == TSDBROW_ROW_FMT) { - STSchema* pTSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, uid); - if (pTSchema == NULL) { - return terrno; - } - - tsdbRowMergerAdd(pMerger, pRow, pTSchema); - } else { // column format - tsdbRowMergerAdd(pMerger, pRow, NULL); - } - } - - return TSDB_CODE_SUCCESS; -} - -static int32_t doMergeRowsInFileBlockImpl(SBlockData* pBlockData, int32_t rowIndex, int64_t key, SRowMerger* pMerger, - SVersionRange* pVerRange, int32_t step) { - while (rowIndex < pBlockData->nRow && rowIndex >= 0 && pBlockData->aTSKEY[rowIndex] == key) { - if (pBlockData->aVersion[rowIndex] > pVerRange->maxVer || pBlockData->aVersion[rowIndex] < pVerRange->minVer) { - rowIndex += step; - continue; - } - - TSDBROW fRow = tsdbRowFromBlockData(pBlockData, rowIndex); - tsdbRowMergerAdd(pMerger, &fRow, NULL); - rowIndex += step; - } - - return rowIndex; -} - -typedef enum { - CHECK_FILEBLOCK_CONT = 0x1, - CHECK_FILEBLOCK_QUIT = 0x2, -} CHECK_FILEBLOCK_STATE; - -static int32_t checkForNeighborFileBlock(STsdbReader* pReader, STableBlockScanInfo* pScanInfo, SDataBlk* pBlock, - SFileDataBlockInfo* pFBlock, SRowMerger* pMerger, int64_t key, - CHECK_FILEBLOCK_STATE* state) { - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - SBlockData* pBlockData = &pReader->status.fileBlockData; - bool asc = ASCENDING_TRAVERSE(pReader->order); - - *state = CHECK_FILEBLOCK_QUIT; - int32_t step = ASCENDING_TRAVERSE(pReader->order) ? 1 : -1; - - bool loadNeighbor = true; - int32_t code = loadNeighborIfOverlap(pFBlock, pScanInfo, pReader, &loadNeighbor); - - if (loadNeighbor && (code == TSDB_CODE_SUCCESS)) { - pDumpInfo->rowIndex = - doMergeRowsInFileBlockImpl(pBlockData, pDumpInfo->rowIndex, key, pMerger, &pReader->verRange, step); - if ((pDumpInfo->rowIndex >= pDumpInfo->totalRows && asc) || (pDumpInfo->rowIndex < 0 && !asc)) { - *state = CHECK_FILEBLOCK_CONT; - } - } - - return code; -} - -int32_t doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pScanInfo, STsdbReader* pReader) { - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - - SRowMerger* pMerger = &pReader->status.merger; - bool asc = ASCENDING_TRAVERSE(pReader->order); - int64_t key = pBlockData->aTSKEY[pDumpInfo->rowIndex]; - int32_t step = asc ? 
1 : -1; - - pDumpInfo->rowIndex += step; - if ((pDumpInfo->rowIndex <= pBlockData->nRow - 1 && asc) || (pDumpInfo->rowIndex >= 0 && !asc)) { - pDumpInfo->rowIndex = - doMergeRowsInFileBlockImpl(pBlockData, pDumpInfo->rowIndex, key, pMerger, &pReader->verRange, step); - } - - // all rows are consumed, let's try next file block - if ((pDumpInfo->rowIndex >= pBlockData->nRow && asc) || (pDumpInfo->rowIndex < 0 && !asc)) { - while (1) { - CHECK_FILEBLOCK_STATE st; - - SFileDataBlockInfo* pFileBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); - SDataBlk* pCurrentBlock = getCurrentBlock(&pReader->status.blockIter); - if (pFileBlockInfo == NULL) { - st = CHECK_FILEBLOCK_QUIT; - break; - } - - checkForNeighborFileBlock(pReader, pScanInfo, pCurrentBlock, pFileBlockInfo, pMerger, key, &st); - if (st == CHECK_FILEBLOCK_QUIT) { - break; - } - } - } - - return TSDB_CODE_SUCCESS; -} - -int32_t doMergeRowsInLastBlock(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pScanInfo, int64_t ts, - SRowMerger* pMerger, SVersionRange* pVerRange, const char* idStr) { - while (nextRowFromLastBlocks(pLastBlockReader, pScanInfo, pVerRange)) { - int64_t next1 = getCurrentKeyInLastBlock(pLastBlockReader); - if (next1 == ts) { - TSDBROW* pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - tsdbRowMergerAdd(pMerger, pRow1, NULL); - } else { - tsdbTrace("uid:%" PRIu64 " last del index:%d, del range:%d, lastKeyInStt:%" PRId64 ", %s", pScanInfo->uid, - pScanInfo->sttBlockDelIndex, (int32_t)taosArrayGetSize(pScanInfo->delSkyline), pScanInfo->lastKeyInStt, - idStr); - break; - } - } - - return TSDB_CODE_SUCCESS; -} - -int32_t doMergeMemTableMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, SArray* pDelList, TSDBROW* pResRow, - STsdbReader* pReader, bool* freeTSRow) { - TSDBROW* pNextRow = NULL; - TSDBROW current = *pRow; - - { // if the timestamp of the next valid row has a different ts, return current row directly - pIter->hasVal = tsdbTbDataIterNext(pIter->iter); - - if (!pIter->hasVal) { - *pResRow = *pRow; - *freeTSRow = false; - return TSDB_CODE_SUCCESS; - } else { // has next point in mem/imem - pNextRow = getValidMemRow(pIter, pDelList, pReader); - if (pNextRow == NULL) { - *pResRow = current; - *freeTSRow = false; - return TSDB_CODE_SUCCESS; - } - - if (TSDBROW_TS(&current) != TSDBROW_TS(pNextRow)) { - *pResRow = current; - *freeTSRow = false; - return TSDB_CODE_SUCCESS; - } - } - } - - terrno = 0; - int32_t code = 0; - - // start to merge duplicated rows - if (current.type == TSDBROW_ROW_FMT) { - // get the correct schema for data in memory - STSchema* pTSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(&current), pReader, uid); - if (pTSchema == NULL) { - return terrno; - } - - code = tsdbRowMergerAdd(&pReader->status.merger, &current, pTSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - STSchema* pTSchema1 = doGetSchemaForTSRow(TSDBROW_SVERSION(pNextRow), pReader, uid); - if (pTSchema1 == NULL) { - return terrno; - } - - tsdbRowMergerAdd(&pReader->status.merger, pNextRow, pTSchema1); - } else { // let's merge rows in file block - code = tsdbRowMergerAdd(&pReader->status.merger, &current, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - tsdbRowMergerAdd(&pReader->status.merger, pNextRow, NULL); - } - - code = doMergeRowsInBuf(pIter, uid, TSDBROW_TS(&current), pDelList, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = tsdbRowMergerGetRow(&pReader->status.merger, &pResRow->pTSRow); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - pResRow->type
= TSDBROW_ROW_FMT; - tsdbRowMergerClear(&pReader->status.merger); - *freeTSRow = true; - - return TSDB_CODE_SUCCESS; -} - -int32_t doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, - SRow** pTSRow) { - SRowMerger* pMerger = &pReader->status.merger; - - TSDBKEY k = TSDBROW_KEY(pRow); - TSDBKEY ik = TSDBROW_KEY(piRow); - STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); - if (pSchema == NULL) { - return terrno; - } - - STSchema* piSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(piRow), pReader, pBlockScanInfo->uid); - if (piSchema == NULL) { - return terrno; - } - - if (ASCENDING_TRAVERSE(pReader->order)) { // ascending order imem --> mem - int32_t code = tsdbRowMergerAdd(&pReader->status.merger, piRow, piSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - tsdbRowMergerAdd(&pReader->status.merger, pRow, pSchema); - code = doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - } else { - int32_t code = tsdbRowMergerAdd(&pReader->status.merger, pRow, pSchema); - if (code != TSDB_CODE_SUCCESS || pMerger->pTSchema == NULL) { - return code; - } - - code = doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - tsdbRowMergerAdd(&pReader->status.merger, piRow, piSchema); - code = doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - - int32_t code = tsdbRowMergerGetRow(pMerger, pTSRow); - tsdbRowMergerClear(pMerger); - return code; -} - -int32_t tsdbGetNextRowInMem(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, TSDBROW* pResRow, int64_t endKey, - bool* freeTSRow) { - TSDBROW* pRow = getValidMemRow(&pBlockScanInfo->iter, pBlockScanInfo->delSkyline, pReader); - TSDBROW* piRow = getValidMemRow(&pBlockScanInfo->iiter, pBlockScanInfo->delSkyline, pReader); - SArray* pDelList = pBlockScanInfo->delSkyline; - uint64_t uid = pBlockScanInfo->uid; - - // todo refactor - bool asc = ASCENDING_TRAVERSE(pReader->order); - if (pBlockScanInfo->iter.hasVal) { - TSDBKEY k = TSDBROW_KEY(pRow); - if ((k.ts >= endKey && asc) || (k.ts <= endKey && !asc)) { - pRow = NULL; - } - } - - if (pBlockScanInfo->iiter.hasVal) { - TSDBKEY k = TSDBROW_KEY(piRow); - if ((k.ts >= endKey && asc) || (k.ts <= endKey && !asc)) { - piRow = NULL; - } - } - - if (pBlockScanInfo->iter.hasVal && pBlockScanInfo->iiter.hasVal && pRow != NULL && piRow != NULL) { - TSDBKEY k = TSDBROW_KEY(pRow); - TSDBKEY ik = TSDBROW_KEY(piRow); - - int32_t code = TSDB_CODE_SUCCESS; - if (ik.ts != k.ts) { - if (((ik.ts < k.ts) && asc) || ((ik.ts > k.ts) && (!asc))) { // ik.ts < k.ts - code = doMergeMemTableMultiRows(piRow, uid, &pBlockScanInfo->iiter, pDelList, pResRow, pReader, freeTSRow); - } else if (((k.ts < ik.ts) && asc) || ((k.ts > ik.ts) && (!asc))) { - code = doMergeMemTableMultiRows(pRow, uid, &pBlockScanInfo->iter, pDelList, pResRow, pReader, freeTSRow); - } - } else { // ik.ts == k.ts - *freeTSRow = true; - pResRow->type = TSDBROW_ROW_FMT; - code = doMergeMemIMemRows(pRow, piRow, pBlockScanInfo, pReader, &pResRow->pTSRow); - if (code 
!= TSDB_CODE_SUCCESS) { - return code; - } - } - - return code; - } - - if (pBlockScanInfo->iter.hasVal && pRow != NULL) { - return doMergeMemTableMultiRows(pRow, pBlockScanInfo->uid, &pBlockScanInfo->iter, pDelList, pResRow, pReader, - freeTSRow); - } - - if (pBlockScanInfo->iiter.hasVal && piRow != NULL) { - return doMergeMemTableMultiRows(piRow, uid, &pBlockScanInfo->iiter, pDelList, pResRow, pReader, freeTSRow); - } - - return TSDB_CODE_SUCCESS; -} - -int32_t doAppendRowFromTSRow(SSDataBlock* pBlock, STsdbReader* pReader, SRow* pTSRow, STableBlockScanInfo* pScanInfo) { - int32_t outputRowIndex = pBlock->info.rows; - int64_t uid = pScanInfo->uid; - int32_t code = TSDB_CODE_SUCCESS; - - SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; - STSchema* pSchema = doGetSchemaForTSRow(pTSRow->sver, pReader, uid); - if (pSchema == NULL) { - return terrno; - } - - SColVal colVal = {0}; - int32_t i = 0, j = 0; - - if (pSupInfo->colId[i] == PRIMARYKEY_TIMESTAMP_COL_ID) { - SColumnInfoData* pColData = taosArrayGet(pBlock->pDataBlock, pSupInfo->slotId[i]); - ((int64_t*)pColData->pData)[outputRowIndex] = pTSRow->ts; - i += 1; - } - - while (i < pSupInfo->numOfCols && j < pSchema->numOfCols) { - col_id_t colId = pSupInfo->colId[i]; - - if (colId == pSchema->columns[j].colId) { - SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, pSupInfo->slotId[i]); - - tRowGet(pTSRow, pSchema, j, &colVal); - code = doCopyColVal(pColInfoData, outputRowIndex, i, &colVal, pSupInfo); - if (code) { - return code; - } - i += 1; - j += 1; - } else if (colId < pSchema->columns[j].colId) { - SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, pSupInfo->slotId[i]); - - colDataSetNULL(pColInfoData, outputRowIndex); - i += 1; - } else if (colId > pSchema->columns[j].colId) { - j += 1; - } - } - - // set null value since current column does not exist in the "pSchema" - while (i < pSupInfo->numOfCols) { - SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, pSupInfo->slotId[i]); - colDataSetNULL(pColInfoData, outputRowIndex); - i += 1; - } - - pBlock->info.dataLoad = 1; - pBlock->info.rows += 1; - pScanInfo->lastKey = pTSRow->ts; - return TSDB_CODE_SUCCESS; -} - -int32_t doAppendRowFromFileBlock(SSDataBlock* pResBlock, STsdbReader* pReader, SBlockData* pBlockData, - int32_t rowIndex) { - int32_t i = 0, j = 0; - int32_t outputRowIndex = pResBlock->info.rows; - int32_t code = TSDB_CODE_SUCCESS; - - SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; - ((int64_t*)pReader->status.pPrimaryTsCol->pData)[outputRowIndex] = pBlockData->aTSKEY[rowIndex]; - i += 1; - - SColVal cv = {0}; - int32_t numOfInputCols = pBlockData->nColData; - int32_t numOfOutputCols = pSupInfo->numOfCols; - - while (i < numOfOutputCols && j < numOfInputCols) { - SColData* pData = tBlockDataGetColDataByIdx(pBlockData, j); - if (pData->cid < pSupInfo->colId[i]) { - j += 1; - continue; - } - - SColumnInfoData* pCol = TARRAY_GET_ELEM(pResBlock->pDataBlock, pSupInfo->slotId[i]); - if (pData->cid == pSupInfo->colId[i]) { - tColDataGetValue(pData, rowIndex, &cv); - code = doCopyColVal(pCol, outputRowIndex, i, &cv, pSupInfo); - if (code) { - return code; - } - j += 1; - } else if (pData->cid > pCol->info.colId) { - // the specified column does not exist in file block, fill with null data - colDataSetNULL(pCol, outputRowIndex); - } - - i += 1; - } - - while (i < numOfOutputCols) { - SColumnInfoData* pCol = taosArrayGet(pResBlock->pDataBlock, pSupInfo->slotId[i]); - colDataSetNULL(pCol, outputRowIndex); - i += 1; - } - - 
pResBlock->info.dataLoad = 1; - pResBlock->info.rows += 1; - return TSDB_CODE_SUCCESS; -} - -int32_t buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t endKey, int32_t capacity, - STsdbReader* pReader) { - SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; - int32_t code = TSDB_CODE_SUCCESS; - - do { - // SRow* pTSRow = NULL; - TSDBROW row = {.type = -1}; - bool freeTSRow = false; - tsdbGetNextRowInMem(pBlockScanInfo, pReader, &row, endKey, &freeTSRow); - if (row.type == -1) { - break; - } - - if (row.type == TSDBROW_ROW_FMT) { - code = doAppendRowFromTSRow(pBlock, pReader, row.pTSRow, pBlockScanInfo); - - if (freeTSRow) { - taosMemoryFree(row.pTSRow); - } - - if (code) { - return code; - } - } else { - code = doAppendRowFromFileBlock(pBlock, pReader, row.pBlockData, row.iRow); - if (code) { - break; - } - } - - // no data in buffer, return immediately - if (!(pBlockScanInfo->iter.hasVal || pBlockScanInfo->iiter.hasVal)) { - break; - } - - if (pBlock->info.rows >= capacity) { - break; - } - } while (1); - - return code; -} - -// TODO refactor: with createDataBlockScanInfo -int32_t tsdbSetTableList(STsdbReader* pReader, const void* pTableList, int32_t num) { - int32_t size = tSimpleHashGetSize(pReader->status.pTableMap); - - STableBlockScanInfo** p = NULL; - int32_t iter = 0; - - while ((p = tSimpleHashIterate(pReader->status.pTableMap, p, &iter)) != NULL) { - clearBlockScanInfo(*p); - } - - if (size < num) { - int32_t code = ensureBlockScanInfoBuf(&pReader->blockInfoBuf, num); - if (code) { - return code; - } - - char* p1 = taosMemoryRealloc(pReader->status.uidList.tableUidList, sizeof(uint64_t) * num); - if (p1 == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - pReader->status.uidList.tableUidList = (uint64_t*)p1; - } - - tSimpleHashClear(pReader->status.pTableMap); - STableUidList* pUidList = &pReader->status.uidList; - pUidList->currentIndex = 0; - - STableKeyInfo* pList = (STableKeyInfo*)pTableList; - for (int32_t i = 0; i < num; ++i) { - STableBlockScanInfo* pInfo = getPosInBlockInfoBuf(&pReader->blockInfoBuf, i); - pInfo->uid = pList[i].uid; - pUidList->tableUidList[i] = pList[i].uid; - - // todo extract method - if (ASCENDING_TRAVERSE(pReader->order)) { - int64_t skey = pReader->window.skey; - pInfo->lastKey = (skey > INT64_MIN) ? (skey - 1) : skey; - pInfo->lastKeyInStt = skey; - } else { - int64_t ekey = pReader->window.ekey; - pInfo->lastKey = (ekey < INT64_MAX) ? 
(ekey + 1) : ekey; - pInfo->lastKeyInStt = ekey; - } - - tSimpleHashPut(pReader->status.pTableMap, &pInfo->uid, sizeof(uint64_t), &pInfo, POINTER_BYTES); - } - - return TDB_CODE_SUCCESS; -} - -void* tsdbGetIdx(SMeta* pMeta) { - if (pMeta == NULL) { - return NULL; - } - return metaGetIdx(pMeta); -} - -void* tsdbGetIvtIdx(SMeta* pMeta) { - if (pMeta == NULL) { - return NULL; - } - return metaGetIvtIdx(pMeta); -} - -uint64_t tsdbGetReaderMaxVersion(STsdbReader* pReader) { return pReader->verRange.maxVer; } - -static int32_t doOpenReaderImpl(STsdbReader* pReader) { - SReaderStatus* pStatus = &pReader->status; - SDataBlockIter* pBlockIter = &pStatus->blockIter; - - initFilesetIterator(&pStatus->fileIter, pReader->pReadSnap->fs.aDFileSet, pReader); - resetDataBlockIterator(&pStatus->blockIter, pReader->order); - - int32_t code = TSDB_CODE_SUCCESS; - if (pStatus->fileIter.numOfFiles == 0) { - pStatus->loadFromFile = false; - } else if (READ_MODE_COUNT_ONLY == pReader->readMode) { - // DO NOTHING - } else { - code = initForFirstBlockInFile(pReader, pBlockIter); - } - - if (!pStatus->loadFromFile) { - resetTableListIndex(pStatus); - } - - return code; -} - -static void freeSchemaFunc(void* param) { - void** p = (void**)param; - taosMemoryFreeClear(*p); -} - -// ====================================== EXPOSED APIs ====================================== -int32_t tsdbReaderOpen(void* pVnode, SQueryTableDataCond* pCond, void* pTableList, int32_t numOfTables, - SSDataBlock* pResBlock, void** ppReader, const char* idstr, bool countOnly, - SHashObj** pIgnoreTables) { - STimeWindow window = pCond->twindows; - SVnodeCfg* pConf = &(((SVnode*)pVnode)->config); - - int32_t capacity = pConf->tsdbCfg.maxRows; - if (pResBlock != NULL) { - blockDataEnsureCapacity(pResBlock, capacity); - } - - int32_t code = tsdbReaderCreate(pVnode, pCond, ppReader, capacity, pResBlock, idstr); - if (code != TSDB_CODE_SUCCESS) { - goto _err; - } - - // check for query time window - STsdbReader* pReader = *ppReader; - if (isEmptyQueryTimeWindow(&pReader->window) && pCond->type == TIMEWINDOW_RANGE_CONTAINED) { - tsdbDebug("%p query window not overlaps with the data set, no result returned, %s", pReader, pReader->idStr); - return TSDB_CODE_SUCCESS; - } - - if (pCond->type == TIMEWINDOW_RANGE_EXTERNAL) { - // update the SQueryTableDataCond to create inner reader - int32_t order = pCond->order; - if (order == TSDB_ORDER_ASC) { - pCond->twindows.ekey = window.skey - 1; - pCond->twindows.skey = INT64_MIN; - pCond->order = TSDB_ORDER_DESC; - } else { - pCond->twindows.skey = window.ekey + 1; - pCond->twindows.ekey = INT64_MAX; - pCond->order = TSDB_ORDER_ASC; - } - - // here we only need one more row, so the capacity is set to be ONE. - code = tsdbReaderCreate(pVnode, pCond, (void**)&((STsdbReader*)pReader)->innerReader[0], 1, pResBlock, idstr); - if (code != TSDB_CODE_SUCCESS) { - goto _err; - } - - if (order == TSDB_ORDER_ASC) { - pCond->twindows.skey = window.ekey + 1; - pCond->twindows.ekey = INT64_MAX; - } else { - pCond->twindows.skey = INT64_MIN; - pCond->twindows.ekey = window.ekey - 1; - } - pCond->order = order; - - code = tsdbReaderCreate(pVnode, pCond, (void**)&((STsdbReader*)pReader)->innerReader[1], 1, pResBlock, idstr); - if (code != TSDB_CODE_SUCCESS) { - goto _err; - } - } - - // NOTE: the endVersion in pCond is the data version not schema version, so pCond->endVersion is not correct here. - // no valid error code set in metaGetTbTSchema, so let's set the error code here. - // we should proceed in case of tmq processing. 
- if (pCond->suid != 0) { - pReader->pSchema = metaGetTbTSchema(pReader->pTsdb->pVnode->pMeta, pReader->suid, -1, 1); - if (pReader->pSchema == NULL) { - tsdbError("failed to get table schema, suid:%" PRIu64 ", ver:-1, %s", pReader->suid, pReader->idStr); - } - } else if (numOfTables > 0) { - STableKeyInfo* pKey = pTableList; - pReader->pSchema = metaGetTbTSchema(pReader->pTsdb->pVnode->pMeta, pKey->uid, -1, 1); - if (pReader->pSchema == NULL) { - tsdbError("failed to get table schema, uid:%" PRIu64 ", ver:-1, %s", pKey->uid, pReader->idStr); - } - } - - if (pReader->pSchema != NULL) { - tsdbRowMergerInit(&pReader->status.merger, pReader->pSchema); - } - - pReader->pSchemaMap = tSimpleHashInit(8, taosFastHash); - if (pReader->pSchemaMap == NULL) { - tsdbError("failed init schema hash for reader %s", pReader->idStr); - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - - tSimpleHashSetFreeFp(pReader->pSchemaMap, freeSchemaFunc); - if (pReader->pSchema != NULL) { - code = updateBlockSMAInfo(pReader->pSchema, &pReader->suppInfo); - if (code != TSDB_CODE_SUCCESS) { - goto _err; - } - } - - STsdbReader* p = (pReader->innerReader[0] != NULL) ? pReader->innerReader[0] : pReader; - pReader->status.pTableMap = - createDataBlockScanInfo(p, &pReader->blockInfoBuf, pTableList, &pReader->status.uidList, numOfTables); - if (pReader->status.pTableMap == NULL) { - *ppReader = NULL; - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - - pReader->status.pLDataIter = taosMemoryCalloc(pConf->sttTrigger, sizeof(SLDataIter)); - if (pReader->status.pLDataIter == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - - pReader->flag = READER_STATUS_SUSPEND; - - if (countOnly) { - pReader->readMode = READ_MODE_COUNT_ONLY; - } - - pReader->pIgnoreTables = pIgnoreTables; - - tsdbDebug("%p total numOfTable:%d, window:%" PRId64 " - %" PRId64 ", verRange:%" PRId64 " - %" PRId64 - " in this query %s", - pReader, numOfTables, pReader->window.skey, pReader->window.ekey, pReader->verRange.minVer, - pReader->verRange.maxVer, pReader->idStr); - - return code; - -_err: - tsdbError("failed to create data reader, code:%s %s", tstrerror(code), idstr); - tsdbReaderClose(*ppReader); - *ppReader = NULL; // reset the pointer value. 
- return code; -} - -static void clearSharedPtr(STsdbReader* p) { - p->status.pLDataIter = NULL; - p->status.pTableMap = NULL; - p->status.uidList.tableUidList = NULL; - p->pReadSnap = NULL; - p->pSchema = NULL; - p->pSchemaMap = NULL; -} - -static void setSharedPtr(STsdbReader* pDst, const STsdbReader* pSrc) { - pDst->status.pTableMap = pSrc->status.pTableMap; - pDst->status.pLDataIter = pSrc->status.pLDataIter; - pDst->status.uidList = pSrc->status.uidList; - pDst->pSchema = pSrc->pSchema; - pDst->pSchemaMap = pSrc->pSchemaMap; - pDst->pReadSnap = pSrc->pReadSnap; - - if (pDst->pSchema) { - tsdbRowMergerInit(&pDst->status.merger, pDst->pSchema); - } -} - -void tsdbReaderClose(STsdbReader* pReader) { - if (pReader == NULL) { - return; - } - - tsdbAcquireReader(pReader); - - { - if (pReader->innerReader[0] != NULL || pReader->innerReader[1] != NULL) { - STsdbReader* p = pReader->innerReader[0]; - clearSharedPtr(p); - - p = pReader->innerReader[1]; - clearSharedPtr(p); - - tsdbReaderClose(pReader->innerReader[0]); - tsdbReaderClose(pReader->innerReader[1]); - } - } - - SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; - - taosArrayDestroy(pSupInfo->pColAgg); - for (int32_t i = 0; i < pSupInfo->numOfCols; ++i) { - if (pSupInfo->buildBuf[i] != NULL) { - taosMemoryFreeClear(pSupInfo->buildBuf[i]); - } - } - - if (pReader->resBlockInfo.freeBlock) { - pReader->resBlockInfo.pResBlock = blockDataDestroy(pReader->resBlockInfo.pResBlock); - } - - taosMemoryFree(pSupInfo->colId); - tBlockDataDestroy(&pReader->status.fileBlockData); - cleanupDataBlockIterator(&pReader->status.blockIter); - - size_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); - if (pReader->status.pTableMap != NULL) { - destroyAllBlockScanInfo(pReader->status.pTableMap); - clearBlockScanInfoBuf(&pReader->blockInfoBuf); - } - - if (pReader->pFileReader != NULL) { - tsdbDataFReaderClose(&pReader->pFileReader); - } - - if (pReader->pDelFReader != NULL) { - tsdbDelFReaderClose(&pReader->pDelFReader); - } - - if (pReader->pDelIdx != NULL) { - taosArrayDestroy(pReader->pDelIdx); - pReader->pDelIdx = NULL; - } - - qTrace("tsdb/reader-close: %p, untake snapshot", pReader); - tsdbUntakeReadSnap(pReader, pReader->pReadSnap, true); - pReader->pReadSnap = NULL; - - tsdbReleaseReader(pReader); - - tsdbUninitReaderLock(pReader); - - taosMemoryFreeClear(pReader->status.pLDataIter); - taosMemoryFreeClear(pReader->status.uidList.tableUidList); - SIOCostSummary* pCost = &pReader->cost; - - SFilesetIter* pFilesetIter = &pReader->status.fileIter; - if (pFilesetIter->pLastBlockReader != NULL) { - SLastBlockReader* pLReader = pFilesetIter->pLastBlockReader; - tMergeTreeClose(&pLReader->mergeTree); - - getSttBlockLoadInfo(pLReader->pInfo, &pCost->sttCost); - - pLReader->pInfo = destroyLastBlockLoadInfo(pLReader->pInfo); - taosMemoryFree(pLReader); - } - - tsdbDebug( - "%p :io-cost summary: head-file:%" PRIu64 ", head-file time:%.2f ms, SMA:%" PRId64 - " SMA-time:%.2f ms, fileBlocks:%" PRId64 - ", fileBlocks-load-time:%.2f ms, " - "build in-memory-block-time:%.2f ms, lastBlocks:%" PRId64 ", lastBlocks-time:%.2f ms, composed-blocks:%" PRId64 - ", composed-blocks-time:%.2fms, STableBlockScanInfo size:%.2f Kb, createTime:%.2f ms,initDelSkylineIterTime:%.2f " - "ms, %s", - pReader, pCost->headFileLoad, pCost->headFileLoadTime, pCost->smaDataLoad, pCost->smaLoadTime, pCost->numOfBlocks, - pCost->blockLoadTime, pCost->buildmemBlock, pCost->sttBlockLoad, pCost->sttBlockLoadTime, pCost->composedBlocks, - pCost->buildComposedBlockTime, numOfTables * 
sizeof(STableBlockScanInfo) / 1000.0, pCost->createScanInfoList, - pCost->initDelSkylineIterTime, pReader->idStr); - - taosMemoryFree(pReader->idStr); - - tsdbRowMergerCleanup(&pReader->status.merger); - taosMemoryFree(pReader->pSchema); - - tSimpleHashCleanup(pReader->pSchemaMap); - taosMemoryFreeClear(pReader); -} - -int32_t tsdbReaderSuspend(STsdbReader* pReader) { - int32_t code = 0; - - // save reader's base state & reset top state to be reconstructed from base state - SReaderStatus* pStatus = &pReader->status; - STableBlockScanInfo* pBlockScanInfo = NULL; - - if (pStatus->loadFromFile) { - SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); - if (pBlockInfo != NULL) { - pBlockScanInfo = getTableBlockScanInfo(pStatus->pTableMap, pBlockInfo->uid, pReader->idStr); - if (pBlockScanInfo == NULL) { - goto _err; - } - } else { - pBlockScanInfo = *pStatus->pTableIter; - } - - tsdbDataFReaderClose(&pReader->pFileReader); - - // resetDataBlockScanInfo excluding lastKey - STableBlockScanInfo** p = NULL; - int32_t iter = 0; - - while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) { - STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; - - pInfo->iterInit = false; - pInfo->iter.hasVal = false; - pInfo->iiter.hasVal = false; - - if (pInfo->iter.iter != NULL) { - pInfo->iter.iter = tsdbTbDataIterDestroy(pInfo->iter.iter); - } - - if (pInfo->iiter.iter != NULL) { - pInfo->iiter.iter = tsdbTbDataIterDestroy(pInfo->iiter.iter); - } - - pInfo->delSkyline = taosArrayDestroy(pInfo->delSkyline); - } - } else { - // resetDataBlockScanInfo excluding lastKey - STableBlockScanInfo** p = NULL; - int32_t iter = 0; - - while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) { - STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; - - pInfo->iterInit = false; - pInfo->iter.hasVal = false; - pInfo->iiter.hasVal = false; - - if (pInfo->iter.iter != NULL) { - pInfo->iter.iter = tsdbTbDataIterDestroy(pInfo->iter.iter); - } - - if (pInfo->iiter.iter != NULL) { - pInfo->iiter.iter = tsdbTbDataIterDestroy(pInfo->iiter.iter); - } - - pInfo->delSkyline = taosArrayDestroy(pInfo->delSkyline); - } - - pBlockScanInfo = pStatus->pTableIter == NULL ? NULL : *pStatus->pTableIter; - if (pBlockScanInfo) { - // save lastKey to restore memory iterator - STimeWindow w = pReader->resBlockInfo.pResBlock->info.window; - pBlockScanInfo->lastKey = ASCENDING_TRAVERSE(pReader->order) ? w.ekey : w.skey; - - // reset current current table's data block scan info, - pBlockScanInfo->iterInit = false; - pBlockScanInfo->iter.hasVal = false; - pBlockScanInfo->iiter.hasVal = false; - if (pBlockScanInfo->iter.iter != NULL) { - pBlockScanInfo->iter.iter = tsdbTbDataIterDestroy(pBlockScanInfo->iter.iter); - } - - if (pBlockScanInfo->iiter.iter != NULL) { - pBlockScanInfo->iiter.iter = tsdbTbDataIterDestroy(pBlockScanInfo->iiter.iter); - } - - pBlockScanInfo->pBlockList = taosArrayDestroy(pBlockScanInfo->pBlockList); - tMapDataClear(&pBlockScanInfo->mapData); - // TODO: keep skyline for reuse - pBlockScanInfo->delSkyline = taosArrayDestroy(pBlockScanInfo->delSkyline); - } - } - - tsdbUntakeReadSnap(pReader, pReader->pReadSnap, false); - pReader->pReadSnap = NULL; - pReader->flag = READER_STATUS_SUSPEND; - - tsdbDebug("reader: %p suspended uid %" PRIu64 " in this query %s", pReader, pBlockScanInfo ? 
pBlockScanInfo->uid : 0, - pReader->idStr); - return code; - -_err: - tsdbError("failed to suspend data reader, code:%s %s", tstrerror(code), pReader->idStr); - return code; -} - -static int32_t tsdbSetQueryReseek(void* pQHandle) { - int32_t code = 0; - STsdbReader* pReader = pQHandle; - - code = tsdbTryAcquireReader(pReader); - if (code == 0) { - if (pReader->flag == READER_STATUS_SUSPEND) { - tsdbReleaseReader(pReader); - return code; - } - - tsdbReaderSuspend(pReader); - - tsdbReleaseReader(pReader); - - return code; - } else if (code == EBUSY) { - return TSDB_CODE_VND_QUERY_BUSY; - } else { - terrno = TAOS_SYSTEM_ERROR(code); - return TSDB_CODE_FAILED; - } -} - -int32_t tsdbReaderResume(STsdbReader* pReader) { - int32_t code = 0; - - STableBlockScanInfo** pBlockScanInfo = pReader->status.pTableIter; - - // restore reader's state - // task snapshot - int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); - if (numOfTables > 0) { - qTrace("tsdb/reader: %p, take snapshot", pReader); - code = tsdbTakeReadSnap(pReader, tsdbSetQueryReseek, &pReader->pReadSnap); - if (code != TSDB_CODE_SUCCESS) { - goto _err; - } - - if (pReader->type == TIMEWINDOW_RANGE_CONTAINED) { - code = doOpenReaderImpl(pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } else { - STsdbReader* pPrevReader = pReader->innerReader[0]; - STsdbReader* pNextReader = pReader->innerReader[1]; - - // we need only one row - pPrevReader->resBlockInfo.capacity = 1; - setSharedPtr(pPrevReader, pReader); - - pNextReader->resBlockInfo.capacity = 1; - setSharedPtr(pNextReader, pReader); - - code = doOpenReaderImpl(pPrevReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - } - - pReader->flag = READER_STATUS_NORMAL; - tsdbDebug("reader: %p resumed uid %" PRIu64 ", numOfTable:%" PRId32 ", in this query %s", pReader, - pBlockScanInfo ? 
(*pBlockScanInfo)->uid : 0, numOfTables, pReader->idStr); - return code; - -_err: - tsdbError("failed to resume data reader, code:%s %s", tstrerror(code), pReader->idStr); - return code; -} - -static bool tsdbReadRowsCountOnly(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; - - if (pReader->status.loadFromFile == false) { - return false; - } - - code = readRowsCountFromFiles(pReader); - if (code != TSDB_CODE_SUCCESS) { - return false; - } - - code = readRowsCountFromMem(pReader); - if (code != TSDB_CODE_SUCCESS) { - return false; - } - - pBlock->info.rows = pReader->rowsNum; - pBlock->info.id.uid = 0; - pBlock->info.dataLoad = 0; - - pReader->rowsNum = 0; - - return pBlock->info.rows > 0; -} - -static int32_t doTsdbNextDataBlock(STsdbReader* pReader, bool* hasNext) { - int32_t code = TSDB_CODE_SUCCESS; - - // cleanup the data that belongs to the previous data block - SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; - blockDataCleanup(pBlock); - - *hasNext = false; - - SReaderStatus* pStatus = &pReader->status; - if (tSimpleHashGetSize(pStatus->pTableMap) == 0) { - return code; - } - - if (READ_MODE_COUNT_ONLY == pReader->readMode) { - return tsdbReadRowsCountOnly(pReader); - } - - if (pStatus->loadFromFile) { - code = buildBlockFromFiles(pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - if (pBlock->info.rows <= 0) { - resetTableListIndex(&pReader->status); - code = buildBlockFromBufferSequentially(pReader); - } - } else { // no data in files, let's try the buffer - code = buildBlockFromBufferSequentially(pReader); - } - - *hasNext = pBlock->info.rows > 0; - - return code; -} - -int32_t tsdbNextDataBlock(STsdbReader* pReader, bool* hasNext) { - int32_t code = TSDB_CODE_SUCCESS; - - *hasNext = false; - - if (isEmptyQueryTimeWindow(&pReader->window) || pReader->step == EXTERNAL_ROWS_NEXT || - pReader->code != TSDB_CODE_SUCCESS) { - return (pReader->code != TSDB_CODE_SUCCESS) ? 
pReader->code : code; - } - - SReaderStatus* pStatus = &pReader->status; - - code = tsdbAcquireReader(pReader); - qTrace("tsdb/read: %p, take read mutex, code: %d", pReader, code); - - if (pReader->flag == READER_STATUS_SUSPEND) { - code = tsdbReaderResume(pReader); - if (code != TSDB_CODE_SUCCESS) { - tsdbReleaseReader(pReader); - return code; - } - } - - if (pReader->innerReader[0] != NULL && pReader->step == 0) { - code = doTsdbNextDataBlock(pReader->innerReader[0], hasNext); - if (code) { - tsdbReleaseReader(pReader); - return code; - } - - pReader->step = EXTERNAL_ROWS_PREV; - if (*hasNext) { - pStatus = &pReader->innerReader[0]->status; - if (pStatus->composedDataBlock) { - qTrace("tsdb/read: %p, unlock read mutex", pReader); - tsdbReleaseReader(pReader); - } - - return code; - } - } - - if (pReader->step == EXTERNAL_ROWS_PREV) { - // prepare for the main scan - code = doOpenReaderImpl(pReader); - int32_t step = 1; - resetAllDataBlockScanInfo(pReader->status.pTableMap, pReader->innerReader[0]->window.ekey, step); - - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - pReader->step = EXTERNAL_ROWS_MAIN; - } - - code = doTsdbNextDataBlock(pReader, hasNext); - if (code != TSDB_CODE_SUCCESS) { - tsdbReleaseReader(pReader); - return code; - } - - if (*hasNext) { - if (pStatus->composedDataBlock) { - qTrace("tsdb/read: %p, unlock read mutex", pReader); - tsdbReleaseReader(pReader); - } - - return code; - } - - if (pReader->step == EXTERNAL_ROWS_MAIN && pReader->innerReader[1] != NULL) { - // prepare for the next row scan - int32_t step = -1; - code = doOpenReaderImpl(pReader->innerReader[1]); - resetAllDataBlockScanInfo(pReader->innerReader[1]->status.pTableMap, pReader->window.ekey, step); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doTsdbNextDataBlock(pReader->innerReader[1], hasNext); - if (code != TSDB_CODE_SUCCESS) { - tsdbReleaseReader(pReader); - return code; - } - - pReader->step = EXTERNAL_ROWS_NEXT; - if (*hasNext) { - pStatus = &pReader->innerReader[1]->status; - if (pStatus->composedDataBlock) { - qTrace("tsdb/read: %p, unlock read mutex", pReader); - tsdbReleaseReader(pReader); - } - - return code; - } - } - - qTrace("tsdb/read: %p, unlock read mutex", pReader); - tsdbReleaseReader(pReader); - - return code; -} - -static bool doFillNullColSMA(SBlockLoadSuppInfo* pSup, int32_t numOfRows, int32_t numOfCols, SColumnDataAgg* pTsAgg) { - bool hasNullSMA = false; - // do fill all null column value SMA info - int32_t i = 0, j = 0; - int32_t size = (int32_t)taosArrayGetSize(pSup->pColAgg); - taosArrayInsert(pSup->pColAgg, 0, pTsAgg); - size++; - - while (j < numOfCols && i < size) { - SColumnDataAgg* pAgg = taosArrayGet(pSup->pColAgg, i); - if (pAgg->colId == pSup->colId[j]) { - i += 1; - j += 1; - } else if (pAgg->colId < pSup->colId[j]) { - i += 1; - } else if (pSup->colId[j] < pAgg->colId) { - if (pSup->colId[j] != PRIMARYKEY_TIMESTAMP_COL_ID) { - SColumnDataAgg nullColAgg = {.colId = pSup->colId[j], .numOfNull = numOfRows}; - taosArrayInsert(pSup->pColAgg, i, &nullColAgg); - i += 1; - size++; - hasNullSMA = true; - } - j += 1; - } - } - - while (j < numOfCols) { - if (pSup->colId[j] != PRIMARYKEY_TIMESTAMP_COL_ID) { - SColumnDataAgg nullColAgg = {.colId = pSup->colId[j], .numOfNull = numOfRows}; - taosArrayInsert(pSup->pColAgg, i, &nullColAgg); - i += 1; - hasNullSMA = true; - } - j++; - } - - return hasNullSMA; -} - -int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SSDataBlock* pDataBlock, bool* allHave, bool* hasNullSMA) { - SColumnDataAgg*** 
pBlockSMA = &pDataBlock->pBlockAgg; - - int32_t code = 0; - *allHave = false; - *pBlockSMA = NULL; - - if (pReader->type == TIMEWINDOW_RANGE_EXTERNAL) { - return TSDB_CODE_SUCCESS; - } - - // there is no statistics data for composed block - if (pReader->status.composedDataBlock || (!pReader->suppInfo.smaValid)) { - return TSDB_CODE_SUCCESS; - } - - SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(&pReader->status.blockIter); - SBlockLoadSuppInfo* pSup = &pReader->suppInfo; - - if (pReader->resBlockInfo.pResBlock->info.id.uid != pFBlock->uid) { - return TSDB_CODE_SUCCESS; - } - - int64_t st = taosGetTimestampUs(); - - SDataBlk* pBlock = getCurrentBlock(&pReader->status.blockIter); - if (tDataBlkHasSma(pBlock)) { - code = tsdbReadBlockSma(pReader->pFileReader, pBlock, pSup->pColAgg); - if (code != TSDB_CODE_SUCCESS) { - tsdbDebug("vgId:%d, failed to load block SMA for uid %" PRIu64 ", code:%s, %s", 0, pFBlock->uid, tstrerror(code), - pReader->idStr); - return code; - } - } else { - *pBlockSMA = NULL; - return TSDB_CODE_SUCCESS; - } - - *allHave = true; - - // always load the first primary timestamp column data - SColumnDataAgg* pTsAgg = &pSup->tsColAgg; - - pTsAgg->numOfNull = 0; - pTsAgg->colId = PRIMARYKEY_TIMESTAMP_COL_ID; - pTsAgg->min = pReader->resBlockInfo.pResBlock->info.window.skey; - pTsAgg->max = pReader->resBlockInfo.pResBlock->info.window.ekey; - - // update the number of NULL data rows - size_t numOfCols = pSup->numOfCols; - - // ensure capacity - if (pDataBlock->pDataBlock) { - size_t colsNum = taosArrayGetSize(pDataBlock->pDataBlock); - taosArrayEnsureCap(pSup->pColAgg, colsNum); - } - - SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; - if (pResBlock->pBlockAgg == NULL) { - size_t num = taosArrayGetSize(pResBlock->pDataBlock); - pResBlock->pBlockAgg = taosMemoryCalloc(num, POINTER_BYTES); - } - - // do fill all null column value SMA info - if (doFillNullColSMA(pSup, pBlock->nRow, numOfCols, pTsAgg)) { - *hasNullSMA = true; - return TSDB_CODE_SUCCESS; - } - size_t size = taosArrayGetSize(pSup->pColAgg); - - int32_t i = 0, j = 0; - while (j < numOfCols && i < size) { - SColumnDataAgg* pAgg = taosArrayGet(pSup->pColAgg, i); - if (pAgg->colId == pSup->colId[j]) { - pResBlock->pBlockAgg[pSup->slotId[j]] = pAgg; - i += 1; - j += 1; - } else if (pAgg->colId < pSup->colId[j]) { - i += 1; - } else if (pSup->colId[j] < pAgg->colId) { - pResBlock->pBlockAgg[pSup->slotId[j]] = NULL; - *allHave = false; - j += 1; - } - } - - *pBlockSMA = pResBlock->pBlockAgg; - pReader->cost.smaDataLoad += 1; - - double elapsedTime = (taosGetTimestampUs() - st) / 1000.0; - pReader->cost.smaLoadTime += elapsedTime; - - tsdbDebug("vgId:%d, succeed to load block SMA for uid %" PRIu64 ", %s", 0, pFBlock->uid, pReader->idStr); - return code; -} - -STableBlockScanInfo* getTableBlockScanInfo(SSHashObj* pTableMap, uint64_t uid, const char* id) { - STableBlockScanInfo** p = tSimpleHashGet(pTableMap, &uid, sizeof(uid)); - if (p == NULL || *p == NULL) { - terrno = TSDB_CODE_INVALID_PARA; - int32_t size = tSimpleHashGetSize(pTableMap); - tsdbError("failed to locate the uid:%" PRIu64 " in query table uid list, total tables:%d, %s", uid, size, id); - return NULL; - } - - return *p; -} - -static SSDataBlock* doRetrieveDataBlock(STsdbReader* pReader) { - SReaderStatus* pStatus = &pReader->status; - int32_t code = TSDB_CODE_SUCCESS; - SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pStatus->blockIter); - - if (pReader->code != TSDB_CODE_SUCCESS) { - return NULL; - } - - STableBlockScanInfo* pBlockScanInfo 
= getTableBlockScanInfo(pStatus->pTableMap, pBlockInfo->uid, pReader->idStr); - if (pBlockScanInfo == NULL) { - return NULL; - } - - code = doLoadFileBlockData(pReader, &pStatus->blockIter, &pStatus->fileBlockData, pBlockScanInfo->uid); - if (code != TSDB_CODE_SUCCESS) { - tBlockDataDestroy(&pStatus->fileBlockData); - terrno = code; - return NULL; - } - - code = copyBlockDataToSDataBlock(pReader); - if (code != TSDB_CODE_SUCCESS) { - tBlockDataDestroy(&pStatus->fileBlockData); - terrno = code; - return NULL; - } - - return pReader->resBlockInfo.pResBlock; -} - -SSDataBlock* tsdbRetrieveDataBlock(STsdbReader* pReader, SArray* pIdList) { - STsdbReader* pTReader = pReader; - if (pReader->type == TIMEWINDOW_RANGE_EXTERNAL) { - if (pReader->step == EXTERNAL_ROWS_PREV) { - pTReader = pReader->innerReader[0]; - } else if (pReader->step == EXTERNAL_ROWS_NEXT) { - pTReader = pReader->innerReader[1]; - } - } - - SReaderStatus* pStatus = &pTReader->status; - if (pStatus->composedDataBlock) { - return pTReader->resBlockInfo.pResBlock; - } - - SSDataBlock* ret = doRetrieveDataBlock(pTReader); - - qTrace("tsdb/read-retrieve: %p, unlock read mutex", pReader); - tsdbReleaseReader(pReader); - - return ret; -} - -int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) { - int32_t code = TSDB_CODE_SUCCESS; - - qTrace("tsdb/reader-reset: %p, take read mutex", pReader); - tsdbAcquireReader(pReader); - - if (pReader->flag == READER_STATUS_SUSPEND) { - code = tsdbReaderResume(pReader); - if (code != TSDB_CODE_SUCCESS) { - tsdbReleaseReader(pReader); - return code; - } - } - - if (isEmptyQueryTimeWindow(&pReader->window) || pReader->pReadSnap == NULL) { - tsdbDebug("tsdb reader reset return %p, %s", pReader->pReadSnap, pReader->idStr); - tsdbReleaseReader(pReader); - return TSDB_CODE_SUCCESS; - } - - SReaderStatus* pStatus = &pReader->status; - SDataBlockIter* pBlockIter = &pStatus->blockIter; - - pReader->order = pCond->order; - pReader->type = TIMEWINDOW_RANGE_CONTAINED; - pStatus->loadFromFile = true; - pStatus->pTableIter = NULL; - pReader->window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows); - - // allocate buffer in order to load data blocks from file - memset(&pReader->suppInfo.tsColAgg, 0, sizeof(SColumnDataAgg)); - - pReader->suppInfo.tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID; - tsdbDataFReaderClose(&pReader->pFileReader); - - int32_t numOfTables = tSimpleHashGetSize(pStatus->pTableMap); - - initFilesetIterator(&pStatus->fileIter, pReader->pReadSnap->fs.aDFileSet, pReader); - resetDataBlockIterator(pBlockIter, pReader->order); - resetTableListIndex(&pReader->status); - - bool asc = ASCENDING_TRAVERSE(pReader->order); - int32_t step = asc ? 1 : -1; - int64_t ts = asc ? 
pReader->window.skey - 1 : pReader->window.ekey + 1; - resetAllDataBlockScanInfo(pStatus->pTableMap, ts, step); - - // no data in files, let's try buffer in memory - if (pStatus->fileIter.numOfFiles == 0) { - pStatus->loadFromFile = false; - resetTableListIndex(pStatus); - } else { - code = initForFirstBlockInFile(pReader, pBlockIter); - if (code != TSDB_CODE_SUCCESS) { - tsdbError("%p reset reader failed, numOfTables:%d, query range:%" PRId64 " - %" PRId64 " in query %s", pReader, - numOfTables, pReader->window.skey, pReader->window.ekey, pReader->idStr); - - tsdbReleaseReader(pReader); - return code; - } - } - - tsdbDebug("%p reset reader, suid:%" PRIu64 ", numOfTables:%d, skey:%" PRId64 ", query range:%" PRId64 " - %" PRId64 - " in query %s", - pReader, pReader->suid, numOfTables, pCond->twindows.skey, pReader->window.skey, pReader->window.ekey, - pReader->idStr); - - tsdbReleaseReader(pReader); - - return code; -} - -static int32_t getBucketIndex(int32_t startRow, int32_t bucketRange, int32_t numOfRows, int32_t numOfBucket) { - if (numOfRows < startRow) { - return 0; - } - int32_t bucketIndex = ((numOfRows - startRow) / bucketRange); - if (bucketIndex == numOfBucket) { - bucketIndex -= 1; - } - return bucketIndex; -} - -int32_t tsdbGetFileBlocksDistInfo(STsdbReader* pReader, STableBlockDistInfo* pTableBlockInfo) { - int32_t code = TSDB_CODE_SUCCESS; - pTableBlockInfo->totalSize = 0; - pTableBlockInfo->totalRows = 0; - pTableBlockInfo->numOfVgroups = 1; - - const int32_t numOfBuckets = 20.0; - - // find the start data block in file - tsdbAcquireReader(pReader); - if (pReader->flag == READER_STATUS_SUSPEND) { - code = tsdbReaderResume(pReader); - if (code != TSDB_CODE_SUCCESS) { - tsdbReleaseReader(pReader); - return code; - } - } - SReaderStatus* pStatus = &pReader->status; - - STsdbCfg* pc = &pReader->pTsdb->pVnode->config.tsdbCfg; - pTableBlockInfo->defMinRows = pc->minRows; - pTableBlockInfo->defMaxRows = pc->maxRows; - - int32_t bucketRange = ceil(((double)(pc->maxRows - pc->minRows)) / numOfBuckets); - - pTableBlockInfo->numOfFiles += 1; - - int32_t numOfTables = (int32_t)tSimpleHashGetSize(pStatus->pTableMap); - int defaultRows = 4096; - - SDataBlockIter* pBlockIter = &pStatus->blockIter; - pTableBlockInfo->numOfFiles += pStatus->fileIter.numOfFiles; - - if (pBlockIter->numOfBlocks > 0) { - pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks; - } - - pTableBlockInfo->numOfTables = numOfTables; - bool hasNext = (pBlockIter->numOfBlocks > 0); - - while (true) { - if (hasNext) { - SDataBlk* pBlock = getCurrentBlock(pBlockIter); - - int32_t numOfRows = pBlock->nRow; - pTableBlockInfo->totalRows += numOfRows; - - if (numOfRows > pTableBlockInfo->maxRows) { - pTableBlockInfo->maxRows = numOfRows; - } - - if (numOfRows < pTableBlockInfo->minRows) { - pTableBlockInfo->minRows = numOfRows; - } - - if (numOfRows < defaultRows) { - pTableBlockInfo->numOfSmallBlocks += 1; - } - - pTableBlockInfo->totalSize += pBlock->aSubBlock[0].szBlock; - - int32_t bucketIndex = getBucketIndex(pTableBlockInfo->defMinRows, bucketRange, numOfRows, numOfBuckets); - pTableBlockInfo->blockRowsHisto[bucketIndex]++; - - hasNext = blockIteratorNext(&pStatus->blockIter, pReader->idStr); - } else { - code = initForFirstBlockInFile(pReader, pBlockIter); - if ((code != TSDB_CODE_SUCCESS) || (pStatus->loadFromFile == false)) { - break; - } - - pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks; - hasNext = (pBlockIter->numOfBlocks > 0); - } - - // tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, 
%s", pReader, numOfBlocks, numOfTables, - // pReader->pFileGroup->fid, pReader->idStr); - } - tsdbReleaseReader(pReader); - return code; -} - -int64_t tsdbGetNumOfRowsInMemTable(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - int64_t rows = 0; - - SReaderStatus* pStatus = &pReader->status; - tsdbAcquireReader(pReader); - if (pReader->flag == READER_STATUS_SUSPEND) { - code = tsdbReaderResume(pReader); - if (code != TSDB_CODE_SUCCESS) { - tsdbReleaseReader(pReader); - return code; - } - } - - int32_t iter = 0; - pStatus->pTableIter = tSimpleHashIterate(pStatus->pTableMap, NULL, &iter); - - while (pStatus->pTableIter != NULL) { - STableBlockScanInfo* pBlockScanInfo = *(STableBlockScanInfo**)pStatus->pTableIter; - - STbData* d = NULL; - if (pReader->pReadSnap->pMem != NULL) { - d = tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->suid, pBlockScanInfo->uid); - if (d != NULL) { - rows += tsdbGetNRowsInTbData(d); - } - } - - STbData* di = NULL; - if (pReader->pReadSnap->pIMem != NULL) { - di = tsdbGetTbDataFromMemTable(pReader->pReadSnap->pIMem, pReader->suid, pBlockScanInfo->uid); - if (di != NULL) { - rows += tsdbGetNRowsInTbData(di); - } - } - - // current table is exhausted, let's try the next table - pStatus->pTableIter = tSimpleHashIterate(pStatus->pTableMap, pStatus->pTableIter, &iter); - } - - tsdbReleaseReader(pReader); - - return rows; -} - -int32_t tsdbGetTableSchema(void* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid) { - SMetaReader mr = {0}; - metaReaderDoInit(&mr, ((SVnode*)pVnode)->pMeta, 0); - int32_t code = metaReaderGetTableEntryByUidCache(&mr, uid); - if (code != TSDB_CODE_SUCCESS) { - terrno = TSDB_CODE_TDB_INVALID_TABLE_ID; - metaReaderClear(&mr); - return terrno; - } - - *suid = 0; - - // only child table and ordinary table is allowed, super table is not allowed. 
- if (mr.me.type == TSDB_CHILD_TABLE) { - tDecoderClear(&mr.coder); - *suid = mr.me.ctbEntry.suid; - code = metaReaderGetTableEntryByUidCache(&mr, *suid); - if (code != TSDB_CODE_SUCCESS) { - terrno = TSDB_CODE_TDB_INVALID_TABLE_ID; - metaReaderClear(&mr); - return terrno; - } - } else if (mr.me.type == TSDB_NORMAL_TABLE) { // do nothing - } else { - terrno = TSDB_CODE_INVALID_PARA; - metaReaderClear(&mr); - return terrno; - } - - metaReaderClear(&mr); - - // get the newest table schema version - code = metaGetTbTSchemaEx(((SVnode*)pVnode)->pMeta, *suid, uid, -1, pSchema); - return code; -} - -int32_t tsdbTakeReadSnap(STsdbReader* pReader, _query_reseek_func_t reseek, STsdbReadSnap** ppSnap) { - int32_t code = 0; - STsdb* pTsdb = pReader->pTsdb; - SVersionRange* pRange = &pReader->verRange; - - // alloc - STsdbReadSnap* pSnap = (STsdbReadSnap*)taosMemoryCalloc(1, sizeof(*pSnap)); - if (pSnap == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; - } - - // lock - taosThreadRwlockRdlock(&pTsdb->rwLock); - - // take snapshot - if (pTsdb->mem && (pRange->minVer <= pTsdb->mem->maxVer && pRange->maxVer >= pTsdb->mem->minVer)) { - pSnap->pMem = pTsdb->mem; - pSnap->pNode = taosMemoryMalloc(sizeof(*pSnap->pNode)); - if (pSnap->pNode == NULL) { - taosThreadRwlockUnlock(&pTsdb->rwLock); - code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; - } - pSnap->pNode->pQHandle = pReader; - pSnap->pNode->reseek = reseek; - - tsdbRefMemTable(pTsdb->mem, pSnap->pNode); - } - - if (pTsdb->imem && (pRange->minVer <= pTsdb->imem->maxVer && pRange->maxVer >= pTsdb->imem->minVer)) { - pSnap->pIMem = pTsdb->imem; - pSnap->pINode = taosMemoryMalloc(sizeof(*pSnap->pINode)); - if (pSnap->pINode == NULL) { - taosThreadRwlockUnlock(&pTsdb->rwLock); - code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; - } - pSnap->pINode->pQHandle = pReader; - pSnap->pINode->reseek = reseek; - - tsdbRefMemTable(pTsdb->imem, pSnap->pINode); - } - - // fs - code = tsdbFSRef(pTsdb, &pSnap->fs); - if (code) { - taosThreadRwlockUnlock(&pTsdb->rwLock); - goto _exit; - } - - // unlock - taosThreadRwlockUnlock(&pTsdb->rwLock); - - tsdbTrace("vgId:%d, take read snapshot", TD_VID(pTsdb->pVnode)); - -_exit: - if (code) { - *ppSnap = NULL; - if (pSnap) { - if (pSnap->pNode) taosMemoryFree(pSnap->pNode); - if (pSnap->pINode) taosMemoryFree(pSnap->pINode); - taosMemoryFree(pSnap); - } - } else { - *ppSnap = pSnap; - } - return code; -} - -void tsdbUntakeReadSnap(STsdbReader* pReader, STsdbReadSnap* pSnap, bool proactive) { - STsdb* pTsdb = pReader->pTsdb; - - if (pSnap) { - if (pSnap->pMem) { - tsdbUnrefMemTable(pSnap->pMem, pSnap->pNode, proactive); - } - - if (pSnap->pIMem) { - tsdbUnrefMemTable(pSnap->pIMem, pSnap->pINode, proactive); - } - - tsdbFSUnref(pTsdb, &pSnap->fs); - if (pSnap->pNode) taosMemoryFree(pSnap->pNode); - if (pSnap->pINode) taosMemoryFree(pSnap->pINode); - taosMemoryFree(pSnap); - } - tsdbTrace("vgId:%d, untake read snapshot", TD_VID(pTsdb->pVnode)); -} - -// if failed, do nothing -void tsdbReaderSetId(STsdbReader* pReader, const char* idstr) { - taosMemoryFreeClear(pReader->idStr); - pReader->idStr = taosStrdup(idstr); -} - -void tsdbReaderSetCloseFlag(STsdbReader* pReader) { /*pReader->code = TSDB_CODE_TSC_QUERY_CANCELLED;*/ } diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index b6762df3b7..868529e4dd 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -452,6 +452,9 @@ static int32_t doLoadBlockIndex(STsdbReader* pReader, SDataFileReader* 
pFileRead const TBrinBlkArray* pBlkArray = NULL; int32_t code = tsdbDataFileReadBrinBlk(pFileReader, &pBlkArray); + if (code != TSDB_CODE_SUCCESS) { + return code; + } #if 0 LRUHandle* handle = NULL; @@ -1117,8 +1120,8 @@ static bool getNeighborBlockOfSameTable(SDataBlockIter* pBlockIter, SFileDataBlo // *nextIndex = pBlockInfo->tbBlockIdx + step; // *pBlockIndex = *(SBlockIndex*)taosArrayGet(pTableBlockScanInfo->pBlockList, *nextIndex); STableDataBlockIdx* pTableDataBlockIdx = taosArrayGet(pTableBlockScanInfo->pBlockIdxList, pBlockInfo->tbBlockIdx + step); - SBrinRecord* p = taosArrayGet(pBlockIter->blockList, pTableDataBlockIdx->globalIndex); - memcpy(pRecord, p, sizeof(SBrinRecord)); + SFileDataBlockInfo* p = taosArrayGet(pBlockIter->blockList, pTableDataBlockIdx->globalIndex); + memcpy(pRecord, &p->record, sizeof(SBrinRecord)); *nextIndex = pBlockInfo->tbBlockIdx + step; @@ -1649,8 +1652,8 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, // create local variable to hold the row value TSDBROW fRow = {.iRow = pRow->iRow, .type = TSDBROW_COL_FMT, .pBlockData = pRow->pBlockData}; - tsdbTrace("fRow ptr:%p, %d, uid:%" PRIu64 ", %s", pRow->pBlockData, pRow->iRow, pLastBlockReader->uid, - pReader->idStr); + tsdbTrace("fRow ptr:%p, %d, uid:%" PRIu64 ", ts:%" PRId64 " %s", pRow->pBlockData, pRow->iRow, pLastBlockReader->uid, + fRow.pBlockData->aTSKEY[fRow.iRow], pReader->idStr); // only last block exists if ((!mergeBlockData) || (tsLastBlock != pBlockData->aTSKEY[pDumpInfo->rowIndex])) { @@ -2760,6 +2763,7 @@ static int32_t doSumFileBlockRows(STsdbReader* pReader, SDataFReader* pFileReade goto _end; } +#if 0 int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); SArray* aBlockIdx = (SArray*)taosLRUCacheValue(pFileReader->pTsdb->biCache, handle); @@ -2788,6 +2792,7 @@ static int32_t doSumFileBlockRows(STsdbReader* pReader, SDataFReader* pFileReade // pReader->rowsNum += block.nRow; // } } +#endif _end: tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); @@ -4453,7 +4458,11 @@ static void doFillNullColSMA(SBlockLoadSuppInfo* pSup, int32_t numOfRows, int32_ // do fill all null column value SMA info int32_t i = 0, j = 0; int32_t size = (int32_t)TARRAY2_SIZE(&pSup->colAggArray); - TARRAY2_INSERT_PTR(&pSup->colAggArray, 0, pTsAgg); + int32_t code = TARRAY2_INSERT_PTR(&pSup->colAggArray, 0, pTsAgg); + if (code != TSDB_CODE_SUCCESS) { + return; + } + size++; while (j < numOfCols && i < size) { @@ -4466,7 +4475,11 @@ static void doFillNullColSMA(SBlockLoadSuppInfo* pSup, int32_t numOfRows, int32_ } else if (pSup->colId[j] < pAgg->colId) { if (pSup->colId[j] != PRIMARYKEY_TIMESTAMP_COL_ID) { SColumnDataAgg nullColAgg = {.colId = pSup->colId[j], .numOfNull = numOfRows}; - TARRAY2_INSERT_PTR(&pSup->colAggArray, i, &nullColAgg); + code = TARRAY2_INSERT_PTR(&pSup->colAggArray, i, &nullColAgg); + if (code != TSDB_CODE_SUCCESS) { + return; + } + i += 1; size++; } @@ -4477,7 +4490,11 @@ static void doFillNullColSMA(SBlockLoadSuppInfo* pSup, int32_t numOfRows, int32_ while (j < numOfCols) { if (pSup->colId[j] != PRIMARYKEY_TIMESTAMP_COL_ID) { SColumnDataAgg nullColAgg = {.colId = pSup->colId[j], .numOfNull = numOfRows}; - TARRAY2_INSERT_PTR(&pSup->colAggArray, i, &nullColAgg); + code = TARRAY2_INSERT_PTR(&pSup->colAggArray, i, &nullColAgg); + if (code != TSDB_CODE_SUCCESS) { + return; + } + i += 1; } j++; @@ -4588,14 +4605,14 @@ static SSDataBlock* doRetrieveDataBlock(STsdbReader* pReader) { code = doLoadFileBlockData(pReader, &pStatus->blockIter, 
&pStatus->fileBlockData, pBlockScanInfo->uid); if (code != TSDB_CODE_SUCCESS) { - tBlockDataDestroy(&pStatus->fileBlockData); + tBlockDataReset(&pStatus->fileBlockData); terrno = code; return NULL; } code = copyBlockDataToSDataBlock(pReader); if (code != TSDB_CODE_SUCCESS) { - tBlockDataDestroy(&pStatus->fileBlockData); + tBlockDataReset(&pStatus->fileBlockData); terrno = code; return NULL; } @@ -4835,7 +4852,7 @@ int64_t tsdbGetNumOfRowsInMemTable2(STsdbReader* pReader) { return rows; } -int32_t tsdbGetTableSchema2(void* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid) { +int32_t tsdbGetTableSchema(void* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid) { SMetaReader mr = {0}; metaReaderDoInit(&mr, ((SVnode*)pVnode)->pMeta, 0); int32_t code = metaReaderGetTableEntryByUidCache(&mr, uid); @@ -4918,11 +4935,12 @@ int32_t tsdbTakeReadSnap2(STsdbReader* pReader, _query_reseek_func_t reseek, STs tsdbRefMemTable(pTsdb->imem, pSnap->pINode); } + // fs + code = tsdbFSCreateRefSnapshot(pTsdb->pFS, &pSnap->pfSetArray); + // unlock taosThreadRwlockUnlock(&pTsdb->rwLock); - // fs - code = tsdbFSCreateRefSnapshot(pTsdb->pFS, &pSnap->pfSetArray); if (code == TSDB_CODE_SUCCESS) { tsdbTrace("vgId:%d, take read snapshot", TD_VID(pTsdb->pVnode)); } @@ -4974,4 +4992,4 @@ void tsdbReaderSetId2(STsdbReader* pReader, const char* idstr) { pReader->status.fileIter.pLastBlockReader->mergeTree.idStr = pReader->idStr; } -void tsdbReaderSetCloseFlag2(STsdbReader* pReader) { pReader->code = TSDB_CODE_TSC_QUERY_CANCELLED; } +void tsdbReaderSetCloseFlag(STsdbReader* pReader) { /*pReader->code = TSDB_CODE_TSC_QUERY_CANCELLED;*/ } diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index ed4257b86d..e4011ca400 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -424,7 +424,10 @@ int32_t tsdbSnapReaderOpen(STsdb* tsdb, int64_t sver, int64_t ever, int8_t type, reader[0]->ever = ever; reader[0]->type = type; + taosThreadRwlockRdlock(&tsdb->rwLock); code = tsdbFSCreateRefSnapshot(tsdb->pFS, &reader[0]->fsetArr); + taosThreadRwlockUnlock(&tsdb->rwLock); + TSDB_CHECK_CODE(code, lino, _exit); _exit: @@ -1045,6 +1048,7 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr writer[0]->precision = pTsdb->keepCfg.precision; writer[0]->minRow = pTsdb->pVnode->config.tsdbCfg.minRows; writer[0]->maxRow = pTsdb->pVnode->config.tsdbCfg.maxRows; + writer[0]->cmprAlg = pTsdb->pVnode->config.tsdbCfg.compression; writer[0]->commitID = tsdbFSAllocEid(pTsdb->pFS); writer[0]->szPage = pTsdb->pVnode->config.tsdbPageSize; writer[0]->compactVersion = INT64_MAX; diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil.c b/source/dnode/vnode/src/tsdb/tsdbUtil.c index 6b1beef8e2..87917cd243 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c @@ -516,10 +516,13 @@ int32_t tGetDelData(uint8_t *p, void *ph) { } int32_t tsdbKeyFid(TSKEY key, int32_t minutes, int8_t precision) { + int64_t fid; if (key < 0) { - return (int)((key + 1) / tsTickPerMin[precision] / minutes - 1); + fid = ((key + 1) / tsTickPerMin[precision] / minutes - 1); + return (fid < INT32_MIN) ? INT32_MIN : (int32_t)fid; } else { - return (int)((key / tsTickPerMin[precision] / minutes)); + fid = ((key / tsTickPerMin[precision] / minutes)); + return (fid > INT32_MAX) ? 
INT32_MAX : (int32_t)fid; } } diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index d7f0246133..efa722d41a 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -160,7 +160,7 @@ static int32_t vnodePreProcessDropTtlMsg(SVnode *pVnode, SRpcMsg *pMsg) { } { // find expired uids - tbUids = taosArrayInit(8, sizeof(int64_t)); + tbUids = taosArrayInit(8, sizeof(tb_uid_t)); if (tbUids == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); @@ -468,7 +468,6 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg void *ptr = NULL; void *pReq; int32_t len; - int32_t ret; if (ver <= pVnode->state.applied) { vError("vgId:%d, duplicate write request. ver: %" PRId64 ", applied: %" PRId64 "", TD_VID(pVnode), ver, @@ -561,12 +560,14 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg } break; case TDMT_STREAM_TASK_DEPLOY: { - if (tqProcessTaskDeployReq(pVnode->pTq, ver, pReq, len) < 0) { + int32_t code = tqProcessTaskDeployReq(pVnode->pTq, ver, pReq, len); + if (code != TSDB_CODE_SUCCESS) { + terrno = code; goto _err; } } break; case TDMT_STREAM_TASK_DROP: { - if (tqProcessTaskDropReq(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) { + if (tqProcessTaskDropReq(pVnode->pTq, pMsg->pCont, pMsg->contLen) < 0) { goto _err; } } break; @@ -582,13 +583,17 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg goto _err; } } break; + case TDMT_VND_STREAM_TASK_RESET: { + if (pVnode->restored/* && vnodeIsLeader(pVnode)*/) { + tqProcessTaskResetReq(pVnode->pTq, pMsg); + } + } break; case TDMT_VND_ALTER_CONFIRM: needCommit = pVnode->config.hashChange; if (vnodeProcessAlterConfirmReq(pVnode, ver, pReq, len, pRsp) < 0) { goto _err; } break; - case TDMT_VND_ALTER_CONFIG: vnodeProcessAlterConfigReq(pVnode, ver, pReq, len, pRsp); break; @@ -602,7 +607,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg vnodeProcessDropIndexReq(pVnode, ver, pReq, len, pRsp); break; case TDMT_VND_STREAM_CHECK_POINT_SOURCE: - tqProcessStreamCheckPointSourceReq(pVnode->pTq, pMsg); + tqProcessTaskCheckPointSourceReq(pVnode->pTq, pMsg, pRsp); break; case TDMT_VND_STREAM_TASK_UPDATE: tqProcessTaskUpdateReq(pVnode->pTq, pMsg); @@ -754,9 +759,9 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) case TDMT_STREAM_TASK_DISPATCH_RSP: return tqProcessTaskDispatchRsp(pVnode->pTq, pMsg); case TDMT_VND_STREAM_TASK_CHECK: - return tqProcessStreamTaskCheckReq(pVnode->pTq, pMsg); + return tqProcessTaskCheckReq(pVnode->pTq, pMsg); case TDMT_VND_STREAM_TASK_CHECK_RSP: - return tqProcessStreamTaskCheckRsp(pVnode->pTq, pMsg); + return tqProcessTaskCheckRsp(pVnode->pTq, pMsg); case TDMT_STREAM_RETRIEVE: return tqProcessTaskRetrieveReq(pVnode->pTq, pMsg); case TDMT_STREAM_RETRIEVE_RSP: @@ -768,7 +773,7 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) case TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP: return tqProcessTaskScanHistoryFinishRsp(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_CHECKPOINT_READY: - return tqProcessStreamTaskCheckpointReadyMsg(pVnode->pTq, pMsg); + return tqProcessTaskCheckpointReadyMsg(pVnode->pTq, pMsg); default: vError("unknown msg type:%d in stream queue", pMsg->msgType); return TSDB_CODE_APP_ERROR; @@ -942,16 +947,14 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, taosArrayPush(rsp.pArray, &cRsp); - int32_t 
clusterId = pVnode->config.syncCfg.nodeInfo[0].clusterId; + if (pCreateReq->sqlLen > 0) { // skip audit for auto-created tables, since sql is not set when a table is auto created + int32_t clusterId = pVnode->config.syncCfg.nodeInfo[0].clusterId; - char detail[1000] = {0}; - sprintf(detail, "btime:%" PRId64 ", flags:%d, ttl:%d, type:%d", - pCreateReq->btime, pCreateReq->flags, pCreateReq->ttl, pCreateReq->type); + SName name = {0}; + tNameFromString(&name, pVnode->config.dbname, T_NAME_ACCT | T_NAME_DB); - SName name = {0}; - tNameFromString(&name, pVnode->config.dbname, T_NAME_ACCT | T_NAME_DB); - - auditRecord(pReq, clusterId, "createTable", name.dbname, pCreateReq->name, detail); + auditRecord(pReq, clusterId, "createTable", name.dbname, pCreateReq->name, pCreateReq->sql, pCreateReq->sqlLen); + } } vDebug("vgId:%d, add %d new created tables into query table list", TD_VID(pVnode), (int32_t)taosArrayGetSize(tbUids)); @@ -976,6 +979,7 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, _exit: for (int32_t iReq = 0; iReq < req.nReqs; iReq++) { pCreateReq = req.pReqs + iReq; + taosMemoryFree(pCreateReq->sql); taosMemoryFree(pCreateReq->comment); taosArrayDestroy(pCreateReq->ctb.tagName); } @@ -1443,11 +1447,8 @@ static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t ver, void *pReq, in SColData *pColData = (SColData *)taosArrayGet(pSubmitTbData->aCol, 0); TSKEY *aKey = (TSKEY *)(pColData->pData); - vDebug("vgId:%d submit %d rows data, uid:%"PRId64, TD_VID(pVnode), pColData->nVal, pSubmitTbData->uid); for (int32_t iRow = 0; iRow < pColData->nVal; iRow++) { - vDebug("vgId:%d uid:%"PRId64" ts:%"PRId64, TD_VID(pVnode), pSubmitTbData->uid, aKey[iRow]); - if (aKey[iRow] < minKey || aKey[iRow] > maxKey || (iRow > 0 && aKey[iRow] <= aKey[iRow - 1])) { code = TSDB_CODE_INVALID_MSG; vError("vgId:%d %s failed since %s, version:%" PRId64, TD_VID(pVnode), __func__, tstrerror(terrno), ver); @@ -1458,10 +1459,7 @@ } else { int32_t nRow = TARRAY_SIZE(pSubmitTbData->aRowP); SRow **aRow = (SRow **)TARRAY_DATA(pSubmitTbData->aRowP); - - vDebug("vgId:%d submit %d rows data, uid:%"PRId64, TD_VID(pVnode), nRow, pSubmitTbData->uid); for (int32_t iRow = 0; iRow < nRow; ++iRow) { - vDebug("vgId:%d uid:%"PRId64" ts:%"PRId64, TD_VID(pVnode), pSubmitTbData->uid, aRow[iRow]->ts); if (aRow[iRow]->ts < minKey || aRow[iRow]->ts > maxKey || (iRow > 0 && aRow[iRow]->ts <= aRow[iRow - 1]->ts)) { code = TSDB_CODE_INVALID_MSG; @@ -1881,7 +1879,6 @@ static int32_t vnodeProcessDeleteReq(SVnode *pVnode, int64_t ver, void *pReq, in tDecoderInit(pCoder, pReq, len); tDecodeDeleteRes(pCoder, pRes); - ASSERT(taosArrayGetSize(pRes->uidList) == 0 || (pRes->skey != 0 && pRes->ekey != 0)); for (int32_t iUid = 0; iUid < taosArrayGetSize(pRes->uidList); iUid++) { uint64_t uid = *(uint64_t *)taosArrayGet(pRes->uidList, iUid); diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 43850ebfee..a6c743c87d 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -549,23 +549,29 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) ASSERT(commitIdx == vnodeSyncAppliedIndex(pFsm)); walApplyVer(pVnode->pWal, commitIdx); - pVnode->restored = true; - if (vnodeIsRoleLeader(pVnode)) { - vInfo("vgId:%d, sync restore finished, start to launch stream tasks", vgId); + taosWLockLatch(&pVnode->pTq->pStreamMeta->lock); + if
(pVnode->pTq->pStreamMeta->startInfo.startedAfterNodeUpdate) { + vInfo("vgId:%d, sync restore finished, stream tasks will be launched by another thread", vgId); + taosWUnLockLatch(&pVnode->pTq->pStreamMeta->lock); + return; + } + if (vnodeIsRoleLeader(pVnode)) { // start to restore all stream tasks if (tsDisableStream) { - vInfo("vgId:%d, not launch stream tasks, since stream tasks are disabled", vgId); + vInfo("vgId:%d, sync restore finished, not launch stream tasks, since stream tasks are disabled", vgId); } else { - vInfo("vgId:%d start to launch stream tasks", pVnode->config.vgId); + vInfo("vgId:%d sync restore finished, start to launch stream tasks", pVnode->config.vgId); tqStartStreamTasks(pVnode->pTq); tqCheckAndRunStreamTaskAsync(pVnode->pTq); } } else { vInfo("vgId:%d, sync restore finished, not launch stream tasks since not leader", vgId); } + + taosWUnLockLatch(&pVnode->pTq->pStreamMeta->lock); } static void vnodeBecomeFollower(const SSyncFSM *pFsm) { @@ -580,7 +586,10 @@ static void vnodeBecomeFollower(const SSyncFSM *pFsm) { } taosThreadMutexUnlock(&pVnode->lock); - tqStopStreamTasks(pVnode->pTq); + if (pVnode->pTq) { + tqUpdateNodeStage(pVnode->pTq, false); + tqStopStreamTasks(pVnode->pTq); + } } static void vnodeBecomeLearner(const SSyncFSM *pFsm) { @@ -598,10 +607,10 @@ static void vnodeBecomeLearner(const SSyncFSM *pFsm) { static void vnodeBecomeLeader(const SSyncFSM *pFsm) { SVnode *pVnode = pFsm->data; - if (pVnode->pTq) { - tqUpdateNodeStage(pVnode->pTq); - } vDebug("vgId:%d, become leader", pVnode->config.vgId); + if (pVnode->pTq) { + tqUpdateNodeStage(pVnode->pTq, true); + } } static bool vnodeApplyQueueEmpty(const SSyncFSM *pFsm) { diff --git a/source/libs/audit/src/auditMain.c b/source/libs/audit/src/auditMain.c index d4b6465ac7..c408f0d87b 100644 --- a/source/libs/audit/src/auditMain.c +++ b/source/libs/audit/src/auditMain.c @@ -30,14 +30,16 @@ int32_t auditInit(const SAuditCfg *pCfg) { return 0; } -extern void auditRecordImp(SRpcMsg *pReq, int64_t clusterId, char *operation, char *target1, char *target2, char *detail); +extern void auditRecordImp(SRpcMsg *pReq, int64_t clusterId, char *operation, char *target1, char *target2, + char *detail, int32_t len); -void auditRecord(SRpcMsg *pReq, int64_t clusterId, char *operation, char *target1, char *target2, char *detail) { - auditRecordImp(pReq, clusterId, operation, target1, target2, detail); +void auditRecord(SRpcMsg *pReq, int64_t clusterId, char *operation, char *target1, char *target2, + char *detail, int32_t len) { + auditRecordImp(pReq, clusterId, operation, target1, target2, detail, len); } #ifndef TD_ENTERPRISE -void auditRecordImp(SRpcMsg *pReq, int64_t clusterId, char *operation, char *target1, char *target2, char *detail) { +void auditRecordImp(SRpcMsg *pReq, int64_t clusterId, char *operation, char *target1, char *target2, + char *detail, int32_t len) { } #endif - diff --git a/source/libs/catalog/src/catalog.c b/source/libs/catalog/src/catalog.c index f975517669..44854d334b 100644 --- a/source/libs/catalog/src/catalog.c +++ b/source/libs/catalog/src/catalog.c @@ -165,7 +165,7 @@ int32_t ctgRefreshTbMeta(SCatalog* pCtg, SRequestConnInfo* pConn, SCtgTbMetaCtx* } if (CTG_IS_META_NULL(output->metaType)) { - ctgError("no tbmeta got, tbNmae:%s", tNameGetTableName(ctx->pName)); + ctgError("no tbmeta got, tbName:%s", tNameGetTableName(ctx->pName)); ctgRemoveTbMetaFromCache(pCtg, ctx->pName, false); CTG_ERR_JRET(CTG_ERR_CODE_TABLE_NOT_EXIST); } diff --git a/source/libs/catalog/src/ctgAsync.c
b/source/libs/catalog/src/ctgAsync.c index fb5ecf7ad2..ba7106ea51 100644 --- a/source/libs/catalog/src/ctgAsync.c +++ b/source/libs/catalog/src/ctgAsync.c @@ -1171,7 +1171,7 @@ int32_t ctgHandleGetTbMetaRsp(SCtgTaskReq* tReq, int32_t reqType, const SDataBuf STableMetaOutput* pOut = (STableMetaOutput*)pMsgCtx->out; if (CTG_IS_META_NULL(pOut->metaType)) { - ctgError("no tbmeta got, tbNmae:%s", tNameGetTableName(pName)); + ctgError("no tbmeta got, tbName:%s", tNameGetTableName(pName)); ctgRemoveTbMetaFromCache(pCtg, pName, false); CTG_ERR_JRET(CTG_ERR_CODE_TABLE_NOT_EXIST); } @@ -1341,7 +1341,7 @@ int32_t ctgHandleGetTbMetasRsp(SCtgTaskReq* tReq, int32_t reqType, const SDataBu STableMetaOutput* pOut = (STableMetaOutput*)pMsgCtx->out; if (CTG_IS_META_NULL(pOut->metaType)) { - ctgTaskError("no tbmeta got, tbNmae:%s", tNameGetTableName(pName)); + ctgTaskError("no tbmeta got, tbName:%s", tNameGetTableName(pName)); ctgRemoveTbMetaFromCache(pCtg, pName, false); CTG_ERR_JRET(CTG_ERR_CODE_TABLE_NOT_EXIST); } diff --git a/source/libs/catalog/src/ctgCache.c b/source/libs/catalog/src/ctgCache.c index b541cdd411..5c8a1f456d 100644 --- a/source/libs/catalog/src/ctgCache.c +++ b/source/libs/catalog/src/ctgCache.c @@ -760,12 +760,14 @@ int32_t ctgGetCachedStbNameFromSuid(SCatalog* pCtg, char* dbFName, uint64_t suid char *stb = taosHashAcquire(dbCache->stbCache, &suid, sizeof(suid)); if (NULL == stb) { ctgDebug("stb 0x%" PRIx64 " not in cache, dbFName:%s", suid, dbFName); + ctgReleaseDBCache(pCtg, dbCache); return TSDB_CODE_SUCCESS; } *stbName = taosStrdup(stb); taosHashRelease(dbCache->stbCache, stb); + ctgReleaseDBCache(pCtg, dbCache); return TSDB_CODE_SUCCESS; } diff --git a/source/libs/command/inc/commandInt.h b/source/libs/command/inc/commandInt.h index f74c61ea78..c704eb3951 100644 --- a/source/libs/command/inc/commandInt.h +++ b/source/libs/command/inc/commandInt.h @@ -112,6 +112,7 @@ extern "C" { #define COMMAND_CATALOG_DEBUG "catalogDebug" #define COMMAND_ENABLE_MEM_DEBUG "enableMemDebug" #define COMMAND_DISABLE_MEM_DEBUG "disableMemDebug" +#define COMMAND_ASYNCLOG "asynclog" typedef struct SExplainGroup { int32_t nodeNum; @@ -168,7 +169,7 @@ typedef struct SExplainCtx { } \ tlen += snprintf(tbuf + VARSTR_HEADER_SIZE + tlen, TSDB_EXPLAIN_RESULT_ROW_SIZE - VARSTR_HEADER_SIZE - tlen, __VA_ARGS__); \ } while (0) - + #define EXPLAIN_ROW_APPEND(...) tlen += snprintf(tbuf + VARSTR_HEADER_SIZE + tlen, TSDB_EXPLAIN_RESULT_ROW_SIZE - VARSTR_HEADER_SIZE - tlen, __VA_ARGS__) #define EXPLAIN_ROW_END() do { varDataSetLen(tbuf, tlen); tlen += VARSTR_HEADER_SIZE; isVerboseLine = true; } while (0) diff --git a/source/libs/command/src/command.c b/source/libs/command/src/command.c index bd20e96ac1..f204f239b4 100644 --- a/source/libs/command/src/command.c +++ b/source/libs/command/src/command.c @@ -760,6 +760,16 @@ static int32_t execAlterCmd(char* cmd, char* value, bool* processed) { return code; } qInfo("memory dbg disabled"); + } else if (0 == strcasecmp(cmd, COMMAND_ASYNCLOG)) { + int newAsyncLogValue = (strlen(value) == 0) ? 
1 : atoi(value); + if (newAsyncLogValue != 0 && newAsyncLogValue != 1) { + code = TSDB_CODE_INVALID_CFG_VALUE; + qError("failed to alter asynclog, error:%s", tstrerror(code)); + goto _return; + } + + code = TSDB_CODE_SUCCESS; + tsAsyncLog = newAsyncLogValue; } else { goto _return; } diff --git a/source/libs/executor/src/aggregateoperator.c b/source/libs/executor/src/aggregateoperator.c index 5e649af47e..2d0a044559 100644 --- a/source/libs/executor/src/aggregateoperator.c +++ b/source/libs/executor/src/aggregateoperator.c @@ -335,6 +335,7 @@ static int32_t createDataBlockForEmptyInput(SOperatorInfo* pOperator, SSDataBloc colInfo.info.type = TSDB_DATA_TYPE_NULL; colInfo.info.bytes = 1; + SExprInfo* pOneExpr = &pOperator->exprSupp.pExprInfo[i]; for (int32_t j = 0; j < pOneExpr->base.numOfParams; ++j) { SFunctParam* pFuncParam = &pOneExpr->base.pParam[j]; @@ -354,6 +355,10 @@ static int32_t createDataBlockForEmptyInput(SOperatorInfo* pOperator, SSDataBloc } blockDataEnsureCapacity(pBlock, pBlock->info.rows); + for (int32_t i = 0; i < blockDataGetNumOfCols(pBlock); ++i) { + SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, i); + colDataSetNULL(pColInfoData, 0); + } *ppBlock = pBlock; return TSDB_CODE_SUCCESS; diff --git a/source/libs/executor/src/dataInserter.c b/source/libs/executor/src/dataInserter.c index e47cbb7eba..f301ddf4be 100644 --- a/source/libs/executor/src/dataInserter.c +++ b/source/libs/executor/src/dataInserter.c @@ -289,8 +289,8 @@ int32_t buildSubmitReqFromBlock(SDataInserterHandle* pInserter, SSubmitReq2** pp } if (disorderTs) { - tRowSort(tbData.aRowP); - if ((terrno = tRowMerge(tbData.aRowP, (STSchema*)pTSchema, 0)) != 0) { + if ((tRowSort(tbData.aRowP) != TSDB_CODE_SUCCESS) || + (terrno = tRowMerge(tbData.aRowP, (STSchema*)pTSchema, 0)) != 0) { goto _end; } } diff --git a/source/libs/executor/src/groupoperator.c b/source/libs/executor/src/groupoperator.c index f6b0a87f54..467a49b37a 100644 --- a/source/libs/executor/src/groupoperator.c +++ b/source/libs/executor/src/groupoperator.c @@ -1145,7 +1145,8 @@ static SSDataBlock* doStreamHashPartition(SOperatorInfo* pOperator) { } break; case STREAM_CREATE_CHILD_TABLE: case STREAM_RETRIEVE: - case STREAM_CHECKPOINT: { + case STREAM_CHECKPOINT: + case STREAM_GET_ALL: { return pBlock; } default: diff --git a/source/libs/executor/src/projectoperator.c b/source/libs/executor/src/projectoperator.c index 00b246afad..ab7a15eacd 100644 --- a/source/libs/executor/src/projectoperator.c +++ b/source/libs/executor/src/projectoperator.c @@ -293,7 +293,8 @@ SSDataBlock* doProjectOperation(SOperatorInfo* pOperator) { // for stream interval if (pBlock->info.type == STREAM_RETRIEVE || pBlock->info.type == STREAM_DELETE_RESULT || - pBlock->info.type == STREAM_DELETE_DATA || pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { + pBlock->info.type == STREAM_DELETE_DATA || pBlock->info.type == STREAM_CREATE_CHILD_TABLE || + pBlock->info.type == STREAM_CHECKPOINT) { return pBlock; } diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index b95a948937..8dbb8a979e 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1317,8 +1317,9 @@ static STimeWindow getSlidingWindow(TSKEY* startTsCol, TSKEY* endTsCol, uint64_t } static SSDataBlock* doRangeScan(SStreamScanInfo* pInfo, SSDataBlock* pSDB, int32_t tsColIndex, int32_t* pRowIndex) { - qInfo("do stream range scan. windows index:%d", *pRowIndex); + qDebug("do stream range scan. 
windows index:%d", *pRowIndex); bool prepareRes = true; + while (1) { SSDataBlock* pResult = NULL; pResult = doTableScan(pInfo->pTableScanOp); @@ -2217,7 +2218,9 @@ FETCH_NEXT_BLOCK: if (pSDB) { STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; pSDB->info.type = pInfo->scanMode == STREAM_SCAN_FROM_DATAREADER_RANGE ? STREAM_NORMAL : STREAM_PULL_DATA; - checkUpdateData(pInfo, true, pSDB, false); + if (!pInfo->igCheckUpdate && pInfo->pUpdateInfo) { + checkUpdateData(pInfo, true, pSDB, false); + } printSpecDataBlock(pSDB, getStreamOpName(pOperator->operatorType), "update", GET_TASKID(pTaskInfo)); calBlockTbName(pInfo, pSDB); return pSDB; diff --git a/source/libs/executor/src/streamtimewindowoperator.c b/source/libs/executor/src/streamtimewindowoperator.c index 6fc862b438..8bfa8e1a5d 100644 --- a/source/libs/executor/src/streamtimewindowoperator.c +++ b/source/libs/executor/src/streamtimewindowoperator.c @@ -369,10 +369,25 @@ static void doBuildDeleteResult(SStreamIntervalOperatorInfo* pInfo, SArray* pWin } } +void clearGroupResInfo(SGroupResInfo* pGroupResInfo) { + if (pGroupResInfo->freeItem) { + int32_t size = taosArrayGetSize(pGroupResInfo->pRows); + for (int32_t i = pGroupResInfo->index; i < size; i++) { + void* pVal = taosArrayGetP(pGroupResInfo->pRows, i); + taosMemoryFree(pVal); + } + pGroupResInfo->freeItem = false; + } + pGroupResInfo->pRows = taosArrayDestroy(pGroupResInfo->pRows); + pGroupResInfo->index = 0; +} + void destroyStreamFinalIntervalOperatorInfo(void* param) { SStreamIntervalOperatorInfo* pInfo = (SStreamIntervalOperatorInfo*)param; cleanupBasicInfo(&pInfo->binfo); cleanupAggSup(&pInfo->aggSup); + clearGroupResInfo(&pInfo->groupResInfo); + // it should be empty. void* pIte = NULL; while ((pIte = taosHashIterate(pInfo->pPullDataMap, pIte)) != NULL) { @@ -389,7 +404,6 @@ void destroyStreamFinalIntervalOperatorInfo(void* param) { nodesDestroyNode((SNode*)pInfo->pPhyNode); colDataDestroy(&pInfo->twAggSup.timeWindowData); - pInfo->groupResInfo.pRows = taosArrayDestroy(pInfo->groupResInfo.pRows); cleanupExprSupp(&pInfo->scalarSupp); tSimpleHashCleanup(pInfo->pUpdatedMap); pInfo->pUpdatedMap = NULL; @@ -1023,7 +1037,7 @@ int32_t doStreamIntervalEncodeOpState(void** buf, int32_t len, SOperatorInfo* pO while ((pIte = taosHashIterate(pInfo->pPullDataMap, pIte)) != NULL) { void* key = taosHashGetKey(pIte, &keyLen); tlen += encodeSWinKey(buf, key); - SArray* pArray = (SArray*)pIte; + SArray* pArray = *(SArray**)pIte; int32_t chSize = taosArrayGetSize(pArray); tlen += taosEncodeFixedI32(buf, chSize); for (int32_t i = 0; i < chSize; i++) { @@ -1530,6 +1544,7 @@ void destroyStreamSessionAggOperatorInfo(void* param) { cleanupBasicInfo(&pInfo->binfo); destroyStreamAggSupporter(&pInfo->streamAggSup); cleanupExprSupp(&pInfo->scalarSupp); + clearGroupResInfo(&pInfo->groupResInfo); if (pInfo->pChildren != NULL) { int32_t size = taosArrayGetSize(pInfo->pChildren); @@ -3025,7 +3040,7 @@ void destroyStreamStateOperatorInfo(void* param) { SStreamStateAggOperatorInfo* pInfo = (SStreamStateAggOperatorInfo*)param; cleanupBasicInfo(&pInfo->binfo); destroyStreamAggSupporter(&pInfo->streamAggSup); - cleanupGroupResInfo(&pInfo->groupResInfo); + clearGroupResInfo(&pInfo->groupResInfo); cleanupExprSupp(&pInfo->scalarSupp); if (pInfo->pChildren != NULL) { int32_t size = taosArrayGetSize(pInfo->pChildren); diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index c62b5946dc..68a83fa662 100644 --- a/source/libs/function/src/builtins.c +++ 
b/source/libs/function/src/builtins.c @@ -651,7 +651,7 @@ static int32_t translateApercentileImpl(SFunctionNode* pFunc, char* pErrBuf, int (SDataType){.bytes = getApercentileMaxSize() + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY}; } else { // original percent param is reserved - if (2 != numOfParams) { + if (3 != numOfParams && 2 != numOfParams) { return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName); } uint8_t para1Type = ((SExprNode*)nodesListGetNode(pFunc->pParameterList, 0))->resType.type; @@ -660,6 +660,19 @@ static int32_t translateApercentileImpl(SFunctionNode* pFunc, char* pErrBuf, int return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName); } + if (3 == numOfParams) { + uint8_t para3Type = ((SExprNode*)nodesListGetNode(pFunc->pParameterList, 2))->resType.type; + if (!IS_STR_DATA_TYPE(para3Type)) { + return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName); + } + + SNode* pParamNode2 = nodesListGetNode(pFunc->pParameterList, 2); + if (QUERY_NODE_VALUE != nodeType(pParamNode2) || !validateApercentileAlgo((SValueNode*)pParamNode2)) { + return buildFuncErrMsg(pErrBuf, len, TSDB_CODE_FUNC_FUNTION_ERROR, + "Third parameter algorithm of apercentile must be 'default' or 't-digest'"); + } + } + pFunc->node.resType = (SDataType){.bytes = tDataTypes[TSDB_DATA_TYPE_DOUBLE].bytes, .type = TSDB_DATA_TYPE_DOUBLE}; } @@ -744,7 +757,11 @@ int32_t topBotCreateMergeParam(SNodeList* pRawParameters, SNode* pPartialRes, SN } int32_t apercentileCreateMergeParam(SNodeList* pRawParameters, SNode* pPartialRes, SNodeList** pParameters) { - return reserveFirstMergeParam(pRawParameters, pPartialRes, pParameters); + int32_t code = reserveFirstMergeParam(pRawParameters, pPartialRes, pParameters); + if (TSDB_CODE_SUCCESS == code && pRawParameters->length >= 3) { + code = nodesListStrictAppend(*pParameters, nodesCloneNode(nodesListGetNode(pRawParameters, 2))); + } + return code; } static int32_t translateSpread(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { @@ -1786,7 +1803,7 @@ static int32_t translateDiff(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { } uint8_t colType = ((SExprNode*)nodesListGetNode(pFunc->pParameterList, 0))->resType.type; - if (!IS_SIGNED_NUMERIC_TYPE(colType) && !IS_FLOAT_TYPE(colType) && TSDB_DATA_TYPE_BOOL != colType && + if (!IS_INTEGER_TYPE(colType) && !IS_FLOAT_TYPE(colType) && TSDB_DATA_TYPE_BOOL != colType && !IS_TIMESTAMP_TYPE(colType)) { return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName); } @@ -1815,6 +1832,8 @@ static int32_t translateDiff(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { uint8_t resType; if (IS_SIGNED_NUMERIC_TYPE(colType) || IS_TIMESTAMP_TYPE(colType) || TSDB_DATA_TYPE_BOOL == colType) { resType = TSDB_DATA_TYPE_BIGINT; + } else if (IS_UNSIGNED_NUMERIC_TYPE(colType)) { + resType = TSDB_DATA_TYPE_UBIGINT; } else { resType = TSDB_DATA_TYPE_DOUBLE; } diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index bcbb3af950..071afe0159 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -1904,7 +1904,7 @@ int32_t apercentileFunction(SqlFunctionCtx* pCtx) { return TSDB_CODE_SUCCESS; } -static void apercentileTransferInfo(SAPercentileInfo* pInput, SAPercentileInfo* pOutput) { +static void apercentileTransferInfo(SAPercentileInfo* pInput, SAPercentileInfo* pOutput, bool* hasRes) { pOutput->percent = pInput->percent; pOutput->algo = pInput->algo; if (pOutput->algo == APERCT_ALGO_TDIGEST) { @@ -1915,6 +1915,10 @@ 
static void apercentileTransferInfo(SAPercentileInfo* pInput, SAPercentileInfo* return; } + if (hasRes) { + *hasRes = true; + } + buildTDigestInfo(pOutput); TDigest* pTDigest = pOutput->pTDigest; tdigestAutoFill(pTDigest, COMPRESSION); @@ -1931,6 +1935,10 @@ static void apercentileTransferInfo(SAPercentileInfo* pInput, SAPercentileInfo* return; } + if (hasRes) { + *hasRes = true; + } + buildHistogramInfo(pOutput); SHistogramInfo* pHisto = pOutput->pHisto; @@ -1970,12 +1978,13 @@ int32_t apercentileFunctionMerge(SqlFunctionCtx* pCtx) { qDebug("%s total %" PRId64 " rows will merge, %p", __FUNCTION__, pInput->numOfRows, pInfo->pHisto); + bool hasRes = false; int32_t start = pInput->startRowIndex; for (int32_t i = start; i < start + pInput->numOfRows; ++i) { char* data = colDataGetData(pCol, i); SAPercentileInfo* pInputInfo = (SAPercentileInfo*)varDataVal(data); - apercentileTransferInfo(pInputInfo, pInfo); + apercentileTransferInfo(pInputInfo, pInfo, &hasRes); } if (pInfo->algo != APERCT_ALGO_TDIGEST) { @@ -1984,7 +1993,7 @@ int32_t apercentileFunctionMerge(SqlFunctionCtx* pCtx) { pInfo->pHisto->numOfEntries, pInfo->pHisto); } - SET_VAL(pResInfo, 1, 1); + SET_VAL(pResInfo, hasRes ? 1 : 0, 1); return TSDB_CODE_SUCCESS; } @@ -2056,7 +2065,7 @@ int32_t apercentileCombine(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx) qDebug("%s start to combine apercentile, %p", __FUNCTION__, pDBuf->pHisto); - apercentileTransferInfo(pSBuf, pDBuf); + apercentileTransferInfo(pSBuf, pDBuf, NULL); pDResInfo->numOfRes = TMAX(pDResInfo->numOfRes, pSResInfo->numOfRes); pDResInfo->isNullRes &= pSResInfo->isNullRes; return TSDB_CODE_SUCCESS; @@ -2714,16 +2723,20 @@ static int32_t doSetPrevVal(SDiffInfo* pDiffInfo, int32_t type, const char* pv, case TSDB_DATA_TYPE_BOOL: pDiffInfo->prev.i64 = *(bool*)pv ? 
1 : 0; break; + case TSDB_DATA_TYPE_UTINYINT: case TSDB_DATA_TYPE_TINYINT: pDiffInfo->prev.i64 = *(int8_t*)pv; break; + case TSDB_DATA_TYPE_UINT: case TSDB_DATA_TYPE_INT: pDiffInfo->prev.i64 = *(int32_t*)pv; break; + case TSDB_DATA_TYPE_USMALLINT: case TSDB_DATA_TYPE_SMALLINT: pDiffInfo->prev.i64 = *(int16_t*)pv; break; case TSDB_DATA_TYPE_TIMESTAMP: + case TSDB_DATA_TYPE_UBIGINT: case TSDB_DATA_TYPE_BIGINT: pDiffInfo->prev.i64 = *(int64_t*)pv; break; @@ -2745,6 +2758,7 @@ static int32_t doHandleDiff(SDiffInfo* pDiffInfo, int32_t type, const char* pv, int64_t ts) { pDiffInfo->prevTs = ts; switch (type) { + case TSDB_DATA_TYPE_UINT: case TSDB_DATA_TYPE_INT: { int32_t v = *(int32_t*)pv; int64_t delta = v - pDiffInfo->prev.i64; // direct previous may be null @@ -2758,6 +2772,7 @@ static int32_t doHandleDiff(SDiffInfo* pDiffInfo, int32_t type, const char* pv, break; } case TSDB_DATA_TYPE_BOOL: + case TSDB_DATA_TYPE_UTINYINT: case TSDB_DATA_TYPE_TINYINT: { int8_t v = *(int8_t*)pv; int64_t delta = v - pDiffInfo->prev.i64; // direct previous may be null @@ -2769,6 +2784,7 @@ static int32_t doHandleDiff(SDiffInfo* pDiffInfo, int32_t type, const char* pv, pDiffInfo->prev.i64 = v; break; } + case TSDB_DATA_TYPE_USMALLINT: case TSDB_DATA_TYPE_SMALLINT: { int16_t v = *(int16_t*)pv; int64_t delta = v - pDiffInfo->prev.i64; // direct previous may be null @@ -2781,6 +2797,7 @@ static int32_t doHandleDiff(SDiffInfo* pDiffInfo, int32_t type, const char* pv, break; } case TSDB_DATA_TYPE_TIMESTAMP: + case TSDB_DATA_TYPE_UBIGINT: case TSDB_DATA_TYPE_BIGINT: { int64_t v = *(int64_t*)pv; int64_t delta = v - pDiffInfo->prev.i64; // direct previous may be null diff --git a/source/libs/parser/src/parInsertUtil.c b/source/libs/parser/src/parInsertUtil.c index 79e305989b..3efb5dafcb 100644 --- a/source/libs/parser/src/parInsertUtil.c +++ b/source/libs/parser/src/parInsertUtil.c @@ -495,9 +495,9 @@ int32_t insMergeTableDataCxt(SHashObj* pTableHash, SArray** pVgDataBlocks) { tColDataSortMerge(pTableCxt->pData->aCol); } else { if (!pTableCxt->ordered) { - tRowSort(pTableCxt->pData->aRowP); + code = tRowSort(pTableCxt->pData->aRowP); } - if (!pTableCxt->ordered || pTableCxt->duplicateTs) { + if (code == TSDB_CODE_SUCCESS && (!pTableCxt->ordered || pTableCxt->duplicateTs)) { code = tRowMerge(pTableCxt->pData->aRowP, pTableCxt->pSchema, 0); } } diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 0bad382956..1c31993a92 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -4667,9 +4667,133 @@ static int32_t checkCreateDatabase(STranslateContext* pCxt, SCreateDatabaseStmt* return checkDatabaseOptions(pCxt, pStmt->dbName, pStmt->pOptions); } +#define FILL_CMD_SQL(sql, sqlLen, pCmdReq, CMD_TYPE, genericCmd) \ + CMD_TYPE* pCmdReq = genericCmd; \ + char* cmdSql = taosMemoryMalloc(sqlLen); \ + if (cmdSql == NULL) { \ + return TSDB_CODE_OUT_OF_MEMORY; \ + } \ + memcpy(cmdSql, sql, sqlLen); \ + pCmdReq->sqlLen = sqlLen; \ + pCmdReq->sql = cmdSql; \ + +static int32_t fillCmdSql(STranslateContext* pCxt, int16_t msgType, void* pReq) { + const char* sql = pCxt->pParseCxt->pSql; + size_t sqlLen = pCxt->pParseCxt->sqlLen; + + switch (msgType) { + case TDMT_MND_CREATE_DB: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SCreateDbReq, pReq); + break; + } + case TDMT_MND_ALTER_DB: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SAlterDbReq, pReq); + break; + } + case TDMT_MND_DROP_DB: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SDropDbReq, pReq); + break; + } + case 
TDMT_MND_COMPACT_DB: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SCompactDbReq, pReq); + break; + } + + case TDMT_MND_TMQ_DROP_TOPIC: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SMDropTopicReq, pReq); + break; + } + + case TDMT_MND_BALANCE_VGROUP_LEADER: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SBalanceVgroupLeaderReq, pReq); + break; + } + case TDMT_MND_BALANCE_VGROUP: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SBalanceVgroupReq, pReq); + break; + } + case TDMT_MND_REDISTRIBUTE_VGROUP: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SRedistributeVgroupReq, pReq); + break; + } + case TDMT_MND_CREATE_STB: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SMCreateStbReq, pReq); + break; + } + case TDMT_MND_DROP_STB: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SMDropStbReq, pReq); + break; + } + case TDMT_MND_ALTER_STB: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SMAlterStbReq, pReq); + break; + } + + case TDMT_MND_DROP_USER: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SDropUserReq, pReq); + break; + } + case TDMT_MND_CREATE_USER: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SCreateUserReq, pReq); + break; + } + case TDMT_MND_ALTER_USER: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SAlterUserReq, pReq); + break; + } + + case TDMT_MND_CREATE_QNODE: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SMCreateQnodeReq, pReq); + break; + } + case TDMT_MND_DROP_QNODE: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SMDropQnodeReq, pReq); + break; + } + + case TDMT_MND_CREATE_MNODE: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SMCreateMnodeReq, pReq); + break; + } + case TDMT_MND_DROP_MNODE: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SMDropMnodeReq, pReq); + break; + } + + case TDMT_MND_CREATE_DNODE: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SCreateDnodeReq, pReq); + break; + } + case TDMT_MND_DROP_DNODE: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SDropDnodeReq, pReq); + break; + } + case TDMT_MND_RESTORE_DNODE: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SRestoreDnodeReq, pReq); + break; + } + case TDMT_MND_CONFIG_DNODE: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SMCfgDnodeReq, pReq); + break; + } + + case TDMT_MND_DROP_STREAM: { + FILL_CMD_SQL(sql, sqlLen, pCmdReq, SMDropStreamReq, pReq); + break; + } + default: { + break; + } + + } + + return TSDB_CODE_SUCCESS; +} + typedef int32_t (*FSerializeFunc)(void* pBuf, int32_t bufLen, void* pReq); static int32_t buildCmdMsg(STranslateContext* pCxt, int16_t msgType, FSerializeFunc func, void* pReq) { + fillCmdSql(pCxt, msgType, pReq); pCxt->pCmdMsg = taosMemoryMalloc(sizeof(SCmdMsgInfo)); if (NULL == pCxt->pCmdMsg) { return TSDB_CODE_OUT_OF_MEMORY; @@ -4706,7 +4830,9 @@ static int32_t translateDropDatabase(STranslateContext* pCxt, SDropDatabaseStmt* tNameGetFullDbName(&name, dropReq.db); dropReq.ignoreNotExists = pStmt->ignoreNotExists; - return buildCmdMsg(pCxt, TDMT_MND_DROP_DB, (FSerializeFunc)tSerializeSDropDbReq, &dropReq); + int32_t code = buildCmdMsg(pCxt, TDMT_MND_DROP_DB, (FSerializeFunc)tSerializeSDropDbReq, &dropReq); + tFreeSDropDbReq(&dropReq); + return code; } static void buildAlterDbReq(STranslateContext* pCxt, SAlterDatabaseStmt* pStmt, SAlterDbReq* pReq) { @@ -4743,7 +4869,9 @@ static int32_t translateAlterDatabase(STranslateContext* pCxt, SAlterDatabaseStm SAlterDbReq alterReq = {0}; buildAlterDbReq(pCxt, pStmt, &alterReq); - return buildCmdMsg(pCxt, TDMT_MND_ALTER_DB, (FSerializeFunc)tSerializeSAlterDbReq, &alterReq); + code = buildCmdMsg(pCxt, TDMT_MND_ALTER_DB, (FSerializeFunc)tSerializeSAlterDbReq, &alterReq); + tFreeSAlterDbReq(&alterReq); + return code; } static int32_t translateTrimDatabase(STranslateContext* pCxt, STrimDatabaseStmt* 
pStmt) { @@ -5460,6 +5588,7 @@ static int32_t doTranslateDropSuperTable(STranslateContext* pCxt, const SName* p tNameExtractFullName(pTableName, dropReq.name); dropReq.igNotExists = ignoreNotExists; code = buildCmdMsg(pCxt, TDMT_MND_DROP_STB, (FSerializeFunc)tSerializeSMDropStbReq, &dropReq); + tFreeSMDropStbReq(&dropReq); } return code; } @@ -5775,7 +5904,9 @@ static int32_t translateDropUser(STranslateContext* pCxt, SDropUserStmt* pStmt) SDropUserReq dropReq = {0}; strcpy(dropReq.user, pStmt->userName); - return buildCmdMsg(pCxt, TDMT_MND_DROP_USER, (FSerializeFunc)tSerializeSDropUserReq, &dropReq); + int32_t code = buildCmdMsg(pCxt, TDMT_MND_DROP_USER, (FSerializeFunc)tSerializeSDropUserReq, &dropReq); + tFreeSDropUserReq(&dropReq); + return code; } static int32_t translateCreateDnode(STranslateContext* pCxt, SCreateDnodeStmt* pStmt) { @@ -5783,7 +5914,9 @@ static int32_t translateCreateDnode(STranslateContext* pCxt, SCreateDnodeStmt* p strcpy(createReq.fqdn, pStmt->fqdn); createReq.port = pStmt->port; - return buildCmdMsg(pCxt, TDMT_MND_CREATE_DNODE, (FSerializeFunc)tSerializeSCreateDnodeReq, &createReq); + int32_t code = buildCmdMsg(pCxt, TDMT_MND_CREATE_DNODE, (FSerializeFunc)tSerializeSCreateDnodeReq, &createReq); + tFreeSCreateDnodeReq(&createReq); + return code; } static int32_t translateDropDnode(STranslateContext* pCxt, SDropDnodeStmt* pStmt) { @@ -5794,7 +5927,9 @@ static int32_t translateDropDnode(STranslateContext* pCxt, SDropDnodeStmt* pStmt dropReq.force = pStmt->force; dropReq.unsafe = pStmt->unsafe; - return buildCmdMsg(pCxt, TDMT_MND_DROP_DNODE, (FSerializeFunc)tSerializeSDropDnodeReq, &dropReq); + int32_t code = buildCmdMsg(pCxt, TDMT_MND_DROP_DNODE, (FSerializeFunc)tSerializeSDropDnodeReq, &dropReq); + tFreeSDropDnodeReq(&dropReq); + return code; } static int32_t translateAlterDnode(STranslateContext* pCxt, SAlterDnodeStmt* pStmt) { @@ -5803,7 +5938,9 @@ static int32_t translateAlterDnode(STranslateContext* pCxt, SAlterDnodeStmt* pSt strcpy(cfgReq.config, pStmt->config); strcpy(cfgReq.value, pStmt->value); - return buildCmdMsg(pCxt, TDMT_MND_CONFIG_DNODE, (FSerializeFunc)tSerializeSMCfgDnodeReq, &cfgReq); + int32_t code = buildCmdMsg(pCxt, TDMT_MND_CONFIG_DNODE, (FSerializeFunc)tSerializeSMCfgDnodeReq, &cfgReq); + tFreeSMCfgDnodeReq(&cfgReq); + return code; } static int32_t translateRestoreDnode(STranslateContext* pCxt, SRestoreComponentNodeStmt* pStmt) { @@ -5825,7 +5962,10 @@ static int32_t translateRestoreDnode(STranslateContext* pCxt, SRestoreComponentN default: return -1; } - return buildCmdMsg(pCxt, TDMT_MND_RESTORE_DNODE, (FSerializeFunc)tSerializeSRestoreDnodeReq, &restoreReq); + + int32_t code = buildCmdMsg(pCxt, TDMT_MND_RESTORE_DNODE, (FSerializeFunc)tSerializeSRestoreDnodeReq, &restoreReq); + tFreeSRestoreDnodeReq(&restoreReq); + return code; } static int32_t getSmaIndexDstVgId(STranslateContext* pCxt, const char* pDbName, const char* pTableName, @@ -6095,8 +6235,10 @@ static int16_t getCreateComponentNodeMsgType(ENodeType type) { static int32_t translateCreateComponentNode(STranslateContext* pCxt, SCreateComponentNodeStmt* pStmt) { SMCreateQnodeReq createReq = {.dnodeId = pStmt->dnodeId}; - return buildCmdMsg(pCxt, getCreateComponentNodeMsgType(nodeType(pStmt)), + int32_t code = buildCmdMsg(pCxt, getCreateComponentNodeMsgType(nodeType(pStmt)), (FSerializeFunc)tSerializeSCreateDropMQSNodeReq, &createReq); + tFreeSMCreateQnodeReq(&createReq); + return code; } static int16_t getDropComponentNodeMsgType(ENodeType type) { @@ -6117,8 +6259,10 @@ static int16_t 
getDropComponentNodeMsgType(ENodeType type) { static int32_t translateDropComponentNode(STranslateContext* pCxt, SDropComponentNodeStmt* pStmt) { SDDropQnodeReq dropReq = {.dnodeId = pStmt->dnodeId}; - return buildCmdMsg(pCxt, getDropComponentNodeMsgType(nodeType(pStmt)), + int32_t code = buildCmdMsg(pCxt, getDropComponentNodeMsgType(nodeType(pStmt)), (FSerializeFunc)tSerializeSCreateDropMQSNodeReq, &dropReq); + tFreeSDDropQnodeReq(&dropReq); + return code; } static int32_t checkTopicQuery(STranslateContext* pCxt, SSelectStmt* pSelect) { @@ -6307,7 +6451,9 @@ static int32_t translateDropTopic(STranslateContext* pCxt, SDropTopicStmt* pStmt snprintf(dropReq.name, sizeof(dropReq.name), "%d.%s", pCxt->pParseCxt->acctId, pStmt->topicName); dropReq.igNotExists = pStmt->ignoreNotExists; - return buildCmdMsg(pCxt, TDMT_MND_TMQ_DROP_TOPIC, (FSerializeFunc)tSerializeSMDropTopicReq, &dropReq); + int32_t code = buildCmdMsg(pCxt, TDMT_MND_TMQ_DROP_TOPIC, (FSerializeFunc)tSerializeSMDropTopicReq, &dropReq); + tFreeSMDropTopicReq(&dropReq); + return code; } static int32_t translateDropCGroup(STranslateContext* pCxt, SDropCGroupStmt* pStmt) { @@ -6375,6 +6521,7 @@ static int32_t translateCompact(STranslateContext* pCxt, SCompactDatabaseStmt* p if (TSDB_CODE_SUCCESS == code) { code = buildCmdMsg(pCxt, TDMT_MND_COMPACT_DB, (FSerializeFunc)tSerializeSCompactDbReq, &compactReq); } + tFreeSCompactDbReq(&compactReq); return code; } @@ -7005,8 +7152,8 @@ static int32_t createLastTsSelectStmt(char* pDb, char* pTable, STableMeta* pMeta return TSDB_CODE_OUT_OF_MEMORY; } - strcpy(col->tableAlias, pTable); - strcpy(col->colName, pMeta->schema[0].name); + tstrncpy(col->tableAlias, pTable, tListLen(col->tableAlias)); + tstrncpy(col->colName, pMeta->schema[0].name, tListLen(col->colName)); SNodeList* pParamterList = nodesMakeList(); if (NULL == pParamterList) { nodesDestroyNode((SNode*)col); @@ -7234,7 +7381,9 @@ static int32_t translateDropStream(STranslateContext* pCxt, SDropStreamStmt* pSt tNameSetDbName(&name, pCxt->pParseCxt->acctId, pStmt->streamName, strlen(pStmt->streamName)); tNameGetFullDbName(&name, dropReq.name); dropReq.igNotExists = pStmt->ignoreNotExists; - return buildCmdMsg(pCxt, TDMT_MND_DROP_STREAM, (FSerializeFunc)tSerializeSMDropStreamReq, &dropReq); + int32_t code = buildCmdMsg(pCxt, TDMT_MND_DROP_STREAM, (FSerializeFunc)tSerializeSMDropStreamReq, &dropReq); + tFreeSMDropStreamReq(&dropReq); + return code; } static int32_t translatePauseStream(STranslateContext* pCxt, SPauseStreamStmt* pStmt) { @@ -7429,18 +7578,24 @@ static int32_t translateRevoke(STranslateContext* pCxt, SRevokeStmt* pStmt) { strcpy(req.user, pStmt->userName); sprintf(req.objname, "%d.%s", pCxt->pParseCxt->acctId, pStmt->objName); sprintf(req.tabName, "%s", pStmt->tabName); - return buildCmdMsg(pCxt, TDMT_MND_ALTER_USER, (FSerializeFunc)tSerializeSAlterUserReq, &req); + int32_t code = buildCmdMsg(pCxt, TDMT_MND_ALTER_USER, (FSerializeFunc)tSerializeSAlterUserReq, &req); + tFreeSAlterUserReq(&req); + return code; } static int32_t translateBalanceVgroup(STranslateContext* pCxt, SBalanceVgroupStmt* pStmt) { SBalanceVgroupReq req = {0}; - return buildCmdMsg(pCxt, TDMT_MND_BALANCE_VGROUP, (FSerializeFunc)tSerializeSBalanceVgroupReq, &req); + int32_t code = buildCmdMsg(pCxt, TDMT_MND_BALANCE_VGROUP, (FSerializeFunc)tSerializeSBalanceVgroupReq, &req); + tFreeSBalanceVgroupReq(&req); + return code; } static int32_t translateBalanceVgroupLeader(STranslateContext* pCxt, SBalanceVgroupLeaderStmt* pStmt) { SBalanceVgroupLeaderReq req = {0}; 
req.vgId = pStmt->vgId; - return buildCmdMsg(pCxt, TDMT_MND_BALANCE_VGROUP_LEADER, (FSerializeFunc)tSerializeSBalanceVgroupLeaderReq, &req); + int32_t code = buildCmdMsg(pCxt, TDMT_MND_BALANCE_VGROUP_LEADER, (FSerializeFunc)tSerializeSBalanceVgroupLeaderReq, &req); + tFreeSBalanceVgroupLeaderReq(&req); + return code; } static int32_t translateMergeVgroup(STranslateContext* pCxt, SMergeVgroupStmt* pStmt) { @@ -7484,6 +7639,7 @@ static int32_t translateRedistributeVgroup(STranslateContext* pCxt, SRedistribut req.dnodeId3 = pStmt->dnodeId3; code = buildCmdMsg(pCxt, TDMT_MND_REDISTRIBUTE_VGROUP, (FSerializeFunc)tSerializeSRedistributeVgroupReq, &req); } + tFreeSRedistributeVgroupReq(&req); return code; } diff --git a/source/libs/planner/src/planLogicCreater.c b/source/libs/planner/src/planLogicCreater.c index a3baa5d43a..0e56615451 100644 --- a/source/libs/planner/src/planLogicCreater.c +++ b/source/libs/planner/src/planLogicCreater.c @@ -88,6 +88,7 @@ static EDealRes doRewriteExpr(SNode** pNode, void* pContext) { pCxt->pOutputs[index] = true; break; } + index++; } } break; @@ -174,6 +175,7 @@ static int32_t cloneRewriteExprs(SNodeList* pExprs, bool* pOutputs, SNodeList** break; } } + index++; } return code; } diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index f10c42310d..430e69f46f 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -806,7 +806,7 @@ static bool pushDownCondOptIsColEqualOnCond(SJoinLogicNode* pJoin, SNode* pCond, return false; } SOperatorNode* pOper = (SOperatorNode*)pCond; - if (QUERY_NODE_COLUMN != nodeType(pOper->pLeft) || QUERY_NODE_COLUMN != nodeType(pOper->pRight)) { + if (QUERY_NODE_COLUMN != nodeType(pOper->pLeft) || NULL == pOper->pRight || QUERY_NODE_COLUMN != nodeType(pOper->pRight)) { return false; } SColumnNode* pLeft = (SColumnNode*)(pOper->pLeft); @@ -3217,8 +3217,11 @@ int32_t stbJoinOptAddFuncToScanNode(char* funcName, SScanLogicNode* pScan) { SFunctionNode* pUidFunc = createFunction(funcName, NULL); snprintf(pUidFunc->node.aliasName, sizeof(pUidFunc->node.aliasName), "%s.%p", pUidFunc->functionName, pUidFunc); - nodesListStrictAppend(pScan->pScanPseudoCols, (SNode *)pUidFunc); - return createColumnByRewriteExpr((SNode*)pUidFunc, &pScan->node.pTargets); + int32_t code = nodesListStrictAppend(pScan->pScanPseudoCols, (SNode *)pUidFunc); + if (TSDB_CODE_SUCCESS == code) { + code = createColumnByRewriteExpr((SNode*)pUidFunc, &pScan->node.pTargets); + } + return code; } @@ -3365,12 +3368,7 @@ static int32_t stbJoinOptCreateTableScanNodes(SLogicNode* pJoin, SNodeList** ppL pScan->scanType = SCAN_TYPE_TABLE; } - if (TSDB_CODE_SUCCESS == code) { - *ppList = pList; - } else { - nodesDestroyList(pList); - *ppList = NULL; - } + *ppList = pList; return code; } @@ -3474,12 +3472,15 @@ static int32_t stbJoinOptCreateMergeJoinNode(SLogicNode* pOrig, SLogicNode* pChi FOREACH(pNode, pJoin->node.pChildren) { ERASE_NODE(pJoin->node.pChildren); } - nodesListStrictAppend(pJoin->node.pChildren, (SNode *)pChild); - pChild->pParent = (SLogicNode*)pJoin; + int32_t code = nodesListStrictAppend(pJoin->node.pChildren, (SNode *)pChild); + if (TSDB_CODE_SUCCESS == code) { + pChild->pParent = (SLogicNode*)pJoin; + *ppLogic = (SLogicNode*)pJoin; + } else { + nodesDestroyNode((SNode*)pJoin); + } - *ppLogic = (SLogicNode*)pJoin; - - return TSDB_CODE_SUCCESS; + return code; } static int32_t stbJoinOptCreateDynQueryCtrlNode(SLogicNode* pRoot, SLogicNode* pPrev, SLogicNode* pPost, bool* srcScan, 
SLogicNode** ppDynNode) { @@ -3519,11 +3520,18 @@ static int32_t stbJoinOptCreateDynQueryCtrlNode(SLogicNode* pRoot, SLogicNode* p nodesListStrictAppend(pDynCtrl->stbJoin.pUidList, nodesListGetNode(pHJoin->node.pTargets, 2)); nodesListStrictAppend(pDynCtrl->stbJoin.pVgList, nodesListGetNode(pHJoin->node.pTargets, 1)); nodesListStrictAppend(pDynCtrl->stbJoin.pVgList, nodesListGetNode(pHJoin->node.pTargets, 3)); - + if (TSDB_CODE_SUCCESS == code) { - nodesListStrictAppend(pDynCtrl->node.pChildren, (SNode*)pPrev); - nodesListStrictAppend(pDynCtrl->node.pChildren, (SNode*)pPost); - pDynCtrl->node.pTargets = nodesCloneList(pPost->pTargets); + code = nodesListStrictAppend(pDynCtrl->node.pChildren, (SNode*)pPrev); + if (TSDB_CODE_SUCCESS == code) { + code = nodesListStrictAppend(pDynCtrl->node.pChildren, (SNode*)pPost); + } + if (TSDB_CODE_SUCCESS == code) { + pDynCtrl->node.pTargets = nodesCloneList(pPost->pTargets); + if (!pDynCtrl->node.pTargets) { + code = TSDB_CODE_OUT_OF_MEMORY; + } + } } if (TSDB_CODE_SUCCESS == code) { diff --git a/source/libs/planner/src/planPhysiCreater.c b/source/libs/planner/src/planPhysiCreater.c index d55e80a23d..d6799a25a7 100644 --- a/source/libs/planner/src/planPhysiCreater.c +++ b/source/libs/planner/src/planPhysiCreater.c @@ -1025,11 +1025,7 @@ static int32_t createGroupCachePhysiNode(SPhysiPlanContext* pCxt, SNodeList* pCh } */ - if (TSDB_CODE_SUCCESS == code) { - *pPhyNode = (SPhysiNode*)pGrpCache; - } else { - nodesDestroyNode((SNode*)pGrpCache); - } + *pPhyNode = (SPhysiNode*)pGrpCache; return code; } @@ -1059,6 +1055,8 @@ static int32_t updateDynQueryCtrlStbJoinInfo(SPhysiPlanContext* pCxt, SNodeList* } pDynCtrl->stbJoin.batchFetch = pLogicNode->stbJoin.batchFetch; } + nodesDestroyList(pVgList); + nodesDestroyList(pUidList); return code; } diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 12729c3262..adc3e9fd27 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -3983,18 +3983,15 @@ _return: return code; } -static int32_t fltSclGetDatumValueFromPoint(SFltSclPoint *point, SFltSclDatum *d) { +static int32_t fltSclGetTimeStampDatum(SFltSclPoint *point, SFltSclDatum *d) { *d = point->val; - if (point->val.kind == FLT_SCL_DATUM_KIND_NULL) { - return TSDB_CODE_SUCCESS; - } - if (point->val.kind == FLT_SCL_DATUM_KIND_MAX) { - getDataMax(d->type.type, &(d->i)); - } else if (point->val.kind == FLT_SCL_DATUM_KIND_MIN) { - getDataMin(d->type.type, &(d->i)); - } + d->kind = FLT_SCL_DATUM_KIND_INT64; - if (IS_INTEGER_TYPE(d->type.type) || IS_TIMESTAMP_TYPE(d->type.type)) { + if (point->val.kind == FLT_SCL_DATUM_KIND_MAX) { + getDataMax(point->val.type.type, &(d->i)); + } else if (point->val.kind == FLT_SCL_DATUM_KIND_MIN) { + getDataMin(point->val.type.type, &(d->i)); + } else if (point->val.kind == FLT_SCL_DATUM_KIND_INT64) { if (point->excl) { if (point->start) { ++d->i; @@ -4002,6 +3999,28 @@ static int32_t fltSclGetDatumValueFromPoint(SFltSclPoint *point, SFltSclDatum *d --d->i; } } + } else if (point->val.kind == FLT_SCL_DATUM_KIND_FLOAT64) { + double v = d->d; + if (point->excl) { + if (point->start) { + d->i = v + 1; + } else { + d->i = v - 1; + } + } else { + d->i = v; + } + } else if (point->val.kind == FLT_SCL_DATUM_KIND_UINT64) { + uint64_t v = d->u; + if (point->excl) { + if (point->start) { + d->i = v + 1; + } else { + d->i = v - 1; + } + } else { + d->i = v; + } } else { qError("not supported type %d when get datum from point", d->type.type); } @@ -4022,12 +4041,13 @@ int32_t 
filterGetTimeRange(SNode *pNode, STimeWindow *win, bool *isStrict) { SFltSclColumnRange *colRange = taosArrayGet(colRanges, 0); SArray *points = colRange->points; if (taosArrayGetSize(points) == 2) { + *win = TSWINDOW_DESC_INITIALIZER; SFltSclPoint *startPt = taosArrayGet(points, 0); SFltSclPoint *endPt = taosArrayGet(points, 1); SFltSclDatum start; SFltSclDatum end; - fltSclGetDatumValueFromPoint(startPt, &start); - fltSclGetDatumValueFromPoint(endPt, &end); + fltSclGetTimeStampDatum(startPt, &start); + fltSclGetTimeStampDatum(endPt, &end); win->skey = start.i; win->ekey = end.i; *isStrict = true; diff --git a/source/libs/scalar/src/scalar.c b/source/libs/scalar/src/scalar.c index cc6be68c85..3e003234cf 100644 --- a/source/libs/scalar/src/scalar.c +++ b/source/libs/scalar/src/scalar.c @@ -1671,6 +1671,9 @@ static int32_t sclGetJsonOperatorResType(SOperatorNode *pOp) { } static int32_t sclGetBitwiseOperatorResType(SOperatorNode *pOp) { + if (!pOp->pLeft || !pOp->pRight) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } SDataType ldt = ((SExprNode *)(pOp->pLeft))->resType; SDataType rdt = ((SExprNode *)(pOp->pRight))->resType; if(TSDB_DATA_TYPE_VARBINARY == ldt.type || TSDB_DATA_TYPE_VARBINARY == rdt.type){ diff --git a/source/libs/scalar/src/sclvector.c b/source/libs/scalar/src/sclvector.c index e12c62ad87..c5789a65ca 100644 --- a/source/libs/scalar/src/sclvector.c +++ b/source/libs/scalar/src/sclvector.c @@ -329,6 +329,7 @@ static FORCE_INLINE void varToVarbinary(char *buf, SScalarParam *pOut, int32_t r if (t == NULL) { sclError("Out of memory"); terrno = TSDB_CODE_OUT_OF_MEMORY; + taosMemoryFree(data); return; } varDataSetLen(t, size); diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index bbb7595e5a..4cd8319a07 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -26,27 +26,68 @@ extern "C" { #endif -#define ONE_MB_F (1048576.0) -#define SIZE_IN_MB(_v) ((_v) / ONE_MB_F) +#define CHECK_DOWNSTREAM_INTERVAL 100 +#define LAUNCH_HTASK_INTERVAL 100 +#define WAIT_FOR_MINIMAL_INTERVAL 100.00 +#define MAX_RETRY_LAUNCH_HISTORY_TASK 40 +#define RETRY_LAUNCH_INTERVAL_INC_RATE 1.2 -typedef struct { +#define MAX_BLOCK_NAME_NUM 1024 +#define DISPATCH_RETRY_INTERVAL_MS 300 +#define MAX_CONTINUE_RETRY_COUNT 5 + +#define META_HB_CHECK_INTERVAL 200 +#define META_HB_SEND_IDLE_COUNTER 25 // send hb every 5 sec +#define STREAM_TASK_KEY_LEN ((sizeof(int64_t)) << 1) + +#define STREAM_TASK_QUEUE_CAPACITY 20480 +#define STREAM_TASK_QUEUE_CAPACITY_IN_SIZE (30) + +// clang-format off +#define stFatal(...) do { if (stDebugFlag & DEBUG_FATAL) { taosPrintLog("STM FATAL ", DEBUG_FATAL, 255, __VA_ARGS__); }} while(0) +#define stError(...) do { if (stDebugFlag & DEBUG_ERROR) { taosPrintLog("STM ERROR ", DEBUG_ERROR, 255, __VA_ARGS__); }} while(0) +#define stWarn(...) do { if (stDebugFlag & DEBUG_WARN) { taosPrintLog("STM WARN ", DEBUG_WARN, 255, __VA_ARGS__); }} while(0) +#define stInfo(...) do { if (stDebugFlag & DEBUG_INFO) { taosPrintLog("STM ", DEBUG_INFO, 255, __VA_ARGS__); }} while(0) +#define stDebug(...) do { if (stDebugFlag & DEBUG_DEBUG) { taosPrintLog("STM ", DEBUG_DEBUG, tqDebugFlag, __VA_ARGS__); }} while(0) +#define stTrace(...) 
do { if (stDebugFlag & DEBUG_TRACE) { taosPrintLog("STM ", DEBUG_TRACE, tqDebugFlag, __VA_ARGS__); }} while(0) +// clang-format on + +typedef struct SStreamGlobalEnv { int8_t inited; void* timer; } SStreamGlobalEnv; -typedef struct { +typedef struct SStreamContinueExecInfo { SEpSet epset; int32_t taskId; SRpcMsg msg; } SStreamContinueExecInfo; +struct STokenBucket { + int32_t numCapacity; // total capacity, available token per second + int32_t numOfToken; // total available tokens + int32_t numRate; // number of token per second + double quotaCapacity; // available capacity for maximum input size, KiloBytes per Second + double quotaRemain; // not consumed bytes per second + double quotaRate; // number of token per second + int64_t fillTimestamp; // fill timestamp +}; + +struct SStreamQueue { + STaosQueue* pQueue; + STaosQall* qall; + void* qItem; + int8_t status; +}; + extern SStreamGlobalEnv streamEnv; extern int32_t streamBackendId; extern int32_t streamBackendCfWrapperId; -const char* streamGetBlockTypeStr(int32_t type); -void streamRetryDispatchStreamBlock(SStreamTask* pTask, int64_t waitDuration); +void streamRetryDispatchData(SStreamTask* pTask, int64_t waitDuration); int32_t streamDispatchStreamBlock(SStreamTask* pTask); +void destroyDispatchMsg(SStreamDispatchReq* pReq, int32_t numOfVgroups); +int32_t getNumOfDispatchBranch(SStreamTask* pTask); int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBlock); SStreamDataBlock* createStreamBlockFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg); @@ -61,14 +102,18 @@ int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* p int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId); int32_t streamTaskBuildCheckpoint(SStreamTask* pTask); -int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet); +int32_t streamSendCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet); int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t srcTaskId, int32_t index, int64_t checkpointId); int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask); int32_t streamTaskSendCheckpointSourceRsp(SStreamTask* pTask); int32_t streamTaskGetNumOfDownstream(const SStreamTask* pTask); -int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks); +int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks, int32_t* blockSize); +int32_t streamQueueItemGetSize(const SStreamQueueItem* pItem); +void streamQueueItemIncSize(const SStreamQueueItem* pItem, int32_t size); +const char* streamQueueItemGetTypeStr(int32_t type); + SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem); int32_t streamTaskBuildScanhistoryRspMsg(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, void** pBuffer, int32_t* pLen); @@ -77,7 +122,20 @@ int32_t streamNotifyUpstreamContinue(SStreamTask* pTask); int32_t streamTaskFillHistoryFinished(SStreamTask* pTask); int32_t streamTransferStateToStreamTask(SStreamTask* pTask); -int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t cap, int32_t rate); +int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t numRate, int32_t quotaRate); +STaskId streamTaskExtractKey(const SStreamTask* pTask); +void streamTaskInitForLaunchHTask(SHistoryTaskInfo* pInfo); +void 
streamTaskSetRetryInfoForLaunch(SHistoryTaskInfo* pInfo); + +void streamMetaResetStartInfo(STaskStartInfo* pMeta); + +SStreamQueue* streamQueueOpen(int64_t cap); +void streamQueueClose(SStreamQueue* pQueue, int32_t taskId); +void streamQueueProcessSuccess(SStreamQueue* queue); +void streamQueueProcessFail(SStreamQueue* queue); +void* streamQueueNextItem(SStreamQueue* pQueue); +void streamFreeQitem(SStreamQueueItem* data); +int32_t streamQueueGetItemSize(const SStreamQueue* pQueue); #ifdef __cplusplus } diff --git a/source/libs/stream/src/stream.c b/source/libs/stream/src/stream.c index d1bf6a91c5..03ba796b2c 100644 --- a/source/libs/stream/src/stream.c +++ b/source/libs/stream/src/stream.c @@ -58,39 +58,54 @@ char* createStreamTaskIdStr(int64_t streamId, int32_t taskId) { static void streamSchedByTimer(void* param, void* tmrId) { SStreamTask* pTask = (void*)param; + const char* id = pTask->id.idStr; + int32_t nextTrigger = (int32_t)pTask->info.triggerParam; int8_t status = atomic_load_8(&pTask->schedInfo.status); - qDebug("s-task:%s in scheduler, trigger status:%d, next:%dms", pTask->id.idStr, status, (int32_t)pTask->info.triggerParam); + stDebug("s-task:%s in scheduler, trigger status:%d, next:%dms", id, status, nextTrigger); if (streamTaskShouldStop(&pTask->status) || streamTaskShouldPause(&pTask->status)) { - qDebug("s-task:%s jump out of schedTimer", pTask->id.idStr); + stDebug("s-task:%s jump out of schedTimer", id); return; } - if (status == TASK_TRIGGER_STATUS__ACTIVE) { - SStreamTrigger* pTrigger = taosAllocateQitem(sizeof(SStreamTrigger), DEF_QITEM, 0); - if (pTrigger == NULL) { - return; - } + if (pTask->status.taskStatus == TASK_STATUS__CK) { + stDebug("s-task:%s in checkpoint procedure, not retrieve result, next:%dms", id, nextTrigger); + } else { + if (status == TASK_TRIGGER_STATUS__ACTIVE) { + SStreamTrigger* pTrigger = taosAllocateQitem(sizeof(SStreamTrigger), DEF_QITEM, 0); + if (pTrigger == NULL) { + stError("s-task:%s failed to prepare retrieve data trigger, code:%s, try again in %dms", id, "out of memory", + nextTrigger); + taosTmrReset(streamSchedByTimer, nextTrigger, pTask, streamEnv.timer, &pTask->schedInfo.pTimer); + return; + } - pTrigger->type = STREAM_INPUT__GET_RES; - pTrigger->pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock)); - if (pTrigger->pBlock == NULL) { - taosFreeQitem(pTrigger); - return; - } + pTrigger->type = STREAM_INPUT__GET_RES; + pTrigger->pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock)); + if (pTrigger->pBlock == NULL) { + taosFreeQitem(pTrigger); - atomic_store_8(&pTask->schedInfo.status, TASK_TRIGGER_STATUS__INACTIVE); - pTrigger->pBlock->info.type = STREAM_GET_ALL; - if (streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pTrigger) < 0) { - taosTmrReset(streamSchedByTimer, (int32_t)pTask->info.triggerParam, pTask, streamEnv.timer, &pTask->schedInfo.pTimer); - return; - } + stError("s-task:%s failed to prepare retrieve data trigger, code:%s, try again in %dms", id, "out of memory", + nextTrigger); + taosTmrReset(streamSchedByTimer, nextTrigger, pTask, streamEnv.timer, &pTask->schedInfo.pTimer); + return; + } - streamSchedExec(pTask); + atomic_store_8(&pTask->schedInfo.status, TASK_TRIGGER_STATUS__INACTIVE); + pTrigger->pBlock->info.type = STREAM_GET_ALL; + + int32_t code = streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pTrigger); + if (code != TSDB_CODE_SUCCESS) { + taosTmrReset(streamSchedByTimer, nextTrigger, pTask, streamEnv.timer, &pTask->schedInfo.pTimer); + return; + } + + streamSchedExec(pTask); + } } - 
taosTmrReset(streamSchedByTimer, (int32_t)pTask->info.triggerParam, pTask, streamEnv.timer, &pTask->schedInfo.pTimer); + taosTmrReset(streamSchedByTimer, nextTrigger, pTask, streamEnv.timer, &pTask->schedInfo.pTimer); } int32_t streamSetupScheduleTrigger(SStreamTask* pTask) { @@ -98,7 +113,7 @@ int32_t streamSetupScheduleTrigger(SStreamTask* pTask) { int32_t ref = atomic_add_fetch_32(&pTask->refCnt, 1); ASSERT(ref == 2 && pTask->schedInfo.pTimer == NULL); - qDebug("s-task:%s setup scheduler trigger, delay:%" PRId64 " ms", pTask->id.idStr, pTask->info.triggerParam); + stDebug("s-task:%s setup scheduler trigger, delay:%" PRId64 " ms", pTask->id.idStr, pTask->info.triggerParam); pTask->schedInfo.pTimer = taosTmrStart(streamSchedByTimer, (int32_t)pTask->info.triggerParam, pTask, streamEnv.timer); pTask->schedInfo.status = TASK_TRIGGER_STATUS__INACTIVE; @@ -108,15 +123,12 @@ int32_t streamSetupScheduleTrigger(SStreamTask* pTask) { } int32_t streamSchedExec(SStreamTask* pTask) { - int8_t schedStatus = atomic_val_compare_exchange_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE, - TASK_SCHED_STATUS__WAITING); - - if (schedStatus == TASK_SCHED_STATUS__INACTIVE) { + if (streamTaskSetSchedStatusWait(pTask)) { SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); if (pRunReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); - qError("failed to create msg to aunch s-task:%s, reason out of memory", pTask->id.idStr); + /*int8_t status = */streamTaskSetSchedStatusInActive(pTask); + stError("failed to create msg to launch s-task:%s, reason: out of memory", pTask->id.idStr); return -1; } @@ -124,12 +136,12 @@ int32_t streamSchedExec(SStreamTask* pTask) { pRunReq->streamId = pTask->id.streamId; pRunReq->taskId = pTask->id.taskId; - qDebug("trigger to run s-task:%s", pTask->id.idStr); + stDebug("trigger to run s-task:%s", pTask->id.idStr); SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &msg); } else { - qDebug("s-task:%s not launch task since sched status:%d", pTask->id.idStr, pTask->status.schedStatus); + stTrace("s-task:%s not launch task since sched status:%d", pTask->id.idStr, pTask->status.schedStatus); } return 0; @@ -142,8 +154,12 @@ static int32_t buildDispatchRsp(const SStreamTask* pTask, const SStreamDispatchR } ((SMsgHead*)(*pBuf))->vgId = htonl(pReq->upstreamNodeId); + ASSERT(((SMsgHead*)(*pBuf))->vgId != 0); + SStreamDispatchRsp* pDispatchRsp = POINTER_SHIFT((*pBuf), sizeof(SMsgHead)); + pDispatchRsp->stage = htobe64(pReq->stage); + pDispatchRsp->msgId = htonl(pReq->msgId); pDispatchRsp->inputStatus = status; pDispatchRsp->streamId = htobe64(pReq->streamId); pDispatchRsp->upstreamNodeId = htonl(pReq->upstreamNodeId); @@ -161,7 +177,7 @@ static int32_t streamTaskAppendInputBlocks(SStreamTask* pTask, const SStreamDisp if (pBlock == NULL) { streamTaskInputFail(pTask); status = TASK_INPUT_STATUS__FAILED; - qError("vgId:%d, s-task:%s failed to receive dispatch msg, reason: out of memory", pTask->pMeta->vgId, + stError("vgId:%d, s-task:%s failed to receive dispatch msg, reason: out of memory", pTask->pMeta->vgId, pTask->id.idStr); } else { if (pBlock->type == STREAM_INPUT__TRANS_STATE) { @@ -182,7 +198,7 @@ int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq, // enqueue if (pData != NULL) { - qDebug("s-task:%s (child %d) recv retrieve req from task:0x%x(vgId:%d), reqId:0x%" PRIx64,
pTask->id.idStr, + stDebug("s-task:%s (child %d) recv retrieve req from task:0x%x(vgId:%d), reqId:0x%" PRIx64, pTask->id.idStr, pTask->info.selfChildId, pReq->srcTaskId, pReq->srcNodeId, pReq->reqId); pData->type = STREAM_INPUT__DATA_RETRIEVE; @@ -212,42 +228,55 @@ int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq, return status == TASK_INPUT_STATUS__NORMAL ? 0 : -1; } -int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pRsp, bool exec) { - qDebug("s-task:%s receive dispatch msg from taskId:0x%x(vgId:%d), msgLen:%" PRId64, pTask->id.idStr, - pReq->upstreamTaskId, pReq->upstreamNodeId, pReq->totalLen); - int32_t status = 0; +int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pRsp) { + int32_t status = 0; + const char* id = pTask->id.idStr; + + stDebug("s-task:%s receive dispatch msg from taskId:0x%x(vgId:%d), msgLen:%" PRId64 ", msgId:%d", id, + pReq->upstreamTaskId, pReq->upstreamNodeId, pReq->totalLen, pReq->msgId); SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, pReq->upstreamTaskId); ASSERT(pInfo != NULL); - // upstream task has restarted/leader-follower switch/transferred to other dnodes - if (pReq->stage > pInfo->stage) { - qError("s-task:%s upstream task:0x%x (vgId:%d) has restart/leader-switch/vnode-transfer, prev stage:%" PRId64 - ", current:%" PRId64 " dispatch msg rejected", - pTask->id.idStr, pReq->upstreamTaskId, pReq->upstreamNodeId, pInfo->stage, pReq->stage); - status = TASK_INPUT_STATUS__BLOCKED; + if (pTask->pMeta->role == NODE_ROLE_FOLLOWER) { + stError("s-task:%s task on follower received dispatch msgs, dispatch msg rejected", id); + status = TASK_INPUT_STATUS__REFUSED; } else { - if (!pInfo->dataAllowed) { - qWarn("s-task:%s data from task:0x%x is denied, since inputQ is closed for it", pTask->id.idStr, - pReq->upstreamTaskId); - status = TASK_INPUT_STATUS__BLOCKED; + if (pReq->stage > pInfo->stage) { + // upstream task has restarted/leader-follower switch/transferred to other dnodes + stError("s-task:%s upstream task:0x%x (vgId:%d) has restart/leader-switch/vnode-transfer, prev stage:%" PRId64 + ", current:%" PRId64 " dispatch msg rejected", + id, pReq->upstreamTaskId, pReq->upstreamNodeId, pInfo->stage, pReq->stage); + status = TASK_INPUT_STATUS__REFUSED; } else { - // Current task has received the checkpoint req from the upstream task, from which the message should all be - // blocked - if (pReq->type == STREAM_INPUT__CHECKPOINT_TRIGGER) { - streamTaskCloseUpstreamInput(pTask, pReq->upstreamTaskId); - qDebug("s-task:%s close inputQ for upstream:0x%x", pTask->id.idStr, pReq->upstreamTaskId); - } + if (!pInfo->dataAllowed) { + stWarn("s-task:%s data from task:0x%x is denied, since inputQ is closed for it", id, pReq->upstreamTaskId); + status = TASK_INPUT_STATUS__BLOCKED; + } else { + // This task has received the checkpoint req from the upstream task, from which all the messages should be + // blocked. Note that there is no race condition here. 
+ if (pReq->type == STREAM_INPUT__CHECKPOINT_TRIGGER) { + atomic_add_fetch_32(&pTask->upstreamInfo.numOfClosed, 1); + streamTaskCloseUpstreamInput(pTask, pReq->upstreamTaskId); + stDebug("s-task:%s close inputQ for upstream:0x%x, msgId:%d", id, pReq->upstreamTaskId, pReq->msgId); + } - status = streamTaskAppendInputBlocks(pTask, pReq); + status = streamTaskAppendInputBlocks(pTask, pReq); + } } } + // disable the data from upstream tasks + int8_t st = pTask->status.taskStatus; + if (st == TASK_STATUS__HALT) { + status = TASK_INPUT_STATUS__BLOCKED; + } + { // do send response with the input status int32_t code = buildDispatchRsp(pTask, pReq, status, &pRsp->pCont); if (code != TSDB_CODE_SUCCESS) { - // todo handle failure + stError("s-task:%s failed to build dispatch rsp, msgId:%d, code:%s", id, pReq->msgId, tstrerror(code)); return code; } @@ -261,13 +290,6 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S return 0; } -int32_t streamProcessRunReq(SStreamTask* pTask) { - if (streamTryExec(pTask) < 0) { - return -1; - } - return 0; -} - int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pRsp) { streamTaskEnqueueRetrieve(pTask, pReq, pRsp); ASSERT(pTask->info.taskLevel != TASK_LEVEL__SINK); @@ -278,15 +300,17 @@ int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, S void streamTaskInputFail(SStreamTask* pTask) { atomic_store_8(&pTask->inputInfo.status, TASK_INPUT_STATUS__FAILED); } void streamTaskOpenAllUpstreamInput(SStreamTask* pTask) { - int32_t num = taosArrayGetSize(pTask->pUpstreamInfoList); + int32_t num = taosArrayGetSize(pTask->upstreamInfo.pList); if (num == 0) { return; } for (int32_t i = 0; i < num; ++i) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i); + SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); pInfo->dataAllowed = true; } + + pTask->upstreamInfo.numOfClosed = 0; } void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId) { @@ -297,14 +321,14 @@ void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId) { } SStreamChildEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId) { - int32_t num = taosArrayGetSize(pTask->pUpstreamInfoList); + int32_t num = taosArrayGetSize(pTask->upstreamInfo.pList); for (int32_t i = 0; i < num; ++i) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i); + SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); if (pInfo->taskId == taskId) { return pInfo; } } - qError("s-task:%s failed to find upstream task:0x%x", pTask->id.idStr, taskId); + stError("s-task:%s failed to find upstream task:0x%x", pTask->id.idStr, taskId); return NULL; } \ No newline at end of file diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 6cef3cca75..ce4feb38eb 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -178,6 +178,10 @@ void bkdMgtDestroy(SBackendManager* bm) { taosHashCleanup(bm->pSstTbl[0]); taosHashCleanup(bm->pSstTbl[1]); + + taosMemoryFree(bm->pCurrent); + taosMemoryFree(bm->pManifest); + taosMemoryFree(bm); } @@ -239,7 +243,7 @@ int32_t bkdMgtGetDelta(SBackendManager* bm, int64_t chkpId, SArray* list) { continue; } if (strlen(name) >= sstLen && strncmp(name + strlen(name) - 4, pSST, sstLen) == 0) { - char* p = taosStrdup(name); + // char* p = taosStrdup(name); taosHashPut(bm->pSstTbl[1 - bm->idx], name, 
strlen(name), &dummy, sizeof(dummy)); continue; } @@ -267,7 +271,7 @@ int32_t bkdMgtGetDelta(SBackendManager* bm, int64_t chkpId, SArray* list) { taosArrayClearP(bm->pDel, taosMemoryFree); taosHashClear(bm->pSstTbl[1 - bm->idx]); bm->update = 0; - + taosCloseDir(&pDir); return code; } @@ -280,6 +284,8 @@ int32_t bkdMgtGetDelta(SBackendManager* bm, int64_t chkpId, SArray* list) { taosHashClear(bm->pSstTbl[bm->idx]); bm->idx = 1 - bm->idx; + taosCloseDir(&pDir); + return 0; } @@ -287,8 +293,8 @@ int32_t bkdMgtDumpTo(SBackendManager* bm, char* dname) { int32_t code = 0; int32_t len = bm->len + 128; - char* dstBuf = taosMemoryCalloc(1, len); char* srcBuf = taosMemoryCalloc(1, len); + char* dstBuf = taosMemoryCalloc(1, len); char* srcDir = taosMemoryCalloc(1, len); char* dstDir = taosMemoryCalloc(1, len); @@ -297,12 +303,16 @@ int32_t bkdMgtDumpTo(SBackendManager* bm, char* dname) { sprintf(dstDir, "%s%s%s", bm->path, TD_DIRSEP, dname); if (!taosDirExist(srcDir)) { - return 0; + stError("failed to dump srcDir %s, reason: not exist such dir", srcDir); + code = -1; + goto _ERROR; } code = taosMkDir(dstDir); if (code != 0) { - return code; + terrno = TAOS_SYSTEM_ERROR(errno); + stError("failed to mkdir srcDir %s, reason: %s", dstDir, terrstr()); + goto _ERROR; } // clear current file @@ -353,6 +363,7 @@ int32_t bkdMgtDumpTo(SBackendManager* bm, char* dname) { taosArrayClearP(bm->pAdd, taosMemoryFree); taosArrayClearP(bm->pDel, taosMemoryFree); +_ERROR: taosMemoryFree(srcBuf); taosMemoryFree(dstBuf); taosMemoryFree(srcDir); @@ -388,7 +399,11 @@ int32_t copyFiles(const char* src, const char* dst) { char* dstName = taosMemoryCalloc(1, dLen + 64); TdDirPtr pDir = taosOpenDir(src); - if (pDir == NULL) return 0; + if (pDir == NULL) { + taosMemoryFree(srcName); + taosMemoryFree(dstName); + return -1; + } TdDirEntryPtr de = NULL; while ((de = taosReadDir(pDir)) != NULL) { @@ -437,14 +452,14 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { taosMkDir(state); code = copyFiles(chkp, state); if (code != 0) { - qError("failed to restart stream backend from %s, reason: %s", chkp, tstrerror(TAOS_SYSTEM_ERROR(errno))); + stError("failed to restart stream backend from %s, reason: %s", chkp, tstrerror(TAOS_SYSTEM_ERROR(errno))); } else { - qInfo("start to restart stream backend at checkpoint path: %s", chkp); + stInfo("start to restart stream backend at checkpoint path: %s", chkp); } } else { - qError("failed to start stream backend at %s, reason: %s, restart from default state dir:%s", chkp, - tstrerror(TAOS_SYSTEM_ERROR(errno)), state); + stError("failed to start stream backend at %s, reason: %s, restart from default state dir:%s", chkp, + tstrerror(TAOS_SYSTEM_ERROR(errno)), state); taosMkDir(state); } taosMemoryFree(chkp); @@ -458,7 +473,7 @@ void* streamBackendInit(const char* streamPath, int64_t chkpId) { char* backendPath = NULL; int32_t code = rebuildDirFromCheckpoint(streamPath, chkpId, &backendPath); - qDebug("start to init stream backend at %s, checkpointid: %" PRId64 "", backendPath, chkpId); + stDebug("start to init stream backend at %s, checkpointid: %" PRId64 "", backendPath, chkpId); uint32_t dbMemLimit = nextPow2(tsMaxStreamBackendCache) << 20; SBackendWrapper* pHandle = taosMemoryCalloc(1, sizeof(SBackendWrapper)); @@ -502,7 +517,7 @@ void* streamBackendInit(const char* streamPath, int64_t chkpId) { taosMemoryFreeClear(err); pHandle->db = rocksdb_open(opts, backendPath, &err); if (err != NULL) { - qError("failed to open rocksdb, path:%s, reason:%s", backendPath, 
err); + stError("failed to open rocksdb, path:%s, reason:%s", backendPath, err); taosMemoryFreeClear(err); goto _EXIT; } @@ -519,7 +534,7 @@ void* streamBackendInit(const char* streamPath, int64_t chkpId) { if (cfs != NULL) { rocksdb_list_column_families_destroy(cfs, nCf); } - qDebug("succ to init stream backend at %s, backend:%p", backendPath, pHandle); + stDebug("succ to init stream backend at %s, backend:%p", backendPath, pHandle); taosMemoryFreeClear(backendPath); return (void*)pHandle; @@ -532,7 +547,7 @@ _EXIT: taosHashCleanup(pHandle->cfInst); tdListFree(pHandle->list); taosMemoryFree(pHandle); - qDebug("failed to init stream backend at %s", backendPath); + stDebug("failed to init stream backend at %s", backendPath); taosMemoryFree(backendPath); return NULL; } @@ -566,7 +581,7 @@ void streamBackendCleanup(void* arg) { taosThreadMutexDestroy(&pHandle->mutex); taosThreadMutexDestroy(&pHandle->cfMutex); - qDebug("destroy stream backend :%p", pHandle); + stDebug("destroy stream backend :%p", pHandle); taosMemoryFree(pHandle); return; } @@ -575,7 +590,7 @@ void streamBackendHandleCleanup(void* arg) { bool remove = wrapper->remove; taosThreadRwlockWrlock(&wrapper->rwLock); - qDebug("start to do-close backendwrapper %p, %s", wrapper, wrapper->idstr); + stDebug("start to do-close backendwrapper %p, %s", wrapper, wrapper->idstr); if (wrapper->rocksdb == NULL) { taosThreadRwlockUnlock(&wrapper->rwLock); return; @@ -588,7 +603,7 @@ void streamBackendHandleCleanup(void* arg) { for (int i = 0; i < cfLen; i++) { if (wrapper->pHandle[i] != NULL) rocksdb_drop_column_family(wrapper->rocksdb, wrapper->pHandle[i], &err); if (err != NULL) { - qError("failed to drop cf:%s_%s, reason:%s", wrapper->idstr, ginitDict[i].key, err); + stError("failed to drop cf:%s_%s, reason:%s", wrapper->idstr, ginitDict[i].key, err); taosMemoryFreeClear(err); } } @@ -599,7 +614,7 @@ void streamBackendHandleCleanup(void* arg) { for (int i = 0; i < cfLen; i++) { if (wrapper->pHandle[i] != NULL) rocksdb_flush_cf(wrapper->rocksdb, flushOpt, wrapper->pHandle[i], &err); if (err != NULL) { - qError("failed to flush cf:%s_%s, reason:%s", wrapper->idstr, ginitDict[i].key, err); + stError("failed to flush cf:%s_%s, reason:%s", wrapper->idstr, ginitDict[i].key, err); taosMemoryFreeClear(err); } } @@ -634,7 +649,7 @@ void streamBackendHandleCleanup(void* arg) { wrapper->rocksdb = NULL; taosReleaseRef(streamBackendId, wrapper->backendId); - qDebug("end to do-close backendwrapper %p, %s", wrapper, wrapper->idstr); + stDebug("end to do-close backendwrapper %p, %s", wrapper, wrapper->idstr); taosMemoryFree(wrapper); return; } @@ -817,14 +832,14 @@ int32_t chkpDoDbCheckpoint(rocksdb_t* db, char* path) { char* err = NULL; rocksdb_checkpoint_t* cp = rocksdb_checkpoint_object_create(db, &err); if (cp == NULL || err != NULL) { - qError("failed to do checkpoint at:%s, reason:%s", path, err); + stError("failed to do checkpoint at:%s, reason:%s", path, err); taosMemoryFreeClear(err); goto _ERROR; } rocksdb_checkpoint_create(cp, path, 64 << 20, &err); if (err != NULL) { - qError("failed to do checkpoint at:%s, reason:%s", path, err); + stError("failed to do checkpoint at:%s, reason:%s", path, err); taosMemoryFreeClear(err); } else { code = 0; @@ -843,7 +858,7 @@ int32_t chkpPreFlushDb(rocksdb_t* db, rocksdb_column_family_handle_t** cf, int32 rocksdb_flush_cfs(db, flushOpt, cf, nCf, &err); if (err != NULL) { - qError("failed to flush db before streamBackend clean up, reason:%s", err); + stError("failed to flush db before streamBackend clean up, 
reason:%s", err); taosMemoryFree(err); code = -1; } @@ -858,7 +873,7 @@ int32_t chkpPreCheckDir(char* path, int64_t chkpId, char** chkpDir, char** chkpI sprintf(pChkpDir, "%s%s%s", path, TD_DIRSEP, "checkpoints"); code = taosMulModeMkDir(pChkpDir, 0755, true); if (code != 0) { - qError("failed to prepare checkpoint dir, path:%s, reason:%s", path, tstrerror(code)); + stError("failed to prepare checkpoint dir, path:%s, reason:%s", path, tstrerror(code)); taosMemoryFree(pChkpDir); taosMemoryFree(pChkpIdDir); code = -1; @@ -867,7 +882,7 @@ int32_t chkpPreCheckDir(char* path, int64_t chkpId, char** chkpDir, char** chkpI sprintf(pChkpIdDir, "%s%scheckpoint%" PRId64, pChkpDir, TD_DIRSEP, chkpId); if (taosIsDir(pChkpIdDir)) { - qInfo("stream rm exist checkpoint%s", pChkpIdDir); + stInfo("stream rm exist checkpoint%s", pChkpIdDir); taosRemoveFile(pChkpIdDir); } *chkpDir = pChkpDir; @@ -891,19 +906,19 @@ int32_t streamBackendTriggerChkp(void* arg, char* dst) { goto _ERROR; } int32_t nCf = chkpGetAllDbCfHandle(pMeta, &ppCf, refs); - qDebug("stream backend:%p start to do checkpoint at:%s, cf num: %d ", pHandle, dst, nCf); + stDebug("stream backend:%p start to do checkpoint at:%s, cf num: %d ", pHandle, dst, nCf); code = chkpPreFlushDb(pHandle->db, ppCf, nCf); if (code == 0) { code = chkpDoDbCheckpoint(pHandle->db, dst); if (code != 0) { - qError("stream backend:%p failed to do checkpoint at:%s", pHandle, dst); + stError("stream backend:%p failed to do checkpoint at:%s", pHandle, dst); } else { - qDebug("stream backend:%p end to do checkpoint at:%s, time cost:%" PRId64 "ms", pHandle, dst, - taosGetTimestampMs() - st); + stDebug("stream backend:%p end to do checkpoint at:%s, time cost:%" PRId64 "ms", pHandle, dst, + taosGetTimestampMs() - st); } } else { - qError("stream backend:%p failed to flush db at:%s", pHandle, dst); + stError("stream backend:%p failed to flush db at:%s", pHandle, dst); } // release all ref to cfWrapper; @@ -961,24 +976,25 @@ int32_t streamBackendDoCheckpoint(void* arg, uint64_t checkpointId) { SBackendWrapper* pHandle = taosAcquireRef(streamBackendId, backendRid); if (pHandle == NULL || pHandle->db == NULL) { + stError("failed to acquire state-backend handle"); goto _ERROR; } // Get all cf and acquire cfWrappter int32_t nCf = chkpGetAllDbCfHandle(pMeta, &ppCf, refs); - qDebug("stream backend:%p start to do checkpoint at:%s, cf num: %d ", pHandle, pChkpIdDir, nCf); + stDebug("stream backend:%p start to do checkpoint at:%s, cf num: %d ", pHandle, pChkpIdDir, nCf); code = chkpPreFlushDb(pHandle->db, ppCf, nCf); if (code == 0) { code = chkpDoDbCheckpoint(pHandle->db, pChkpIdDir); if (code != 0) { - qError("stream backend:%p failed to do checkpoint at:%s", pHandle, pChkpIdDir); + stError("stream backend:%p failed to do checkpoint at:%s", pHandle, pChkpIdDir); } else { - qDebug("stream backend:%p end to do checkpoint at:%s, time cost:%" PRId64 "ms", pHandle, pChkpIdDir, - taosGetTimestampMs() - st); + stDebug("stream backend:%p end to do checkpoint at:%s, time cost:%" PRId64 "ms", pHandle, pChkpIdDir, + taosGetTimestampMs() - st); } - } else { - qError("stream backend:%p failed to flush db at:%s", pHandle, pChkpIdDir); + } else { + stError("stream backend:%p failed to flush db at:%s", pHandle, pChkpIdDir); } // release all ref to cfWrapper; for (int i = 0; i < taosArrayGetSize(refs); i++) { @@ -1384,7 +1400,7 @@ int32_t decodeValueFunc(void* value, int32_t vlen, int64_t* ttl, char** dest) { p = taosDecodeFixedI64(p, &key.unixTimestamp); p = taosDecodeFixedI32(p, &key.len); if (vlen != 
(sizeof(int64_t) + sizeof(int32_t) + key.len)) { - qError("vlen: %d, read len: %d", vlen, key.len); + stError("vlen: %d, read len: %d", vlen, key.len); goto _EXCEPT; } if (key.len != 0 && dest != NULL) p = taosDecodeBinary(p, (void**)dest, key.len); @@ -1492,7 +1508,11 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t rocksdb_options_set_block_based_table_factory((rocksdb_options_t*)cfOpts[i], tableOpt); params[i].tableOpt = tableOpt; - int idx = streamStateGetCfIdx(NULL, funcname); + int idx = streamStateGetCfIdx(NULL, funcname); + if (idx < 0 || idx >= sizeof(ginitDict) / sizeof(ginitDict[0])) { + stError("failed to open cf"); + return -1; + } SCfInit* cfPara = &ginitDict[idx]; rocksdb_comparator_t* compare = @@ -1504,7 +1524,7 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t rocksdb_t* db = rocksdb_open_column_families(handle->dbOpt, name, nCf, (const char* const*)cfs, (const rocksdb_options_t* const*)cfOpts, cfHandle, &err); if (err != NULL) { - qError("failed to open rocksdb cf, reason:%s", err); + stError("failed to open rocksdb cf, reason:%s", err); taosMemoryFree(err); taosMemoryFree(cfHandle); taosMemoryFree(pCompare); @@ -1513,7 +1533,7 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t // fix other leak return -1; } else { - qDebug("succ to open rocksdb cf"); + stDebug("succ to open rocksdb cf"); } // close default cf if (((rocksdb_column_family_handle_t**)cfHandle)[0] != 0) { @@ -1624,7 +1644,7 @@ int streamStateOpenBackend(void* backend, SStreamState* pState) { int64_t id = taosAddRef(streamBackendCfWrapperId, pBackendCfWrapper); pState->pTdbState->backendCfWrapperId = id; pState->pTdbState->pBackendCfWrapper = pBackendCfWrapper; - qInfo("succ to open state %p on backendWrapper, %p, %s", pState, pBackendCfWrapper, pBackendCfWrapper->idstr); + stInfo("succ to open state %p on backendWrapper, %p, %s", pState, pBackendCfWrapper, pBackendCfWrapper->idstr); return 0; } taosThreadMutexUnlock(&handle->cfMutex); @@ -1676,7 +1696,7 @@ int streamStateOpenBackend(void* backend, SStreamState* pState) { int64_t id = taosAddRef(streamBackendCfWrapperId, pBackendCfWrapper); pState->pTdbState->backendCfWrapperId = id; pState->pTdbState->pBackendCfWrapper = pBackendCfWrapper; - qInfo("succ to open state %p on backendWrapper %p %s", pState, pBackendCfWrapper, pBackendCfWrapper->idstr); + stInfo("succ to open state %p on backendWrapper %p %s", pState, pBackendCfWrapper, pBackendCfWrapper->idstr); return 0; } @@ -1684,7 +1704,7 @@ void streamStateCloseBackend(SStreamState* pState, bool remove) { SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; SBackendWrapper* pHandle = wrapper->pBackend; - qInfo("start to close state on backend: %p", pHandle); + stInfo("start to close state on backend: %p", pHandle); taosThreadMutexLock(&pHandle->cfMutex); RocksdbCfInst** ppInst = taosHashGet(pHandle->cfInst, wrapper->idstr, strlen(pState->pTdbState->idstr) + 1); @@ -1696,8 +1716,8 @@ void streamStateCloseBackend(SStreamState* pState, bool remove) { taosThreadMutexUnlock(&pHandle->cfMutex); char* status[] = {"close", "drop"}; - qInfo("start to %s state %p on backendWrapper %p %s", status[remove == false ? 0 : 1], pState, wrapper, - wrapper->idstr); + stInfo("start to %s state %p on backendWrapper %p %s", status[remove == false ? 
0 : 1], pState, wrapper, + wrapper->idstr); wrapper->remove |= remove; // update by other pState taosReleaseRef(streamBackendCfWrapperId, pState->pTdbState->backendCfWrapperId); } @@ -1731,10 +1751,10 @@ int streamStateGetCfIdx(SStreamState* pState, const char* funcName) { cf = rocksdb_create_column_family(wrapper->rocksdb, wrapper->cfOpts[idx], buf, &err); if (err != NULL) { idx = -1; - qError("failed to to open cf, %p %s_%s, reason:%s", pState, wrapper->idstr, funcName, err); + stError("failed to open cf, %p %s_%s, reason:%s", pState, wrapper->idstr, funcName, err); taosMemoryFree(err); } else { - qDebug("succ to to open cf, %p %s_%s", pState, wrapper->idstr, funcName); + stDebug("succ to to open cf, %p %s_%s", pState, wrapper->idstr, funcName); wrapper->pHandle[idx] = cf; } } @@ -1770,35 +1790,36 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfKe ((rocksdb_column_family_handle_t**)wrapper->pHandle)[idx]); } -#define STREAM_STATE_PUT_ROCKSDB(pState, funcname, key, value, vLen) \ - do { \ - code = 0; \ - char buf[128] = {0}; \ - char* err = NULL; \ - int i = streamStateGetCfIdx(pState, funcname); \ - if (i < 0) { \ - qWarn("streamState failed to get cf name: %s", funcname); \ - code = -1; \ - break; \ - } \ - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; \ - char toString[128] = {0}; \ - if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ - int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ - rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pHandle)[ginitDict[i].idx]; \ - rocksdb_t* db = wrapper->rocksdb; \ - rocksdb_writeoptions_t* opts = wrapper->writeOpts; \ - char* ttlV = NULL; \ - int32_t ttlVLen = ginitDict[i].enValueFunc((char*)value, vLen, 0, &ttlV); \ - rocksdb_put_cf(db, opts, pHandle, (const char*)buf, klen, (const char*)ttlV, (size_t)ttlVLen, &err); \ - if (err != NULL) { \ - qError("streamState str: %s failed to write to %s, err: %s", toString, funcname, err); \ - taosMemoryFree(err); \ - code = -1; \ - } else { \ - qTrace("streamState str:%s succ to write to %s, rowValLen:%d, ttlValLen:%d", toString, funcname, vLen, ttlVLen); \ - } \ - taosMemoryFree(ttlV); \ +#define STREAM_STATE_PUT_ROCKSDB(pState, funcname, key, value, vLen) \ + do { \ + code = 0; \ + char buf[128] = {0}; \ + char* err = NULL; \ + int i = streamStateGetCfIdx(pState, funcname); \ + if (i < 0) { \ + stWarn("streamState failed to get cf name: %s", funcname); \ + code = -1; \ + break; \ + } \ + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; \ + char toString[128] = {0}; \ + if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ + int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ + rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pHandle)[ginitDict[i].idx]; \ + rocksdb_t* db = wrapper->rocksdb; \ + rocksdb_writeoptions_t* opts = wrapper->writeOpts; \ + char* ttlV = NULL; \ + int32_t ttlVLen = ginitDict[i].enValueFunc((char*)value, vLen, 0, &ttlV); \ + rocksdb_put_cf(db, opts, pHandle, (const char*)buf, klen, (const char*)ttlV, (size_t)ttlVLen, &err); \ + if (err != NULL) { \ + stError("streamState str: %s failed to write to %s, err: %s", toString, funcname, err); \ + taosMemoryFree(err); \ + code = -1; \ + } else { \ + stTrace("streamState str:%s succ to write to %s, rowValLen:%d, ttlValLen:%d", toString, funcname, vLen, \ + ttlVLen); \ + } \ + taosMemoryFree(ttlV); \ } while (0); #define 
STREAM_STATE_GET_ROCKSDB(pState, funcname, key, pVal, vLen) \ @@ -1808,7 +1829,7 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfKe char* err = NULL; \ int i = streamStateGetCfIdx(pState, funcname); \ if (i < 0) { \ - qWarn("streamState failed to get cf name: %s", funcname); \ + stWarn("streamState failed to get cf name: %s", funcname); \ code = -1; \ break; \ } \ @@ -1823,9 +1844,9 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfKe char* val = rocksdb_get_cf(db, opts, pHandle, (const char*)buf, klen, (size_t*)&len, &err); \ if (val == NULL || len == 0) { \ if (err == NULL) { \ - qTrace("streamState str: %s failed to read from %s_%s, err: not exist", toString, wrapper->idstr, funcname); \ + stTrace("streamState str: %s failed to read from %s_%s, err: not exist", toString, wrapper->idstr, funcname); \ } else { \ - qError("streamState str: %s failed to read from %s_%s, err: %s", toString, wrapper->idstr, funcname, err); \ + stError("streamState str: %s failed to read from %s_%s, err: %s", toString, wrapper->idstr, funcname, err); \ taosMemoryFreeClear(err); \ } \ code = -1; \ @@ -1833,11 +1854,11 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfKe char* p = NULL; \ int32_t tlen = ginitDict[i].deValueFunc(val, len, NULL, (char**)pVal); \ if (tlen <= 0) { \ - qError("streamState str: %s failed to read from %s_%s, err: already ttl ", toString, wrapper->idstr, \ - funcname); \ + stError("streamState str: %s failed to read from %s_%s, err: already ttl ", toString, wrapper->idstr, \ + funcname); \ code = -1; \ } else { \ - qTrace("streamState str: %s succ to read from %s_%s, valLen:%d", toString, wrapper->idstr, funcname, tlen); \ + stTrace("streamState str: %s succ to read from %s_%s, valLen:%d", toString, wrapper->idstr, funcname, tlen); \ } \ taosMemoryFree(val); \ if (vLen != NULL) *vLen = tlen; \ @@ -1851,7 +1872,7 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfKe char* err = NULL; \ int i = streamStateGetCfIdx(pState, funcname); \ if (i < 0) { \ - qWarn("streamState failed to get cf name: %s_%s", pState->pTdbState->idstr, funcname); \ + stWarn("streamState failed to get cf name: %s_%s", pState->pTdbState->idstr, funcname); \ code = -1; \ break; \ } \ @@ -1864,11 +1885,11 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfKe rocksdb_writeoptions_t* opts = wrapper->writeOpts; \ rocksdb_delete_cf(db, opts, pHandle, (const char*)buf, klen, &err); \ if (err != NULL) { \ - qError("streamState str: %s failed to del from %s_%s, err: %s", toString, wrapper->idstr, funcname, err); \ + stError("streamState str: %s failed to del from %s_%s, err: %s", toString, wrapper->idstr, funcname, err); \ taosMemoryFree(err); \ code = -1; \ } else { \ - qTrace("streamState str: %s succ to del from %s_%s", toString, wrapper->idstr, funcname); \ + stTrace("streamState str: %s succ to del from %s_%s", toString, wrapper->idstr, funcname); \ } \ } while (0); @@ -1893,7 +1914,7 @@ int32_t streamStateDel_rocksdb(SStreamState* pState, const SWinKey* key) { return code; } int32_t streamStateClear_rocksdb(SStreamState* pState) { - qDebug("streamStateClear_rocksdb"); + stDebug("streamStateClear_rocksdb"); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; @@ -1915,7 +1936,7 @@ int32_t streamStateClear_rocksdb(SStreamState* pState) { stateKeyToString(&sKey, toStringStart); stateKeyToString(&eKey, toStringEnd); - qWarn("failed to delete range cf(state) 
start: %s, end:%s, reason:%s", toStringStart, toStringEnd, err); + stWarn("failed to delete range cf(state) start: %s, end:%s, reason:%s", toStringStart, toStringEnd, err); taosMemoryFree(err); } else { rocksdb_compact_range_cf(wrapper->rocksdb, wrapper->pHandle[1], sKeyStr, sLen, eKeyStr, eLen); @@ -1932,7 +1953,7 @@ int32_t streamStateCurNext_rocksdb(SStreamState* pState, SStreamStateCur* pCur) return 0; } int32_t streamStateGetFirst_rocksdb(SStreamState* pState, SWinKey* key) { - qDebug("streamStateGetFirst_rocksdb"); + stDebug("streamStateGetFirst_rocksdb"); SWinKey tmp = {.ts = 0, .groupId = 0}; streamStatePut_rocksdb(pState, &tmp, NULL, 0); @@ -1944,7 +1965,7 @@ int32_t streamStateGetFirst_rocksdb(SStreamState* pState, SWinKey* key) { } int32_t streamStateGetGroupKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey, const void** pVal, int32_t* pVLen) { - qDebug("streamStateGetGroupKVByCur_rocksdb"); + stDebug("streamStateGetGroupKVByCur_rocksdb"); if (!pCur) { return -1; } @@ -1961,7 +1982,7 @@ int32_t streamStateGetGroupKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey, return -1; } int32_t streamStateAddIfNotExist_rocksdb(SStreamState* pState, const SWinKey* key, void** pVal, int32_t* pVLen) { - qDebug("streamStateAddIfNotExist_rocksdb"); + stDebug("streamStateAddIfNotExist_rocksdb"); int32_t size = *pVLen; if (streamStateGet_rocksdb(pState, key, pVal, pVLen) == 0) { return 0; @@ -1971,14 +1992,14 @@ int32_t streamStateAddIfNotExist_rocksdb(SStreamState* pState, const SWinKey* ke return 0; } int32_t streamStateCurPrev_rocksdb(SStreamStateCur* pCur) { - qDebug("streamStateCurPrev_rocksdb"); + stDebug("streamStateCurPrev_rocksdb"); if (!pCur) return -1; rocksdb_iter_prev(pCur->iter); return 0; } int32_t streamStateGetKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey, const void** pVal, int32_t* pVLen) { - qDebug("streamStateGetKVByCur_rocksdb"); + stDebug("streamStateGetKVByCur_rocksdb"); if (!pCur) return -1; SStateKey tkey; SStateKey* pKtmp = &tkey; @@ -2003,7 +2024,7 @@ int32_t streamStateGetKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey, cons return -1; } SStreamStateCur* streamStateGetAndCheckCur_rocksdb(SStreamState* pState, SWinKey* key) { - qDebug("streamStateGetAndCheckCur_rocksdb"); + stDebug("streamStateGetAndCheckCur_rocksdb"); SStreamStateCur* pCur = streamStateFillGetCur_rocksdb(pState, key); if (pCur) { int32_t code = streamStateGetGroupKVByCur_rocksdb(pCur, key, NULL, 0); @@ -2014,7 +2035,7 @@ SStreamStateCur* streamStateGetAndCheckCur_rocksdb(SStreamState* pState, SWinKey } SStreamStateCur* streamStateSeekKeyNext_rocksdb(SStreamState* pState, const SWinKey* key) { - qDebug("streamStateSeekKeyNext_rocksdb"); + stDebug("streamStateSeekKeyNext_rocksdb"); SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { return NULL; @@ -2053,7 +2074,7 @@ SStreamStateCur* streamStateSeekKeyNext_rocksdb(SStreamState* pState, const SWin } SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState) { - qDebug("streamStateGetCur_rocksdb"); + stDebug("streamStateGetCur_rocksdb"); int32_t code = 0; const SStateKey maxStateKey = {.key = {.groupId = UINT64_MAX, .ts = INT64_MAX}, .opNum = INT64_MAX}; @@ -2065,7 +2086,7 @@ SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState) { { char tbuf[256] = {0}; stateKeyToString((void*)&maxStateKey, tbuf); - qDebug("seek to last:%s", tbuf); + stDebug("seek to last:%s", tbuf); } SStreamStateCur* pCur = createStreamStateCursor(); @@ -2094,7 +2115,7 @@ SStreamStateCur* 
streamStateSeekToLast_rocksdb(SStreamState* pState) { } SStreamStateCur* streamStateGetCur_rocksdb(SStreamState* pState, const SWinKey* key) { - qDebug("streamStateGetCur_rocksdb"); + stDebug("streamStateGetCur_rocksdb"); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; SStreamStateCur* pCur = createStreamStateCursor(); @@ -2151,7 +2172,7 @@ int32_t streamStateSessionPut_rocksdb(SStreamState* pState, const SSessionKey* k return code; } int32_t streamStateSessionGet_rocksdb(SStreamState* pState, SSessionKey* key, void** pVal, int32_t* pVLen) { - qDebug("streamStateSessionGet_rocksdb"); + stDebug("streamStateSessionGet_rocksdb"); int code = 0; SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentNext_rocksdb(pState, key); SSessionKey resKey = *key; @@ -2229,7 +2250,7 @@ int32_t streamStateSessionCurPrev_rocksdb(SStreamStateCur* pCur) { } SStreamStateCur* streamStateSessionSeekKeyCurrentPrev_rocksdb(SStreamState* pState, const SSessionKey* key) { - qDebug("streamStateSessionSeekKeyCurrentPrev_rocksdb"); + stDebug("streamStateSessionSeekKeyCurrentPrev_rocksdb"); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; SStreamStateCur* pCur = createStreamStateCursor(); @@ -2270,7 +2291,7 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentPrev_rocksdb(SStreamState* pSta return pCur; } SStreamStateCur* streamStateSessionSeekKeyCurrentNext_rocksdb(SStreamState* pState, SSessionKey* key) { - qDebug("streamStateSessionSeekKeyCurrentNext_rocksdb"); + stDebug("streamStateSessionSeekKeyCurrentNext_rocksdb"); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { @@ -2308,7 +2329,7 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentNext_rocksdb(SStreamState* pSta } SStreamStateCur* streamStateSessionSeekKeyNext_rocksdb(SStreamState* pState, const SSessionKey* key) { - qDebug("streamStateSessionSeekKeyNext_rocksdb"); + stDebug("streamStateSessionSeekKeyNext_rocksdb"); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { @@ -2348,7 +2369,7 @@ SStreamStateCur* streamStateSessionSeekKeyNext_rocksdb(SStreamState* pState, con } int32_t streamStateSessionGetKVByCur_rocksdb(SStreamStateCur* pCur, SSessionKey* pKey, void** pVal, int32_t* pVLen) { - qDebug("streamStateSessionGetKVByCur_rocksdb"); + stDebug("streamStateSessionGetKVByCur_rocksdb"); if (!pCur) { return -1; } @@ -2369,6 +2390,7 @@ int32_t streamStateSessionGetKVByCur_rocksdb(SStreamStateCur* pCur, SSessionKey* char* val = NULL; int32_t len = decodeValueFunc((void*)vval, vLen, NULL, &val); if (len < 0) { + taosMemoryFree(val); return -1; } @@ -2411,7 +2433,7 @@ int32_t streamStateFillDel_rocksdb(SStreamState* pState, const SWinKey* key) { } SStreamStateCur* streamStateFillGetCur_rocksdb(SStreamState* pState, const SWinKey* key) { - qDebug("streamStateFillGetCur_rocksdb"); + stDebug("streamStateFillGetCur_rocksdb"); SStreamStateCur* pCur = createStreamStateCursor(); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; @@ -2447,7 +2469,7 @@ SStreamStateCur* streamStateFillGetCur_rocksdb(SStreamState* pState, const SWinK return NULL; } int32_t streamStateFillGetKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey, const void** pVal, int32_t* pVLen) { - qDebug("streamStateFillGetKVByCur_rocksdb"); + stDebug("streamStateFillGetKVByCur_rocksdb"); if (!pCur) { return -1; } @@ -2471,7 +2493,7 @@ int32_t 
streamStateFillGetKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey, } SStreamStateCur* streamStateFillSeekKeyNext_rocksdb(SStreamState* pState, const SWinKey* key) { - qDebug("streamStateFillSeekKeyNext_rocksdb"); + stDebug("streamStateFillSeekKeyNext_rocksdb"); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; SStreamStateCur* pCur = createStreamStateCursor(); if (!pCur) { @@ -2509,7 +2531,7 @@ SStreamStateCur* streamStateFillSeekKeyNext_rocksdb(SStreamState* pState, const return NULL; } SStreamStateCur* streamStateFillSeekKeyPrev_rocksdb(SStreamState* pState, const SWinKey* key) { - qDebug("streamStateFillSeekKeyPrev_rocksdb"); + stDebug("streamStateFillSeekKeyPrev_rocksdb"); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { @@ -2547,7 +2569,7 @@ SStreamStateCur* streamStateFillSeekKeyPrev_rocksdb(SStreamState* pState, const return NULL; } int32_t streamStateSessionGetKeyByRange_rocksdb(SStreamState* pState, const SSessionKey* key, SSessionKey* curKey) { - qDebug("streamStateSessionGetKeyByRange_rocksdb"); + stDebug("streamStateSessionGetKeyByRange_rocksdb"); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { @@ -2606,7 +2628,7 @@ int32_t streamStateSessionGetKeyByRange_rocksdb(SStreamState* pState, const SSes int32_t streamStateSessionAddIfNotExist_rocksdb(SStreamState* pState, SSessionKey* key, TSKEY gap, void** pVal, int32_t* pVLen) { - qDebug("streamStateSessionAddIfNotExist_rocksdb"); + stDebug("streamStateSessionAddIfNotExist_rocksdb"); // todo refactor int32_t res = 0; SSessionKey originKey = *key; @@ -2656,7 +2678,7 @@ _end: return res; } int32_t streamStateSessionClear_rocksdb(SStreamState* pState) { - qDebug("streamStateSessionClear_rocksdb"); + stDebug("streamStateSessionClear_rocksdb"); SSessionKey key = {.win.skey = 0, .win.ekey = 0, .groupId = 0}; SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentNext_rocksdb(pState, &key); @@ -2682,7 +2704,7 @@ int32_t streamStateSessionClear_rocksdb(SStreamState* pState) { } int32_t streamStateStateAddIfNotExist_rocksdb(SStreamState* pState, SSessionKey* key, char* pKeyData, int32_t keyDataLen, state_key_cmpr_fn fn, void** pVal, int32_t* pVLen) { - qDebug("streamStateStateAddIfNotExist_rocksdb"); + stDebug("streamStateStateAddIfNotExist_rocksdb"); // todo refactor int32_t res = 0; SSessionKey tmpKey = *key; @@ -2852,6 +2874,7 @@ char* streamDefaultIterVal_rocksdb(void* iter, int32_t* len) { const char* val = rocksdb_iter_value(pCur->iter, (size_t*)&vlen); *len = decodeValueFunc((void*)val, vlen, NULL, &ret); if (*len < 0) { + taosMemoryFree(ret); return NULL; } @@ -2875,7 +2898,7 @@ int32_t streamStatePutBatch(SStreamState* pState, const char* cfKeyName, rocksdb int i = streamStateGetCfIdx(pState, cfKeyName); if (i < 0) { - qError("streamState failed to put to cf name:%s", cfKeyName); + stError("streamState failed to put to cf name:%s", cfKeyName); return -1; } @@ -2892,7 +2915,7 @@ int32_t streamStatePutBatch(SStreamState* pState, const char* cfKeyName, rocksdb { char tbuf[256] = {0}; ginitDict[i].toStrFunc((void*)key, tbuf); - qDebug("streamState str: %s succ to write to %s_%s, len: %d", tbuf, wrapper->idstr, ginitDict[i].key, vlen); + stDebug("streamState str: %s succ to write to %s_%s, len: %d", tbuf, wrapper->idstr, ginitDict[i].key, vlen); } return 0; } @@ -2915,7 +2938,7 @@ int32_t streamStatePutBatchOptimize(SStreamState* pState, 
int32_t cfIdx, rocksdb { char tbuf[256] = {0}; ginitDict[cfIdx].toStrFunc((void*)key, tbuf); - qDebug("streamState str: %s succ to write to %s_%s", tbuf, wrapper->idstr, ginitDict[cfIdx].key); + stDebug("streamState str: %s succ to write to %s_%s", tbuf, wrapper->idstr, ginitDict[cfIdx].key); } return 0; } @@ -2924,11 +2947,11 @@ int32_t streamStatePutBatch_rocksdb(SStreamState* pState, void* pBatch) { SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; rocksdb_write(wrapper->rocksdb, wrapper->writeOpts, (rocksdb_writebatch_t*)pBatch, &err); if (err != NULL) { - qError("streamState failed to write batch, err:%s", err); + stError("streamState failed to write batch, err:%s", err); taosMemoryFree(err); return -1; } else { - qDebug("write batch to backend:%p", wrapper->pBackend); + stDebug("write batch to backend:%p", wrapper->pBackend); } return 0; } diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index cc93d25fd5..2cde368195 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -92,10 +92,10 @@ int32_t tDecodeStreamCheckpointReadyMsg(SDecoder* pDecoder, SStreamCheckpointRea } static int32_t streamAlignCheckpoint(SStreamTask* pTask) { - int32_t num = taosArrayGetSize(pTask->pUpstreamInfoList); + int32_t num = taosArrayGetSize(pTask->upstreamInfo.pList); int64_t old = atomic_val_compare_exchange_32(&pTask->checkpointAlignCnt, 0, num); if (old == 0) { - qDebug("s-task:%s set initial align upstream num:%d", pTask->id.idStr, num); + stDebug("s-task:%s set initial align upstream num:%d", pTask->id.idStr, num); } return atomic_sub_fetch_32(&pTask->checkpointAlignCnt, 1); @@ -136,17 +136,16 @@ int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSo ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); // 1. set task status to be prepared for check point, no data are allowed to put into inputQ. - taosThreadMutexLock(&pTask->lock); - pTask->status.taskStatus = TASK_STATUS__CK; pTask->checkpointingId = pReq->checkpointId; pTask->checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask); + pTask->chkInfo.startTs = taosGetTimestampMs(); - // 2. let's dispatch checkpoint msg to downstream task directly and do nothing else. put the checkpoint block into - // inputQ, to make sure all blocks with less version have been handled by this task already. + pTask->execInfo.checkpoint += 1; + + // 2. Put the checkpoint block into inputQ, to make sure all blocks with less version have been handled by this task + // already. 
int32_t code = appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER); - taosThreadMutexUnlock(&pTask->lock); - return code; } @@ -154,10 +153,11 @@ static int32_t continueDispatchCheckpointBlock(SStreamDataBlock* pBlock, SStream pBlock->srcTaskId = pTask->id.taskId; pBlock->srcVgId = pTask->pMeta->vgId; - int32_t code = taosWriteQitem(pTask->outputInfo.queue->pQueue, pBlock); + int32_t code = taosWriteQitem(pTask->outputq.queue->pQueue, pBlock); if (code == 0) { streamDispatchStreamBlock(pTask); } else { + stError("s-task:%s failed to put checkpoint into outputQ, code:%s", pTask->id.idStr, tstrerror(code)); streamFreeQitem((SStreamQueueItem*)pBlock); } @@ -182,8 +182,7 @@ int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBloc taosWLockLatch(&pMeta->lock); if (pMeta->chkptNotReadyTasks == 0) { - pMeta->chkptNotReadyTasks = streamMetaGetNumOfStreamTasks(pMeta); - pMeta->totalTasks = pMeta->chkptNotReadyTasks; + pMeta->chkptNotReadyTasks = pMeta->numOfStreamTasks; } taosWUnLockLatch(&pMeta->lock); @@ -193,35 +192,39 @@ int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBloc int32_t taskLevel = pTask->info.taskLevel; if (taskLevel == TASK_LEVEL__SOURCE) { if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { - qDebug("s-task:%s set childIdx:%d, and add checkpoint block into outputQ", id, pTask->info.selfChildId); + stDebug("s-task:%s set childIdx:%d, and add checkpoint-trigger block into outputQ", id, pTask->info.selfChildId); continueDispatchCheckpointBlock(pBlock, pTask); } else { // only one task exists, no need to dispatch downstream info streamProcessCheckpointReadyMsg(pTask); streamFreeQitem((SStreamQueueItem*)pBlock); } } else if (taskLevel == TASK_LEVEL__SINK || taskLevel == TASK_LEVEL__AGG) { - ASSERT(taosArrayGetSize(pTask->pUpstreamInfoList) > 0); + ASSERT(taosArrayGetSize(pTask->upstreamInfo.pList) > 0); + if (pTask->chkInfo.startTs == 0) { + pTask->chkInfo.startTs = taosGetTimestampMs(); + pTask->execInfo.checkpoint += 1; + } // update the child Id for downstream tasks streamAddCheckpointReadyMsg(pTask, pBlock->srcTaskId, pTask->info.selfChildId, checkpointId); // there are still some upstream tasks not send checkpoint request, do nothing and wait for then int32_t notReady = streamAlignCheckpoint(pTask); - int32_t num = taosArrayGetSize(pTask->pUpstreamInfoList); + int32_t num = taosArrayGetSize(pTask->upstreamInfo.pList); if (notReady > 0) { - qDebug("s-task:%s received checkpoint block, idx:%d, %d upstream tasks not send checkpoint info yet, total:%d", + stDebug("s-task:%s received checkpoint block, idx:%d, %d upstream tasks not send checkpoint info yet, total:%d", id, pTask->info.selfChildId, notReady, num); streamFreeQitem((SStreamQueueItem*)pBlock); return code; } if (taskLevel == TASK_LEVEL__SINK) { - qDebug("s-task:%s process checkpoint block, all %d upstreams sent checkpoint msgs, send ready msg to upstream", + stDebug("s-task:%s process checkpoint block, all %d upstreams sent checkpoint msgs, send ready msg to upstream", id, num); streamFreeQitem((SStreamQueueItem*)pBlock); streamTaskBuildCheckpoint(pTask); } else { - qDebug( + stDebug( "s-task:%s process checkpoint block, all %d upstreams sent checkpoint msgs, dispatch checkpoint msg " "downstream", id, num); @@ -250,27 +253,32 @@ int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask) { ASSERT(notReady >= 0); if (notReady == 0) { - qDebug("s-task:%s all downstream tasks have completed 
the checkpoint, start to do checkpoint for current task", + stDebug("s-task:%s all downstream tasks have completed the checkpoint, start to do checkpoint for current task", pTask->id.idStr); appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT); } else { int32_t total = streamTaskGetNumOfDownstream(pTask); - qDebug("s-task:%s %d/%d downstream tasks are not ready, wait", pTask->id.idStr, notReady, total); + stDebug("s-task:%s %d/%d downstream tasks are not ready, wait", pTask->id.idStr, notReady, total); } return 0; } +void streamTaskClearCheckInfo(SStreamTask* pTask) { + pTask->checkpointingId = 0; // clear the checkpoint id + pTask->chkInfo.failedId = 0; + pTask->chkInfo.startTs = 0; // clear the recorded start time + pTask->checkpointNotReadyTasks = 0; + pTask->checkpointAlignCnt = 0; + streamTaskOpenAllUpstreamInput(pTask); // open inputQ for all upstream tasks +} + int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId) { taosWLockLatch(&pMeta->lock); - int64_t keys[2]; for (int32_t i = 0; i < taosArrayGetSize(pMeta->pTaskList); ++i) { - SStreamTaskId* pId = taosArrayGet(pMeta->pTaskList, i); - keys[0] = pId->streamId; - keys[1] = pId->taskId; - - SStreamTask** ppTask = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + STaskId* pId = taosArrayGet(pMeta->pTaskList, i); + SStreamTask** ppTask = taosHashGet(pMeta->pTasksMap, pId, sizeof(*pId)); if (ppTask == NULL) { continue; } @@ -284,25 +292,27 @@ int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId) { ASSERT(p->chkInfo.checkpointId < p->checkpointingId && p->checkpointingId == checkpointId); p->chkInfo.checkpointId = p->checkpointingId; + streamTaskClearCheckInfo(p); streamSetStatusNormal(p); // save the task streamMetaSaveTask(pMeta, p); - streamTaskOpenAllUpstreamInput(p); // open inputQ for all upstream tasks - qDebug("vgId:%d s-task:%s level:%d commit task status after checkpoint completed, checkpointId:%" PRId64 - ", Ver(saved):%" PRId64 " currentVer:%" PRId64 ", status to be normal, prev:%s", - pMeta->vgId, p->id.idStr, p->info.taskLevel, checkpointId, p->chkInfo.checkpointVer, p->chkInfo.nextProcessVer, - streamGetTaskStatusStr(prev)); + + stDebug( + "vgId:%d s-task:%s level:%d open upstream inputQ, commit task status after checkpoint completed, " + "checkpointId:%" PRId64 ", Ver(saved):%" PRId64 " currentVer:%" PRId64 ", status to be normal, prev:%s", + pMeta->vgId, p->id.idStr, p->info.taskLevel, checkpointId, p->chkInfo.checkpointVer, p->chkInfo.nextProcessVer, + streamGetTaskStatusStr(prev)); } if (streamMetaCommit(pMeta) < 0) { taosWUnLockLatch(&pMeta->lock); - qError("vgId:%d failed to commit stream meta after do checkpoint, checkpointId:%" PRId64 ", since %s", pMeta->vgId, + stError("vgId:%d failed to commit stream meta after do checkpoint, checkpointId:%" PRId64 ", since %s", pMeta->vgId, checkpointId, terrstr()); return -1; } else { taosWUnLockLatch(&pMeta->lock); - qInfo("vgId:%d commit stream meta after do checkpoint, checkpointId:%" PRId64 " DONE", pMeta->vgId, checkpointId); + stInfo("vgId:%d commit stream meta after do checkpoint, checkpointId:%" PRId64 " DONE", pMeta->vgId, checkpointId); } return TSDB_CODE_SUCCESS; @@ -316,17 +326,20 @@ int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { int32_t remain = atomic_sub_fetch_32(&pMeta->chkptNotReadyTasks, 1); ASSERT(remain >= 0); + double el = (taosGetTimestampMs() - pTask->chkInfo.startTs) / 1000.0; if (remain == 0) { // all tasks are ready - qDebug("s-task:%s is ready for checkpoint", pTask->id.idStr); - 
pMeta->totalTasks = 0; - + stDebug("s-task:%s all downstreams are ready, ready for do checkpoint", pTask->id.idStr); streamBackendDoCheckpoint(pMeta, pTask->checkpointingId); streamSaveAllTaskStatus(pMeta, pTask->checkpointingId); - qDebug("vgId:%d vnode wide checkpoint completed, save all tasks status, checkpointId:%" PRId64, pMeta->vgId, - pTask->checkpointingId); + stInfo( + "vgId:%d vnode wide checkpoint completed, save all tasks status, last:%s, level:%d elapsed time:%.2f Sec " + "checkpointId:%" PRId64, + pMeta->vgId, pTask->id.idStr, pTask->info.taskLevel, el, pTask->checkpointingId); } else { - qDebug("vgId:%d vnode wide tasks not reach checkpoint ready status, ready s-task:%s, not ready:%d/%d", pMeta->vgId, - pTask->id.idStr, remain, pMeta->totalTasks); + stInfo( + "vgId:%d vnode wide tasks not reach checkpoint ready status, ready s-task:%s, level:%d elapsed time:%.2f Sec " + "not ready:%d/%d", + pMeta->vgId, pTask->id.idStr, pTask->info.taskLevel, el, remain, pMeta->numOfStreamTasks); } // send check point response to upstream task @@ -338,7 +351,7 @@ int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { if (code != TSDB_CODE_SUCCESS) { // todo: let's retry send rsp to upstream/mnode - qError("s-task:%s failed to send checkpoint rsp to upstream, checkpointId:%" PRId64 ", code:%s", pTask->id.idStr, + stError("s-task:%s failed to send checkpoint rsp to upstream, checkpointId:%" PRId64 ", code:%s", pTask->id.idStr, pTask->checkpointingId, tstrerror(code)); } diff --git a/source/libs/stream/src/streamData.c b/source/libs/stream/src/streamData.c index 00bf631d74..80927b36b9 100644 --- a/source/libs/stream/src/streamData.c +++ b/source/libs/stream/src/streamData.c @@ -65,6 +65,10 @@ SStreamDataBlock* createStreamBlockFromResults(SStreamQueueItem* pItem, SStreamT pStreamBlocks->type = STREAM_INPUT__DATA_BLOCK; pStreamBlocks->blocks = pRes; + if (pItem == NULL) { + return pStreamBlocks; + } + if (pItem->type == STREAM_INPUT__DATA_SUBMIT) { SStreamDataSubmit* pSubmit = (SStreamDataSubmit*)pItem; pStreamBlocks->sourceVer = pSubmit->ver; @@ -161,12 +165,16 @@ SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* SStreamDataBlock* pBlockSrc = (SStreamDataBlock*)pElem; taosArrayAddAll(pBlock->blocks, pBlockSrc->blocks); taosArrayDestroy(pBlockSrc->blocks); + streamQueueItemIncSize(dst, streamQueueItemGetSize(pElem)); + taosFreeQitem(pElem); return dst; } else if (dst->type == STREAM_INPUT__MERGED_SUBMIT && pElem->type == STREAM_INPUT__DATA_SUBMIT) { SStreamMergedSubmit* pMerged = (SStreamMergedSubmit*)dst; SStreamDataSubmit* pBlockSrc = (SStreamDataSubmit*)pElem; streamMergeSubmit(pMerged, pBlockSrc); + streamQueueItemIncSize(dst, streamQueueItemGetSize(pElem)); + taosFreeQitem(pElem); return dst; } else if (dst->type == STREAM_INPUT__DATA_SUBMIT && pElem->type == STREAM_INPUT__DATA_SUBMIT) { @@ -176,13 +184,16 @@ SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* return NULL; } + streamQueueItemIncSize((SStreamQueueItem*)pMerged, streamQueueItemGetSize(pElem)); + streamMergeSubmit(pMerged, (SStreamDataSubmit*)dst); streamMergeSubmit(pMerged, (SStreamDataSubmit*)pElem); + taosFreeQitem(dst); taosFreeQitem(pElem); return (SStreamQueueItem*)pMerged; } else { - qDebug("block type:%s not merged with existed blocks list, type:%d", streamGetBlockTypeStr(pElem->type), dst->type); + stDebug("block type:%s not merged with existed blocks list, type:%d", streamQueueItemGetTypeStr(pElem->type), dst->type); return NULL; } } @@ -223,16 +234,3 @@ void 
streamFreeQitem(SStreamQueueItem* data) { taosFreeQitem(pBlock); } } - -const char* streamGetBlockTypeStr(int32_t type) { - switch (type) { - case STREAM_INPUT__CHECKPOINT: - return "checkpoint"; - case STREAM_INPUT__CHECKPOINT_TRIGGER: - return "checkpoint-trigger"; - case STREAM_INPUT__TRANS_STATE: - return "trans-state"; - default: - return ""; - } -} \ No newline at end of file diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 4d5234a68c..a7a06dd884 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -18,10 +18,6 @@ #include "ttimer.h" #include "tmisce.h" -#define MAX_BLOCK_NAME_NUM 1024 -#define DISPATCH_RETRY_INTERVAL_MS 300 -#define MAX_CONTINUE_RETRY_COUNT 5 - typedef struct SBlockName { uint32_t hashValue; char parTbName[TSDB_TABLE_NAME_LEN]; @@ -53,6 +49,7 @@ void initRpcMsg(SRpcMsg* pMsg, int32_t msgType, void* pCont, int32_t contLen) { int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* pReq) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pReq->stage) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->msgId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->srcVgId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->type) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; @@ -78,6 +75,7 @@ int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* p int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq) { if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->stage) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->msgId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->srcVgId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->type) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1; @@ -112,6 +110,7 @@ static int32_t tInitStreamDispatchReq(SStreamDispatchReq* pReq, const SStreamTas pReq->streamId = pTask->id.streamId; pReq->srcVgId = vgId; pReq->stage = pTask->pMeta->stage; + pReq->msgId = pTask->execInfo.dispatch; pReq->upstreamTaskId = pTask->id.taskId; pReq->upstreamChildId = pTask->info.selfChildId; pReq->upstreamNodeId = pTask->info.nodeId; @@ -196,11 +195,11 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) .retrieveLen = dataStrLen, }; - int32_t sz = taosArrayGetSize(pTask->pUpstreamInfoList); + int32_t sz = taosArrayGetSize(pTask->upstreamInfo.pList); ASSERT(sz > 0); for (int32_t i = 0; i < sz; i++) { req.reqId = tGenIdPI64(); - SStreamChildEpInfo* pEpInfo = taosArrayGetP(pTask->pUpstreamInfoList, i); + SStreamChildEpInfo* pEpInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); req.dstNodeId = pEpInfo->nodeId; req.dstTaskId = pEpInfo->taskId; int32_t len; @@ -231,7 +230,7 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) } buf = NULL; - qDebug("s-task:%s (child %d) send retrieve req to task:0x%x (vgId:%d), reqId:0x%" PRIx64, pTask->id.idStr, + stDebug("s-task:%s (child %d) send retrieve req to task:0x%x (vgId:%d), reqId:0x%" PRIx64, pTask->id.idStr, pTask->info.selfChildId, pEpInfo->taskId, pEpInfo->nodeId, req.reqId); } code = 0; @@ -242,7 +241,7 @@ CLEAR: return code; } -int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet) { +int32_t streamSendCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet) { void* buf = NULL; int32_t code = -1; SRpcMsg msg = {0}; @@ 
-270,66 +269,73 @@ int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pR tEncoderClear(&encoder); initRpcMsg(&msg, TDMT_VND_STREAM_TASK_CHECK, buf, tlen + sizeof(SMsgHead)); - qDebug("s-task:%s (level:%d) send check msg to s-task:0x%" PRIx64 ":0x%x (vgId:%d)", pTask->id.idStr, + stDebug("s-task:%s (level:%d) send check msg to s-task:0x%" PRIx64 ":0x%x (vgId:%d)", pTask->id.idStr, pTask->info.taskLevel, pReq->streamId, pReq->downstreamTaskId, nodeId); tmsgSendReq(pEpSet, &msg); return 0; } -static int32_t doDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pData) { +void destroyDispatchMsg(SStreamDispatchReq* pReq, int32_t numOfVgroups) { + for (int32_t i = 0; i < numOfVgroups; i++) { + taosArrayDestroyP(pReq[i].data, taosMemoryFree); + taosArrayDestroy(pReq[i].dataLen); + } + + taosMemoryFree(pReq); +} + +int32_t getNumOfDispatchBranch(SStreamTask* pTask) { + return (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) + ? 1 + : taosArrayGetSize(pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos); +} + +static int32_t doBuildDispatchMsg(SStreamTask* pTask, const SStreamDataBlock* pData) { int32_t code = 0; int32_t numOfBlocks = taosArrayGetSize(pData->blocks); - ASSERT(numOfBlocks != 0); + ASSERT(numOfBlocks != 0 && pTask->msgInfo.pData == NULL); + + pTask->msgInfo.dispatchMsgType = pData->type; if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { - SStreamDispatchReq req = {0}; + SStreamDispatchReq* pReq = taosMemoryCalloc(1, sizeof(SStreamDispatchReq)); - int32_t downstreamTaskId = pTask->fixedEpDispatcher.taskId; - code = tInitStreamDispatchReq(&req, pTask, pData->srcVgId, numOfBlocks, downstreamTaskId, pData->type); + int32_t downstreamTaskId = pTask->outputInfo.fixedDispatcher.taskId; + code = tInitStreamDispatchReq(pReq, pTask, pData->srcVgId, numOfBlocks, downstreamTaskId, pData->type); if (code != TSDB_CODE_SUCCESS) { return code; } for (int32_t i = 0; i < numOfBlocks; i++) { SSDataBlock* pDataBlock = taosArrayGet(pData->blocks, i); - - code = streamAddBlockIntoDispatchMsg(pDataBlock, &req); + code = streamAddBlockIntoDispatchMsg(pDataBlock, pReq); if (code != TSDB_CODE_SUCCESS) { - taosArrayDestroyP(req.data, taosMemoryFree); - taosArrayDestroy(req.dataLen); + destroyDispatchMsg(pReq, 1); return code; } } - int32_t vgId = pTask->fixedEpDispatcher.nodeId; - SEpSet* pEpSet = &pTask->fixedEpDispatcher.epSet; - - qDebug("s-task:%s (child taskId:%d) fix-dispatch %d block(s) to s-task:0x%x (vgId:%d)", pTask->id.idStr, - pTask->info.selfChildId, numOfBlocks, downstreamTaskId, vgId); - - code = doSendDispatchMsg(pTask, &req, vgId, pEpSet); - taosArrayDestroyP(req.data, taosMemoryFree); - taosArrayDestroy(req.dataLen); - return code; + pTask->msgInfo.pData = pReq; } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { - int32_t rspCnt = atomic_load_32(&pTask->shuffleDispatcher.waitingRspCnt); + int32_t rspCnt = atomic_load_32(&pTask->outputInfo.shuffleDispatcher.waitingRspCnt); ASSERT(rspCnt == 0); - SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; - int32_t vgSz = taosArrayGetSize(vgInfo); + SArray* vgInfo = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos; + int32_t numOfVgroups = taosArrayGetSize(vgInfo); - SStreamDispatchReq* pReqs = taosMemoryCalloc(vgSz, sizeof(SStreamDispatchReq)); + SStreamDispatchReq* pReqs = taosMemoryCalloc(numOfVgroups, sizeof(SStreamDispatchReq)); if (pReqs == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } - for (int32_t i = 0; i < vgSz; i++) { + for (int32_t i = 
0; i < numOfVgroups; i++) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); code = tInitStreamDispatchReq(&pReqs[i], pTask, pData->srcVgId, 0, pVgInfo->taskId, pData->type); if (code != TSDB_CODE_SUCCESS) { - goto FAIL_SHUFFLE_DISPATCH; + destroyDispatchMsg(pReqs, numOfVgroups); + return code; } } @@ -338,50 +344,72 @@ static int32_t doDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* p // TODO: do not use broadcast if (pDataBlock->info.type == STREAM_DELETE_RESULT || pDataBlock->info.type == STREAM_CHECKPOINT || pDataBlock->info.type == STREAM_TRANS_STATE) { - for (int32_t j = 0; j < vgSz; j++) { - if (streamAddBlockIntoDispatchMsg(pDataBlock, &pReqs[j]) < 0) { - goto FAIL_SHUFFLE_DISPATCH; + for (int32_t j = 0; j < numOfVgroups; j++) { + code = streamAddBlockIntoDispatchMsg(pDataBlock, &pReqs[j]); + if (code != 0) { + destroyDispatchMsg(pReqs, numOfVgroups); + return code; } if (pReqs[j].blockNum == 0) { - atomic_add_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1); + atomic_add_fetch_32(&pTask->outputInfo.shuffleDispatcher.waitingRspCnt, 1); } + pReqs[j].blockNum++; } continue; } - if (streamSearchAndAddBlock(pTask, pReqs, pDataBlock, vgSz, pDataBlock->info.id.groupId) < 0) { - goto FAIL_SHUFFLE_DISPATCH; + code = streamSearchAndAddBlock(pTask, pReqs, pDataBlock, numOfVgroups, pDataBlock->info.id.groupId); + if(code != 0) { + destroyDispatchMsg(pReqs, numOfVgroups); + return code; } } - qDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to %d vgroups", pTask->id.idStr, - pTask->info.selfChildId, numOfBlocks, vgSz); + pTask->msgInfo.pData = pReqs; + } - for (int32_t i = 0; i < vgSz; i++) { - if (pReqs[i].blockNum > 0) { + stDebug("s-task:%s build dispatch msg success, msgId:%d", pTask->id.idStr, pTask->execInfo.dispatch); + return code; +} + +static int32_t sendDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pDispatchMsg) { + int32_t code = 0; + int32_t msgId = pTask->execInfo.dispatch; + const char* id = pTask->id.idStr; + + if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { + int32_t vgId = pTask->outputInfo.fixedDispatcher.nodeId; + SEpSet* pEpSet = &pTask->outputInfo.fixedDispatcher.epSet; + int32_t downstreamTaskId = pTask->outputInfo.fixedDispatcher.taskId; + + stDebug("s-task:%s (child taskId:%d) fix-dispatch %d block(s) to s-task:0x%x (vgId:%d), id:%d", id, + pTask->info.selfChildId, 1, downstreamTaskId, vgId, msgId); + + code = doSendDispatchMsg(pTask, pDispatchMsg, vgId, pEpSet); + } else { + SArray* vgInfo = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos; + int32_t numOfVgroups = taosArrayGetSize(vgInfo); + + stDebug("s-task:%s (child taskId:%d) start to shuffle-dispatch blocks to %d vgroup(s), msgId:%d", + id, pTask->info.selfChildId, numOfVgroups, msgId); + + for (int32_t i = 0; i < numOfVgroups; i++) { + if (pDispatchMsg[i].blockNum > 0) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); - qDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to vgId:%d", pTask->id.idStr, - pTask->info.selfChildId, pReqs[i].blockNum, pVgInfo->vgId); + stDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to vgId:%d", pTask->id.idStr, + pTask->info.selfChildId, pDispatchMsg[i].blockNum, pVgInfo->vgId); - code = doSendDispatchMsg(pTask, &pReqs[i], pVgInfo->vgId, &pVgInfo->epSet); + code = doSendDispatchMsg(pTask, &pDispatchMsg[i], pVgInfo->vgId, &pVgInfo->epSet); if (code < 0) { - goto FAIL_SHUFFLE_DISPATCH; + break; } } } - code = 0; - - FAIL_SHUFFLE_DISPATCH: - for (int32_t i = 0; i < vgSz; i++) { - 
taosArrayDestroyP(pReqs[i].data, taosMemoryFree); - taosArrayDestroy(pReqs[i].dataLen); - } - - taosMemoryFree(pReqs); + stDebug("s-task:%s complete shuffle-dispatch blocks to all %d vnodes, msgId:%d", pTask->id.idStr, numOfVgroups, msgId); } return code; @@ -389,48 +417,98 @@ static int32_t doDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* p static void doRetryDispatchData(void* param, void* tmrId) { SStreamTask* pTask = param; + const char* id = pTask->id.idStr; + int32_t msgId = pTask->execInfo.dispatch; if (streamTaskShouldStop(&pTask->status)) { - int8_t ref = atomic_sub_fetch_8(&pTask->status.timerActive, 1); - qDebug("s-task:%s should stop, abort from timer, ref:%d", pTask->id.idStr, ref); + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s should stop, abort from timer, ref:%d", pTask->id.idStr, ref); return; } - ASSERT(pTask->outputInfo.status == TASK_OUTPUT_STATUS__WAIT); + ASSERT(pTask->outputq.status == TASK_OUTPUT_STATUS__WAIT); + + int32_t code = 0; + { + SArray* pList = taosArrayDup(pTask->msgInfo.pRetryList, NULL); + taosArrayClear(pTask->msgInfo.pRetryList); + + SStreamDispatchReq *pReq = pTask->msgInfo.pData; + + if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { + SArray* vgInfo = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos; + int32_t numOfVgroups = taosArrayGetSize(vgInfo); + + int32_t numOfFailed = taosArrayGetSize(pList); + stDebug("s-task:%s (child taskId:%d) re-try shuffle-dispatch blocks to %d vgroup(s), msgId:%d", + id, pTask->info.selfChildId, numOfFailed, msgId); + + for (int32_t i = 0; i < numOfFailed; i++) { + int32_t vgId = *(int32_t*) taosArrayGet(pList, i); + + for(int32_t j = 0; j < numOfVgroups; ++j) { + SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, j); + if (pVgInfo->vgId == vgId) { + stDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to vgId:%d", pTask->id.idStr, + pTask->info.selfChildId, pReq[j].blockNum, pVgInfo->vgId); + + code = doSendDispatchMsg(pTask, &pReq[j], pVgInfo->vgId, &pVgInfo->epSet); + if (code < 0) { + break; + } + } + } + } + + stDebug("s-task:%s complete re-try shuffle-dispatch blocks to all %d vnodes, msgId:%d", pTask->id.idStr, numOfFailed, msgId); + } else { + int32_t vgId = pTask->outputInfo.fixedDispatcher.nodeId; + SEpSet* pEpSet = &pTask->outputInfo.fixedDispatcher.epSet; + int32_t downstreamTaskId = pTask->outputInfo.fixedDispatcher.taskId; + + stDebug("s-task:%s (child taskId:%d) fix-dispatch %d block(s) to s-task:0x%x (vgId:%d), id:%d", id, + pTask->info.selfChildId, 1, downstreamTaskId, vgId, msgId); + + code = doSendDispatchMsg(pTask, pReq, vgId, pEpSet); + } + } - int32_t code = doDispatchAllBlocks(pTask, pTask->msgInfo.pData); if (code != TSDB_CODE_SUCCESS) { if (!streamTaskShouldStop(&pTask->status)) { - qDebug("s-task:%s reset the waitRspCnt to be 0 before launch retry dispatch", pTask->id.idStr); - atomic_store_32(&pTask->shuffleDispatcher.waitingRspCnt, 0); +// stDebug("s-task:%s reset the waitRspCnt to be 0 before launch retry dispatch", pTask->id.idStr); +// atomic_store_32(&pTask->outputInfo.shuffleDispatcher.waitingRspCnt, 0); if (streamTaskShouldPause(&pTask->status)) { - streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS * 10); + streamRetryDispatchData(pTask, DISPATCH_RETRY_INTERVAL_MS * 10); } else { - streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS); + streamRetryDispatchData(pTask, DISPATCH_RETRY_INTERVAL_MS); } } else { - int32_t ref = atomic_sub_fetch_8(&pTask->status.timerActive, 1); - 
qDebug("s-task:%s should stop, abort from timer, ref:%d", pTask->id.idStr, ref); + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s should stop, abort from timer, ref:%d", pTask->id.idStr, ref); } } else { - int8_t ref = atomic_sub_fetch_8(&pTask->status.timerActive, 1); - qDebug("s-task:%s send success, jump out of timer, ref:%d", pTask->id.idStr, ref); + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s send success, jump out of timer, ref:%d", pTask->id.idStr, ref); } } -void streamRetryDispatchStreamBlock(SStreamTask* pTask, int64_t waitDuration) { - qWarn("s-task:%s dispatch data in %" PRId64 "ms, in timer", pTask->id.idStr, waitDuration); - if (pTask->launchTaskTimer != NULL) { - taosTmrReset(doRetryDispatchData, waitDuration, pTask, streamEnv.timer, &pTask->launchTaskTimer); +void streamRetryDispatchData(SStreamTask* pTask, int64_t waitDuration) { + pTask->msgInfo.retryCount++; + + stWarn("s-task:%s retry send dispatch data in %" PRId64 "ms, in timer msgId:%d, retryTimes:%d", pTask->id.idStr, + waitDuration, pTask->execInfo.dispatch, pTask->msgInfo.retryCount); + + if (pTask->msgInfo.pTimer != NULL) { + taosTmrReset(doRetryDispatchData, waitDuration, pTask, streamEnv.timer, &pTask->msgInfo.pTimer); } else { - pTask->launchTaskTimer = taosTmrStart(doRetryDispatchData, waitDuration, pTask, streamEnv.timer); + pTask->msgInfo.pTimer = taosTmrStart(doRetryDispatchData, waitDuration, pTask, streamEnv.timer); } } int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, SSDataBlock* pDataBlock, int32_t vgSz, int64_t groupId) { uint32_t hashValue = 0; - SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; + SArray* vgInfo = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos; if (pTask->pNameMap == NULL) { pTask->pNameMap = tSimpleHashInit(1024, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); } @@ -450,14 +528,14 @@ int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, S } if (pDataBlock->info.parTbName[0]) { - snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName); + snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->outputInfo.shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName); } else { - buildCtbNameByGroupIdImpl(pTask->shuffleDispatcher.stbFullName, groupId, pDataBlock->info.parTbName); - snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName); + buildCtbNameByGroupIdImpl(pTask->outputInfo.shuffleDispatcher.stbFullName, groupId, pDataBlock->info.parTbName); + snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->outputInfo.shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName); } /*uint32_t hashValue = MurmurHash3_32(ctbName, strlen(ctbName));*/ - SUseDbRsp* pDbInfo = &pTask->shuffleDispatcher.dbInfo; + SUseDbRsp* pDbInfo = &pTask->outputInfo.shuffleDispatcher.dbInfo; hashValue = taosGetTbHashVal(ctbName, strlen(ctbName), pDbInfo->hashMethod, pDbInfo->hashPrefix, pDbInfo->hashSuffix); taosMemoryFree(ctbName); @@ -482,7 +560,7 @@ int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, S } if (pReqs[j].blockNum == 0) { - atomic_add_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1); + atomic_add_fetch_32(&pTask->outputInfo.shuffleDispatcher.waitingRspCnt, 1); } pReqs[j].blockNum++; @@ -498,59 +576,66 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { ASSERT((pTask->outputInfo.type == 
TASK_OUTPUT__FIXED_DISPATCH || pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH)); const char* id = pTask->id.idStr; - int32_t numOfElems = streamQueueGetNumOfItems(pTask->outputInfo.queue); + int32_t numOfElems = streamQueueGetNumOfItems(pTask->outputq.queue); if (numOfElems > 0) { - double size = SIZE_IN_MB(taosQueueMemorySize(pTask->outputInfo.queue->pQueue)); - qDebug("s-task:%s start to dispatch intermediate block to downstream, elem in outputQ:%d, size:%.2fMiB", id, numOfElems, size); + double size = SIZE_IN_MiB(taosQueueMemorySize(pTask->outputq.queue->pQueue)); + stDebug("s-task:%s start to dispatch intermediate block to downstream, elem in outputQ:%d, size:%.2fMiB", id, numOfElems, size); } // to make sure only one dispatch is running int8_t old = - atomic_val_compare_exchange_8(&pTask->outputInfo.status, TASK_OUTPUT_STATUS__NORMAL, TASK_OUTPUT_STATUS__WAIT); + atomic_val_compare_exchange_8(&pTask->outputq.status, TASK_OUTPUT_STATUS__NORMAL, TASK_OUTPUT_STATUS__WAIT); if (old != TASK_OUTPUT_STATUS__NORMAL) { - qDebug("s-task:%s wait for dispatch rsp, not dispatch now, output status:%d", id, old); + stDebug("s-task:%s wait for dispatch rsp, not dispatch now, output status:%d", id, old); return 0; } ASSERT(pTask->msgInfo.pData == NULL); - qDebug("s-task:%s start to dispatch msg, set output status:%d", id, pTask->outputInfo.status); + stDebug("s-task:%s start to dispatch msg, set output status:%d", id, pTask->outputq.status); - SStreamDataBlock* pBlock = streamQueueNextItem(pTask->outputInfo.queue); + SStreamDataBlock* pBlock = streamQueueNextItem(pTask->outputq.queue); if (pBlock == NULL) { - atomic_store_8(&pTask->outputInfo.status, TASK_OUTPUT_STATUS__NORMAL); - qDebug("s-task:%s not dispatch since no elems in outputQ, output status:%d", id, pTask->outputInfo.status); + atomic_store_8(&pTask->outputq.status, TASK_OUTPUT_STATUS__NORMAL); + stDebug("s-task:%s not dispatch since no elems in outputQ, output status:%d", id, pTask->outputq.status); return 0; } - pTask->msgInfo.pData = pBlock; ASSERT(pBlock->type == STREAM_INPUT__DATA_BLOCK || pBlock->type == STREAM_INPUT__CHECKPOINT_TRIGGER || pBlock->type == STREAM_INPUT__TRANS_STATE); - int32_t retryCount = 0; + pTask->execInfo.dispatch += 1; + pTask->msgInfo.startTs = taosGetTimestampMs(); + int32_t code = doBuildDispatchMsg(pTask, pBlock); + if (code == 0) { + destroyStreamDataBlock(pBlock); + } else { // todo handle build dispatch msg failed + } + + int32_t retryCount = 0; while (1) { - int32_t code = doDispatchAllBlocks(pTask, pBlock); + code = sendDispatchMsg(pTask, pTask->msgInfo.pData); if (code == TSDB_CODE_SUCCESS) { break; } - qDebug("s-task:%s failed to dispatch msg to downstream, code:%s, output status:%d, retry cnt:%d", id, - tstrerror(terrno), pTask->outputInfo.status, retryCount); + stDebug("s-task:%s failed to dispatch msg:%d to downstream, code:%s, output status:%d, retry cnt:%d", id, + pTask->execInfo.dispatch, tstrerror(terrno), pTask->outputq.status, retryCount); // todo deal with only partially success dispatch case - atomic_store_32(&pTask->shuffleDispatcher.waitingRspCnt, 0); + atomic_store_32(&pTask->outputInfo.shuffleDispatcher.waitingRspCnt, 0); if (terrno == TSDB_CODE_APP_IS_STOPPING) { // in case of this error, do not retry anymore - destroyStreamDataBlock(pTask->msgInfo.pData); + destroyDispatchMsg(pTask->msgInfo.pData, getNumOfDispatchBranch(pTask)); pTask->msgInfo.pData = NULL; return code; } if (++retryCount > MAX_CONTINUE_RETRY_COUNT) { // add to timer to retry - int8_t ref = 
atomic_add_fetch_8(&pTask->status.timerActive, 1); - - qDebug("s-task:%s failed to dispatch msg to downstream for %d times, code:%s, add timer to retry in %dms, ref:%d", + int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s failed to dispatch msg to downstream for %d times, code:%s, add timer to retry in %dms, ref:%d", pTask->id.idStr, retryCount, tstrerror(terrno), DISPATCH_RETRY_INTERVAL_MS, ref); - streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS); + + streamRetryDispatchData(pTask, DISPATCH_RETRY_INTERVAL_MS); break; } } @@ -569,15 +654,15 @@ int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask) { // serialize if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { - req.downstreamTaskId = pTask->fixedEpDispatcher.taskId; + req.downstreamTaskId = pTask->outputInfo.fixedDispatcher.taskId; pTask->notReadyTasks = 1; - doDispatchScanHistoryFinishMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); + doDispatchScanHistoryFinishMsg(pTask, &req, pTask->outputInfo.fixedDispatcher.nodeId, &pTask->outputInfo.fixedDispatcher.epSet); } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { - SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; + SArray* vgInfo = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos; int32_t numOfVgs = taosArrayGetSize(vgInfo); pTask->notReadyTasks = numOfVgs; - qDebug("s-task:%s send scan-history data complete msg to downstream (shuffle-dispatch) %d tasks, status:%s", pTask->id.idStr, + stDebug("s-task:%s send scan-history data complete msg to downstream (shuffle-dispatch) %d tasks, status:%s", pTask->id.idStr, numOfVgs, streamGetTaskStatusStr(pTask->status.taskStatus)); for (int32_t i = 0; i < numOfVgs; i++) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); @@ -585,7 +670,7 @@ int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask) { doDispatchScanHistoryFinishMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); } } else { - qDebug("s-task:%s no downstream tasks, invoke scan-history finish rsp directly", pTask->id.idStr); + stDebug("s-task:%s no downstream tasks, invoke scan-history finish rsp directly", pTask->id.idStr); streamProcessScanHistoryFinishRsp(pTask); } @@ -595,18 +680,18 @@ int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask) { // this function is usually invoked by sink/agg task int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask) { int32_t num = taosArrayGetSize(pTask->pReadyMsgList); - ASSERT(taosArrayGetSize(pTask->pUpstreamInfoList) == num); + ASSERT(taosArrayGetSize(pTask->upstreamInfo.pList) == num); for (int32_t i = 0; i < num; ++i) { SStreamChkptReadyInfo* pInfo = taosArrayGet(pTask->pReadyMsgList, i); tmsgSendReq(&pInfo->upstreamNodeEpset, &pInfo->msg); - qDebug("s-task:%s level:%d checkpoint ready msg sent to upstream:0x%x", pTask->id.idStr, pTask->info.taskLevel, + stDebug("s-task:%s level:%d checkpoint ready msg sent to upstream:0x%x", pTask->id.idStr, pTask->info.taskLevel, pInfo->upStreamTaskId); } taosArrayClear(pTask->pReadyMsgList); - qDebug("s-task:%s level:%d checkpoint ready msg sent to all %d upstreams", pTask->id.idStr, pTask->info.taskLevel, num); + stDebug("s-task:%s level:%d checkpoint ready msg sent to all %d upstreams", pTask->id.idStr, pTask->info.taskLevel, num); return TSDB_CODE_SUCCESS; } @@ -619,7 +704,7 @@ int32_t streamTaskSendCheckpointSourceRsp(SStreamTask* pTask) { tmsgSendRsp(&pInfo->msg); taosArrayClear(pTask->pReadyMsgList); - qDebug("s-task:%s level:%d source 
checkpoint completed msg sent to mnode", pTask->id.idStr, pTask->info.taskLevel); + stDebug("s-task:%s level:%d source checkpoint completed msg sent to mnode", pTask->id.idStr, pTask->info.taskLevel); return TSDB_CODE_SUCCESS; } @@ -691,7 +776,7 @@ int32_t doDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamScanHist tmsgSendReq(pEpSet, &msg); const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); - qDebug("s-task:%s status:%s dispatch scan-history finish msg to taskId:0x%x (vgId:%d)", pTask->id.idStr, pStatus, + stDebug("s-task:%s status:%s dispatch scan-history finish msg to taskId:0x%x (vgId:%d)", pTask->id.idStr, pStatus, pReq->downstreamTaskId, vgId); return 0; } @@ -725,7 +810,7 @@ int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, in tEncoderClear(&encoder); initRpcMsg(&msg, pTask->msgInfo.msgType, buf, tlen + sizeof(SMsgHead)); - qDebug("s-task:%s dispatch msg to taskId:0x%x vgId:%d data msg", pTask->id.idStr, pReq->taskId, vgId); + stDebug("s-task:%s dispatch msg to taskId:0x%x vgId:%d data msg", pTask->id.idStr, pReq->taskId, vgId); return tmsgSendReq(pEpSet, &msg); @@ -764,7 +849,6 @@ int32_t buildCheckpointSourceRsp(SStreamCheckpointSourceReq* pReq, SRpcHandleInf } ((SMsgHead*)pBuf)->vgId = htonl(pReq->mnodeId); - void* abuf = POINTER_SHIFT(pBuf, sizeof(SMsgHead)); tEncoderInit(&encoder, (uint8_t*)abuf, len); @@ -786,7 +870,7 @@ int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHa } taosArrayPush(pTask->pReadyMsgList, &info); - qDebug("s-task:%s add checkpoint source rsp msg, total:%d", pTask->id.idStr, (int32_t)taosArrayGetSize(pTask->pReadyMsgList)); + stDebug("s-task:%s add checkpoint source rsp msg, total:%d", pTask->id.idStr, (int32_t)taosArrayGetSize(pTask->pReadyMsgList)); return TSDB_CODE_SUCCESS; } @@ -836,8 +920,8 @@ int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId, initRpcMsg(&info.msg, TDMT_STREAM_TASK_CHECKPOINT_READY, buf, tlen + sizeof(SMsgHead)); info.msg.info.noResp = 1; // refactor later. 
- qDebug("s-task:%s (level:%d) prepare checkpoint ready msg to upstream s-task:0x%" PRIx64 ":0x%x (vgId:%d) idx:%d", - pTask->id.idStr, pTask->info.taskLevel, req.streamId, req.upstreamTaskId, req.downstreamNodeId, index); + stDebug("s-task:%s (level:%d) prepare checkpoint ready msg to upstream s-task:0x%" PRIx64 ":0x%x (vgId:%d) idx:%d", + pTask->id.idStr, pTask->info.taskLevel, req.streamId, req.upstreamTaskId, req.upstreamNodeId, index); if (pTask->pReadyMsgList == NULL) { pTask->pReadyMsgList = taosArrayInit(4, sizeof(SStreamChkptReadyInfo)); @@ -924,7 +1008,7 @@ int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, taosThreadMutexUnlock(&pTask->lock); int32_t num = taosArrayGetSize(pTask->pRspMsgList); - qDebug("s-task:%s add scan history finish rsp msg for task:0x%x, total:%d", pTask->id.idStr, pReq->upstreamTaskId, + stDebug("s-task:%s add scan history finish rsp msg for task:0x%x, total:%d", pTask->id.idStr, pReq->upstreamTaskId, num); return TSDB_CODE_SUCCESS; } @@ -932,108 +1016,152 @@ int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, int32_t streamNotifyUpstreamContinue(SStreamTask* pTask) { ASSERT(pTask->info.taskLevel == TASK_LEVEL__AGG || pTask->info.taskLevel == TASK_LEVEL__SINK); + const char* id = pTask->id.idStr; + int32_t level = pTask->info.taskLevel; + int32_t num = taosArrayGetSize(pTask->pRspMsgList); for (int32_t i = 0; i < num; ++i) { SStreamContinueExecInfo* pInfo = taosArrayGet(pTask->pRspMsgList, i); tmsgSendRsp(&pInfo->msg); - qDebug("s-task:%s level:%d notify upstream:0x%x to continue process data in WAL", pTask->id.idStr, pTask->info.taskLevel, - pInfo->taskId); + stDebug("s-task:%s level:%d notify upstream:0x%x continuing scan data in WAL", id, level, pInfo->taskId); } taosArrayClear(pTask->pRspMsgList); - qDebug("s-task:%s level:%d checkpoint ready msg sent to all %d upstreams", pTask->id.idStr, pTask->info.taskLevel, - num); + stDebug("s-task:%s level:%d continue process msg sent to all %d upstreams", id, level, num); + return 0; +} + +// this message has been sent successfully, let's try next one. 
+static int32_t handleDispatchSuccessRsp(SStreamTask* pTask, int32_t downstreamId) {
+  destroyDispatchMsg(pTask->msgInfo.pData, getNumOfDispatchBranch(pTask));
+  pTask->msgInfo.pData = NULL;
+
+  int64_t el = taosGetTimestampMs() - pTask->msgInfo.startTs;
+
+  // putting data into the inputQ of the current task is allowed again
+  if (pTask->inputInfo.status == TASK_INPUT_STATUS__BLOCKED) {
+    pTask->inputInfo.status = TASK_INPUT_STATUS__NORMAL;
+    stDebug("s-task:%s downstream task:0x%x resume to normal from inputQ blocking, blocking time:%" PRId64 "ms",
+            pTask->id.idStr, downstreamId, el);
+  } else {
+    stDebug("s-task:%s dispatch completed, elapsed time:%"PRId64"ms", pTask->id.idStr, el);
+  }
+
+  // now ready for next data output
+  atomic_store_8(&pTask->outputq.status, TASK_OUTPUT_STATUS__NORMAL);
+
+  // otherwise, continue dispatching the first block to the downstream task in the pipeline
+  streamDispatchStreamBlock(pTask);
   return 0;
 }
 
 int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code) {
   const char* id = pTask->id.idStr;
+  int32_t     vgId = pTask->pMeta->vgId;
+  int32_t     msgId = pTask->execInfo.dispatch;
+
+  // followers do not handle the dispatch rsp
+  if ((pTask->pMeta->role == NODE_ROLE_FOLLOWER) || (pTask->status.downstreamReady != 1)) {
+    stError("s-task:%s vgId:%d is follower or task just re-launched, not handle the dispatch rsp, discard it", id, vgId);
+    return TSDB_CODE_STREAM_TASK_NOT_EXIST;
+  }
+
+  // discard invalid dispatch rsp msg
+  if ((pRsp->msgId != msgId) || (pRsp->stage != pTask->pMeta->stage)) {
+    stError("s-task:%s vgId:%d not expect rsp, expected: msgId:%d, stage:%" PRId64 " actual msgId:%d, stage:%" PRId64
+            " discard it",
+            id, vgId, msgId, pTask->pMeta->stage, pRsp->msgId, pRsp->stage);
+    return TSDB_CODE_INVALID_MSG;
+  }
 
   if (code != TSDB_CODE_SUCCESS) {
     // dispatch message failed: network error, or node not available.
-    // in case of the input queue is full, the code will be TSDB_CODE_SUCCESS, the and pRsp>inputStatus will be set
-    // flag. here we need to retry dispatch this message to downstream task immediately. handle the case the failure
+    // in case the input queue is full, the code will be TSDB_CODE_SUCCESS and the pRsp->inputStatus flag will be set.
+    // Here we need to retry dispatching this message to the downstream task immediately, to handle the case where the
+    // failure happened too fast.
- // todo handle the shuffle dispatch failure - if (code == TSDB_CODE_STREAM_TASK_NOT_EXIST) { // destination task does not exist, not retry anymore - qWarn("s-task:%s failed to dispatch msg to task:0x%x, no retry, since it is destroyed already", id, pRsp->downstreamTaskId); + if (code == TSDB_CODE_STREAM_TASK_NOT_EXIST) { // destination task does not exist, not retry anymore + stError("s-task:%s failed to dispatch msg to task:0x%x(vgId:%d), msgId:%d no retry, since task destroyed already", id, + pRsp->downstreamTaskId, pRsp->downstreamNodeId, msgId); } else { - qError("s-task:%s failed to dispatch msg to task:0x%x, code:%s, retry cnt:%d", id, pRsp->downstreamTaskId, - tstrerror(code), ++pTask->msgInfo.retryCount); - int32_t ret = doDispatchAllBlocks(pTask, pTask->msgInfo.pData); - if (ret != TSDB_CODE_SUCCESS) { + stError("s-task:%s failed to dispatch msgId:%d to task:0x%x(vgId:%d), code:%s, add to retry list", id, msgId, + pRsp->downstreamTaskId, pRsp->downstreamNodeId, tstrerror(code)); + taosThreadMutexLock(&pTask->lock); + taosArrayPush(pTask->msgInfo.pRetryList, &pRsp->downstreamNodeId); + taosThreadMutexUnlock(&pTask->lock); + } + + } else { // code == 0 + if (pRsp->inputStatus == TASK_INPUT_STATUS__BLOCKED) { + pTask->inputInfo.status = TASK_INPUT_STATUS__BLOCKED; + // block the input of current task, to push pressure to upstream + taosThreadMutexLock(&pTask->lock); + taosArrayPush(pTask->msgInfo.pRetryList, &pRsp->downstreamNodeId); + taosThreadMutexUnlock(&pTask->lock); + + stError("s-task:%s inputQ of downstream task:0x%x(vgId:%d) is full, wait for %dms and retry dispatch data", id, + pRsp->downstreamTaskId, pRsp->downstreamNodeId, DISPATCH_RETRY_INTERVAL_MS); + } else if (pRsp->inputStatus == TASK_INPUT_STATUS__REFUSED) { + stError("s-task:%s downstream task:0x%x(vgId:%d) refused the dispatch msg, treat it as success", id, + pRsp->downstreamTaskId, pRsp->downstreamNodeId); + } + } + + int32_t leftRsp = 0; + if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { + leftRsp = atomic_sub_fetch_32(&pTask->outputInfo.shuffleDispatcher.waitingRspCnt, 1); + ASSERT(leftRsp >= 0); + + if (leftRsp > 0) { + stDebug( "s-task:%s recv dispatch rsp, msgId:%d from 0x%x(vgId:%d), downstream task input status:%d code:%d, waiting for %d rsp", + id, msgId, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->inputStatus, code, leftRsp); + } else { + stDebug( + "s-task:%s recv dispatch rsp, msgId:%d from 0x%x(vgId:%d), downstream task input status:%d code:%d, all rsp", + id, msgId, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->inputStatus, code); + } + } else { + stDebug("s-task:%s recv fix-dispatch rsp, msgId:%d from 0x%x(vgId:%d), downstream task input status:%d code:%d", + id, msgId, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->inputStatus, code); + } + + ASSERT(leftRsp >= 0); + + // all msg rsp already, continue + if (leftRsp == 0) { + ASSERT(pTask->outputq.status == TASK_OUTPUT_STATUS__WAIT); + + // we need to re-try send dispatch msg to downstream tasks + int32_t numOfFailed = taosArrayGetSize(pTask->msgInfo.pRetryList); + if (numOfFailed > 0) { + if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { + atomic_store_32(&pTask->outputInfo.shuffleDispatcher.waitingRspCnt, numOfFailed); + stDebug("s-task:%s waiting rsp set to be %d", id, pTask->outputInfo.shuffleDispatcher.waitingRspCnt); + } + + int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s failed to dispatch msg to downstream code:%s, add timer to retry in %dms, ref:%d", 
+ pTask->id.idStr, tstrerror(terrno), DISPATCH_RETRY_INTERVAL_MS, ref); + + streamRetryDispatchData(pTask, DISPATCH_RETRY_INTERVAL_MS); + } else { // this message has been sent successfully, let's try next one. + pTask->msgInfo.retryCount = 0; + + // transtate msg has been sent to downstream successfully. let's transfer the fill-history task state + if (pTask->msgInfo.dispatchMsgType == STREAM_INPUT__TRANS_STATE) { + stDebug("s-task:%s dispatch transtate msgId:%d to downstream successfully, start to transfer state", id, msgId); + ASSERT(pTask->info.fillHistory == 1); + + code = streamTransferStateToStreamTask(pTask); + if (code != TSDB_CODE_SUCCESS) { // todo: do nothing if error happens + } + + // now ready for next data output + atomic_store_8(&pTask->outputq.status, TASK_OUTPUT_STATUS__NORMAL); + } else { + handleDispatchSuccessRsp(pTask, pRsp->downstreamTaskId); } } - - return TSDB_CODE_SUCCESS; - } - - qDebug("s-task:%s recv dispatch rsp from 0x%x, downstream task input status:%d code:%d", id, pRsp->downstreamTaskId, - pRsp->inputStatus, code); - - // there are other dispatch message not response yet - if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { - int32_t leftRsp = atomic_sub_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1); - qDebug("s-task:%s is shuffle, left waiting rsp %d", id, leftRsp); - if (leftRsp > 0) { - return 0; - } - } - - // transtate msg has been sent to downstream successfully. let's transfer the fill-history task state - SStreamDataBlock* p = pTask->msgInfo.pData; - if (p->type == STREAM_INPUT__TRANS_STATE) { - qDebug("s-task:%s dispatch transtate msg to downstream successfully, start to transfer state", id); - ASSERT(pTask->info.fillHistory == 1); - code = streamTransferStateToStreamTask(pTask); - if (code != TSDB_CODE_SUCCESS) { // todo: do nothing if error happens - } - - return TSDB_CODE_SUCCESS; - } - - pTask->msgInfo.retryCount = 0; - ASSERT(pTask->outputInfo.status == TASK_OUTPUT_STATUS__WAIT); - - qDebug("s-task:%s output status is set to:%d", id, pTask->outputInfo.status); - - // the input queue of the (down stream) task that receive the output data is full, - // so the TASK_INPUT_STATUS_BLOCKED is rsp - if (pRsp->inputStatus == TASK_INPUT_STATUS__BLOCKED) { - pTask->inputInfo.status = TASK_INPUT_STATUS__BLOCKED; // block the input of current task, to push pressure to upstream - double el = 0; - if (pTask->msgInfo.blockingTs == 0) { - pTask->msgInfo.blockingTs = taosGetTimestampMs(); // record the blocking start time - } else { - el = (taosGetTimestampMs() - pTask->msgInfo.blockingTs) / 1000.0; - } - - int8_t ref = atomic_add_fetch_8(&pTask->status.timerActive, 1); - qError("s-task:%s inputQ of downstream task:0x%x is full, time:%" PRId64 - " wait for %dms and retry dispatch data, total wait:%.2fSec ref:%d", - id, pRsp->downstreamTaskId, pTask->msgInfo.blockingTs, DISPATCH_RETRY_INTERVAL_MS, el, ref); - streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS); - } else { // pipeline send data in output queue - // this message has been sent successfully, let's try next one. 
- destroyStreamDataBlock(pTask->msgInfo.pData); - pTask->msgInfo.pData = NULL; - - if (pTask->msgInfo.blockingTs != 0) { - int64_t el = taosGetTimestampMs() - pTask->msgInfo.blockingTs; - qDebug("s-task:%s downstream task:0x%x resume to normal from inputQ blocking, blocking time:%" PRId64 "ms", id, - pRsp->downstreamTaskId, el); - pTask->msgInfo.blockingTs = 0; - - // put data into inputQ of current task is also allowed - pTask->inputInfo.status = TASK_INPUT_STATUS__NORMAL; - } - - // now ready for next data output - atomic_store_8(&pTask->outputInfo.status, TASK_OUTPUT_STATUS__NORMAL); - - // otherwise, continue dispatch the first block to down stream task in pipeline - streamDispatchStreamBlock(pTask); } return 0; diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index f03a6a32d4..12b51e6c93 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -16,9 +16,9 @@ #include "streamInt.h" // maximum allowed processed block batches. One block may include several submit blocks -#define MAX_STREAM_EXEC_BATCH_NUM 32 -#define MIN_STREAM_EXEC_BATCH_NUM 4 -#define STREAM_RESULT_DUMP_THRESHOLD 100 +#define MAX_STREAM_EXEC_BATCH_NUM 32 +#define STREAM_RESULT_DUMP_THRESHOLD 300 +#define STREAM_RESULT_DUMP_SIZE_THRESHOLD (1048576 * 1) static int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask); @@ -36,10 +36,10 @@ static int32_t doOutputResultBlockImpl(SStreamTask* pTask, SStreamDataBlock* pBl int32_t code = 0; int32_t type = pTask->outputInfo.type; if (type == TASK_OUTPUT__TABLE) { - pTask->tbSink.tbSinkFunc(pTask, pTask->tbSink.vnode, pBlock->blocks); + pTask->outputInfo.tbSink.tbSinkFunc(pTask, pTask->outputInfo.tbSink.vnode, pBlock->blocks); destroyStreamDataBlock(pBlock); } else if (type == TASK_OUTPUT__SMA) { - pTask->smaSink.smaSink(pTask->smaSink.vnode, pTask->smaSink.smaId, pBlock->blocks); + pTask->outputInfo.smaSink.smaSink(pTask->outputInfo.smaSink.vnode, pTask->outputInfo.smaSink.smaId, pBlock->blocks); destroyStreamDataBlock(pBlock); } else { ASSERT(type == TASK_OUTPUT__FIXED_DISPATCH || type == TASK_OUTPUT__SHUFFLE_DISPATCH); @@ -65,17 +65,16 @@ static int32_t doDumpResult(SStreamTask* pTask, SStreamQueueItem* pItem, SArray* SStreamDataBlock* pStreamBlocks = createStreamBlockFromResults(pItem, pTask, size, pRes); if (pStreamBlocks == NULL) { - qError("s-task:%s failed to create result stream data block, code:%s", pTask->id.idStr, tstrerror(terrno)); + stError("s-task:%s failed to create result stream data block, code:%s", pTask->id.idStr, tstrerror(terrno)); taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); return TSDB_CODE_OUT_OF_MEMORY; } - qDebug("s-task:%s dump stream result data blocks, num:%d, size:%.2fMiB", pTask->id.idStr, numOfBlocks, - SIZE_IN_MB(size)); + stDebug("s-task:%s dump stream result data blocks, num:%d, size:%.2fMiB", pTask->id.idStr, numOfBlocks, + SIZE_IN_MiB(size)); int32_t code = doOutputResultBlockImpl(pTask, pStreamBlocks); if (code != TSDB_CODE_SUCCESS) { // back pressure and record position - //code == TSDB_CODE_UTIL_QUEUE_OUT_OF_MEMORY destroyStreamDataBlock(pStreamBlocks); return code; } @@ -109,7 +108,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i } if (pTask->inputInfo.status == TASK_INPUT_STATUS__BLOCKED) { - qWarn("s-task:%s downstream task inputQ blocked, idle for 1sec and retry exec task", pTask->id.idStr); + stWarn("s-task:%s downstream task inputQ blocked, idle for 1sec and retry exec task", pTask->id.idStr); taosMsleep(1000); 
continue; } @@ -121,7 +120,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i resetTaskInfo(pExecutor); } - qError("unexpected stream execution, s-task:%s since %s", pTask->id.idStr, tstrerror(code)); + stError("unexpected stream execution, s-task:%s since %s", pTask->id.idStr, tstrerror(code)); continue; } @@ -137,7 +136,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i taosArrayPush(pRes, &block); numOfBlocks += 1; - qDebug("s-task:%s(child %d) retrieve process completed, reqId:0x%" PRIx64 " dump results", pTask->id.idStr, + stDebug("s-task:%s(child %d) retrieve process completed, reqId:0x%" PRIx64 " dump results", pTask->id.idStr, pTask->info.selfChildId, pRetrieveBlock->reqId); } @@ -162,11 +161,11 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i taosArrayPush(pRes, &block); - qDebug("s-task:%s (child %d) executed and get %d result blocks, size:%.2fMiB", pTask->id.idStr, - pTask->info.selfChildId, numOfBlocks, SIZE_IN_MB(size)); + stDebug("s-task:%s (child %d) executed and get %d result blocks, size:%.2fMiB", pTask->id.idStr, + pTask->info.selfChildId, numOfBlocks, SIZE_IN_MiB(size)); // current output should be dispatched to down stream nodes - if (numOfBlocks >= STREAM_RESULT_DUMP_THRESHOLD) { + if (numOfBlocks >= STREAM_RESULT_DUMP_THRESHOLD || size >= STREAM_RESULT_DUMP_SIZE_THRESHOLD) { ASSERT(numOfBlocks == taosArrayGetSize(pRes)); code = doDumpResult(pTask, pItem, pRes, size, totalSize, totalBlocks); if (code != TSDB_CODE_SUCCESS) { @@ -195,14 +194,13 @@ int32_t streamScanHistoryData(SStreamTask* pTask) { int32_t code = TSDB_CODE_SUCCESS; void* exec = pTask->exec.pExecutor; bool finished = false; - int32_t outputBatchSize = 100; qSetStreamOpOpen(exec); while (!finished) { if (streamTaskShouldPause(&pTask->status)) { - double el = (taosGetTimestampMs() - pTask->tsInfo.step1Start) / 1000.0; - qDebug("s-task:%s paused from the scan-history task, elapsed time:%.2fsec", pTask->id.idStr, el); + double el = (taosGetTimestampMs() - pTask->execInfo.step1Start) / 1000.0; + stDebug("s-task:%s paused from the scan-history task, elapsed time:%.2fsec", pTask->id.idStr, el); break; } @@ -212,6 +210,7 @@ int32_t streamScanHistoryData(SStreamTask* pTask) { return -1; } + int32_t size = 0; int32_t numOfBlocks = 0; while (1) { if (streamTaskShouldStop(&pTask->status)) { @@ -220,7 +219,7 @@ int32_t streamScanHistoryData(SStreamTask* pTask) { } if (pTask->inputInfo.status == TASK_INPUT_STATUS__BLOCKED) { - qDebug("s-task:%s inputQ is blocked, wait for 10sec and retry", pTask->id.idStr); + stDebug("s-task:%s inputQ is blocked, wait for 10sec and retry", pTask->id.idStr); taosMsleep(10000); continue; } @@ -229,7 +228,7 @@ int32_t streamScanHistoryData(SStreamTask* pTask) { uint64_t ts = 0; code = qExecTask(exec, &output, &ts); if (code != TSDB_CODE_TSC_QUERY_KILLED && code != TSDB_CODE_SUCCESS) { - qError("%s scan-history data error occurred code:%s, continue scan", pTask->id.idStr, tstrerror(code)); + stError("%s scan-history data error occurred code:%s, continue scan", pTask->id.idStr, tstrerror(code)); continue; } @@ -244,27 +243,20 @@ int32_t streamScanHistoryData(SStreamTask* pTask) { block.info.childId = pTask->info.selfChildId; taosArrayPush(pRes, &block); - if ((++numOfBlocks) >= outputBatchSize) { - qDebug("s-task:%s scan exec numOfBlocks:%d, output limit:%d reached", pTask->id.idStr, numOfBlocks, outputBatchSize); + size += blockDataGetSize(output) + sizeof(SSDataBlock) + 
sizeof(SColumnInfoData) * blockDataGetNumOfCols(&block); + + if ((++numOfBlocks) >= STREAM_RESULT_DUMP_THRESHOLD || size >= STREAM_RESULT_DUMP_SIZE_THRESHOLD) { + stDebug("s-task:%s scan exec numOfBlocks:%d, size:%.2fKiB output num-limit:%d, size-limit:%.2fKiB reached", + pTask->id.idStr, numOfBlocks, SIZE_IN_KiB(size), STREAM_RESULT_DUMP_THRESHOLD, + SIZE_IN_KiB(STREAM_RESULT_DUMP_SIZE_THRESHOLD)); break; } } if (taosArrayGetSize(pRes) > 0) { - SStreamDataBlock* qRes = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0); - if (qRes == NULL) { - taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - qRes->type = STREAM_INPUT__DATA_BLOCK; - qRes->blocks = pRes; - - code = doOutputResultBlockImpl(pTask, qRes); - if (code == TSDB_CODE_UTIL_QUEUE_OUT_OF_MEMORY) { - taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); - taosFreeQitem(qRes); + SStreamDataBlock* pStreamBlocks = createStreamBlockFromResults(NULL, pTask, size, pRes); + code = doOutputResultBlockImpl(pTask, pStreamBlocks); + if (code != TSDB_CODE_SUCCESS) { return code; } } else { @@ -281,14 +273,14 @@ static void waitForTaskIdle(SStreamTask* pTask, SStreamTask* pStreamTask) { int64_t st = taosGetTimestampMs(); while (!streamTaskIsIdle(pStreamTask)) { - qDebug("s-task:%s level:%d wait for stream task:%s to be idle, check again in 100ms", id, pTask->info.taskLevel, + stDebug("s-task:%s level:%d wait for stream task:%s to be idle, check again in 100ms", id, pTask->info.taskLevel, pStreamTask->id.idStr); taosMsleep(100); } double el = (taosGetTimestampMs() - st) / 1000.0; if (el > 0) { - qDebug("s-task:%s wait for stream task:%s for %.2fs to be idle", id, pStreamTask->id.idStr, el); + stDebug("s-task:%s wait for stream task:%s for %.2fs to be idle", id, pStreamTask->id.idStr, el); } } @@ -297,13 +289,13 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { SStreamTask* pStreamTask = streamMetaAcquireTask(pMeta, pTask->streamTaskId.streamId, pTask->streamTaskId.taskId); if (pStreamTask == NULL) { - qError( + stError( "s-task:%s failed to find related stream task:0x%x, it may have been destroyed or closed, destroy the related " "fill-history task", - pTask->id.idStr, pTask->streamTaskId.taskId); + pTask->id.idStr, (int32_t) pTask->streamTaskId.taskId); // 1. free it and remove fill-history task from disk meta-store - streamMetaUnregisterTask(pMeta, pTask->id.streamId, pTask->id.taskId); + streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &pTask->id); // 2. 
save to disk
   taosWLockLatch(&pMeta->lock);
@@ -313,11 +305,11 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) {
     taosWUnLockLatch(&pMeta->lock);
     return TSDB_CODE_STREAM_TASK_NOT_EXIST;
   } else {
-    qDebug("s-task:%s fill-history task end, update related stream task:%s info, transfer exec state", pTask->id.idStr,
+    stDebug("s-task:%s fill-history task end, update related stream task:%s info, transfer exec state", pTask->id.idStr,
            pStreamTask->id.idStr);
   }
 
-  ASSERT(pStreamTask->historyTaskId.taskId == pTask->id.taskId && pTask->status.appendTranstateBlock == true);
+  ASSERT(pStreamTask->hTaskInfo.id.taskId == pTask->id.taskId && pTask->status.appendTranstateBlock == true);
 
   STimeWindow* pTimeWindow = &pStreamTask->dataRange.window;
@@ -330,7 +322,7 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) {
   } else {
     ASSERT(status == TASK_STATUS__NORMAL);
     pStreamTask->status.taskStatus = TASK_STATUS__HALT;
-    qDebug("s-task:%s halt by related fill-history task:%s", pStreamTask->id.idStr, pTask->id.idStr);
+    stDebug("s-task:%s halt by related fill-history task:%s", pStreamTask->id.idStr, pTask->id.idStr);
   }
 
   // wait for the stream task to handle all in the inputQ, and to be idle
@@ -342,12 +334,12 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) {
   // When a task is idle with halt status, all data in inputQ are consumed.
   if (pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE) {
     // update the scan data range for source task.
-    qDebug("s-task:%s level:%d stream task window %" PRId64 " - %" PRId64 " update to %" PRId64 " - %" PRId64
+    stDebug("s-task:%s level:%d stream task window %" PRId64 " - %" PRId64 " update to %" PRId64 " - %" PRId64
            ", status:%s, sched-status:%d",
            pStreamTask->id.idStr, TASK_LEVEL__SOURCE, pTimeWindow->skey, pTimeWindow->ekey, INT64_MIN,
           pTimeWindow->ekey, streamGetTaskStatusStr(TASK_STATUS__NORMAL), pStreamTask->status.schedStatus);
   } else {
-    qDebug("s-task:%s no need to update time window for non-source task", pStreamTask->id.idStr);
+    stDebug("s-task:%s no need to update time window for non-source task", pStreamTask->id.idStr);
   }
 
   // 1. expand the query time window for stream task of WAL scanner
@@ -358,17 +350,18 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) {
   streamTaskReleaseState(pTask);
   streamTaskReloadState(pStreamTask);
 
-  // 3. clear the link between fill-history task and stream task info
-  pStreamTask->historyTaskId.taskId = 0;
-
-  // 4. resume the state of stream task, after this function, the stream task will run immidately. But it can not be
+  // 3. resume the state of stream task, after this function, the stream task will run immediately. But it can not be
   // pause, since the pause allowed attribute is not set yet.
   streamTaskResumeFromHalt(pStreamTask);
 
-  qDebug("s-task:%s fill-history task set status to be dropping, save the state into disk", pTask->id.idStr);
+  stDebug("s-task:%s fill-history task set status to be dropping, save the state into disk", pTask->id.idStr);
 
-  // 5. free it and remove fill-history task from disk meta-store
-  streamMetaUnregisterTask(pMeta, pTask->id.streamId, pTask->id.taskId);
+  // 4. free it and remove fill-history task from disk meta-store
+  streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &pTask->id);
+
+  // 5. clear the link between fill-history task and stream task info
+  pStreamTask->hTaskInfo.id.taskId = 0;
+  pStreamTask->hTaskInfo.id.streamId = 0;
 
  // 6.
save to disk taosWLockLatch(&pMeta->lock); @@ -389,7 +382,7 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { pItem->type = STREAM_INPUT__REF_DATA_BLOCK; pItem->pBlock = pDelBlock; int32_t code = streamTaskPutDataIntoInputQ(pStreamTask, (SStreamQueueItem*)pItem); - qDebug("s-task:%s append dummy delete block,res:%d", pStreamTask->id.idStr, code); + stDebug("s-task:%s append dummy delete block,res:%d", pStreamTask->id.idStr, code); } streamSchedExec(pStreamTask); @@ -408,6 +401,8 @@ int32_t streamTransferStateToStreamTask(SStreamTask* pTask) { if (level == TASK_LEVEL__AGG || level == TASK_LEVEL__SOURCE) { // do transfer task operator states. code = streamDoTransferStateToStreamTask(pTask); + } else { // drop fill-history task + streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pTask->pMeta->vgId, &pTask->id); } return code; @@ -426,7 +421,7 @@ static void doSetStreamInputBlock(SStreamTask* pTask, const void* pInput, int64_ ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); const SStreamDataSubmit* pSubmit = (const SStreamDataSubmit*)pInput; qSetMultiStreamInput(pExecutor, &pSubmit->submit, 1, STREAM_INPUT__DATA_SUBMIT); - qDebug("s-task:%s set submit blocks as source block completed, %p %p len:%d ver:%" PRId64, id, pSubmit, + stDebug("s-task:%s set submit blocks as source block completed, %p %p len:%d ver:%" PRId64, id, pSubmit, pSubmit->submit.msgStr, pSubmit->submit.msgLen, pSubmit->submit.ver); ASSERT((*pVer) <= pSubmit->submit.ver); (*pVer) = pSubmit->submit.ver; @@ -436,7 +431,7 @@ static void doSetStreamInputBlock(SStreamTask* pTask, const void* pInput, int64_ SArray* pBlockList = pBlock->blocks; int32_t numOfBlocks = taosArrayGetSize(pBlockList); - qDebug("s-task:%s set sdata blocks as input num:%d, ver:%" PRId64, id, numOfBlocks, pBlock->sourceVer); + stDebug("s-task:%s set sdata blocks as input num:%d, ver:%" PRId64, id, numOfBlocks, pBlock->sourceVer); qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__DATA_BLOCK); } else if (pItem->type == STREAM_INPUT__MERGED_SUBMIT) { @@ -444,7 +439,7 @@ static void doSetStreamInputBlock(SStreamTask* pTask, const void* pInput, int64_ SArray* pBlockList = pMerged->submits; int32_t numOfBlocks = taosArrayGetSize(pBlockList); - qDebug("s-task:%s %p set (merged) submit blocks as a batch, numOfBlocks:%d, ver:%" PRId64, id, pTask, numOfBlocks, + stDebug("s-task:%s %p set (merged) submit blocks as a batch, numOfBlocks:%d, ver:%" PRId64, id, pTask, numOfBlocks, pMerged->ver); qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__MERGED_SUBMIT); ASSERT((*pVer) <= pMerged->ver); @@ -472,7 +467,7 @@ int32_t streamProcessTranstateBlock(SStreamTask* pTask, SStreamDataBlock* pBlock int32_t remain = streamAlignTransferState(pTask); if (remain > 0) { streamFreeQitem((SStreamQueueItem*)pBlock); - qDebug("s-task:%s receive upstream transfer state msg, remain:%d", id, remain); + stDebug("s-task:%s receive upstream transfer state msg, remain:%d", id, remain); return 0; } } @@ -483,16 +478,16 @@ int32_t streamProcessTranstateBlock(SStreamTask* pTask, SStreamDataBlock* pBlock // transfer the ownership of executor state if (type == TASK_OUTPUT__FIXED_DISPATCH || type == TASK_OUTPUT__SHUFFLE_DISPATCH) { if (level == TASK_LEVEL__SOURCE) { - qDebug("s-task:%s add transfer-state block into outputQ", id); + stDebug("s-task:%s add transfer-state block into outputQ", id); } else { - qDebug("s-task:%s all upstream tasks send transfer-state block, add transfer-state block into outputQ", id); + stDebug("s-task:%s 
all upstream tasks send transfer-state block, add transfer-state block into outputQ", id); ASSERT(pTask->streamTaskId.taskId != 0 && pTask->info.fillHistory == 1); } // agg task should dispatch trans-state msg to sink task, to flush all data to sink task. if (level == TASK_LEVEL__AGG || level == TASK_LEVEL__SOURCE) { pBlock->srcVgId = pTask->pMeta->vgId; - code = taosWriteQitem(pTask->outputInfo.queue->pQueue, pBlock); + code = taosWriteQitem(pTask->outputq.queue->pQueue, pBlock); if (code == 0) { streamDispatchStreamBlock(pTask); } else { @@ -503,16 +498,12 @@ int32_t streamProcessTranstateBlock(SStreamTask* pTask, SStreamDataBlock* pBlock } } else { // non-dispatch task, do task state transfer directly streamFreeQitem((SStreamQueueItem*)pBlock); - if (level != TASK_LEVEL__SINK) { - qDebug("s-task:%s non-dispatch task, start to transfer state directly", id); - ASSERT(pTask->info.fillHistory == 1); - code = streamTransferStateToStreamTask(pTask); + stDebug("s-task:%s non-dispatch task, start to transfer state directly", id); + ASSERT(pTask->info.fillHistory == 1); + code = streamTransferStateToStreamTask(pTask); - if (code != TSDB_CODE_SUCCESS) { - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); - } - } else { - qDebug("s-task:%s sink task does not transfer state", id); + if (code != TSDB_CODE_SUCCESS) { + /*int8_t status = */ streamTaskSetSchedStatusInActive(pTask); } } @@ -526,18 +517,19 @@ int32_t streamProcessTranstateBlock(SStreamTask* pTask, SStreamDataBlock* pBlock int32_t streamExecForAll(SStreamTask* pTask) { const char* id = pTask->id.idStr; + // merge multiple input data if possible in the input queue. + stDebug("s-task:%s start to extract data block from inputQ", id); + while (1) { + int32_t blockSize = 0; int32_t numOfBlocks = 0; SStreamQueueItem* pInput = NULL; if (streamTaskShouldStop(&pTask->status)) { - qDebug("s-task:%s stream task is stopped", id); + stDebug("s-task:%s stream task is stopped", id); break; } - // merge multiple input data if possible in the input queue. 
- qDebug("s-task:%s start to extract data block from inputQ", id); - - /*int32_t code = */ streamTaskGetDataFromInputQ(pTask, &pInput, &numOfBlocks); + /*int32_t code = */ streamTaskGetDataFromInputQ(pTask, &pInput, &numOfBlocks, &blockSize); if (pInput == NULL) { ASSERT(numOfBlocks == 0); return 0; @@ -559,8 +551,10 @@ int32_t streamExecForAll(SStreamTask* pTask) { if (pTask->info.taskLevel == TASK_LEVEL__SINK) { ASSERT(type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__CHECKPOINT); + // here only handle the data block sink operation if (type == STREAM_INPUT__DATA_BLOCK) { - qDebug("s-task:%s sink task start to sink %d blocks", id, numOfBlocks); + pTask->execInfo.sink.dataSize += blockSize; + stDebug("s-task:%s sink task start to sink %d blocks, size:%.2fKiB", id, numOfBlocks, SIZE_IN_KiB(blockSize)); doOutputResultBlockImpl(pTask, (SStreamDataBlock*)pInput); continue; } @@ -569,7 +563,7 @@ int32_t streamExecForAll(SStreamTask* pTask) { int64_t st = taosGetTimestampMs(); const SStreamQueueItem* pItem = pInput; - qDebug("s-task:%s start to process batch of blocks, num:%d, type:%d", id, numOfBlocks, pItem->type); + stDebug("s-task:%s start to process batch of blocks, num:%d, type:%d", id, numOfBlocks, pItem->type); int64_t ver = pTask->chkInfo.checkpointVer; doSetStreamInputBlock(pTask, pInput, &ver, id); @@ -579,14 +573,14 @@ int32_t streamExecForAll(SStreamTask* pTask) { streamTaskExecImpl(pTask, pInput, &resSize, &totalBlocks); double el = (taosGetTimestampMs() - st) / 1000.0; - qDebug("s-task:%s batch of input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d", id, el, - SIZE_IN_MB(resSize), totalBlocks); + stDebug("s-task:%s batch of input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d", id, el, + SIZE_IN_MiB(resSize), totalBlocks); // update the currentVer if processing the submit blocks. ASSERT(pTask->chkInfo.checkpointVer <= pTask->chkInfo.nextProcessVer && ver >= pTask->chkInfo.checkpointVer); if (ver != pTask->chkInfo.checkpointVer) { - qDebug("s-task:%s update checkpointVer(unsaved) from %" PRId64 " to %" PRId64 " , currentVer:%" PRId64, + stDebug("s-task:%s update checkpointVer(unsaved) from %" PRId64 " to %" PRId64 ", nextProcessVer:%" PRId64, pTask->id.idStr, pTask->chkInfo.checkpointVer, ver, pTask->chkInfo.nextProcessVer); pTask->chkInfo.checkpointVer = ver; } @@ -596,8 +590,8 @@ int32_t streamExecForAll(SStreamTask* pTask) { // todo other thread may change the status // do nothing after sync executor state to storage backend, untill the vnode-level checkpoint is completed. if (type == STREAM_INPUT__CHECKPOINT) { - qDebug("s-task:%s checkpoint block received, set the status:%s", pTask->id.idStr, - streamGetTaskStatusStr(pTask->status.taskStatus)); + stDebug("s-task:%s checkpoint block received, set status:%s", pTask->id.idStr, + streamGetTaskStatusStr(pTask->status.taskStatus)); streamTaskBuildCheckpoint(pTask); return 0; } @@ -613,30 +607,33 @@ bool streamTaskIsIdle(const SStreamTask* pTask) { pTask->status.taskStatus == TASK_STATUS__DROPPING); } -int32_t streamTryExec(SStreamTask* pTask) { +int32_t streamExecTask(SStreamTask* pTask) { // this function may be executed by multi-threads, so status check is required. 
- int8_t schedStatus = - atomic_val_compare_exchange_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__WAITING, TASK_SCHED_STATUS__ACTIVE); - const char* id = pTask->id.idStr; + int8_t schedStatus = streamTaskSetSchedStatusActive(pTask); if (schedStatus == TASK_SCHED_STATUS__WAITING) { - int32_t code = streamExecForAll(pTask); - if (code < 0) { // todo this status shoudl be removed - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__FAILED); - return -1; - } + while (1) { + int32_t code = streamExecForAll(pTask); + if (code < 0) { // todo this status should be removed + atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__FAILED); + return -1; + } - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); - qDebug("s-task:%s exec completed, status:%s, sched-status:%d", id, streamGetTaskStatusStr(pTask->status.taskStatus), - pTask->status.schedStatus); + taosThreadMutexLock(&pTask->lock); + if ((streamQueueGetNumOfItems(pTask->inputInfo.queue) == 0) || streamTaskShouldStop(&pTask->status) || + streamTaskShouldPause(&pTask->status)) { + atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + taosThreadMutexUnlock(&pTask->lock); - if (!(taosQueueEmpty(pTask->inputInfo.queue->pQueue) || streamTaskShouldStop(&pTask->status) || - streamTaskShouldPause(&pTask->status))) { - streamSchedExec(pTask); + stDebug("s-task:%s exec completed, status:%s, sched-status:%d", id, + streamGetTaskStatusStr(pTask->status.taskStatus), pTask->status.schedStatus); + return 0; + } + taosThreadMutexUnlock(&pTask->lock); } } else { - qDebug("s-task:%s already started to exec by other thread, status:%s, sched-status:%d", id, + stDebug("s-task:%s already started to exec by other thread, status:%s, sched-status:%d", id, streamGetTaskStatusStr(pTask->status.taskStatus), pTask->status.schedStatus); } @@ -644,7 +641,7 @@ int32_t streamTryExec(SStreamTask* pTask) { } int32_t streamTaskReleaseState(SStreamTask* pTask) { - qDebug("s-task:%s release exec state", pTask->id.idStr); + stDebug("s-task:%s release exec state", pTask->id.idStr); void* pExecutor = pTask->exec.pExecutor; if (pExecutor != NULL) { int32_t code = qStreamOperatorReleaseState(pExecutor); @@ -655,7 +652,7 @@ int32_t streamTaskReleaseState(SStreamTask* pTask) { } int32_t streamTaskReloadState(SStreamTask* pTask) { - qDebug("s-task:%s reload exec state", pTask->id.idStr); + stDebug("s-task:%s reload exec state", pTask->id.idStr); void* pExecutor = pTask->exec.pExecutor; if (pExecutor != NULL) { int32_t code = qStreamOperatorReloadState(pExecutor); @@ -666,10 +663,10 @@ int32_t streamTaskReloadState(SStreamTask* pTask) { } int32_t streamAlignTransferState(SStreamTask* pTask) { - int32_t numOfUpstream = taosArrayGetSize(pTask->pUpstreamInfoList); + int32_t numOfUpstream = taosArrayGetSize(pTask->upstreamInfo.pList); int32_t old = atomic_val_compare_exchange_32(&pTask->transferStateAlignCnt, 0, numOfUpstream); if (old == 0) { - qDebug("s-task:%s set the transfer state aligncnt %d", pTask->id.idStr, numOfUpstream); + stDebug("s-task:%s set the transfer state aligncnt %d", pTask->id.idStr, numOfUpstream); } return atomic_sub_fetch_32(&pTask->transferStateAlignCnt, 1); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 85380151f3..f7b0cdb0f1 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -20,10 +20,7 @@ #include "tref.h" #include "tstream.h" #include "ttimer.h" - -#define META_HB_CHECK_INTERVAL 200 -#define 
META_HB_SEND_IDLE_COUNTER 25  // send hb every 5 sec
-#define STREAM_TASK_KEY_LEN       ((sizeof(int64_t)) << 1)
+#include "wal.h"
 
 static TdThreadOnce streamMetaModuleInit = PTHREAD_ONCE_INIT;
@@ -36,13 +33,20 @@ static void metaHbToMnode(void* param, void* tmrId);
 static void    streamMetaClear(SStreamMeta* pMeta);
 static int32_t streamMetaBegin(SStreamMeta* pMeta);
 static void    streamMetaCloseImpl(void* arg);
-static void    extractStreamTaskKey(int64_t* pKey, const SStreamTask* pTask);
 
 typedef struct {
   TdThreadMutex mutex;
   SHashObj*     pTable;
 } SMetaRefMgt;
 
+struct SMetaHbInfo {
+  tmr_h   hbTmr;
+  int32_t stopFlag;
+  int32_t tickCounter;
+  int32_t hbCount;
+  int64_t hbStart;
+};
+
 SMetaRefMgt gMetaRefMgt;
 
 void metaRefMgtInit();
@@ -108,7 +112,7 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF
   SStreamMeta* pMeta = taosMemoryCalloc(1, sizeof(SStreamMeta));
   if (pMeta == NULL) {
     terrno = TSDB_CODE_OUT_OF_MEMORY;
-    qError("vgId:%d failed to prepare stream meta, alloc size:%" PRIzu ", out of memory", vgId, sizeof(SStreamMeta));
+    stError("vgId:%d failed to prepare stream meta, alloc size:%" PRIzu ", out of memory", vgId, sizeof(SStreamMeta));
     return NULL;
   }
@@ -129,14 +133,27 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF
   if (tdbTbOpen("checkpoint.db", sizeof(int32_t), -1, NULL, pMeta->db, &pMeta->pCheckpointDb, 0) < 0) {
     goto _err;
   }
-
   if (streamMetaBegin(pMeta) < 0) {
     goto _err;
   }
 
   _hash_fn_t fp = taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR);
-  pMeta->pTasks = taosHashInit(64, fp, true, HASH_NO_LOCK);
-  if (pMeta->pTasks == NULL) {
+  pMeta->pTasksMap = taosHashInit(64, fp, true, HASH_NO_LOCK);
+  if (pMeta->pTasksMap == NULL) {
+    goto _err;
+  }
+
+  pMeta->pUpdateTaskSet = taosHashInit(64, fp, false, HASH_NO_LOCK);
+  if (pMeta->pUpdateTaskSet == NULL) {
+    goto _err;
+  }
+
+  pMeta->startInfo.pReadyTaskSet = taosHashInit(64, fp, false, HASH_NO_LOCK);
+  if (pMeta->startInfo.pReadyTaskSet == NULL) {
+    goto _err;
+  }
+
+  pMeta->pHbInfo = taosMemoryCalloc(1, sizeof(SMetaHbInfo));
+  if (pMeta->pHbInfo == NULL) {
     goto _err;
   }
@@ -160,9 +177,9 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF
 
   metaRefMgtAdd(pMeta->vgId, pRid);
 
-  pMeta->hbInfo.hbTmr = taosTmrStart(metaHbToMnode, META_HB_CHECK_INTERVAL, pRid, streamEnv.timer);
-  pMeta->hbInfo.tickCounter = 0;
-  pMeta->hbInfo.stopFlag = 0;
+  pMeta->pHbInfo->hbTmr = taosTmrStart(metaHbToMnode, META_HB_CHECK_INTERVAL, pRid, streamEnv.timer);
+  pMeta->pHbInfo->tickCounter = 0;
+  pMeta->pHbInfo->stopFlag = 0;
 
   pMeta->pTaskBackendUnique =
       taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK);
@@ -174,48 +191,48 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF
   pMeta->chkpId = streamGetLatestCheckpointId(pMeta);
   pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId);
   while (pMeta->streamBackend == NULL) {
-    taosMsleep(2 * 1000);
+    taosMsleep(100);
     pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId);
     if (pMeta->streamBackend == NULL) {
-      qError("vgId:%d failed to init stream backend", pMeta->vgId);
-      qInfo("vgId:%d retry to init stream backend", pMeta->vgId);
+      stInfo("vgId:%d failed to init stream backend, retry in 100ms", pMeta->vgId);
     }
   }
 
   pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend);
+  pMeta->role = NODE_ROLE_UNINIT;
   code = streamBackendLoadCheckpointInfo(pMeta);
 
   taosInitRWLatch(&pMeta->lock);
   taosThreadMutexInit(&pMeta->backendMutex, NULL);
 
-
pMeta->pauseTaskNum = 0; - - qInfo("vgId:%d open stream meta successfully, latest checkpoint:%" PRId64 ", stage:%" PRId64, vgId, pMeta->chkpId, - stage); + pMeta->numOfPausedTasks = 0; + pMeta->numOfStreamTasks = 0; + stInfo("vgId:%d open stream meta successfully, latest checkpoint:%" PRId64 ", stage:%" PRId64, vgId, pMeta->chkpId, + stage); return pMeta; _err: taosMemoryFree(pMeta->path); - if (pMeta->pTasks) taosHashCleanup(pMeta->pTasks); + if (pMeta->pTasksMap) taosHashCleanup(pMeta->pTasksMap); if (pMeta->pTaskList) taosArrayDestroy(pMeta->pTaskList); if (pMeta->pTaskDb) tdbTbClose(pMeta->pTaskDb); if (pMeta->pCheckpointDb) tdbTbClose(pMeta->pCheckpointDb); if (pMeta->db) tdbClose(pMeta->db); - - // taosThreadMutexDestroy(&pMeta->backendMutex); - // taosThreadRwlockDestroy(&pMeta->lock); - + if (pMeta->pHbInfo) taosMemoryFreeClear(pMeta->pHbInfo); + if (pMeta->pUpdateTaskSet) taosHashCleanup(pMeta->pUpdateTaskSet); + if (pMeta->startInfo.pReadyTaskSet) taosHashCleanup(pMeta->startInfo.pReadyTaskSet); taosMemoryFree(pMeta); - qError("failed to open stream meta"); + stError("failed to open stream meta"); return NULL; } -int32_t streamMetaReopen(SStreamMeta* pMeta, int64_t chkpId) { +int32_t streamMetaReopen(SStreamMeta* pMeta) { streamMetaClear(pMeta); pMeta->streamBackendRid = -1; pMeta->streamBackend = NULL; + pMeta->role = NODE_ROLE_UNINIT; char* defaultPath = taosMemoryCalloc(1, strlen(pMeta->path) + 128); sprintf(defaultPath, "%s%s%s", pMeta->path, TD_DIRSEP, "state"); @@ -230,8 +247,8 @@ int32_t streamMetaReopen(SStreamMeta* pMeta, int64_t chkpId) { code = taosRenameFile(newPath, defaultPath); if (code != 0) { terrno = TAOS_SYSTEM_ERROR(code); - qError("vgId:%d failed to rename file, from %s to %s, code:%s", pMeta->vgId, newPath, defaultPath, - tstrerror(terrno)); + stError("vgId:%d failed to rename file, from %s to %s, code:%s", pMeta->vgId, newPath, defaultPath, + tstrerror(terrno)); taosMemoryFree(defaultPath); taosMemoryFree(newPath); @@ -241,14 +258,13 @@ int32_t streamMetaReopen(SStreamMeta* pMeta, int64_t chkpId) { pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); while (pMeta->streamBackend == NULL) { - taosMsleep(2 * 1000); + taosMsleep(100); pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); if (pMeta->streamBackend == NULL) { - qError("vgId:%d failed to init stream backend", pMeta->vgId); - qInfo("vgId:%d retry to init stream backend", pMeta->vgId); - // return -1; + stInfo("vgId:%d failed to init stream backend, retry in 100ms", pMeta->vgId); } } + pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend); streamBackendLoadCheckpointInfo(pMeta); @@ -259,12 +275,12 @@ int32_t streamMetaReopen(SStreamMeta* pMeta, int64_t chkpId) { void streamMetaClear(SStreamMeta* pMeta) { void* pIter = NULL; - while ((pIter = taosHashIterate(pMeta->pTasks, pIter)) != NULL) { + while ((pIter = taosHashIterate(pMeta->pTasksMap, pIter)) != NULL) { SStreamTask* p = *(SStreamTask**)pIter; // release the ref by timer if (p->info.triggerParam != 0 && p->info.fillHistory == 0) { // one more ref in timer - qDebug("s-task:%s stop schedTimer, and (before) desc ref:%d", p->id.idStr, p->refCnt); + stDebug("s-task:%s stop schedTimer, and (before) desc ref:%d", p->id.idStr, p->refCnt); taosTmrStop(p->schedInfo.pTimer); p->info.triggerParam = 0; streamMetaReleaseTask(pMeta, p); @@ -275,16 +291,18 @@ void streamMetaClear(SStreamMeta* pMeta) { taosRemoveRef(streamBackendId, pMeta->streamBackendRid); - taosHashClear(pMeta->pTasks); + 
taosHashClear(pMeta->pTasksMap); taosHashClear(pMeta->pTaskBackendUnique); taosArrayClear(pMeta->pTaskList); taosArrayClear(pMeta->chkpSaved); taosArrayClear(pMeta->chkpInUse); + pMeta->numOfStreamTasks = 0; + pMeta->numOfPausedTasks = 0; } void streamMetaClose(SStreamMeta* pMeta) { - qDebug("start to close stream meta"); + stDebug("start to close stream meta"); if (pMeta == NULL) { return; } @@ -300,7 +318,7 @@ void streamMetaClose(SStreamMeta* pMeta) { void streamMetaCloseImpl(void* arg) { SStreamMeta* pMeta = arg; - qDebug("start to do-close stream meta"); + stDebug("start to do-close stream meta"); if (pMeta == NULL) { return; } @@ -316,14 +334,18 @@ void streamMetaCloseImpl(void* arg) { taosArrayDestroy(pMeta->chkpSaved); taosArrayDestroy(pMeta->chkpInUse); - taosHashCleanup(pMeta->pTasks); + taosHashCleanup(pMeta->pTasksMap); taosHashCleanup(pMeta->pTaskBackendUnique); + taosHashCleanup(pMeta->pUpdateTaskSet); + taosHashCleanup(pMeta->startInfo.pReadyTaskSet); + taosMemoryFree(pMeta->pHbInfo); taosMemoryFree(pMeta->path); taosThreadMutexDestroy(&pMeta->backendMutex); + pMeta->role = NODE_ROLE_UNINIT; taosMemoryFree(pMeta); - qDebug("end to close stream meta"); + stDebug("end to close stream meta"); } int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { @@ -345,11 +367,9 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { tEncodeStreamTask(&encoder, pTask); tEncoderClear(&encoder); - int64_t key[2] = {0}; - extractStreamTaskKey(key, pTask); - - if (tdbTbUpsert(pMeta->pTaskDb, key, STREAM_TASK_KEY_LEN, buf, len, pMeta->txn) < 0) { - qError("s-task:%s save to disk failed, code:%s", pTask->id.idStr, tstrerror(terrno)); + int64_t id[2] = {pTask->id.streamId, pTask->id.taskId}; + if (tdbTbUpsert(pMeta->pTaskDb, id, STREAM_TASK_KEY_LEN, buf, len, pMeta->txn) < 0) { + stError("s-task:%s save to disk failed, code:%s", pTask->id.idStr, tstrerror(terrno)); return -1; } @@ -357,18 +377,14 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { return 0; } -void extractStreamTaskKey(int64_t* pKey, const SStreamTask* pTask) { - pKey[0] = pTask->id.streamId; - pKey[1] = pTask->id.taskId; -} - -int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int64_t* pKey) { - int32_t code = tdbTbDelete(pMeta->pTaskDb, pKey, STREAM_TASK_KEY_LEN, pMeta->txn); +int32_t streamMetaRemoveTask(SStreamMeta* pMeta, STaskId* pTaskId) { + int64_t key[2] = {pTaskId->streamId, pTaskId->taskId}; + int32_t code = tdbTbDelete(pMeta->pTaskDb, key, STREAM_TASK_KEY_LEN, pMeta->txn); if (code != 0) { - qError("vgId:%d failed to remove task:0x%x from metastore, code:%s", pMeta->vgId, (int32_t)pKey[1], - tstrerror(terrno)); + stError("vgId:%d failed to remove task:0x%x from metastore, code:%s", pMeta->vgId, (int32_t)pTaskId->taskId, + tstrerror(terrno)); } else { - qDebug("vgId:%d remove task:0x%x from metastore", pMeta->vgId, (int32_t)pKey[1]); + stDebug("vgId:%d remove task:0x%x from metastore", pMeta->vgId, (int32_t)pTaskId->taskId); } return code; @@ -378,37 +394,46 @@ int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int64_t* pKey) { int32_t streamMetaRegisterTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask, bool* pAdded) { *pAdded = false; - int64_t keys[2] = {pTask->id.streamId, pTask->id.taskId}; - void* p = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); - if (p == NULL) { - if (pMeta->expandFunc(pMeta->ahandle, pTask, ver) < 0) { - tFreeStreamTask(pTask); - return -1; - } - - taosArrayPush(pMeta->pTaskList, &pTask->id); - - if (streamMetaSaveTask(pMeta, pTask) < 0) { 
- tFreeStreamTask(pTask); - return -1; - } - - if (streamMetaCommit(pMeta) < 0) { - tFreeStreamTask(pTask); - return -1; - } - } else { + STaskId id = streamTaskExtractKey(pTask); + void* p = taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); + if (p != NULL) { return 0; } - taosHashPut(pMeta->pTasks, keys, sizeof(keys), &pTask, POINTER_BYTES); + if (pTask->info.fillHistory == 1) { + stDebug("s-task:0x%x initial nextProcessVer is set to 1 for fill-history task", pTask->id.taskId); + ver = 1; + } + + if (pMeta->expandFunc(pMeta->ahandle, pTask, ver) < 0) { + tFreeStreamTask(pTask); + return -1; + } + + taosArrayPush(pMeta->pTaskList, &pTask->id); + + if (streamMetaSaveTask(pMeta, pTask) < 0) { + tFreeStreamTask(pTask); + return -1; + } + + if (streamMetaCommit(pMeta) < 0) { + tFreeStreamTask(pTask); + return -1; + } + + taosHashPut(pMeta->pTasksMap, &id, sizeof(id), &pTask, POINTER_BYTES); + if (pTask->info.fillHistory == 0) { + atomic_add_fetch_32(&pMeta->numOfStreamTasks, 1); + } + *pAdded = true; return 0; } int32_t streamMetaGetNumOfTasks(SStreamMeta* pMeta) { - size_t size = taosHashGetSize(pMeta->pTasks); - ASSERT(taosArrayGetSize(pMeta->pTaskList) == taosHashGetSize(pMeta->pTasks)); + size_t size = taosHashGetSize(pMeta->pTasksMap); + ASSERT(taosArrayGetSize(pMeta->pTaskList) == taosHashGetSize(pMeta->pTasksMap)); return (int32_t)size; } @@ -416,10 +441,8 @@ int32_t streamMetaGetNumOfStreamTasks(SStreamMeta* pMeta) { int32_t num = 0; size_t size = taosArrayGetSize(pMeta->pTaskList); for (int32_t i = 0; i < size; ++i) { - SStreamTaskId* pId = taosArrayGet(pMeta->pTaskList, i); - int64_t keys[2] = {pId->streamId, pId->taskId}; - - SStreamTask** p = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + STaskId* pId = taosArrayGet(pMeta->pTaskList, i); + SStreamTask** p = taosHashGet(pMeta->pTasksMap, pId, sizeof(*pId)); if (p == NULL) { continue; } @@ -435,13 +458,13 @@ int32_t streamMetaGetNumOfStreamTasks(SStreamMeta* pMeta) { SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId) { taosRLockLatch(&pMeta->lock); - int64_t keys[2] = {streamId, taskId}; - SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + STaskId id = {.streamId = streamId, .taskId = taskId}; + SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (ppTask != NULL) { if (!streamTaskShouldStop(&(*ppTask)->status)) { int32_t ref = atomic_add_fetch_32(&(*ppTask)->refCnt, 1); taosRUnLockLatch(&pMeta->lock); - qTrace("s-task:%s acquire task, ref:%d", (*ppTask)->id.idStr, ref); + stTrace("s-task:%s acquire task, ref:%d", (*ppTask)->id.idStr, ref); return *ppTask; } } @@ -453,13 +476,13 @@ SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int64_t streamId, int32_t void streamMetaReleaseTask(SStreamMeta* UNUSED_PARAM(pMeta), SStreamTask* pTask) { int32_t ref = atomic_sub_fetch_32(&pTask->refCnt, 1); if (ref > 0) { - qTrace("s-task:%s release task, ref:%d", pTask->id.idStr, ref); + stTrace("s-task:%s release task, ref:%d", pTask->id.idStr, ref); } else if (ref == 0) { ASSERT(streamTaskShouldStop(&pTask->status)); - qTrace("s-task:%s all refs are gone, free it", pTask->id.idStr); + stTrace("s-task:%s all refs are gone, free it", pTask->id.idStr); tFreeStreamTask(pTask); } else if (ref < 0) { - qError("task ref is invalid, ref:%d, %s", ref, pTask->id.idStr); + stError("task ref is invalid, ref:%d, %s", ref, pTask->id.idStr); } } @@ -479,28 +502,28 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t 
// pre-delete operation taosWLockLatch(&pMeta->lock); - int64_t keys[2] = {streamId, taskId}; - SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + STaskId id = {.streamId = streamId, .taskId = taskId}; + SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (ppTask) { pTask = *ppTask; if (streamTaskShouldPause(&pTask->status)) { - int32_t num = atomic_sub_fetch_32(&pMeta->pauseTaskNum, 1); - qInfo("vgId:%d s-task:%s drop stream task. pause task num:%d", pMeta->vgId, pTask->id.idStr, num); + int32_t num = atomic_sub_fetch_32(&pMeta->numOfPausedTasks, 1); + stInfo("vgId:%d s-task:%s drop stream task. pause task num:%d", pMeta->vgId, pTask->id.idStr, num); } atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING); } else { - qDebug("vgId:%d failed to find the task:0x%x, it may be dropped already", pMeta->vgId, taskId); + stDebug("vgId:%d failed to find the task:0x%x, it may be dropped already", pMeta->vgId, taskId); taosWUnLockLatch(&pMeta->lock); return 0; } taosWUnLockLatch(&pMeta->lock); - qDebug("s-task:0x%x set task status:%s and start to unregister it", taskId, - streamGetTaskStatusStr(TASK_STATUS__DROPPING)); + stDebug("s-task:0x%x set task status:%s and start to unregister it", taskId, + streamGetTaskStatusStr(TASK_STATUS__DROPPING)); while (1) { taosRLockLatch(&pMeta->lock); - ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (ppTask) { if ((*ppTask)->status.timerActive == 0) { @@ -509,7 +532,7 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t } taosMsleep(10); - qDebug("s-task:%s wait for quit from timer", (*ppTask)->id.idStr); + stDebug("s-task:%s wait for quit from timer", (*ppTask)->id.idStr); taosRUnLockLatch(&pMeta->lock); } else { taosRUnLockLatch(&pMeta->lock); @@ -519,25 +542,37 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t // let's do delete of stream task taosWLockLatch(&pMeta->lock); - ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (ppTask) { - taosHashRemove(pMeta->pTasks, keys, sizeof(keys)); + // it is an fill-history task, remove the related stream task's id that points to it + if ((*ppTask)->info.fillHistory == 1) { + STaskId streamTaskId = {.streamId = (*ppTask)->streamTaskId.streamId, .taskId = (*ppTask)->streamTaskId.taskId}; + SStreamTask** ppStreamTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &streamTaskId, sizeof(streamTaskId)); + if (ppStreamTask != NULL) { + (*ppStreamTask)->hTaskInfo.id.taskId = 0; + (*ppStreamTask)->hTaskInfo.id.streamId = 0; + } + } else { + atomic_sub_fetch_32(&pMeta->numOfStreamTasks, 1); + } + + taosHashRemove(pMeta->pTasksMap, &id, sizeof(id)); atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING); ASSERT(pTask->status.timerActive == 0); doRemoveIdFromList(pMeta, (int32_t)taosArrayGetSize(pMeta->pTaskList), &pTask->id); if (pTask->info.triggerParam != 0 && pTask->info.fillHistory == 0) { - qDebug("s-task:%s stop schedTimer, and (before) desc ref:%d", pTask->id.idStr, pTask->refCnt); + stDebug("s-task:%s stop schedTimer, and (before) desc ref:%d", pTask->id.idStr, pTask->refCnt); taosTmrStop(pTask->schedInfo.pTimer); pTask->info.triggerParam = 0; streamMetaReleaseTask(pMeta, pTask); } - streamMetaRemoveTask(pMeta, keys); + streamMetaRemoveTask(pMeta, &id); 
streamMetaReleaseTask(pMeta, pTask); } else { - qDebug("vgId:%d failed to find the task:0x%x, it may have been dropped already", pMeta->vgId, taskId); + stDebug("vgId:%d failed to find the task:0x%x, it may have been dropped already", pMeta->vgId, taskId); } taosWUnLockLatch(&pMeta->lock); @@ -558,18 +593,18 @@ int32_t streamMetaBegin(SStreamMeta* pMeta) { // todo add error log int32_t streamMetaCommit(SStreamMeta* pMeta) { if (tdbCommit(pMeta->db, pMeta->txn) < 0) { - qError("vgId:%d failed to commit stream meta", pMeta->vgId); + stError("vgId:%d failed to commit stream meta", pMeta->vgId); return -1; } if (tdbPostCommit(pMeta->db, pMeta->txn) < 0) { - qError("vgId:%d failed to do post-commit stream meta", pMeta->vgId); + stError("vgId:%d failed to do post-commit stream meta", pMeta->vgId); return -1; } if (tdbBegin(pMeta->db, &pMeta->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { - qError("vgId:%d failed to begin trans", pMeta->vgId); + stError("vgId:%d failed to begin trans", pMeta->vgId); return -1; } @@ -605,7 +640,7 @@ int64_t streamGetLatestCheckpointId(SStreamMeta* pMeta) { chkpId = TMAX(chkpId, info.checkpointId); } - qDebug("get max chkp id: %" PRId64 "", chkpId); + stDebug("get max chkp id: %" PRId64 "", chkpId); tdbFree(pKey); tdbFree(pVal); @@ -622,11 +657,13 @@ static void doClear(void* pKey, void* pVal, TBC* pCur, SArray* pRecycleList) { } int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) { - TBC* pCur = NULL; + TBC* pCur = NULL; + int32_t vgId = pMeta->vgId; + + stInfo("vgId:%d load stream tasks from meta files", vgId); - qInfo("vgId:%d load stream tasks from meta files", pMeta->vgId); if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) { - qError("vgId:%d failed to open stream meta, code:%s", pMeta->vgId, tstrerror(terrno)); + stError("vgId:%d failed to open stream meta, code:%s", vgId, tstrerror(terrno)); return -1; } @@ -635,12 +672,14 @@ int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) { void* pVal = NULL; int32_t vLen = 0; SDecoder decoder; - SArray* pRecycleList = taosArrayInit(4, STREAM_TASK_KEY_LEN); + SArray* pRecycleList = taosArrayInit(4, sizeof(STaskId)); tdbTbcMoveToFirst(pCur); while (tdbTbcNext(pCur, &pKey, &kLen, &pVal, &vLen) == 0) { SStreamTask* pTask = taosMemoryCalloc(1, sizeof(SStreamTask)); if (pTask == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + stError("vgId:%d failed to load stream task from meta-files, code:%s", vgId, tstrerror(terrno)); doClear(pKey, pVal, pCur, pRecycleList); return -1; } @@ -650,10 +689,11 @@ int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) { tDecoderClear(&decoder); doClear(pKey, pVal, pCur, pRecycleList); tFreeStreamTask(pTask); - qError( - "stream read incompatible data, rm %s/vnode/vnode*/tq/stream if taosd cannot start, and rebuild stream " + stError( + "vgId:%d stream read incompatible data, rm %s/vnode/vnode*/tq/stream if taosd cannot start, and rebuild " + "stream " "manually", - tsDataDir); + vgId, tsDataDir); return -1; } tDecoderClear(&decoder); @@ -662,22 +702,19 @@ int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) { int32_t taskId = pTask->id.taskId; tFreeStreamTask(pTask); - int64_t key[2] = {0}; - extractStreamTaskKey(key, pTask); + STaskId id = streamTaskExtractKey(pTask); - taosArrayPush(pRecycleList, key); + taosArrayPush(pRecycleList, &id); int32_t total = taosArrayGetSize(pRecycleList); - qDebug("s-task:0x%x is already dropped, add into recycle list, total:%d", taskId, total); + stDebug("s-task:0x%x is already dropped, add into recycle list, total:%d", 
taskId, total); continue; } // do duplicate task check. - int64_t keys[2] = {pTask->id.streamId, pTask->id.taskId}; - void* p = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; + void* p = taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (p == NULL) { - // pTask->chkInfo.checkpointVer may be 0, when a follower is become a leader - // In this case, we try not to start fill-history task anymore. - if (pMeta->expandFunc(pMeta->ahandle, pTask, pTask->chkInfo.checkpointVer) < 0) { + if (pMeta->expandFunc(pMeta->ahandle, pTask, pTask->chkInfo.checkpointVer + 1) < 0) { doClear(pKey, pVal, pCur, pRecycleList); tFreeStreamTask(pTask); return -1; @@ -685,43 +722,50 @@ int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) { taosArrayPush(pMeta->pTaskList, &pTask->id); } else { + // todo this should replace the existed object put by replay creating stream task msg from mnode + stError("s-task:0x%x already added into table meta by replaying WAL, need check", pTask->id.taskId); tdbFree(pKey); tdbFree(pVal); taosMemoryFree(pTask); continue; } - streamTaskResetUpstreamStageInfo(pTask); - if (taosHashPut(pMeta->pTasks, keys, sizeof(keys), &pTask, sizeof(void*)) < 0) { + if (taosHashPut(pMeta->pTasksMap, &id, sizeof(id), &pTask, POINTER_BYTES) < 0) { doClear(pKey, pVal, pCur, pRecycleList); tFreeStreamTask(pTask); return -1; } + if (pTask->info.fillHistory == 0) { + atomic_add_fetch_32(&pMeta->numOfStreamTasks, 1); + } + if (streamTaskShouldPause(&pTask->status)) { - atomic_add_fetch_32(&pMeta->pauseTaskNum, 1); + atomic_add_fetch_32(&pMeta->numOfPausedTasks, 1); } ASSERT(pTask->status.downstreamReady == 0); } - qInfo("vgId:%d pause task num:%d", pMeta->vgId, pMeta->pauseTaskNum); tdbFree(pKey); tdbFree(pVal); if (tdbTbcClose(pCur) < 0) { + stError("vgId:%d failed to close meta-file cursor", vgId); taosArrayDestroy(pRecycleList); return -1; } if (taosArrayGetSize(pRecycleList) > 0) { for (int32_t i = 0; i < taosArrayGetSize(pRecycleList); ++i) { - int64_t* pId = taosArrayGet(pRecycleList, i); + STaskId* pId = taosArrayGet(pRecycleList, i); streamMetaRemoveTask(pMeta, pId); } } int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); - qDebug("vgId:%d load %d tasks into meta from disk completed", pMeta->vgId, numOfTasks); + ASSERT(pMeta->numOfStreamTasks <= numOfTasks && pMeta->numOfPausedTasks <= numOfTasks); + stDebug("vgId:%d load %d tasks into meta from disk completed, streamTask:%d, paused:%d", pMeta->vgId, numOfTasks, + pMeta->numOfStreamTasks, pMeta->numOfPausedTasks); taosArrayDestroy(pRecycleList); return 0; } @@ -733,9 +777,20 @@ int32_t tEncodeStreamHbMsg(SEncoder* pEncoder, const SStreamHbMsg* pReq) { for (int32_t i = 0; i < pReq->numOfTasks; ++i) { STaskStatusEntry* ps = taosArrayGet(pReq->pTaskStatus, i); - if (tEncodeI64(pEncoder, ps->streamId) < 0) return -1; - if (tEncodeI32(pEncoder, ps->taskId) < 0) return -1; + if (tEncodeI64(pEncoder, ps->id.streamId) < 0) return -1; + if (tEncodeI32(pEncoder, ps->id.taskId) < 0) return -1; if (tEncodeI32(pEncoder, ps->status) < 0) return -1; + if (tEncodeI32(pEncoder, ps->stage) < 0) return -1; + if (tEncodeI32(pEncoder, ps->nodeId) < 0) return -1; + if (tEncodeDouble(pEncoder, ps->inputQUsed) < 0) return -1; + if (tEncodeDouble(pEncoder, ps->inputRate) < 0) return -1; + if (tEncodeDouble(pEncoder, ps->sinkQuota) < 0) return -1; + if (tEncodeDouble(pEncoder, ps->sinkDataSize) < 0) return -1; + if (tEncodeI64(pEncoder, ps->processedVer) < 0) return -1; + if (tEncodeI64(pEncoder, 
ps->verStart) < 0) return -1; + if (tEncodeI64(pEncoder, ps->verEnd) < 0) return -1; + if (tEncodeI64(pEncoder, ps->activeCheckpointId) < 0) return -1; + if (tEncodeI8(pEncoder, ps->checkpointFailed) < 0) return -1; } tEndEncode(pEncoder); return pEncoder->pos; @@ -748,21 +803,34 @@ int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pReq) { pReq->pTaskStatus = taosArrayInit(pReq->numOfTasks, sizeof(STaskStatusEntry)); for (int32_t i = 0; i < pReq->numOfTasks; ++i) { - STaskStatusEntry hb = {0}; - if (tDecodeI64(pDecoder, &hb.streamId) < 0) return -1; - if (tDecodeI32(pDecoder, &hb.taskId) < 0) return -1; - if (tDecodeI32(pDecoder, &hb.status) < 0) return -1; + int32_t taskId = 0; + STaskStatusEntry entry = {0}; - taosArrayPush(pReq->pTaskStatus, &hb); + if (tDecodeI64(pDecoder, &entry.id.streamId) < 0) return -1; + if (tDecodeI32(pDecoder, &taskId) < 0) return -1; + if (tDecodeI32(pDecoder, &entry.status) < 0) return -1; + if (tDecodeI32(pDecoder, &entry.stage) < 0) return -1; + if (tDecodeI32(pDecoder, &entry.nodeId) < 0) return -1; + if (tDecodeDouble(pDecoder, &entry.inputQUsed) < 0) return -1; + if (tDecodeDouble(pDecoder, &entry.inputRate) < 0) return -1; + if (tDecodeDouble(pDecoder, &entry.sinkQuota) < 0) return -1; + if (tDecodeDouble(pDecoder, &entry.sinkDataSize) < 0) return -1; + if (tDecodeI64(pDecoder, &entry.processedVer) < 0) return -1; + if (tDecodeI64(pDecoder, &entry.verStart) < 0) return -1; + if (tDecodeI64(pDecoder, &entry.verEnd) < 0) return -1; + if (tDecodeI64(pDecoder, &entry.activeCheckpointId) < 0) return -1; + if (tDecodeI8(pDecoder, (int8_t*)&entry.checkpointFailed) < 0) return -1; + + entry.id.taskId = taskId; + taosArrayPush(pReq->pTaskStatus, &entry); } tEndDecode(pDecoder); return 0; } -static bool readyToSendHb(SMetaHbInfo* pInfo) { - if ((++pInfo->tickCounter) >= META_HB_SEND_IDLE_COUNTER) { - // reset the counter +static bool waitForEnoughDuration(SMetaHbInfo* pInfo) { + if ((++pInfo->tickCounter) >= META_HB_SEND_IDLE_COUNTER) { // reset the counter pInfo->tickCounter = 0; return true; } @@ -772,48 +840,86 @@ void metaHbToMnode(void* param, void* tmrId) { int64_t rid = *(int64_t*)param; - SStreamHbMsg hbMsg = {0}; SStreamMeta* pMeta = taosAcquireRef(streamMetaId, rid); if (pMeta == NULL) { return; } // need to stop, stop now - if (pMeta->hbInfo.stopFlag == STREAM_META_WILL_STOP) { - pMeta->hbInfo.stopFlag = STREAM_META_OK_TO_STOP; - qDebug("vgId:%d jump out of meta timer", pMeta->vgId); + if (pMeta->pHbInfo->stopFlag == STREAM_META_WILL_STOP) { + pMeta->pHbInfo->stopFlag = STREAM_META_OK_TO_STOP; + stDebug("vgId:%d jump out of meta timer", pMeta->vgId); taosReleaseRef(streamMetaId, rid); return; } - if (!readyToSendHb(&pMeta->hbInfo)) { - taosTmrReset(metaHbToMnode, META_HB_CHECK_INTERVAL, param, streamEnv.timer, &pMeta->hbInfo.hbTmr); + // not leader, do not send msg + if (pMeta->role == NODE_ROLE_FOLLOWER) { + stInfo("vgId:%d follower does not send hb to mnode", pMeta->vgId); + taosReleaseRef(streamMetaId, rid); + pMeta->pHbInfo->hbStart = 0; + return; + } + + // set the hb start time + if (pMeta->pHbInfo->hbStart == 0) { + pMeta->pHbInfo->hbStart = taosGetTimestampMs(); + } + + if (!waitForEnoughDuration(pMeta->pHbInfo)) { + taosTmrReset(metaHbToMnode, META_HB_CHECK_INTERVAL, param, streamEnv.timer, &pMeta->pHbInfo->hbTmr); taosReleaseRef(streamMetaId, rid); return; } + stDebug("vgId:%d build stream task hb, leader:%d", pMeta->vgId, (pMeta->role == NODE_ROLE_LEADER)); + + SStreamHbMsg hbMsg = {0};
taosRLockLatch(&pMeta->lock); int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); SEpSet epset = {0}; bool hasValEpset = false; - hbMsg.vgId = pMeta->vgId; hbMsg.pTaskStatus = taosArrayInit(numOfTasks, sizeof(STaskStatusEntry)); for (int32_t i = 0; i < numOfTasks; ++i) { - SStreamTaskId* pId = taosArrayGet(pMeta->pTaskList, i); - int64_t keys[2] = {pId->streamId, pId->taskId}; - SStreamTask** pTask = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + STaskId* pId = taosArrayGet(pMeta->pTaskList, i); + SStreamTask** pTask = taosHashGet(pMeta->pTasksMap, pId, sizeof(*pId)); + + // not report the status of fill-history task if ((*pTask)->info.fillHistory == 1) { continue; } - STaskStatusEntry entry = {.streamId = pId->streamId, .taskId = pId->taskId, .status = (*pTask)->status.taskStatus}; + STaskStatusEntry entry = { + .id = *pId, + .status = (*pTask)->status.taskStatus, + .nodeId = pMeta->vgId, + .stage = pMeta->stage, + .inputQUsed = SIZE_IN_MiB(streamQueueGetItemSize((*pTask)->inputInfo.queue)), + }; + + entry.inputRate = entry.inputQUsed*100.0/STREAM_TASK_QUEUE_CAPACITY_IN_SIZE; + if ((*pTask)->info.taskLevel == TASK_LEVEL__SINK) { + entry.sinkQuota = (*pTask)->outputInfo.pTokenBucket->quotaRate; + entry.sinkDataSize = SIZE_IN_MiB((*pTask)->execInfo.sink.dataSize); + } + + if ((*pTask)->checkpointingId != 0) { + entry.checkpointFailed = ((*pTask)->chkInfo.failedId >= (*pTask)->checkpointingId); + entry.activeCheckpointId = (*pTask)->checkpointingId; + } + + if ((*pTask)->exec.pWalReader != NULL) { + entry.processedVer = (*pTask)->chkInfo.nextProcessVer - 1; + walReaderValidVersionRange((*pTask)->exec.pWalReader, &entry.verStart, &entry.verEnd); + } + taosArrayPush(hbMsg.pTaskStatus, &entry); - if (i == 0) { + if (!hasValEpset) { epsetAssign(&epset, &(*pTask)->info.mnodeEpset); hasValEpset = true; } @@ -828,7 +934,7 @@ void metaHbToMnode(void* param, void* tmrId) { tEncodeSize(tEncodeStreamHbMsg, &hbMsg, tlen, code); if (code < 0) { - qError("vgId:%d encode stream hb msg failed, code:%s", pMeta->vgId, tstrerror(code)); + stError("vgId:%d encode stream hb msg failed, code:%s", pMeta->vgId, tstrerror(code)); taosArrayDestroy(hbMsg.pTaskStatus); taosReleaseRef(streamMetaId, rid); return; @@ -836,7 +942,7 @@ void metaHbToMnode(void* param, void* tmrId) { void* buf = rpcMallocCont(tlen); if (buf == NULL) { - qError("vgId:%d encode stream hb msg failed, code:%s", pMeta->vgId, tstrerror(TSDB_CODE_OUT_OF_MEMORY)); + stError("vgId:%d encode stream hb msg failed, code:%s", pMeta->vgId, tstrerror(TSDB_CODE_OUT_OF_MEMORY)); taosArrayDestroy(hbMsg.pTaskStatus); taosReleaseRef(streamMetaId, rid); return; @@ -846,7 +952,7 @@ void metaHbToMnode(void* param, void* tmrId) { tEncoderInit(&encoder, buf, tlen); if ((code = tEncodeStreamHbMsg(&encoder, &hbMsg)) < 0) { rpcFreeCont(buf); - qError("vgId:%d encode stream hb msg failed, code:%s", pMeta->vgId, tstrerror(code)); + stError("vgId:%d encode stream hb msg failed, code:%s", pMeta->vgId, tstrerror(code)); taosArrayDestroy(hbMsg.pTaskStatus); taosReleaseRef(streamMetaId, rid); return; @@ -857,12 +963,17 @@ void metaHbToMnode(void* param, void* tmrId) { initRpcMsg(&msg, TDMT_MND_STREAM_HEARTBEAT, buf, tlen); msg.info.noResp = 1; - qDebug("vgId:%d, build and send hb to mnode", pMeta->vgId); + pMeta->pHbInfo->hbCount += 1; + + stDebug("vgId:%d, build and send hb to mnode, numOfTasks:%d total:%d", pMeta->vgId, hbMsg.numOfTasks, + pMeta->pHbInfo->hbCount); tmsgSendReq(&epset, &msg); + } else { + stDebug("vgId:%d no tasks and no mnd epset, not send stream hb to 
mnode", pMeta->vgId); } taosArrayDestroy(hbMsg.pTaskStatus); - taosTmrReset(metaHbToMnode, META_HB_CHECK_INTERVAL, param, streamEnv.timer, &pMeta->hbInfo.hbTmr); + taosTmrReset(metaHbToMnode, META_HB_CHECK_INTERVAL, param, streamEnv.timer, &pMeta->pHbInfo->hbTmr); taosReleaseRef(streamMetaId, rid); } @@ -873,7 +984,7 @@ static bool hasStreamTaskInTimer(SStreamMeta* pMeta) { void* pIter = NULL; while (1) { - pIter = taosHashIterate(pMeta->pTasks, pIter); + pIter = taosHashIterate(pMeta->pTasksMap, pIter); if (pIter == NULL) { break; } @@ -891,38 +1002,59 @@ static bool hasStreamTaskInTimer(SStreamMeta* pMeta) { void streamMetaNotifyClose(SStreamMeta* pMeta) { int32_t vgId = pMeta->vgId; - qDebug("vgId:%d notify all stream tasks that the vnode is closing", vgId); + stDebug("vgId:%d notify all stream tasks that the vnode is closing. isLeader:%d startHb%" PRId64 ", totalHb:%d", vgId, + (pMeta->role == NODE_ROLE_LEADER), pMeta->pHbInfo->hbStart, pMeta->pHbInfo->hbCount); + taosWLockLatch(&pMeta->lock); void* pIter = NULL; while (1) { - pIter = taosHashIterate(pMeta->pTasks, pIter); + pIter = taosHashIterate(pMeta->pTasksMap, pIter); if (pIter == NULL) { break; } SStreamTask* pTask = *(SStreamTask**)pIter; - qDebug("vgId:%d s-task:%s set closing flag", vgId, pTask->id.idStr); + stDebug("vgId:%d s-task:%s set closing flag", vgId, pTask->id.idStr); streamTaskStop(pTask); } taosWUnLockLatch(&pMeta->lock); // wait for the stream meta hb function stopping - pMeta->hbInfo.stopFlag = STREAM_META_WILL_STOP; - while (pMeta->hbInfo.stopFlag != STREAM_META_OK_TO_STOP) { - taosMsleep(100); - qDebug("vgId:%d wait for meta to stop timer", pMeta->vgId); + if (pMeta->role == NODE_ROLE_LEADER) { + pMeta->pHbInfo->stopFlag = STREAM_META_WILL_STOP; + while (pMeta->pHbInfo->stopFlag != STREAM_META_OK_TO_STOP) { + taosMsleep(100); + stDebug("vgId:%d wait for meta to stop timer", pMeta->vgId); + } } - qDebug("vgId:%d start to check all tasks", vgId); + stDebug("vgId:%d start to check all tasks", vgId); int64_t st = taosGetTimestampMs(); while (hasStreamTaskInTimer(pMeta)) { - qDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId); + stDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId); taosMsleep(100); } int64_t el = taosGetTimestampMs() - st; - qDebug("vgId:%d all stream tasks are not in timer, continue close, elapsed time:%" PRId64 " ms", pMeta->vgId, el); + stDebug("vgId:%d all stream tasks are not in timer, continue close, elapsed time:%" PRId64 " ms", pMeta->vgId, el); } + +void streamMetaStartHb(SStreamMeta* pMeta) { + int64_t* pRid = taosMemoryMalloc(sizeof(int64_t)); + metaRefMgtAdd(pMeta->vgId, pRid); + *pRid = pMeta->rid; + metaHbToMnode(pRid, NULL); +} + +void streamMetaInitForSnode(SStreamMeta* pMeta) { + pMeta->stage = 0; + pMeta->role = NODE_ROLE_LEADER; +} + +void streamMetaResetStartInfo(STaskStartInfo* pStartInfo) { + taosHashClear(pStartInfo->pReadyTaskSet); + pStartInfo->startedAfterNodeUpdate = 0; +} \ No newline at end of file diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index 29ca351a6b..ae285046ef 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -16,10 +16,9 @@ #include "streamInt.h" #define MAX_STREAM_EXEC_BATCH_NUM 32 -#define MIN_STREAM_EXEC_BATCH_NUM 4 -#define STREAM_TASK_QUEUE_CAPACITY 20480 -#define STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE (30) -#define STREAM_TASK_OUTPUT_QUEUE_CAPACITY_IN_SIZE (50) +#define MAX_SMOOTH_BURST_RATIO 5 // 5 sec 
+#define WAIT_FOR_DURATION 40 +#define SINK_TASK_IDLE_DURATION 200 // 200 ms // todo refactor: // read data from input queue @@ -30,7 +29,9 @@ typedef struct SQueueReader { int32_t waitDuration; // maximum wait time to format several block into a batch to process, unit: ms } SQueueReader; -static bool streamTaskHasAvailableToken(STokenBucket* pBucket); +static bool streamTaskExtractAvailableToken(STokenBucket* pBucket, const char* id); +static void streamTaskPutbackToken(STokenBucket* pBucket); +static void streamTaskConsumeQuota(STokenBucket* pBucket, int32_t bytes); static void streamQueueCleanup(SStreamQueue* pQueue) { void* qItem = NULL; @@ -65,7 +66,7 @@ SStreamQueue* streamQueueOpen(int64_t cap) { } void streamQueueClose(SStreamQueue* pQueue, int32_t taskId) { - qDebug("s-task:0x%x free the queue:%p, items in queue:%d", taskId, pQueue->pQueue, taosQueueItemSize(pQueue->pQueue)); + stDebug("s-task:0x%x free the queue:%p, items in queue:%d", taskId, pQueue->pQueue, taosQueueItemSize(pQueue->pQueue)); streamQueueCleanup(pQueue); taosFreeQall(pQueue->qall); @@ -102,73 +103,13 @@ void streamQueueProcessFail(SStreamQueue* queue) { atomic_store_8(&queue->status, STREAM_QUEUE__FAILED); } -#if 0 -bool streamQueueResEmpty(const SStreamQueueRes* pRes) { - // - return true; -} -int64_t streamQueueResSize(const SStreamQueueRes* pRes) { return pRes->size; } -SStreamQueueNode* streamQueueResFront(SStreamQueueRes* pRes) { return pRes->head; } -SStreamQueueNode* streamQueueResPop(SStreamQueueRes* pRes) { - SStreamQueueNode* pRet = pRes->head; - pRes->head = pRes->head->next; - return pRet; -} - -void streamQueueResClear(SStreamQueueRes* pRes) { - while (pRes->head) { - SStreamQueueNode* pNode = pRes->head; - streamFreeQitem(pRes->head->item); - pRes->head = pNode; - } -} - -SStreamQueueRes streamQueueBuildRes(SStreamQueueNode* pTail) { - int64_t size = 0; - SStreamQueueNode* head = NULL; - - while (pTail) { - SStreamQueueNode* pTmp = pTail->next; - pTail->next = head; - head = pTail; - pTail = pTmp; - size++; - } - - return (SStreamQueueRes){.head = head, .size = size}; -} - -bool streamQueueHasTask(const SStreamQueue1* pQueue) { return atomic_load_ptr(pQueue->pHead); } -int32_t streamQueuePush(SStreamQueue1* pQueue, SStreamQueueItem* pItem) { - SStreamQueueNode* pNode = taosMemoryMalloc(sizeof(SStreamQueueNode)); - pNode->item = pItem; - SStreamQueueNode* pHead = atomic_load_ptr(pQueue->pHead); - while (1) { - pNode->next = pHead; - SStreamQueueNode* pOld = atomic_val_compare_exchange_ptr(pQueue->pHead, pHead, pNode); - if (pOld == pHead) { - break; - } - } - return 0; -} - -SStreamQueueRes streamQueueGetRes(SStreamQueue1* pQueue) { - SStreamQueueNode* pNode = atomic_exchange_ptr(pQueue->pHead, NULL); - if (pNode) return streamQueueBuildRes(pNode); - return (SStreamQueueRes){0}; -} -#endif - -bool streamQueueIsFull(const STaosQueue* pQueue, bool inputQ) { - bool isFull = taosQueueItemSize((STaosQueue*)pQueue) >= STREAM_TASK_QUEUE_CAPACITY; - if (isFull) { +bool streamQueueIsFull(const SStreamQueue* pQueue) { + int32_t numOfItems = streamQueueGetNumOfItems(pQueue); + if (numOfItems >= STREAM_TASK_QUEUE_CAPACITY) { return true; } - int32_t threahold = (inputQ) ? 
STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE : STREAM_TASK_OUTPUT_QUEUE_CAPACITY_IN_SIZE; - double size = SIZE_IN_MB(taosQueueMemorySize((STaosQueue*)pQueue)); - return (size >= threahold); + return (SIZE_IN_MiB(taosQueueMemorySize(pQueue->pQueue)) >= STREAM_TASK_QUEUE_CAPACITY_IN_SIZE); } int32_t streamQueueGetNumOfItems(const SStreamQueue* pQueue) { @@ -178,72 +119,107 @@ int32_t streamQueueGetNumOfItems(const SStreamQueue* pQueue) { return numOfItems1 + numOfItems2; } -int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks) { +int32_t streamQueueGetAvailableSpace(const SStreamQueue* pQueue, int32_t* availNum, double* availSize) { + int32_t num = streamQueueGetNumOfItems(pQueue); + *availNum = STREAM_TASK_QUEUE_CAPACITY - num; + + *availSize = STREAM_TASK_QUEUE_CAPACITY_IN_SIZE - taosQueueMemorySize(pQueue->pQueue); + return 0; +} + +// todo: fix it: data in Qall is not included here +int32_t streamQueueGetItemSize(const SStreamQueue* pQueue) { + return taosQueueMemorySize(pQueue->pQueue); +} + +int32_t streamQueueItemGetSize(const SStreamQueueItem* pItem) { + STaosQnode* p = (STaosQnode*)((char*) pItem - sizeof(STaosQnode)); + return p->dataSize; +} + +void streamQueueItemIncSize(const SStreamQueueItem* pItem, int32_t size) { + STaosQnode* p = (STaosQnode*)((char*) pItem - sizeof(STaosQnode)); + p->dataSize += size; +} + +const char* streamQueueItemGetTypeStr(int32_t type) { + switch (type) { + case STREAM_INPUT__CHECKPOINT: + return "checkpoint"; + case STREAM_INPUT__CHECKPOINT_TRIGGER: + return "checkpoint-trigger"; + case STREAM_INPUT__TRANS_STATE: + return "trans-state"; + default: + return "datablock"; + } +} + +int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks, + int32_t* blockSize) { int32_t retryTimes = 0; int32_t MAX_RETRY_TIMES = 5; const char* id = pTask->id.idStr; + int32_t taskLevel = pTask->info.taskLevel; - if (pTask->info.taskLevel == TASK_LEVEL__SINK) { // extract block from inputQ, one-by-one - while (1) { - if (streamTaskShouldPause(&pTask->status) || streamTaskShouldStop(&pTask->status)) { - qDebug("s-task:%s task should pause, extract input blocks:%d", pTask->id.idStr, *numOfBlocks); - return TSDB_CODE_SUCCESS; - } + *pInput = NULL; + *numOfBlocks = 0; + *blockSize = 0; - STokenBucket* pBucket = &pTask->tokenBucket; - bool has = streamTaskHasAvailableToken(pBucket); - if (!has) { // no available token in th bucket, ignore this execution -// qInfo("s-task:%s no available token for sink, capacity:%d, rate:%d token/sec, quit", pTask->id.idStr, -// pBucket->capacity, pBucket->rate); - return TSDB_CODE_SUCCESS; - } - - SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputInfo.queue); - if (qItem == NULL) { - qDebug("===stream===break batchSize:%d, %s", *numOfBlocks, id); - return TSDB_CODE_SUCCESS; - } - - qDebug("s-task:%s sink task handle block one-by-one, type:%d", id, qItem->type); - - *numOfBlocks = 1; - *pInput = qItem; - return TSDB_CODE_SUCCESS; - } + // no available token in bucket for sink task, let's wait for a little bit + if (taskLevel == TASK_LEVEL__SINK && (!streamTaskExtractAvailableToken(pTask->outputInfo.pTokenBucket, pTask->id.idStr))) { + stDebug("s-task:%s no available token in bucket for sink data, wait for 50ms", id); + return TSDB_CODE_SUCCESS; } while (1) { if (streamTaskShouldPause(&pTask->status) || streamTaskShouldStop(&pTask->status)) { - qDebug("s-task:%s task should pause, extract input blocks:%d", pTask->id.idStr, *numOfBlocks); + 
stDebug("s-task:%s task should pause, extract input blocks:%d", pTask->id.idStr, *numOfBlocks); return TSDB_CODE_SUCCESS; } SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputInfo.queue); if (qItem == NULL) { - if (pTask->info.taskLevel == TASK_LEVEL__SOURCE && (++retryTimes) < MAX_RETRY_TIMES) { - taosMsleep(10); - qDebug("===stream===try again batchSize:%d, retry:%d, %s", *numOfBlocks, retryTimes, id); + if ((taskLevel == TASK_LEVEL__SOURCE || taskLevel == TASK_LEVEL__SINK) && (++retryTimes) < MAX_RETRY_TIMES) { + taosMsleep(WAIT_FOR_DURATION); continue; } - qDebug("===stream===break batchSize:%d, %s", *numOfBlocks, id); + // restore the token to bucket + if (*numOfBlocks > 0) { + *blockSize = streamQueueItemGetSize(*pInput); + if (taskLevel == TASK_LEVEL__SINK) { + streamTaskConsumeQuota(pTask->outputInfo.pTokenBucket, *blockSize); + } + } else { + streamTaskPutbackToken(pTask->outputInfo.pTokenBucket); + } + return TSDB_CODE_SUCCESS; } // do not merge blocks for sink node and check point data block - if (qItem->type == STREAM_INPUT__CHECKPOINT || qItem->type == STREAM_INPUT__CHECKPOINT_TRIGGER || - qItem->type == STREAM_INPUT__TRANS_STATE) { - const char* p = streamGetBlockTypeStr(qItem->type); + int8_t type = qItem->type; + if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__CHECKPOINT_TRIGGER || + type == STREAM_INPUT__TRANS_STATE) { + const char* p = streamQueueItemGetTypeStr(type); if (*pInput == NULL) { - qDebug("s-task:%s %s msg extracted, start to process immediately", id, p); + stDebug("s-task:%s %s msg extracted, start to process immediately", id, p); + // restore the token to bucket in case of checkpoint/trans-state msg + streamTaskPutbackToken(pTask->outputInfo.pTokenBucket); + *blockSize = 0; *numOfBlocks = 1; *pInput = qItem; return TSDB_CODE_SUCCESS; - } else { - // previous existed blocks needs to be handle, before handle the checkpoint msg block - qDebug("s-task:%s %s msg extracted, handle previous blocks, numOfBlocks:%d", id, p, *numOfBlocks); + } else { // previous existed blocks needs to be handle, before handle the checkpoint msg block + stDebug("s-task:%s %s msg extracted, handle previous blocks, numOfBlocks:%d", id, p, *numOfBlocks); + *blockSize = streamQueueItemGetSize(*pInput); + if (taskLevel == TASK_LEVEL__SINK) { + streamTaskConsumeQuota(pTask->outputInfo.pTokenBucket, *blockSize); + } + streamQueueProcessFail(pTask->inputInfo.queue); return TSDB_CODE_SUCCESS; } @@ -252,14 +228,19 @@ int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInpu ASSERT((*numOfBlocks) == 0); *pInput = qItem; } else { - // todo we need to sort the data block, instead of just appending into the array list. + // merge current block failed, let's handle the already merged blocks. 
void* newRet = streamMergeQueueItem(*pInput, qItem); if (newRet == NULL) { if (terrno != 0) { - qError("s-task:%s failed to merge blocks from inputQ, numOfBlocks:%d, code:%s", id, *numOfBlocks, + stError("s-task:%s failed to merge blocks from inputQ, numOfBlocks:%d, code:%s", id, *numOfBlocks, tstrerror(terrno)); } + *blockSize = streamQueueItemGetSize(*pInput); + if (taskLevel == TASK_LEVEL__SINK) { + streamTaskConsumeQuota(pTask->outputInfo.pTokenBucket, *blockSize); + } + streamQueueProcessFail(pTask->inputInfo.queue); return TSDB_CODE_SUCCESS; } @@ -271,7 +252,13 @@ int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInpu streamQueueProcessSuccess(pTask->inputInfo.queue); if (*numOfBlocks >= MAX_STREAM_EXEC_BATCH_NUM) { - qDebug("s-task:%s batch size limit:%d reached, start to process blocks", id, MAX_STREAM_EXEC_BATCH_NUM); + stDebug("s-task:%s batch size limit:%d reached, start to process blocks", id, MAX_STREAM_EXEC_BATCH_NUM); + + *blockSize = streamQueueItemGetSize(*pInput); + if (taskLevel == TASK_LEVEL__SINK) { + streamTaskConsumeQuota(pTask->outputInfo.pTokenBucket, *blockSize); + } + return TSDB_CODE_SUCCESS; } } @@ -285,11 +272,11 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) if (type == STREAM_INPUT__DATA_SUBMIT) { SStreamDataSubmit* px = (SStreamDataSubmit*)pItem; - if ((pTask->info.taskLevel == TASK_LEVEL__SOURCE) && streamQueueIsFull(pQueue, true)) { - double size = SIZE_IN_MB(taosQueueMemorySize(pQueue)); - qTrace( + if ((pTask->info.taskLevel == TASK_LEVEL__SOURCE) && streamQueueIsFull(pTask->inputInfo.queue)) { + double size = SIZE_IN_MiB(taosQueueMemorySize(pQueue)); + stTrace( "s-task:%s inputQ is full, capacity(size:%d num:%dMiB), current(blocks:%d, size:%.2fMiB) stop to push data", - pTask->id.idStr, STREAM_TASK_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total, size); + pTask->id.idStr, STREAM_TASK_QUEUE_CAPACITY, STREAM_TASK_QUEUE_CAPACITY_IN_SIZE, total, size); streamDataSubmitDestroy(px); taosFreeQitem(pItem); return -1; @@ -305,18 +292,18 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) return code; } - double size = SIZE_IN_MB(taosQueueMemorySize(pQueue)); + double size = SIZE_IN_MiB(taosQueueMemorySize(pQueue)); // use the local variable to avoid the pItem be freed by other threads, since it has been put into queue already. 
- qDebug("s-task:%s submit enqueue msgLen:%d ver:%" PRId64 ", total in queue:%d, size:%.2fMiB", pTask->id.idStr, - msgLen, ver, total, size + SIZE_IN_MB(msgLen)); + stDebug("s-task:%s submit enqueue msgLen:%d ver:%" PRId64 ", total in queue:%d, size:%.2fMiB", pTask->id.idStr, + msgLen, ver, total, size + SIZE_IN_MiB(msgLen)); } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE || type == STREAM_INPUT__REF_DATA_BLOCK) { - if (streamQueueIsFull(pQueue, true)) { - double size = SIZE_IN_MB(taosQueueMemorySize(pQueue)); + if (streamQueueIsFull(pTask->inputInfo.queue)) { + double size = SIZE_IN_MiB(taosQueueMemorySize(pQueue)); - qTrace("s-task:%s input queue is full, capacity:%d size:%d MiB, current(blocks:%d, size:%.2fMiB) abort", - pTask->id.idStr, STREAM_TASK_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total, size); + stTrace("s-task:%s input queue is full, capacity:%d size:%d MiB, current(blocks:%d, size:%.2fMiB) abort", + pTask->id.idStr, STREAM_TASK_QUEUE_CAPACITY, STREAM_TASK_QUEUE_CAPACITY_IN_SIZE, total, size); destroyStreamDataBlock((SStreamDataBlock*)pItem); return -1; } @@ -327,8 +314,8 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) return code; } - double size = SIZE_IN_MB(taosQueueMemorySize(pQueue)); - qDebug("s-task:%s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, total, size); + double size = SIZE_IN_MiB(taosQueueMemorySize(pQueue)); + stDebug("s-task:%s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, total, size); } else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__CHECKPOINT_TRIGGER || type == STREAM_INPUT__TRANS_STATE) { int32_t code = taosWriteQitem(pQueue, pItem); @@ -337,9 +324,9 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) return code; } - double size = SIZE_IN_MB(taosQueueMemorySize(pQueue)); - qDebug("s-task:%s level:%d %s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, - pTask->info.taskLevel, streamGetBlockTypeStr(type), total, size); + double size = SIZE_IN_MiB(taosQueueMemorySize(pQueue)); + stDebug("s-task:%s level:%d %s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, + pTask->info.taskLevel, streamQueueItemGetTypeStr(type), total, size); } else if (type == STREAM_INPUT__GET_RES) { // use the default memory limit, refactor later. 
int32_t code = taosWriteQitem(pQueue, pItem); @@ -348,15 +335,15 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) return code; } - double size = SIZE_IN_MB(taosQueueMemorySize(pQueue)); - qDebug("s-task:%s data res enqueue, current(blocks:%d, size:%.2fMiB)", pTask->id.idStr, total, size); + double size = SIZE_IN_MiB(taosQueueMemorySize(pQueue)); + stDebug("s-task:%s data res enqueue, current(blocks:%d, size:%.2fMiB)", pTask->id.idStr, total, size); } else { ASSERT(0); } if (type != STREAM_INPUT__GET_RES && type != STREAM_INPUT__CHECKPOINT && pTask->info.triggerParam != 0) { atomic_val_compare_exchange_8(&pTask->schedInfo.status, TASK_TRIGGER_STATUS__INACTIVE, TASK_TRIGGER_STATUS__ACTIVE); - qDebug("s-task:%s new data arrived, active the trigger, triggerStatus:%d", pTask->id.idStr, pTask->schedInfo.status); + stDebug("s-task:%s new data arrived, active the trigger, triggerStatus:%d", pTask->id.idStr, pTask->schedInfo.status); } return 0; @@ -364,73 +351,98 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) // the result should be put into the outputQ in any cases, otherwise, the result may be lost int32_t streamTaskPutDataIntoOutputQ(SStreamTask* pTask, SStreamDataBlock* pBlock) { - STaosQueue* pQueue = pTask->outputInfo.queue->pQueue; + STaosQueue* pQueue = pTask->outputq.queue->pQueue; - while (streamQueueIsFull(pQueue, false)) { + while (streamQueueIsFull(pTask->outputq.queue)) { if (streamTaskShouldStop(&pTask->status)) { - qInfo("s-task:%s discard result block due to task stop", pTask->id.idStr); + stInfo("s-task:%s discard result block due to task stop", pTask->id.idStr); return TSDB_CODE_STREAM_EXEC_CANCELLED; } - int32_t total = streamQueueGetNumOfItems(pTask->outputInfo.queue); - double size = SIZE_IN_MB(taosQueueMemorySize(pQueue)); + int32_t total = streamQueueGetNumOfItems(pTask->outputq.queue); + double size = SIZE_IN_MiB(taosQueueMemorySize(pQueue)); // let's wait for there are enough space to hold this result pBlock - qDebug("s-task:%s outputQ is full, wait for 500ms and retry, outputQ items:%d, size:%.2fMiB", pTask->id.idStr, + stDebug("s-task:%s outputQ is full, wait for 500ms and retry, outputQ items:%d, size:%.2fMiB", pTask->id.idStr, total, size); taosMsleep(500); } int32_t code = taosWriteQitem(pQueue, pBlock); - int32_t total = streamQueueGetNumOfItems(pTask->outputInfo.queue); - double size = SIZE_IN_MB(taosQueueMemorySize(pQueue)); + int32_t total = streamQueueGetNumOfItems(pTask->outputq.queue); + double size = SIZE_IN_MiB(taosQueueMemorySize(pQueue)); if (code != 0) { - qError("s-task:%s failed to put res into outputQ, outputQ items:%d, size:%.2fMiB code:%s, result lost", + stError("s-task:%s failed to put res into outputQ, outputQ items:%d, size:%.2fMiB code:%s, result lost", pTask->id.idStr, total + 1, size, tstrerror(code)); } else { - qInfo("s-task:%s data put into outputQ, outputQ items:%d, size:%.2fMiB", pTask->id.idStr, total, size); + stDebug("s-task:%s data put into outputQ, outputQ items:%d, size:%.2fMiB", pTask->id.idStr, total, size); } return TSDB_CODE_SUCCESS; } -int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t cap, int32_t rate) { - if (cap < 100 || rate < 50 || pBucket == NULL) { - qError("failed to init sink task bucket, cap:%d, rate:%d", cap, rate); +int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t numRate, int32_t quotaRate) { + if (numCap < 10 || numRate < 10 || pBucket == NULL) { + stError("failed to init sink task bucket, cap:%d, 
rate:%d", numCap, numRate); return TSDB_CODE_INVALID_PARA; } - pBucket->capacity = cap; - pBucket->rate = rate; - pBucket->numOfToken = cap; + pBucket->numCapacity = numCap; + pBucket->numOfToken = numCap; + pBucket->numRate = numRate; + + pBucket->quotaRate = quotaRate; + pBucket->quotaCapacity = quotaRate * MAX_SMOOTH_BURST_RATIO; + pBucket->quotaRemain = pBucket->quotaCapacity; + pBucket->fillTimestamp = taosGetTimestampMs(); return TSDB_CODE_SUCCESS; } -static void fillBucket(STokenBucket* pBucket) { +static void fillTokenBucket(STokenBucket* pBucket, const char* id) { int64_t now = taosGetTimestampMs(); int64_t delta = now - pBucket->fillTimestamp; ASSERT(pBucket->numOfToken >= 0); - int32_t inc = (delta / 1000.0) * pBucket->rate; - if (inc > 0) { - if ((pBucket->numOfToken + inc) < pBucket->capacity) { - pBucket->numOfToken += inc; - } else { - pBucket->numOfToken = pBucket->capacity; - } - + int32_t incNum = (delta / 1000.0) * pBucket->numRate; + if (incNum > 0) { + pBucket->numOfToken = TMIN(pBucket->numOfToken + incNum, pBucket->numCapacity); pBucket->fillTimestamp = now; - qDebug("new token available, current:%d, inc:%d ts:%"PRId64, pBucket->numOfToken, inc, now); + } + + // increase the new available quota as time goes on + double incSize = (delta / 1000.0) * pBucket->quotaRate; + if (incSize > 0) { + pBucket->quotaRemain = TMIN(pBucket->quotaRemain + incSize, pBucket->quotaCapacity); + } + + if (incNum > 0 || incSize > 0) { + stDebug("new token and capacity available, current token:%d inc:%d, current quota:%.2fMiB inc:%.2fMiB, ts:%" PRId64 + " idle for %.2f Sec, %s", + pBucket->numOfToken, incNum, pBucket->quotaRemain, incSize, now, delta / 1000.0, id); } } -bool streamTaskHasAvailableToken(STokenBucket* pBucket) { - fillBucket(pBucket); +bool streamTaskExtractAvailableToken(STokenBucket* pBucket, const char* id) { + fillTokenBucket(pBucket, id); + if (pBucket->numOfToken > 0) { - --pBucket->numOfToken; - return true; + if (pBucket->quotaRemain > 0) { + pBucket->numOfToken -= 1; + return true; + } else { // no available size quota now + return false; + } } else { return false; } +} + +void streamTaskPutbackToken(STokenBucket* pBucket) { + pBucket->numOfToken = TMIN(pBucket->numOfToken + 1, pBucket->numCapacity); +} + +// size in KB +void streamTaskConsumeQuota(STokenBucket* pBucket, int32_t bytes) { + pBucket->quotaRemain -= SIZE_IN_MiB(bytes); } \ No newline at end of file diff --git a/source/libs/stream/src/streamRecover.c b/source/libs/stream/src/streamRecover.c index 54d5957900..cd15595411 100644 --- a/source/libs/stream/src/streamRecover.c +++ b/source/libs/stream/src/streamRecover.c @@ -13,26 +13,35 @@ * along with this program. If not, see . 
*/ -#include #include "streamInt.h" #include "trpc.h" #include "ttimer.h" #include "wal.h" -typedef struct SStreamTaskRetryInfo { +typedef struct SLaunchHTaskInfo { SStreamMeta* pMeta; - int32_t taskId; - int64_t streamId; -} SStreamTaskRetryInfo; + STaskId id; +} SLaunchHTaskInfo; -static int32_t streamSetParamForScanHistory(SStreamTask* pTask); -static void streamTaskSetRangeStreamCalc(SStreamTask* pTask); -static int32_t initScanHistoryReq(SStreamTask* pTask, SStreamScanHistoryReq* pReq, int8_t igUntreated); +typedef struct STaskRecheckInfo { + SStreamTask* pTask; + SStreamTaskCheckReq req; + void* checkTimer; +} STaskRecheckInfo; + +static int32_t streamSetParamForScanHistory(SStreamTask* pTask); +static void streamTaskSetRangeStreamCalc(SStreamTask* pTask); +static int32_t initScanHistoryReq(SStreamTask* pTask, SStreamScanHistoryReq* pReq, int8_t igUntreated); +static SLaunchHTaskInfo* createHTaskLaunchInfo(SStreamMeta* pMeta, int64_t streamId, int32_t taskId); +static void tryLaunchHistoryTask(void* param, void* tmrId); static void streamTaskSetReady(SStreamTask* pTask, int32_t numOfReqs) { + SStreamMeta* pMeta = pTask->pMeta; + int32_t vgId = pMeta->vgId; + if (pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY && pTask->info.taskLevel != TASK_LEVEL__SOURCE) { - pTask->numOfWaitingUpstream = taosArrayGetSize(pTask->pUpstreamInfoList); - qDebug("s-task:%s level:%d task wait for %d upstream tasks complete scan-history procedure, status:%s", + pTask->numOfWaitingUpstream = taosArrayGetSize(pTask->upstreamInfo.pList); + stDebug("s-task:%s level:%d task wait for %d upstream tasks complete scan-history procedure, status:%s", pTask->id.idStr, pTask->info.taskLevel, pTask->numOfWaitingUpstream, streamGetTaskStatusStr(pTask->status.taskStatus)); } @@ -40,9 +49,37 @@ static void streamTaskSetReady(SStreamTask* pTask, int32_t numOfReqs) { ASSERT(pTask->status.downstreamReady == 0); pTask->status.downstreamReady = 1; - int64_t el = (taosGetTimestampMs() - pTask->tsInfo.init); - qDebug("s-task:%s all %d downstream ready, init completed, elapsed time:%"PRId64"ms, task status:%s", - pTask->id.idStr, numOfReqs, el, streamGetTaskStatusStr(pTask->status.taskStatus)); + pTask->execInfo.start = taosGetTimestampMs(); + int64_t el = (pTask->execInfo.start - pTask->execInfo.init); + stDebug("s-task:%s all %d downstream ready, init completed, elapsed time:%" PRId64 "ms, task status:%s", + pTask->id.idStr, numOfReqs, el, streamGetTaskStatusStr(pTask->status.taskStatus)); + + taosWLockLatch(&pMeta->lock); + + STaskId id = streamTaskExtractKey(pTask); + taosHashPut(pMeta->startInfo.pReadyTaskSet, &id, sizeof(id), NULL, 0); + + int32_t numOfTotal = streamMetaGetNumOfTasks(pMeta); + + if (taosHashGetSize(pMeta->startInfo.pReadyTaskSet) == numOfTotal) { + STaskStartInfo* pStartInfo = &pMeta->startInfo; + pStartInfo->readyTs = pTask->execInfo.start; + + if (pStartInfo->startTs != 0) { + pStartInfo->elapsedTime = pStartInfo->readyTs - pStartInfo->startTs; + } else { + pStartInfo->elapsedTime = 0; + } + + streamMetaResetStartInfo(pStartInfo); + + stDebug("vgId:%d all %d task(s) are started successfully, last ready task:%s level:%d, startTs:%" PRId64 + ", readyTs:%" PRId64 " total elapsed time:%.2fs", + vgId, numOfTotal, pTask->id.idStr, pTask->info.taskLevel, pStartInfo->startTs, pStartInfo->readyTs, + pStartInfo->elapsedTime / 1000.0); + } + + taosWUnLockLatch(&pMeta->lock); } int32_t streamStartScanHistoryAsync(SStreamTask* pTask, int8_t igUntreated) { @@ -65,20 +102,7 @@ int32_t 
streamStartScanHistoryAsync(SStreamTask* pTask, int8_t igUntreated) { return 0; } -const char* streamGetTaskStatusStr(int32_t status) { - switch(status) { - case TASK_STATUS__NORMAL: return "normal"; - case TASK_STATUS__SCAN_HISTORY: return "scan-history"; - case TASK_STATUS__HALT: return "halt"; - case TASK_STATUS__PAUSE: return "paused"; - case TASK_STATUS__CK: return "check-point"; - case TASK_STATUS__DROPPING: return "dropping"; - case TASK_STATUS__STOP: return "stop"; - default:return ""; - } -} - -static int32_t doLaunchScanHistoryTask(SStreamTask* pTask) { +static int32_t doStartScanHistoryTask(SStreamTask* pTask) { SVersionRange* pRange = &pTask->dataRange.range; if (pTask->info.fillHistory) { streamSetParamForScanHistory(pTask); @@ -89,15 +113,18 @@ static int32_t doLaunchScanHistoryTask(SStreamTask* pTask) { return code; } -int32_t streamTaskLaunchScanHistory(SStreamTask* pTask) { +int32_t streamTaskStartScanHistory(SStreamTask* pTask) { + ASSERT(pTask->status.downstreamReady == 1); + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { if (pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) { - return doLaunchScanHistoryTask(pTask); + return doStartScanHistoryTask(pTask); } else { ASSERT(pTask->status.taskStatus == TASK_STATUS__NORMAL); - qDebug("s-task:%s no need to scan-history-data, status:%s, sched-status:%d, ver:%" PRId64, pTask->id.idStr, - streamGetTaskStatusStr(pTask->status.taskStatus), pTask->status.schedStatus, - walReaderGetCurrentVer(pTask->exec.pWalReader)); + stDebug("s-task:%s no need to scan-history data, status:%s, sched-status:%d, ver:%" PRId64, pTask->id.idStr, + streamGetTaskStatusStr(pTask->status.taskStatus), pTask->status.schedStatus, + walReaderGetCurrentVer(pTask->exec.pWalReader)); + streamTaskEnablePause(pTask); } } else if (pTask->info.taskLevel == TASK_LEVEL__AGG) { if (pTask->info.fillHistory) { @@ -105,15 +132,17 @@ int32_t streamTaskLaunchScanHistory(SStreamTask* pTask) { streamTaskEnablePause(pTask); } } else if (pTask->info.taskLevel == TASK_LEVEL__SINK) { - qDebug("s-task:%s sink task do nothing to handle scan-history", pTask->id.idStr); + if (pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) { + stDebug("s-task:%s sink task do nothing to handle scan-history", pTask->id.idStr); + } } return 0; } // check status static int32_t doCheckDownstreamStatus(SStreamTask* pTask) { - SDataRange* pRange = &pTask->dataRange; - STimeWindow* pWindow = &pRange->window; + SDataRange* pRange = &pTask->dataRange; + STimeWindow* pWindow = &pRange->window; SStreamTaskCheckReq req = { .streamId = pTask->id.streamId, @@ -126,24 +155,24 @@ static int32_t doCheckDownstreamStatus(SStreamTask* pTask) { // serialize streamProcessScanHistoryFinishRsp if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { req.reqId = tGenIdPI64(); - req.downstreamNodeId = pTask->fixedEpDispatcher.nodeId; - req.downstreamTaskId = pTask->fixedEpDispatcher.taskId; + req.downstreamNodeId = pTask->outputInfo.fixedDispatcher.nodeId; + req.downstreamTaskId = pTask->outputInfo.fixedDispatcher.taskId; pTask->checkReqId = req.reqId; - qDebug("s-task:%s check single downstream task:0x%x(vgId:%d) ver:%" PRId64 "-%" PRId64 " window:%" PRId64 - "-%" PRId64 ", stage:%"PRId64" req:0x%" PRIx64, - pTask->id.idStr, req.downstreamTaskId, req.downstreamNodeId, pRange->range.minVer, pRange->range.maxVer, - pWindow->skey, pWindow->ekey, req.stage, req.reqId); + stDebug("s-task:%s (vgId:%d) stage:%" PRId64 " check single downstream task:0x%x(vgId:%d) ver:%" PRId64 "-%" PRId64 + " window:%" PRId64 "-%" 
PRId64 " req:0x%" PRIx64, + pTask->id.idStr, pTask->info.nodeId, req.stage, req.downstreamTaskId, req.downstreamNodeId, + pRange->range.minVer, pRange->range.maxVer, pWindow->skey, pWindow->ekey, req.reqId); - streamDispatchCheckMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); + streamSendCheckMsg(pTask, &req, pTask->outputInfo.fixedDispatcher.nodeId, &pTask->outputInfo.fixedDispatcher.epSet); } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { - SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; + SArray* vgInfo = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos; int32_t numOfVgs = taosArrayGetSize(vgInfo); pTask->notReadyTasks = numOfVgs; pTask->checkReqIds = taosArrayInit(numOfVgs, sizeof(int64_t)); - qDebug("s-task:%s check %d downstream tasks, ver:%" PRId64 "-%" PRId64 " window:%" PRId64 "-%" PRId64, + stDebug("s-task:%s check %d downstream tasks, ver:%" PRId64 "-%" PRId64 " window:%" PRId64 "-%" PRId64, pTask->id.idStr, numOfVgs, pRange->range.minVer, pRange->range.maxVer, pWindow->skey, pWindow->ekey); for (int32_t i = 0; i < numOfVgs; i++) { @@ -152,24 +181,31 @@ static int32_t doCheckDownstreamStatus(SStreamTask* pTask) { taosArrayPush(pTask->checkReqIds, &req.reqId); req.downstreamNodeId = pVgInfo->vgId; req.downstreamTaskId = pVgInfo->taskId; - qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) (shuffle), idx:%d, stage:%" PRId64, - pTask->id.idStr, pTask->info.nodeId, req.downstreamTaskId, req.downstreamNodeId, i, req.stage); - streamDispatchCheckMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); + stDebug("s-task:%s (vgId:%d) stage:%" PRId64 " check downstream task:0x%x (vgId:%d) (shuffle), idx:%d", + pTask->id.idStr, pTask->info.nodeId, req.stage, req.downstreamTaskId, req.downstreamNodeId, i); + streamSendCheckMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); } } else { - qDebug("s-task:%s (vgId:%d) set downstream ready, since no downstream", pTask->id.idStr, pTask->info.nodeId); + stDebug("s-task:%s (vgId:%d) set downstream ready, since no downstream", pTask->id.idStr, pTask->info.nodeId); streamTaskSetReady(pTask, 0); streamTaskSetRangeStreamCalc(pTask); - streamTaskLaunchScanHistory(pTask); + streamTaskStartScanHistory(pTask); streamLaunchFillHistoryTask(pTask); } return 0; } -int32_t streamRecheckDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp) { - SStreamTaskCheckReq req = { +static STaskRecheckInfo* createRecheckInfo(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp) { + STaskRecheckInfo* pInfo = taosMemoryCalloc(1, sizeof(STaskRecheckInfo)); + if (pInfo == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + + pInfo->pTask = pTask; + pInfo->req = (SStreamTaskCheckReq){ .reqId = pRsp->reqId, .streamId = pRsp->streamId, .upstreamTaskId = pRsp->upstreamTaskId, @@ -180,49 +216,76 @@ int32_t streamRecheckDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp* p .stage = pTask->pMeta->stage, }; + return pInfo; +} + +static void destroyRecheckInfo(STaskRecheckInfo* pInfo) { + if (pInfo != NULL) { + taosTmrStop(pInfo->checkTimer); + pInfo->checkTimer = NULL; + taosMemoryFree(pInfo); + } +} + +static void recheckDownstreamTasks(void* param, void* tmrId) { + STaskRecheckInfo* pInfo = param; + SStreamTask* pTask = pInfo->pTask; + + SStreamTaskCheckReq* pReq = &pInfo->req; + if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { - qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) stage:%" PRId64 " (recheck)", pTask->id.idStr, - 
pTask->info.nodeId, req.downstreamTaskId, req.downstreamNodeId, req.stage); - streamDispatchCheckMsg(pTask, &req, pRsp->downstreamNodeId, &pTask->fixedEpDispatcher.epSet); + stDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) stage:%" PRId64 " (recheck)", pTask->id.idStr, + pTask->info.nodeId, pReq->downstreamTaskId, pReq->downstreamNodeId, pReq->stage); + streamSendCheckMsg(pTask, pReq, pReq->downstreamNodeId, &pTask->outputInfo.fixedDispatcher.epSet); } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { - SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; + SArray* vgInfo = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos; int32_t numOfVgs = taosArrayGetSize(vgInfo); for (int32_t i = 0; i < numOfVgs; i++) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); - if (pVgInfo->taskId == req.downstreamTaskId) { - qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) stage:%" PRId64 " (recheck)", pTask->id.idStr, - pTask->info.nodeId, req.downstreamTaskId, req.downstreamNodeId, req.stage); - streamDispatchCheckMsg(pTask, &req, pRsp->downstreamNodeId, &pVgInfo->epSet); + if (pVgInfo->taskId == pReq->downstreamTaskId) { + stDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) stage:%" PRId64 " (recheck)", pTask->id.idStr, + pTask->info.nodeId, pReq->downstreamTaskId, pReq->downstreamNodeId, pReq->stage); + streamSendCheckMsg(pTask, pReq, pReq->downstreamNodeId, &pVgInfo->epSet); } } } - return 0; + destroyRecheckInfo(pInfo); + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s complete send check in timer, ref:%d", pTask->id.idStr, ref); } int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage) { SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId); ASSERT(pInfo != NULL); + const char* id = pTask->id.idStr; if (stage == -1) { - qDebug("s-task:%s receive check msg from upstream task:0x%x, invalid stageId:%" PRId64 ", not ready", pTask->id.idStr, - upstreamTaskId, stage); + stDebug("s-task:%s receive check msg from upstream task:0x%x(vgId:%d), invalid stageId:%" PRId64 ", not ready", id, + upstreamTaskId, vgId, stage); return 0; } if (pInfo->stage == -1) { pInfo->stage = stage; - qDebug("s-task:%s receive check msg from upstream task:0x%x, init stage value:%" PRId64, pTask->id.idStr, - upstreamTaskId, stage); + stDebug("s-task:%s receive check msg from upstream task:0x%x(vgId:%d) first time, init stage value:%" PRId64, id, + upstreamTaskId, vgId, stage); } if (pInfo->stage < stage) { - qError("s-task:%s receive msg from upstream task:0x%x(vgId:%d), new stage received:%" PRId64 ", prev:%" PRId64, - pTask->id.idStr, upstreamTaskId, vgId, stage, pInfo->stage); + stError("s-task:%s receive check msg from upstream task:0x%x(vgId:%d), new stage received:%" PRId64 + ", prev:%" PRId64, + id, upstreamTaskId, vgId, stage, pInfo->stage); } - return ((pTask->status.downstreamReady == 1) && (pInfo->stage == stage))? 
1:0; + if (pInfo->stage != stage) { + return TASK_SELF_NEW_STAGE; + } else if (pTask->status.downstreamReady != 1) { + return TASK_DOWNSTREAM_NOT_READY; + } else { + return TASK_DOWNSTREAM_READY; + } } static void doProcessDownstreamReadyRsp(SStreamTask* pTask, int32_t numOfReqs) { @@ -236,15 +299,17 @@ static void doProcessDownstreamReadyRsp(SStreamTask* pTask, int32_t numOfReqs) { streamTaskSetRangeStreamCalc(pTask); if (status == TASK_STATUS__SCAN_HISTORY) { - qDebug("s-task:%s enter into scan-history data stage, status:%s", id, str); - streamTaskLaunchScanHistory(pTask); + stDebug("s-task:%s enter into scan-history data stage, status:%s", id, str); + streamTaskStartScanHistory(pTask); } else { + // fill-history tasks are not allowed to reach here. if (pTask->info.fillHistory == 1) { - qDebug("s-task:%s fill-history is set normal when start it, try to remove it,set it task to be dropping", id); + stDebug("s-task:%s fill-history task is set to normal when starting, try to remove it and set the task to dropping", id); pTask->status.taskStatus = TASK_STATUS__DROPPING; - ASSERT(pTask->historyTaskId.taskId == 0); + ASSERT(pTask->hTaskInfo.id.taskId == 0); } else { - qDebug("s-task:%s downstream tasks are ready, now ready for data from wal, status:%s", id, str); + stDebug("s-task:%s downstream tasks are ready, now ready for data from wal, status:%s", id, str); + streamTaskEnablePause(pTask); } } @@ -252,12 +317,16 @@ static void doProcessDownstreamReadyRsp(SStreamTask* pTask, int32_t numOfReqs) { streamLaunchFillHistoryTask(pTask); } -// todo handle error int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp) { ASSERT(pTask->id.taskId == pRsp->upstreamTaskId); const char* id = pTask->id.idStr; - if (pRsp->status == 1) { + if (streamTaskShouldStop(&pTask->status)) { + stDebug("s-task:%s should stop, do not check downstream again", id); + return TSDB_CODE_SUCCESS; + } + + if (pRsp->status == TASK_DOWNSTREAM_READY) { if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { bool found = false; @@ -283,9 +352,9 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs doProcessDownstreamReadyRsp(pTask, numOfReqs); } else { - int32_t total = taosArrayGetSize(pTask->shuffleDispatcher.dbInfo.pVgroupInfos); - qDebug("s-task:%s (vgId:%d) recv check rsp from task:0x%x (vgId:%d) status:%d, total:%d not ready:%d", id, - pRsp->upstreamNodeId, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->status, total, left); + int32_t total = taosArrayGetSize(pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos); + stDebug("s-task:%s (vgId:%d) recv check rsp from task:0x%x (vgId:%d) status:%d, total:%d not ready:%d", id, + pRsp->upstreamNodeId, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->status, total, left); } } else { ASSERT(pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH); @@ -296,10 +365,25 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs doProcessDownstreamReadyRsp(pTask, 1); } } else { // not ready, wait for 100ms and retry - qDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, stage:%d, wait for 100ms and retry", id, - pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->oldStage); - taosMsleep(100); - streamRecheckDownstream(pTask, pRsp); + if (pRsp->status == TASK_DOWNSTREAM_NOT_LEADER) { + stError( + "s-task:%s downstream taskId:0x%x (vgId:%d) vnode-transfer/leader-change detected, not send check again, " + "roll-back needed", + id, pRsp->downstreamTaskId, pRsp->downstreamNodeId); + } else 
if (pRsp->status == TASK_SELF_NEW_STAGE) { + stError( + "s-task:%s vnode-transfer/leader-change/restart detected, old stage:%d, current stage:%d, not send check " + "again, roll-back needed", + id, pRsp->oldStage, (int32_t)pTask->pMeta->stage); + } else { + STaskRecheckInfo* pInfo = createRecheckInfo(pTask, pRsp); + + int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, stage:%d, retry in 100ms, ref:%d ", id, + pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->oldStage, ref); + + taosTmrReset(recheckDownstreamTasks, CHECK_DOWNSTREAM_INTERVAL, pInfo, streamEnv.timer, &pInfo->checkTimer); + } } return 0; @@ -313,7 +397,7 @@ int32_t streamSendCheckRsp(const SStreamMeta* pMeta, const SStreamTaskCheckReq* tEncodeSize(tEncodeStreamTaskCheckRsp, pRsp, len, code); if (code < 0) { - qError("vgId:%d failed to encode task check rsp, s-task:0x%x", pMeta->vgId, taskId); + stError("vgId:%d failed to encode task check rsp, s-task:0x%x", pMeta->vgId, taskId); return -1; } @@ -333,27 +417,39 @@ int32_t streamSendCheckRsp(const SStreamMeta* pMeta, const SStreamTaskCheckReq* // common int32_t streamSetParamForScanHistory(SStreamTask* pTask) { - qDebug("s-task:%s set operator option for scan-history data", pTask->id.idStr); + stDebug("s-task:%s set operator option for scan-history data", pTask->id.idStr); return qSetStreamOperatorOptionForScanHistory(pTask->exec.pExecutor); } int32_t streamRestoreParam(SStreamTask* pTask) { - qDebug("s-task:%s restore operator param after scan-history", pTask->id.idStr); + stDebug("s-task:%s restore operator param after scan-history", pTask->id.idStr); return qRestoreStreamOperatorOption(pTask->exec.pExecutor); } int32_t streamSetStatusNormal(SStreamTask* pTask) { int32_t status = atomic_load_8(&pTask->status.taskStatus); if (status == TASK_STATUS__DROPPING) { - qError("s-task:%s cannot be set normal, since in dropping state", pTask->id.idStr); + stError("s-task:%s cannot be set normal, since in dropping state", pTask->id.idStr); return -1; } else { - qDebug("s-task:%s set task status to be normal, prev:%s", pTask->id.idStr, streamGetTaskStatusStr(status)); + stDebug("s-task:%s set task status to be normal, prev:%s", pTask->id.idStr, streamGetTaskStatusStr(status)); atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL); return 0; } } +int32_t streamSetStatusUnint(SStreamTask* pTask) { + int32_t status = atomic_load_8(&pTask->status.taskStatus); + if (status == TASK_STATUS__DROPPING) { + stError("s-task:%s cannot be set uninit, since in dropping state", pTask->id.idStr); + return -1; + } else { + stDebug("s-task:%s set task status to be uninit, prev:%s", pTask->id.idStr, streamGetTaskStatusStr(status)); + atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__UNINIT); + return 0; + } +} + // source int32_t streamSetParamForStreamScannerStep1(SStreamTask* pTask, SVersionRange *pVerRange, STimeWindow* pWindow) { return qStreamSourceScanParamForHistoryScanStep1(pTask->exec.pExecutor, pVerRange, pWindow); @@ -420,7 +516,7 @@ int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, SStreamScanHistory ASSERT(taskLevel == TASK_LEVEL__AGG || taskLevel == TASK_LEVEL__SINK); if (pTask->status.taskStatus != TASK_STATUS__SCAN_HISTORY) { - qError("s-task:%s not in scan-history status, status:%s return upstream:0x%x scan-history finish directly", + stError("s-task:%s not in scan-history status, status:%s return upstream:0x%x scan-history finish directly", pTask->id.idStr, 
streamGetTaskStatusStr(pTask->status.taskStatus), pReq->upstreamTaskId); void* pBuf = NULL; @@ -431,7 +527,7 @@ int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, SStreamScanHistory initRpcMsg(&msg, 0, pBuf, sizeof(SMsgHead) + len); tmsgSendRsp(&msg); - qDebug("s-task:%s level:%d notify upstream:0x%x(vgId:%d) to continue process data in WAL", pTask->id.idStr, + stDebug("s-task:%s level:%d notify upstream:0x%x(vgId:%d) to continue process data in WAL", pTask->id.idStr, pTask->info.taskLevel, pReq->upstreamTaskId, pReq->upstreamNodeId); return 0; } @@ -443,8 +539,8 @@ int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, SStreamScanHistory ASSERT(left >= 0); if (left == 0) { - int32_t numOfTasks = taosArrayGetSize(pTask->pUpstreamInfoList); - qDebug( + int32_t numOfTasks = taosArrayGetSize(pTask->upstreamInfo.pList); + stDebug( "s-task:%s all %d upstream tasks finish scan-history data, set param for agg task for stream data and send " "rsp to all upstream tasks", pTask->id.idStr, numOfTasks); @@ -467,7 +563,7 @@ int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, SStreamScanHistory } } } else { - qDebug("s-task:%s receive scan-history data finish msg from upstream:0x%x(index:%d), unfinished:%d", + stDebug("s-task:%s receive scan-history data finish msg from upstream:0x%x(index:%d), unfinished:%d", pTask->id.idStr, pReq->upstreamTaskId, pReq->childId, left); } @@ -480,7 +576,7 @@ int32_t streamProcessScanHistoryFinishRsp(SStreamTask* pTask) { // execute in the scan history complete call back msg, ready to process data from inputQ streamSetStatusNormal(pTask); - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + streamTaskSetSchedStatusInActive(pTask); taosWLockLatch(&pMeta->lock); streamMetaSaveTask(pMeta, pTask); @@ -499,17 +595,20 @@ int32_t streamProcessScanHistoryFinishRsp(SStreamTask* pTask) { } static void checkFillhistoryTaskStatus(SStreamTask* pTask, SStreamTask* pHTask) { - pHTask->dataRange.range.minVer = 0; + SDataRange* pRange = &pHTask->dataRange; + // the query version range should be limited to the already processed data - pHTask->dataRange.range.maxVer = pTask->chkInfo.nextProcessVer - 1; + pRange->range.minVer = 0; + pRange->range.maxVer = pTask->chkInfo.nextProcessVer - 1; + pHTask->execInfo.init = taosGetTimestampMs(); if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { - qDebug("s-task:%s set the launch condition for fill-history s-task:%s, window:%" PRId64 " - %" PRId64 - " ver range:%" PRId64 " - %" PRId64, - pTask->id.idStr, pHTask->id.idStr, pHTask->dataRange.window.skey, pHTask->dataRange.window.ekey, - pHTask->dataRange.range.minVer, pHTask->dataRange.range.maxVer); + stDebug("s-task:%s set the launch condition for fill-history s-task:%s, window:%" PRId64 " - %" PRId64 + " ver range:%" PRId64 " - %" PRId64", init:%"PRId64, + pTask->id.idStr, pHTask->id.idStr, pRange->window.skey, pRange->window.ekey, + pRange->range.minVer, pRange->range.maxVer, pHTask->execInfo.init); } else { - qDebug("s-task:%s no fill history condition for non-source task:%s", pTask->id.idStr, pHTask->id.idStr); + stDebug("s-task:%s no fill history condition for non-source task:%s", pTask->id.idStr, pHTask->id.idStr); } // check if downstream tasks have been ready @@ -517,111 +616,147 @@ static void checkFillhistoryTaskStatus(SStreamTask* pTask, SStreamTask* pHTask) } static void tryLaunchHistoryTask(void* param, void* tmrId) { - SStreamTaskRetryInfo* pInfo = param; - SStreamMeta* pMeta = pInfo->pMeta; - - qDebug("s-task:0x%x in timer to launch 
related history task", pInfo->taskId); + SLaunchHTaskInfo* pInfo = param; + SStreamMeta* pMeta = pInfo->pMeta; taosWLockLatch(&pMeta->lock); - int64_t keys[2] = {pInfo->streamId, pInfo->taskId}; - - SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &pInfo->id, sizeof(pInfo->id)); if (ppTask) { ASSERT((*ppTask)->status.timerActive >= 1); if (streamTaskShouldStop(&(*ppTask)->status)) { const char* pStatus = streamGetTaskStatusStr((*ppTask)->status.taskStatus); - qDebug("s-task:%s status:%s quit timer task", (*ppTask)->id.idStr, pStatus); + + int32_t ref = atomic_sub_fetch_32(&(*ppTask)->status.timerActive, 1); + stDebug("s-task:%s status:%s should stop, quit launch fill-history task timer, retry:%d, ref:%d", + (*ppTask)->id.idStr, pStatus, (*ppTask)->hTaskInfo.retryTimes, ref); taosMemoryFree(pInfo); - atomic_sub_fetch_8(&(*ppTask)->status.timerActive, 1); taosWUnLockLatch(&pMeta->lock); return; } } taosWUnLockLatch(&pMeta->lock); - SStreamTask* pTask = streamMetaAcquireTask(pMeta, pInfo->streamId, pInfo->taskId); + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pInfo->id.streamId, pInfo->id.taskId); if (pTask != NULL) { - ASSERT(pTask->status.timerActive >= 1); - // abort the timer if intend to stop task - SStreamTask* pHTask = streamMetaAcquireTask(pMeta, pTask->historyTaskId.streamId, pTask->historyTaskId.taskId); - if (pHTask == NULL && (!streamTaskShouldStop(&pTask->status))) { - const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); - qWarn( - "s-task:%s vgId:%d status:%s failed to launch history task:0x%x, since it may not be built, or may have been " - "destroyed, or should stop", - pTask->id.idStr, pMeta->vgId, pStatus, pTask->historyTaskId.taskId); + SHistoryTaskInfo* pHTaskInfo = &pTask->hTaskInfo; - taosTmrReset(tryLaunchHistoryTask, 100, pInfo, streamEnv.timer, &pTask->launchTaskTimer); + pHTaskInfo->tickCount -= 1; + if (pHTaskInfo->tickCount > 0) { + taosTmrReset(tryLaunchHistoryTask, LAUNCH_HTASK_INTERVAL, pInfo, streamEnv.timer, &pHTaskInfo->pTimer); streamMetaReleaseTask(pMeta, pTask); return; } - if (pHTask != NULL) { - checkFillhistoryTaskStatus(pTask, pHTask); - streamMetaReleaseTask(pMeta, pHTask); - } + if (pHTaskInfo->retryTimes > MAX_RETRY_LAUNCH_HISTORY_TASK) { + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + streamMetaReleaseTask(pMeta, pTask); - // not in timer anymore - atomic_sub_fetch_8(&pTask->status.timerActive, 1); - streamMetaReleaseTask(pMeta, pTask); + stError("s-task:%s max retry:%d reached, quit from retrying launch related fill-history task:0x%x, ref:%d", + pTask->id.idStr, MAX_RETRY_LAUNCH_HISTORY_TASK, (int32_t)pHTaskInfo->id.taskId, ref); + + pHTaskInfo->id.taskId = 0; + pHTaskInfo->id.streamId = 0; + } else { // not reach the limitation yet, let's continue retrying launch related fill-history task. 
+ streamTaskSetRetryInfoForLaunch(pHTaskInfo); + ASSERT(pTask->status.timerActive >= 1); + + // abort the timer if intend to stop task + SStreamTask* pHTask = streamMetaAcquireTask(pMeta, pHTaskInfo->id.streamId, pHTaskInfo->id.taskId); + if (pHTask == NULL && (!streamTaskShouldStop(&pTask->status))) { + const char* p = streamGetTaskStatusStr(pTask->status.taskStatus); + int32_t hTaskId = pHTaskInfo->id.taskId; + stDebug( + "s-task:%s status:%s failed to launch fill-history task:0x%x, retry launch:%dms, retryCount:%d", + pTask->id.idStr, p, hTaskId, pHTaskInfo->waitInterval, pHTaskInfo->retryTimes); + + taosTmrReset(tryLaunchHistoryTask, LAUNCH_HTASK_INTERVAL, pInfo, streamEnv.timer, &pHTaskInfo->pTimer); + streamMetaReleaseTask(pMeta, pTask); + return; + } + + if (pHTask != NULL) { + checkFillhistoryTaskStatus(pTask, pHTask); + streamMetaReleaseTask(pMeta, pHTask); + } + + // not in timer anymore + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:0x%x fill-history task launch completed, retry times:%d, ref:%d", (int32_t)pInfo->id.taskId, + pHTaskInfo->retryTimes, ref); + streamMetaReleaseTask(pMeta, pTask); + } } else { - qError("s-task:0x%x failed to load task, it may have been destroyed", pInfo->taskId); + stError("s-task:0x%x failed to load task, it may have been destroyed, not launch related fill-history task", + (int32_t)pInfo->id.taskId); } taosMemoryFree(pInfo); } -// todo fix the bug: 2. race condition +SLaunchHTaskInfo* createHTaskLaunchInfo(SStreamMeta* pMeta, int64_t streamId, int32_t taskId) { + SLaunchHTaskInfo* pInfo = taosMemoryCalloc(1, sizeof(SLaunchHTaskInfo)); + if (pInfo == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + + pInfo->id.taskId = taskId; + pInfo->id.streamId = streamId; + pInfo->pMeta = pMeta; + return pInfo; +} + // an fill history task needs to be started. 
int32_t streamLaunchFillHistoryTask(SStreamTask* pTask) { SStreamMeta* pMeta = pTask->pMeta; - int32_t hTaskId = pTask->historyTaskId.taskId; + int32_t hTaskId = pTask->hTaskInfo.id.taskId; if (hTaskId == 0) { return TSDB_CODE_SUCCESS; } ASSERT(pTask->status.downstreamReady == 1); - qDebug("s-task:%s start to launch related fill-history task:0x%" PRIx64 "-0x%x", pTask->id.idStr, - pTask->historyTaskId.streamId, hTaskId); - - int64_t keys[2] = {pTask->historyTaskId.streamId, hTaskId}; + stDebug("s-task:%s start to launch related fill-history task:0x%" PRIx64 "-0x%x", pTask->id.idStr, + pTask->hTaskInfo.id.streamId, hTaskId); // Set the execute conditions, including the query time window and the version range - SStreamTask** pHTask = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + SStreamTask** pHTask = taosHashGet(pMeta->pTasksMap, &pTask->hTaskInfo.id, sizeof(pTask->hTaskInfo.id)); if (pHTask == NULL) { - qWarn("s-task:%s vgId:%d failed to launch history task:0x%x, since it is not built yet", pTask->id.idStr, - pMeta->vgId, hTaskId); + stWarn("s-task:%s vgId:%d failed to launch history task:0x%x, since not built yet", pTask->id.idStr, pMeta->vgId, + hTaskId); - SStreamTaskRetryInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamTaskRetryInfo)); - pInfo->taskId = pTask->id.taskId; - pInfo->streamId = pTask->id.streamId; - pInfo->pMeta = pTask->pMeta; - - if (pTask->launchTaskTimer == NULL) { - pTask->launchTaskTimer = taosTmrStart(tryLaunchHistoryTask, 100, pInfo, streamEnv.timer); - if (pTask->launchTaskTimer == NULL) { - // todo failed to create timer - taosMemoryFree(pInfo); - } else { - int32_t ref = atomic_add_fetch_8(&pTask->status.timerActive, 1);// timer is active - ASSERT(ref == 1); - qDebug("s-task:%s set timer active flag", pTask->id.idStr); - } - } else { // timer exists - ASSERT(pTask->status.timerActive == 1); - qDebug("s-task:%s set timer active flag, task timer not null", pTask->id.idStr); - taosTmrReset(tryLaunchHistoryTask, 100, pInfo, streamEnv.timer, &pTask->launchTaskTimer); + SLaunchHTaskInfo* pInfo = createHTaskLaunchInfo(pTask->pMeta, pTask->id.streamId, pTask->id.taskId); + if (pInfo == NULL) { + stError("s-task:%s failed to launch related fill-history task, since Out Of Memory", pTask->id.idStr); + return terrno; + } + + streamTaskInitForLaunchHTask(&pTask->hTaskInfo); + if (pTask->hTaskInfo.pTimer == NULL) { + int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); + pTask->hTaskInfo.pTimer = taosTmrStart(tryLaunchHistoryTask, WAIT_FOR_MINIMAL_INTERVAL, pInfo, streamEnv.timer); + if (pTask->hTaskInfo.pTimer == NULL) { + atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stError("s-task:%s failed to start timer, related fill-history task not launched, ref:%d", pTask->id.idStr, + pTask->status.timerActive); + taosMemoryFree(pInfo); + } else { + ASSERT(ref >= 1); + stDebug("s-task:%s set timer active flag, ref:%d", pTask->id.idStr, ref); + } + } else { // timer exists + ASSERT(pTask->status.timerActive >= 1); + stDebug("s-task:%s set timer active flag, task timer not null", pTask->id.idStr); + taosTmrReset(tryLaunchHistoryTask, WAIT_FOR_MINIMAL_INTERVAL, pInfo, streamEnv.timer, &pTask->hTaskInfo.pTimer); } - // try again in 100ms return TSDB_CODE_SUCCESS; } if ((*pHTask)->status.downstreamReady == 1) { - qDebug("s-task:%s fill-history task is ready, no need to check downstream", (*pHTask)->id.idStr); + stDebug("s-task:%s fill-history task is ready, no need to check downstream", (*pHTask)->id.idStr); } else { checkFillhistoryTaskStatus(pTask, *pHTask); } @@ 
-657,24 +792,24 @@ int32_t streamTaskFillHistoryFinished(SStreamTask* pTask) { return qStreamInfoResetTimewindowFilter(exec); } -bool streamHistoryTaskSetVerRangeStep2(SStreamTask* pTask, int64_t latestVer) { +bool streamHistoryTaskSetVerRangeStep2(SStreamTask* pTask, int64_t nextProcessVer) { SVersionRange* pRange = &pTask->dataRange.range; - ASSERT(latestVer >= pRange->maxVer); + ASSERT(nextProcessVer >= pRange->maxVer); - int64_t nextStartVer = pRange->maxVer + 1; - if (nextStartVer > latestVer - 1) { - // no input data yet. no need to execute the secondardy scan while stream task halt + int64_t walScanStartVer = pRange->maxVer + 1; + if (walScanStartVer > nextProcessVer - 1) { + // no input data yet. no need to execute the secondary scan while stream task halt streamTaskFillHistoryFinished(pTask); - qDebug( + stDebug( "s-task:%s no need to perform secondary scan-history data(step 2), since no data ingest during step1 scan, " "related stream task currentVer:%" PRId64, - pTask->id.idStr, latestVer); + pTask->id.idStr, nextProcessVer); return true; } else { // 2. do secondary scan of the history data, the time window remain, and the version range is updated to // [pTask->dataRange.range.maxVer, ver1] - pRange->minVer = nextStartVer; - pRange->maxVer = latestVer - 1; + pRange->minVer = walScanStartVer; + pRange->maxVer = nextProcessVer - 1; return false; } } @@ -760,20 +895,19 @@ int32_t tDecodeStreamScanHistoryFinishReq(SDecoder* pDecoder, SStreamScanHistory } void streamTaskSetRangeStreamCalc(SStreamTask* pTask) { - if (pTask->historyTaskId.taskId == 0) { - SDataRange* pRange = &pTask->dataRange; + SDataRange* pRange = &pTask->dataRange; + + if (pTask->hTaskInfo.id.taskId == 0) { if (pTask->info.fillHistory == 1) { - qDebug("s-task:%s fill-history task, time window:%" PRId64 "-%" PRId64 ", verRange:%" PRId64 - "-%" PRId64, - pTask->id.idStr, pRange->window.skey, pRange->window.ekey, pRange->range.minVer, pRange->range.maxVer); + stDebug("s-task:%s fill-history task, time window:%" PRId64 "-%" PRId64 ", verRange:%" PRId64 "-%" PRId64, + pTask->id.idStr, pRange->window.skey, pRange->window.ekey, pRange->range.minVer, pRange->range.maxVer); } else { - qDebug("s-task:%s no related fill-history task, stream time window:%" PRId64 "-%" PRId64 ", verRange:%" PRId64 - "-%" PRId64, - pTask->id.idStr, pRange->window.skey, pRange->window.ekey, pRange->range.minVer, pRange->range.maxVer); + stDebug( + "s-task:%s no related fill-history task, stream time window and verRange are not set. 
default stream time "
+            "window:%" PRId64 "-%" PRId64 ", verRange:%" PRId64 "-%" PRId64,
+            pTask->id.idStr, pRange->window.skey, pRange->window.ekey, pRange->range.minVer, pRange->range.maxVer);
    }
  } else {
-    SDataRange* pRange = &pTask->dataRange;
-
    int64_t ekey = 0;
    if (pRange->window.ekey < INT64_MAX) {
      ekey = pRange->window.ekey + 1;
@@ -788,17 +922,17 @@ void streamTaskSetRangeStreamCalc(SStreamTask* pTask) {
      pRange->range.minVer = 0;
      pRange->range.maxVer = ver;

-    qDebug("s-task:%s level:%d related fill-history task exists, update stream calc time window:%" PRId64 " - %" PRId64
-           ", verRang:%" PRId64 " - %" PRId64,
-           pTask->id.idStr, pTask->info.taskLevel, pRange->window.skey, pRange->window.ekey, pRange->range.minVer,
-           pRange->range.maxVer);
+    stDebug("s-task:%s level:%d related fill-history task exists, update stream calc time window:%" PRId64 " - %" PRId64
+            ", verRange:%" PRId64 " - %" PRId64,
+            pTask->id.idStr, pTask->info.taskLevel, pRange->window.skey, pRange->window.ekey, pRange->range.minVer,
+            pRange->range.maxVer);
  }
}

// only the downstream tasks are ready, set the task to be ready to work.
void streamTaskCheckDownstream(SStreamTask* pTask) {
  if (pTask->info.fillHistory) {
-    qDebug("s-task:%s fill history task, wait for being launched", pTask->id.idStr);
+    stDebug("s-task:%s fill history task, wait for being launched", pTask->id.idStr);
    return;
  }

@@ -812,36 +946,42 @@ void streamTaskPause(SStreamTask* pTask, SStreamMeta* pMeta) {
  int8_t status = pTask->status.taskStatus;

  if (status == TASK_STATUS__DROPPING) {
-    qDebug("vgId:%d s-task:%s task already dropped, do nothing", pMeta->vgId, pTask->id.idStr);
+    stDebug("vgId:%d s-task:%s task already dropped, do nothing", pMeta->vgId, pTask->id.idStr);
    return;
  }

  const char* str = streamGetTaskStatusStr(status);
  if (status == TASK_STATUS__STOP || status == TASK_STATUS__PAUSE) {
-    qDebug("vgId:%d s-task:%s task already stopped/paused, status:%s, do nothing", pMeta->vgId, pTask->id.idStr, str);
+    stDebug("vgId:%d s-task:%s task already stopped/paused, status:%s, do nothing", pMeta->vgId, pTask->id.idStr, str);
    return;
  }

  if(pTask->info.taskLevel == TASK_LEVEL__SINK) {
-    int32_t num = atomic_add_fetch_32(&pMeta->pauseTaskNum, 1);
-    qInfo("vgId:%d s-task:%s pause stream sink task. pause task num:%d", pMeta->vgId, pTask->id.idStr, num);
+    int32_t num = atomic_add_fetch_32(&pMeta->numOfPausedTasks, 1);
+    stInfo("vgId:%d s-task:%s pause stream sink task. 
pause task num:%d", pMeta->vgId, pTask->id.idStr, num); return; } while (!pTask->status.pauseAllowed || (pTask->status.taskStatus == TASK_STATUS__HALT)) { status = pTask->status.taskStatus; if (status == TASK_STATUS__DROPPING) { - qDebug("vgId:%d s-task:%s task already dropped, do nothing", pMeta->vgId, pTask->id.idStr); + stDebug("vgId:%d s-task:%s task already dropped, do nothing", pMeta->vgId, pTask->id.idStr); return; } if (status == TASK_STATUS__STOP || status == TASK_STATUS__PAUSE) { - qDebug("vgId:%d s-task:%s task already stopped/paused, status:%s, do nothing", pMeta->vgId, pTask->id.idStr, str); + stDebug("vgId:%d s-task:%s task already stopped/paused, status:%s, do nothing", pMeta->vgId, pTask->id.idStr, str); return; } +// +// if (pTask->status.downstreamReady == 0) { +// ASSERT(pTask->execInfo.start == 0); +// stDebug("s-task:%s in check downstream procedure, abort and paused", pTask->id.idStr); +// break; +// } const char* pStatus = streamGetTaskStatusStr(status); - qDebug("s-task:%s wait for the task can be paused, status:%s, vgId:%d", pTask->id.idStr, pStatus, pMeta->vgId); + stDebug("s-task:%s wait for the task can be paused, status:%s, vgId:%d", pTask->id.idStr, pStatus, pMeta->vgId); taosMsleep(100); } @@ -851,14 +991,14 @@ void streamTaskPause(SStreamTask* pTask, SStreamMeta* pMeta) { status = pTask->status.taskStatus; if (status == TASK_STATUS__DROPPING || status == TASK_STATUS__STOP) { taosWUnLockLatch(&pMeta->lock); - qDebug("vgId:%d s-task:%s task already dropped/stopped/paused, do nothing", pMeta->vgId, pTask->id.idStr); + stDebug("vgId:%d s-task:%s task already dropped/stopped/paused, do nothing", pMeta->vgId, pTask->id.idStr); return; } atomic_store_8(&pTask->status.keepTaskStatus, pTask->status.taskStatus); atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE); - int32_t num = atomic_add_fetch_32(&pMeta->pauseTaskNum, 1); - qInfo("vgId:%d s-task:%s pause stream task. pause task num:%d", pMeta->vgId, pTask->id.idStr, num); + int32_t num = atomic_add_fetch_32(&pMeta->numOfPausedTasks, 1); + stInfo("vgId:%d s-task:%s pause stream task. pause task num:%d", pMeta->vgId, pTask->id.idStr, num); taosWUnLockLatch(&pMeta->lock); // in case of fill-history task, stop the tsdb file scan operation. @@ -868,7 +1008,7 @@ void streamTaskPause(SStreamTask* pTask, SStreamMeta* pMeta) { } int64_t el = taosGetTimestampMs() - st; - qDebug("vgId:%d s-task:%s set pause flag, prev:%s, pause elapsed time:%dms", pMeta->vgId, pTask->id.idStr, + stDebug("vgId:%d s-task:%s set pause flag, prev:%s, pause elapsed time:%dms", pMeta->vgId, pTask->id.idStr, streamGetTaskStatusStr(pTask->status.keepTaskStatus), (int32_t)el); } @@ -877,13 +1017,13 @@ void streamTaskResume(SStreamTask* pTask, SStreamMeta* pMeta) { if (status == TASK_STATUS__PAUSE) { pTask->status.taskStatus = pTask->status.keepTaskStatus; pTask->status.keepTaskStatus = TASK_STATUS__NORMAL; - int32_t num = atomic_sub_fetch_32(&pMeta->pauseTaskNum, 1); - qInfo("vgId:%d s-task:%s resume from pause, status:%s. pause task num:%d", pMeta->vgId, pTask->id.idStr, streamGetTaskStatusStr(status), num); + int32_t num = atomic_sub_fetch_32(&pMeta->numOfPausedTasks, 1); + stInfo("vgId:%d s-task:%s resume from pause, status:%s. pause task num:%d", pMeta->vgId, pTask->id.idStr, streamGetTaskStatusStr(status), num); } else if (pTask->info.taskLevel == TASK_LEVEL__SINK) { - int32_t num = atomic_sub_fetch_32(&pMeta->pauseTaskNum, 1); - qInfo("vgId:%d s-task:%s sink task.resume from pause, status:%s. 
pause task num:%d", pMeta->vgId, pTask->id.idStr, streamGetTaskStatusStr(status), num); + int32_t num = atomic_sub_fetch_32(&pMeta->numOfPausedTasks, 1); + stInfo("vgId:%d s-task:%s sink task.resume from pause, status:%s. pause task num:%d", pMeta->vgId, pTask->id.idStr, streamGetTaskStatusStr(status), num); } else { - qError("s-task:%s not in pause, failed to resume, status:%s", pTask->id.idStr, streamGetTaskStatusStr(status)); + stError("s-task:%s not in pause, failed to resume, status:%s", pTask->id.idStr, streamGetTaskStatusStr(status)); } } @@ -892,57 +1032,28 @@ void streamTaskDisablePause(SStreamTask* pTask) { // pre-condition check const char* id = pTask->id.idStr; while (pTask->status.taskStatus == TASK_STATUS__PAUSE) { - qDebug("s-task:%s already in pause, wait for pause being cancelled, and set pause disabled, recheck in 100ms", id); + stDebug("s-task:%s already in pause, wait for pause being cancelled, and set pause disabled, recheck in 100ms", id); taosMsleep(100); } - qDebug("s-task:%s disable task pause", id); + stDebug("s-task:%s disable task pause", id); pTask->status.pauseAllowed = 0; } void streamTaskEnablePause(SStreamTask* pTask) { - qDebug("s-task:%s enable task pause", pTask->id.idStr); + stDebug("s-task:%s enable task pause", pTask->id.idStr); pTask->status.pauseAllowed = 1; } -void streamTaskHalt(SStreamTask* pTask) { - int8_t status = pTask->status.taskStatus; - if (status == TASK_STATUS__DROPPING || status == TASK_STATUS__STOP) { - return; - } - - if (status == TASK_STATUS__HALT) { - return; - } - - // wait for checkpoint completed - while(pTask->status.taskStatus == TASK_STATUS__CK) { - qDebug("s-task:%s status:%s during generating checkpoint, wait for 1sec and retry set status:halt", pTask->id.idStr, - streamGetTaskStatusStr(TASK_STATUS__CK)); - taosMsleep(1000); - } - - // upgrade to halt status - if (status == TASK_STATUS__PAUSE) { - qDebug("s-task:%s upgrade status to %s from %s", pTask->id.idStr, streamGetTaskStatusStr(TASK_STATUS__HALT), - streamGetTaskStatusStr(TASK_STATUS__PAUSE)); - } else { - qDebug("s-task:%s halt task", pTask->id.idStr); - } - - pTask->status.keepTaskStatus = status; - pTask->status.taskStatus = TASK_STATUS__HALT; -} - void streamTaskResumeFromHalt(SStreamTask* pTask) { const char* id = pTask->id.idStr; int8_t status = pTask->status.taskStatus; if (status != TASK_STATUS__HALT) { - qError("s-task:%s not in halt status, status:%s", id, streamGetTaskStatusStr(status)); + stError("s-task:%s not in halt status, status:%s", id, streamGetTaskStatusStr(status)); return; } pTask->status.taskStatus = pTask->status.keepTaskStatus; pTask->status.keepTaskStatus = TASK_STATUS__NORMAL; - qDebug("s-task:%s resume from halt, current status:%s", id, streamGetTaskStatusStr(pTask->status.taskStatus)); + stDebug("s-task:%s resume from halt, current status:%s", id, streamGetTaskStatusStr(pTask->status.taskStatus)); } diff --git a/source/libs/stream/src/streamSnapshot.c b/source/libs/stream/src/streamSnapshot.c index 1e27aec03b..6a96278bc0 100644 --- a/source/libs/stream/src/streamSnapshot.c +++ b/source/libs/stream/src/streamSnapshot.c @@ -17,6 +17,7 @@ #include "query.h" #include "rocksdb/c.h" #include "streamBackendRocksdb.h" +#include "streamInt.h" #include "tcommon.h" enum SBackendFileType { @@ -122,10 +123,11 @@ int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, int64_t chk chkpId); if (taosIsDir(tdir)) { validChkp = 1; - qInfo("%s start to read snap %s", STREAM_STATE_TRANSFER, tdir); + stInfo("%s start to read snap %s", 
STREAM_STATE_TRANSFER, tdir);
      streamBackendAddInUseChkp(pMeta, chkpId);
    } else {
-    qWarn("%s failed to read from %s, reason: dir not exist,retry to default state dir", STREAM_STATE_TRANSFER, tdir);
+    stWarn("%s failed to read from %s, reason: dir not exist,retry to default state dir", STREAM_STATE_TRANSFER,
+           tdir);
    }
  }

@@ -137,22 +139,22 @@ int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, int64_t chk
    taosMemoryFree(tdir);
    tdir = chkpdir;

-    qInfo("%s start to trigger checkpoint on %s", STREAM_STATE_TRANSFER, tdir);
+    stInfo("%s start to trigger checkpoint on %s", STREAM_STATE_TRANSFER, tdir);

    code = streamBackendTriggerChkp(pMeta, tdir);
    if (code != 0) {
-      qError("%s failed to trigger chekckpoint at %s", STREAM_STATE_TRANSFER, tdir);
+      stError("%s failed to trigger checkpoint at %s", STREAM_STATE_TRANSFER, tdir);
      taosMemoryFree(tdir);
      return code;
    }
    chkpId = 0;
  }

-  qInfo("%s start to read dir: %s", STREAM_STATE_TRANSFER, tdir);
+  stInfo("%s start to read dir: %s", STREAM_STATE_TRANSFER, tdir);

  TdDirPtr pDir = taosOpenDir(tdir);
  if (NULL == pDir) {
-    qError("%s failed to open %s", STREAM_STATE_TRANSFER, tdir);
+    stError("%s failed to open %s", STREAM_STATE_TRANSFER, tdir);
    goto _err;
  }

@@ -202,14 +204,14 @@ int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, int64_t chk
    }
    sprintf(buf + strlen(buf) - 1, "]");

-    qInfo("%s get file list: %s", STREAM_STATE_TRANSFER, buf);
+    stInfo("%s get file list: %s", STREAM_STATE_TRANSFER, buf);
    taosMemoryFree(buf);
  }

  taosCloseDir(&pDir);

  if (pFile->pCurrent == NULL) {
-    qError("%s failed to open %s, reason: no valid file", STREAM_STATE_TRANSFER, tdir);
+    stError("%s failed to open %s, reason: no valid file", STREAM_STATE_TRANSFER, tdir);
    code = -1;
    tdir = NULL;
    goto _err;
@@ -270,7 +272,7 @@ void streamSnapHandleDestroy(SStreamSnapHandle* handle) {
    if (handle->checkpointId == 0) {
      // del tmp dir
-    if (taosIsDir(pFile->path)) {
+    if (pFile && taosIsDir(pFile->path)) {
      taosRemoveDir(pFile->path);
    }
  } else {
@@ -333,28 +335,28 @@ int32_t streamSnapRead(SStreamSnapReader* pReader, uint8_t** ppData, int64_t* si
    return 0;
  } else {
    pHandle->fd = streamOpenFile(pFile->path, item->name, TD_FILE_READ);
-    qDebug("%s open file %s, current offset:%" PRId64 ", size:% " PRId64 ", file no.%d", STREAM_STATE_TRANSFER,
-          item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx);
+    stDebug("%s open file %s, current offset:%" PRId64 ", size:% " PRId64 ", file no.%d", STREAM_STATE_TRANSFER,
+            item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx);
    }
  }

-  qDebug("%s start to read file %s, current offset:%" PRId64 ", size:%" PRId64 ", file no.%d", STREAM_STATE_TRANSFER,
-        item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx);
+  stDebug("%s start to read file %s, current offset:%" PRId64 ", size:%" PRId64 ", file no.%d", STREAM_STATE_TRANSFER,
+          item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx);
  uint8_t* buf = taosMemoryCalloc(1, sizeof(SStreamSnapBlockHdr) + kBlockSize);
  if(buf == NULL){
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  int64_t nread = taosPReadFile(pHandle->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pHandle->offset);
  if (nread == -1) {
-    code = TAOS_SYSTEM_ERROR(terrno);
-    qError("%s snap failed to read snap, file name:%s, type:%d,reason:%s", STREAM_STATE_TRANSFER, item->name,
-          item->type, tstrerror(code));
    taosMemoryFree(buf);
+    code = TAOS_SYSTEM_ERROR(terrno);
+    stError("%s snap failed to read snap, file name:%s, type:%d,reason:%s", STREAM_STATE_TRANSFER,
item->name, + item->type, tstrerror(code)); return -1; } else if (nread > 0 && nread <= kBlockSize) { // left bytes less than kBlockSize - qDebug("%s read file %s, current offset:%" PRId64 ",size:% " PRId64 ", file no.%d", STREAM_STATE_TRANSFER, - item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); + stDebug("%s read file %s, current offset:%" PRId64 ",size:% " PRId64 ", file no.%d", STREAM_STATE_TRANSFER, + item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); pHandle->offset += nread; if (pHandle->offset >= item->size || nread < kBlockSize) { taosCloseFile(&pHandle->fd); @@ -362,8 +364,8 @@ int32_t streamSnapRead(SStreamSnapReader* pReader, uint8_t** ppData, int64_t* si pHandle->currFileIdx += 1; } } else { - qDebug("%s no data read, close file no.%d, move to next file, open and read", STREAM_STATE_TRANSFER, - pHandle->currFileIdx); + stDebug("%s no data read, close file no.%d, move to next file, open and read", STREAM_STATE_TRANSFER, + pHandle->currFileIdx); taosCloseFile(&pHandle->fd); pHandle->offset = 0; pHandle->currFileIdx += 1; @@ -381,8 +383,8 @@ int32_t streamSnapRead(SStreamSnapReader* pReader, uint8_t** ppData, int64_t* si nread = taosPReadFile(pHandle->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pHandle->offset); pHandle->offset += nread; - qDebug("%s open file and read file %s, current offset:%" PRId64 ", size:% " PRId64 ", file no.%d", - STREAM_STATE_TRANSFER, item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); + stDebug("%s open file and read file %s, current offset:%" PRId64 ", size:% " PRId64 ", file no.%d", + STREAM_STATE_TRANSFER, item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); } SStreamSnapBlockHdr* pHdr = (SStreamSnapBlockHdr*)buf; @@ -437,8 +439,8 @@ int32_t streamSnapWrite(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t nDa pHandle->fd = streamOpenFile(pFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); if (pHandle->fd == NULL) { code = TAOS_SYSTEM_ERROR(terrno); - qError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pFile->path, TD_DIRSEP, pHdr->name, - tstrerror(code)); + stError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pFile->path, TD_DIRSEP, + pHdr->name, tstrerror(code)); } } @@ -446,7 +448,7 @@ int32_t streamSnapWrite(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t nDa int64_t bytes = taosPWriteFile(pHandle->fd, pHdr->data, pHdr->size, pHandle->offset); if (bytes != pHdr->size) { code = TAOS_SYSTEM_ERROR(terrno); - qError("%s failed to write snap, file name:%s, reason:%s", STREAM_STATE_TRANSFER, pHdr->name, tstrerror(code)); + stError("%s failed to write snap, file name:%s, reason:%s", STREAM_STATE_TRANSFER, pHdr->name, tstrerror(code)); return code; } pHandle->offset += bytes; @@ -464,8 +466,8 @@ int32_t streamSnapWrite(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t nDa pHandle->fd = streamOpenFile(pFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); if (pHandle->fd == NULL) { code = TAOS_SYSTEM_ERROR(terrno); - qError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pFile->path, TD_DIRSEP, pHdr->name, - tstrerror(code)); + stError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pFile->path, TD_DIRSEP, + pHdr->name, tstrerror(code)); } taosPWriteFile(pHandle->fd, pHdr->data, pHdr->size, pHandle->offset); @@ -488,7 +490,7 @@ int32_t streamSnapWriterClose(SStreamSnapWriter* pWriter, int8_t rollback) { n 
+= sprintf(buf + n, "%s %" PRId64 "]", item->name, item->size); } } - qDebug("%s snap get file list, %s", STREAM_STATE_TRANSFER, buf); + stDebug("%s snap get file list, %s", STREAM_STATE_TRANSFER, buf); taosMemoryFree(buf); } diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index 60b93a5590..4a056563ee 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -91,7 +91,7 @@ int stateKeyCmpr(const void* pKey1, int kLen1, const void* pKey2, int kLen2) { } SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t szPage, int32_t pages) { - qDebug("open stream state, %s", path); + stDebug("open stream state, %s", path); SStreamState* pState = taosMemoryCalloc(1, sizeof(SStreamState)); if (pState == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -139,7 +139,7 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz pState->pTdbState->backendCfWrapperId = id; pState->pTdbState->pBackendCfWrapper = taosAcquireRef(streamBackendCfWrapperId, id); // already exist stream task for - qInfo("already exist stream-state for %s", pState->pTdbState->idstr); + stInfo("already exist stream-state for %s", pState->pTdbState->idstr); // taosAcquireRef(streamBackendId, pState->streamBackendRid); } taosThreadMutexUnlock(&pMeta->backendMutex); @@ -149,7 +149,7 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT); pState->parNameMap = tSimpleHashInit(1024, hashFn); - qInfo("succ to open state %p on backend %p 0x%" PRIx64 "-%d", pState, pMeta->streamBackend, pState->streamId, + stInfo("succ to open state %p on backend %p 0x%" PRIx64 "-%d", pState, pMeta->streamBackend, pState->streamId, pState->taskId); return pState; @@ -462,7 +462,7 @@ int32_t streamStateAddIfNotExist(SStreamState* pState, const SWinKey* key, void* int32_t streamStateReleaseBuf(SStreamState* pState, void* pVal, bool used) { // todo refactor - qDebug("streamStateReleaseBuf"); + stDebug("streamStateReleaseBuf"); if (!pVal) { return 0; } @@ -724,7 +724,7 @@ int32_t streamStateSessionPut(SStreamState* pState, const SSessionKey* key, void code = streamStateSessionPut_rocksdb(pState, key, pos->pRowBuff, vLen); streamStateReleaseBuf(pState, pos, true); putFreeBuff(pState->pFileState, pos); - qDebug("===stream===save skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64 ".code:%d", key->win.skey, + stDebug("===stream===save skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64 ".code:%d", key->win.skey, key->win.ekey, key->groupId, code); } else { code = putSessionWinResultBuff(pState->pFileState, value); @@ -763,7 +763,7 @@ int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVa int32_t streamStateSessionDel(SStreamState* pState, const SSessionKey* key) { #ifdef USE_ROCKSDB - qDebug("===stream===delete skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey, key->win.ekey, + stDebug("===stream===delete skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey, key->win.ekey, key->groupId); return deleteRowBuff(pState->pFileState, key, sizeof(SSessionKey)); #else @@ -1081,7 +1081,7 @@ _end: } int32_t streamStatePutParName(SStreamState* pState, int64_t groupId, const char tbname[TSDB_TABLE_NAME_LEN]) { - qDebug("try to write to cf parname"); + stDebug("try to write to cf parname"); #ifdef USE_ROCKSDB if (tSimpleHashGetSize(pState->parNameMap) > MAX_TABLE_NAME_NUM) { if 
(tSimpleHashGet(pState->parNameMap, &groupId, sizeof(int64_t)) == NULL) { diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 663deca171..37af1ce64f 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -20,6 +20,8 @@ #include "ttimer.h" #include "wal.h" +static void streamTaskDestroyUpstreamInfo(SUpstreamInfo* pUpstreamInfo); + static int32_t addToTaskset(SArray* pArray, SStreamTask* pTask) { int32_t childId = taosArrayGetSize(pArray); pTask->info.selfChildId = childId; @@ -27,8 +29,8 @@ static int32_t addToTaskset(SArray* pArray, SStreamTask* pTask) { return 0; } -SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, int8_t fillHistory, int64_t triggerParam, - SArray* pTaskList) { +SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, bool fillHistory, int64_t triggerParam, + SArray* pTaskList, bool hasFillhistory) { SStreamTask* pTask = (SStreamTask*)taosMemoryCalloc(1, sizeof(SStreamTask)); if (pTask == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -46,9 +48,13 @@ SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, int8_t fillHisto pTask->id.idStr = taosStrdup(buf); pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; - pTask->status.taskStatus = TASK_STATUS__SCAN_HISTORY; + pTask->status.taskStatus = (fillHistory || hasFillhistory)? TASK_STATUS__SCAN_HISTORY:TASK_STATUS__NORMAL; pTask->inputInfo.status = TASK_INPUT_STATUS__NORMAL; - pTask->outputInfo.status = TASK_OUTPUT_STATUS__NORMAL; + pTask->outputq.status = TASK_OUTPUT_STATUS__NORMAL; + + if (fillHistory) { + ASSERT(hasFillhistory); + } addToTaskset(pTaskList, pTask); return pTask; @@ -96,20 +102,23 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { if (tEncodeI64(pEncoder, pTask->chkInfo.checkpointVer) < 0) return -1; if (tEncodeI8(pEncoder, pTask->info.fillHistory) < 0) return -1; - if (tEncodeI64(pEncoder, pTask->historyTaskId.streamId)) return -1; - if (tEncodeI32(pEncoder, pTask->historyTaskId.taskId)) return -1; + if (tEncodeI64(pEncoder, pTask->hTaskInfo.id.streamId)) return -1; + int32_t taskId = pTask->hTaskInfo.id.taskId; + if (tEncodeI32(pEncoder, taskId)) return -1; + if (tEncodeI64(pEncoder, pTask->streamTaskId.streamId)) return -1; - if (tEncodeI32(pEncoder, pTask->streamTaskId.taskId)) return -1; + taskId = pTask->streamTaskId.taskId; + if (tEncodeI32(pEncoder, taskId)) return -1; if (tEncodeU64(pEncoder, pTask->dataRange.range.minVer)) return -1; if (tEncodeU64(pEncoder, pTask->dataRange.range.maxVer)) return -1; if (tEncodeI64(pEncoder, pTask->dataRange.window.skey)) return -1; if (tEncodeI64(pEncoder, pTask->dataRange.window.ekey)) return -1; - int32_t epSz = taosArrayGetSize(pTask->pUpstreamInfoList); + int32_t epSz = taosArrayGetSize(pTask->upstreamInfo.pList); if (tEncodeI32(pEncoder, epSz) < 0) return -1; for (int32_t i = 0; i < epSz; i++) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i); + SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); if (tEncodeStreamEpInfo(pEncoder, pInfo) < 0) return -1; } @@ -118,20 +127,20 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { } if (pTask->outputInfo.type == TASK_OUTPUT__TABLE) { - if (tEncodeI64(pEncoder, pTask->tbSink.stbUid) < 0) return -1; - if (tEncodeCStr(pEncoder, pTask->tbSink.stbFullName) < 0) return -1; - if (tEncodeSSchemaWrapper(pEncoder, pTask->tbSink.pSchemaWrapper) < 0) return -1; + if (tEncodeI64(pEncoder, pTask->outputInfo.tbSink.stbUid) < 
0) return -1; + if (tEncodeCStr(pEncoder, pTask->outputInfo.tbSink.stbFullName) < 0) return -1; + if (tEncodeSSchemaWrapper(pEncoder, pTask->outputInfo.tbSink.pSchemaWrapper) < 0) return -1; } else if (pTask->outputInfo.type == TASK_OUTPUT__SMA) { - if (tEncodeI64(pEncoder, pTask->smaSink.smaId) < 0) return -1; + if (tEncodeI64(pEncoder, pTask->outputInfo.smaSink.smaId) < 0) return -1; } else if (pTask->outputInfo.type == TASK_OUTPUT__FETCH) { - if (tEncodeI8(pEncoder, pTask->fetchSink.reserved) < 0) return -1; + if (tEncodeI8(pEncoder, pTask->outputInfo.fetchSink.reserved) < 0) return -1; } else if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { - if (tEncodeI32(pEncoder, pTask->fixedEpDispatcher.taskId) < 0) return -1; - if (tEncodeI32(pEncoder, pTask->fixedEpDispatcher.nodeId) < 0) return -1; - if (tEncodeSEpSet(pEncoder, &pTask->fixedEpDispatcher.epSet) < 0) return -1; + if (tEncodeI32(pEncoder, pTask->outputInfo.fixedDispatcher.taskId) < 0) return -1; + if (tEncodeI32(pEncoder, pTask->outputInfo.fixedDispatcher.nodeId) < 0) return -1; + if (tEncodeSEpSet(pEncoder, &pTask->outputInfo.fixedDispatcher.epSet) < 0) return -1; } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { - if (tSerializeSUseDbRspImp(pEncoder, &pTask->shuffleDispatcher.dbInfo) < 0) return -1; - if (tEncodeCStr(pEncoder, pTask->shuffleDispatcher.stbFullName) < 0) return -1; + if (tSerializeSUseDbRspImp(pEncoder, &pTask->outputInfo.shuffleDispatcher.dbInfo) < 0) return -1; + if (tEncodeCStr(pEncoder, pTask->outputInfo.shuffleDispatcher.stbFullName) < 0) return -1; } if (tEncodeI64(pEncoder, pTask->info.triggerParam) < 0) return -1; if (tEncodeCStrWithLen(pEncoder, pTask->reserve, sizeof(pTask->reserve) - 1) < 0) return -1; @@ -141,6 +150,8 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { } int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { + int32_t taskId = 0; + if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeI64(pDecoder, &pTask->ver) < 0) return -1; if (pTask->ver != SSTREAM_TASK_VER) return -1; @@ -164,10 +175,13 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tDecodeI64(pDecoder, &pTask->chkInfo.checkpointVer) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->info.fillHistory) < 0) return -1; - if (tDecodeI64(pDecoder, &pTask->historyTaskId.streamId)) return -1; - if (tDecodeI32(pDecoder, &pTask->historyTaskId.taskId)) return -1; + if (tDecodeI64(pDecoder, &pTask->hTaskInfo.id.streamId)) return -1; + if (tDecodeI32(pDecoder, &taskId)) return -1; + pTask->hTaskInfo.id.taskId = taskId; + if (tDecodeI64(pDecoder, &pTask->streamTaskId.streamId)) return -1; - if (tDecodeI32(pDecoder, &pTask->streamTaskId.taskId)) return -1; + if (tDecodeI32(pDecoder, &taskId)) return -1; + pTask->streamTaskId.taskId = taskId; if (tDecodeU64(pDecoder, &pTask->dataRange.range.minVer)) return -1; if (tDecodeU64(pDecoder, &pTask->dataRange.range.maxVer)) return -1; @@ -177,7 +191,7 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { int32_t epSz = -1; if (tDecodeI32(pDecoder, &epSz) < 0) return -1; - pTask->pUpstreamInfoList = taosArrayInit(epSz, POINTER_BYTES); + pTask->upstreamInfo.pList = taosArrayInit(epSz, POINTER_BYTES); for (int32_t i = 0; i < epSz; i++) { SStreamChildEpInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamChildEpInfo)); if (pInfo == NULL) return -1; @@ -185,7 +199,7 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { taosMemoryFreeClear(pInfo); return -1; } - 
taosArrayPush(pTask->pUpstreamInfoList, &pInfo); + taosArrayPush(pTask->upstreamInfo.pList, &pInfo); } if (pTask->info.taskLevel != TASK_LEVEL__SINK) { @@ -193,22 +207,22 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { } if (pTask->outputInfo.type == TASK_OUTPUT__TABLE) { - if (tDecodeI64(pDecoder, &pTask->tbSink.stbUid) < 0) return -1; - if (tDecodeCStrTo(pDecoder, pTask->tbSink.stbFullName) < 0) return -1; - pTask->tbSink.pSchemaWrapper = taosMemoryCalloc(1, sizeof(SSchemaWrapper)); - if (pTask->tbSink.pSchemaWrapper == NULL) return -1; - if (tDecodeSSchemaWrapper(pDecoder, pTask->tbSink.pSchemaWrapper) < 0) return -1; + if (tDecodeI64(pDecoder, &pTask->outputInfo.tbSink.stbUid) < 0) return -1; + if (tDecodeCStrTo(pDecoder, pTask->outputInfo.tbSink.stbFullName) < 0) return -1; + pTask->outputInfo.tbSink.pSchemaWrapper = taosMemoryCalloc(1, sizeof(SSchemaWrapper)); + if (pTask->outputInfo.tbSink.pSchemaWrapper == NULL) return -1; + if (tDecodeSSchemaWrapper(pDecoder, pTask->outputInfo.tbSink.pSchemaWrapper) < 0) return -1; } else if (pTask->outputInfo.type == TASK_OUTPUT__SMA) { - if (tDecodeI64(pDecoder, &pTask->smaSink.smaId) < 0) return -1; + if (tDecodeI64(pDecoder, &pTask->outputInfo.smaSink.smaId) < 0) return -1; } else if (pTask->outputInfo.type == TASK_OUTPUT__FETCH) { - if (tDecodeI8(pDecoder, &pTask->fetchSink.reserved) < 0) return -1; + if (tDecodeI8(pDecoder, &pTask->outputInfo.fetchSink.reserved) < 0) return -1; } else if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { - if (tDecodeI32(pDecoder, &pTask->fixedEpDispatcher.taskId) < 0) return -1; - if (tDecodeI32(pDecoder, &pTask->fixedEpDispatcher.nodeId) < 0) return -1; - if (tDecodeSEpSet(pDecoder, &pTask->fixedEpDispatcher.epSet) < 0) return -1; + if (tDecodeI32(pDecoder, &pTask->outputInfo.fixedDispatcher.taskId) < 0) return -1; + if (tDecodeI32(pDecoder, &pTask->outputInfo.fixedDispatcher.nodeId) < 0) return -1; + if (tDecodeSEpSet(pDecoder, &pTask->outputInfo.fixedDispatcher.epSet) < 0) return -1; } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { - if (tDeserializeSUseDbRspImp(pDecoder, &pTask->shuffleDispatcher.dbInfo) < 0) return -1; - if (tDecodeCStrTo(pDecoder, pTask->shuffleDispatcher.stbFullName) < 0) return -1; + if (tDeserializeSUseDbRspImp(pDecoder, &pTask->outputInfo.shuffleDispatcher.dbInfo) < 0) return -1; + if (tDecodeCStrTo(pDecoder, pTask->outputInfo.shuffleDispatcher.stbFullName) < 0) return -1; } if (tDecodeI64(pDecoder, &pTask->info.triggerParam) < 0) return -1; if (tDecodeCStrTo(pDecoder, pTask->reserve) < 0) return -1; @@ -251,15 +265,19 @@ int32_t tDecodeStreamTaskChkInfo(SDecoder* pDecoder, SCheckpointInfo* pChkpInfo) tEndDecode(pDecoder); return 0; } -int32_t tDecodeStreamTaskId(SDecoder* pDecoder, SStreamTaskId* pTaskId) { + +int32_t tDecodeStreamTaskId(SDecoder* pDecoder, STaskId* pTaskId) { int64_t ver; if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeI64(pDecoder, &ver) < 0) return -1; if (ver != SSTREAM_TASK_VER) return -1; if (tDecodeI64(pDecoder, &pTaskId->streamId) < 0) return -1; - if (tDecodeI32(pDecoder, &pTaskId->taskId) < 0) return -1; + int32_t taskId = 0; + if (tDecodeI32(pDecoder, &taskId) < 0) return -1; + + pTaskId->taskId = taskId; tEndDecode(pDecoder); return 0; } @@ -277,11 +295,21 @@ static void freeUpstreamItem(void* p) { void tFreeStreamTask(SStreamTask* pTask) { int32_t taskId = pTask->id.taskId; - qDebug("free s-task:0x%x, %p, state:%p", taskId, pTask, pTask->pState); + STaskExecStatisInfo* pStatis = 
&pTask->execInfo; + + stDebug("start to free s-task:0x%x, %p, state:%p, status:%s", taskId, pTask, pTask->pState, + streamGetTaskStatusStr(pTask->status.taskStatus)); + + stDebug("s-task:0x%x task exec summary: create:%" PRId64 ", init:%" PRId64 ", start:%" PRId64 + ", updateCount:%d latestUpdate:%" PRId64 ", latestCheckPoint:%" PRId64 ", ver:%" PRId64 + " nextProcessVer:%" PRId64", checkpointCount:%d", + taskId, pStatis->created, pStatis->init, pStatis->start, pStatis->updateCount, pStatis->latestUpdateTs, + pTask->chkInfo.checkpointId, pTask->chkInfo.checkpointVer, pTask->chkInfo.nextProcessVer, + pStatis->checkpoint); // remove the ref by timer while (pTask->status.timerActive > 0) { - qDebug("s-task:%s wait for task stop timer activities", pTask->id.idStr); + stDebug("s-task:%s wait for task stop timer activities", pTask->id.idStr); taosMsleep(10); } @@ -290,9 +318,14 @@ void tFreeStreamTask(SStreamTask* pTask) { pTask->schedInfo.pTimer = NULL; } - if (pTask->launchTaskTimer != NULL) { - taosTmrStop(pTask->launchTaskTimer); - pTask->launchTaskTimer = NULL; + if (pTask->hTaskInfo.pTimer != NULL) { + taosTmrStop(pTask->hTaskInfo.pTimer); + pTask->hTaskInfo.pTimer = NULL; + } + + if (pTask->msgInfo.pTimer != NULL) { + taosTmrStop(pTask->msgInfo.pTimer); + pTask->msgInfo.pTimer = NULL; } int32_t status = atomic_load_8((int8_t*)&(pTask->status.taskStatus)); @@ -300,8 +333,8 @@ void tFreeStreamTask(SStreamTask* pTask) { streamQueueClose(pTask->inputInfo.queue, pTask->id.taskId); } - if (pTask->outputInfo.queue) { - streamQueueClose(pTask->outputInfo.queue, pTask->id.taskId); + if (pTask->outputq.queue) { + streamQueueClose(pTask->outputq.queue, pTask->id.taskId); } if (pTask->exec.qmsg) { @@ -317,27 +350,27 @@ void tFreeStreamTask(SStreamTask* pTask) { walCloseReader(pTask->exec.pWalReader); } + pTask->pReadyMsgList = taosArrayDestroy(pTask->pReadyMsgList); + if (pTask->msgInfo.pData != NULL) { + destroyDispatchMsg(pTask->msgInfo.pData, getNumOfDispatchBranch(pTask)); + pTask->msgInfo.pData = NULL; + pTask->msgInfo.dispatchMsgType = 0; + } + if (pTask->outputInfo.type == TASK_OUTPUT__TABLE) { - tDeleteSchemaWrapper(pTask->tbSink.pSchemaWrapper); - taosMemoryFree(pTask->tbSink.pTSchema); - tSimpleHashCleanup(pTask->tbSink.pTblInfo); + tDeleteSchemaWrapper(pTask->outputInfo.tbSink.pSchemaWrapper); + taosMemoryFree(pTask->outputInfo.tbSink.pTSchema); + tSimpleHashCleanup(pTask->outputInfo.tbSink.pTblInfo); } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { - taosArrayDestroy(pTask->shuffleDispatcher.dbInfo.pVgroupInfos); + taosArrayDestroy(pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos); pTask->checkReqIds = taosArrayDestroy(pTask->checkReqIds); } if (pTask->pState) { - qDebug("s-task:0x%x start to free task state", taskId); + stDebug("s-task:0x%x start to free task state", taskId); streamStateClose(pTask->pState, status == TASK_STATUS__DROPPING); } - pTask->pReadyMsgList = taosArrayDestroy(pTask->pReadyMsgList); - taosThreadMutexDestroy(&pTask->lock); - if (pTask->msgInfo.pData != NULL) { - destroyStreamDataBlock(pTask->msgInfo.pData); - pTask->msgInfo.pData = NULL; - } - if (pTask->id.idStr != NULL) { taosMemoryFree((void*)pTask->id.idStr); } @@ -351,15 +384,14 @@ void tFreeStreamTask(SStreamTask* pTask) { pTask->pRspMsgList = NULL; } - if (pTask->pUpstreamInfoList != NULL) { - taosArrayDestroyEx(pTask->pUpstreamInfoList, freeUpstreamItem); - pTask->pUpstreamInfoList = NULL; - } + streamTaskDestroyUpstreamInfo(&pTask->upstreamInfo); + pTask->msgInfo.pRetryList = 
taosArrayDestroy(pTask->msgInfo.pRetryList); + taosMemoryFree(pTask->outputInfo.pTokenBucket); taosThreadMutexDestroy(&pTask->lock); taosMemoryFree(pTask); - qDebug("s-task:0x%x free task completed", taskId); + stDebug("s-task:0x%x free task completed", taskId); } int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, int64_t ver) { @@ -368,25 +400,49 @@ int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, i pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; pTask->status.timerActive = 0; pTask->inputInfo.queue = streamQueueOpen(512 << 10); - pTask->outputInfo.queue = streamQueueOpen(512 << 10); + pTask->outputq.queue = streamQueueOpen(512 << 10); - if (pTask->inputInfo.queue == NULL || pTask->outputInfo.queue == NULL) { - qError("s-task:%s failed to prepare the input/output queue, initialize task failed", pTask->id.idStr); - return -1; + if (pTask->inputInfo.queue == NULL || pTask->outputq.queue == NULL) { + stError("s-task:%s failed to prepare the input/output queue, initialize task failed", pTask->id.idStr); + return TSDB_CODE_OUT_OF_MEMORY; } - pTask->tsInfo.created = taosGetTimestampMs(); + pTask->execInfo.created = taosGetTimestampMs(); pTask->inputInfo.status = TASK_INPUT_STATUS__NORMAL; - pTask->outputInfo.status = TASK_OUTPUT_STATUS__NORMAL; + pTask->outputq.status = TASK_OUTPUT_STATUS__NORMAL; pTask->pMeta = pMeta; + pTask->chkInfo.checkpointVer = ver - 1; pTask->chkInfo.nextProcessVer = ver; pTask->dataRange.range.maxVer = ver; pTask->dataRange.range.minVer = ver; pTask->pMsgCb = pMsgCb; + pTask->msgInfo.pRetryList = taosArrayInit(4, sizeof(int32_t)); - streamTaskInitTokenBucket(&pTask->tokenBucket, 150, 100); - taosThreadMutexInit(&pTask->lock, NULL); + pTask->outputInfo.pTokenBucket = taosMemoryCalloc(1, sizeof(STokenBucket)); + if (pTask->outputInfo.pTokenBucket == NULL) { + stError("s-task:%s failed to prepare the tokenBucket, code:%s", pTask->id.idStr, tstrerror(TSDB_CODE_OUT_OF_MEMORY)); + return TSDB_CODE_OUT_OF_MEMORY; + } + + // 2MiB per second for sink task + // 50 times sink operator per second + streamTaskInitTokenBucket(pTask->outputInfo.pTokenBucket, 50, 50, 2); + + TdThreadMutexAttr attr = {0}; + int code = taosThreadMutexAttrInit(&attr); + if (code != 0) { + stError("s-task:%s initElapsed mutex attr failed, code:%s", pTask->id.idStr, tstrerror(code)); + return code; + } + + code = taosThreadMutexAttrSetType(&attr, PTHREAD_MUTEX_RECURSIVE); + if (code != 0) { + stError("s-task:%s set mutex attr recursive, code:%s", pTask->id.idStr, tstrerror(code)); + return code; + } + + taosThreadMutexInit(&pTask->lock, &attr); streamTaskOpenAllUpstreamInput(pTask); return TSDB_CODE_SUCCESS; @@ -400,7 +456,7 @@ int32_t streamTaskGetNumOfDownstream(const SStreamTask* pTask) { if (type == TASK_OUTPUT__FIXED_DISPATCH || type == TASK_OUTPUT__TABLE) { return 1; } else { - SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; + SArray* vgInfo = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos; return taosArrayGetSize(vgInfo); } } @@ -428,11 +484,11 @@ int32_t streamTaskSetUpstreamInfo(SStreamTask* pTask, const SStreamTask* pUpstre return TSDB_CODE_OUT_OF_MEMORY; } - if (pTask->pUpstreamInfoList == NULL) { - pTask->pUpstreamInfoList = taosArrayInit(4, POINTER_BYTES); + if (pTask->upstreamInfo.pList == NULL) { + pTask->upstreamInfo.pList = taosArrayInit(4, POINTER_BYTES); } - taosArrayPush(pTask->pUpstreamInfoList, &pEpInfo); + taosArrayPush(pTask->upstreamInfo.pList, &pEpInfo); return TSDB_CODE_SUCCESS; } @@ 
-440,19 +496,28 @@ void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpS char buf[512] = {0}; EPSET_TO_STR(pEpSet, buf); - int32_t numOfUpstream = taosArrayGetSize(pTask->pUpstreamInfoList); + int32_t numOfUpstream = taosArrayGetSize(pTask->upstreamInfo.pList); for (int32_t i = 0; i < numOfUpstream; ++i) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i); + SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); if (pInfo->nodeId == nodeId) { epsetAssign(&pInfo->epSet, pEpSet); - qDebug("s-task:0x%x update the upstreamInfo, nodeId:%d newEpset:%s", pTask->id.taskId, nodeId, buf); + stDebug("s-task:0x%x update the upstreamInfo taskId:0x%x(nodeId:%d) newEpset:%s", pTask->id.taskId, + pInfo->taskId, nodeId, buf); break; } } } +void streamTaskDestroyUpstreamInfo(SUpstreamInfo* pUpstreamInfo) { + if (pUpstreamInfo->pList != NULL) { + taosArrayDestroyEx(pUpstreamInfo->pList, freeUpstreamItem); + pUpstreamInfo->numOfClosed = 0; + pUpstreamInfo->pList = NULL; + } +} + void streamTaskSetFixedDownstreamInfo(SStreamTask* pTask, const SStreamTask* pDownstreamTask) { - STaskDispatcherFixedEp* pDispatcher = &pTask->fixedEpDispatcher; + STaskDispatcherFixed* pDispatcher = &pTask->outputInfo.fixedDispatcher; pDispatcher->taskId = pDownstreamTask->id.taskId; pDispatcher->nodeId = pDownstreamTask->info.nodeId; pDispatcher->epSet = pDownstreamTask->info.epSet; @@ -467,7 +532,7 @@ void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SE int8_t type = pTask->outputInfo.type; if (type == TASK_OUTPUT__SHUFFLE_DISPATCH) { - SArray* pVgs = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; + SArray* pVgs = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos; int32_t numOfVgroups = taosArrayGetSize(pVgs); for (int32_t i = 0; i < numOfVgroups; i++) { @@ -475,15 +540,17 @@ void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SE if (pVgInfo->vgId == nodeId) { epsetAssign(&pVgInfo->epSet, pEpSet); - qDebug("s-task:0x%x update the dispatch info, nodeId:%d newEpset:%s", pTask->id.taskId, nodeId, buf); + stDebug("s-task:0x%x update the dispatch info, task:0x%x(nodeId:%d) newEpset:%s", pTask->id.taskId, + pVgInfo->taskId, nodeId, buf); break; } } } else if (type == TASK_OUTPUT__FIXED_DISPATCH) { - STaskDispatcherFixedEp* pDispatcher = &pTask->fixedEpDispatcher; + STaskDispatcherFixed* pDispatcher = &pTask->outputInfo.fixedDispatcher; if (pDispatcher->nodeId == nodeId) { epsetAssign(&pDispatcher->epSet, pEpSet); - qDebug("s-task:0x%x update the dispatch info, nodeId:%d newEpSet:%s", pTask->id.taskId, nodeId, buf); + stDebug("s-task:0x%x update the dispatch info, task:0x%x(nodeId:%d) newEpSet:%s", pTask->id.taskId, + pDispatcher->taskId, nodeId, buf); } } else { // do nothing @@ -491,21 +558,26 @@ void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SE } int32_t streamTaskStop(SStreamTask* pTask) { - SStreamMeta* pMeta = pTask->pMeta; + int32_t vgId = pTask->pMeta->vgId; int64_t st = taosGetTimestampMs(); const char* id = pTask->id.idStr; + taosThreadMutexLock(&pTask->lock); + if (pTask->status.taskStatus == TASK_STATUS__CK) { + stDebug("s-task:%s in checkpoint will be discarded since task is stopped", id); + } pTask->status.taskStatus = TASK_STATUS__STOP; - qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS); + taosThreadMutexUnlock(&pTask->lock); + qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS); while (/*pTask->status.schedStatus != TASK_SCHED_STATUS__INACTIVE */ 
!streamTaskIsIdle(pTask)) { - qDebug("s-task:%s level:%d wait for task to be idle, check again in 100ms", id, pTask->info.taskLevel); + stDebug("s-task:%s level:%d wait for task to be idle and then close, check again in 100ms", id, + pTask->info.taskLevel); taosMsleep(100); } - pTask->tsInfo.init = 0; int64_t el = taosGetTimestampMs() - st; - qDebug("vgId:%d s-task:%s is closed in %" PRId64 " ms, and reset init ts", pMeta->vgId, pTask->id.idStr, el); + stDebug("vgId:%d s-task:%s is closed in %" PRId64 " ms", vgId, id, el); return 0; } @@ -515,7 +587,7 @@ int32_t doUpdateTaskEpset(SStreamTask* pTask, int32_t nodeId, SEpSet* pEpSet) { if (pTask->info.nodeId == nodeId) { // execution task should be moved away epsetAssign(&pTask->info.epSet, pEpSet); EPSET_TO_STR(pEpSet, buf) - qDebug("s-task:0x%x (vgId:%d) self node epset is updated %s", pTask->id.taskId, nodeId, buf); + stDebug("s-task:0x%x (vgId:%d) self node epset is updated %s", pTask->id.taskId, nodeId, buf); } // check for the dispath info and the upstream task info @@ -533,6 +605,16 @@ int32_t doUpdateTaskEpset(SStreamTask* pTask, int32_t nodeId, SEpSet* pEpSet) { } int32_t streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList) { + STaskExecStatisInfo* p = &pTask->execInfo; + + int32_t numOfNodes = taosArrayGetSize(pNodeList); + int64_t prevTs = p->latestUpdateTs; + + p->latestUpdateTs = taosGetTimestampMs(); + p->updateCount += 1; + stDebug("s-task:%s update task nodeEp epset, updatedNodes:%d, updateCount:%d, prevTs:%" PRId64, pTask->id.idStr, + numOfNodes, p->updateCount, prevTs); + for (int32_t i = 0; i < taosArrayGetSize(pNodeList); ++i) { SNodeUpdateInfo* pInfo = taosArrayGet(pNodeList, i); doUpdateTaskEpset(pTask, pInfo->nodeId, &pInfo->newEp); @@ -545,11 +627,129 @@ void streamTaskResetUpstreamStageInfo(SStreamTask* pTask) { return; } - int32_t size = taosArrayGetSize(pTask->pUpstreamInfoList); + int32_t size = taosArrayGetSize(pTask->upstreamInfo.pList); for (int32_t i = 0; i < size; ++i) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i); + SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); pInfo->stage = -1; } - qDebug("s-task:%s reset all upstream tasks stage info", pTask->id.idStr); + stDebug("s-task:%s reset all upstream tasks stage info", pTask->id.idStr); } + +bool streamTaskAllUpstreamClosed(SStreamTask* pTask) { + return pTask->upstreamInfo.numOfClosed == taosArrayGetSize(pTask->upstreamInfo.pList); +} + +bool streamTaskSetSchedStatusWait(SStreamTask* pTask) { + bool ret = false; + + // double check + if (pTask->status.schedStatus == TASK_SCHED_STATUS__INACTIVE) { + taosThreadMutexLock(&pTask->lock); + if (pTask->status.schedStatus == TASK_SCHED_STATUS__INACTIVE) { + pTask->status.schedStatus = TASK_SCHED_STATUS__WAITING; + ret = true; + } + taosThreadMutexUnlock(&pTask->lock); + } + + return ret; +} + +int8_t streamTaskSetSchedStatusActive(SStreamTask* pTask) { + taosThreadMutexLock(&pTask->lock); + int8_t status = pTask->status.schedStatus; + if (status == TASK_SCHED_STATUS__WAITING) { + pTask->status.schedStatus = TASK_SCHED_STATUS__ACTIVE; + } + taosThreadMutexUnlock(&pTask->lock); + + return status; +} + +int8_t streamTaskSetSchedStatusInActive(SStreamTask* pTask) { + taosThreadMutexLock(&pTask->lock); + int8_t status = pTask->status.schedStatus; + ASSERT(status == TASK_SCHED_STATUS__WAITING || status == TASK_SCHED_STATUS__ACTIVE || + status == TASK_SCHED_STATUS__INACTIVE); + pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; + 
taosThreadMutexUnlock(&pTask->lock); + + return status; +} + +int32_t streamBuildAndSendDropTaskMsg(SMsgCb* pMsgCb, int32_t vgId, SStreamTaskId* pTaskId) { + SVDropStreamTaskReq *pReq = rpcMallocCont(sizeof(SVDropStreamTaskReq)); + if (pReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + pReq->head.vgId = vgId; + pReq->taskId = pTaskId->taskId; + pReq->streamId = pTaskId->streamId; + + SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_DROP, .pCont = pReq, .contLen = sizeof(SVDropStreamTaskReq)}; + int32_t code = tmsgPutToQueue(pMsgCb, WRITE_QUEUE, &msg); + if (code != TSDB_CODE_SUCCESS) { + stError("vgId:%d failed to send drop task:0x%x msg, code:%s", vgId, pTaskId->taskId, tstrerror(code)); + return code; + } + + stDebug("vgId:%d build and send drop table:0x%x msg", vgId, pTaskId->taskId); + return code; +} + +STaskId streamTaskExtractKey(const SStreamTask* pTask) { + STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; + return id; +} + +void streamTaskInitForLaunchHTask(SHistoryTaskInfo* pInfo) { + pInfo->waitInterval = LAUNCH_HTASK_INTERVAL; + pInfo->tickCount = ceil(LAUNCH_HTASK_INTERVAL / WAIT_FOR_MINIMAL_INTERVAL); + pInfo->retryTimes = 0; +} + +void streamTaskSetRetryInfoForLaunch(SHistoryTaskInfo* pInfo) { + ASSERT(pInfo->tickCount == 0); + + pInfo->waitInterval *= RETRY_LAUNCH_INTERVAL_INC_RATE; + pInfo->tickCount = ceil(pInfo->waitInterval / WAIT_FOR_MINIMAL_INTERVAL); + pInfo->retryTimes += 1; +} + +const char* streamGetTaskStatusStr(int32_t status) { + switch(status) { + case TASK_STATUS__NORMAL: return "normal"; + case TASK_STATUS__SCAN_HISTORY: return "scan-history"; + case TASK_STATUS__HALT: return "halt"; + case TASK_STATUS__PAUSE: return "paused"; + case TASK_STATUS__CK: return "check-point"; + case TASK_STATUS__DROPPING: return "dropping"; + case TASK_STATUS__STOP: return "stop"; + case TASK_STATUS__UNINIT: return "uninitialized"; + default:return ""; + } +} + +void streamTaskStatusInit(STaskStatusEntry* pEntry, const SStreamTask* pTask) { + pEntry->id.streamId = pTask->id.streamId; + pEntry->id.taskId = pTask->id.taskId; + pEntry->stage = -1; + pEntry->nodeId = pTask->info.nodeId; + pEntry->status = TASK_STATUS__STOP; +} + +void streamTaskStatusCopy(STaskStatusEntry* pDst, const STaskStatusEntry* pSrc) { + pDst->stage = pSrc->stage; + pDst->inputQUsed = pSrc->inputQUsed; + pDst->inputRate = pSrc->inputRate; + pDst->processedVer = pSrc->processedVer; + pDst->verStart = pSrc->verStart; + pDst->verEnd = pSrc->verEnd; + pDst->sinkQuota = pSrc->sinkQuota; + pDst->sinkDataSize = pSrc->sinkDataSize; + pDst->activeCheckpointId = pSrc->activeCheckpointId; + pDst->checkpointFailed = pSrc->checkpointFailed; +} \ No newline at end of file diff --git a/source/libs/stream/src/streamUpdate.c b/source/libs/stream/src/streamUpdate.c index f9ab672c4b..59471e8d8e 100644 --- a/source/libs/stream/src/streamUpdate.c +++ b/source/libs/stream/src/streamUpdate.c @@ -103,10 +103,12 @@ SUpdateInfo *updateInfoInit(int64_t interval, int32_t precision, int64_t waterma pInfo->minTS = -1; pInfo->interval = adjustInterval(interval, precision); pInfo->watermark = adjustWatermark(pInfo->interval, interval, watermark); + pInfo->numSBFs = 0; uint64_t bfSize = 0; if (!igUp) { bfSize = (uint64_t)(pInfo->watermark / pInfo->interval); + pInfo->numSBFs = bfSize; pInfo->pTsSBFs = taosArrayInit(bfSize, sizeof(void *)); if (pInfo->pTsSBFs == NULL) { @@ -130,7 +132,6 @@ SUpdateInfo *updateInfoInit(int64_t interval, int32_t precision, int64_t waterma _hash_fn_t hashFn = 
taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT); pInfo->pMap = taosHashInit(DEFAULT_MAP_CAPACITY, hashFn, true, HASH_NO_LOCK); } - pInfo->numSBFs = bfSize; pInfo->maxDataVersion = 0; return pInfo; } diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index 532a6955cf..019f8f7e62 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -197,6 +197,7 @@ int32_t syncLogBufferInitWithoutLock(SSyncLogBuffer* pBuf, SSyncNode* pNode) { SyncIndex index = toIndex; SSyncRaftEntry* pEntry = NULL; bool takeDummy = false; + int emptySize = (TSDB_SYNC_LOG_BUFFER_SIZE >> 1); while (true) { if (index <= pBuf->commitIndex) { @@ -210,7 +211,6 @@ int32_t syncLogBufferInitWithoutLock(SSyncLogBuffer* pBuf, SSyncNode* pNode) { } bool taken = false; - int emptySize = 5; if (toIndex - index + 1 <= pBuf->size - emptySize) { SSyncLogBufEntry tmp = {.pItem = pEntry, .prevLogIndex = -1, .prevLogTerm = -1}; pBuf->entries[index % pBuf->size] = tmp; diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index b66a08bd20..677e08ec56 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -576,6 +576,7 @@ void* destroyConnPool(SCliThrd* pThrd) { connList = taosHashIterate((SHashObj*)pool, connList); } taosHashCleanup(pool); + pThrd->pool = NULL; return NULL; } @@ -870,8 +871,10 @@ static void cliDestroyConn(SCliConn* conn, bool clear) { connList->list->numOfConn--; connList->size--; } else { - SConnList* connList = taosHashGet((SHashObj*)pThrd->pool, conn->dstAddr, strlen(conn->dstAddr) + 1); - if (connList != NULL) connList->list->numOfConn--; + if (pThrd->pool) { + SConnList* connList = taosHashGet((SHashObj*)pThrd->pool, conn->dstAddr, strlen(conn->dstAddr) + 1); + if (connList != NULL) connList->list->numOfConn--; + } } conn->list = NULL; pThrd->newConnCount--; diff --git a/source/libs/transport/src/transSvr.c b/source/libs/transport/src/transSvr.c index ca7e411874..bf73c253bc 100644 --- a/source/libs/transport/src/transSvr.c +++ b/source/libs/transport/src/transSvr.c @@ -1498,7 +1498,10 @@ int transSendResponse(const STransMsg* msg) { return 0; } SExHandle* exh = msg->info.handle; - int64_t refId = msg->info.refId; + if (exh == NULL) { + return 0; + } + int64_t refId = msg->info.refId; ASYNC_CHECK_HANDLE(exh, refId); STransMsg tmsg = *msg; diff --git a/source/libs/wal/src/walRead.c b/source/libs/wal/src/walRead.c index 2eee04a27a..c0435ca774 100644 --- a/source/libs/wal/src/walRead.c +++ b/source/libs/wal/src/walRead.c @@ -56,6 +56,8 @@ SWalReader *walOpenReader(SWal *pWal, SWalFilterCond *cond, int64_t id) { } void walCloseReader(SWalReader *pReader) { + if(pReader == NULL) return; + taosCloseFile(&pReader->pIdxFile); taosCloseFile(&pReader->pLogFile); taosMemoryFreeClear(pReader->pHead); diff --git a/source/os/src/osRand.c b/source/os/src/osRand.c index 43abc75d4f..b71be59f1d 100644 --- a/source/os/src/osRand.c +++ b/source/os/src/osRand.c @@ -86,9 +86,9 @@ void taosRandStr(char* str, int32_t size) { } void taosRandStr2(char* str, int32_t size) { - + const char* set = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ@"; - int32_t len = strlen(set); + int32_t len = strlen(set); for (int32_t i = 0; i < size; ++i) { str[i] = set[taosRand() % len]; diff --git a/source/os/src/osSysinfo.c b/source/os/src/osSysinfo.c index 562328a198..4816ec8f8b 100644 --- a/source/os/src/osSysinfo.c +++ b/source/os/src/osSysinfo.c @@ -852,13 +852,12 @@ void 
taosGetProcIODelta(int64_t *rchars, int64_t *wchars, int64_t *read_bytes, i } int32_t taosGetCardInfo(int64_t *receive_bytes, int64_t *transmit_bytes) { -#ifdef WINDOWS *receive_bytes = 0; *transmit_bytes = 0; + +#ifdef WINDOWS return 0; #elif defined(_TD_DARWIN_64) - *receive_bytes = 0; - *transmit_bytes = 0; return 0; #else TdFilePtr pFile = taosOpenFile(tsSysNetFile, TD_FILE_READ | TD_FILE_STREAM); @@ -895,8 +894,8 @@ int32_t taosGetCardInfo(int64_t *receive_bytes, int64_t *transmit_bytes) { "%s %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64, nouse0, &o_rbytes, &rpackts, &nouse1, &nouse2, &nouse3, &nouse4, &nouse5, &nouse6, &o_tbytes, &tpackets); - *receive_bytes = o_rbytes; - *transmit_bytes = o_tbytes; + *receive_bytes += o_rbytes; + *transmit_bytes += o_tbytes; } taosCloseFile(&pFile); @@ -908,8 +907,8 @@ int32_t taosGetCardInfo(int64_t *receive_bytes, int64_t *transmit_bytes) { void taosGetCardInfoDelta(int64_t *receive_bytes, int64_t *transmit_bytes) { static int64_t last_receive_bytes = 0; static int64_t last_transmit_bytes = 0; - static int64_t cur_receive_bytes = 0; - static int64_t cur_transmit_bytes = 0; + int64_t cur_receive_bytes = 0; + int64_t cur_transmit_bytes = 0; if (taosGetCardInfo(&cur_receive_bytes, &cur_transmit_bytes) == 0) { *receive_bytes = cur_receive_bytes - last_receive_bytes; *transmit_bytes = cur_transmit_bytes - last_transmit_bytes; diff --git a/source/util/src/talgo.c b/source/util/src/talgo.c index e373850b3c..8d83a70c11 100644 --- a/source/util/src/talgo.c +++ b/source/util/src/talgo.c @@ -273,3 +273,86 @@ void taosheapsort(void *base, int32_t size, int32_t len, const void *parcompar, taosMemoryFree(buf); } + +static void taosMerge(void *src, int32_t start, int32_t leftend, int32_t end, int64_t size, const void *param, + __ext_compar_fn_t comparFn, void *tmp) { + int32_t leftSize = leftend - start + 1; + int32_t rightSize = end - leftend; + + void *leftBuf = tmp; + void *rightBuf = (char *)tmp + (leftSize * size); + + memcpy(leftBuf, elePtrAt(src, size, start), leftSize * size); + memcpy(rightBuf, elePtrAt(src, size, leftend + 1), rightSize * size); + + int32_t i = 0, j = 0, k = start; + + while (i < leftSize && j < rightSize) { + int32_t ret = comparFn(elePtrAt(leftBuf, size, i), elePtrAt(rightBuf, size, j), param); + if (ret <= 0) { + memcpy(elePtrAt(src, size, k), elePtrAt(leftBuf, size, i), size); + i++; + } else { + memcpy(elePtrAt(src, size, k), elePtrAt(rightBuf, size, j), size); + j++; + } + k++; + } + + while (i < leftSize) { + memcpy(elePtrAt(src, size, k), elePtrAt(leftBuf, size, i), size); + i++; + k++; + } + + while (j < rightSize) { + memcpy(elePtrAt(src, size, k), elePtrAt(rightBuf, size, j), size); + j++; + k++; + } +} + +static int32_t taosMergeSortHelper(void *src, int64_t numOfElem, int64_t size, const void *param, + __ext_compar_fn_t comparFn) { + // short array sort, instead of merge sort process + const int32_t THRESHOLD_SIZE = 6; + char *buf = taosMemoryCalloc(1, size); // prepare the swap buffer + if (buf == NULL) return TSDB_CODE_OUT_OF_MEMORY; + for (int32_t start = 0; start < numOfElem - 1; start += THRESHOLD_SIZE) { + int32_t end = (start + THRESHOLD_SIZE - 1) <= numOfElem - 1 ? 
(start + THRESHOLD_SIZE - 1) : numOfElem - 1; + tInsertSort(src, size, start, end, param, comparFn, buf); + } + taosMemoryFreeClear(buf); + + if (numOfElem > THRESHOLD_SIZE) { + int32_t currSize; + void *tmp = taosMemoryMalloc(numOfElem * size); + if (tmp == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + for (currSize = THRESHOLD_SIZE; currSize <= numOfElem - 1; currSize = 2 * currSize) { + int32_t leftStart; + for (leftStart = 0; leftStart < numOfElem - 1; leftStart += 2 * currSize) { + int32_t leftend = leftStart + currSize - 1; + int32_t rightEnd = + (leftStart + 2 * currSize - 1 < numOfElem - 1) ? (leftStart + 2 * currSize - 1) : (numOfElem - 1); + if (leftend >= rightEnd) break; + + taosMerge(src, leftStart, leftend, rightEnd, size, param, comparFn, tmp); + } + } + + taosMemoryFreeClear(tmp); + } + return 0; +} + +int32_t msortHelper(const void *p1, const void *p2, const void *param) { + __compar_fn_t comparFn = param; + return comparFn(p1, p2); +} + + +int32_t taosMergeSort(void *src, int64_t numOfElem, int64_t size, __compar_fn_t comparFn) { + void *param = comparFn; + return taosMergeSortHelper(src, numOfElem, size, param, msortHelper); +} diff --git a/source/util/src/tarray.c b/source/util/src/tarray.c index 8e7c0f9584..a7c28df22b 100644 --- a/source/util/src/tarray.c +++ b/source/util/src/tarray.c @@ -417,6 +417,10 @@ void taosArraySort(SArray* pArray, __compar_fn_t compar) { taosSort(pArray->pData, pArray->size, pArray->elemSize, compar); } +int32_t taosArrayMSort(SArray* pArray, __compar_fn_t compar) { + return taosMergeSort(pArray->pData, pArray->size, pArray->elemSize, compar); +} + void* taosArraySearch(const SArray* pArray, const void* key, __compar_fn_t comparFn, int32_t flags) { return taosbsearch(key, pArray->pData, pArray->size, pArray->elemSize, comparFn, flags); } diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index a431b091ec..b858421e25 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -100,6 +100,7 @@ int32_t tmrDebugFlag = 131; int32_t uDebugFlag = 131; int32_t rpcDebugFlag = 131; int32_t qDebugFlag = 131; +int32_t stDebugFlag = 131; int32_t wDebugFlag = 131; int32_t sDebugFlag = 131; int32_t tsdbDebugFlag = 131; diff --git a/source/util/test/CMakeLists.txt b/source/util/test/CMakeLists.txt index 0bf06e6f44..94f8deee44 100644 --- a/source/util/test/CMakeLists.txt +++ b/source/util/test/CMakeLists.txt @@ -84,3 +84,11 @@ add_test( NAME pageBufferTest COMMAND pageBufferTest ) + +# talgoTest +add_executable(talgoTest "talgoTest.cpp") +target_link_libraries(talgoTest os util gtest_main) +add_test( + NAME talgoTest + COMMAND talgoTest +) diff --git a/source/util/test/talgoTest.cpp b/source/util/test/talgoTest.cpp new file mode 100644 index 0000000000..b5a8db7378 --- /dev/null +++ b/source/util/test/talgoTest.cpp @@ -0,0 +1,104 @@ +#include <gtest/gtest.h> +#include <iostream> +#include "talgo.h" + +struct TestStruct { + int a; + float b; +}; + +// Define a custom comparison function for testing +int cmpFunc(const void* a, const void* b) { + const TestStruct* pa = reinterpret_cast<const TestStruct*>(a); + const TestStruct* pb = reinterpret_cast<const TestStruct*>(b); + if (pa->a < pb->a) { + return -1; + } else if (pa->a > pb->a) { + return 1; + } else { + return 0; + } +} + +TEST(utilTest, taosMSort) { + // Create an array of test data + TestStruct arr[] = {{4, 2.5}, {3, 6}, {2, 1.5}, {3, 2}, {1, 3.5}, {3, 5}}; + + // Sort the array using taosMergeSort + taosMergeSort(arr, 6, sizeof(TestStruct), cmpFunc); + + for (int i = 0; i < sizeof(arr) / sizeof(TestStruct); i++) { + printf("%d: %d %f\n", i, arr[i].a,
arr[i].b); + } + + // Check that the array is sorted correctly + EXPECT_EQ(arr[0].a, 1); + EXPECT_EQ(arr[1].a, 2); + EXPECT_EQ(arr[2].a, 3); + EXPECT_EQ(arr[2].b, 6); + EXPECT_EQ(arr[3].a, 3); + EXPECT_EQ(arr[3].b, 2); + EXPECT_EQ(arr[4].a, 3); + EXPECT_EQ(arr[4].b, 5); + EXPECT_EQ(arr[5].a, 4); +} + +int cmpInt(const void* a, const void* b) { + int int_a = *((int*)a); + int int_b = *((int*)b); + + if (int_a == int_b) + return 0; + else if (int_a < int_b) + return -1; + else + return 1; +} + +TEST(utilTest, taosMSort2) { + clock_t start_time, end_time; + double cpu_time_used; + + int times = 10000; + start_time = clock(); + for (int i = 0; i < 10000; i++) { + TestStruct arr[] = {{4, 2.5}, {3, 6}, {2, 1.5}, {3, 2}, {1, 3.5}, {3, 5}}; + taosMergeSort(arr, 6, sizeof(TestStruct), cmpFunc); + } + end_time = clock(); + cpu_time_used = ((double)(end_time - start_time)) / CLOCKS_PER_SEC; + printf("taosMSort %d times: %f s\n", times, cpu_time_used); + + start_time = clock(); + for (int i = 0; i < 10000; i++) { + TestStruct arr[] = {{4, 2.5}, {3, 6}, {2, 1.5}, {3, 2}, {1, 3.5}, {3, 5}}; + taosSort(arr, 6, sizeof(TestStruct), cmpFunc); + } + end_time = clock(); + cpu_time_used = ((double)(end_time - start_time)) / CLOCKS_PER_SEC; + printf("taosSort %d times: %f s\n", times, cpu_time_used); + + const int arraySize = 1000000; + int data1[arraySize]; + int data2[arraySize]; + for (int i = 0; i < arraySize; ++i) { + data1[i] = taosRand(); + data2[i] = data1[i]; + } + start_time = clock(); + taosMergeSort(data1, arraySize, sizeof(int), cmpInt); + end_time = clock(); + cpu_time_used = ((double)(end_time - start_time)) / CLOCKS_PER_SEC; + printf("taosMSort length:%d cost: %f s\n", arraySize, cpu_time_used); + + start_time = clock(); + taosSort(data2, arraySize, sizeof(int), cmpInt); + end_time = clock(); + cpu_time_used = ((double)(end_time - start_time)) / CLOCKS_PER_SEC; + printf("taosSort length:%d cost: %f s\n", arraySize, cpu_time_used); + + for (int i = 0; i < arraySize - 1; i++) { + EXPECT_EQ(data1[i], data2[i]); + ASSERT_LE(data1[i], data1[i+1]); + } +} diff --git a/tests/develop-test/2-query/ts-range.py b/tests/develop-test/2-query/ts-range.py new file mode 100644 index 0000000000..6ad88281ef --- /dev/null +++ b/tests/develop-test/2-query/ts-range.py @@ -0,0 +1,86 @@ +import sys +from util.log import * +from util.cases import * +from util.sql import * +from util.dnodes import tdDnodes +from math import inf + +class TDTestCase: + def caseDescription(self): + ''' + case1: [TS-4088] timestamp range support operator + ''' + return + + def init(self, conn, logSql, replicaVer=1): + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor(), True) + self._conn = conn + + def restartTaosd(self, index=1, dbname="db"): + tdDnodes.stop(index) + tdDnodes.startWithoutSleep(index) + tdSql.execute(f"use ts_range") + + def run(self): + print("running {}".format(__file__)) + tdSql.execute("drop database if exists ts_range") + tdSql.execute("create database if not exists ts_range") + tdSql.execute('use ts_range') + tdSql.execute('create table stb1 (ts timestamp, c1 bool, c2 tinyint, c3 smallint, c4 int, c5 bigint, c6 float, c7 double, c8 binary(10), c9 nchar(10), c10 tinyint unsigned, c11 smallint unsigned, c12 int unsigned, c13 bigint unsigned) TAGS(t1 int, t2 binary(10), t3 double);') + + tdSql.execute("create table tb1 using stb1 tags(1,'1',1.0);") + + tdSql.execute("create table tb2 using stb1 tags(2,'2',2.0);") + + tdSql.execute("create table tb3 using stb1 tags(3,'3',3.0);") + + 
tdSql.execute('insert into tb1 values (\'2021-11-11 09:00:00\',true,1,1,1,1,1,1,"123","1234",1,1,1,1);') + + tdSql.execute("insert into tb1 values ('2021-11-11 09:00:01',true,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);") + + tdSql.execute('insert into tb1 values (\'2021-11-11 09:00:02\',true,2,NULL,2,NULL,2,NULL,"234",NULL,2,NULL,2,NULL);') + + tdSql.execute('insert into tb1 values (\'2021-11-11 09:00:03\',false,NULL,3,NULL,3,NULL,3,NULL,"3456",NULL,3,NULL,3);') + + tdSql.execute('insert into tb1 values (\'2021-11-11 09:00:04\',true,4,4,4,4,4,4,"456","4567",4,4,4,4);') + + tdSql.execute('insert into tb1 values (\'2021-11-11 09:00:05\',true,127,32767,2147483647,9223372036854775807,3.402823466e+38,1.79769e+308,"567","5678",254,65534,4294967294,9223372036854775807);') + + tdSql.execute('insert into tb1 values (\'2021-11-11 09:00:06\',true,-127,-32767,-2147483647,-9223372036854775807,-3.402823466e+38,-1.79769e+308,"678","6789",0,0,0,0);') + + tdSql.execute('insert into tb2 values (\'2021-11-11 09:00:00\',true,1,1,1,1,1,1,"111","1111",1,1,1,1);') + + tdSql.execute('insert into tb2 values (\'2021-11-11 09:00:01\',true,2,2,2,2,2,2,"222","2222",2,2,2,2);') + + tdSql.execute('insert into tb2 values (\'2021-11-11 09:00:02\',true,3,3,2,3,3,3,"333","3333",3,3,3,3);') + + tdSql.execute('insert into tb2 values (\'2021-11-11 09:00:03\',false,4,4,4,4,4,4,"444","4444",4,4,4,4);') + + tdSql.execute('insert into tb2 values (\'2021-11-11 09:00:04\',true,5,5,5,5,5,5,"555","5555",5,5,5,5);') + + tdSql.execute('insert into tb2 values (\'2021-11-11 09:00:05\',true,6,6,6,6,6,6,"666","6666",6,6,6,6);') + + tdSql.execute('insert into tb2 values (\'2021-11-11 09:00:06\',true,7,7,7,7,7,7,"777","7777",7,7,7,7);') + + + tdSql.query('select count(*) from stb1 where ts < 1000000000000 + 10s') + tdSql.checkRows(1) + tdSql.checkData(0, 0, 0) + tdSql.query('select count(*) from stb1 where ts >= 1000000000000 + 10s') + tdSql.checkRows(1) + tdSql.checkData(0, 0, 14) + + tdSql.query('select count(*) from stb1 where ts > 1000000000000 - 10s and ts <= 1000000000000 + 10s') + tdSql.checkRows(1) + tdSql.checkData(0, 0, 0) + + tdSql.query('select count(*) from stb1 where ts > 1636592400000 + 3s'); + tdSql.checkData(0, 0, 6) + #tdSql.execute('drop database ts_range') + def stop(self): + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase()) diff --git a/tests/develop-test/5-taos-tools/taosbenchmark/json/taosc_sample_use_ts.json b/tests/develop-test/5-taos-tools/taosbenchmark/json/taosc_sample_use_ts.json index 38aa47740f..56c2a52b6a 100644 --- a/tests/develop-test/5-taos-tools/taosbenchmark/json/taosc_sample_use_ts.json +++ b/tests/develop-test/5-taos-tools/taosbenchmark/json/taosc_sample_use_ts.json @@ -43,7 +43,7 @@ "disorder_ratio": 0, "disorder_range": 1000, "timestamp_step": 1, - "start_timestamp": "now", + "start_timestamp": 1641976781440, "sample_file": "./5-taos-tools/taosbenchmark/csv/sample_use_ts.csv", "use_sample_ts": "yes", "tags_file": "./5-taos-tools/taosbenchmark/csv/sample_tags.csv", diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 8afb739cbc..e83586ca09 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -57,6 +57,10 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/partition_by_col_agg.py -Q 2 ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/partition_by_col_agg.py -Q 3 ,,y,system-test,./pytest.sh python3 ./test.py 
-f 2-query/partition_by_col_agg.py -Q 4 +,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/interval_limit_opt_2.py -Q 4 +,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/interval_limit_opt_2.py -Q 3 +,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/interval_limit_opt_2.py -Q 2 +,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/interval_limit_opt_2.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqShow.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqDropStb.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/subscribeStb0.py @@ -1012,6 +1016,7 @@ ,,y,script,./test.sh -f tsim/query/udf_with_const.sim ,,y,script,./test.sh -f tsim/query/join_interval.sim ,,y,script,./test.sh -f tsim/query/join_pk.sim +,,y,script,./test.sh -f tsim/query/count_spread.sim ,,y,script,./test.sh -f tsim/query/unionall_as_table.sim ,,y,script,./test.sh -f tsim/query/multi_order_by.sim ,,y,script,./test.sh -f tsim/query/sys_tbname.sim @@ -1023,12 +1028,13 @@ ,,y,script,./test.sh -f tsim/query/emptyTsRange_scl.sim ,,y,script,./test.sh -f tsim/query/partitionby.sim ,,y,script,./test.sh -f tsim/query/tableCount.sim -,,y,script,./test.sh -f tsim/query/show_db_table_kind.sim -,,y,script,./test.sh -f tsim/query/bi_star_table.sim -,,y,script,./test.sh -f tsim/query/tag_scan.sim +,,y,script,./test.sh -f tsim/query/show_db_table_kind.sim +,,y,script,./test.sh -f tsim/query/bi_star_table.sim +,,y,script,./test.sh -f tsim/query/tag_scan.sim ,,y,script,./test.sh -f tsim/query/nullColSma.sim ,,y,script,./test.sh -f tsim/query/bug3398.sim ,,y,script,./test.sh -f tsim/query/explain_tsorder.sim +,,y,script,./test.sh -f tsim/query/apercentile.sim ,,y,script,./test.sh -f tsim/qnode/basic1.sim ,,y,script,./test.sh -f tsim/snode/basic1.sim ,,y,script,./test.sh -f tsim/mnode/basic1.sim @@ -1264,6 +1270,7 @@ #develop test ,,n,develop-test,python3 ./test.py -f 2-query/table_count_scan.py +,,n,develop-test,python3 ./test.py -f 2-query/ts-range.py ,,n,develop-test,python3 ./test.py -f 2-query/show_create_db.py ,,n,develop-test,python3 ./test.py -f 5-taos-tools/taosbenchmark/auto_create_table_json.py ,,n,develop-test,python3 ./test.py -f 5-taos-tools/taosbenchmark/custom_col_tag.py diff --git a/tests/perf-test/build.sh b/tests/perf-test/build.sh new file mode 100755 index 0000000000..04717cbd13 --- /dev/null +++ b/tests/perf-test/build.sh @@ -0,0 +1,22 @@ +#! /bin/bash + +set -x + +cd $1 +git reset --hard HEAD +git checkout -- . +git checkout $2 +git pull + +sed -i ':a;N;$!ba;s/\(.*\)OFF/\1ON/' $1/cmake/cmake.options + +mkdir -p $1/debug +rm -rf $1/debug/* +cd $1/debug +cmake .. 
-DBUILD_TOOLS=true +cd $1/debug +make -j 4 +cd $1/debug +make install + +systemctl start taosd diff --git a/tests/perf-test/buildTD.py b/tests/perf-test/buildTD.py new file mode 100644 index 0000000000..9b47886089 --- /dev/null +++ b/tests/perf-test/buildTD.py @@ -0,0 +1,32 @@ +import os +import subprocess + +class BuildTDengine: + def __init__(self, host='vm96', path = '/root/pxiao/TDengine', branch = 'main') -> None: + self.host = host + self.path = path + self.branch = branch + + def build(self): + parameters = [self.path, self.branch] + build_file = "./build.sh" + try: + # Run the Bash script using subprocess + subprocess.run(['bash', build_file] + parameters, check=True) + print("TDengine built successfully.") + except subprocess.CalledProcessError as e: + print(f"Error running Bash script: {e}") + except FileNotFoundError as e: + print(f"File not found: {e}") + + def get_cmd_output(self, cmd): + try: + # Run the Bash command and capture the output + result = subprocess.run(cmd, stdout=subprocess.PIPE, shell=True, text=True) + + # Access the output from the 'result' object + output = result.stdout + + return output.strip() + except subprocess.CalledProcessError as e: + print(f"Error running Bash command: {e}") \ No newline at end of file diff --git a/tests/perf-test/insert_json.py b/tests/perf-test/insert_json.py new file mode 100644 index 0000000000..7ce5fb86e2 --- /dev/null +++ b/tests/perf-test/insert_json.py @@ -0,0 +1,100 @@ +import datetime +import json + +class InsertJson: + def __init__(self, tables = 10000, records_per_table = 10000, interlace_rows = 0, stt_trigger = 1) -> None: + self.tables = tables + self.records_per_table = records_per_table + self.interlace_rows = interlace_rows + self.stt_trigger = stt_trigger + + def get_db_cfg(self) -> dict: + return { + "name": "test", + "drop": "true", + "replica": 1, + "precision": "ms", + "cachemodel": "'both'", + "keep": 3650, + "minRows": 100, + "maxRows": 4096, + "comp": 2, + "vgroups": 10, + "stt_trigger": self.stt_trigger + } + + def get_stb_cfg(self) -> list: + return [ + { + "name": "meters", + "child_table_exists": "no", + "childtable_count": self.tables, + "childtable_prefix": "d", + "escape_character": "yes", + "auto_create_table": "no", + "batch_create_tbl_num": 5, + "data_source": "rand", + "insert_mode": "taosc", + "non_stop_mode": "no", + "line_protocol": "line", + "insert_rows": self.records_per_table, + "childtable_limit": 10000, + "childtable_offset": 100, + "interlace_rows": self.interlace_rows, + "insert_interval": 0, + "partial_col_num": 0, + "disorder_ratio": 0, + "disorder_range": 1000, + "timestamp_step": 10, + "start_timestamp": "2022-10-01 00:00:00.000", + "sample_format": "csv", + "sample_file": "./sample.csv", + "use_sample_ts": "no", + "tags_file": "", + "columns": self.get_column_list(), + "tags": self.get_tag_list() + } + ] + + def get_column_list(self) -> list: + return [ + {"type": "FLOAT", "name": "current", "count": 1, "max": 12, "min": 8}, + {"type": "INT", "name": "voltage", "max": 225, "min": 215}, + {"type": "FLOAT", "name": "phase", "max": 1, "min": 0}, + ] + + def get_tag_list(self) -> list: + return [ + { "type": "TINYINT", "name": "groupid", "max": 10, "min": 1 }, + { "name": "location", "type": "BINARY", "len": 16, "values": ["San Francisco", "Los Angeles", "San Diego", "San Jose", "Palo Alto", "Campbell", "Mountain View", "Sunnyvale", "Santa Clara", "Cupertino"]} + ] + + def get_insert_cfg(self) -> dict: + return { + "filetype": "insert", + "cfgdir": "/etc/taos", + "host": "127.0.0.1",
"port": 6030, + "user": "root", + "password": "taosdata", + "thread_count": 10, + "create_table_thread_count": 7, + "result_file": "/tmp/insert_res.txt", + "confirm_parameter_prompt": "no", + "insert_interval": 0, + "num_of_records_per_req": 1000, + "max_sql_len": 1024000, + "databases": [{ + "dbinfo": self.get_db_cfg(), + "super_tables": self.get_stb_cfg() + }] + } + + def create_insert_file(self) -> str: + date = datetime.datetime.now() + file_create_table = f"/tmp/insert_{date:%F-%H%M}.json" + + with open(file_create_table, 'w') as f: + json.dump(self.get_insert_cfg(), f) + + return file_create_table \ No newline at end of file diff --git a/tests/perf-test/mysqldb.py b/tests/perf-test/mysqldb.py new file mode 100644 index 0000000000..f25f4f35f7 --- /dev/null +++ b/tests/perf-test/mysqldb.py @@ -0,0 +1,60 @@ +import mysql.connector + +class MySQLDatabase: + def __init__(self, host = '192.168.1.116', port = 3306, user = 'root', password = 'taosdata', database = 'perf_data'): + self.host = host + self.port = port + self.user = user + self.password = password + self.database = database + self.connection = None + + def connect(self): + try: + self.connection = mysql.connector.connect( + host=self.host, + port=self.port, + user=self.user, + password=self.password, + database=self.database + ) + except mysql.connector.Error as error: + print("Failed to connect to database: {}".format(error)) + + def execute(self, query, params=None): + cursor = self.connection.cursor() + try: + cursor.execute(query, params) + self.connection.commit() + except mysql.connector.Error as error: + print("Failed to execute query: {}".format(error)) + finally: + cursor.close() + + def query(self, query, params=None): + cursor = self.connection.cursor() + try: + cursor.execute(query, params) + result = cursor.fetchall() + return result + except mysql.connector.Error as error: + print("Failed to execute query: {}".format(error)) + finally: + cursor.close() + + def get_id(self, query, params = None): + cursor = self.connection.cursor() + try: + cursor.execute(query, params) + cursor.execute("select last_insert_id()") + id = cursor.fetchone()[0] + self.connection.commit() + + return id + except mysql.connector.Error as error: + print("Failed to execute query: {}".format(error)) + finally: + cursor.close() + + def disconnect(self): + self.connection.close() \ No newline at end of file diff --git a/tests/perf-test/query_json.py b/tests/perf-test/query_json.py new file mode 100644 index 0000000000..0c2b2f38d1 --- /dev/null +++ b/tests/perf-test/query_json.py @@ -0,0 +1,41 @@ +import datetime +import json + +class QueryJson: + def __init__(self, sql, query_times = 1) -> None: + self.sql = sql + self.query_times = query_times + + def gen_query_json(self) -> dict: + return { + "filetype": "query", + "cfgdir": "/etc/taos", + "host": "127.0.0.1", + "port": 6030, + "user": "root", + "password": "taosdata", + "confirm_parameter_prompt": "no", + "databases": "test", + "query_times": self.query_times, + "query_mode": "taosc", + "specified_table_query": { + "query_interval": 1, + "concurrent": 1, + "sqls": [ + { + "sql": "%s" % self.sql, + "result": "./query_res.txt" + } + ] + } + + } + + def create_query_file(self) -> str: + date = datetime.datetime.now() + file_create_table = f"/tmp/query_{date:%F-%H%M}.json" + + with open(file_create_table, 'w') as f: + json.dump(self.gen_query_json(), f) + + return file_create_table \ No newline at end of file diff --git a/tests/perf-test/write_perf_data.py b/tests/perf-test/write_perf_data.py 
new file mode 100644 index 0000000000..4a2021c356 --- /dev/null +++ b/tests/perf-test/write_perf_data.py @@ -0,0 +1,75 @@ +import os +import socket +import mysqldb +import insert_json +import query_json +import buildTD + +if __name__ == "__main__": + # Build TDengine + hostname = socket.gethostname() + new_build = buildTD.BuildTDengine(host = hostname) + + new_build.build() + cmd = f"cd {new_build.path} && git rev-parse --short @ " + commit_id = new_build.get_cmd_output(cmd) + branch = new_build.branch + + num_of_tables = 10000 + records_per_table = 10000 + interlace_rows = 0 + stt_trigger = 1 + + # get scenario id + db = mysqldb.MySQLDatabase() + db.connect() + sql = f"select id from scenarios where num_of_tables = {num_of_tables} and records_per_table = {records_per_table} and interlace_rows = {interlace_rows} and stt_trigger = {stt_trigger}" + row = db.query(sql) + if row is None: + id = db.get_id(f"insert into scenarios(num_of_tables, records_per_table, interlace_rows, stt_trigger) values({num_of_tables},{records_per_table}, {interlace_rows}, {stt_trigger})") + else: + id = row[0][0] + + print(f"scenario id is {id}") + + # record insert performance data + insert = insert_json.InsertJson(num_of_tables, records_per_table, interlace_rows, stt_trigger) + os.system(f"taosBenchmark -f {insert.create_insert_file()}") + + cmd = "grep Spent /tmp/insert_res.txt | tail -1 | awk {'print $5'}" + time = new_build.get_cmd_output(cmd) + + cmd = "grep Spent /tmp/insert_res.txt | tail -1 | awk {'print $16'}" + speed = new_build.get_cmd_output(cmd) + + sql = f"insert into insert_perf(sid, time_cost, records_per_sec, branch, commit_id, date) values({id}, {time}, {speed}, '{branch}', '{commit_id}', now())" + print(sql) + db.execute(sql) + + # record query performance data + sql = "select * from queries" + res = db.query(sql) + for row in res: + json = query_json.QueryJson(row[1], query_times=1) + print(f"query: {row[1]}") + os.system(f"taosBenchmark -f {json.create_query_file()} > /tmp/{row[0]}.txt") + cmd = "grep delay /tmp/%d.txt | awk {'print $11'} | cut -d 's' -f 1" % row[0] + print(f"cmd is {cmd}") + avg = new_build.get_cmd_output(cmd) + print(f"avg is {avg}") + if (avg == ""): + break + + sql = f"insert into query_perf(sid, qid, time_cost, branch, commit_id, date) values({id}, {row[0]}, {avg}, '{branch}', '{commit_id}', now())" + print(sql) + db.execute(sql) + + # close connection + db.disconnect() + + + + + + + \ No newline at end of file diff --git a/tests/pytest/util/sql.py b/tests/pytest/util/sql.py index 91aac1929f..7dcf6bc3f2 100644 --- a/tests/pytest/util/sql.py +++ b/tests/pytest/util/sql.py @@ -78,7 +78,7 @@ class TDSql: self.cursor.execute(s) time.sleep(2) - def error(self, sql, expectedErrno = None): + def error(self, sql, expectedErrno = None, expectErrInfo = None): caller = inspect.getframeinfo(inspect.stack()[1][0]) expectErrNotOccured = True @@ -87,12 +87,9 @@ class TDSql: except BaseException as e: expectErrNotOccured = False self.errno = e.errno - self.error_info = repr(e) - # print(error_info) - # self.error_info = error_info[error_info.index('(')+1:-1].split(",")[0].replace("'","") + error_info = repr(e) + self.error_info = error_info[error_info.index('(')+1:-1].split(",")[0].replace("'","") # self.error_info = (','.join(error_info.split(",")[:-1]).split("(",1)[1:][0]).replace("'","") - # print("!!!!!!!!!!!!!!",self.error_info) - if expectErrNotOccured: tdLog.exit("%s(%d) failed: sql:%s, expect error not occured" % (caller.filename, caller.lineno, sql)) else: @@ -108,8 +105,15 @@ 
class TDSql: else: tdLog.info("sql:%s, expect error occured" % (sql)) - return self.error_info + if expectErrInfo != None: + if expectErrInfo == self.error_info: + tdLog.info("sql:%s, expected expectErrInfo %s occured" % (sql, expectErrInfo)) + else: + tdLog.exit("%s(%d) failed: sql:%s, expectErrInfo %s occured, but not expected errno %s" % (caller.filename, caller.lineno, sql, self.error_info, expectErrInfo)) + else: + tdLog.info("sql:%s, expect error occured" % (sql)) + return self.error_info def query(self, sql, row_tag=None, queryTimes=10, count_expected_res=None): self.sql = sql @@ -257,7 +261,7 @@ class TDSql: return self.cursor.istype(col, dataType) - def checkData(self, row, col, data): + def checkData(self, row, col, data, show = False): if row >= self.queryRows: caller = inspect.getframeinfo(inspect.stack()[1][0]) args = (caller.filename, caller.lineno, self.sql, row+1, self.queryRows) @@ -275,8 +279,8 @@ class TDSql: if isinstance(data,str) : if (len(data) >= 28): if self.queryResult[row][col] == _parse_ns_timestamp(data): - # tdLog.info(f"sql:{self.sql}, row:{row} col:{col} data:{pd.to_datetime(resultData)} == expect:{data}") - tdLog.info("check successfully") + if(show): + tdLog.info("check successfully") else: caller = inspect.getframeinfo(inspect.stack()[1][0]) args = (caller.filename, caller.lineno, self.sql, row, col, self.queryResult[row][col], data) @@ -284,7 +288,8 @@ class TDSql: else: if self.queryResult[row][col].astimezone(datetime.timezone.utc) == _parse_datetime(data).astimezone(datetime.timezone.utc): # tdLog.info(f"sql:{self.sql}, row:{row} col:{col} data:{self.queryResult[row][col]} == expect:{data}") - tdLog.info("check successfully") + if(show): + tdLog.info("check successfully") else: caller = inspect.getframeinfo(inspect.stack()[1][0]) args = (caller.filename, caller.lineno, self.sql, row, col, self.queryResult[row][col], data) @@ -317,7 +322,8 @@ class TDSql: if data == self.queryResult[row][col]: success = True if success: - tdLog.info("check successfully") + if(show): + tdLog.info("check successfully") else: caller = inspect.getframeinfo(inspect.stack()[1][0]) args = (caller.filename, caller.lineno, self.sql, row, col, self.queryResult[row][col], data) @@ -328,7 +334,8 @@ class TDSql: delt_data = data-datetime.datetime.fromtimestamp(0,data.tzinfo) delt_result = self.queryResult[row][col] - datetime.datetime.fromtimestamp(0,self.queryResult[row][col].tzinfo) if delt_data == delt_result: - tdLog.info("check successfully") + if(show): + tdLog.info("check successfully") else: caller = inspect.getframeinfo(inspect.stack()[1][0]) args = (caller.filename, caller.lineno, self.sql, row, col, self.queryResult[row][col], data) @@ -341,16 +348,19 @@ class TDSql: if str(self.queryResult[row][col]) == str(data): # tdLog.info(f"sql:{self.sql}, row:{row} col:{col} data:{self.queryResult[row][col]} == expect:{data}") - tdLog.info("check successfully") + if(show): + tdLog.info("check successfully") return elif isinstance(data, float): if abs(data) >= 1 and abs((self.queryResult[row][col] - data) / data) <= 0.000001: # tdLog.info(f"sql:{self.sql}, row:{row} col:{col} data:{self.queryResult[row][col]} == expect:{data}") - tdLog.info("check successfully") + if(show): + tdLog.info("check successfully") elif abs(data) < 1 and abs(self.queryResult[row][col] - data) <= 0.000001: # tdLog.info(f"sql:{self.sql}, row:{row} col:{col} data:{self.queryResult[row][col]} == expect:{data}") - tdLog.info("check successfully") + if(show): + tdLog.info("check successfully") else: caller = 
inspect.getframeinfo(inspect.stack()[1][0]) @@ -361,7 +371,8 @@ class TDSql: caller = inspect.getframeinfo(inspect.stack()[1][0]) args = (caller.filename, caller.lineno, self.sql, row, col, self.queryResult[row][col], data) tdLog.exit("%s(%d) failed: sql:%s row:%d col:%d data:%s != expect:%s" % args) - tdLog.info("check successfully") + if(show): + tdLog.info("check successfully") # return true or false replace exit, no print out def checkRowColNoExit(self, row, col): diff --git a/tests/script/sh/deploy.sh b/tests/script/sh/deploy.sh index 7da8da09bf..3b3d275a07 100755 --- a/tests/script/sh/deploy.sh +++ b/tests/script/sh/deploy.sh @@ -137,6 +137,7 @@ echo "idxDebugFlag 143" >> $TAOS_CFG echo "udfDebugFlag 143" >> $TAOS_CFG echo "smaDebugFlag 143" >> $TAOS_CFG echo "metaDebugFlag 143" >> $TAOS_CFG +echo "stDebugFlag 143" >> $TAOS_CFG echo "numOfLogLines 20000000" >> $TAOS_CFG echo "asyncLog 0" >> $TAOS_CFG echo "locale en_US.UTF-8" >> $TAOS_CFG diff --git a/tests/script/tsim/query/apercentile.sim b/tests/script/tsim/query/apercentile.sim new file mode 100644 index 0000000000..71d075b0ef --- /dev/null +++ b/tests/script/tsim/query/apercentile.sim @@ -0,0 +1,36 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/exec.sh -n dnode1 -s start +sql connect + +sql drop database if exists test2; +sql create database test2; +sql use test2; +sql create table s(ts timestamp,v double) tags(id nchar(16)); +sql create table t using s tags('11') ; +sql insert into t values(now,null); +sql select APERCENTILE(v,50,'t-digest') as k from s where ts > now-1d and ts < now interval(1h); +if $rows != 1 then + return -1 +endi +if $data00 != NULL then + return -1 +endi + +sql select APERCENTILE(v,50) as k from s where ts > now-1d and ts < now interval(1h); +if $rows != 1 then + return -1 +endi +if $data00 != NULL then + return -1 +endi + +sql select APERCENTILE(v,50) as k from s where ts > now-1d and ts < now interval(1h); +if $rows != 1 then + return -1 +endi +if $data00 != NULL then + return -1 +endi + +system sh/exec.sh -n dnode1 -s stop -x SIGINT diff --git a/tests/script/tsim/query/count_spread.sim b/tests/script/tsim/query/count_spread.sim new file mode 100644 index 0000000000..c03783b7fe --- /dev/null +++ b/tests/script/tsim/query/count_spread.sim @@ -0,0 +1,24 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/exec.sh -n dnode1 -s start +sql connect + +sql create database test; +sql use test; +sql create table st(ts timestamp, f int) tags(t int); +sql insert into ct1 using st tags(1) values(now, 0)(now+1s, 1)(now+2s, 10)(now+3s, 11) +sql insert into ct2 using st tags(2) values(now+2s, 2)(now+3s, 3) +sql insert into ct3 using st tags(3) values(now+4s, 4)(now+5s, 5) +sql insert into ct4 using st tags(4) values(now+6s, 6)(now+7s, 7) + +sql select count(*), spread(ts) from st where tbname='ct1' +print $data00, $data01 +if $data00 != @4@ then + return -1 +endi +if $data01 != @3000.000000000@ then + return -1 +endi + +sql drop database test; +system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file diff --git a/tests/script/tsim/stream/checkStreamSTable.sim b/tests/script/tsim/stream/checkStreamSTable.sim index 3b31cbc383..873fb3f060 100644 --- a/tests/script/tsim/stream/checkStreamSTable.sim +++ b/tests/script/tsim/stream/checkStreamSTable.sim @@ -22,6 +22,8 @@ sql create table t2 using st tags(2,2,2); sql create stable result.streamt0(ts timestamp,a int,b int) tags(ta int,tb varchar(100),tc int); sql create stream streams0 trigger at_once into 
result.streamt0 tags(tb) as select _wstart, count(*) c1, max(a) c2 from st partition by tbname tb interval(10s); +sleep 500 + sql insert into t1 values(1648791213000,1,2,3); sql insert into t2 values(1648791213000,2,2,3); @@ -106,6 +108,8 @@ sql create table t2 using st tags(2,2,2); sql create stable result1.streamt1(ts timestamp,a int,b int,c int) tags(ta varchar(100),tb int,tc int); sql create stream streams1 trigger at_once into result1.streamt1(ts,c,a,b) tags(ta) as select _wstart, count(*) c1, max(a),min(b) c2 from st partition by tbname as ta interval(10s); +sleep 500 + sql insert into t1 values(1648791213000,10,20,30); sql insert into t2 values(1648791213000,40,50,60); @@ -194,7 +198,7 @@ sql_error create stream streams2 trigger at_once into result2.streamt2 as selec # column dest 3, source 2 sql create stream streams2 trigger at_once into result2.streamt2(ts, a) tags(ta) as select _wstart, count(*) c1 from st partition by tbname as ta interval(10s); - +sleep 500 print ===== step5 @@ -211,6 +215,7 @@ sql create table t2 using st tags(4,5,6); sql create stable result3.streamt3(ts timestamp,a int,b int,c int, d int) tags(ta int,tb int,tc int); sql create stream streams3 trigger at_once into result3.streamt3(ts,c,a,b) as select _wstart, count(*) c1, max(a),min(b) c2 from st interval(10s); +sleep 500 sql insert into t1 values(1648791213000,10,20,30); sql insert into t2 values(1648791213000,40,50,60); @@ -290,6 +295,7 @@ sql create table t2 using st tags(4,5,6); sql create stable result4.streamt4(ts timestamp,a int,b int,c int, d int) tags(tg1 int,tg2 int,tg3 int); sql create stream streams4 trigger at_once into result4.streamt4(ts,c,a,b) tags(tg2, tg3, tg1) subtable( concat("tbl-", cast(tg1 as varchar(10)) ) ) as select _wstart, count(*) c1, max(a),min(b) c2 from st partition by ta+1 as tg1, cast(tb as bigint) as tg2, tc as tg3 interval(10s); +sleep 500 sql insert into t1 values(1648791213000,10,20,30); sql insert into t2 values(1648791213000,40,50,60); @@ -374,6 +380,7 @@ sql create table t2 using st tags(4,5,6); sql create stable result5.streamt5(ts timestamp,a int,b int,c int, d int) tags(tg1 int,tg2 int,tg3 int); sql create stream streams5 trigger at_once into result5.streamt5(ts,c,a,b) tags(tg2, tg3, tg1) subtable( concat("tbl-", cast(tg3 as varchar(10)) ) ) as select _wstart, count(*) c1, max(a),min(b) c2 from st partition by ta+1 as tg1, cast(tb as bigint) as tg2, a as tg3 session(ts, 10s); +sleep 500 sql insert into t1 values(1648791213000,NULL,NULL,NULL); @@ -458,6 +465,7 @@ sql create stream streams8 trigger at_once into streamt8 as select _wstart as sql drop stream streams8; sql create stream streams71 trigger at_once into streamt8(ts, c2) tags(group_id)as select _wstart, count(*) from t1 partition by tbname as group_id interval(10s); +sleep 500 sql insert into t1 values(1648791233000,1,2,3,1.0); diff --git a/tests/script/tsim/stream/checkStreamSTable1.sim b/tests/script/tsim/stream/checkStreamSTable1.sim index 57d0f0190d..dd44f5c102 100644 --- a/tests/script/tsim/stream/checkStreamSTable1.sim +++ b/tests/script/tsim/stream/checkStreamSTable1.sim @@ -15,6 +15,8 @@ sql create stable st(ts timestamp,a int,b int,c int) tags(ta int,tb int,tc int); sql create table t1 using st tags(1,1,1); sql create table t2 using st tags(2,2,2); sql create stream streams1 trigger at_once into streamt1 as select _wstart, count(*) c1, count(a) c2 from st interval(1s) ; +sleep 500 + sql insert into t1 values(1648791211000,1,2,3); sql insert into t1 values(1648791212000,2,2,3); @@ -44,6 +46,7 @@ sql 
alter table streamt1 add column c3 double; print create stream streams1 trigger at_once into streamt1 as select _wstart, count(*) c1, count(a) c2, avg(b) c3 from st interval(1s) ; sql create stream streams1 trigger at_once into streamt1 as select _wstart, count(*) c1, count(a) c2, avg(b) c3 from st interval(1s) ; +sleep 500 sql insert into t2 values(1648791213000,1,2,3); sql insert into t1 values(1648791214000,1,2,3); diff --git a/tests/script/tsim/stream/deleteInterval.sim b/tests/script/tsim/stream/deleteInterval.sim index b78de20a97..11e5ee39d2 100644 --- a/tests/script/tsim/stream/deleteInterval.sim +++ b/tests/script/tsim/stream/deleteInterval.sim @@ -17,6 +17,7 @@ sql create database test vgroups 1; sql use test; sql create table t1(ts timestamp, a int, b int , c int, d double); sql create stream streams0 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart c1, count(*) c2, max(a) c3 from t1 interval(10s); +sleep 500 sql insert into t1 values(1648791213000,NULL,NULL,NULL,NULL); sleep 1000 @@ -194,6 +195,7 @@ sql create stable st(ts timestamp, a int, b int, c int, d double) tags(ta int,tb sql create table t1 using st tags(1,1,1); sql create table t2 using st tags(2,2,2); sql create stream streams2 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into test.streamt2 as select _wstart c1, count(*) c2, max(a) c3 from st interval(10s); +sleep 500 sql insert into t1 values(1648791213000,NULL,NULL,NULL,NULL); sql insert into t2 values(1648791213000,NULL,NULL,NULL,NULL); @@ -420,6 +422,7 @@ sql create stable st(ts timestamp, a int, b int, c int, d double) tags(ta int,tb sql create table t1 using st tags(1,1,1); sql create table t2 using st tags(2,2,2); sql create stream streams3 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into test.streamt3 as select _wstart c1, count(*) c2, max(a) c3 from st interval(10s); +sleep 500 sql insert into t1 values(1648791213000,NULL,NULL,NULL,NULL); sql insert into t2 values(1648791213000,NULL,NULL,NULL,NULL); diff --git a/tests/script/tsim/stream/deleteSession.sim b/tests/script/tsim/stream/deleteSession.sim index f2694e79c7..18ff56ee3b 100644 --- a/tests/script/tsim/stream/deleteSession.sim +++ b/tests/script/tsim/stream/deleteSession.sim @@ -18,6 +18,8 @@ sql use test; sql create table t1(ts timestamp, a int, b int , c int, d double); sql create stream streams0 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart c1, count(*) c2, max(a) c3 from t1 session(ts, 5s); +sleep 2000 + sql insert into t1 values(1648791213000,NULL,NULL,NULL,NULL); sleep 1000 sql delete from t1 where ts = 1648791213000; @@ -193,6 +195,7 @@ sql create table t1 using st tags(1,1,1); sql create table t2 using st tags(2,2,2); sql create stream streams2 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into test.streamt2 as select _wstart c1, count(*) c2, max(a) c3 from st session(ts,5s); +sleep 2000 sql insert into t1 values(1648791213000,NULL,NULL,NULL,NULL); sql insert into t2 values(1648791213000,NULL,NULL,NULL,NULL); @@ -423,6 +426,7 @@ sql create stable st(ts timestamp, a int, b int, c int, d double) tags(ta int,tb sql create table t1 using st tags(1,1,1); sql create table t2 using st tags(2,2,2); sql create stream streams3 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into test.streamt3 as select _wstart c1, count(*) c2, max(a) c3 from st session(ts,5s); +sleep 2000 sql insert into t1 values(1648791210000,1,1,1,NULL); sql insert into t1 values(1648791210001,2,2,2,NULL); @@ -534,6 +538,7 @@ sql create table t2 using st 
tags(2,2,2); print create stream streams4 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt4 as select _wstart, count(*) c1 from st partition by tbname session(ts, 2s); sql create stream streams4 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt4 as select _wstart, count(*) c1 from st partition by tbname session(ts, 2s); +sleep 1000 sql insert into t1 values(1648791210000,1,2,3); sql insert into t1 values(1648791220000,2,2,3); diff --git a/tests/script/tsim/stream/distributeInterval0.sim b/tests/script/tsim/stream/distributeInterval0.sim index 5bb03c8cbf..a4e7941c28 100644 --- a/tests/script/tsim/stream/distributeInterval0.sim +++ b/tests/script/tsim/stream/distributeInterval0.sim @@ -439,6 +439,7 @@ sql create table ts1 using st tags(1,1,1); sql create table ts2 using st tags(2,2,2); sql create stream stream_t2 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 watermark 20s into streamtST1 as select _wstart, count(*) c1, count(a) c2 , sum(a) c3 , max(b) c5, min(c) c6 from st interval(10s) ; +sleep 2000 sql insert into ts1 values(1648791211000,1,2,3); sql insert into ts1 values(1648791222001,2,2,3); sql insert into ts2 values(1648791211000,1,2,3); diff --git a/tests/script/tsim/stream/partitionbyColumnInterval.sim b/tests/script/tsim/stream/partitionbyColumnInterval.sim index d586522cc8..d5f815d533 100644 --- a/tests/script/tsim/stream/partitionbyColumnInterval.sim +++ b/tests/script/tsim/stream/partitionbyColumnInterval.sim @@ -17,6 +17,7 @@ sql create database test vgroups 1; sql use test; sql create table t1(ts timestamp, a int, b int , c int, d double); sql create stream streams0 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart c1, count(*) c2, max(a) c3, _group_key(a) c4 from t1 partition by a interval(10s); +sleep 1000 sql insert into t1 values(1648791213000,NULL,NULL,NULL,NULL); sql insert into t1 values(1648791213000,NULL,NULL,NULL,NULL); @@ -198,6 +199,7 @@ sql create database test1 vgroups 1; sql use test1; sql create table t1(ts timestamp, a int, b int , c int, d double); sql create stream streams1 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt1 as select _wstart c1, count(*) c2, max(c) c3, _group_key(a+b) c4 from t1 partition by a+b interval(10s); +sleep 1000 sql insert into t1 values(1648791213000,NULL,NULL,NULL,NULL); sql insert into t1 values(1648791213000,NULL,NULL,NULL,NULL); @@ -285,6 +287,7 @@ sql create stable st(ts timestamp, a int, b int, c int, d double) tags(ta int,tb sql create table t1 using st tags(1,1,1); sql create table t2 using st tags(2,2,2); sql create stream streams2 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into test.streamt2 as select _wstart c1, count(*) c2, max(a) c3 from st partition by a interval(10s); +sleep 1000 sql insert into t1 values(1648791213000,NULL,NULL,NULL,NULL); sql insert into t1 values(1648791213000,NULL,NULL,NULL,NULL); @@ -482,6 +485,7 @@ sql create table t2 using st tags(2,2,2); sql create table t3 using st tags(2,2,2); sql create table t4 using st tags(2,2,2); sql create stream streams4 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into test.streamt4 as select _wstart c1, count(*) c2, max(a) c3 from st partition by a interval(10s); +sleep 1000 sql insert into t1 values(1648791213000,2,2,3,1.0); sql insert into t2 values(1648791213000,2,2,3,1.0); @@ -572,6 +576,7 @@ sql create table t2 using st tags(2,2,2); sql create table t3 using st tags(2,2,2); sql create table t4 using st tags(2,2,2); sql create stream streams5 trigger at_once IGNORE EXPIRED 0 IGNORE 
UPDATE 0 into test.streamt5 as select _wstart c1, count(*) c2, max(a) c3 from st partition by a interval(10s); +sleep 1000 sql insert into t1 values(1648791213000,1,2,3,1.0); sql insert into t2 values(1648791213000,2,2,3,1.0); @@ -584,7 +589,6 @@ sql insert into t3 values(1648791223000,3,2,3,1.0); sql insert into t4 values(1648791223000,4,2,3,1.0); sleep 1000 - sql delete from st where ts = 1648791223000; $loop_count = 0 diff --git a/tests/script/tsim/stream/partitionbyColumnSession.sim b/tests/script/tsim/stream/partitionbyColumnSession.sim index 035fe1d232..a22e36e499 100644 --- a/tests/script/tsim/stream/partitionbyColumnSession.sim +++ b/tests/script/tsim/stream/partitionbyColumnSession.sim @@ -284,6 +284,7 @@ sql create table t1 using st tags(1,1,1); sql create table t2 using st tags(2,2,2); sql create stream streams2 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into test.streamt2 as select _wstart c1, count(*) c2, max(a) c3 from st partition by a session(ts, 5s); +sleep 1000 sql insert into t1 values(1648791213000,NULL,NULL,NULL,NULL); sql insert into t1 values(1648791213000,NULL,NULL,NULL,NULL); sql insert into t2 values(1648791213000,NULL,NULL,NULL,NULL); @@ -480,6 +481,7 @@ sql create table t3 using st tags(2,2,2); sql create table t4 using st tags(2,2,2); sql create stream streams4 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into test.streamt4 as select _wstart c1, count(*) c2, max(a) c3 from st partition by a session(ts, 5s); +sleep 2000 sql insert into t1 values(1648791213000,2,2,3,1.0); sql insert into t2 values(1648791213000,2,2,3,1.0); sql insert into t3 values(1648791213000,2,2,3,1.0); diff --git a/tests/script/tsim/stream/udTableAndTag0.sim b/tests/script/tsim/stream/udTableAndTag0.sim index 3fe17dbfe8..c81927abcb 100644 --- a/tests/script/tsim/stream/udTableAndTag0.sim +++ b/tests/script/tsim/stream/udTableAndTag0.sim @@ -22,6 +22,8 @@ sql create table t2 using st tags(2,2,2); #sql_error create stream streams1 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into result.streamt SUBTABLE("aaa") as select _wstart, count(*) c1 from st interval(10s); sql create stream streams1 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into result.streamt SUBTABLE(concat("aaa-", tbname)) as select _wstart, count(*) c1 from st partition by tbname interval(10s); +sleep 1000 + sql insert into t1 values(1648791213000,1,2,3); sql insert into t2 values(1648791213000,1,2,3); @@ -89,10 +91,11 @@ sql create table t1 using st tags(1,1,1); sql create table t2 using st tags(2,2,2); sql create stream streams2 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into result2.streamt2 TAGS(cc varchar(100)) as select _wstart, count(*) c1 from st partition by concat("tag-", tbname) as cc interval(10s); +sleep 1000 + sql insert into t1 values(1648791213000,1,2,3); sql insert into t2 values(1648791213000,1,2,3); - $loop_count = 0 loop2: @@ -174,10 +177,11 @@ sql create table t1 using st tags(1,1,1); sql create table t2 using st tags(2,2,2); sql create stream streams3 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into result3.streamt3 TAGS(dd varchar(100)) SUBTABLE(concat("tbn-", tbname)) as select _wstart, count(*) c1 from st partition by concat("tag-", tbname) as dd, tbname interval(10s); +sleep 1000 + sql insert into t1 values(1648791213000,1,2,3); sql insert into t2 values(1648791213000,1,2,3); - $loop_count = 0 loop4: @@ -286,8 +290,9 @@ sql create table t2 using st tags(2,2,2); sql create table t3 using st tags(3,3,3); sql create stream streams4 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 
0 into result4.streamt4 TAGS(dd varchar(100)) SUBTABLE(concat("tbn-", tbname)) as select _wstart, count(*) c1 from st partition by concat("tag-", tbname) as dd, tbname interval(10s); -sql insert into t1 values(1648791213000,1,1,1) t2 values(1648791213000,2,2,2) t3 values(1648791213000,3,3,3); +sleep 1000 +sql insert into t1 values(1648791213000,1,1,1) t2 values(1648791213000,2,2,2) t3 values(1648791213000,3,3,3); $loop_count = 0 loop7: @@ -405,8 +410,9 @@ sql create table t2 using st tags("2",2,2); sql create table t3 using st tags("3",3,3); sql create stream streams6 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into result6.streamt6 TAGS(dd int) as select _wstart, count(*) c1 from st partition by concat(ta, "0") as dd, tbname interval(10s); -sql insert into t1 values(1648791213000,1,1,1) t2 values(1648791213000,2,2,2) t3 values(1648791213000,3,3,3); +sleep 1000 +sql insert into t1 values(1648791213000,1,1,1) t2 values(1648791213000,2,2,2) t3 values(1648791213000,3,3,3); $loop_count = 0 loop9: diff --git a/tests/script/tsim/stream/udTableAndTag1.sim b/tests/script/tsim/stream/udTableAndTag1.sim index 091615d0f3..e9dfbaabcf 100644 --- a/tests/script/tsim/stream/udTableAndTag1.sim +++ b/tests/script/tsim/stream/udTableAndTag1.sim @@ -22,6 +22,8 @@ sql create table t2 using st tags(2,2,2); #sql_error create stream streams1 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into result.streamt SUBTABLE("aaa") as select _wstart, count(*) c1 from st interval(10s); sql create stream streams1 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into result.streamt SUBTABLE( concat("aaa-", cast(a as varchar(10) ) ) ) as select _wstart, count(*) c1 from st partition by a interval(10s); +sleep 2000 + print ===== insert into 1 sql insert into t1 values(1648791213000,1,2,3); sql insert into t2 values(1648791213000,2,2,3); @@ -88,11 +90,12 @@ sql create table t1 using st tags(1,1,1); sql create table t2 using st tags(2,2,2); sql create stream streams2 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into result2.streamt2 TAGS(cc varchar(100)) as select _wstart, count(*) c1 from st partition by concat("col-", cast(a as varchar(10) ) ) as cc interval(10s); +sleep 2000 + print ===== insert into 2 sql insert into t1 values(1648791213000,1,2,3); sql insert into t2 values(1648791213000,2,2,3); - $loop_count = 0 loop2: @@ -172,6 +175,8 @@ sql create table t1 using st tags(1,1,1); sql create table t2 using st tags(2,2,2); sql create stream streams3 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into result3.streamt3 TAGS(dd varchar(100)) SUBTABLE(concat("tbn-", cast(a as varchar(10) ) ) ) as select _wstart, count(*) c1 from st partition by concat("col-", cast(a as varchar(10) ) ) as dd, a interval(10s); +sleep 2000 + print ===== insert into 3 sql insert into t1 values(1648791213000,1,2,3); sql insert into t2 values(1648791213000,2,2,3); @@ -284,8 +289,9 @@ sql create table t2 using st tags(2,2,2); sql create table t3 using st tags(3,3,3); sql create stream streams4 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into result4.streamt4 TAGS(dd varchar(100)) SUBTABLE(concat("tbn-", dd)) as select _wstart, count(*) c1 from st partition by concat("t", cast(a as varchar(10) ) ) as dd interval(10s); -sql insert into t1 values(1648791213000,1,1,1) t2 values(1648791213000,2,2,2) t3 values(1648791213000,3,3,3); +sleep 2000 +sql insert into t1 values(1648791213000,1,1,1) t2 values(1648791213000,2,2,2) t3 values(1648791213000,3,3,3); $loop_count = 0 loop7: diff --git a/tests/script/tsim/stream/udTableAndTag2.sim 
b/tests/script/tsim/stream/udTableAndTag2.sim index 9ad985c681..973c55b9ef 100644 --- a/tests/script/tsim/stream/udTableAndTag2.sim +++ b/tests/script/tsim/stream/udTableAndTag2.sim @@ -21,6 +21,8 @@ sql create table t1 using st tags(1,1,1); sql create table t2 using st tags(2,2,2); sql create stream streams1 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into result.streamt SUBTABLE("aaa") as select _wstart, count(*) c1 from st interval(10s); +sleep 2000 + print ===== insert into 1 sql insert into t1 values(1648791213000,1,2,3); sql insert into t2 values(1648791213000,2,2,3); @@ -94,11 +96,12 @@ sql create table t1 using st tags(1,1,1); sql create table t2 using st tags(2,2,2); sql create stream streams2 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into result2.streamt2 TAGS(cc varchar(100)) as select _wstart, count(*) c1 from st interval(10s); +sleep 2000 + print ===== insert into 2 sql insert into t1 values(1648791213000,1,2,3); sql insert into t2 values(1648791213000,2,2,3); - $loop_count = 0 loop2: @@ -186,21 +189,20 @@ print ===== step4 print ===== column name + table name sql create database result3 vgroups 1; - sql create database test3 vgroups 4; sql use test3; - sql create stable st(ts timestamp,a int,b int,c int) tags(ta int,tb int,tc int); sql create table t1 using st tags(1,1,1); sql create table t2 using st tags(2,2,2); sql create stream streams3 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into result3.streamt3 TAGS(dd varchar(100)) SUBTABLE(concat("tbn-", "1") ) as select _wstart, count(*) c1 from st interval(10s); +sleep 2000 + print ===== insert into 3 sql insert into t1 values(1648791213000,1,2,3); sql insert into t2 values(1648791213000,2,2,3); - $loop_count = 0 loop4: @@ -306,8 +308,9 @@ sql create table t2 using st tags(2,2,2); sql create table t3 using st tags(3,3,3); sql create stream streams4 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into result4.streamt4 TAGS(dd varchar(100)) SUBTABLE(concat("tbn-", "1")) as select _wstart, count(*) c1 from st interval(10s); -sql insert into t1 values(1648791213000,1,1,1) t2 values(1648791213000,2,2,2) t3 values(1648791213000,3,3,3); +sleep 2000 +sql insert into t1 values(1648791213000,1,1,1) t2 values(1648791213000,2,2,2) t3 values(1648791213000,3,3,3); $loop_count = 0 loop7: @@ -379,6 +382,8 @@ sql create stream streams51 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 in sql create stream streams52 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into result5.streamt52 TAGS(cc varchar(100)) as select _wstart, count(*) c1 from st interval(10s); sql create stream streams53 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into result5.streamt53 TAGS(dd varchar(100)) SUBTABLE(concat("aaa-", "1") ) as select _wstart, count(*) c1 from st interval(10s); +sleep 2000 + sql insert into t1 values(1648791213000,1,2,3); sql insert into t2 values(1648791213000,2,2,3); diff --git a/tests/script/tsim/tagindex/add_index.sim b/tests/script/tsim/tagindex/add_index.sim index e73c7480ac..cf1b5e05e9 100644 --- a/tests/script/tsim/tagindex/add_index.sim +++ b/tests/script/tsim/tagindex/add_index.sim @@ -293,9 +293,9 @@ if $rows != 1 then endi #$drop_name=`$data[0][0]` -#sql drop index `$data[0][0]\` +#sql drop index `$data[0][0]\` -#if $rows != 0 then +#if $rows != 0 then # return -1 #endi diff --git a/tests/script/tsim/tmq/basic1.sim b/tests/script/tsim/tmq/basic1.sim index fe6ec04a20..4ef0c121f6 100644 --- a/tests/script/tsim/tmq/basic1.sim +++ b/tests/script/tsim/tmq/basic1.sim @@ -62,8 +62,8 @@ $keyList = $keyList . 
, $keyList = $keyList . enable.auto.commit:false #$keyList = $keyList . , #$keyList = $keyList . auto.commit.interval.ms:6000 -#$keyList = $keyList . , -#$keyList = $keyList . auto.offset.reset:earliest +$keyList = $keyList . , +$keyList = $keyList . auto.offset.reset:earliest $keyList = $keyList . ' print ========== key list: $keyList diff --git a/tests/script/tsim/tmq/basic1Of2Cons.sim b/tests/script/tsim/tmq/basic1Of2Cons.sim index c12351cbe8..d2906ec875 100644 --- a/tests/script/tsim/tmq/basic1Of2Cons.sim +++ b/tests/script/tsim/tmq/basic1Of2Cons.sim @@ -62,8 +62,8 @@ $keyList = $keyList . , $keyList = $keyList . enable.auto.commit:false #$keyList = $keyList . , #$keyList = $keyList . auto.commit.interval.ms:6000 -#$keyList = $keyList . , -#$keyList = $keyList . auto.offset.reset:earliest +$keyList = $keyList . , +$keyList = $keyList . auto.offset.reset:earliest $keyList = $keyList . ' print ========== key list: $keyList diff --git a/tests/script/tsim/tmq/basic2.sim b/tests/script/tsim/tmq/basic2.sim index 5c7528ea5d..4477101d0f 100644 --- a/tests/script/tsim/tmq/basic2.sim +++ b/tests/script/tsim/tmq/basic2.sim @@ -62,8 +62,8 @@ $keyList = $keyList . , $keyList = $keyList . enable.auto.commit:false #$keyList = $keyList . , #$keyList = $keyList . auto.commit.interval.ms:6000 -#$keyList = $keyList . , -#$keyList = $keyList . auto.offset.reset:earliest +$keyList = $keyList . , +$keyList = $keyList . auto.offset.reset:earliest $keyList = $keyList . ' print ========== key list: $keyList diff --git a/tests/script/tsim/tmq/basic2Of2Cons.sim b/tests/script/tsim/tmq/basic2Of2Cons.sim index 23598c17a4..951a1d52fd 100644 --- a/tests/script/tsim/tmq/basic2Of2Cons.sim +++ b/tests/script/tsim/tmq/basic2Of2Cons.sim @@ -62,8 +62,8 @@ $keyList = $keyList . , $keyList = $keyList . enable.auto.commit:false #$keyList = $keyList . , #$keyList = $keyList . auto.commit.interval.ms:6000 -#$keyList = $keyList . , -#$keyList = $keyList . auto.offset.reset:earliest +$keyList = $keyList . , +$keyList = $keyList . auto.offset.reset:earliest $keyList = $keyList . ' print ========== key list: $keyList diff --git a/tests/script/tsim/tmq/basic2Of2ConsOverlap.sim b/tests/script/tsim/tmq/basic2Of2ConsOverlap.sim index 1223a94fa7..8cc447f0c7 100644 --- a/tests/script/tsim/tmq/basic2Of2ConsOverlap.sim +++ b/tests/script/tsim/tmq/basic2Of2ConsOverlap.sim @@ -62,8 +62,8 @@ $keyList = $keyList . , $keyList = $keyList . enable.auto.commit:false #$keyList = $keyList . , #$keyList = $keyList . auto.commit.interval.ms:6000 -#$keyList = $keyList . , -#$keyList = $keyList . auto.offset.reset:earliest +$keyList = $keyList . , +$keyList = $keyList . auto.offset.reset:earliest $keyList = $keyList . ' print ========== key list: $keyList diff --git a/tests/script/tsim/tmq/basic3.sim b/tests/script/tsim/tmq/basic3.sim index 8bb34cefa2..da2bee4f6b 100644 --- a/tests/script/tsim/tmq/basic3.sim +++ b/tests/script/tsim/tmq/basic3.sim @@ -62,8 +62,8 @@ $keyList = $keyList . , $keyList = $keyList . enable.auto.commit:false #$keyList = $keyList . , #$keyList = $keyList . auto.commit.interval.ms:6000 -#$keyList = $keyList . , -#$keyList = $keyList . auto.offset.reset:earliest +$keyList = $keyList . , +$keyList = $keyList . auto.offset.reset:earliest $keyList = $keyList . 
' print ========== key list: $keyList diff --git a/tests/script/tsim/tmq/basic3Of2Cons.sim b/tests/script/tsim/tmq/basic3Of2Cons.sim index 75d762c44b..21d691bd9c 100644 --- a/tests/script/tsim/tmq/basic3Of2Cons.sim +++ b/tests/script/tsim/tmq/basic3Of2Cons.sim @@ -62,8 +62,8 @@ $keyList = $keyList . , $keyList = $keyList . enable.auto.commit:false #$keyList = $keyList . , #$keyList = $keyList . auto.commit.interval.ms:6000 -#$keyList = $keyList . , -#$keyList = $keyList . auto.offset.reset:earliest +$keyList = $keyList . , +$keyList = $keyList . auto.offset.reset:earliest $keyList = $keyList . ' print ========== key list: $keyList diff --git a/tests/script/tsim/tmq/basic4.sim b/tests/script/tsim/tmq/basic4.sim index c72d8ff412..adeab58ff2 100644 --- a/tests/script/tsim/tmq/basic4.sim +++ b/tests/script/tsim/tmq/basic4.sim @@ -62,8 +62,8 @@ $keyList = $keyList . , $keyList = $keyList . enable.auto.commit:false #$keyList = $keyList . , #$keyList = $keyList . auto.commit.interval.ms:6000 -#$keyList = $keyList . , -#$keyList = $keyList . auto.offset.reset:earliest +$keyList = $keyList . , +$keyList = $keyList . auto.offset.reset:earliest $keyList = $keyList . ' print ========== key list: $keyList diff --git a/tests/script/tsim/tmq/basic4Of2Cons.sim b/tests/script/tsim/tmq/basic4Of2Cons.sim index bb006a354c..186005b231 100644 --- a/tests/script/tsim/tmq/basic4Of2Cons.sim +++ b/tests/script/tsim/tmq/basic4Of2Cons.sim @@ -62,8 +62,8 @@ $keyList = $keyList . , $keyList = $keyList . enable.auto.commit:false #$keyList = $keyList . , #$keyList = $keyList . auto.commit.interval.ms:6000 -#$keyList = $keyList . , -#$keyList = $keyList . auto.offset.reset:earliest +$keyList = $keyList . , +$keyList = $keyList . auto.offset.reset:earliest $keyList = $keyList . ' print ========== key list: $keyList diff --git a/tests/script/tsim/tmq/snapshot.sim b/tests/script/tsim/tmq/snapshot.sim index fbdaba7d28..c0194d98c8 100644 --- a/tests/script/tsim/tmq/snapshot.sim +++ b/tests/script/tsim/tmq/snapshot.sim @@ -62,8 +62,8 @@ $keyList = $keyList . , $keyList = $keyList . enable.auto.commit:false #$keyList = $keyList . , #$keyList = $keyList . auto.commit.interval.ms:6000 -#$keyList = $keyList . , -#$keyList = $keyList . auto.offset.reset:earliest +$keyList = $keyList . , +$keyList = $keyList . auto.offset.reset:earliest $keyList = $keyList . ' print ========== key list: $keyList diff --git a/tests/script/tsim/tmq/snapshot1.sim b/tests/script/tsim/tmq/snapshot1.sim index 5349981cc7..6121692d6c 100644 --- a/tests/script/tsim/tmq/snapshot1.sim +++ b/tests/script/tsim/tmq/snapshot1.sim @@ -62,8 +62,8 @@ $keyList = $keyList . , $keyList = $keyList . enable.auto.commit:false #$keyList = $keyList . , #$keyList = $keyList . auto.commit.interval.ms:6000 -#$keyList = $keyList . , -#$keyList = $keyList . auto.offset.reset:earliest +$keyList = $keyList . , +$keyList = $keyList . auto.offset.reset:earliest $keyList = $keyList . 
' print ========== key list: $keyList diff --git a/tests/script/tsim/user/privilege_create_db.sim b/tests/script/tsim/user/privilege_create_db.sim index c81bd1b258..f199e2ee9c 100644 --- a/tests/script/tsim/user/privilege_create_db.sim +++ b/tests/script/tsim/user/privilege_create_db.sim @@ -68,10 +68,10 @@ print =============connect with root, revoke read from u1, all from u2 sql connect sql revoke read on u1_d1.* from u1 sql revoke all on u2_d1.* from u2 -sleep 1000 print =============connect with u1 sql connect u1 +sql reset query cache sql insert into u1_d1.t1 values(now, 1) sql_error select * from u1_d1.t1; @@ -85,9 +85,9 @@ sql connect sql grant read on u1_d1.* to u1 sql grant all on u2_d1.* to u2 -sleep 1000 print =============connect with u1 sql connect u1 +sql reset query cache sql select * from u1_d1.t1; sql insert into u1_d1.t1 values(now, 2) diff --git a/tests/system-test/0-others/compatibility.py b/tests/system-test/0-others/compatibility.py index cb804aad0c..83bfb2bed7 100644 --- a/tests/system-test/0-others/compatibility.py +++ b/tests/system-test/0-others/compatibility.py @@ -30,7 +30,7 @@ class TDTestCase: self.replicaVar = int(replicaVar) tdLog.debug(f"start to excute {__file__}") tdSql.init(conn.cursor()) - self.deletedDataSql= '''drop database if exists deldata;create database deldata duration 300;use deldata; + self.deletedDataSql= '''drop database if exists deldata;create database deldata duration 300 stt_trigger 4; ;use deldata; create table deldata.stb1 (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) tags (t1 int); create table deldata.ct1 using deldata.stb1 tags ( 1 ); insert into deldata.ct1 values ( now()-0s, 0, 0, 0, 0, 0.0, 0.0, 0, 'binary0', 'nchar0', now()+0a ) ( now()-10s, 1, 11111, 111, 11, 1.11, 11.11, 1, 'binary1', 'nchar1', now()+1a ) ( now()-20s, 2, 22222, 222, 22, 2.22, 22.22, 0, 'binary2', 'nchar2', now()+2a ) ( now()-30s, 3, 33333, 333, 33, 3.33, 33.33, 1, 'binary3', 'nchar3', now()+3a ); @@ -38,7 +38,9 @@ class TDTestCase: delete from deldata.stb1; flush database deldata; insert into deldata.ct1 values ( now()-0s, 0, 0, 0, 0, 0.0, 0.0, 0, 'binary0', 'nchar0', now()+0a ) ( now()-10s, 1, 11111, 111, 11, 1.11, 11.11, 1, 'binary1', 'nchar1', now()+1a ) ( now()-20s, 2, 22222, 222, 22, 2.22, 22.22, 0, 'binary2', 'nchar2', now()+2a ) ( now()-30s, 3, 33333, 333, 33, 3.33, 33.33, 1, 'binary3', 'nchar3', now()+3a ); - delete from deldata.ct1;''' + delete from deldata.ct1; + insert into deldata.ct1 values ( now()-0s, 0, 0, 0, 0, 0.0, 0.0, 0, 'binary0', 'nchar0', now()+0a ); + flush database deldata;''' def checkProcessPid(self,processName): i=0 while i<60: @@ -262,7 +264,7 @@ class TDTestCase: if self.is_list_same_as_ordered_list(resultList,expectList): print("The unordered list is the same as the ordered list.") else: - tdlog.error("The unordered list is not the same as the ordered list.") + tdLog.exit("The unordered list is not the same as the ordered list.") tdsql.execute("insert into test.d80 values (now+1s, 11, 103, 0.21);") tdsql.execute("insert into test.d9 values (now+5s, 4.3, 104, 0.4);") diff --git a/tests/system-test/0-others/information_schema.py b/tests/system-test/0-others/information_schema.py index eaea4e18b2..51347f5f64 100644 --- a/tests/system-test/0-others/information_schema.py +++ b/tests/system-test/0-others/information_schema.py @@ -22,7 +22,7 @@ class TDTestCase: def init(self, conn, logSql, replicaVar=1): self.replicaVar = int(replicaVar) 
tdLog.debug("start to execute %s" % __file__) - tdSql.init(conn.cursor()) + tdSql.init(conn.cursor(), True) self.setsql = TDSetSql() self.dbname = 'db' self.stbname = 'stb' @@ -217,7 +217,7 @@ class TDTestCase: tdSql.checkEqual(20470,len(tdSql.queryResult)) tdSql.query("select * from information_schema.ins_columns where db_name ='information_schema'") - tdSql.checkEqual(195, len(tdSql.queryResult)) + tdSql.checkEqual(198, len(tdSql.queryResult)) tdSql.query("select * from information_schema.ins_columns where db_name ='performance_schema'") tdSql.checkEqual(54, len(tdSql.queryResult)) diff --git a/tests/system-test/0-others/show.py b/tests/system-test/0-others/show.py index 4ef323db22..75d7116e03 100644 --- a/tests/system-test/0-others/show.py +++ b/tests/system-test/0-others/show.py @@ -210,6 +210,27 @@ class TDTestCase: licences_info = tdSql.queryResult tdSql.checkEqual(grants_info,licences_info) + def show_column_name(self): + tdSql.execute("create database db;") + tdSql.execute("use db;") + tdSql.execute("create table ta(ts timestamp, name nchar(16), age int , address int);") + tdSql.execute("insert into ta values(now, 'jack', 19, 23);") + + colName1 = ["ts","name","age","address"] + colName2 = tdSql.getColNameList("select last(*) from ta;") + for i in range(len(colName1)): + if colName2[i] != f"last({colName1[i]})": + tdLog.exit(f"column name is different. {colName2} != last({colName1[i]} ") + return + + # alter option + tdSql.execute("alter local 'keepColumnName' '1';") + colName3 = tdSql.getColNameList("select last(*) from ta;") + for col in colName3: + if colName1 != colName3: + tdLog.exit(f"column name is different. colName1= {colName1} colName2={colName3}") + return + def run(self): self.check_gitinfo() self.show_base() @@ -218,6 +239,7 @@ class TDTestCase: self.show_create_sql() self.show_create_sysdb_sql() self.show_create_systb_sql() + self.show_column_name() def stop(self): tdSql.close() diff --git a/tests/system-test/0-others/show_tag_index.py b/tests/system-test/0-others/show_tag_index.py index d39f9eaab9..c79880ba35 100644 --- a/tests/system-test/0-others/show_tag_index.py +++ b/tests/system-test/0-others/show_tag_index.py @@ -180,6 +180,13 @@ class TDTestCase: tdSql.error(f'show indexes from db.ctb1 from db') tdSql.error(f'show indexes from `db`.`ctb1` from db') + # check error information + tdSql.error(f'create index idx1 on db2.stb (t1);', expectErrInfo='Database not exist') + tdSql.error(f'use db2;', expectErrInfo='Database not exist') + tdSql.error(f' alter stable db2.stb add column c2 int;', expectErrInfo='Database not exist') + + + def stop(self): tdSql.close() tdLog.success("%s successfully executed" % __file__) diff --git a/tests/system-test/0-others/splitVGroupRep1.py b/tests/system-test/0-others/splitVGroupRep1.py index b119ba0a32..0b75a3e6e1 100644 --- a/tests/system-test/0-others/splitVGroupRep1.py +++ b/tests/system-test/0-others/splitVGroupRep1.py @@ -223,7 +223,7 @@ class TDTestCase: start1 = time.time() rows1 = tdSql.query(sql1) spend1 = time.time() - start1 - res1 = copy.copy(tdSql.queryResult) + res1 = copy.deepcopy(tdSql.queryResult) sql2 = sql.replace('@db_name', self.db2) tdLog.info(sql2) @@ -234,6 +234,7 @@ class TDTestCase: rowlen1 = len(res1) rowlen2 = len(res2) + errCnt = 0 if rowlen1 != rowlen2: tdLog.exit(f"both row count not equal. rowlen1={rowlen1} rowlen2={rowlen2} ") @@ -249,8 +250,11 @@ class TDTestCase: return False for j in range(collen1): if row1[j] != row2[j]: - tdLog.exit(f"both col not equal. 
row={i} col={j} col1={row1[j]} col2={row2[j]} .") - return False + tdLog.info(f"error both column value not equal. row={i} col={j} col1={row1[j]} col2={row2[j]} .") + errCnt += 1 + + if errCnt > 0: + tdLog.exit(f" db2 column value different with db2. different count ={errCnt} ") # warning performance diff = (spend2 - spend1)*100/spend1 @@ -391,7 +395,7 @@ class TDTestCase: tdSql.execute("use topicdb;") tdSql.execute("create table ta(ts timestamp, age int);") tdSql.execute("create topic toa as select * from ta;") - + #self.expectSplitError("topicdb") tdSql.execute("drop topic toa;") self.expectSplitOk("topicdb") @@ -409,6 +413,9 @@ class TDTestCase: # prepare env self.prepareEnv() + tdLog.info("check db1 and db2 same after creating ...") + self.checkResult() + for i in range(3): # split vgroup on db2 start = time.time() diff --git a/tests/system-test/0-others/splitVGroupRep3.py b/tests/system-test/0-others/splitVGroupRep3.py index 68c915eeaf..d45b037b5a 100644 --- a/tests/system-test/0-others/splitVGroupRep3.py +++ b/tests/system-test/0-others/splitVGroupRep3.py @@ -233,6 +233,7 @@ class TDTestCase: rowlen1 = len(res1) rowlen2 = len(res2) + errCnt = 0 if rowlen1 != rowlen2: tdLog.exit(f"both row count not equal. rowlen1={rowlen1} rowlen2={rowlen2} ") @@ -248,8 +249,11 @@ class TDTestCase: return False for j in range(collen1): if row1[j] != row2[j]: - tdLog.exit(f"both col not equal. row={i} col={j} col1={row1[j]} col2={row2[j]} .") - return False + tdLog.info(f"error both column value not equal. row={i} col={j} col1={row1[j]} col2={row2[j]} .") + errCnt += 1 + + if errCnt > 0: + tdLog.exit(f" db2 column value different with db2. different count ={errCnt} ") # warning performance diff = (spend2 - spend1)*100/spend1 diff --git a/tests/system-test/0-others/ttl.py b/tests/system-test/0-others/ttl.py index 32b18c6bbb..6ae6edfe5d 100644 --- a/tests/system-test/0-others/ttl.py +++ b/tests/system-test/0-others/ttl.py @@ -7,7 +7,7 @@ from util.dnodes import * class TDTestCase: updatecfgDict = {'ttlUnit': 1, "ttlPushInterval": 1, "ttlChangeOnWrite": 0} - + def init(self, conn, logSql, replicaVar=1): self.replicaVar = int(replicaVar) tdLog.debug(f"start to excute {__file__}") @@ -21,7 +21,8 @@ class TDTestCase: tdSql.execute(f'create table {self.dbname}.t2(ts timestamp, c1 int) ttl {self.ttl}') tdSql.query(f'show {self.dbname}.tables') tdSql.checkRows(2) - + tdSql.execute(f'flush database {self.dbname}') + time.sleep(self.ttl + 2) tdSql.query(f'show {self.dbname}.tables') tdSql.checkRows(1) diff --git a/tests/system-test/0-others/ttlChangeOnWrite.py b/tests/system-test/0-others/ttlChangeOnWrite.py index 7bb10e25e8..16c6585e07 100644 --- a/tests/system-test/0-others/ttlChangeOnWrite.py +++ b/tests/system-test/0-others/ttlChangeOnWrite.py @@ -6,9 +6,9 @@ from util.dnodes import * class TDTestCase: - updatecfgDict = {'ttlUnit': 1, "ttlPushInterval": 3, "ttlChangeOnWrite": 1, "trimVDbIntervalSec": 360, + updatecfgDict = {'ttlUnit': 1, "ttlPushInterval": 3, "ttlChangeOnWrite": 1, "trimVDbIntervalSec": 360, "ttlFlushThreshold": 100, "ttlBatchDropNum": 10} - + def init(self, conn, logSql, replicaVar=1): self.replicaVar = int(replicaVar) tdLog.debug(f"start to excute {__file__}") @@ -16,15 +16,16 @@ class TDTestCase: self.ttl = 5 self.tables = 100 self.dbname = "test" - + def check_batch_drop_num(self): tdSql.execute(f'create database {self.dbname} vgroups 1') tdSql.execute(f'use {self.dbname}') tdSql.execute(f'create table stb(ts timestamp, c1 int) tags(t1 int)') for i in range(self.tables): 
tdSql.execute(f'create table t{i} using stb tags({i}) ttl {self.ttl}') - - time.sleep(self.ttl + 3) + + tdSql.execute(f'flush database {self.dbname}') + time.sleep(self.ttl + self.updatecfgDict['ttlPushInterval'] + 1) tdSql.query('show tables') tdSql.checkRows(90) @@ -35,14 +36,17 @@ class TDTestCase: tdSql.execute(f'create table {self.dbname}.t2(ts timestamp, c1 int) ttl {self.ttl}') tdSql.query(f'show {self.dbname}.tables') tdSql.checkRows(2) - - time.sleep(self.ttl) + + tdSql.execute(f'flush database {self.dbname}') + time.sleep(self.ttl - 1) tdSql.execute(f'insert into {self.dbname}.t2 values(now, 1)'); - - time.sleep(self.ttl) + + tdSql.execute(f'flush database {self.dbname}') + time.sleep(self.ttl - 1) tdSql.query(f'show {self.dbname}.tables') tdSql.checkRows(2) - + + tdSql.execute(f'flush database {self.dbname}') time.sleep(self.ttl * 2) tdSql.query(f'show {self.dbname}.tables') tdSql.checkRows(1) diff --git a/tests/system-test/0-others/user_privilege_multi_users.py b/tests/system-test/0-others/user_privilege_multi_users.py index 8812f42e7b..53ff136e63 100644 --- a/tests/system-test/0-others/user_privilege_multi_users.py +++ b/tests/system-test/0-others/user_privilege_multi_users.py @@ -107,6 +107,7 @@ class TDTestCase: tdLog.debug("case passed") else: tdLog.exit("The privilege number in information_schema.ins_user_privileges is incorrect") + tdSql.query("select * from information_schema.ins_columns where db_name='{self.dbname}';") def stop(self): # remove the privilege diff --git a/tests/system-test/0-others/walRetention.py b/tests/system-test/0-others/walRetention.py index 0fdeb84a5b..53316fc88b 100644 --- a/tests/system-test/0-others/walRetention.py +++ b/tests/system-test/0-others/walRetention.py @@ -109,11 +109,14 @@ class VNode : # load config tdLog.info(f' meta-ver file={metaFile}') if metaFile != "": - jsonVer = jsonFromFile(metaFile) - metaNode = jsonVer["meta"] - self.snapVer = int(metaNode["snapshotVer"]) - self.firstVer = int(metaNode["firstVer"]) - self.lastVer = int(metaNode["lastVer"]) + try: + jsonVer = jsonFromFile(metaFile) + metaNode = jsonVer["meta"] + self.snapVer = int(metaNode["snapshotVer"]) + self.firstVer = int(metaNode["firstVer"]) + self.lastVer = int(metaNode["lastVer"]) + except Exception as e: + tdLog.info(f' read json file except.') # sort with startVer self.walFiles = sorted(self.walFiles, key=lambda x : x.startVer, reverse=True) diff --git a/tests/system-test/1-insert/delete_data.py b/tests/system-test/1-insert/delete_data.py index aaad723b89..ffeb9e23a9 100644 --- a/tests/system-test/1-insert/delete_data.py +++ b/tests/system-test/1-insert/delete_data.py @@ -14,6 +14,7 @@ import random import string +import time from numpy import logspace from util import constant @@ -298,13 +299,37 @@ class TDTestCase: tdSql.query(f'select {func}(*) from {self.stbname}') tdSql.execute(f'drop table {self.stbname}') tdSql.execute(f'drop database {self.dbname}') + + def FIX_TS_3987(self): + tdSql.execute("create database db duration 1d vgroups 1;") + tdSql.execute("use db;") + tdSql.execute("create table t (ts timestamp, a int);") + tdSql.execute("insert into t values (1694681045000, 1);") + tdSql.execute("select * from t;") + tdSql.execute("flush database db;") + tdSql.execute("select * from t;") + tdSql.execute("delete from t where ts = 1694681045000;") + tdSql.execute("select * from t;") + tdSql.execute("insert into t values (1694581045000, 2);") + tdSql.execute("select * from t;") + tdSql.execute("flush database db;") + tdSql.query("select * from t;") + 
time.sleep(5) + tdSql.query("select * from t;") + + tdSql.checkRows(1) + tdSql.checkData(0, 0, 1694581045000) + tdSql.checkData(0, 1, 2) + def run(self): + self.FIX_TS_3987() self.delete_data_ntb() self.delete_data_ctb() self.delete_data_stb() tdDnodes.stoptaosd(1) tdDnodes.starttaosd(1) self.delete_data_ntb() + def stop(self): tdSql.close() tdLog.success("%s successfully executed" % __file__) diff --git a/tests/system-test/1-insert/table_param_ttl.py b/tests/system-test/1-insert/table_param_ttl.py index 6cc978a76c..f36a49a1d7 100644 --- a/tests/system-test/1-insert/table_param_ttl.py +++ b/tests/system-test/1-insert/table_param_ttl.py @@ -35,6 +35,7 @@ class TDTestCase: tdSql.execute(f'create table db.{self.ntbname}_{i} (ts timestamp,c0 int) ttl {self.ttl_param}') tdSql.query(f'show db.tables') tdSql.checkRows(self.tbnum) + tdSql.execute(f'flush database db') sleep(self.updatecfgDict['ttlUnit']*self.ttl_param+self.updatecfgDict['ttlPushInterval'] + 1) tdSql.query(f'show db.tables') tdSql.checkRows(0) @@ -42,6 +43,7 @@ class TDTestCase: tdSql.execute(f'create table db.{self.ntbname}_{i} (ts timestamp,c0 int) ttl {self.default_ttl}') for i in range(int(self.tbnum/2)): tdSql.execute(f'alter table db.{self.ntbname}_{i} ttl {self.modify_ttl}') + tdSql.execute(f'flush database db') sleep(self.updatecfgDict['ttlUnit']*self.modify_ttl+self.updatecfgDict['ttlPushInterval'] + 1) tdSql.query(f'show db.tables') tdSql.checkRows(self.tbnum - int(self.tbnum/2)) @@ -54,6 +56,7 @@ class TDTestCase: tdSql.execute(f'create table db.{self.stbname}_{i} using db.{self.stbname} tags({i}) ttl {self.ttl_param}') tdSql.query(f'show db.tables') tdSql.checkRows(self.tbnum) + tdSql.execute(f'flush database db') sleep(self.updatecfgDict['ttlUnit']*self.ttl_param+self.updatecfgDict['ttlPushInterval'] + 1) tdSql.query(f'show db.tables') tdSql.checkRows(0) @@ -63,6 +66,7 @@ class TDTestCase: tdSql.checkRows(self.tbnum) for i in range(int(self.tbnum/2)): tdSql.execute(f'alter table db.{self.stbname}_{i} ttl {self.modify_ttl}') + tdSql.execute(f'flush database db') sleep(self.updatecfgDict['ttlUnit']*self.modify_ttl+self.updatecfgDict['ttlPushInterval'] + 1) tdSql.query(f'show db.tables') tdSql.checkRows(self.tbnum - int(self.tbnum/2)) @@ -75,6 +79,7 @@ class TDTestCase: tdSql.execute(f'insert into db.{self.stbname}_{i} using db.{self.stbname} tags({i}) ttl {self.ttl_param} values(now,1)') tdSql.query(f'show db.tables') tdSql.checkRows(self.tbnum) + tdSql.execute(f'flush database db') sleep(self.updatecfgDict['ttlUnit']*self.ttl_param+self.updatecfgDict['ttlPushInterval'] + 1) tdSql.query(f'show db.tables') tdSql.checkRows(0) diff --git a/tests/system-test/2-query/db.py b/tests/system-test/2-query/db.py index 451fc0caf3..6870c59a0d 100644 --- a/tests/system-test/2-query/db.py +++ b/tests/system-test/2-query/db.py @@ -55,7 +55,7 @@ class TDTestCase: tdSql.checkData(0, 2, 0) tdSql.query("show dnode 1 variables like '%debugFlag'") - tdSql.checkRows(21) + tdSql.checkRows(22) tdSql.query("show dnode 1 variables like '____debugFlag'") tdSql.checkRows(2) diff --git a/tests/system-test/2-query/diff.py b/tests/system-test/2-query/diff.py index c6f233eefa..10e16a690f 100644 --- a/tests/system-test/2-query/diff.py +++ b/tests/system-test/2-query/diff.py @@ -16,10 +16,42 @@ class TDTestCase: self.perfix = 'dev' self.tables = 10 + def check_result(self): + for i in range(self.rowNum): + tdSql.checkData(i, 0, 1); + + def full_datatype_test(self): + tdSql.execute("use db;") + sql = "create table db.st(ts timestamp, c1 bool, c2 float, c3 
double,c4 tinyint, c5 smallint, c6 int, c7 bigint, c8 tinyint unsigned, c9 smallint unsigned, c10 int unsigned, c11 bigint unsigned) tags( area int);" + tdSql.execute(sql) + + sql = "create table db.t1 using db.st tags(1);" + tdSql.execute(sql) + + ts = 1694000000000 + rows = 126 + for i in range(rows): + ts += 1 + sql = f"insert into db.t1 values({ts},true,{i},{i},{i%127},{i%32767},{i},{i},{i%127},{i%32767},{i},{i});" + tdSql.execute(sql) + + sql = "select diff(ts),diff(c1),diff(c3),diff(c4),diff(c5),diff(c6),diff(c7),diff(c8),diff(c9),diff(c10),diff(c11) from db.t1" + tdSql.query(sql) + tdSql.checkRows(rows - 1) + for i in range(rows - 1): + for j in range(10): + if j == 1: # bool + tdSql.checkData(i, j, 0) + else: + tdSql.checkData(i, j, 1) def run(self): tdSql.prepare() dbname = "db" + + # full type test + self.full_datatype_test() + tdSql.execute( f"create table {dbname}.ntb(ts timestamp,c1 int,c2 double,c3 float)") tdSql.execute( @@ -179,11 +211,6 @@ class TDTestCase: tdSql.error(f"select diff(col8) from {dbname}.stb_1") tdSql.error(f"select diff(col9) from {dbname}.stb") tdSql.error(f"select diff(col9) from {dbname}.stb_1") - tdSql.error(f"select diff(col11) from {dbname}.stb_1") - tdSql.error(f"select diff(col12) from {dbname}.stb_1") - tdSql.error(f"select diff(col13) from {dbname}.stb_1") - tdSql.error(f"select diff(col14) from {dbname}.stb_1") - tdSql.error(f"select diff(col14) from {dbname}.stb_1") tdSql.error(f"select diff(col1,col1,col1) from {dbname}.stb_1") tdSql.error(f"select diff(col1,1,col1) from {dbname}.stb_1") tdSql.error(f"select diff(col1,col1,col) from {dbname}.stb_1") @@ -217,6 +244,22 @@ class TDTestCase: tdSql.query(f"select diff(col6) from {dbname}.stb_1") tdSql.checkRows(10) + tdSql.query(f"select diff(col11) from {dbname}.stb_1") + tdSql.checkRows(10) + self.check_result() + + tdSql.query(f"select diff(col12) from {dbname}.stb_1") + tdSql.checkRows(10) + self.check_result() + + tdSql.query(f"select diff(col13) from {dbname}.stb_1") + tdSql.checkRows(10) + self.check_result() + + tdSql.query(f"select diff(col14) from {dbname}.stb_1") + tdSql.checkRows(10) + self.check_result() + tdSql.execute(f'''create table {dbname}.stb1(ts timestamp, col1 tinyint, col2 smallint, col3 int, col4 bigint, col5 float, col6 double, col7 bool, col8 binary(20), col9 nchar(20), col11 tinyint unsigned, col12 smallint unsigned, col13 int unsigned, col14 bigint unsigned) tags(loc nchar(20))''') tdSql.execute(f"create table {dbname}.stb1_1 using {dbname}.stb tags('shanghai')") diff --git a/tests/system-test/2-query/interval_limit_opt.py b/tests/system-test/2-query/interval_limit_opt.py index 851138fed3..492f453de5 100644 --- a/tests/system-test/2-query/interval_limit_opt.py +++ b/tests/system-test/2-query/interval_limit_opt.py @@ -174,61 +174,6 @@ class TDTestCase: for offset in range(0, 1000, 500): self.test_interval_limit_asc(offset) self.test_interval_limit_desc(offset) - self.test_interval_fill_limit(offset) - self.test_interval_order_by_limit(offset) - self.test_interval_partition_by_slimit(offset) - - def test_interval_fill_limit(self, offset: int = 0): - sqls = [ - "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ - where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-09-17 09:30:00.000' interval(1s) fill(linear)", - "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ - where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-09-17 09:30:00.000' interval(1m) fill(linear)", - "select _wstart as a, _wend as b, 
count(*), sum(c1), avg(c2), first(ts) from meters \ - where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-09-17 09:30:00.000' interval(1h) fill(linear)", - "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ - where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-09-17 09:30:00.000' interval(1d) fill(linear)" - ] - for sql in sqls: - self.query_and_check_with_limit(sql, 5000, 1000, offset) - - def test_interval_order_by_limit(self, offset: int = 0): - sqls = [ - "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ - where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' interval(1m) order by b", - "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ - where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' interval(1m) order by a desc", - "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), last(ts) from meters \ - where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' interval(1m) order by a desc", - "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ - where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' interval(1m) order by count(*), sum(c1), a", - "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ - where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' interval(1m) order by a, count(*), sum(c1)", - "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ - where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' interval(1m) fill(linear) order by b", - "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ - where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' interval(1m) fill(linear) order by a desc", - "select _wstart as a, _wend as b, count(*), sum(c1), last(c2), first(ts) from meters \ - where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' interval(1m) fill(linear) order by a desc", - "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ - where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' interval(1m) fill(linear) order by count(*), sum(c1), a", - "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ - where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' interval(1m) fill(linear) order by a, count(*), sum(c1)", - ] - for sql in sqls: - self.query_and_check_with_limit(sql, 6000, 2000, offset) - - def test_interval_partition_by_slimit(self, offset: int = 0): - sqls = [ - "select _wstart as a, _wend as b, count(*), sum(c1), last(c2), first(ts) from meters " - "where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' partition by t1 interval(1m)", - "select _wstart as a, _wend as b, count(*), sum(c1), last(c2), first(ts) from meters " - "where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' partition by t1 interval(1h)", - "select _wstart as a, _wend as b, count(*), sum(c1), last(c2), first(ts) from meters " - "where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' partition by c3 interval(1m)", - ] - for sql in sqls: - self.query_and_check_with_slimit(sql, 10, 2, offset) def test_interval_partition_by_slimit_limit(self): sql = "select * from (select _wstart as a, _wend as b, count(*), sum(c1), last(c2), first(ts),c3 from meters " \ 
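Editor's note (not part of the patch): the `2-query/diff.py` hunk above switches `diff()` on the unsigned columns (`col11`-`col14`) from expected errors to valid queries and adds `full_datatype_test`. The sketch below reproduces that kind of check outside the test framework, assuming a locally running taosd and the `taos` Python connector; the database and table names (`diff_demo`, `tu`) are hypothetical and chosen only for illustration.

```python
# Minimal standalone sketch: verify diff() over unsigned integer columns.
# Assumes a local TDengine server reachable with default credentials and taospy installed.
import taos

conn = taos.connect()  # defaults to localhost / root / taosdata
cur = conn.cursor()
cur.execute("create database if not exists diff_demo")
cur.execute("use diff_demo")
cur.execute(
    "create table if not exists tu "
    "(ts timestamp, u1 tinyint unsigned, u2 bigint unsigned)"
)

# Insert a small monotonically increasing series.
base_ts = 1694000000000
for i in range(5):
    cur.execute(f"insert into tu values ({base_ts + i}, {i}, {i})")

# diff() on an increasing series should return one row fewer than the input,
# with every difference equal to 1, for unsigned types as well.
cur.execute("select diff(u1), diff(u2) from tu")
rows = cur.fetchall()
assert len(rows) == 4, rows
assert all(r[0] == 1 and r[1] == 1 for r in rows), rows

conn.close()
```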
diff --git a/tests/system-test/2-query/interval_limit_opt_2.py b/tests/system-test/2-query/interval_limit_opt_2.py new file mode 100644 index 0000000000..cadb32b388 --- /dev/null +++ b/tests/system-test/2-query/interval_limit_opt_2.py @@ -0,0 +1,222 @@ +import taos +import sys +import time +import socket +import os +import threading +import math + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * +# from tmqCommon import * + +class TDTestCase: + def __init__(self): + self.vgroups = 4 + self.ctbNum = 10 + self.rowsPerTbl = 10000 + self.duraion = '1h' + + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), False) + + def create_database(self,tsql, dbName,dropFlag=1,vgroups=2,replica=1, duration:str='1d'): + if dropFlag == 1: + tsql.execute("drop database if exists %s"%(dbName)) + + tsql.execute("create database if not exists %s vgroups %d replica %d duration %s"%(dbName, vgroups, replica, duration)) + tdLog.debug("complete to create database %s"%(dbName)) + return + + def create_stable(self,tsql, paraDict): + colString = tdCom.gen_column_type_str(colname_prefix=paraDict["colPrefix"], column_elm_list=paraDict["colSchema"]) + tagString = tdCom.gen_tag_type_str(tagname_prefix=paraDict["tagPrefix"], tag_elm_list=paraDict["tagSchema"]) + sqlString = f"create table if not exists %s.%s (%s) tags (%s)"%(paraDict["dbName"], paraDict["stbName"], colString, tagString) + tdLog.debug("%s"%(sqlString)) + tsql.execute(sqlString) + return + + def create_ctable(self,tsql=None, dbName='dbx',stbName='stb',ctbPrefix='ctb',ctbNum=1,ctbStartIdx=0): + for i in range(ctbNum): + sqlString = "create table %s.%s%d using %s.%s tags(%d, 'tb%d', 'tb%d', %d, %d, %d)" % \ + (dbName,ctbPrefix,i+ctbStartIdx,dbName,stbName,(i+ctbStartIdx) % 5,i+ctbStartIdx,i+ctbStartIdx,i+ctbStartIdx,i+ctbStartIdx,i+ctbStartIdx) + tsql.execute(sqlString) + + tdLog.debug("complete to create %d child tables by %s.%s" %(ctbNum, dbName, stbName)) + return + + def insert_data(self,tsql,dbName,ctbPrefix,ctbNum,rowsPerTbl,batchNum,startTs,tsStep): + tdLog.debug("start to insert data ............") + tsql.execute("use %s" %dbName) + pre_insert = "insert into " + sql = pre_insert + + for i in range(ctbNum): + rowsBatched = 0 + sql += " %s%d values "%(ctbPrefix,i) + for j in range(rowsPerTbl): + if (i < ctbNum/2): + sql += "(%d, %d, %d, %d,%d,%d,%d,true,'binary%d', 'nchar%d') "%(startTs + j*tsStep, j%10, j%10, j%10, j%10, j%10, j%10, j%10, j%10) + else: + sql += "(%d, %d, NULL, %d,NULL,%d,%d,true,'binary%d', 'nchar%d') "%(startTs + j*tsStep, j%10, j%10, j%10, j%10, j%10, j%10) + rowsBatched += 1 + if ((rowsBatched == batchNum) or (j == rowsPerTbl - 1)): + tsql.execute(sql) + rowsBatched = 0 + if j < rowsPerTbl - 1: + sql = "insert into %s%d values " %(ctbPrefix,i) + else: + sql = "insert into " + if sql != pre_insert: + tsql.execute(sql) + tdLog.debug("insert data ............ 
[OK]") + return + + def prepareTestEnv(self): + tdLog.printNoPrefix("======== prepare test env include database, stable, ctables, and insert data: ") + paraDict = {'dbName': 'test', + 'dropFlag': 1, + 'vgroups': 2, + 'stbName': 'meters', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'FLOAT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'smallint', 'count':1},{'type': 'tinyint', 'count':1},{'type': 'bool', 'count':1},{'type': 'binary', 'len':10, 'count':1},{'type': 'nchar', 'len':10, 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'nchar', 'len':20, 'count':1},{'type': 'binary', 'len':20, 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'smallint', 'count':1},{'type': 'DOUBLE', 'count':1}], + 'ctbPrefix': 't', + 'ctbStartIdx': 0, + 'ctbNum': 100, + 'rowsPerTbl': 10000, + 'batchNum': 3000, + 'startTs': 1537146000000, + 'tsStep': 600000} + + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + paraDict['rowsPerTbl'] = self.rowsPerTbl + + tdLog.info("create database") + self.create_database(tsql=tdSql, dbName=paraDict["dbName"], dropFlag=paraDict["dropFlag"], vgroups=paraDict["vgroups"], replica=self.replicaVar, duration=self.duraion) + + tdLog.info("create stb") + self.create_stable(tsql=tdSql, paraDict=paraDict) + + tdLog.info("create child tables") + self.create_ctable(tsql=tdSql, dbName=paraDict["dbName"], \ + stbName=paraDict["stbName"],ctbPrefix=paraDict["ctbPrefix"],\ + ctbNum=paraDict["ctbNum"],ctbStartIdx=paraDict["ctbStartIdx"]) + self.insert_data(tsql=tdSql, dbName=paraDict["dbName"],\ + ctbPrefix=paraDict["ctbPrefix"],ctbNum=paraDict["ctbNum"],\ + rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"],\ + startTs=paraDict["startTs"],tsStep=paraDict["tsStep"]) + return + + def check_first_rows(self, all_rows, limited_rows, offset: int = 0): + for i in range(0, len(limited_rows) - 1): + if limited_rows[i] != all_rows[i + offset]: + tdLog.info("row: %d, row in all: %s" % (i+offset+1, str(all_rows[i+offset]))) + tdLog.info("row: %d, row in limted: %s" % (i+1, str(limited_rows[i]))) + tdLog.exit("row data check failed") + tdLog.info("all rows are the same as query without limit..") + + def query_and_check_with_slimit(self, sql: str, max_limit: int, step: int, offset: int = 0): + self.query_and_check_with_limit(sql, max_limit, step, offset, ' slimit ') + + def query_and_check_with_limit(self, sql: str, max_limit: int, step: int, offset: int = 0, limit_str: str = ' limit '): + for limit in range(0, max_limit, step): + limited_sql = sql + limit_str + str(offset) + "," + str(limit) + tdLog.info("query with sql: %s " % (sql) + limit_str + " %d,%d" % (offset, limit)) + all_rows = tdSql.getResult(sql) + limited_rows = tdSql.getResult(limited_sql) + tdLog.info("all rows: %d, limited rows: %d" % (len(all_rows), len(limited_rows))) + if limit_str == ' limit ': + if limit + offset <= len(all_rows) and len(limited_rows) != limit: + tdLog.exit("limited sql has less rows than limit value which is not right, \ + limit: %d, limited_rows: %d, all_rows: %d, offset: %d" % (limit, len(limited_rows), len(all_rows), offset)) + elif limit + offset > len(all_rows) and offset < len(all_rows) and offset + len(limited_rows) != len(all_rows): + tdLog.exit("limited sql has less rows than all_rows which is not right, \ + limit: %d, limited_rows: %d, all_rows: %d, offset: %d" % (limit, len(limited_rows), len(all_rows), offset)) + elif offset >= len(all_rows) and len(limited_rows) != 0: + 
tdLog.exit("limited rows should be zero, \ + limit: %d, limited_rows: %d, all_rows: %d, offset: %d" % (limit, len(limited_rows), len(all_rows), offset)) + + self.check_first_rows(all_rows, limited_rows, offset) + + def test_interval_limit_offset(self): + for offset in range(0, 1000, 500): + self.test_interval_fill_limit(offset) + self.test_interval_order_by_limit(offset) + self.test_interval_partition_by_slimit(offset) + + def test_interval_fill_limit(self, offset: int = 0): + sqls = [ + "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ + where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-09-17 09:30:00.000' interval(1s) fill(linear)", + "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ + where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-09-17 09:30:00.000' interval(1m) fill(linear)", + "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ + where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-09-17 09:30:00.000' interval(1h) fill(linear)", + "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ + where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-09-17 09:30:00.000' interval(1d) fill(linear)" + ] + for sql in sqls: + self.query_and_check_with_limit(sql, 5000, 1000, offset) + + def test_interval_order_by_limit(self, offset: int = 0): + sqls = [ + "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ + where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' interval(1m) order by b", + "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ + where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' interval(1m) order by a desc", + "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), last(ts) from meters \ + where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' interval(1m) order by a desc", + "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ + where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' interval(1m) order by count(*), sum(c1), a", + "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ + where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' interval(1m) order by a, count(*), sum(c1)", + "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ + where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' interval(1m) fill(linear) order by b", + "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ + where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' interval(1m) fill(linear) order by a desc", + "select _wstart as a, _wend as b, count(*), sum(c1), last(c2), first(ts) from meters \ + where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' interval(1m) fill(linear) order by a desc", + "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ + where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' interval(1m) fill(linear) order by count(*), sum(c1), a", + "select _wstart as a, _wend as b, count(*), sum(c1), avg(c2), first(ts) from meters \ + where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' interval(1m) fill(linear) order by a, count(*), sum(c1)", + ] + for sql in sqls: + self.query_and_check_with_limit(sql, 6000, 2000, offset) 
+ + def test_interval_partition_by_slimit(self, offset: int = 0): + sqls = [ + "select _wstart as a, _wend as b, count(*), sum(c1), last(c2), first(ts) from meters " + "where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' partition by t1 interval(1m)", + "select _wstart as a, _wend as b, count(*), sum(c1), last(c2), first(ts) from meters " + "where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' partition by t1 interval(1h)", + "select _wstart as a, _wend as b, count(*), sum(c1), last(c2), first(ts) from meters " + "where ts >= '2018-09-17 09:00:00.000' and ts <= '2018-10-17 09:30:00.000' partition by c3 interval(1m)", + ] + for sql in sqls: + self.query_and_check_with_slimit(sql, 10, 2, offset) + + def test_group_by_operator(self): + tdSql.query('select count(*), c1+1 from meters group by tbname, c1+1', 1) + + def run(self): + self.prepareTestEnv() + self.test_group_by_operator() + self.test_interval_limit_offset() + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +event = threading.Event() + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/2-query/stbJoin.py b/tests/system-test/2-query/stbJoin.py index 677704648c..6eb95349fe 100644 --- a/tests/system-test/2-query/stbJoin.py +++ b/tests/system-test/2-query/stbJoin.py @@ -112,6 +112,18 @@ class TDTestCase: tdSql.query(f"select a.* from sta a join stb b on a.tg1 != b.tg1 and a.ts=b.ts;") tdSql.checkRows(36) + tdSql.query(f"select a.* from sta a join stb b on a.ts=b.ts and a.ts is null;") + tdSql.checkRows(0) + + tdSql.query(f"select a.* from sta a join stb b on a.ts=b.ts and a.ts is not null;") + tdSql.checkRows(48) + + tdSql.query(f"select a.* from sta a ,stb b where a.ts=b.ts and a.ts is null;") + tdSql.checkRows(0) + + tdSql.query(f"select a.* from sta a ,stb b where a.ts=b.ts and a.ts is not null;") + tdSql.checkRows(48) + # tdSql.checkData(0,1,10) tdSql.error(f"select a.* from sta a join stb b on a.tg1=b.tg1 where a.ts=b.ts or a.tg2=b.tg2;") diff --git a/tests/system-test/2-query/union.py b/tests/system-test/2-query/union.py index 9086d7754d..547ab07eb0 100644 --- a/tests/system-test/2-query/union.py +++ b/tests/system-test/2-query/union.py @@ -249,6 +249,9 @@ class TDTestCase: tdSql.checkRows(14) tdSql.query(f"select derivative(c1, 1s, 0) from (select * from {dbname}.t1 union select * from {dbname}.t1 order by ts)") tdSql.checkRows(11) + tdSql.query(f"select count(*) from {dbname}.t1 as a join {dbname}.t1 as b on a.ts = b.ts and a.ts is null") + tdSql.checkRows(1) + tdSql.checkData(0, 0, 0) tdSql.error(f"select first(c1) from (select * from {dbname}.t1 union select * from {dbname}.t1)") tdSql.error(f"select last(c1) from (select * from {dbname}.t1 union select * from {dbname}.t1)") diff --git a/tests/system-test/7-tmq/tmqParamsTest.py b/tests/system-test/7-tmq/tmqParamsTest.py index ff7c70bcd2..0e9e8f989f 100644 --- a/tests/system-test/7-tmq/tmqParamsTest.py +++ b/tests/system-test/7-tmq/tmqParamsTest.py @@ -19,7 +19,7 @@ class TDTestCase: self.wal_retention_period1 = 3600 self.wal_retention_period2 = 1 self.commit_value_list = ["true", "false"] - self.offset_value_list = ["", "earliest", "latest", "none"] + self.offset_value_list = ["earliest", "latest", "none"] self.tbname_value_list = ["true", "false"] self.snapshot_value_list = ["false"] @@ -92,7 +92,7 @@ class TDTestCase: } consumer_commit = 1 if consumer_dict["enable.auto.commit"] == "true" else 0 consumer_tbname = 1 if 
consumer_dict["msg.with.table.name"] == "true" else 0 - consumer_ret = "earliest" if offset_value == "" else offset_value + consumer_ret = "latest" if offset_value == "" else offset_value expected_parameters=f'tbname:{consumer_tbname},commit:{consumer_commit},interval:{paraDict["auto_commit_interval"]}ms,reset:{consumer_ret}' if len(offset_value) == 0: del consumer_dict["auto.offset.reset"] diff --git a/tests/system-test/8-stream/at_once_interval.py b/tests/system-test/8-stream/at_once_interval.py index 020b5f2a17..8f5438be37 100644 --- a/tests/system-test/8-stream/at_once_interval.py +++ b/tests/system-test/8-stream/at_once_interval.py @@ -70,6 +70,9 @@ class TDTestCase: fill_value='VALUE,1,2,3,4,5,6,7,8,9,10,11' self.tdCom.create_stream(stream_name=f'{self.tb_name}{self.tdCom.stream_suffix}', des_table=self.tb_stream_des_table, source_sql=f'select _wstart AS wstart, {self.tdCom.tb_source_select_str} from {self.tb_name} {partition_elm} interval({self.tdCom.dataDict["interval"]}s)', trigger_mode="at_once", subtable_value=tb_subtable_value, fill_value=fill_value, fill_history_value=fill_history_value) start_time = self.tdCom.date_time + + time.sleep(1) + for i in range(self.tdCom.range_count): ts_value = str(self.tdCom.date_time+self.tdCom.dataDict["interval"])+f'+{i*10}s' ts_cast_delete_value = self.tdCom.time_cast(ts_value) diff --git a/tests/system-test/8-stream/at_once_interval_ext.py b/tests/system-test/8-stream/at_once_interval_ext.py index 8674e7f853..e1dc057448 100644 --- a/tests/system-test/8-stream/at_once_interval_ext.py +++ b/tests/system-test/8-stream/at_once_interval_ext.py @@ -92,6 +92,8 @@ class TDTestCase: else: range_count = self.tdCom.range_count + time.sleep(1) + for i in range(range_count): latency = 0 tag_value_list = list() diff --git a/tests/system-test/8-stream/partition_interval.py b/tests/system-test/8-stream/partition_interval.py index 0424932bf8..3692408de0 100644 --- a/tests/system-test/8-stream/partition_interval.py +++ b/tests/system-test/8-stream/partition_interval.py @@ -35,6 +35,9 @@ class TDTestCase: # create stb/ctb/tb stream self.tdCom.create_stream(stream_name=f'{self.stb_name}{self.tdCom.stream_suffix}', des_table=self.stb_stream_des_table, source_sql=source_sql, ignore_expired=ignore_expired) + + time.sleep(1) + # insert data count = 1 step_count = 1 diff --git a/tests/system-test/8-stream/pause_resume_test.py b/tests/system-test/8-stream/pause_resume_test.py index 421f499a3d..484383f1ce 100644 --- a/tests/system-test/8-stream/pause_resume_test.py +++ b/tests/system-test/8-stream/pause_resume_test.py @@ -56,6 +56,9 @@ class TDTestCase: self.tdCom.create_stream(stream_name=f'{self.stb_name}{self.tdCom.stream_suffix}', des_table=self.stb_stream_des_table, source_sql=f'select _wstart AS wstart, {self.tdCom.stb_source_select_str} from {self.stb_name} {partition_elm} interval({self.tdCom.dataDict["interval"]}s)', trigger_mode="at_once", subtable_value=stb_subtable_value, fill_history_value=fill_history_value) self.tdCom.create_stream(stream_name=f'{self.ctb_name}{self.tdCom.stream_suffix}', des_table=self.tdCom.ctb_stream_des_table, source_sql=f'select _wstart AS wstart, {self.tdCom.stb_source_select_str} from {self.ctb_name} {partition_elm} interval({self.tdCom.dataDict["interval"]}s)', trigger_mode="at_once", subtable_value=ctb_subtable_value, fill_history_value=fill_history_value) self.tdCom.create_stream(stream_name=f'{self.tb_name}{self.tdCom.stream_suffix}', des_table=self.tdCom.tb_stream_des_table, source_sql=f'select _wstart AS wstart, 
{self.tdCom.tb_source_select_str} from {self.tb_name} {partition_elm} interval({self.tdCom.dataDict["interval"]}s)', trigger_mode="at_once", subtable_value=tb_subtable_value, fill_history_value=fill_history_value) + + time.sleep(1) + for i in range(range_count): ts_value = str(date_time+self.tdCom.dataDict["interval"])+f'+{i*10}s' ts_cast_delete_value = self.tdCom.time_cast(ts_value) @@ -75,6 +78,9 @@ class TDTestCase: partition_elm = f'partition by {partition}' else: partition_elm = "" + + time.sleep(1) + # if i == int(range_count/2): if i > 2 and i % 3 == 0: for stream_name in [f'{self.stb_name}{self.tdCom.stream_suffix}', f'{self.ctb_name}{self.tdCom.stream_suffix}', f'{self.tb_name}{self.tdCom.stream_suffix}']: diff --git a/tests/system-test/8-stream/window_close_session_ext.py b/tests/system-test/8-stream/window_close_session_ext.py index 0fc041e965..62e00b064a 100644 --- a/tests/system-test/8-stream/window_close_session_ext.py +++ b/tests/system-test/8-stream/window_close_session_ext.py @@ -43,6 +43,9 @@ class TDTestCase: watermark_value = None # create stb/ctb/tb stream self.tdCom.create_stream(stream_name=f'{self.stb_name}{self.tdCom.stream_suffix}', des_table=self.tdCom.ext_stb_stream_des_table, source_sql=f'select _wstart AS wstart, {partitial_tb_source_str} from {self.stb_name} session(ts, {self.tdCom.dataDict["session"]}s)', trigger_mode="window_close", watermark=watermark_value, subtable_value=stb_subtable_value, fill_history_value=fill_history_value, stb_field_name_value=stb_field_name_value, tag_value=tag_value, use_exist_stb=use_exist_stb) + + time.sleep(1) + for i in range(self.tdCom.range_count): if i == 0: window_close_ts = self.tdCom.cal_watermark_window_close_session_endts(self.tdCom.date_time, self.tdCom.dataDict['watermark'], self.tdCom.dataDict['session']) diff --git a/tests/system-test/simpletest.bat b/tests/system-test/simpletest.bat index 5ae2d3feb3..31b76cad4a 100644 --- a/tests/system-test/simpletest.bat +++ b/tests/system-test/simpletest.bat @@ -18,7 +18,7 @@ python3 .\test.py -f 1-insert\influxdb_line_taosc_insert.py @REM #python3 .\test.py -f 1-insert\test_stmt_muti_insert_query.py @REM python3 .\test.py -f 1-insert\alter_stable.py @REM python3 .\test.py -f 1-insert\alter_table.py -@REM python3 .\test.py -f 2-query\between.py +python3 .\test.py -f 2-query\between.py @REM python3 .\test.py -f 2-query\distinct.py @REM python3 .\test.py -f 2-query\varchar.py @REM python3 .\test.py -f 2-query\ltrim.py @@ -101,3 +101,4 @@ python3 .\test.py -f 7-tmq\subscribeStb.py @REM python3 .\test.py -f 7-tmq\subscribeStb3.py @REM python3 .\test.py -f 7-tmq\subscribeStb4.py @REM python3 .\test.py -f 7-tmq\db.py +python3 .\test.py -f 6-cluster\5dnode3mnodeSep1VnodeStopDnodeModifyMeta.py -N 6 -M 3 \ No newline at end of file diff --git a/tools/shell/src/shellAuto.c b/tools/shell/src/shellAuto.c index 41cdb0f928..60d6388faa 100644 --- a/tools/shell/src/shellAuto.c +++ b/tools/shell/src/shellAuto.c @@ -66,6 +66,8 @@ SWords shellCommands[] = { {"alter dnode \"debugFlag\" \"141\";", 0, 0, NULL}, {"alter dnode \"monitor\" \"0\";", 0, 0, NULL}, {"alter dnode \"monitor\" \"1\";", 0, 0, NULL}, + {"alter dnode \"asynclog\" \"0\";", 0, 0, NULL}, + {"alter dnode \"asynclog\" \"1\";", 0, 0, NULL}, {"alter all dnodes \"resetlog\";", 0, 0, NULL}, {"alter all dnodes \"debugFlag\" \"141\";", 0, 0, NULL}, {"alter all dnodes \"monitor\" \"0\";", 0, 0, NULL}, @@ -77,6 +79,8 @@ SWords shellCommands[] = { {"alter local \"uDebugFlag\" \"143\";", 0, 0, NULL}, {"alter local \"rpcDebugFlag\" \"143\";", 
0, 0, NULL}, {"alter local \"tmrDebugFlag\" \"143\";", 0, 0, NULL}, + {"alter local \"asynclog\" \"0\";", 0, 0, NULL}, + {"alter local \"asynclog\" \"1\";", 0, 0, NULL}, {"alter topic", 0, 0, NULL}, {"alter user ;", 0, 0, NULL}, // 20 @@ -184,7 +188,7 @@ SWords shellCommands[] = { {"show grants;", 0, 0, NULL}, #ifdef TD_ENTERPRISE {"split vgroup ", 0, 0, NULL}, -#endif +#endif {"insert into values(", 0, 0, NULL}, {"insert into using tags(", 0, 0, NULL}, {"insert into using values(", 0, 0, NULL}, @@ -391,13 +395,19 @@ void showHelp() { alter dnode 'monitor' '0';\n\ alter dnode 'monitor' \"1\";\n\ alter dnode \"debugflag\" \"143\";\n\ + alter dnode 'asynclog' '0';\n\ + alter dnode 'asynclog' \"1\";\n\ alter all dnodes \"monitor\" \"0\";\n\ alter all dnodes \"monitor\" \"1\";\n\ alter all dnodes \"resetlog\";\n\ alter all dnodes \"debugFlag\" \n\ + alter all dnodes \"asynclog\" \"0\";\n\ + alter all dnodes \"asynclog\" \"1\";\n\ alter table ;\n\ alter local \"resetlog\";\n\ alter local \"DebugFlag\" \"143\";\n\ + alter local \"asynclog\" \"0\";\n\ + alter local \"asynclog\" \"1\";\n\ alter topic\n\ alter user ...\n\ ----- C ----- \n\ diff --git a/tools/shell/src/shellWebsocket.c b/tools/shell/src/shellWebsocket.c index ff2e5efdd4..e83ceff099 100644 --- a/tools/shell/src/shellWebsocket.c +++ b/tools/shell/src/shellWebsocket.c @@ -260,7 +260,7 @@ void shellRunSingleCommandWebsocketImp(char *command) { WS_RES* res; for (int reconnectNum = 0; reconnectNum < 2; reconnectNum++) { - if (!shell.ws_conn && shell_conn_ws_server(0)) { + if (!shell.ws_conn && shell_conn_ws_server(0) || shell.stop_query) { return; } st = taosGetTimestampUs(); diff --git a/utils/test/c/tmq_taosx_ci.c b/utils/test/c/tmq_taosx_ci.c index 5d4d73c448..ff89bb1f75 100644 --- a/utils/test/c/tmq_taosx_ci.c +++ b/utils/test/c/tmq_taosx_ci.c @@ -547,6 +547,7 @@ tmq_t* build_consumer() { tmq_conf_set(conf, "td.connect.pass", "taosdata"); tmq_conf_set(conf, "msg.with.table.name", "true"); tmq_conf_set(conf, "enable.auto.commit", "true"); + tmq_conf_set(conf, "auto.offset.reset", "earliest"); if (g_conf.snapShot) { tmq_conf_set(conf, "experimental.snapshot.enable", "true");