diff --git a/Jenkinsfile2 b/Jenkinsfile2 index f4dcdb242e..c41e739bd3 100644 --- a/Jenkinsfile2 +++ b/Jenkinsfile2 @@ -315,7 +315,9 @@ def pre_test_build_win() { python.exe -m pip install --upgrade pip python -m pip uninstall taospy -y python -m pip install taospy==2.7.10 - xcopy /e/y/i/f %WIN_INTERNAL_ROOT%\\debug\\build\\lib\\taos.dll C:\\Windows\\System32 + python -m pip uninstall taos-ws-py -y + python -m pip install taos-ws-py==0.2.8 + xcopy /e/y/i/f %WIN_INTERNAL_ROOT%\\debug\\build\\lib\\taos.dll C:\\Windows\\System32 ''' return 1 } diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000000..be2be525ba --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,5 @@ +# Security Policy + +## Reporting a Vulnerability + +Please submit CVE to https://github.com/taosdata/TDengine/security/advisories. diff --git a/cmake/cmake.define b/cmake/cmake.define index edc5dd601a..53026df4ef 100644 --- a/cmake/cmake.define +++ b/cmake/cmake.define @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.0) -set(CMAKE_VERBOSE_MAKEFILE ON) +set(CMAKE_VERBOSE_MAKEFILE FALSE) set(TD_BUILD_TAOSA_INTERNAL FALSE) #set output directory diff --git a/cmake/cmake.platform b/cmake/cmake.platform index 76ac6ba004..30a33fcdb4 100644 --- a/cmake/cmake.platform +++ b/cmake/cmake.platform @@ -176,6 +176,14 @@ IF(APPLE) set(THREADS_PREFER_PTHREAD_FLAG ON) ENDIF() +IF(TD_WINDOWS) + IF(MSVC) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /STACK:8388608") + ELSEIF(MINGW) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--stack,8388608") + ENDIF() +ENDIF() + MESSAGE(STATUS "Platform arch:" ${PLATFORM_ARCH_STR}) set(TD_DEPS_DIR "x86") diff --git a/cmake/cmake.version b/cmake/cmake.version index 27e0f1d68a..0e4785f643 100644 --- a/cmake/cmake.version +++ b/cmake/cmake.version @@ -2,7 +2,7 @@ IF (DEFINED VERNUMBER) SET(TD_VER_NUMBER ${VERNUMBER}) ELSE () - SET(TD_VER_NUMBER "3.1.2.0.alpha") + SET(TD_VER_NUMBER "3.2.0.0.alpha") ENDIF () IF (DEFINED VERCOMPATIBLE) diff --git 
a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index e3e48ac3a1..a963e4497f 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -399,7 +399,7 @@ if(${BUILD_WITH_COS}) INCLUDE_DIRECTORIES($ENV{HOME}/.cos-local.1/include) MESSAGE("$ENV{HOME}/.cos-local.1/include") - set(CMAKE_BUILD_TYPE debug) + set(CMAKE_BUILD_TYPE Release) set(ORIG_CMAKE_PROJECT_NAME ${CMAKE_PROJECT_NAME}) set(CMAKE_PROJECT_NAME cos_c_sdk) diff --git a/docs/en/02-intro/index.md b/docs/en/02-intro/index.md index 4d65c86371..f9fe68b47a 100644 --- a/docs/en/02-intro/index.md +++ b/docs/en/02-intro/index.md @@ -4,11 +4,11 @@ description: This document introduces the major features, competitive advantages toc_max_heading_level: 2 --- -TDengine is an [open source](https://tdengine.com/tdengine/open-source-time-series-database/), [high-performance](https://tdengine.com/tdengine/high-performance-time-series-database/), [cloud native](https://tdengine.com/tdengine/cloud-native-time-series-database/) [time-series database](https://tdengine.com/tsdb/) optimized for Internet of Things (IoT), Connected Cars, and Industrial IoT. Its code, including its cluster feature is open source under GNU AGPL v3.0. Besides the database engine, it provides [caching](../develop/cache), [stream processing](../develop/stream), [data subscription](../develop/tmq) and other functionalities to reduce the system complexity and cost of development and operation. +TDengine is a big data platform designed and optimized for IoT (Internet of Things) and Industrial Internet. It can safely and effectively converge, store, process and distribute high volume data (TB or even PB) generated everyday by a lot of devices and data acquisition units, monitor and alert business operation status in real time and provide real time business insight. The core component of TDengine is TDengine OSS, which is a high performance, open source, cloud native and simplified time series database. 
This section introduces the major features, competitive advantages, typical use-cases and benchmarks to help you get a high level overview of TDengine. -## Major Features +## Major Features of TDengine OSS The major features are listed below: @@ -132,3 +132,9 @@ As a high-performance, scalable and SQL supported time-series database, TDengine - [Introduction to Time-Series Database](https://tdengine.com/tsdb/) - [Introduction to TDengine competitive advantages](https://tdengine.com/tdengine/) + +## Products + +There are two products offered by TDengine: TDengine Enterprise and TDengine Cloud, for details please refer to +- [TDengine Enterprise](https://www.taosdata.com/tdengine-pro) +- [TDengine Cloud](https://cloud.taosdata.com/?utm_source=menu&utm_medium=webcn) diff --git a/docs/en/07-develop/01-connect/index.md b/docs/en/07-develop/01-connect/index.md index 49c0b11a36..3ca44783c4 100644 --- a/docs/en/07-develop/01-connect/index.md +++ b/docs/en/07-develop/01-connect/index.md @@ -221,7 +221,7 @@ curl -L -o php-tdengine.tar.gz https://github.com/Yurunsoft/php-tdengine/archive && tar -xzf php-tdengine.tar.gz -C php-tdengine --strip-components=1 ``` -> Version number `v1.0.2` is only for example, it can be replaced to any newer version, please check available version from [TDengine PHP Connector Releases](https://github.com/Yurunsoft/php-tdengine/releases). +> Version number `v1.0.2` is only for example, it can be replaced to any newer version. **Non-Swoole Environment: ** diff --git a/docs/en/07-develop/02-model/index.mdx b/docs/en/07-develop/02-model/index.mdx index db5a259cfe..4524a66a41 100644 --- a/docs/en/07-develop/02-model/index.mdx +++ b/docs/en/07-develop/02-model/index.mdx @@ -55,7 +55,7 @@ At most 4096 columns are allowed in a STable. If there are more than 4096 of met ## Create Table -A specific table needs to be created for each data collection point. Similar to RDBMS, table name and schema are required to create a table. 
Additionally, one or more tags can be created for each table. To create a table, a STable needs to be used as template and the values need to be specified for the tags. For example, for the meters in [Table 1](/tdinternal/arch#model_table1), the table can be created using below SQL statement. +A specific table needs to be created for each data collection point. Similar to RDBMS, table name and schema are required to create a table. Additionally, one or more tags can be created for each table. To create a table, a STable needs to be used as template and the values need to be specified for the tags. For example, for the smart meters table, the table can be created using below SQL statement. ```sql CREATE TABLE d1001 USING meters TAGS ("California.SanFrancisco", 2); diff --git a/docs/en/07-develop/07-tmq.mdx b/docs/en/07-develop/07-tmq.mdx index 42a9f3af5a..d3a4b79d0b 100644 --- a/docs/en/07-develop/07-tmq.mdx +++ b/docs/en/07-develop/07-tmq.mdx @@ -23,20 +23,27 @@ By subscribing to a topic, a consumer can obtain the latest data in that topic i To implement these features, TDengine indexes its write-ahead log (WAL) file for fast random access and provides configurable methods for replacing and retaining this file. You can define a retention period and size for this file. For information, see the CREATE DATABASE statement. In this way, the WAL file is transformed into a persistent storage engine that remembers the order in which events occur. However, note that configuring an overly long retention period for your WAL files makes database compression inefficient. TDengine then uses the WAL file instead of the time-series database as its storage engine for queries in the form of topics. TDengine reads the data from the WAL file; uses a unified query engine instance to perform filtering, transformations, and other operations; and finally pushes the data to consumers. -Tips:(c interface for example) -1. 
A consumption group consumes all data under the same topic, and different consumption groups are independent of each other; -2. A consumption group consumes all vgroups of the same topic, which can be composed of multiple consumers, but a vgroup is only consumed by one consumer. If the number of consumers exceeds the number of vgroups, the excess consumers do not consume data; -3. On the server side, only one offset is saved for each vgroup, and the offsets for each vgroup are monotonically increasing, but not necessarily continuous. There is no correlation between the offsets of various vgroups; -4. Each poll server will return a result block, which belongs to a vgroup and may contain data from multiple versions of wal. This block can be accessed through tmq_get_vgroup_offset. The offset interface obtains the offset of the first record in the block; -5. If a consumer group has never committed an offset, when its member consumers restart and pull data again, they start consuming from the set value of the parameter auto.offset.reset; In a consumer lifecycle, the client locally records the offset of the most recent pull data and will not pull duplicate data; -6. If a consumer terminates abnormally (without calling tmq_close), they need to wait for about 12 seconds to trigger their consumer group rebalance. The consumer's status on the server will change to LOST, and after about 1 day, the consumer will be automatically deleted; Exit normally, and after exiting, the consumer will be deleted; Add a new consumer, wait for about 2 seconds to trigger Rebalance, and the consumer's status on the server will change to ready; -7. The consumer group Rebalance will reassign Vgroups to all consumer members in the ready state of the group, and consumers can only assign/see/commit/poll operations to the Vgroups they are responsible for; -8. Consumers can tmq_position to obtain the offset of the current consumption, seek to the specified offset, and consume again; -9. 
Seek points the position to the specified offset without executing the commit operation. Once the seek is successful, it can poll the specified offset and subsequent data; -10. Before the seek operation, tmq must be call tmq_get_topic_assignment, The assignment interface obtains the vgroup ID and offset range of the consumer. The seek operation will detect whether the vgroup ID and offset are legal, and if they are illegal, an error will be reported; -11. Due to the existence of a WAL expiration deletion mechanism, even if the seek operation is successful, it is possible that the offset has expired when polling data. If the offset of poll is less than the WAL minimum version number, it will be consumed from the WAL minimum version number; -12. The tmq_get_vgroup_offset interface obtains the offset of the first data in the result block where the record is located. When seeking to this offset, it will consume all the data in this block. Refer to point four; -13. Data subscription is to consume data from the wal. If some wal files are deleted according to WAL retention policy, the deleted data can't be consumed any more. So you need to set a reasonable value for parameter `WAL_RETENTION_PERIOD` or `WAL_RETENTION_SIZE` when creating the database and make sure your application consume the data in a timely way to make sure there is no data loss. This behavior is similar to Kafka and other widely used message queue products. +The following are some explanations about data subscription, which require some understanding of the architecture of TDengine and the use of various language linker interfaces. +- A consumption group consumes all data under the same topic, and different consumption groups are independent of each other; +- A consumption group consumes all vgroups of the same topic, which can be composed of multiple consumers, but a vgroup is only consumed by one consumer. 
If the number of consumers exceeds the number of vgroups, the excess consumers do not consume data; +- On the server side, only one offset is saved for each vgroup, and the offsets for each vgroup are monotonically increasing, but not necessarily continuous. There is no correlation between the offsets of various vgroups; +- Each poll server will return a result block, which belongs to a vgroup and may contain data from multiple versions of wal. This block can be accessed through offset interface. The offset interface obtains the offset of the first record in the block; +- If a consumer group has never committed an offset, when its member consumers restart and pull data again, they start consuming from the set value of the parameter auto.offset.reset; In a consumer lifecycle, the client locally records the offset of the most recent pull data and will not pull duplicate data; +- If a consumer terminates abnormally (without calling tmq_close), they need to wait for about 12 seconds to trigger their consumer group rebalance. The consumer's status on the server will change to LOST, and after about 1 day, the consumer will be automatically deleted; Exit normally, and after exiting, the consumer will be deleted; Add a new consumer, wait for about 2 seconds to trigger Rebalance, and the consumer's status on the server will change to ready; +- The consumer group Rebalance will reassign Vgroups to all consumer members in the ready state of the group, and consumers can only assign/see/commit/poll operations to the Vgroups they are responsible for; +- Consumers can call position interface to obtain the offset of the current consumption, seek to the specified offset, and consume again; +- Seek points the position to the specified offset without executing the commit operation. 
Once the seek is successful, it can poll the specified offset and subsequent data; +- Position is to obtain the current consumption position, which is the position to be taken next time, not the current consumption position +- Commit is the submission of the consumption location. Without parameters, it is the submission of the current consumption location (the location to be taken next time, not the current consumption location). With parameters, it is the location in the submission parameters (i.e. the location to be taken after the next exit and restart) +- Seek is to set the consumer's consumption position. Wherever the seek goes, the position will be returned, all of which are the positions to be taken next time +- Seek does not affect commit, commit does not affect seek, independent of each other, the two are different concepts +- The begin interface is the offset of the first data in wal, and the end interface is the offset+1 of the last data in wal. +- Before the seek operation, tmq must call the assignment interface. The assignment interface obtains the vgroup ID and offset range of the consumer. The seek operation will detect whether the vgroup ID and offset are legal, and if they are illegal, an error will be reported; +- Due to the existence of a WAL expiration deletion mechanism, even if the seek operation is successful, it is possible that the offset has expired when polling data. If the offset of poll is less than the WAL minimum version number, it will be consumed from the WAL minimum version number; +- The offset interface obtains the offset of the first data in the result block where the record is located. When seeking to this offset, it will consume all the data in this block. Refer to point four; +- Data subscription is to consume data from the wal. If some wal files are deleted according to WAL retention policy, the deleted data can't be consumed any more. 
So you need to set a reasonable value for parameter `WAL_RETENTION_PERIOD` or `WAL_RETENTION_SIZE` when creating the database and make sure your application consume the data in a timely way to make sure there is no data loss. This behavior is similar to Kafka and other widely used message queue products. + +This document does not provide any further introduction to the knowledge of message queues themselves. If you need to know more, please search for it yourself. ## Data Schema and API diff --git a/docs/en/10-deployment/01-deploy.md b/docs/en/10-deployment/01-deploy.md index ec9162c859..4a6a3aae41 100644 --- a/docs/en/10-deployment/01-deploy.md +++ b/docs/en/10-deployment/01-deploy.md @@ -12,7 +12,7 @@ The FQDN of all hosts must be setup properly. For e.g. FQDNs may have to be conf ### Step 1 -If any previous version of TDengine has been installed and configured on any host, the installation needs to be removed and the data needs to be cleaned up. For details about uninstalling please refer to [Install and Uninstall](/operation/pkg-install). To clean up the data, please use `rm -rf /var/lib/taos/\*` assuming the `dataDir` is configured as `/var/lib/taos`. +If any previous version of TDengine has been installed and configured on any host, the installation needs to be removed and the data needs to be cleaned up. To clean up the data, please use `rm -rf /var/lib/taos/\*` assuming the `dataDir` is configured as `/var/lib/taos`. :::note FQDN information is written to file. If you have started TDengine without configuring or changing the FQDN, ensure that data is backed up or no longer needed before running the `rm -rf /var/lib\taos/\*` command. diff --git a/docs/en/12-taos-sql/22-meta.md b/docs/en/12-taos-sql/22-meta.md index 37304633e7..a9eec511c5 100644 --- a/docs/en/12-taos-sql/22-meta.md +++ b/docs/en/12-taos-sql/22-meta.md @@ -26,7 +26,7 @@ This document introduces the tables of INFORMATION_SCHEMA and their structure. 
## INS_DNODES -Provides information about dnodes. Similar to SHOW DNODES. +Provides information about dnodes. Similar to SHOW DNODES. Users whose SYSINFO attribute is 0 can't view this table. | # | **Column** | **Data Type** | **Description** | | --- | :------------: | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | @@ -40,7 +40,7 @@ Provides information about dnodes. Similar to SHOW DNODES. ## INS_MNODES -Provides information about mnodes. Similar to SHOW MNODES. +Provides information about mnodes. Similar to SHOW MNODES. Users whose SYSINFO attribute is 0 can't view this table. | # | **Column** | **Data Type** | **Description** | | --- | :---------: | ------------- | ------------------------------------------ | @@ -52,7 +52,7 @@ Provides information about mnodes. Similar to SHOW MNODES. ## INS_QNODES -Provides information about qnodes. Similar to SHOW QNODES. +Provides information about qnodes. Similar to SHOW QNODES. Users whose SYSINFO attribute is 0 can't view this table. | # | **Column** | **Data Type** | **Description** | | --- | :---------: | ------------- | --------------- | @@ -62,7 +62,7 @@ Provides information about qnodes. Similar to SHOW QNODES. ## INS_CLUSTER -Provides information about the cluster. +Provides information about the cluster. Users whose SYSINFO attribute is 0 can't view this table. | # | **Column** | **Data Type** | **Description** | | --- | :---------: | ------------- | --------------- | @@ -74,27 +74,27 @@ Provides information about the cluster. Provides information about user-created databases. Similar to SHOW DATABASES. 
-| # | **Column** | **Data Type** | **Description** | +| # | **Column** | **Data Type** | **Description** | | --- | :------------------: | ---------------- | ------------------------------------------------ | -| 1| name| BINARY(32)| Database name | +| 1 | name | VARCHAR(64) | Database name | | 2 | create_time | TIMESTAMP | Creation time | | 3 | ntables | INT | Number of standard tables and subtables (not including supertables) | | 4 | vgroups | INT | Number of vgroups. It should be noted that `vnodes` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 6 | replica | INT | Number of replicas. It should be noted that `replica` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 7 | strict | BINARY(4) | Obsoleted | -| 8 | duration | INT | Duration for storage of single files. It should be noted that `duration` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 9 | keep | INT | Data retention period. It should be noted that `keep` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 7 | strict | VARCHAR(4) | Obsoleted | +| 8 | duration | VARCHAR(10) | Duration for storage of single files. It should be noted that `duration` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 9 | keep | VARCHAR(32) | Data retention period. It should be noted that `keep` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 10 | buffer | INT | Write cache size per vnode, in MB. It should be noted that `buffer` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 11 | pagesize | INT | Page size for vnode metadata storage engine, in KB. It should be noted that `pagesize` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 12 | pages | INT | Number of pages per vnode metadata storage engine. 
It should be noted that `pages` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 13 | minrows | INT | Maximum number of records per file block. It should be noted that `minrows` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 14 | maxrows | INT | Minimum number of records per file block. It should be noted that `maxrows` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 15 | comp | INT | Compression method. It should be noted that `comp` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 16 | precision | BINARY(2) | Time precision. It should be noted that `precision` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 17 | status | BINARY(10) | Current database status | -| 18 | retentions | BINARY (60) | Aggregation interval and retention period. It should be noted that `retentions` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 16 | precision | VARCHAR(2) | Time precision. It should be noted that `precision` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 17 | status | VARCHAR(10) | Current database status | +| 18 | retentions | VARCHAR(60) | Aggregation interval and retention period. It should be noted that `retentions` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 19 | single_stable | BOOL | Whether the database can contain multiple supertables. It should be noted that `single_stable` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 20 | cachemodel | BINARY(60) | Caching method for the newest data. It should be noted that `cachemodel` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 20 | cachemodel | VARCHAR(60) | Caching method for the newest data. 
It should be noted that `cachemodel` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 21 | cachesize | INT | Memory per vnode used for caching the newest data. It should be noted that `cachesize` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 22 | wal_level | INT | WAL level. It should be noted that `wal_level` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 23 | wal_fsync_period | INT | Interval at which WAL is written to disk. It should be noted that `wal_fsync_period` is a TDengine keyword and needs to be escaped with ` when used as a column name. | @@ -196,7 +196,7 @@ Provides information about standard tables and subtables. ## INS_USERS -Provides information about TDengine users. +Provides information about TDengine users. Users whose SYSINFO attribute is 0 can't view this table. | # | **Column** | **Data Type** | **Description** | | --- | :---------: | ------------- | ---------------- | @@ -206,7 +206,7 @@ Provides information about TDengine users. ## INS_GRANTS -Provides information about TDengine Enterprise Edition permissions. +Provides information about TDengine Enterprise Edition permissions. Users whose SYSINFO attribute is 0 can't view this table. | # | **Column** | **Data Type** | **Description** | | --- | :---------: | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------- | @@ -227,7 +227,7 @@ Provides information about TDengine Enterprise Edition permissions. ## INS_VGROUPS -Provides information about vgroups. +Provides information about vgroups. Users whose SYSINFO attribute is 0 can't view this table. 
| # | **Column** | **Data Type** | **Description** | | --- | :--------: | ------------- | ----------------------------------------------------------------------------------------------------------------------------------- | @@ -256,7 +256,7 @@ Provides system configuration information. ## INS_DNODE_VARIABLES -Provides dnode configuration information. +Provides dnode configuration information. Users whose SYSINFO attribute is 0 can't view this table. | # | **Column** | **Data Type** | **Description** | | --- | :--------: | ------------- | ----------------------------------------------------------------------------------------------------------------------- | @@ -300,6 +300,8 @@ Provides dnode configuration information. ## INS_USER_PRIVILEGES +Users whose SYSINFO attribute is 0 can't view this table. + | # | **Column** | **Data Type** | **Description** |** | | --- | :----------: | ------------ | -------------------------------------------| | 1 | user_name | VARCHAR(24) | Username | diff --git a/docs/en/12-taos-sql/24-show.md b/docs/en/12-taos-sql/24-show.md index 7a58343f24..9e2897160c 100644 --- a/docs/en/12-taos-sql/24-show.md +++ b/docs/en/12-taos-sql/24-show.md @@ -384,7 +384,7 @@ Shows information about all vgroups in the current database. ## SHOW VNODES ```sql -SHOW VNODES {dnode_id | dnode_endpoint}; +SHOW VNODES [ON DNODE dnode_id]; ``` Shows information about all vnodes in the system or about the vnodes for a specified dnode. diff --git a/docs/en/13-operation/01-pkg-install.md b/docs/en/13-operation/01-pkg-install.md deleted file mode 100644 index 5610139471..0000000000 --- a/docs/en/13-operation/01-pkg-install.md +++ /dev/null @@ -1,178 +0,0 @@ ---- -title: Install and Uninstall -description: This document describes how to install, upgrade, and uninstall TDengine. ---- - -import Tabs from "@theme/Tabs"; -import TabItem from "@theme/TabItem"; - -This document gives more information about installing, uninstalling, and upgrading TDengine. 
- -## Install - -About details of installing TDenine, please refer to [Installation Guide](../../get-started/package/). - -## Uninstall - - - - -Uninstall package of TDengine by apt-get can be uninstalled as below: - -```bash -$ sudo apt-get remove tdengine -Reading package lists... Done -Building dependency tree -Reading state information... Done -The following packages will be REMOVED: - tdengine -0 upgraded, 0 newly installed, 1 to remove and 18 not upgraded. -After this operation, 68.3 MB disk space will be freed. -Do you want to continue? [Y/n] y -(Reading database ... 135625 files and directories currently installed.) -Removing tdengine (3.0.0.0) ... -TDengine is removed successfully! - -``` - -If you have installed taos-tools, please uninstall it first before uninstall TDengine. The command of uninstall is following: - -``` -$ sudo apt remove taostools -Reading package lists... Done -Building dependency tree -Reading state information... Done -The following packages will be REMOVED: - taostools -0 upgraded, 0 newly installed, 1 to remove and 0 not upgraded. -After this operation, 68.3 MB disk space will be freed. -Do you want to continue? [Y/n] -(Reading database ... 147973 files and directories currently installed.) -Removing taostools (2.1.2) ... -``` - - - - -Deb package of TDengine can be uninstalled as below: - -``` -$ sudo dpkg -r tdengine -(Reading database ... 137504 files and directories currently installed.) -Removing tdengine (3.0.0.0) ... -TDengine is removed successfully! - -``` - -Deb package of taosTools can be uninstalled as below: - -``` -$ sudo dpkg -r taostools -(Reading database ... 147973 files and directories currently installed.) -Removing taostools (2.1.2) ... -``` - - - - - -RPM package of TDengine can be uninstalled as below: - -``` -$ sudo rpm -e tdengine -TDengine is removed successfully! -``` - -RPM package of taosTools can be uninstalled as below: - -``` -sudo rpm -e taostools -taosToole is removed successfully! 
-``` - - - - - -tar.gz package of TDengine can be uninstalled as below: - -``` -$ rmtaos -TDengine is removed successfully! -``` - -tar.gz package of taosTools can be uninstalled as below: - -``` -$ rmtaostools -Start to uninstall taos tools ... - -taos tools is uninstalled successfully! -``` - - - - -Run C:\TDengine\unins000.exe to uninstall TDengine on a Windows system. - - - - -TDengine can be uninstalled as below: - -``` -$ rmtaos -TDengine is removed successfully! -``` - - - - -:::info - -- We strongly recommend not to use multiple kinds of installation packages on a single host TDengine. The packages may affect each other and cause errors. - -- After deb package is installed, if the installation directory is removed manually, uninstall or reinstall will not work. This issue can be resolved by using the command below which cleans up TDengine package information. - - ``` - $ sudo rm -f /var/lib/dpkg/info/tdengine* - ``` - -You can then reinstall if needed. - -- After rpm package is installed, if the installation directory is removed manually, uninstall or reinstall will not work. This issue can be resolved by using the command below which cleans up TDengine package information. - - ``` - $ sudo rpm -e --noscripts tdengine - ``` - -You can then reinstall if needed. - -::: - -Uninstalling and Modifying Files - -- When TDengine is uninstalled, the configuration /etc/taos/taos.cfg, data directory /var/lib/taos, log directory /var/log/taos are kept. They can be deleted manually with caution, because data can't be recovered. Please follow data integrity, security, backup or relevant SOPs before deleting any data. - -- When reinstalling TDengine, if the default configuration file /etc/taos/taos.cfg exists, it will be kept and the configuration file in the installation package will be renamed to taos.cfg.orig and stored at /usr/local/taos/cfg to be used as configuration sample. 
Otherwise the configuration file in the installation package will be installed to /etc/taos/taos.cfg and used. - - -## Upgrade -There are two aspects in upgrade operation: upgrade installation package and upgrade a running server. - -To upgrade a package, follow the steps mentioned previously to first uninstall the old version then install the new version. - -Upgrading a running server is much more complex. First please check the version number of the old version and the new version. The version number of TDengine consists of 4 sections, only if the first 2 sections match can the old version be upgraded to the new version. The steps of upgrading a running server are as below: -- Stop inserting data -- Make sure all data is persisted to disk, please use command `flush database` -- Stop the cluster of TDengine -- Uninstall old version and install new version -- Start the cluster of TDengine -- Execute simple queries, such as the ones executed prior to installing the new package, to make sure there is no data loss -- Run some simple data insertion statements to make sure the cluster works well -- Restore business services - -:::warning -TDengine doesn't guarantee any lower version is compatible with the data generated by a higher version, so it's never recommended to downgrade the version. - -::: diff --git a/docs/en/13-operation/10-monitor.md b/docs/en/13-operation/10-monitor.md index c1c6ac3c4c..2a667c132f 100644 --- a/docs/en/13-operation/10-monitor.md +++ b/docs/en/13-operation/10-monitor.md @@ -106,22 +106,22 @@ The data of tdinsight dashboard is stored in `log` database (default. You can ch |field|type|is\_tag|comment| |:----|:---|:-----|:------| |ts|TIMESTAMP||timestamp| -|uptime|FLOAT||dnode uptime| +|uptime|FLOAT||dnode uptime in `days`| |cpu\_engine|FLOAT||cpu usage of tdengine. read from `/proc//stat`| |cpu\_system|FLOAT||cpu usage of server. read from `/proc/stat`| |cpu\_cores|FLOAT||cpu cores of server| |mem\_engine|INT||memory usage of tdengine. 
read from `/proc//status`| -|mem\_system|INT||available memory on the server| +|mem\_system|INT||available memory on the server in `KB`| |mem\_total|INT||total memory of server in `KB`| |disk\_engine|INT||| |disk\_used|BIGINT||usage of data dir in `bytes`| |disk\_total|BIGINT||the capacity of data dir in `bytes`| -|net\_in|FLOAT||network throughput rate in kb/s. read from `/proc/net/dev`| -|net\_out|FLOAT||network throughput rate in kb/s. read from `/proc/net/dev`| -|io\_read|FLOAT||io throughput rate in kb/s. read from `/proc//io`| -|io\_write|FLOAT||io throughput rate in kb/s. read from `/proc//io`| -|io\_read\_disk|FLOAT||io throughput rate of disk in kb/s. read from `/proc//io`| -|io\_write\_disk|FLOAT||io throughput rate of disk in kb/s. read from `/proc//io`| +|net\_in|FLOAT||network throughput rate in byte/s. read from `/proc/net/dev`| +|net\_out|FLOAT||network throughput rate in byte/s. read from `/proc/net/dev`| +|io\_read|FLOAT||io throughput rate in byte/s. read from `/proc//io`| +|io\_write|FLOAT||io throughput rate in byte/s. read from `/proc//io`| +|io\_read\_disk|FLOAT||io throughput rate of disk in byte/s. read from `/proc//io`| +|io\_write\_disk|FLOAT||io throughput rate of disk in byte/s. read from `/proc//io`| |req\_select|INT||number of select queries received per dnode| |req\_select\_rate|FLOAT||number of select queries received per dnode divided by monitor interval.| |req\_insert|INT||number of insert queries received per dnode| @@ -150,9 +150,9 @@ The data of tdinsight dashboard is stored in `log` database (default. You can ch |ts|TIMESTAMP||timestamp| |name|NCHAR||data directory. 
default is `/var/lib/taos`| |level|INT||level for multi-level storage| -|avail|BIGINT||available space for data directory| -|used|BIGINT||used space for data directory| -|total|BIGINT||total space for data directory| +|avail|BIGINT||available space for data directory in `bytes`| +|used|BIGINT||used space for data directory in `bytes`| +|total|BIGINT||total space for data directory in `bytes`| |dnode\_id|INT|TAG|dnode id| |dnode\_ep|NCHAR|TAG|dnode endpoint| |cluster\_id|NCHAR|TAG|cluster id| @@ -165,9 +165,9 @@ The data of tdinsight dashboard is stored in `log` database (default. You can ch |:----|:---|:-----|:------| |ts|TIMESTAMP||timestamp| |name|NCHAR||log directory. default is `/var/log/taos/`| -|avail|BIGINT||available space for log directory| -|used|BIGINT||used space for data directory| -|total|BIGINT||total space for data directory| +|avail|BIGINT||available space for log directory in `bytes`| +|used|BIGINT||used space for data directory in `bytes`| +|total|BIGINT||total space for data directory in `bytes`| |dnode\_id|INT|TAG|dnode id| |dnode\_ep|NCHAR|TAG|dnode endpoint| |cluster\_id|NCHAR|TAG|cluster id| @@ -180,9 +180,9 @@ The data of tdinsight dashboard is stored in `log` database (default. You can ch |:----|:---|:-----|:------| |ts|TIMESTAMP||timestamp| |name|NCHAR||temp directory. 
default is `/tmp/`| -|avail|BIGINT||available space for temp directory| -|used|BIGINT||used space for temp directory| -|total|BIGINT||total space for temp directory| +|avail|BIGINT||available space for temp directory in `bytes`| +|used|BIGINT||used space for temp directory in `bytes`| +|total|BIGINT||total space for temp directory in `bytes`| |dnode\_id|INT|TAG|dnode id| |dnode\_ep|NCHAR|TAG|dnode endpoint| |cluster\_id|NCHAR|TAG|cluster id| diff --git a/docs/en/14-reference/03-connector/07-python.mdx b/docs/en/14-reference/03-connector/07-python.mdx index 5067c33e2d..02c176ee3d 100644 --- a/docs/en/14-reference/03-connector/07-python.mdx +++ b/docs/en/14-reference/03-connector/07-python.mdx @@ -31,11 +31,13 @@ We recommend using the latest version of `taospy`, regardless of the version of |Python Connector Version|major changes| |:-------------------:|:----:| +|2.7.12|1. added support for `varbinary` type (STMT does not yet support)
2. improved query performance (thanks to contributor [hadrianl](https://github.com/taosdata/taos-connector-python/pull/209))| |2.7.9|support for getting assignment and seek function on subscription| |2.7.8|add `execute_many` method| |Python Websocket Connector Version|major changes| |:----------------------------:|:-----:| +|0.2.9|bugs fixes| |0.2.5|1. support for getting assignment and seek function on subscription
2. support schemaless
3. support STMT| |0.2.4|support `unsubscribe` on subscription| @@ -1023,10 +1025,6 @@ Due to the current imperfection of Python's nanosecond support (see link below), 1. https://stackoverflow.com/questions/10611328/parsing-datetime-strings-containing-nanoseconds 2. https://www.python.org/dev/peps/pep-0564/ -## Important Update - -[**Release Notes**] (https://github.com/taosdata/taos-connector-python/releases) - ## API Reference - [taos](https://docs.taosdata.com/api/taospy/taos/) diff --git a/docs/en/14-reference/03-connector/80-php.mdx b/docs/en/14-reference/03-connector/80-php.mdx index b6a31b6de3..b3c2065b6e 100644 --- a/docs/en/14-reference/03-connector/80-php.mdx +++ b/docs/en/14-reference/03-connector/80-php.mdx @@ -52,8 +52,6 @@ curl -L -o php-tdengine.tar.gz https://github.com/Yurunsoft/php-tdengine/archive && tar -xzf php-tdengine.tar.gz -C php-tdengine --strip-components=1 ``` -> Version number `v1.0.2` is only for example, it can be replaced to any newer version, please find available versions in [TDengine PHP Connector Releases](https://github.com/Yurunsoft/php-tdengine/releases). - **Non-Swoole Environment: ** ```shell diff --git a/docs/en/14-reference/03-connector/_linux_install.mdx b/docs/en/14-reference/03-connector/_linux_install.mdx index 398593cfe6..d637c2cb69 100644 --- a/docs/en/14-reference/03-connector/_linux_install.mdx +++ b/docs/en/14-reference/03-connector/_linux_install.mdx @@ -4,7 +4,6 @@ import PkgListV3 from "/components/PkgListV3"; - [All Downloads](../../releases/tdengine) 2. Unzip diff --git a/docs/en/14-reference/03-connector/_macos_install.mdx b/docs/en/14-reference/03-connector/_macos_install.mdx index effabbbebe..31ceae68b6 100644 --- a/docs/en/14-reference/03-connector/_macos_install.mdx +++ b/docs/en/14-reference/03-connector/_macos_install.mdx @@ -4,8 +4,6 @@ import PkgListV3 from "/components/PkgListV3"; - [All Downloads](../../releases/tdengine) - 2. 
Execute the installer, select the default value as prompted, and complete the installation. If the installation is blocked, you can right-click or ctrl-click on the installation package and select `Open`. 3. configure taos.cfg diff --git a/docs/en/14-reference/03-connector/_windows_install.mdx b/docs/en/14-reference/03-connector/_windows_install.mdx index 723f685b5d..a6e03f30fb 100644 --- a/docs/en/14-reference/03-connector/_windows_install.mdx +++ b/docs/en/14-reference/03-connector/_windows_install.mdx @@ -3,8 +3,6 @@ import PkgListV3 from "/components/PkgListV3"; 1. Download the client installation package - - [All Downloads](../../releases/tdengine) 2. Execute the installer, select the default value as prompted, and complete the installation 3. Installation path diff --git a/docs/en/14-reference/04-taosadapter.md b/docs/en/14-reference/04-taosadapter.md index 6bc49768c6..c75598b0df 100644 --- a/docs/en/14-reference/04-taosadapter.md +++ b/docs/en/14-reference/04-taosadapter.md @@ -31,7 +31,7 @@ taosAdapter provides the following features. ### Install taosAdapter -If you use the TDengine server, you don't need additional steps to install taosAdapter. You can download taosAdapter from [TDengine 3.0 released versions](../../releases/tdengine) to download the TDengine server installation package. If you need to deploy taosAdapter separately on another server other than the TDengine server, you should install the full TDengine server package on that server to install taosAdapter. If you need to build taosAdapter from source code, you can refer to the [Building taosAdapter]( https://github.com/taosdata/taosadapter/blob/3.0/BUILD.md) documentation. +If you use the TDengine server, you don't need additional steps to install taosAdapter. If you need to deploy taosAdapter separately on another server other than the TDengine server, you should install the full TDengine server package on that server to install taosAdapter. 
If you need to build taosAdapter from source code, you can refer to the [Building taosAdapter]( https://github.com/taosdata/taosadapter/blob/3.0/BUILD.md) documentation. ### Start/Stop taosAdapter @@ -180,7 +180,7 @@ See [example/config/taosadapter.toml](https://github.com/taosdata/taosadapter/bl node_export is an exporter for machine metrics. Please visit [https://github.com/prometheus/node_exporter](https://github.com/prometheus/node_exporter) for more information. - Support for Prometheus remote_read and remote_write remote_read and remote_write are interfaces for Prometheus data read and write from/to other data storage solution. Please visit [https://prometheus.io/blog/2019/10/10/remote-read-meets-streaming/#remote-apis](https://prometheus.io/blog/2019/10/10/remote-read-meets-streaming/#remote-apis) for more information. -- Get table's VGroup ID. For more information about VGroup, please refer to [primary-logic-unit](/tdinternal/arch/#primary-logic-unit). +- Get table's VGroup ID. ## Interfaces @@ -246,7 +246,7 @@ node_export is an exporter of hardware and OS metrics exposed by the \*NIX kerne ### Get table's VGroup ID -You can call `http://:6041/rest/vgid?db=&table=` to get table's VGroup ID. For more information about VGroup, please refer to [primary-logic-unit](/tdinternal/arch/#primary-logic-unit). +You can call `http://:6041/rest/vgid?db=&table=
` to get table's VGroup ID. ## Memory usage optimization methods diff --git a/docs/en/14-reference/05-taosbenchmark.md b/docs/en/14-reference/05-taosbenchmark.md index 38a8048a21..8e5ee178a4 100644 --- a/docs/en/14-reference/05-taosbenchmark.md +++ b/docs/en/14-reference/05-taosbenchmark.md @@ -13,7 +13,7 @@ taosBenchmark (formerly taosdemo ) is a tool for testing the performance of TDen There are two ways to install taosBenchmark: -- Installing the official TDengine installer will automatically install taosBenchmark. Please refer to [TDengine installation](/operation/pkg-install) for details. +- Installing the official TDengine installer will automatically install taosBenchmark. - Compile taos-tools separately and install them. Please refer to the [taos-tools](https://github.com/taosdata/taos-tools) repository for details. @@ -397,6 +397,7 @@ The configuration parameters for specifying super table tag columns and data col ### Query scenario configuration parameters `filetype` must be set to `query` in the query scenario. +`query_times` is number of times queries were run. To control the query scenario by setting `kill_slow_query_threshold` and `kill_slow_query_interval` parameters to kill the execution of slow query statements. Threshold controls exec_usec of query command will be killed by taosBenchmark after the specified time, in seconds; interval controls sleep time to avoid continuous querying of slow queries consuming CPU in seconds. diff --git a/docs/en/14-reference/06-taosdump.md b/docs/en/14-reference/06-taosdump.md index baf07d6b9e..c07465a97c 100644 --- a/docs/en/14-reference/06-taosdump.md +++ b/docs/en/14-reference/06-taosdump.md @@ -103,7 +103,7 @@ Usage: taosdump [OPTION...] dbname [tbname ...] use letter and number only. Default is NOT. -n, --no-escape No escape char '`'. Default is using it. -Q, --dot-replace Repalce dot character with underline character in - the table name. 
+ the table name.(Version 2.5.3) -T, --thread-num=THREAD_NUM Number of thread for dump in file. Default is 8. -C, --cloud=CLOUD_DSN specify a DSN to access TDengine cloud service @@ -113,6 +113,10 @@ Usage: taosdump [OPTION...] dbname [tbname ...] -?, --help Give this help list --usage Give a short usage message -V, --version Print program version + -W, --rename=RENAME-LIST Rename database name with new name during + importing data. RENAME-LIST: + "db1=newDB1|db2=newDB2" means rename db1 to newDB1 + and rename db2 to newDB2 (Version 2.5.4) Mandatory or optional arguments to long options are also mandatory or optional for any corresponding short options. diff --git a/docs/en/14-reference/14-taosKeeper.md b/docs/en/14-reference/14-taosKeeper.md index cfc2554387..a289ecf35f 100644 --- a/docs/en/14-reference/14-taosKeeper.md +++ b/docs/en/14-reference/14-taosKeeper.md @@ -16,7 +16,7 @@ taosKeeper is a tool for TDengine that exports monitoring metrics. With taosKeep There are two ways to install taosKeeper: Methods of installing taosKeeper: -- Installing the official TDengine installer will automatically install taosKeeper. Please refer to [TDengine installation](/operation/pkg-install) for details. +- Installing the official TDengine installer will automatically install taosKeeper. - You can compile taosKeeper separately and install it. Please refer to the [taosKeeper](https://github.com/taosdata/taoskeeper) repository for details. ## Configuration and Launch diff --git a/docs/en/20-third-party/11-kafka.md b/docs/en/20-third-party/11-kafka.md index a98c3e3a6b..64c0f0bd48 100644 --- a/docs/en/20-third-party/11-kafka.md +++ b/docs/en/20-third-party/11-kafka.md @@ -21,7 +21,7 @@ TDengine Source Connector is used to read data from TDengine in real-time and se 1. Linux operating system 2. Java 8 and Maven installed 3. Git/curl/vi is installed -4. TDengine is installed and started. If not, please refer to [Installation and Uninstallation](/operation/pkg-install) +4. 
TDengine is installed and started. ## Install Kafka diff --git a/docs/zh/02-intro.md b/docs/zh/02-intro.md index bb989f27da..93d650ddd6 100644 --- a/docs/zh/02-intro.md +++ b/docs/zh/02-intro.md @@ -4,20 +4,14 @@ description: 简要介绍 TDengine 的主要功能 toc_max_heading_level: 2 --- -TDengine 是一款开源、高性能、云原生的[时序数据库](https://tdengine.com/tsdb/),且针对物联网、车联网、工业互联网、金融、IT 运维等场景进行了优化。TDengine 的代码,包括集群功能,都在 GNU AGPL v3.0 下开源。除核心的时序数据库功能外,TDengine 还提供[缓存](../develop/cache/)、[数据订阅](../develop/tmq)、[流式计算](../develop/stream)等其它功能以降低系统复杂度及研发和运维成本。 +TDengine 是一款专为物联网、工业互联网等场景设计并优化的大数据平台,它能安全高效地将大量设备、数据采集器每天产生的高达 TB 甚至 PB 级的数据进行汇聚、存储、分析和分发,对业务运行状态进行实时监测、预警,提供实时的商业洞察。其核心模块是高性能、集群开源、云原生、极简的时序数据库 TDengine OSS。 -本章节介绍 TDengine 的主要产品和功能、竞争优势、适用场景、与其他数据库的对比测试等等,让大家对 TDengine 有个整体的了解。 -## 主要产品 - -TDengine 有三个主要产品:TDengine Enterprise (即 TDengine 企业版),TDengine Cloud,和 TDengine OSS,关于它们的具体定义请参考 -- [TDengine 企业版](https://www.taosdata.com/tdengine-pro) -- [TDengine 云服务](https://cloud.taosdata.com/?utm_source=menu&utm_medium=webcn) -- [TDengine 开源版](https://www.taosdata.com/tdengine-oss) +本节介绍 TDengine OSS 的主要产品和功能、竞争优势、适用场景、与其他数据库的对比测试等等,让大家对 TDengine OSS 有个整体了解 ## 主要功能 -TDengine 的主要功能如下: +TDengine OSS 的主要功能如下: 1. 
写入数据,支持 - [SQL 写入](../develop/insert-data/sql-writing) @@ -150,3 +144,10 @@ TDengine 的主要功能如下: - [TDengine VS InfluxDB ,写入性能大 PK !](https://www.taosdata.com/2021/11/05/3248.html) - [TDengine 和 InfluxDB 查询性能对比测试报告](https://www.taosdata.com/2022/02/22/5969.html) - [TDengine 与 InfluxDB、OpenTSDB、Cassandra、MySQL、ClickHouse 等数据库的对比测试报告](https://www.taosdata.com/downloads/TDengine_Testing_Report_cn.pdf) + + +## 主要产品 + +TDengine 有两个主要产品:TDengine Enterprise (即 TDengine 企业版)和 TDengine Cloud,关于它们的具体定义请参考 +- [TDengine 企业版](https://www.taosdata.com/tdengine-pro) +- [TDengine 云服务](https://cloud.taosdata.com/?utm_source=menu&utm_medium=webcn) diff --git a/docs/zh/07-develop/07-tmq.mdx b/docs/zh/07-develop/07-tmq.md similarity index 88% rename from docs/zh/07-develop/07-tmq.mdx rename to docs/zh/07-develop/07-tmq.md index a73d43cd04..f0921af8d8 100644 --- a/docs/zh/07-develop/07-tmq.mdx +++ b/docs/zh/07-develop/07-tmq.md @@ -23,22 +23,27 @@ import CDemo from "./_sub_c.mdx"; 为了实现上述功能,TDengine 会为 WAL (Write-Ahead-Log) 文件自动创建索引以支持快速随机访问,并提供了灵活可配置的文件切换与保留机制:用户可以按需指定 WAL 文件保留的时间以及大小(详见 create database 语句)。通过以上方式将 WAL 改造成了一个保留事件到达顺序的、可持久化的存储引擎(但由于 TSDB 具有远比 WAL 更高的压缩率,我们不推荐保留太长时间,一般来说,不超过几天)。 对于以 topic 形式创建的查询,TDengine 将对接 WAL 而不是 TSDB 作为其存储引擎。在消费时,TDengine 根据当前消费进度从 WAL 直接读取数据,并使用统一的查询引擎实现过滤、变换等操作,将数据推送给消费者。 -本文档不对消息队列本身的基础知识做介绍,如果需要了解,请自行搜索。 +下面为关于数据订阅的一些说明,需要对TDengine的架构有一些了解,结合各个语言链接器的接口使用。 +- 一个消费组消费同一个topic下的所有数据,不同消费组之间相互独立; +- 一个消费组消费同一个topic所有的vgroup,消费组可由多个消费者组成,但一个vgroup仅被一个消费者消费,如果消费者数量超过了vgroup数量,多余的消费者不消费数据; +- 在服务端每个vgroup仅保存一个offset,每个vgroup的offset是单调递增的,但不一定连续。各个vgroup的offset之间没有关联; +- 每次poll服务端会返回一个结果block,该block属于一个vgroup,可能包含多个wal版本的数据,可以通过 offset 接口获得是该block第一条记录的offset; +- 一个消费组如果从未commit过offset,当其成员消费者重启重新拉取数据时,均从参数auto.offset.reset设定值开始消费;在一个消费者生命周期中,客户端本地记录了最近一次拉取数据的offset,不会拉取重复数据; +- 消费者如果异常终止(没有调用tmq_close),需等约12秒后触发其所属消费组rebalance,该消费者在服务端状态变为LOST,约1天后该消费者自动被删除;正常退出,退出后就会删除消费者;新增消费者,需等约2秒触发rebalance,该消费者在服务端状态变为ready; +- 
消费组rebalance会对该组所有ready状态的消费者成员重新进行vgroup分配,消费者仅能对自己负责的vgroup进行assignment/seek/commit/poll操作; +- 消费者可利用 position 获得当前消费的offset,并seek到指定offset,重新消费; +- seek将position指向指定offset,不执行commit操作,一旦seek成功,可poll拉取指定offset及以后的数据; +- seek 操作之前须调用 assignment 接口获取该consumer的vgroup ID和offset范围。seek 操作会检测vgroup ID 和 offset是否合法,如非法将报错; +- position是获取当前的消费位置,是下次要取的位置,不是当前消费到的位置 +- commit是提交消费位置,不带参数的话,是提交当前消费位置(下次要取的位置,不是当前消费到的位置),带参数的话,是提交参数里的位置(也即下次退出重启后要取的位置) +- seek是设置consumer消费位置,seek到哪,position就返回哪,都是下次要取的位置 +- seek不会影响commit,commit不影响seek,相互独立,两个是不同的概念 +- begin接口为wal 第一条数据的offset,end 接口为wal 最后一条数据的offset + 1 +- offset接口获取的是记录所在结果block块里的第一条数据的offset,当seek至该offset时,将消费到这个block里的全部数据。参见第四点; +- 由于存在 WAL 过期删除机制,即使seek 操作成功,poll数据时有可能offset已失效。如果poll 的offset 小于 WAL 最小版本号,将会从WAL最小版本号消费; +- 数据订阅是从 WAL 消费数据,如果一些 WAL 文件被基于 WAL 保留策略删除,则已经删除的 WAL 文件中的数据就无法再消费到。需要根据业务需要在创建数据库时合理设置 `WAL_RETENTION_PERIOD` 或 `WAL_RETENTION_SIZE` ,并确保应用及时消费数据,这样才不会产生数据丢失的现象。数据订阅的行为与 Kafka 等广泛使用的消息队列类产品的行为相似; -说明(以c接口为例): -1. 一个消费组消费同一个topic下的所有数据,不同消费组之间相互独立; -2. 一个消费组消费同一个topic所有的vgroup,消费组可由多个消费者组成,但一个vgroup仅被一个消费者消费,如果消费者数量超过了vgroup数量,多余的消费者不消费数据; -3. 在服务端每个vgroup仅保存一个offset,每个vgroup的offset是单调递增的,但不一定连续。各个vgroup的offset之间没有关联; -4. 每次poll服务端会返回一个结果block,该block属于一个vgroup,可能包含多个wal版本的数据,可以通过 tmq_get_vgroup_offset 接口获得是该block第一条记录的offset; -5. 一个消费组如果从未commit过offset,当其成员消费者重启重新拉取数据时,均从参数auto.offset.reset设定值开始消费;在一个消费者生命周期中,客户端本地记录了最近一次拉取数据的offset,不会拉取重复数据; -6. 消费者如果异常终止(没有调用tmq_close),需等约12秒后触发其所属消费组rebalance,该消费者在服务端状态变为LOST,约1天后该消费者自动被删除;正常退出,退出后就会删除消费者;新增消费者,需等约2秒触发rebalance,该消费者在服务端状态变为ready; -7. 消费组rebalance会对该组所有ready状态的消费者成员重新进行vgroup分配,消费者仅能对自己负责的vgroup进行assignment/seek/commit/poll操作; -8. 消费者可利用 tmq_position 获得当前消费的offset,并seek到指定offset,重新消费; -9. seek将position指向指定offset,不执行commit操作,一旦seek成功,可poll拉取指定offset及以后的数据; -10. seek 操作之前须调用 tmq_get_topic_assignment 接口获取该consumer的vgroup ID和offset范围。seek 操作会检测vgroup ID 和 offset是否合法,如非法将报错; -11. 
tmq_get_vgroup_offset接口获取的是记录所在结果block块里的第一条数据的offset,当seek至该offset时,将消费到这个block里的全部数据。参见第四点; -12. 由于存在 WAL 过期删除机制,即使seek 操作成功,poll数据时有可能offset已失效。如果poll 的offset 小于 WAL 最小版本号,将会从WAL最小版本号消费; -13. 数据订阅是从 WAL 消费数据,如果一些 WAL 文件被基于 WAL 保留策略删除,则已经删除的 WAL 文件中的数据就无法再消费到。需要根据业务需要在创建数据库时合理设置 `WAL_RETENTION_PERIOD` 或 `WAL_RETENTION_SIZE` ,并确保应用及时消费数据,这样才不会产生数据丢失的现象。数据订阅的行为与 Kafka 等广泛使用的消息队列类产品的行为相似; +本文档不对消息队列本身的知识做更多的介绍,如果需要了解,请自行搜索。 ## 主要数据结构和 API diff --git a/docs/zh/08-connector/14-java.mdx b/docs/zh/08-connector/14-java.mdx index 0ff00d1710..64e9a3daed 100644 --- a/docs/zh/08-connector/14-java.mdx +++ b/docs/zh/08-connector/14-java.mdx @@ -1004,7 +1004,7 @@ TaosConsumer consumer = new TaosConsumer<>(config); - httpConnectTimeout: 创建连接超时参数,单位 ms,默认为 5000 ms。仅在 WebSocket 连接下有效。 - messageWaitTimeout: 数据传输超时参数,单位 ms,默认为 10000 ms。仅在 WebSocket 连接下有效。 - httpPoolSize: 同一个连接下最大并行请求数。仅在 WebSocket 连接下有效。 - 其他参数请参考:[Consumer 参数列表](../../../develop/tmq#创建-consumer-以及consumer-group) + 其他参数请参考:[Consumer 参数列表](../../develop/tmq#创建-consumer-以及consumer-group) #### 订阅消费数据 @@ -1082,7 +1082,7 @@ consumer.unsubscribe(); consumer.close() ``` -详情请参考:[数据订阅](../../../develop/tmq) +详情请参考:[数据订阅](../../develop/tmq) #### 完整示例 @@ -1373,7 +1373,7 @@ public static void main(String[] args) throws Exception { **解决方法**: 更换 taos-jdbcdriver 3.0.2+ 版本。 -其它问题请参考 [FAQ](../../../train-faq/faq) +其它问题请参考 [FAQ](../../train-faq/faq) ## API 参考 diff --git a/docs/zh/08-connector/26-rust.mdx b/docs/zh/08-connector/26-rust.mdx index 3e51aa72bb..018552117e 100644 --- a/docs/zh/08-connector/26-rust.mdx +++ b/docs/zh/08-connector/26-rust.mdx @@ -352,7 +352,7 @@ client.put(&sml_data)? 
### 数据订阅 -TDengine 通过消息队列 [TMQ](../../../taos-sql/tmq/) 启动一个订阅。 +TDengine 通过消息队列 [TMQ](../../taos-sql/tmq/) 启动一个订阅。 #### 创建 Topic @@ -491,7 +491,7 @@ let taos = pool.get()?; ## 常见问题 -请参考 [FAQ](../../../train-faq/faq) +请参考 [FAQ](../../train-faq/faq) ## API 参考 diff --git a/docs/zh/08-connector/30-python.mdx b/docs/zh/08-connector/30-python.mdx index ab98b5b8de..1526c0da6e 100644 --- a/docs/zh/08-connector/30-python.mdx +++ b/docs/zh/08-connector/30-python.mdx @@ -33,11 +33,13 @@ Python 连接器的源码托管在 [GitHub](https://github.com/taosdata/taos-con |Python Connector 版本|主要变化| |:-------------------:|:----:| +|2.7.12|1. 新增 varbinary 类型支持(STMT暂不支持 varbinary )
2. query 性能提升(感谢贡献者[hadrianl](https://github.com/taosdata/taos-connector-python/pull/209))| |2.7.9|数据订阅支持获取消费进度和重置消费进度| |2.7.8|新增 `execute_many`| |Python Websocket Connector 版本|主要变化| |:----------------------------:|:-----:| +|0.2.9|已知问题修复| |0.2.5|1. 数据订阅支持获取消费进度和重置消费进度
2. 支持 schemaless
3. 支持 STMT| |0.2.4|数据订阅新增取消订阅方法| diff --git a/docs/zh/12-taos-sql/22-meta.md b/docs/zh/12-taos-sql/22-meta.md index 35794ec269..3d124db51f 100644 --- a/docs/zh/12-taos-sql/22-meta.md +++ b/docs/zh/12-taos-sql/22-meta.md @@ -26,7 +26,7 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 ## INS_DNODES -提供 dnode 的相关信息。也可以使用 SHOW DNODES 来查询这些信息。 +提供 dnode 的相关信息。也可以使用 SHOW DNODES 来查询这些信息。 SYSINFO 为 0 的用户不能查看此表。 | # | **列名** | **数据类型** | **说明** | | --- | :------------: | ------------ | ----------------------------------------------------------------------------------------------------- | @@ -40,7 +40,7 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 ## INS_MNODES -提供 mnode 的相关信息。也可以使用 SHOW MNODES 来查询这些信息。 +提供 mnode 的相关信息。也可以使用 SHOW MNODES 来查询这些信息。 SYSINFO 为 0 的用户不能查看此表。 | # | **列名** | **数据类型** | **说明** | | --- | :---------: | ------------ | ------------------ | @@ -52,7 +52,7 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 ## INS_QNODES -当前系统中 QNODE 的信息。也可以使用 SHOW QNODES 来查询这些信息。 +当前系统中 QNODE 的信息。也可以使用 SHOW QNODES 来查询这些信息。SYSINFO 属性为 0 的用户不能查看此表。 | # | **列名** | **数据类型** | **说明** | | --- | :---------: | ------------ | ------------ | @@ -62,7 +62,7 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 ## INS_CLUSTER -存储集群相关信息。 +存储集群相关信息。 SYSINFO 属性为 0 的用户不能查看此表。 | # | **列名** | **数据类型** | **说明** | | --- | :---------: | ------------ | ---------- | @@ -76,25 +76,25 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 | # | **列名** | **数据类型** | **说明** | | --- | :------------------: | ---------------- | ------------------------------------------------ | -| 1 | name | BINARY(32) | 数据库名 | +| 1 | name | VARCHAR(64) | 数据库名 | | 2 | create_time | TIMESTAMP | 创建时间 | | 3 | ntables | INT | 数据库中表的数量,包含子表和普通表但不包含超级表 | | 4 | vgroups | INT | 数据库中有多少个 vgroup。需要注意,`vgroups` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 6 | replica | INT | 副本数。需要注意,`replica` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 7 | strict | BINARY(4) | 废弃参数 | -| 8 | duration | INT | 单文件存储数据的时间跨度。需要注意,`duration` 为 TDengine 
关键字,作为列名使用时需要使用 ` 进行转义。 | -| 9 | keep | INT | 数据保留时长。需要注意,`keep` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 7 | strict | VARCHAR(4) | 废弃参数 | +| 8 | duration | VARCHAR(10) | 单文件存储数据的时间跨度。需要注意,`duration` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 9 | keep | VARCHAR(32) | 数据保留时长。需要注意,`keep` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 10 | buffer | INT | 每个 vnode 写缓存的内存块大小,单位 MB。需要注意,`buffer` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 11 | pagesize | INT | 每个 VNODE 中元数据存储引擎的页大小,单位为 KB。需要注意,`pagesize` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 12 | pages | INT | 每个 vnode 元数据存储引擎的缓存页个数。需要注意,`pages` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 13 | minrows | INT | 文件块中记录的最大条数。需要注意,`minrows` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 14 | maxrows | INT | 文件块中记录的最小条数。需要注意,`maxrows` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 15 | comp | INT | 数据压缩方式。需要注意,`comp` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 16 | precision | BINARY(2) | 时间分辨率。需要注意,`precision` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 17 | status | BINARY(10) | 数据库状态 | -| 18 | retentions | BINARY (60) | 数据的聚合周期和保存时长。需要注意,`retentions` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 16 | precision | VARCHAR(2) | 时间分辨率。需要注意,`precision` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 17 | status | VARCHAR(10) | 数据库状态 | +| 18 | retentions | VARCHAR(60) | 数据的聚合周期和保存时长。需要注意,`retentions` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 19 | single_stable | BOOL | 表示此数据库中是否只可以创建一个超级表。需要注意,`single_stable` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 20 | cachemodel | BINARY(60) | 表示是否在内存中缓存子表的最近数据。需要注意,`cachemodel` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 20 | cachemodel | VARCHAR(60) | 表示是否在内存中缓存子表的最近数据。需要注意,`cachemodel` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 21 | cachesize | INT | 表示每个 vnode 中用于缓存子表最近数据的内存大小。需要注意,`cachesize` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 22 | wal_level | INT | WAL 级别。需要注意,`wal_level` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 23 | wal_fsync_period | INT | 数据落盘周期。需要注意,`wal_fsync_period` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | @@ -197,7 +197,7 @@ TDengine 内置了一个名为 
`INFORMATION_SCHEMA` 的数据库,提供对数 ## INS_USERS -提供系统中创建的用户的相关信息。 +提供系统中创建的用户的相关信息. SYSINFO 属性为0 的用户不能查看此表。 | # | **列名** | **数据类型** | **说明** | | --- | :---------: | ------------ | -------- | @@ -207,7 +207,7 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 ## INS_GRANTS -提供企业版授权的相关信息。 +提供企业版授权的相关信息。SYSINFO 属性为 0 的用户不能查看此表。 | # | **列名** | **数据类型** | **说明** | | --- | :---------: | ------------ | --------------------------------------------------------------------------------------------------------- | @@ -228,7 +228,7 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 ## INS_VGROUPS -系统中所有 vgroups 的信息。 +系统中所有 vgroups 的信息。SYSINFO 属性为 0 的用户不能查看此表。 | # | **列名** | **数据类型** | **说明** | | --- | :-------: | ------------ | ------------------------------------------------------------------------------------------------ | @@ -257,7 +257,7 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 ## INS_DNODE_VARIABLES -系统中每个 dnode 的配置参数。 +系统中每个 dnode 的配置参数。SYSINFO 属性 为 0 的用户不能查看此表。 | # | **列名** | **数据类型** | **说明** | | --- | :------: | ------------ | --------------------------------------------------------------------------------------- | @@ -301,6 +301,8 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 ## INS_USER_PRIVILEGES +注:SYSINFO 属性为 0 的用户不能查看此表。 + | # | **列名** | **数据类型** | **说明** | | --- | :----------: | ------------ | -------------------------------------------------------------------------------------------------------------------- | | 1 | user_name | VARCHAR(24) | 用户名 diff --git a/docs/zh/12-taos-sql/24-show.md b/docs/zh/12-taos-sql/24-show.md index b7ca9493d4..197a7c78d6 100644 --- a/docs/zh/12-taos-sql/24-show.md +++ b/docs/zh/12-taos-sql/24-show.md @@ -327,7 +327,7 @@ SHOW [db_name.]VGROUPS; ## SHOW VNODES ```sql -SHOW VNODES {dnode_id | dnode_endpoint}; +SHOW VNODES [ON DNODE dnode_id]; ``` 显示当前系统中所有 VNODE 或某个 DNODE 的 VNODE 的信息。 diff --git a/docs/zh/14-reference/05-taosbenchmark.md b/docs/zh/14-reference/05-taosbenchmark.md index e42d921dc7..f0d46d117d 100644 --- 
a/docs/zh/14-reference/05-taosbenchmark.md +++ b/docs/zh/14-reference/05-taosbenchmark.md @@ -395,6 +395,7 @@ taosBenchmark -A INT,DOUBLE,NCHAR,BINARY\(16\) ### 查询场景配置参数 查询场景下 `filetype` 必须设置为 `query`。 +`query_times` 指定运行查询的次数,数值类型 查询场景可以通过设置 `kill_slow_query_threshold` 和 `kill_slow_query_interval` 参数来控制杀掉慢查询语句的执行,threshold 控制如果 exec_usec 超过指定时间的查询将被 taosBenchmark 杀掉,单位为秒;interval 控制休眠时间,避免持续查询慢查询消耗 CPU ,单位为秒。 diff --git a/docs/zh/14-reference/06-taosdump.md b/docs/zh/14-reference/06-taosdump.md index 9fe3c5af7a..8972e587b0 100644 --- a/docs/zh/14-reference/06-taosdump.md +++ b/docs/zh/14-reference/06-taosdump.md @@ -106,7 +106,7 @@ Usage: taosdump [OPTION...] dbname [tbname ...] use letter and number only. Default is NOT. -n, --no-escape No escape char '`'. Default is using it. -Q, --dot-replace Repalce dot character with underline character in - the table name. + the table name.(Version 2.5.3) -T, --thread-num=THREAD_NUM Number of thread for dump in file. Default is 8. -C, --cloud=CLOUD_DSN specify a DSN to access TDengine cloud service @@ -116,6 +116,10 @@ Usage: taosdump [OPTION...] dbname [tbname ...] -?, --help Give this help list --usage Give a short usage message -V, --version Print program version + -W, --rename=RENAME-LIST Rename database name with new name during + importing data. RENAME-LIST: + "db1=newDB1|db2=newDB2" means rename db1 to newDB1 + and rename db2 to newDB2 (Version 2.5.4) Mandatory or optional arguments to long options are also mandatory or optional for any corresponding short options. 
diff --git a/docs/zh/17-operation/06-monitor.md b/docs/zh/17-operation/06-monitor.md index 4f8dccc78d..563a7fc6f7 100644 --- a/docs/zh/17-operation/06-monitor.md +++ b/docs/zh/17-operation/06-monitor.md @@ -102,22 +102,22 @@ TDinsight dashboard 数据来源于 log 库(存放监控数据的默认db, |field|type|is\_tag|comment| |:----|:---|:-----|:------| |ts|TIMESTAMP||timestamp| -|uptime|FLOAT||dnode uptime| +|uptime|FLOAT||dnode uptime,单位:天| |cpu\_engine|FLOAT||taosd cpu 使用率,从 `/proc//stat` 读取| |cpu\_system|FLOAT||服务器 cpu 使用率,从 `/proc/stat` 读取| |cpu\_cores|FLOAT||服务器 cpu 核数| |mem\_engine|INT||taosd 内存使用率,从 `/proc//status` 读取| -|mem\_system|INT||服务器可用内存| +|mem\_system|INT||服务器可用内存,单位 KB| |mem\_total|INT||服务器内存总量,单位 KB| -|disk\_engine|INT||| +|disk\_engine|INT||单位 bytes| |disk\_used|BIGINT||data dir 挂载的磁盘使用量,单位 bytes| |disk\_total|BIGINT||data dir 挂载的磁盘总容量,单位 bytes| -|net\_in|FLOAT||网络吞吐率,从 `/proc/net/dev` 中读取的 received bytes。单位 kb/s| -|net\_out|FLOAT||网络吞吐率,从 `/proc/net/dev` 中读取的 transmit bytes。单位 kb/s| -|io\_read|FLOAT||io 吞吐率,从 `/proc//io` 中读取的 rchar 与上次数值计算之后,计算得到速度。单位 kb/s| -|io\_write|FLOAT||io 吞吐率,从 `/proc//io` 中读取的 wchar 与上次数值计算之后,计算得到速度。单位 kb/s| -|io\_read\_disk|FLOAT||磁盘 io 吞吐率,从 `/proc//io` 中读取的 read_bytes。单位 kb/s| -|io\_write\_disk|FLOAT||磁盘 io 吞吐率,从 `/proc//io` 中读取的 write_bytes。单位 kb/s| +|net\_in|FLOAT||网络吞吐率,从 `/proc/net/dev` 中读取的 received bytes。单位 byte/s| +|net\_out|FLOAT||网络吞吐率,从 `/proc/net/dev` 中读取的 transmit bytes。单位 byte/s| +|io\_read|FLOAT||io 吞吐率,从 `/proc//io` 中读取的 rchar 与上次数值计算之后,计算得到速度。单位 byte/s| +|io\_write|FLOAT||io 吞吐率,从 `/proc//io` 中读取的 wchar 与上次数值计算之后,计算得到速度。单位 byte/s| +|io\_read\_disk|FLOAT||磁盘 io 吞吐率,从 `/proc//io` 中读取的 read_bytes。单位 byte/s| +|io\_write\_disk|FLOAT||磁盘 io 吞吐率,从 `/proc//io` 中读取的 write_bytes。单位 byte/s| |req\_select|INT||两个间隔内发生的查询请求数目| |req\_select\_rate|FLOAT||两个间隔内的查询请求速度 = `req_select / monitorInterval`| |req\_insert|INT||两个间隔内发生的写入请求,包含的单条数据数目| @@ -146,9 +146,9 @@ TDinsight dashboard 数据来源于 log 库(存放监控数据的默认db, |ts|TIMESTAMP||timestamp| 
|name|NCHAR||data 目录,一般为 `/var/lib/taos`| |level|INT||0、1、2 多级存储级别| -|avail|BIGINT||data 目录可用空间| -|used|BIGINT||data 目录已使用空间| -|total|BIGINT||data 目录空间| +|avail|BIGINT||data 目录可用空间。单位 byte| +|used|BIGINT||data 目录已使用空间。单位 byte| +|total|BIGINT||data 目录空间。单位 byte| |dnode\_id|INT|TAG|dnode id| |dnode\_ep|NCHAR|TAG|dnode endpoint| |cluster\_id|NCHAR|TAG|cluster id| @@ -161,9 +161,9 @@ TDinsight dashboard 数据来源于 log 库(存放监控数据的默认db, |:----|:---|:-----|:------| |ts|TIMESTAMP||timestamp| |name|NCHAR||log 目录名,一般为 `/var/log/taos/`| -|avail|BIGINT||log 目录可用空间| -|used|BIGINT||log 目录已使用空间| -|total|BIGINT||log 目录空间| +|avail|BIGINT||log 目录可用空间。单位 byte| +|used|BIGINT||log 目录已使用空间。单位 byte| +|total|BIGINT||log 目录空间。单位 byte| |dnode\_id|INT|TAG|dnode id| |dnode\_ep|NCHAR|TAG|dnode endpoint| |cluster\_id|NCHAR|TAG|cluster id| @@ -176,9 +176,9 @@ TDinsight dashboard 数据来源于 log 库(存放监控数据的默认db, |:----|:---|:-----|:------| |ts|TIMESTAMP||timestamp| |name|NCHAR||temp 目录名,一般为 `/tmp/`| -|avail|BIGINT||temp 目录可用空间| -|used|BIGINT||temp 目录已使用空间| -|total|BIGINT||temp 目录空间| +|avail|BIGINT||temp 目录可用空间。单位 byte| +|used|BIGINT||temp 目录已使用空间。单位 byte| +|total|BIGINT||temp 目录空间。单位 byte| |dnode\_id|INT|TAG|dnode id| |dnode\_ep|NCHAR|TAG|dnode endpoint| |cluster\_id|NCHAR|TAG|cluster id| diff --git a/examples/JDBC/JDBCDemo/README-jdbc-windows.md b/examples/JDBC/JDBCDemo/README-jdbc-windows.md index 5a781f40f7..e91a953cd1 100644 --- a/examples/JDBC/JDBCDemo/README-jdbc-windows.md +++ b/examples/JDBC/JDBCDemo/README-jdbc-windows.md @@ -44,17 +44,17 @@ OS name: "windows 10", version: "10.0", arch: "amd64", family: "windows" - + D:\apache-maven-localRepository - - alimaven - aliyun maven - http://maven.aliyun.com/nexus/content/groups/public/ - central - + + alimaven + aliyun maven + http://maven.aliyun.com/nexus/content/groups/public/ + central + @@ -126,7 +126,7 @@ https://www.taosdata.com/cn/all-downloads/ 修改client的hosts文件(C:\Windows\System32\drivers\etc\hosts),将server的hostname和ip配置到client的hosts文件中 ``` 
-192.168.236.136 td01 +192.168.236.136 td01 ``` 配置完成后,在命令行内使用TDengine CLI连接server端 diff --git a/examples/go/BUILD.md b/examples/go/BUILD.md new file mode 100644 index 0000000000..dd607001cc --- /dev/null +++ b/examples/go/BUILD.md @@ -0,0 +1,3 @@ +go mod init demo +go mod tidy +go build diff --git a/include/common/ttime.h b/include/common/ttime.h index de74e48100..37e3045817 100644 --- a/include/common/ttime.h +++ b/include/common/ttime.h @@ -75,7 +75,7 @@ static FORCE_INLINE int64_t taosGetTimestampToday(int32_t precision) { int64_t taosTimeAdd(int64_t t, int64_t duration, char unit, int32_t precision); int64_t taosTimeTruncate(int64_t ts, const SInterval* pInterval); -int32_t taosTimeCountInterval(int64_t skey, int64_t ekey, int64_t interval, char unit, int32_t precision); +int32_t taosTimeCountIntervalForFill(int64_t skey, int64_t ekey, int64_t interval, char unit, int32_t precision, int32_t order); int32_t parseAbsoluteDuration(const char* token, int32_t tokenlen, int64_t* ts, char* unit, int32_t timePrecision); int32_t parseNatualDuration(const char* token, int32_t tokenLen, int64_t* duration, char* unit, int32_t timePrecision); diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index ea3524d12d..2d70bb1e1c 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -106,19 +106,18 @@ typedef struct { } SStreamQueueItem; typedef void FTbSink(SStreamTask* pTask, void* vnode, void* data); +typedef void FSmaSink(void* vnode, int64_t smaId, const SArray* data); typedef int32_t FTaskExpand(void* ahandle, SStreamTask* pTask, int64_t ver); typedef struct { int8_t type; int64_t ver; - int32_t* dataRef; SPackedData submit; } SStreamDataSubmit; typedef struct { int8_t type; int64_t ver; - SArray* dataRefs; // SArray SArray* submits; // SArray } SStreamMergedSubmit; @@ -156,8 +155,6 @@ typedef struct { int64_t size; } SStreamQueueRes; -void streamFreeQitem(SStreamQueueItem* data); - #if 0 bool streamQueueResEmpty(const 
SStreamQueueRes* pRes); int64_t streamQueueResSize(const SStreamQueueRes* pRes); @@ -187,12 +184,6 @@ typedef struct { int32_t streamInit(); void streamCleanUp(); -SStreamQueue* streamQueueOpen(int64_t cap); -void streamQueueClose(SStreamQueue* pQueue, int32_t taskId); -void streamQueueProcessSuccess(SStreamQueue* queue); -void streamQueueProcessFail(SStreamQueue* queue); -void* streamQueueNextItem(SStreamQueue* pQueue); - SStreamDataSubmit* streamDataSubmitNew(SPackedData* pData, int32_t type); void streamDataSubmitDestroy(SStreamDataSubmit* pDataSubmit); @@ -224,8 +215,6 @@ typedef struct { SSHashObj* pTblInfo; } STaskSinkTb; -typedef void FSmaSink(void* vnode, int64_t smaId, const SArray* data); - typedef struct { int64_t smaId; // following are not applicable to encoder and decoder @@ -246,10 +235,10 @@ typedef struct SStreamChildEpInfo { int64_t stage; // upstream task stage value, to denote if the upstream node has restart/replica changed/transfer } SStreamChildEpInfo; -typedef struct SStreamTaskKey { +typedef struct STaskId { int64_t streamId; - int32_t taskId; -} SStreamTaskKey; + int64_t taskId; +} STaskId; typedef struct SStreamTaskId { int64_t streamId; @@ -260,7 +249,7 @@ typedef struct SStreamTaskId { typedef struct SCheckpointInfo { int64_t checkpointId; int64_t checkpointVer; // latest checkpointId version - int64_t currentVer; // current offset in WAL, not serialize it + int64_t nextProcessVer; // current offset in WAL, not serialize it } SCheckpointInfo; typedef struct SStreamStatus { @@ -312,11 +301,24 @@ typedef struct STaskSchedInfo { void* pTimer; } STaskSchedInfo; +typedef struct SSinkTaskRecorder { + int64_t numOfSubmit; + int64_t numOfBlocks; + int64_t numOfRows; +} SSinkTaskRecorder; + typedef struct { + int64_t created; int64_t init; int64_t step1Start; int64_t step2Start; -} STaskTimestamp; + int64_t start; + int32_t updateCount; + int64_t latestUpdateTs; +} STaskExecStatisInfo; + +typedef struct STokenBucket STokenBucket; +typedef struct 
SMetaHbInfo SMetaHbInfo; struct SStreamTask { int64_t ver; @@ -330,9 +332,9 @@ struct SStreamTask { SCheckpointInfo chkInfo; STaskExec exec; SDataRange dataRange; - SStreamTaskId historyTaskId; - SStreamTaskId streamTaskId; - STaskTimestamp tsInfo; + STaskId historyTaskId; + STaskId streamTaskId; + STaskExecStatisInfo taskExecInfo; SArray* pReadyMsgList; // SArray TdThreadMutex lock; // secure the operation of set task status and puting data into inputQ SArray* pUpstreamInfoList; @@ -345,6 +347,8 @@ struct SStreamTask { STaskSinkSma smaSink; STaskSinkFetch fetchSink; }; + SSinkTaskRecorder sinkRecorder; + STokenBucket* pTokenBucket; void* launchTaskTimer; SMsgCb* pMsgCb; // msg handle @@ -366,34 +370,31 @@ struct SStreamTask { char reserve[256]; }; -typedef struct SMetaHbInfo { - tmr_h hbTmr; - int32_t stopFlag; - int32_t tickCounter; -} SMetaHbInfo; - // meta typedef struct SStreamMeta { char* path; TDB* db; TTB* pTaskDb; TTB* pCheckpointDb; - SHashObj* pTasks; - SArray* pTaskList; // SArray + SHashObj* pTasksMap; + SArray* pTaskList; // SArray void* ahandle; TXN* txn; FTaskExpand* expandFunc; int32_t vgId; int64_t stage; + bool leader; + int8_t taskWillbeLaunched; SRWLatch lock; int32_t walScanCounter; void* streamBackend; int64_t streamBackendRid; SHashObj* pTaskBackendUnique; TdThreadMutex backendMutex; - SMetaHbInfo hbInfo; - int32_t closedTask; - int32_t totalTasks; // this value should be increased when a new task is added into the meta + SMetaHbInfo* pHbInfo; + SHashObj* pUpdateTaskSet; + int32_t numOfStreamTasks; // this value should be increased when a new task is added into the meta + int32_t numOfPausedTasks; int32_t chkptNotReadyTasks; int64_t rid; @@ -402,7 +403,6 @@ typedef struct SStreamMeta { SArray* chkpInUse; int32_t chkpCap; SRWLatch chkpDirLock; - int32_t pauseTaskNum; } SStreamMeta; int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo); @@ -416,11 +416,12 @@ void tFreeStreamTask(SStreamTask* pTask); int32_t 
streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, int64_t ver); int32_t tDecodeStreamTaskChkInfo(SDecoder* pDecoder, SCheckpointInfo* pChkpInfo); -int32_t tDecodeStreamTaskId(SDecoder* pDecoder, SStreamTaskId* pTaskId); +int32_t tDecodeStreamTaskId(SDecoder* pDecoder, STaskId* pTaskId); int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem); +int32_t streamTaskPutDataIntoOutputQ(SStreamTask* pTask, SStreamDataBlock* pBlock); int32_t streamTaskPutTranstateIntoInputQ(SStreamTask* pTask); -bool streamQueueIsFull(const STaosQueue* pQueue); +bool streamQueueIsFull(const STaosQueue* pQueue, bool inputQ); typedef struct { SMsgHead head; @@ -506,7 +507,7 @@ typedef struct { int32_t downstreamTaskId; int32_t upstreamNodeId; int32_t childId; -} SStreamScanHistoryFinishReq, SStreamTransferReq; +} SStreamScanHistoryFinishReq; int32_t tEncodeStreamScanHistoryFinishReq(SEncoder* pEncoder, const SStreamScanHistoryFinishReq* pReq); int32_t tDecodeStreamScanHistoryFinishReq(SDecoder* pDecoder, SStreamScanHistoryFinishReq* pReq); @@ -552,8 +553,7 @@ int32_t tEncodeStreamCheckpointReadyMsg(SEncoder* pEncoder, const SStreamCheckpo int32_t tDecodeStreamCheckpointReadyMsg(SDecoder* pDecoder, SStreamCheckpointReadyMsg* pRsp); typedef struct STaskStatusEntry { - int64_t streamId; - int32_t taskId; + STaskId id; int32_t status; } STaskStatusEntry; @@ -630,12 +630,10 @@ SStreamChildEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t void streamTaskInputFail(SStreamTask* pTask); int32_t streamTryExec(SStreamTask* pTask); int32_t streamSchedExec(SStreamTask* pTask); -int32_t streamTaskOutputResultBlock(SStreamTask* pTask, SStreamDataBlock* pBlock); bool streamTaskShouldStop(const SStreamStatus* pStatus); bool streamTaskShouldPause(const SStreamStatus* pStatus); bool streamTaskIsIdle(const SStreamTask* pTask); -int32_t streamScanExec(SStreamTask* pTask, int32_t batchSize); void initRpcMsg(SRpcMsg* pMsg, int32_t msgType, void* 
pCont, int32_t contLen); char* createStreamTaskIdStr(int64_t streamId, int32_t taskId); @@ -646,6 +644,9 @@ int32_t streamTaskLaunchScanHistory(SStreamTask* pTask); int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage); int32_t streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList); void streamTaskResetUpstreamStageInfo(SStreamTask* pTask); +int8_t streamTaskSetSchedStatusWait(SStreamTask* pTask); +int8_t streamTaskSetSchedStatusActive(SStreamTask* pTask); +int8_t streamTaskSetSchedStatusInActive(SStreamTask* pTask); int32_t streamTaskStop(SStreamTask* pTask); int32_t streamSendCheckRsp(const SStreamMeta* pMeta, const SStreamTaskCheckReq* pReq, SStreamTaskCheckRsp* pRsp, @@ -655,7 +656,7 @@ int32_t streamLaunchFillHistoryTask(SStreamTask* pTask); int32_t streamTaskScanHistoryDataComplete(SStreamTask* pTask); int32_t streamStartScanHistoryAsync(SStreamTask* pTask, int8_t igUntreated); bool streamHistoryTaskSetVerRangeStep2(SStreamTask* pTask, int64_t latestVer); -int32_t streamTaskGetInputQItems(const SStreamTask* pTask); +int32_t streamQueueGetNumOfItems(const SStreamQueue* pQueue); // common int32_t streamRestoreParam(SStreamTask* pTask); @@ -679,11 +680,10 @@ void streamTaskOpenAllUpstreamInput(SStreamTask* pTask); // source level int32_t streamSetParamForStreamScannerStep1(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow); int32_t streamSetParamForStreamScannerStep2(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow); -int32_t streamSourceScanHistoryData(SStreamTask* pTask); +int32_t streamScanHistoryData(SStreamTask* pTask); int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask); // agg level -int32_t streamTaskScanHistoryPrepare(SStreamTask* pTask); int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, SRpcHandleInfo* pRpcInfo); int32_t streamProcessScanHistoryFinishRsp(SStreamTask* pTask); @@ -694,24 +694,25 @@ void 
streamMetaCleanup(); SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId, int64_t stage); void streamMetaClose(SStreamMeta* streamMeta); int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask); // save to stream meta store -int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int64_t* pKey); +int32_t streamMetaRemoveTask(SStreamMeta* pMeta, STaskId* pKey); int32_t streamMetaRegisterTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask, bool* pAdded); int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId); int32_t streamMetaGetNumOfTasks(SStreamMeta* pMeta); int32_t streamMetaGetNumOfStreamTasks(SStreamMeta* pMeta); SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId); void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask); -int32_t streamMetaReopen(SStreamMeta* pMeta, int64_t chkpId); +int32_t streamMetaReopen(SStreamMeta* pMeta); int32_t streamMetaCommit(SStreamMeta* pMeta); int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta); void streamMetaNotifyClose(SStreamMeta* pMeta); +void streamMetaStartHb(SStreamMeta* pMeta); // checkpoint int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask); int32_t streamAlignTransferState(SStreamTask* pTask); - +int32_t streamBuildAndSendDropTaskMsg(SStreamTask* pTask, int32_t vgId, SStreamTaskId* pTaskId); int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SStreamTask* pTask, int8_t isSucceed); int32_t buildCheckpointSourceRsp(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SRpcMsg* pMsg, diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h index cfe70a186c..a56a5567eb 100644 --- a/include/libs/wal/wal.h +++ b/include/libs/wal/wal.h @@ -192,7 +192,7 @@ int32_t walApplyVer(SWal *, int64_t ver); // int32_t walDataCorrupted(SWal*); // 
wal reader -SWalReader *walOpenReader(SWal *, SWalFilterCond *pCond); +SWalReader *walOpenReader(SWal *, SWalFilterCond *pCond, int64_t id); void walCloseReader(SWalReader *pRead); void walReadReset(SWalReader *pReader); int32_t walReadVer(SWalReader *pRead, int64_t ver); diff --git a/include/os/osFile.h b/include/os/osFile.h index da1f8f8b57..63483dc906 100644 --- a/include/os/osFile.h +++ b/include/os/osFile.h @@ -112,6 +112,8 @@ int32_t taosGetErrorFile(TdFilePtr pFile); int32_t taosCompressFile(char *srcFileName, char *destFileName); +int32_t taosSetFileHandlesLimit(); + #ifdef __cplusplus } #endif diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 4a3f0d3a70..613e33fec7 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -788,7 +788,7 @@ int32_t* taosGetErrno(); // stream #define TSDB_CODE_STREAM_TASK_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x4100) -#define TSDB_CODE_STREAM_BACKPRESSURE_OUT_OF_QUEUE TAOS_DEF_ERROR_CODE(0, 0x4101) +#define TSDB_CODE_STREAM_EXEC_CANCELLED TAOS_DEF_ERROR_CODE(0, 0x4102) // TDLite #define TSDB_CODE_TDLITE_IVLD_OPEN_FLAGS TAOS_DEF_ERROR_CODE(0, 0x5100) diff --git a/packaging/tools/install.sh b/packaging/tools/install.sh index 408a5664a8..19efa7b169 100755 --- a/packaging/tools/install.sh +++ b/packaging/tools/install.sh @@ -33,17 +33,14 @@ adapterName="taosadapter" benchmarkName="taosBenchmark" dumpName="taosdump" demoName="taosdemo" -xname="taosx" clientName2="taos" serverName2="${clientName2}d" configFile2="${clientName2}.cfg" productName2="TDengine" emailName2="taosdata.com" -xname2="${clientName2}x" adapterName2="${clientName2}adapter" -explorerName="${clientName2}-explorer" benchmarkName2="${clientName2}Benchmark" demoName2="${clientName2}demo" dumpName2="${clientName2}dump" @@ -218,8 +215,6 @@ function install_bin() { ${csudo}rm -f ${bin_link_dir}/${demoName2} || : ${csudo}rm -f ${bin_link_dir}/${benchmarkName2} || : ${csudo}rm -f ${bin_link_dir}/${dumpName2} || : - ${csudo}rm -f 
${bin_link_dir}/${xname2} || : - ${csudo}rm -f ${bin_link_dir}/${explorerName} || : ${csudo}rm -f ${bin_link_dir}/set_core || : ${csudo}rm -f ${bin_link_dir}/TDinsight.sh || : @@ -233,8 +228,6 @@ function install_bin() { [ -x ${install_main_dir}/bin/${benchmarkName2} ] && ${csudo}ln -sf ${install_main_dir}/bin/${benchmarkName2} ${bin_link_dir}/${demoName2} || : [ -x ${install_main_dir}/bin/${benchmarkName2} ] && ${csudo}ln -sf ${install_main_dir}/bin/${benchmarkName2} ${bin_link_dir}/${benchmarkName2} || : [ -x ${install_main_dir}/bin/${dumpName2} ] && ${csudo}ln -sf ${install_main_dir}/bin/${dumpName2} ${bin_link_dir}/${dumpName2} || : - [ -x ${install_main_dir}/bin/${xname2} ] && ${csudo}ln -sf ${install_main_dir}/bin/${xname2} ${bin_link_dir}/${xname2} || : - [ -x ${install_main_dir}/bin/${explorerName} ] && ${csudo}ln -sf ${install_main_dir}/bin/${explorerName} ${bin_link_dir}/${explorerName} || : [ -x ${install_main_dir}/bin/TDinsight.sh ] && ${csudo}ln -sf ${install_main_dir}/bin/TDinsight.sh ${bin_link_dir}/TDinsight.sh || : if [ "$clientName2" == "${clientName}" ]; then [ -x ${install_main_dir}/bin/remove.sh ] && ${csudo}ln -s ${install_main_dir}/bin/remove.sh ${bin_link_dir}/${uninstallScript} || : @@ -703,26 +696,6 @@ function clean_service_on_systemd() { # if [ "$verMode" == "cluster" ] && [ "$clientName" != "$clientName2" ]; then # ${csudo}rm -f ${service_config_dir}/${serverName2}.service # fi - x_service_config="${service_config_dir}/${xName2}.service" - if [ -e "$x_service_config" ]; then - if systemctl is-active --quiet ${xName2}; then - echo "${productName2} ${xName2} is running, stopping it..." 
- ${csudo}systemctl stop ${xName2} &>/dev/null || echo &>/dev/null - fi - ${csudo}systemctl disable ${xName2} &>/dev/null || echo &>/dev/null - ${csudo}rm -f ${x_service_config} - fi - - explorer_service_config="${service_config_dir}/${explorerName2}.service" - if [ -e "$explorer_service_config" ]; then - if systemctl is-active --quiet ${explorerName2}; then - echo "${productName2} ${explorerName2} is running, stopping it..." - ${csudo}systemctl stop ${explorerName2} &>/dev/null || echo &>/dev/null - fi - ${csudo}systemctl disable ${explorerName2} &>/dev/null || echo &>/dev/null - ${csudo}rm -f ${explorer_service_config} - ${csudo}rm -f /etc/${clientName2}/explorer.toml - fi } function install_service_on_systemd() { diff --git a/packaging/tools/makepkg.sh b/packaging/tools/makepkg.sh index 9e70a6bbf1..42465b8783 100755 --- a/packaging/tools/makepkg.sh +++ b/packaging/tools/makepkg.sh @@ -89,7 +89,7 @@ else ${build_dir}/bin/taosBenchmark \ ${build_dir}/bin/TDinsight.sh \ ${build_dir}/bin/tdengine-datasource.zip \ - ${build_dir}/bin/tdengine-datasource.zip.md5sum" + ${build_dir}/bin/tdengine-datasource.zip.md5" fi [ -f ${build_dir}/bin/taosx ] && taosx_bin="${build_dir}/bin/taosx" diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index b555f4e683..dbddf9cac6 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -380,8 +380,7 @@ void destroySubRequests(SRequestObj *pRequest) { pReqList[++reqIdx] = pTmp; releaseRequest(tmpRefId); } else { - tscError("0x%" PRIx64 ", prev req ref 0x%" PRIx64 " is not there, reqId:0x%" PRIx64, pTmp->self, tmpRefId, - pTmp->requestId); + tscError("prev req ref 0x%" PRIx64 " is not there", tmpRefId); break; } } @@ -398,7 +397,7 @@ void destroySubRequests(SRequestObj *pRequest) { removeRequest(pTmp->self); releaseRequest(pTmp->self); } else { - tscError("0x%" PRIx64 " is not there", tmpRefId); + tscError("next req ref 0x%" PRIx64 " is not there", tmpRefId); break; } } @@ -492,8 +491,7 @@ 
void stopAllQueries(SRequestObj *pRequest) { pReqList[++reqIdx] = pTmp; releaseRequest(tmpRefId); } else { - tscError("0x%" PRIx64 ", prev req ref 0x%" PRIx64 " is not there, reqId:0x%" PRIx64, pTmp->self, tmpRefId, - pTmp->requestId); + tscError("prev req ref 0x%" PRIx64 " is not there", tmpRefId); break; } } @@ -512,7 +510,7 @@ void stopAllQueries(SRequestObj *pRequest) { taosStopQueryImpl(pTmp); releaseRequest(pTmp->self); } else { - tscError("0x%" PRIx64 " is not there", tmpRefId); + tscError("next req ref 0x%" PRIx64 " is not there", tmpRefId); break; } } diff --git a/source/client/src/clientMain.c b/source/client/src/clientMain.c index 73b4ec2a74..ac7a6e6646 100644 --- a/source/client/src/clientMain.c +++ b/source/client/src/clientMain.c @@ -874,8 +874,13 @@ void handleSubQueryFromAnalyse(SSqlCallbackWrapper *pWrapper, SMetaData *pResult if (TSDB_CODE_SUCCESS == code) { code = cloneCatalogReq(&pNewWrapper->pCatalogReq, pWrapper->pCatalogReq); } - doAsyncQueryFromAnalyse(pResultMeta, pNewWrapper, code); - nodesDestroyNode(pRoot); + if (TSDB_CODE_SUCCESS == code) { + doAsyncQueryFromAnalyse(pResultMeta, pNewWrapper, code); + nodesDestroyNode(pRoot); + } else { + handleQueryAnslyseRes(pWrapper, pResultMeta, code); + return; + } } void handleQueryAnslyseRes(SSqlCallbackWrapper *pWrapper, SMetaData *pResultMeta, int32_t code) { @@ -1148,8 +1153,7 @@ void restartAsyncQuery(SRequestObj *pRequest, int32_t code) { pReqList[++reqIdx] = pTmp; releaseRequest(tmpRefId); } else { - tscError("0x%" PRIx64 ", prev req ref 0x%" PRIx64 " is not there, reqId:0x%" PRIx64, pTmp->self, tmpRefId, - pTmp->requestId); + tscError("prev req ref 0x%" PRIx64 " is not there", tmpRefId); break; } } @@ -1162,7 +1166,7 @@ void restartAsyncQuery(SRequestObj *pRequest, int32_t code) { removeRequest(pTmp->self); releaseRequest(pTmp->self); } else { - tscError("0x%" PRIx64 " is not there", tmpRefId); + tscError("next req ref 0x%" PRIx64 " is not there", tmpRefId); break; } } diff --git 
a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index f7bb6f85e2..53646b84b3 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -2139,22 +2139,16 @@ int32_t buildCtbNameByGroupIdImpl(const char* stbFullName, uint64_t groupId, cha return TSDB_CODE_FAILED; } - SSmlKv pTag = {.key = "group_id", - .keyLen = sizeof("group_id") - 1, - .type = TSDB_DATA_TYPE_UBIGINT, - .u = groupId, - .length = sizeof(uint64_t)}; + int8_t type = TSDB_DATA_TYPE_UBIGINT; + const char* name = "group_id"; + int32_t len = strlen(name); + SSmlKv pTag = { .key = name, .keyLen = len, .type = type, .u = groupId, .length = sizeof(uint64_t)}; taosArrayPush(tags, &pTag); RandTableName rname = { - .tags = tags, - .stbFullName = stbFullName, - .stbFullNameLen = strlen(stbFullName), - .ctbShortName = cname, - }; + .tags = tags, .stbFullName = stbFullName, .stbFullNameLen = strlen(stbFullName), .ctbShortName = cname}; buildChildTableName(&rname); - taosArrayDestroy(tags); if ((rname.ctbShortName && rname.ctbShortName[0]) == 0) { diff --git a/source/common/src/tdataformat.c b/source/common/src/tdataformat.c index 7b8f0e67fb..62504139f0 100644 --- a/source/common/src/tdataformat.c +++ b/source/common/src/tdataformat.c @@ -2460,10 +2460,10 @@ int32_t tColDataAddValueByDataBlock(SColData *pColData, int8_t type, int32_t byt code = tColDataAppendValueImpl[pColData->flag][CV_FLAG_NULL](pColData, NULL, 0); if (code) goto _exit; } else { - if (ASSERT(varDataTLen(data + offset) <= bytes)) { + if (varDataTLen(data + offset) > bytes) { uError("var data length invalid, varDataTLen(data + offset):%d <= bytes:%d", (int)varDataTLen(data + offset), bytes); - code = TSDB_CODE_INVALID_PARA; + code = TSDB_CODE_PAR_VALUE_TOO_LONG; goto _exit; } code = tColDataAppendValueImpl[pColData->flag][CV_FLAG_VALUE](pColData, (uint8_t *)varDataVal(data + offset), @@ -3588,5 +3588,5 @@ void (*tColDataCalcSMA[])(SColData *pColData, int64_t *sum, int64_t *max, int64_ NULL, // 
TSDB_DATA_TYPE_DECIMAL NULL, // TSDB_DATA_TYPE_BLOB NULL, // TSDB_DATA_TYPE_MEDIUMBLOB - NULL // TSDB_DATA_TYPE_GEOMETRY + tColDataCalcSMAVarType // TSDB_DATA_TYPE_GEOMETRY }; diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index ff9e922ee1..1578a9ea4c 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -240,7 +240,7 @@ int32_t tsTtlBatchDropNum = 10000; // number of tables dropped per batch // internal int32_t tsTransPullupInterval = 2; int32_t tsMqRebalanceInterval = 2; -int32_t tsStreamCheckpointTickInterval = 600; +int32_t tsStreamCheckpointTickInterval = 300; int32_t tsStreamNodeCheckInterval = 10; int32_t tsTtlUnit = 86400; int32_t tsTtlPushIntervalSec = 10; @@ -477,7 +477,7 @@ static int32_t taosAddSystemCfg(SConfig *pCfg) { if (cfgAddTimezone(pCfg, "timezone", tsTimezoneStr, CFG_SCOPE_BOTH) != 0) return -1; if (cfgAddLocale(pCfg, "locale", tsLocale, CFG_SCOPE_BOTH) != 0) return -1; if (cfgAddCharset(pCfg, "charset", tsCharset, CFG_SCOPE_BOTH) != 0) return -1; - if (cfgAddBool(pCfg, "assert", 1, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "assert", tsAssert, CFG_SCOPE_BOTH) != 0) return -1; if (cfgAddBool(pCfg, "enableCoreFile", 1, CFG_SCOPE_BOTH) != 0) return -1; if (cfgAddFloat(pCfg, "numOfCores", tsNumOfCores, 1, 100000, CFG_SCOPE_BOTH) != 0) return -1; @@ -1598,6 +1598,7 @@ int32_t taosInitCfg(const char *cfgDir, const char **envCmd, const char *envFile if (taosSetS3Cfg(tsCfg) != 0) return -1; } taosSetSystemCfg(tsCfg); + if (taosSetFileHandlesLimit() != 0) return -1; cfgDumpCfg(tsCfg, tsc, false); diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 04c86938de..0a13147247 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -4847,7 +4847,7 @@ int32_t tDeserializeSAlterVnodeReplicaReq(void *buf, int32_t bufLen, SAlterVnode if (tDecodeSReplica(&decoder, pReplica) < 0) return -1; } } - if (!tDecodeIsEnd(&decoder)){ + if (!tDecodeIsEnd(&decoder)) { if 
(tDecodeI32(&decoder, &pReq->changeVersion) < 0) return -1; } @@ -5736,17 +5736,17 @@ void tFreeSSubQueryMsg(SSubQueryMsg *pReq) { taosMemoryFreeClear(pReq->msg); } -int32_t tSerializeSOperatorParam(SEncoder* pEncoder, SOperatorParam* pOpParam) { +int32_t tSerializeSOperatorParam(SEncoder *pEncoder, SOperatorParam *pOpParam) { if (tEncodeI32(pEncoder, pOpParam->opType) < 0) return -1; if (tEncodeI32(pEncoder, pOpParam->downstreamIdx) < 0) return -1; switch (pOpParam->opType) { case QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN: { - STableScanOperatorParam* pScan = (STableScanOperatorParam*)pOpParam->value; + STableScanOperatorParam *pScan = (STableScanOperatorParam *)pOpParam->value; if (tEncodeI8(pEncoder, pScan->tableSeq) < 0) return -1; int32_t uidNum = taosArrayGetSize(pScan->pUidList); if (tEncodeI32(pEncoder, uidNum) < 0) return -1; for (int32_t m = 0; m < uidNum; ++m) { - int64_t* pUid = taosArrayGet(pScan->pUidList, m); + int64_t *pUid = taosArrayGet(pScan->pUidList, m); if (tEncodeI64(pEncoder, *pUid) < 0) return -1; } break; @@ -5754,31 +5754,32 @@ int32_t tSerializeSOperatorParam(SEncoder* pEncoder, SOperatorParam* pOpParam) { default: return TSDB_CODE_INVALID_PARA; } - + int32_t n = taosArrayGetSize(pOpParam->pChildren); if (tEncodeI32(pEncoder, n) < 0) return -1; for (int32_t i = 0; i < n; ++i) { - SOperatorParam* pChild = *(SOperatorParam**)taosArrayGet(pOpParam->pChildren, i); + SOperatorParam *pChild = *(SOperatorParam **)taosArrayGet(pOpParam->pChildren, i); if (tSerializeSOperatorParam(pEncoder, pChild) < 0) return -1; } return 0; } -int32_t tDeserializeSOperatorParam(SDecoder *pDecoder, SOperatorParam* pOpParam) { +int32_t tDeserializeSOperatorParam(SDecoder *pDecoder, SOperatorParam *pOpParam) { if (tDecodeI32(pDecoder, &pOpParam->opType) < 0) return -1; if (tDecodeI32(pDecoder, &pOpParam->downstreamIdx) < 0) return -1; switch (pOpParam->opType) { case QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN: { - STableScanOperatorParam* pScan = 
taosMemoryMalloc(sizeof(STableScanOperatorParam)); + STableScanOperatorParam *pScan = taosMemoryMalloc(sizeof(STableScanOperatorParam)); if (NULL == pScan) return -1; - if (tDecodeI8(pDecoder, (int8_t*)&pScan->tableSeq) < 0) return -1; + if (tDecodeI8(pDecoder, (int8_t *)&pScan->tableSeq) < 0) return -1; int32_t uidNum = 0; int64_t uid = 0; if (tDecodeI32(pDecoder, &uidNum) < 0) return -1; if (uidNum > 0) { pScan->pUidList = taosArrayInit(uidNum, sizeof(int64_t)); if (NULL == pScan->pUidList) return -1; + for (int32_t m = 0; m < uidNum; ++m) { if (tDecodeI64(pDecoder, &uid) < 0) return -1; taosArrayPush(pScan->pUidList, &uid); @@ -5795,11 +5796,12 @@ int32_t tDeserializeSOperatorParam(SDecoder *pDecoder, SOperatorParam* pOpParam) int32_t childrenNum = 0; if (tDecodeI32(pDecoder, &childrenNum) < 0) return -1; + if (childrenNum > 0) { pOpParam->pChildren = taosArrayInit(childrenNum, POINTER_BYTES); if (NULL == pOpParam->pChildren) return -1; for (int32_t i = 0; i < childrenNum; ++i) { - SOperatorParam* pChild = taosMemoryCalloc(1, sizeof(SOperatorParam)); + SOperatorParam *pChild = taosMemoryCalloc(1, sizeof(SOperatorParam)); if (NULL == pChild) return -1; if (tDeserializeSOperatorParam(pDecoder, pChild) < 0) return -1; taosArrayPush(pOpParam->pChildren, &pChild); @@ -5811,7 +5813,6 @@ int32_t tDeserializeSOperatorParam(SDecoder *pDecoder, SOperatorParam* pOpParam) return 0; } - int32_t tSerializeSResFetchReq(void *buf, int32_t bufLen, SResFetchReq *pReq) { int32_t headLen = sizeof(SMsgHead); if (buf != NULL) { @@ -5872,7 +5873,7 @@ int32_t tDeserializeSResFetchReq(void *buf, int32_t bufLen, SResFetchReq *pReq) if (NULL == pReq->pOpParam) return -1; if (tDeserializeSOperatorParam(&decoder, pReq->pOpParam) < 0) return -1; } - + tEndDecode(&decoder); tDecoderClear(&decoder); @@ -6060,7 +6061,7 @@ int32_t tDeserializeSTaskNotifyReq(void *buf, int32_t bufLen, STaskNotifyReq *pR if (tDecodeU64(&decoder, &pReq->taskId) < 0) return -1; if (tDecodeI64(&decoder, &pReq->refId) 
< 0) return -1; if (tDecodeI32(&decoder, &pReq->execId) < 0) return -1; - if (tDecodeI32(&decoder, (int32_t*)&pReq->type) < 0) return -1; + if (tDecodeI32(&decoder, (int32_t *)&pReq->type) < 0) return -1; tEndDecode(&decoder); @@ -6068,7 +6069,6 @@ int32_t tDeserializeSTaskNotifyReq(void *buf, int32_t bufLen, STaskNotifyReq *pR return 0; } - int32_t tSerializeSQueryTableRsp(void *buf, int32_t bufLen, SQueryTableRsp *pRsp) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); @@ -6086,7 +6086,7 @@ int32_t tSerializeSQueryTableRsp(void *buf, int32_t bufLen, SQueryTableRsp *pRsp if (tEncodeI32(&encoder, pVer->tversion) < 0) return -1; } } - + tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -6114,7 +6114,7 @@ int32_t tDeserializeSQueryTableRsp(void *buf, int32_t bufLen, SQueryTableRsp *pR if (tDecodeI32(&decoder, &tbVer.tversion) < 0) return -1; if (NULL == taosArrayPush(pRsp->tbVerInfo, &tbVer)) return -1; } - + tEndDecode(&decoder); tDecoderClear(&decoder); diff --git a/source/common/src/ttime.c b/source/common/src/ttime.c index e9313e0591..425218f0e1 100644 --- a/source/common/src/ttime.c +++ b/source/common/src/ttime.c @@ -692,38 +692,72 @@ int64_t taosTimeAdd(int64_t t, int64_t duration, char unit, int32_t precision) { return (int64_t)(taosMktime(&tm) * TSDB_TICK_PER_SECOND(precision) + fraction); } -int32_t taosTimeCountInterval(int64_t skey, int64_t ekey, int64_t interval, char unit, int32_t precision) { +/** + * @brief calc how many windows after filling between skey and ekey + * @notes for asc order + * skey ---> ekey + * ^ ^ + * _____!_____.........._____|_____.. + * |__1__) + * |__2__)...-->|_ret+1_) + * skey + ret * interval <= ekey + * skey + ret * interval + interval > ekey + * ======> (ekey - skey - interval) / interval < ret <= (ekey - skey) / interval + * For keys from blocks which do not need filling, skey + ret * interval == ekey. + * For keys need filling, skey + ret * interval <= ekey. 
+ * Total num of windows is ret + 1(the last window) + * + * for desc order + * skey <--- ekey + * ^ ^ + * _____|____..........______!____... + * |_first_) + * |__1__) + * |_ret_)<--...|__2__) + * skey >= ekey - ret * interval + * skey < ekey - ret * interval + interval + *=======> (ekey - skey) / interval <= ret < (ekey - skey + interval) / interval + * For keys from blocks which do not need filling, skey == ekey - ret * interval. + * For keys need filling, skey >= ekey - ret * interval. + * Total num of windows is ret + 1(the first window) + */ +int32_t taosTimeCountIntervalForFill(int64_t skey, int64_t ekey, int64_t interval, char unit, int32_t precision, + int32_t order) { if (ekey < skey) { int64_t tmp = ekey; ekey = skey; skey = tmp; } + int32_t ret; if (unit != 'n' && unit != 'y') { - return (int32_t)((ekey - skey) / interval); + ret = (int32_t)((ekey - skey) / interval); + if (order == TSDB_ORDER_DESC && ret * interval < (ekey - skey)) ret += 1; + } else { + skey /= (int64_t)(TSDB_TICK_PER_SECOND(precision)); + ekey /= (int64_t)(TSDB_TICK_PER_SECOND(precision)); + + struct tm tm; + time_t t = (time_t)skey; + taosLocalTime(&t, &tm, NULL); + int32_t smon = tm.tm_year * 12 + tm.tm_mon; + + t = (time_t)ekey; + taosLocalTime(&t, &tm, NULL); + int32_t emon = tm.tm_year * 12 + tm.tm_mon; + + if (unit == 'y') { + interval *= 12; + } + ret = (emon - smon) / (int32_t)interval; + if (order == TSDB_ORDER_DESC && ret * interval < (smon - emon)) ret += 1; } - - skey /= (int64_t)(TSDB_TICK_PER_SECOND(precision)); - ekey /= (int64_t)(TSDB_TICK_PER_SECOND(precision)); - - struct tm tm; - time_t t = (time_t)skey; - taosLocalTime(&t, &tm, NULL); - int32_t smon = tm.tm_year * 12 + tm.tm_mon; - - t = (time_t)ekey; - taosLocalTime(&t, &tm, NULL); - int32_t emon = tm.tm_year * 12 + tm.tm_mon; - - if (unit == 'y') { - interval *= 12; - } - - return (emon - smon) / (int32_t)interval; + return ret + 1; } int64_t taosTimeTruncate(int64_t ts, const SInterval* pInterval) { - if 
(pInterval->sliding == 0 && pInterval->interval == 0) { + if (pInterval->sliding == 0) { + ASSERT(pInterval->interval == 0); return ts; } diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index 1bf13c8fb5..0aff654f36 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -667,6 +667,7 @@ typedef struct { char name[TSDB_STREAM_FNAME_LEN]; // ctl SRWLatch lock; + // create info int64_t createTime; int64_t updateTime; diff --git a/source/dnode/mnode/impl/inc/mndUser.h b/source/dnode/mnode/impl/inc/mndUser.h index 8b930e7f18..5a99c7842c 100644 --- a/source/dnode/mnode/impl/inc/mndUser.h +++ b/source/dnode/mnode/impl/inc/mndUser.h @@ -36,6 +36,7 @@ SHashObj *mndDupTopicHash(SHashObj *pOld); int32_t mndValidateUserAuthInfo(SMnode *pMnode, SUserAuthVersion *pUsers, int32_t numOfUses, void **ppRsp, int32_t *pRspLen); int32_t mndUserRemoveDb(SMnode *pMnode, STrans *pTrans, char *db); +int32_t mndUserRemoveStb(SMnode *pMnode, STrans *pTrans, char *stb); int32_t mndUserRemoveTopic(SMnode *pMnode, STrans *pTrans, char *topic); int32_t mndUserDupObj(SUserObj *pUser, SUserObj *pNew); diff --git a/source/dnode/mnode/impl/src/mndIndex.c b/source/dnode/mnode/impl/src/mndIndex.c index 2e78116a86..041cc664e5 100644 --- a/source/dnode/mnode/impl/src/mndIndex.c +++ b/source/dnode/mnode/impl/src/mndIndex.c @@ -439,7 +439,7 @@ static int32_t mndProcessCreateIdxReq(SRpcMsg *pReq) { pDb = mndAcquireDbByStb(pMnode, createReq.stbName); if (pDb == NULL) { - terrno = TSDB_CODE_MND_INVALID_DB; + terrno = TSDB_CODE_MND_DB_NOT_EXIST; goto _OVER; } diff --git a/source/dnode/mnode/impl/src/mndProfile.c b/source/dnode/mnode/impl/src/mndProfile.c index 75c024278c..1489d46fa2 100644 --- a/source/dnode/mnode/impl/src/mndProfile.c +++ b/source/dnode/mnode/impl/src/mndProfile.c @@ -259,7 +259,7 @@ static int32_t mndProcessConnectReq(SRpcMsg *pReq) { if (pDb == NULL) { if (0 != strcmp(connReq.db, TSDB_INFORMATION_SCHEMA_DB) && 
(0 != strcmp(connReq.db, TSDB_PERFORMANCE_SCHEMA_DB))) { - terrno = TSDB_CODE_MND_INVALID_DB; + terrno = TSDB_CODE_MND_DB_NOT_EXIST; mGError("user:%s, failed to login from %s while use db:%s since %s", pReq->info.conn.user, ip, connReq.db, terrstr()); goto _OVER; @@ -313,7 +313,7 @@ _CONNECT: sprintf(obj, "%s:%d", ip, pConn->port); char detail[1000] = {0}; - sprintf(detail, "connType:%d, db:%s, pid:%d, startTime:%" PRId64 ", sVer:%s, app:%s", + sprintf(detail, "connType:%d, db:%s, pid:%d, startTime:%" PRId64 ", sVer:%s, app:%s", connReq.connType, connReq.db, connReq.pid, connReq.startTime, connReq.sVer, connReq.app); auditRecord(pReq, pMnode->clusterId, "login", connReq.user, obj, detail, strlen(detail)); diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index b51a0fc39e..471a5d994f 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -2230,7 +2230,7 @@ static int32_t mndProcessAlterStbReq(SRpcMsg *pReq) { pDb = mndAcquireDbByStb(pMnode, alterReq.name); if (pDb == NULL) { - terrno = TSDB_CODE_MND_INVALID_DB; + terrno = TSDB_CODE_MND_DB_NOT_EXIST; goto _OVER; } @@ -2342,6 +2342,7 @@ static int32_t mndDropStb(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SStbObj *p if (mndSetDropStbRedoActions(pMnode, pTrans, pDb, pStb) != 0) goto _OVER; if (mndDropIdxsByStb(pMnode, pTrans, pDb, pStb) != 0) goto _OVER; if (mndDropSmasByStb(pMnode, pTrans, pDb, pStb) != 0) goto _OVER; + if (mndUserRemoveStb(pMnode, pTrans, pStb->name) != 0) goto _OVER; if (mndTransPrepare(pMnode, pTrans) != 0) goto _OVER; code = 0; @@ -3519,7 +3520,7 @@ static int32_t mndProcessCreateIndexReq(SRpcMsg *pReq) { pDb = mndAcquireDbByStb(pMnode, tagIdxReq.dbFName); if (pDb == NULL) { - terrno = TSDB_CODE_MND_INVALID_DB; + terrno = TSDB_CODE_MND_DB_NOT_EXIST; goto _OVER; } diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 7d1d2cf936..0ece330519 100644 --- 
a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -65,9 +65,6 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq); static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq); static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq); static int32_t mndProcessStreamHb(SRpcMsg *pReq); -static int32_t mndProcessRecoverStreamReq(SRpcMsg *pReq); -static int32_t mndProcessStreamMetaReq(SRpcMsg *pReq); -static int32_t mndGetStreamMeta(SRpcMsg *pReq, SShowObj *pShow, STableMetaRsp *pMeta); static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows); static void mndCancelGetNextStream(SMnode *pMnode, void *pIter); static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows); @@ -802,17 +799,6 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { } } - // pDb = mndAcquireDb(pMnode, streamObj.sourceDb); - // if (pDb->cfg.replications != 1) { - // mError("stream source db must have only 1 replica, but %s has %d", pDb->name, pDb->cfg.replications); - // terrno = TSDB_CODE_MND_MULTI_REPLICA_SOURCE_DB; - // mndReleaseDb(pMnode, pDb); - // pDb = NULL; - // goto _OVER; - // } - - // mndReleaseDb(pMnode, pDb); - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_DB_INSIDE, pReq, "create-stream"); if (pTrans == NULL) { mError("stream:%s, failed to create since %s", createStreamReq.name, terrstr()); @@ -1054,8 +1040,7 @@ static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, in // return -1; // } -static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream, SMnode *pMnode, - int64_t checkpointId) { +static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream, SMnode *pMnode, int64_t chkptId) { taosWLockLatch(&pStream->lock); int32_t totLevel = taosArrayGetSize(pStream->tasks); @@ -1079,7 +1064,7 @@ static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, 
SStreamObj *pStream void *buf; int32_t tlen; - if (mndBuildStreamCheckpointSourceReq2(&buf, &tlen, pTask->info.nodeId, checkpointId, pTask->id.streamId, + if (mndBuildStreamCheckpointSourceReq2(&buf, &tlen, pTask->info.nodeId, chkptId, pTask->id.streamId, pTask->id.taskId) < 0) { mndReleaseVgroup(pMnode, pVgObj); taosWUnLockLatch(&pStream->lock); @@ -1100,9 +1085,9 @@ static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream } } - pStream->checkpointId = checkpointId; + pStream->checkpointId = chkptId; pStream->checkpointFreq = taosGetTimestampMs(); - atomic_store_64(&pStream->currentTick, 0); + pStream->currentTick = 0; // 3. commit log: stream checkpoint info pStream->version = pStream->version + 1; @@ -1189,7 +1174,7 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { STaskStatusEntry *p = taosArrayGet(execNodeList.pTaskList, i); if (p->status != TASK_STATUS__NORMAL) { mDebug("s-task:0x%" PRIx64 "-0x%x (nodeId:%d) status:%s not ready, create checkpoint msg not issued", - p->streamId, p->taskId, 0, streamGetTaskStatusStr(p->status)); + p->id.streamId, (int32_t)p->id.taskId, 0, streamGetTaskStatusStr(p->status)); ready = false; break; } @@ -1565,29 +1550,17 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock // status char status[20 + VARSTR_HEADER_SIZE] = {0}; - int8_t taskStatus = atomic_load_8(&pTask->status.taskStatus); - if (taskStatus == TASK_STATUS__NORMAL) { - memcpy(varDataVal(status), "normal", 6); - varDataSetLen(status, 6); - } else if (taskStatus == TASK_STATUS__DROPPING) { - memcpy(varDataVal(status), "dropping", 8); - varDataSetLen(status, 8); - } else if (taskStatus == TASK_STATUS__UNINIT) { - memcpy(varDataVal(status), "uninit", 6); - varDataSetLen(status, 4); - } else if (taskStatus == TASK_STATUS__STOP) { - memcpy(varDataVal(status), "stop", 4); - varDataSetLen(status, 4); - } else if (taskStatus == TASK_STATUS__SCAN_HISTORY) { - memcpy(varDataVal(status), "history", 7); - 
varDataSetLen(status, 7); - } else if (taskStatus == TASK_STATUS__HALT) { - memcpy(varDataVal(status), "halt", 4); - varDataSetLen(status, 4); - } else if (taskStatus == TASK_STATUS__PAUSE) { - memcpy(varDataVal(status), "pause", 5); - varDataSetLen(status, 5); + + STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; + int32_t *index = taosHashGet(execNodeList.pTaskMap, &id, sizeof(id)); + if (index == NULL) { + continue; } + + STaskStatusEntry *pStatusEntry = taosArrayGet(execNodeList.pTaskList, *index); + const char* pStatus = streamGetTaskStatusStr(pStatusEntry->status); + STR_TO_VARSTR(status, pStatus); + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char *)&status, false); @@ -1887,6 +1860,7 @@ static int32_t doBuildStreamTaskUpdateMsg(void **pBuf, int32_t *pLen, SVgroupCha tEncodeSize(tEncodeStreamTaskUpdateMsg, &req, blen, code); if (code < 0) { terrno = TSDB_CODE_OUT_OF_MEMORY; + taosArrayDestroy(req.pNodeList); return -1; } @@ -1895,6 +1869,7 @@ static int32_t doBuildStreamTaskUpdateMsg(void **pBuf, int32_t *pLen, SVgroupCha void *buf = taosMemoryMalloc(tlen); if (buf == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; + taosArrayDestroy(req.pNodeList); return -1; } @@ -1912,6 +1887,7 @@ static int32_t doBuildStreamTaskUpdateMsg(void **pBuf, int32_t *pLen, SVgroupCha *pBuf = buf; *pLen = tlen; + taosArrayDestroy(req.pNodeList); return TSDB_CODE_SUCCESS; } @@ -1919,6 +1895,7 @@ int32_t mndPersistTransLog(SStreamObj *pStream, STrans *pTrans) { SSdbRaw *pCommitRaw = mndStreamActionEncode(pStream); if (pCommitRaw == NULL) { mError("failed to encode stream since %s", terrstr()); + mndTransDrop(pTrans); return -1; } @@ -1985,6 +1962,7 @@ static int32_t createStreamUpdateTrans(SMnode *pMnode, SStreamObj *pStream, SVgr if (mndTransAppendRedoAction(pTrans, &action) != 0) { taosMemoryFree(pBuf); taosWUnLockLatch(&pStream->lock); + mndTransDrop(pTrans); return -1; } } @@ -1995,7 +1973,6 @@ static int32_t 
createStreamUpdateTrans(SMnode *pMnode, SStreamObj *pStream, SVgr int32_t code = mndPersistTransLog(pStream, pTrans); if (code != TSDB_CODE_SUCCESS) { sdbRelease(pMnode->pSdb, pStream); - mndTransDrop(pTrans); return -1; } @@ -2266,16 +2243,16 @@ static void keepStreamTasksInBuf(SStreamObj *pStream, SStreamVnodeRevertIndex *p int32_t numOfTasks = taosArrayGetSize(pLevel); for (int32_t j = 0; j < numOfTasks; j++) { SStreamTask *pTask = taosArrayGetP(pLevel, j); - int64_t keys[2] = {pTask->id.streamId, pTask->id.taskId}; - void *p = taosHashGet(pExecNode->pTaskMap, keys, sizeof(keys)); + STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; + void *p = taosHashGet(pExecNode->pTaskMap, &id, sizeof(id)); if (p == NULL) { STaskStatusEntry entry = { - .streamId = pTask->id.streamId, .taskId = pTask->id.taskId, .status = TASK_STATUS__STOP}; + .id.streamId = pTask->id.streamId, .id.taskId = pTask->id.taskId, .status = TASK_STATUS__STOP}; taosArrayPush(pExecNode->pTaskList, &entry); int32_t ordinal = taosArrayGetSize(pExecNode->pTaskList) - 1; - taosHashPut(pExecNode->pTaskMap, keys, sizeof(keys), &ordinal, sizeof(ordinal)); + taosHashPut(pExecNode->pTaskMap, &id, sizeof(id), &ordinal, sizeof(ordinal)); } } } @@ -2308,8 +2285,7 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { for (int32_t i = 0; i < req.numOfTasks; ++i) { STaskStatusEntry *p = taosArrayGet(req.pTaskStatus, i); - int64_t k[2] = {p->streamId, p->taskId}; - int32_t *index = taosHashGet(execNodeList.pTaskMap, &k, sizeof(k)); + int32_t *index = taosHashGet(execNodeList.pTaskMap, &p->id, sizeof(p->id)); if (index == NULL) { continue; } @@ -2317,71 +2293,11 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { STaskStatusEntry *pStatusEntry = taosArrayGet(execNodeList.pTaskList, *index); pStatusEntry->status = p->status; if (p->status != TASK_STATUS__NORMAL) { - mDebug("received s-task:0x%x not in ready status:%s", p->taskId, streamGetTaskStatusStr(p->status)); + mDebug("received s-task:0x%"PRIx64" not 
in ready status:%s", p->id.taskId, streamGetTaskStatusStr(p->status)); } } taosThreadMutexUnlock(&execNodeList.lock); taosArrayDestroy(req.pTaskStatus); - - // bool nodeChanged = false; - // SArray* pList = taosArrayInit(4, sizeof(int32_t)); - /* - // record the timeout node - for(int32_t i = 0; i < taosArrayGetSize(execNodeList.pNodeEntryList); ++i) { - SNodeEntry* pEntry = taosArrayGet(execNodeList.pNodeEntryList, i); - int64_t duration = now - pEntry->hbTimestamp; - if (duration > MND_STREAM_HB_INTERVAL) { // execNode timeout, try next - taosArrayPush(pList, &pEntry); - mWarn("nodeId:%d stream node timeout, since last hb:%"PRId64"s", pEntry->nodeId, duration); - continue; - } - - if (pEntry->nodeId != req.vgId) { - continue; - } - - pEntry->hbTimestamp = now; - - // check epset to identify whether the node has been transferred to other dnodes. - // node the epset is changed, which means the node transfer has occurred for this node. - // if (!isEpsetEqual(&pEntry->epset, &req.epset)) { - // nodeChanged = true; - // break; - // } - } - - // todo handle the node timeout case. Once the vnode is off-line, we should check the dnode status from mnode, - // to identify whether the dnode is truely offline or not. 
- - // handle the node changed case - if (!nodeChanged) { - return TSDB_CODE_SUCCESS; - } - - int32_t nodeId = req.vgId; - - {// check all streams that involved this vnode should update the epset info - SStreamObj *pStream = NULL; - void *pIter = NULL; - while (1) { - pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); - if (pIter == NULL) { - break; - } - - // update the related upstream and downstream tasks, todo remove this, no need this function - taosWLockLatch(&pStream->lock); - // streamTaskUpdateEpInfo(pStream->tasks, req.vgId, &req.epset); - // streamTaskUpdateEpInfo(pStream->pHTasksList, req.vgId, &req.epset); - taosWUnLockLatch(&pStream->lock); - - // code = createStreamUpdateTrans(pMnode, pStream, nodeId, ); - // if (code != TSDB_CODE_SUCCESS) { - // todo - //// } - // } - } - */ return TSDB_CODE_SUCCESS; } diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index c756341164..3131112c9a 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -771,6 +771,29 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) { return 0; } +static int32_t sendDeleteSubToVnode(SMqSubscribeObj *pSub, STrans *pTrans){ + // iter all vnode to delete handle + int32_t sz = taosArrayGetSize(pSub->unassignedVgs); + for (int32_t i = 0; i < sz; i++) { + SMqVgEp *pVgEp = taosArrayGetP(pSub->unassignedVgs, i); + SMqVDeleteReq *pReq = taosMemoryCalloc(1, sizeof(SMqVDeleteReq)); + pReq->head.vgId = htonl(pVgEp->vgId); + pReq->vgId = pVgEp->vgId; + pReq->consumerId = -1; + memcpy(pReq->subKey, pSub->key, TSDB_SUBSCRIBE_KEY_LEN); + STransAction action = {0}; + action.epSet = pVgEp->epSet; + action.pCont = pReq; + action.contLen = sizeof(SMqVDeleteReq); + action.msgType = TDMT_VND_TMQ_DELETE_SUB; + if (mndTransAppendRedoAction(pTrans, &action) != 0) { + taosMemoryFree(pReq); + return -1; + } + } + return 0; +} + static int32_t mndProcessDropCgroupReq(SRpcMsg *pMsg) { SMnode 
*pMnode = pMsg->info.node; SMDropCgroupReq dropReq = {0}; @@ -831,6 +854,11 @@ static int32_t mndProcessDropCgroupReq(SRpcMsg *pMsg) { mInfo("trans:%d, used to drop cgroup:%s on topic %s", pTrans->id, dropReq.cgroup, dropReq.topic); + code = sendDeleteSubToVnode(pSub, pTrans); + if (code != 0) { + goto end; + } + if (mndSetDropSubCommitLogs(pMnode, pTrans, pSub) < 0) { mError("cgroup %s on topic:%s, failed to drop since %s", dropReq.cgroup, dropReq.topic, terrstr()); code = -1; @@ -1113,25 +1141,11 @@ int32_t mndDropSubByTopic(SMnode *pMnode, STrans *pTrans, const char *topicName) sdbCancelFetch(pSdb, pIter); return -1; } - int32_t sz = taosArrayGetSize(pSub->unassignedVgs); - for (int32_t i = 0; i < sz; i++) { - SMqVgEp *pVgEp = taosArrayGetP(pSub->unassignedVgs, i); - SMqVDeleteReq *pReq = taosMemoryCalloc(1, sizeof(SMqVDeleteReq)); - pReq->head.vgId = htonl(pVgEp->vgId); - pReq->vgId = pVgEp->vgId; - pReq->consumerId = -1; - memcpy(pReq->subKey, pSub->key, TSDB_SUBSCRIBE_KEY_LEN); - STransAction action = {0}; - action.epSet = pVgEp->epSet; - action.pCont = pReq; - action.contLen = sizeof(SMqVDeleteReq); - action.msgType = TDMT_VND_TMQ_DELETE_SUB; - if (mndTransAppendRedoAction(pTrans, &action) != 0) { - taosMemoryFree(pReq); - sdbRelease(pSdb, pSub); - sdbCancelFetch(pSdb, pIter); - return -1; - } + + if (sendDeleteSubToVnode(pSub, pTrans) != 0) { + sdbRelease(pSdb, pSub); + sdbCancelFetch(pSdb, pIter); + return -1; } if (mndSetDropSubRedoLogs(pMnode, pTrans, pSub) < 0) { diff --git a/source/dnode/mnode/impl/src/mndUser.c b/source/dnode/mnode/impl/src/mndUser.c index bbf69938b2..1a3489715f 100644 --- a/source/dnode/mnode/impl/src/mndUser.c +++ b/source/dnode/mnode/impl/src/mndUser.c @@ -1623,6 +1623,47 @@ int32_t mndUserRemoveDb(SMnode *pMnode, STrans *pTrans, char *db) { return code; } +int32_t mndUserRemoveStb(SMnode *pMnode, STrans *pTrans, char *stb) { + int32_t code = 0; + SSdb *pSdb = pMnode->pSdb; + int32_t len = strlen(stb) + 1; + void *pIter = NULL; + 
SUserObj *pUser = NULL; + SUserObj newUser = {0}; + + while (1) { + pIter = sdbFetch(pSdb, SDB_USER, pIter, (void **)&pUser); + if (pIter == NULL) break; + + code = -1; + if (mndUserDupObj(pUser, &newUser) != 0) { + break; + } + + bool inRead = (taosHashGet(newUser.readTbs, stb, len) != NULL); + bool inWrite = (taosHashGet(newUser.writeTbs, stb, len) != NULL); + if (inRead || inWrite) { + (void)taosHashRemove(newUser.readTbs, stb, len); + (void)taosHashRemove(newUser.writeTbs, stb, len); + + SSdbRaw *pCommitRaw = mndUserActionEncode(&newUser); + if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { + break; + } + (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); + } + + mndUserFreeObj(&newUser); + sdbRelease(pSdb, pUser); + code = 0; + } + + if (pUser != NULL) sdbRelease(pSdb, pUser); + if (pIter != NULL) sdbCancelFetch(pSdb, pIter); + mndUserFreeObj(&newUser); + return code; +} + int32_t mndUserRemoveTopic(SMnode *pMnode, STrans *pTrans, char *topic) { int32_t code = 0; SSdb *pSdb = pMnode->pSdb; diff --git a/source/dnode/mnode/impl/test/profile/profile.cpp b/source/dnode/mnode/impl/test/profile/profile.cpp index 6ab6d364cb..b1b94c65fb 100644 --- a/source/dnode/mnode/impl/test/profile/profile.cpp +++ b/source/dnode/mnode/impl/test/profile/profile.cpp @@ -65,7 +65,7 @@ TEST_F(MndTestProfile, 01_ConnectMsg) { connId = connectRsp.connId; } -TEST_F(MndTestProfile, 02_ConnectMsg_InvalidDB) { +TEST_F(MndTestProfile, 02_ConnectMsg_NotExistDB) { char passwd[] = "taosdata"; char secretEncrypt[TSDB_PASSWORD_LEN + 1] = {0}; taosEncryptPass_c((uint8_t*)passwd, strlen(passwd), secretEncrypt); @@ -73,7 +73,7 @@ TEST_F(MndTestProfile, 02_ConnectMsg_InvalidDB) { SConnectReq connectReq = {0}; connectReq.pid = 1234; strcpy(connectReq.app, "mnode_test_profile"); - strcpy(connectReq.db, "invalid_db"); + strcpy(connectReq.db, "not_exist_db"); strcpy(connectReq.user, "root"); strcpy(connectReq.passwd, secretEncrypt); strcpy(connectReq.sVer, version); @@ -84,7 
+84,7 @@ TEST_F(MndTestProfile, 02_ConnectMsg_InvalidDB) { SRpcMsg* pRsp = test.SendReq(TDMT_MND_CONNECT, pReq, contLen); ASSERT_NE(pRsp, nullptr); - ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_DB); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_DB_NOT_EXIST); ASSERT_EQ(pRsp->contLen, 0); } diff --git a/source/dnode/mnode/impl/test/stb/stb.cpp b/source/dnode/mnode/impl/test/stb/stb.cpp index dd03917fc2..1adbb87e19 100644 --- a/source/dnode/mnode/impl/test/stb/stb.cpp +++ b/source/dnode/mnode/impl/test/stb/stb.cpp @@ -448,7 +448,7 @@ TEST_F(MndTestStb, 02_Alter_Stb_AddTag) { { void* pReq = BuildAlterStbAddTagReq("1.d3.stb", "tag4", &contLen); SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); - ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_DB); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_DB_NOT_EXIST); } { @@ -665,7 +665,7 @@ TEST_F(MndTestStb, 06_Alter_Stb_AddColumn) { { void* pReq = BuildAlterStbAddColumnReq("1.d7.stb", "tag4", &contLen); SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); - ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_DB); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_DB_NOT_EXIST); } { diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index c1a59416f6..ef9c1ebe2e 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -86,18 +86,18 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) { SCheckpointInfo* pChkInfo = &pTask->chkInfo; // checkpoint ver is the kept version, handled data should be the next version. 
if (pTask->chkInfo.checkpointId != 0) { - pTask->chkInfo.currentVer = pTask->chkInfo.checkpointVer + 1; - qInfo("s-task:%s restore from the checkpointId:%" PRId64 " ver:%" PRId64 " currentVer:%" PRId64, pTask->id.idStr, - pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->currentVer); + pTask->chkInfo.nextProcessVer = pTask->chkInfo.checkpointVer + 1; + qInfo("s-task:%s restore from the checkpointId:%" PRId64 " ver:%" PRId64 " nextProcessVer:%" PRId64, pTask->id.idStr, + pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer); } else { - if (pTask->chkInfo.currentVer == -1) { - pTask->chkInfo.currentVer = 0; + if (pTask->chkInfo.nextProcessVer == -1) { + pTask->chkInfo.nextProcessVer = 0; } } - qInfo("snode:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " currentVer:%" PRId64 + qInfo("snode:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " nextProcessVer:%" PRId64 " child id:%d, level:%d, status:%s fill-history:%d, trigger:%" PRId64 " ms", - SNODE_HANDLE, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->currentVer, + SNODE_HANDLE, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, pTask->info.selfChildId, pTask->info.taskLevel, streamGetTaskStatusStr(pTask->status.taskStatus), pTask->info.fillHistory, pTask->info.triggerParam); @@ -189,15 +189,17 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) { int32_t sndProcessTaskDropReq(SSnode *pSnode, char *msg, int32_t msgLen) { SVDropStreamTaskReq *pReq = (SVDropStreamTaskReq *)msg; qDebug("snode:%d receive msg to drop stream task:0x%x", pSnode->pMeta->vgId, pReq->taskId); - - SStreamTask* pTask = streamMetaAcquireTask(pSnode->pMeta, pReq->streamId, pReq->taskId); - if (pTask == NULL) { - qError("vgId:%d failed to acquire s-task:0x%x when dropping it", pSnode->pMeta->vgId, pReq->taskId); - return 0; - } - 
streamMetaUnregisterTask(pSnode->pMeta, pReq->streamId, pReq->taskId); - streamMetaReleaseTask(pSnode->pMeta, pTask); + + // commit the update + taosWLockLatch(&pSnode->pMeta->lock); + int32_t numOfTasks = streamMetaGetNumOfTasks(pSnode->pMeta); + qDebug("vgId:%d task:0x%x dropped, remain tasks:%d", pSnode->pMeta->vgId, pReq->taskId, numOfTasks); + + if (streamMetaCommit(pSnode->pMeta) < 0) { + // persist to disk + } + taosWUnLockLatch(&pSnode->pMeta->lock); return 0; } diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 5ae257aef8..58f8e00370 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -186,7 +186,7 @@ int64_t tsdbGetNumOfRowsInMemTable2(STsdbReader *pHandle); void *tsdbGetIdx2(SMeta *pMeta); void *tsdbGetIvtIdx2(SMeta *pMeta); uint64_t tsdbGetReaderMaxVersion2(STsdbReader *pReader); -void tsdbReaderSetCloseFlag2(STsdbReader *pReader); +void tsdbReaderSetCloseFlag(STsdbReader *pReader); int64_t tsdbGetLastTimestamp2(SVnode *pVnode, void *pTableList, int32_t numOfTables, const char *pIdStr); //====================================================================================================================== diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 14ca7f3b02..4e73a481c8 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -155,7 +155,7 @@ int32_t tqOffsetCommitFile(STqOffsetStore* pStore); // tqSink int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq, const char* pIdStr); -void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, void* data); +void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data); // tqOffset char* tqOffsetBuildFName(const char* path, int32_t fVer); @@ -174,7 +174,7 @@ int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequ int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const 
SMqDataRsp* pRsp, int32_t epoch, int64_t consumerId, int32_t type, int64_t sver, int64_t ever); int32_t tqInitDataRsp(SMqDataRsp* pRsp, STqOffsetVal pOffset); -void tqUpdateNodeStage(STQ* pTq); +void tqUpdateNodeStage(STQ* pTq, bool isLeader); #ifdef __cplusplus } diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index ab6a7fb88b..8337b11ce5 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -302,12 +302,11 @@ int32_t tsdbDelFReaderClose(SDelFReader **ppReader); int32_t tsdbReadDelDatav1(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData, int64_t maxVer); int32_t tsdbReadDelData(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData); int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx); -// tsdbRead.c ============================================================================================== -int32_t tsdbTakeReadSnap(STsdbReader *pReader, _query_reseek_func_t reseek, STsdbReadSnap **ppSnap); -void tsdbUntakeReadSnap(STsdbReader *pReader, STsdbReadSnap *pSnap, bool proactive); +// tsdbRead.c ============================================================================================== int32_t tsdbTakeReadSnap2(STsdbReader *pReader, _query_reseek_func_t reseek, STsdbReadSnap **ppSnap); void tsdbUntakeReadSnap2(STsdbReader *pReader, STsdbReadSnap *pSnap, bool proactive); + // tsdbMerge.c ============================================================================================== int32_t tsdbMerge(void *arg); @@ -830,7 +829,6 @@ bool tMergeTreeNext(SMergeTree *pMTree); bool tMergeTreeIgnoreEarlierTs(SMergeTree *pMTree); void tMergeTreeClose(SMergeTree *pMTree); -SSttBlockLoadInfo *tCreateLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols, int32_t numOfStt); SSttBlockLoadInfo *tCreateOneLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols); void resetLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo); void 
getSttBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo, SSttBlockLoadCostInfo *pLoadCost); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 3355e771e2..39f3d465f2 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -250,7 +250,7 @@ int32_t tqProcessTaskDropReq(STQ* pTq, int64_t version, char* msg, int32_t msgLe int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen); int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen); int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg); -int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t version, SRpcMsg* pMsg); +int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec); int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg); @@ -259,7 +259,6 @@ int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskScanHistoryFinishReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskScanHistoryFinishRsp(STQ* pTq, SRpcMsg* pMsg); -int32_t tqCheckLogInWal(STQ* pTq, int64_t version); // sma int32_t smaInit(); diff --git a/source/dnode/vnode/src/meta/metaCommit.c b/source/dnode/vnode/src/meta/metaCommit.c index d262567953..59416fd284 100644 --- a/source/dnode/vnode/src/meta/metaCommit.c +++ b/source/dnode/vnode/src/meta/metaCommit.c @@ -52,7 +52,9 @@ int metaFinishCommit(SMeta *pMeta, TXN *txn) { return tdbPostCommit(pMeta->pEnv int metaPrepareAsyncCommit(SMeta *pMeta) { // return tdbPrepareAsyncCommit(pMeta->pEnv, pMeta->txn); int code = 0; + metaWLock(pMeta); code = ttlMgrFlush(pMeta->pTtlMgr, pMeta->txn); + metaULock(pMeta); code = tdbCommit(pMeta->pEnv, pMeta->txn); return code; diff --git a/source/dnode/vnode/src/meta/metaTable.c b/source/dnode/vnode/src/meta/metaTable.c index 
9a298a4bb7..18e1aa303b 100644 --- a/source/dnode/vnode/src/meta/metaTable.c +++ b/source/dnode/vnode/src/meta/metaTable.c @@ -931,21 +931,16 @@ end: } int metaTtlFindExpired(SMeta *pMeta, int64_t timePointMs, SArray *tbUids, int32_t ttlDropMaxCount) { - metaWLock(pMeta); - int ret = ttlMgrFlush(pMeta->pTtlMgr, pMeta->txn); - if (ret != 0) { - metaError("ttl failed to flush, ret:%d", ret); - goto _err; - } + metaRLock(pMeta); + + int ret = ttlMgrFindExpired(pMeta->pTtlMgr, timePointMs, tbUids, ttlDropMaxCount); + + metaULock(pMeta); - ret = ttlMgrFindExpired(pMeta->pTtlMgr, timePointMs, tbUids, ttlDropMaxCount); if (ret != 0) { metaError("ttl failed to find expired table, ret:%d", ret); - goto _err; } -_err: - metaULock(pMeta); return ret; } diff --git a/source/dnode/vnode/src/meta/metaTtl.c b/source/dnode/vnode/src/meta/metaTtl.c index f920296b4a..58ecf54512 100644 --- a/source/dnode/vnode/src/meta/metaTtl.c +++ b/source/dnode/vnode/src/meta/metaTtl.c @@ -299,7 +299,7 @@ int ttlMgrInsertTtl(STtlManger *pTtlMgr, const STtlUpdTtlCtx *updCtx) { ret = 0; _out: - metaDebug("%s, ttl mgr insert ttl, uid: %" PRId64 ", ctime: %" PRId64 ", ttlDays: %" PRId64, pTtlMgr->logPrefix, + metaTrace("%s, ttl mgr insert ttl, uid: %" PRId64 ", ctime: %" PRId64 ", ttlDays: %" PRId64, pTtlMgr->logPrefix, updCtx->uid, updCtx->changeTimeMs, updCtx->ttlDays); return ret; @@ -323,7 +323,7 @@ int ttlMgrDeleteTtl(STtlManger *pTtlMgr, const STtlDelTtlCtx *delCtx) { ret = 0; _out: - metaDebug("%s, ttl mgr delete ttl, uid: %" PRId64, pTtlMgr->logPrefix, delCtx->uid); + metaTrace("%s, ttl mgr delete ttl, uid: %" PRId64, pTtlMgr->logPrefix, delCtx->uid); return ret; } @@ -363,17 +363,37 @@ int ttlMgrUpdateChangeTime(STtlManger *pTtlMgr, const STtlUpdCtimeCtx *pUpdCtime ret = 0; _out: - metaDebug("%s, ttl mgr update ctime, uid: %" PRId64 ", ctime: %" PRId64, pTtlMgr->logPrefix, pUpdCtimeCtx->uid, + metaTrace("%s, ttl mgr update ctime, uid: %" PRId64 ", ctime: %" PRId64, pTtlMgr->logPrefix, 
pUpdCtimeCtx->uid, pUpdCtimeCtx->changeTimeMs); return ret; } int ttlMgrFindExpired(STtlManger *pTtlMgr, int64_t timePointMs, SArray *pTbUids, int32_t ttlDropMaxCount) { + int ret = -1; + STtlIdxKeyV1 ttlKey = {.deleteTimeMs = timePointMs, .uid = INT64_MAX}; STtlExpiredCtx expiredCtx = { .ttlDropMaxCount = ttlDropMaxCount, .count = 0, .expiredKey = ttlKey, .pTbUids = pTbUids}; - return tdbTbTraversal(pTtlMgr->pTtlIdx, &expiredCtx, ttlMgrFindExpiredOneEntry); + ret = tdbTbTraversal(pTtlMgr->pTtlIdx, &expiredCtx, ttlMgrFindExpiredOneEntry); + if (ret) { + goto _out; + } + + size_t vIdx = 0; + for (size_t i = 0; i < pTbUids->size; i++) { + tb_uid_t *pUid = taosArrayGet(pTbUids, i); + if (taosHashGet(pTtlMgr->pDirtyUids, pUid, sizeof(tb_uid_t)) == NULL) { + // not in dirty && expired in tdb => must be expired + taosArraySet(pTbUids, vIdx, pUid); + vIdx++; + } + } + + taosArrayPopTailBatch(pTbUids, pTbUids->size - vIdx); + +_out: + return ret; } static bool ttlMgrNeedFlush(STtlManger *pTtlMgr) { diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 4aee98148b..e639e272fa 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -726,7 +726,8 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { SStreamTask* pStateTask = pTask; SStreamTask task = {0}; if (pTask->info.fillHistory) { - task.id = pTask->streamTaskId; + task.id.streamId = pTask->streamTaskId.streamId; + task.id.taskId = pTask->streamTaskId.taskId; task.pMeta = pTask->pMeta; pStateTask = &task; } @@ -760,7 +761,8 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { SStreamTask* pSateTask = pTask; SStreamTask task = {0}; if (pTask->info.fillHistory) { - task.id = pTask->streamTaskId; + task.id.streamId = pTask->streamTaskId.streamId; + task.id.taskId = pTask->streamTaskId.taskId; task.pMeta = pTask->pMeta; pSateTask = &task; } @@ -798,7 +800,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { 
pTask->smaSink.smaSink = smaHandleRes; } else if (pTask->outputInfo.type == TASK_OUTPUT__TABLE) { pTask->tbSink.vnode = pTq->pVnode; - pTask->tbSink.tbSinkFunc = tqSinkToTablePipeline; + pTask->tbSink.tbSinkFunc = tqSinkDataIntoDstTable; int32_t ver1 = 1; SMetaInfo info = {0}; @@ -819,7 +821,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { SWalFilterCond cond = {.deleteMsg = 1}; // delete msg also extract from wal files - pTask->exec.pWalReader = walOpenReader(pTq->pVnode->pWal, &cond); + pTask->exec.pWalReader = walOpenReader(pTq->pVnode->pWal, &cond, pTask->id.taskId); } // reset the task status from unfinished transaction @@ -834,16 +836,26 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { // checkpoint ver is the kept version, handled data should be the next version. if (pTask->chkInfo.checkpointId != 0) { - pTask->chkInfo.currentVer = pTask->chkInfo.checkpointVer + 1; + pTask->chkInfo.nextProcessVer = pTask->chkInfo.checkpointVer + 1; tqInfo("s-task:%s restore from the checkpointId:%" PRId64 " ver:%" PRId64 " currentVer:%" PRId64, pTask->id.idStr, - pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->currentVer); + pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer); } - tqInfo("vgId:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " currentVer:%" PRId64 - " child id:%d, level:%d, status:%s fill-history:%d, trigger:%" PRId64 " ms", - vgId, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->currentVer, - pTask->info.selfChildId, pTask->info.taskLevel, streamGetTaskStatusStr(pTask->status.taskStatus), - pTask->info.fillHistory, pTask->info.triggerParam); + if (pTask->info.fillHistory) { + tqInfo("vgId:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 + " nextProcessVer:%" PRId64 + " child id:%d, level:%d, status:%s fill-history:%d, related stream 
task:0x%x trigger:%" PRId64 " ms", + vgId, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, + pTask->info.selfChildId, pTask->info.taskLevel, streamGetTaskStatusStr(pTask->status.taskStatus), + pTask->info.fillHistory, (int32_t)pTask->streamTaskId.taskId, pTask->info.triggerParam); + } else { + tqInfo("vgId:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 + " nextProcessVer:%" PRId64 + " child id:%d, level:%d, status:%s fill-history:%d, related fill-task:0x%x trigger:%" PRId64 " ms", + vgId, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, + pTask->info.selfChildId, pTask->info.taskLevel, streamGetTaskStatusStr(pTask->status.taskStatus), + pTask->info.fillHistory, (int32_t)pTask->historyTaskId.taskId, pTask->info.triggerParam); + } return 0; } @@ -890,9 +902,10 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { return streamSendCheckRsp(pTq->pStreamMeta, &req, &rsp, &pMsg->info, taskId); } -int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, SRpcMsg* pMsg) { +int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, SRpcMsg* pMsg) { char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t len = pMsg->contLen - sizeof(SMsgHead); + int32_t vgId = pTq->pStreamMeta->vgId; int32_t code; SStreamTaskCheckRsp rsp; @@ -901,7 +914,9 @@ int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, SRpcMsg* pMsg) { tDecoderInit(&decoder, (uint8_t*)pReq, len); code = tDecodeStreamTaskCheckRsp(&decoder, &rsp); if (code < 0) { + terrno = TSDB_CODE_INVALID_MSG; tDecoderClear(&decoder); + tqError("vgId:%d failed to parse check rsp msg, code:%s", vgId, tstrerror(terrno)); return -1; } @@ -911,7 +926,7 @@ int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, SRpcMsg* pMsg) { SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, rsp.streamId, rsp.upstreamTaskId); if (pTask == NULL) { - tqError("tq failed to locate the 
stream task:0x%" PRIx64 "-0x%x (vgId:%d), it may have been destroyed", + tqError("tq failed to locate the stream task:0x%" PRIx64 "-0x%x (vgId:%d), it may have been destroyed or stopped", rsp.streamId, rsp.upstreamTaskId, pTq->pStreamMeta->vgId); terrno = TSDB_CODE_STREAM_TASK_NOT_EXIST; return -1; @@ -980,6 +995,9 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms bool restored = pTq->pVnode->restored; if (p != NULL && restored) { + p->taskExecInfo.init = taosGetTimestampMs(); + tqDebug("s-task:%s set the init ts:%"PRId64, p->id.idStr, p->taskExecInfo.init); + streamTaskCheckDownstream(p); } else if (!restored) { tqWarn("s-task:%s not launched since vnode(vgId:%d) not ready", p->id.idStr, vgId); @@ -1016,19 +1034,18 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); tqDebug("s-task:%s start scan-history stage(step 1), status:%s", id, pStatus); - if (pTask->tsInfo.step1Start == 0) { + if (pTask->taskExecInfo.step1Start == 0) { ASSERT(pTask->status.pauseAllowed == false); - pTask->tsInfo.step1Start = taosGetTimestampMs(); + pTask->taskExecInfo.step1Start = taosGetTimestampMs(); if (pTask->info.fillHistory == 1) { streamTaskEnablePause(pTask); } } else { - tqDebug("s-task:%s resume from paused, start ts:%" PRId64, pTask->id.idStr, pTask->tsInfo.step1Start); + tqDebug("s-task:%s resume from paused, start ts:%" PRId64, pTask->id.idStr, pTask->taskExecInfo.step1Start); } // we have to continue retrying to successfully execute the scan history task. 
- int8_t schedStatus = atomic_val_compare_exchange_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE, - TASK_SCHED_STATUS__WAITING); + int8_t schedStatus = streamTaskSetSchedStatusWait(pTask); if (schedStatus != TASK_SCHED_STATUS__INACTIVE) { tqError( "s-task:%s failed to start scan-history in first stream time window since already started, unexpected " @@ -1042,18 +1059,17 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { ASSERT(pTask->status.pauseAllowed == true); } - streamSourceScanHistoryData(pTask); + streamScanHistoryData(pTask); if (pTask->status.taskStatus == TASK_STATUS__PAUSE) { - double el = (taosGetTimestampMs() - pTask->tsInfo.step1Start) / 1000.0; - tqDebug("s-task:%s is paused in the step1, elapsed time:%.2fs, sched-status:%d", pTask->id.idStr, el, - TASK_SCHED_STATUS__INACTIVE); - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + double el = (taosGetTimestampMs() - pTask->taskExecInfo.step1Start) / 1000.0; + int8_t status = streamTaskSetSchedStatusInActive(pTask); + tqDebug("s-task:%s is paused in the step1, elapsed time:%.2fs, sched-status:%d", pTask->id.idStr, el, status); streamMetaReleaseTask(pMeta, pTask); return 0; } // the following procedure should be executed, no matter status is stop/pause or not - double el = (taosGetTimestampMs() - pTask->tsInfo.step1Start) / 1000.0; + double el = (taosGetTimestampMs() - pTask->taskExecInfo.step1Start) / 1000.0; tqDebug("s-task:%s scan-history stage(step 1) ended, elapsed time:%.2fs", id, el); if (pTask->info.fillHistory) { @@ -1065,12 +1081,13 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { pStreamTask = streamMetaAcquireTask(pMeta, pTask->streamTaskId.streamId, pTask->streamTaskId.taskId); if (pStreamTask == NULL) { // todo delete this task, if the related stream task is dropped - qError("failed to find s-task:0x%x, it may have been destroyed, drop fill-history task:%s", + qError("failed to find s-task:0x%"PRIx64", it may have been 
destroyed, drop fill-history task:%s", pTask->streamTaskId.taskId, pTask->id.idStr); tqDebug("s-task:%s fill-history task set status to be dropping", id); - streamMetaUnregisterTask(pMeta, pTask->id.streamId, pTask->id.taskId); +// streamMetaUnregisterTask(pMeta, pTask->id.streamId, pTask->id.taskId); + streamBuildAndSendDropTaskMsg(pTask, pMeta->vgId, &pTask->id); streamMetaReleaseTask(pMeta, pTask); return -1; } @@ -1087,18 +1104,21 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { } // now we can stop the stream task execution - streamTaskHalt(pStreamTask); + int64_t latestVer = 0; + taosThreadMutexLock(&pStreamTask->lock); + streamTaskHalt(pStreamTask); tqDebug("s-task:%s level:%d sched-status:%d is halt by fill-history task:%s", pStreamTask->id.idStr, pStreamTask->info.taskLevel, pStreamTask->status.schedStatus, id); + latestVer = walReaderGetCurrentVer(pStreamTask->exec.pWalReader); + taosThreadMutexUnlock(&pStreamTask->lock); // if it's an source task, extract the last version in wal. 
pRange = &pTask->dataRange.range; - int64_t latestVer = walReaderGetCurrentVer(pStreamTask->exec.pWalReader); done = streamHistoryTaskSetVerRangeStep2(pTask, latestVer); if (done) { - pTask->tsInfo.step2Start = taosGetTimestampMs(); + pTask->taskExecInfo.step2Start = taosGetTimestampMs(); qDebug("s-task:%s scan-history from WAL stage(step 2) ended, elapsed time:%.2fs", id, 0.0); streamTaskPutTranstateIntoInputQ(pTask); streamTryExec(pTask); // exec directly @@ -1110,20 +1130,19 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { pStreamTask->id.idStr); ASSERT(pTask->status.schedStatus == TASK_SCHED_STATUS__WAITING); - pTask->tsInfo.step2Start = taosGetTimestampMs(); + pTask->taskExecInfo.step2Start = taosGetTimestampMs(); streamSetParamForStreamScannerStep2(pTask, pRange, pWindow); - int64_t dstVer = pTask->dataRange.range.minVer - 1; - - pTask->chkInfo.currentVer = dstVer; + int64_t dstVer = pTask->dataRange.range.minVer; + pTask->chkInfo.nextProcessVer = dstVer; walReaderSetSkipToVersion(pTask->exec.pWalReader, dstVer); tqDebug("s-task:%s wal reader start scan WAL verRange:%" PRId64 "-%" PRId64 ", set sched-status:%d", id, dstVer, pTask->dataRange.range.maxVer, TASK_SCHED_STATUS__INACTIVE); - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + /*int8_t status = */streamTaskSetSchedStatusInActive(pTask); // set the fill-history task to be normal - if (pTask->info.fillHistory == 1) { + if (pTask->info.fillHistory == 1 && !streamTaskShouldStop(&pTask->status)) { streamSetStatusNormal(pTask); } @@ -1148,7 +1167,7 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { tqDebug( "s-task:%s scan-history in stream time window completed, now start to handle data from WAL, start " "ver:%" PRId64 ", window:%" PRId64 " - %" PRId64, - id, pTask->chkInfo.currentVer, pWindow->skey, pWindow->ekey); + id, pTask->chkInfo.nextProcessVer, pWindow->skey, pWindow->ekey); } code = streamTaskScanHistoryDataComplete(pTask); @@ -1162,44 +1181,6 @@ 
int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { return 0; } -// notify the downstream tasks to transfer executor state after handle all history blocks. -int32_t tqProcessTaskTransferStateReq(STQ* pTq, SRpcMsg* pMsg) { - char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); - int32_t len = pMsg->contLen - sizeof(SMsgHead); - - SStreamTransferReq req = {0}; - - SDecoder decoder; - tDecoderInit(&decoder, (uint8_t*)pReq, len); - int32_t code = tDecodeStreamScanHistoryFinishReq(&decoder, &req); - tDecoderClear(&decoder); - - tqDebug("vgId:%d start to process transfer state msg, from s-task:0x%x", pTq->pStreamMeta->vgId, - req.downstreamTaskId); - - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, req.downstreamTaskId); - if (pTask == NULL) { - tqError("failed to find task:0x%x, it may have been dropped already. process transfer state failed", - req.downstreamTaskId); - return -1; - } - - int32_t remain = streamAlignTransferState(pTask); - if (remain > 0) { - tqDebug("s-task:%s receive upstream transfer state msg, remain:%d", pTask->id.idStr, remain); - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - return 0; - } - - // transfer the ownership of executor state - tqDebug("s-task:%s all upstream tasks send transfer msg, open transfer state flag", pTask->id.idStr); - ASSERT(pTask->streamTaskId.taskId != 0 && pTask->info.fillHistory == 1); - - streamSchedExec(pTask); - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - return 0; -} - // only the agg tasks and the sink tasks will receive this message from upstream tasks int32_t tqProcessTaskScanHistoryFinishReq(STQ* pTq, SRpcMsg* pMsg) { char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); @@ -1283,13 +1264,13 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { // even in halt status, the data in inputQ must be processed int8_t st = pTask->status.taskStatus; if (st == TASK_STATUS__NORMAL || st == TASK_STATUS__SCAN_HISTORY || st == TASK_STATUS__CK) { - tqDebug("vgId:%d 
s-task:%s start to process block from inputQ, last chk point:%" PRId64, vgId, pTask->id.idStr, - pTask->chkInfo.currentVer); + tqDebug("vgId:%d s-task:%s start to process block from inputQ, next checked ver:%" PRId64, vgId, pTask->id.idStr, + pTask->chkInfo.nextProcessVer); streamProcessRunReq(pTask); } else { - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + int8_t status = streamTaskSetSchedStatusInActive(pTask); tqDebug("vgId:%d s-task:%s ignore run req since not in ready state, status:%s, sched-status:%d", vgId, - pTask->id.idStr, streamGetTaskStatusStr(st), pTask->status.schedStatus); + pTask->id.idStr, streamGetTaskStatusStr(st), status); } streamMetaReleaseTask(pTq->pStreamMeta, pTask); @@ -1352,14 +1333,18 @@ int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) { int32_t tqProcessTaskDropReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { SVDropStreamTaskReq* pReq = (SVDropStreamTaskReq*)msg; tqDebug("vgId:%d receive msg to drop stream task:0x%x", TD_VID(pTq->pVnode), pReq->taskId); - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->streamId, pReq->taskId); - if (pTask == NULL) { - tqError("vgId:%d failed to acquire s-task:0x%x when dropping it", pTq->pStreamMeta->vgId, pReq->taskId); - return 0; - } - streamMetaUnregisterTask(pTq->pStreamMeta, pReq->streamId, pReq->taskId); - streamMetaReleaseTask(pTq->pStreamMeta, pTask); + + // commit the update + taosWLockLatch(&pTq->pStreamMeta->lock); + int32_t numOfTasks = streamMetaGetNumOfTasks(pTq->pStreamMeta); + tqDebug("vgId:%d task:0x%x dropped, remain tasks:%d", TD_VID(pTq->pVnode), pReq->taskId, numOfTasks); + + if (streamMetaCommit(pTq->pStreamMeta) < 0) { + // persist to disk + } + taosWUnLockLatch(&pTq->pStreamMeta->lock); + return 0; } @@ -1382,7 +1367,8 @@ int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg if (pTask->historyTaskId.taskId != 0) { pHistoryTask = streamMetaAcquireTask(pMeta, 
pTask->historyTaskId.streamId, pTask->historyTaskId.taskId); if (pHistoryTask == NULL) { - tqError("vgId:%d process pause req, failed to acquire fill-history task:0x%x, it may have been dropped already", + tqError("vgId:%d process pause req, failed to acquire fill-history task:0x%" PRIx64 + ", it may have been dropped already", pMeta->vgId, pTask->historyTaskId.taskId); streamMetaReleaseTask(pMeta, pTask); @@ -1423,10 +1409,10 @@ int32_t tqProcessTaskResumeImpl(STQ* pTq, SStreamTask* pTask, int64_t sversion, walReaderSetSkipToVersion(pTask->exec.pWalReader, sversion); tqDebug("vgId:%d s-task:%s resume to exec, prev paused version:%" PRId64 ", start from vnode ver:%" PRId64 ", schedStatus:%d", - vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus); + vgId, pTask->id.idStr, pTask->chkInfo.nextProcessVer, sversion, pTask->status.schedStatus); } else { // from the previous paused version and go on tqDebug("vgId:%d s-task:%s resume to exec, from paused ver:%" PRId64 ", vnode ver:%" PRId64 ", schedStatus:%d", - vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus); + vgId, pTask->id.idStr, pTask->chkInfo.nextProcessVer, sversion, pTask->status.schedStatus); } if (level == TASK_LEVEL__SOURCE && pTask->info.fillHistory && @@ -1562,8 +1548,6 @@ FAIL: return -1; } -int32_t tqCheckLogInWal(STQ* pTq, int64_t sversion) { return sversion <= pTq->walLogLastVer; } - // todo error code cannot be return, since this is invoked by an mnode-launched transaction. 
int32_t tqProcessStreamCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg) { int32_t vgId = TD_VID(pTq->pVnode); @@ -1573,6 +1557,10 @@ int32_t tqProcessStreamCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg) { int32_t code = 0; SStreamCheckpointSourceReq req = {0}; + if (!vnodeIsRoleLeader(pTq->pVnode)) { + tqDebug("vgId:%d not leader node, ignore checkpoint-source msg", vgId); + return TSDB_CODE_SUCCESS; + } SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)msg, len); @@ -1609,11 +1597,10 @@ int32_t tqProcessStreamCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg) { // set the initial value for generating check point // set the mgmt epset info according to the checkout source msg from mnode, todo update mgmt epset if needed if (pMeta->chkptNotReadyTasks == 0) { - pMeta->chkptNotReadyTasks = streamMetaGetNumOfStreamTasks(pMeta); - pMeta->totalTasks = pMeta->chkptNotReadyTasks; + pMeta->chkptNotReadyTasks = pMeta->numOfStreamTasks; } - total = taosArrayGetSize(pMeta->pTaskList); + total = pMeta->numOfStreamTasks; taosWUnLockLatch(&pMeta->lock); qDebug("s-task:%s (vgId:%d) level:%d receive checkpoint-source msg, chkpt:%" PRId64 ", total checkpoint req:%d", @@ -1678,36 +1665,36 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) { if (tDecodeStreamTaskUpdateMsg(&decoder, &req) < 0) { rsp.code = TSDB_CODE_MSG_DECODE_ERROR; tqError("vgId:%d failed to decode task update msg, code:%s", vgId, tstrerror(rsp.code)); - goto _end; + tDecoderClear(&decoder); + return rsp.code; } + tDecoderClear(&decoder); + // update the nodeEpset when it exists taosWLockLatch(&pMeta->lock); - // when replay the WAL, we should update the task epset one again and again, the task may be in stop status. - int64_t keys[2] = {req.streamId, req.taskId}; - SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); - + // the task epset may be updated again and again, when replaying the WAL, the task may be in stop status. 
+ STaskId id = {.streamId = req.streamId, .taskId = req.taskId}; + SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (ppTask == NULL || *ppTask == NULL) { tqError("vgId:%d failed to acquire task:0x%x when handling update, it may have been dropped already", pMeta->vgId, req.taskId); rsp.code = TSDB_CODE_SUCCESS; taosWUnLockLatch(&pMeta->lock); - goto _end; + taosArrayDestroy(req.pNodeList); + return rsp.code; } SStreamTask* pTask = *ppTask; + tqDebug("s-task:%s receive nodeEp update msg from mnode", pTask->id.idStr); - tqDebug("s-task:%s receive task nodeEp update msg from mnode", pTask->id.idStr); streamTaskUpdateEpsetInfo(pTask, req.pNodeList); streamSetStatusNormal(pTask); SStreamTask** ppHTask = NULL; if (pTask->historyTaskId.taskId != 0) { - keys[0] = pTask->historyTaskId.streamId; - keys[1] = pTask->historyTaskId.taskId; - - ppHTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + ppHTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &pTask->historyTaskId, sizeof(pTask->historyTaskId)); if (ppHTask == NULL || *ppHTask == NULL) { tqError("vgId:%d failed to acquire fill-history task:0x%x when handling update, it may have been dropped already", pMeta->vgId, req.taskId); @@ -1729,62 +1716,64 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) { } streamTaskStop(pTask); + taosHashPut(pMeta->pUpdateTaskSet, &pTask->id, sizeof(pTask->id), NULL, 0); + if (ppHTask != NULL) { streamTaskStop(*ppHTask); + tqDebug("s-task:%s task nodeEp update completed, streamTask and related fill-history task closed", pTask->id.idStr); + taosHashPut(pMeta->pUpdateTaskSet, &(*ppHTask)->id, sizeof(pTask->id), NULL, 0); + } else { + tqDebug("s-task:%s task nodeEp update completed, streamTask closed", pTask->id.idStr); } - tqDebug("s-task:%s task nodeEp update completed", pTask->id.idStr); - - pMeta->closedTask += 1; - if (ppHTask != NULL) { - pMeta->closedTask += 1; - } + rsp.code = 0; // possibly only handle the stream task. 
int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); - bool allStopped = (pMeta->closedTask == numOfTasks); - if (allStopped) { - pMeta->closedTask = 0; + int32_t updateTasks = taosHashGetSize(pMeta->pUpdateTaskSet); + if (updateTasks < numOfTasks) { + pMeta->taskWillbeLaunched = 1; + + tqDebug("vgId:%d closed tasks:%d, unclosed:%d", vgId, updateTasks, (numOfTasks - updateTasks)); + taosWUnLockLatch(&pMeta->lock); } else { - tqDebug("vgId:%d closed tasks:%d, not closed:%d", vgId, pMeta->closedTask, (numOfTasks - pMeta->closedTask)); - } - - taosWUnLockLatch(&pMeta->lock); - -_end: - tDecoderClear(&decoder); - - if (allStopped) { + taosHashClear(pMeta->pUpdateTaskSet); if (!pTq->pVnode->restored) { tqDebug("vgId:%d vnode restore not completed, not restart the tasks", vgId); + taosWUnLockLatch(&pMeta->lock); } else { - tqDebug("vgId:%d all tasks are stopped, restart them", vgId); - taosWLockLatch(&pMeta->lock); + tqDebug("vgId:%d tasks are all updated and stopped, restart them", vgId); terrno = 0; - int32_t code = streamMetaReopen(pMeta, 0); + int32_t code = streamMetaReopen(pMeta); if (code != 0) { tqError("vgId:%d failed to reopen stream meta", vgId); taosWUnLockLatch(&pMeta->lock); + taosArrayDestroy(req.pNodeList); return -1; } if (streamMetaLoadAllTasks(pTq->pStreamMeta) < 0) { tqError("vgId:%d failed to load stream tasks", vgId); taosWUnLockLatch(&pMeta->lock); + taosArrayDestroy(req.pNodeList); return -1; } - taosWUnLockLatch(&pMeta->lock); if (vnodeIsRoleLeader(pTq->pVnode) && !tsDisableStream) { vInfo("vgId:%d, restart all stream tasks", vgId); tqStartStreamTasks(pTq); tqCheckAndRunStreamTaskAsync(pTq); + } else { + vInfo("vgId:%d, follower node not start stream tasks", vgId); } + + pMeta->taskWillbeLaunched = 0; + taosWUnLockLatch(&pMeta->lock); } } + taosArrayDestroy(req.pNodeList); return rsp.code; } - diff --git a/source/dnode/vnode/src/tq/tqMeta.c b/source/dnode/vnode/src/tq/tqMeta.c index 85151c6e19..154ac1e8c1 100644 --- 
a/source/dnode/vnode/src/tq/tqMeta.c +++ b/source/dnode/vnode/src/tq/tqMeta.c @@ -312,14 +312,14 @@ static int buildHandle(STQ* pTq, STqHandle* handle){ return -1; } } else if (handle->execHandle.subType == TOPIC_SUB_TYPE__DB) { - handle->pWalReader = walOpenReader(pVnode->pWal, NULL); + handle->pWalReader = walOpenReader(pVnode->pWal, NULL, 0); handle->execHandle.pTqReader = tqReaderOpen(pVnode); buildSnapContext(reader.vnode, reader.version, 0, handle->execHandle.subType, handle->fetchMeta, (SSnapContext**)(&reader.sContext)); handle->execHandle.task = qCreateQueueExecTaskInfo(NULL, &reader, vgId, NULL, handle->consumerId); } else if (handle->execHandle.subType == TOPIC_SUB_TYPE__TABLE) { - handle->pWalReader = walOpenReader(pVnode->pWal, NULL); + handle->pWalReader = walOpenReader(pVnode->pWal, NULL, 0); if(handle->execHandle.execTb.qmsg != NULL && strcmp(handle->execHandle.execTb.qmsg, "") != 0) { if (nodesStringToNode(handle->execHandle.execTb.qmsg, &handle->execHandle.execTb.node) != 0) { diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index 0839a2bfa3..25a07a55d3 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -187,25 +187,26 @@ end: int32_t tqFetchLog(STQ* pTq, STqHandle* pHandle, int64_t* fetchOffset, uint64_t reqId) { int32_t code = -1; int32_t vgId = TD_VID(pTq->pVnode); + int64_t id = pHandle->pWalReader->readerId; int64_t offset = *fetchOffset; int64_t lastVer = walGetLastVer(pHandle->pWalReader->pWal); int64_t committedVer = walGetCommittedVer(pHandle->pWalReader->pWal); int64_t appliedVer = walGetAppliedVer(pHandle->pWalReader->pWal); - wDebug("vgId:%d, wal start to fetch, index:%" PRId64 ", last index:%" PRId64 " commit index:%" PRId64 ", applied index:%" PRId64, - vgId, offset, lastVer, committedVer, appliedVer); + wDebug("vgId:%d, wal start to fetch, index:%" PRId64 ", last index:%" PRId64 " commit index:%" PRId64 ", applied index:%" PRId64", 0x%"PRIx64, + vgId, 
offset, lastVer, committedVer, appliedVer, id); while (offset <= appliedVer) { if (walFetchHead(pHandle->pWalReader, offset) < 0) { tqDebug("tmq poll: consumer:0x%" PRIx64 ", (epoch %d) vgId:%d offset %" PRId64 - ", no more log to return, reqId:0x%" PRIx64, - pHandle->consumerId, pHandle->epoch, vgId, offset, reqId); + ", no more log to return, reqId:0x%" PRIx64 " 0x%" PRIx64, + pHandle->consumerId, pHandle->epoch, vgId, offset, reqId, id); goto END; } - tqDebug("vgId:%d, consumer:0x%" PRIx64 " taosx get msg ver %" PRId64 ", type: %s, reqId:0x%" PRIx64, vgId, - pHandle->consumerId, offset, TMSG_INFO(pHandle->pWalReader->pHead->head.msgType), reqId); + tqDebug("vgId:%d, consumer:0x%" PRIx64 " taosx get msg ver %" PRId64 ", type: %s, reqId:0x%" PRIx64" 0x%"PRIx64, vgId, + pHandle->consumerId, offset, TMSG_INFO(pHandle->pWalReader->pHead->head.msgType), reqId, id); if (pHandle->pWalReader->pHead->head.msgType == TDMT_VND_SUBMIT) { code = walFetchBody(pHandle->pWalReader); @@ -250,7 +251,7 @@ STqReader* tqReaderOpen(SVnode* pVnode) { return NULL; } - pReader->pWalReader = walOpenReader(pVnode->pWal, NULL); + pReader->pWalReader = walOpenReader(pVnode->pWal, NULL, 0); if (pReader->pWalReader == NULL) { taosMemoryFree(pReader); return NULL; @@ -491,11 +492,11 @@ bool tqNextBlockImpl(STqReader* pReader, const char* idstr) { void* ret = taosHashGet(pReader->tbIdHash, &pSubmitTbData->uid, sizeof(int64_t)); if (ret != NULL) { - tqDebug("block found, ver:%" PRId64 ", uid:%" PRId64", %s", pReader->msg.ver, pSubmitTbData->uid, idstr); + tqDebug("block found, ver:%" PRId64 ", uid:%" PRId64 ", %s", pReader->msg.ver, pSubmitTbData->uid, idstr); return true; } else { - tqInfo("discard submit block, uid:%" PRId64 ", total queried tables:%d continue %s", pSubmitTbData->uid, - taosHashGetSize(pReader->tbIdHash), idstr); + tqDebug("discard submit block, uid:%" PRId64 ", total queried tables:%d continue %s", pSubmitTbData->uid, + taosHashGetSize(pReader->tbIdHash), idstr); } 
pReader->nextBlk++; @@ -1128,7 +1129,7 @@ int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) { // update the table list handle for each stream scanner/wal reader taosWLockLatch(&pTq->pStreamMeta->lock); while (1) { - pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter); + pIter = taosHashIterate(pTq->pStreamMeta->pTasksMap, pIter); if (pIter == NULL) { break; } diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index f7132ff6c4..e0bae18545 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -24,10 +24,23 @@ typedef struct STableSinkInfo { tstr name; } STableSinkInfo; -static int32_t doSinkResultBlock(SVnode* pVnode, int32_t blockIndex, char* stbFullName, int64_t suid, - SSDataBlock* pDataBlock, SStreamTask* pTask); -static int32_t doSinkDeleteBlock(SVnode* pVnode, char* stbFullName, SSDataBlock* pDataBlock, SStreamTask* pTask, - int64_t suid); +static int32_t setDstTableDataUid(SVnode* pVnode, SStreamTask* pTask, SSDataBlock* pDataBlock, char* stbFullName, + SSubmitTbData* pTableData); +static int32_t setDstTableDataPayload(SStreamTask* pTask, int32_t blockIndex, SSDataBlock* pDataBlock, + SSubmitTbData* pTableData); +static int32_t doBuildAndSendDeleteMsg(SVnode* pVnode, char* stbFullName, SSDataBlock* pDataBlock, SStreamTask* pTask, + int64_t suid); +static int32_t tqBuildSubmitReq(SSubmitReq2* pSubmitReq, int32_t vgId, void** pMsg, int32_t* msgLen); +static int32_t tsAscendingSortFn(const void* p1, const void* p2); +static int32_t doConvertRows(SSubmitTbData* pTableData, STSchema* pTSchema, SSDataBlock* pDataBlock, const char* id); +static int32_t doWaitForDstTableCreated(SVnode* pVnode, SStreamTask* pTask, STableSinkInfo* pTableSinkInfo, + const char* dstTableName, int64_t* uid); +static int32_t doPutIntoCache(SSHashObj* pSinkTableMap, STableSinkInfo* pTableSinkInfo, uint64_t groupId, const char* id); +static SVCreateTbReq* buildAutoCreateTableReq(char* 
stbFullName, int64_t suid, int32_t numOfCols, + SSDataBlock* pDataBlock); +static bool isValidDstChildTable(SMetaReader* pReader, int32_t vgId, const char* ctbName, int64_t suid); +static int32_t doMergeExistedRows(SSubmitTbData* pExisted, const SSubmitTbData* pNew, const char* id); +static int32_t doBuildAndSendSubmitMsg(SVnode* pVnode, SStreamTask* pTask, SSubmitReq2* pReq, int32_t numOfBlocks); int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq, const char* pIdStr) { @@ -112,14 +125,6 @@ static bool tqGetTableInfo(SSHashObj* pTableInfoMap,uint64_t groupId, STableSink return false; } -static int32_t tqPutTableInfo(SSHashObj* tblInfo ,uint64_t groupId, STableSinkInfo* pTbl) { - if (tSimpleHashGetSize(tblInfo) > MAX_CACHE_TABLE_INFO_NUM) { - return TSDB_CODE_FAILED; - } - - return tSimpleHashPut(tblInfo, &groupId, sizeof(uint64_t), &pTbl, POINTER_BYTES); -} - static int32_t tqPutReqToQueue(SVnode* pVnode, SVCreateTbBatchReq* pReqs) { void* buf = NULL; int32_t tlen = 0; @@ -133,147 +138,202 @@ static int32_t tqPutReqToQueue(SVnode* pVnode, SVCreateTbBatchReq* pReqs) { return TSDB_CODE_SUCCESS; } +static int32_t doBuildAndSendCreateTableMsg(SVnode* pVnode, char* stbFullName, SSDataBlock* pDataBlock, SStreamTask* pTask, + int64_t suid) { + tqDebug("s-task:%s build create table msg", pTask->id.idStr); -void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, void* data) { - const SArray* pBlocks = (const SArray*)data; - SVnode* pVnode = (SVnode*)vnode; - int64_t suid = pTask->tbSink.stbUid; - char* stbFullName = pTask->tbSink.stbFullName; - STSchema* pTSchema = pTask->tbSink.pTSchema; - int32_t vgId = TD_VID(pVnode); - int32_t numOfBlocks = taosArrayGetSize(pBlocks); - int32_t code = TSDB_CODE_SUCCESS; + STSchema* pTSchema = pTask->tbSink.pTSchema; + int32_t rows = pDataBlock->info.rows; + SArray* tagArray = NULL; + int32_t code = 0; - tqDebug("vgId:%d, s-task:%s write %d stream resBlock(s) into table", vgId, 
pTask->id.idStr, numOfBlocks); + SVCreateTbBatchReq reqs = {0}; - SArray* tagArray = NULL; - SArray* pVals = NULL; - SArray* crTblArray = NULL; + SArray* crTblArray = reqs.pArray = taosArrayInit(1, sizeof(SVCreateTbReq)); + if (NULL == reqs.pArray) { + goto _end; + } - for (int32_t i = 0; i < numOfBlocks; i++) { - SSDataBlock* pDataBlock = taosArrayGet(pBlocks, i); - int32_t rows = pDataBlock->info.rows; + for (int32_t rowId = 0; rowId < rows; rowId++) { + SVCreateTbReq* pCreateTbReq = &((SVCreateTbReq){0}); - if (pDataBlock->info.type == STREAM_DELETE_RESULT) { - code = doSinkDeleteBlock(pVnode, stbFullName, pDataBlock, pTask, suid); - } else if (pDataBlock->info.type == STREAM_CREATE_CHILD_TABLE) { - tqDebug("s-task:%s build create table msg", pTask->id.idStr); + // set const + pCreateTbReq->flags = 0; + pCreateTbReq->type = TSDB_CHILD_TABLE; + pCreateTbReq->ctb.suid = suid; - SVCreateTbBatchReq reqs = {0}; - crTblArray = reqs.pArray = taosArrayInit(1, sizeof(SVCreateTbReq)); - if (NULL == reqs.pArray) { + // set super table name + SName name = {0}; + tNameFromString(&name, stbFullName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); + pCreateTbReq->ctb.stbName = taosStrdup((char*)tNameGetTableName(&name)); // taosStrdup(stbFullName); + + // set tag content + int32_t size = taosArrayGetSize(pDataBlock->pDataBlock); + if (size == 2) { + tagArray = taosArrayInit(1, sizeof(STagVal)); + if (!tagArray) { + tdDestroySVCreateTbReq(pCreateTbReq); goto _end; } - for (int32_t rowId = 0; rowId < rows; rowId++) { - SVCreateTbReq* pCreateTbReq = &((SVCreateTbReq){0}); + STagVal tagVal = { + .cid = pTSchema->numOfCols + 1, .type = TSDB_DATA_TYPE_UBIGINT, .i64 = pDataBlock->info.id.groupId}; - // set const - pCreateTbReq->flags = 0; - pCreateTbReq->type = TSDB_CHILD_TABLE; - pCreateTbReq->ctb.suid = suid; + taosArrayPush(tagArray, &tagVal); - // set super table name - SName name = {0}; - tNameFromString(&name, stbFullName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); - 
pCreateTbReq->ctb.stbName = taosStrdup((char*)tNameGetTableName(&name)); // taosStrdup(stbFullName); - - // set tag content - int32_t size = taosArrayGetSize(pDataBlock->pDataBlock); - if (size == 2) { - tagArray = taosArrayInit(1, sizeof(STagVal)); - - if (!tagArray) { - tdDestroySVCreateTbReq(pCreateTbReq); - goto _end; - } - - STagVal tagVal = { - .cid = pTSchema->numOfCols + 1, .type = TSDB_DATA_TYPE_UBIGINT, .i64 = pDataBlock->info.id.groupId}; - - taosArrayPush(tagArray, &tagVal); - - // set tag name - SArray* tagName = taosArrayInit(1, TSDB_COL_NAME_LEN); - char tagNameStr[TSDB_COL_NAME_LEN] = "group_id"; - taosArrayPush(tagName, tagNameStr); - pCreateTbReq->ctb.tagName = tagName; - } else { - tagArray = taosArrayInit(size - 1, sizeof(STagVal)); - if (!tagArray) { - tdDestroySVCreateTbReq(pCreateTbReq); - goto _end; - } - - for (int32_t tagId = UD_TAG_COLUMN_INDEX, step = 1; tagId < size; tagId++, step++) { - SColumnInfoData* pTagData = taosArrayGet(pDataBlock->pDataBlock, tagId); - - STagVal tagVal = {.cid = pTSchema->numOfCols + step, .type = pTagData->info.type}; - void* pData = colDataGetData(pTagData, rowId); - if (colDataIsNull_s(pTagData, rowId)) { - continue; - } else if (IS_VAR_DATA_TYPE(pTagData->info.type)) { - tagVal.nData = varDataLen(pData); - tagVal.pData = (uint8_t*) varDataVal(pData); - } else { - memcpy(&tagVal.i64, pData, pTagData->info.bytes); - } - taosArrayPush(tagArray, &tagVal); - } - - } - pCreateTbReq->ctb.tagNum = TMAX(size - UD_TAG_COLUMN_INDEX, 1); - - STag* pTag = NULL; - tTagNew(tagArray, 1, false, &pTag); - tagArray = taosArrayDestroy(tagArray); - if (pTag == NULL) { - tdDestroySVCreateTbReq(pCreateTbReq); - terrno = TSDB_CODE_OUT_OF_MEMORY; - goto _end; - } - - pCreateTbReq->ctb.pTag = (uint8_t*)pTag; - - // set table name - if (!pDataBlock->info.parTbName[0]) { - SColumnInfoData* pGpIdColInfo = taosArrayGet(pDataBlock->pDataBlock, UD_GROUPID_COLUMN_INDEX); - void* pGpIdData = colDataGetData(pGpIdColInfo, rowId); - 
pCreateTbReq->name = buildCtbNameByGroupId(stbFullName, *(uint64_t*)pGpIdData); - } else { - pCreateTbReq->name = taosStrdup(pDataBlock->info.parTbName); - } - - taosArrayPush(reqs.pArray, pCreateTbReq); - tqDebug("s-task:%s build create table:%s msg complete", pTask->id.idStr, pCreateTbReq->name); - } - - reqs.nReqs = taosArrayGetSize(reqs.pArray); - if (tqPutReqToQueue(pVnode, &reqs) != TSDB_CODE_SUCCESS) { - goto _end; - } - - tagArray = taosArrayDestroy(tagArray); - taosArrayDestroyEx(crTblArray, (FDelete)tdDestroySVCreateTbReq); - crTblArray = NULL; - } else if (pDataBlock->info.type == STREAM_CHECKPOINT) { - continue; + // set tag name + SArray* tagName = taosArrayInit(1, TSDB_COL_NAME_LEN); + char tagNameStr[TSDB_COL_NAME_LEN] = "group_id"; + taosArrayPush(tagName, tagNameStr); + pCreateTbReq->ctb.tagName = tagName; } else { - code = doSinkResultBlock(pVnode, i, stbFullName, suid, pDataBlock, pTask); + tagArray = taosArrayInit(size - 1, sizeof(STagVal)); + if (!tagArray) { + tdDestroySVCreateTbReq(pCreateTbReq); + goto _end; + } + + for (int32_t tagId = UD_TAG_COLUMN_INDEX, step = 1; tagId < size; tagId++, step++) { + SColumnInfoData* pTagData = taosArrayGet(pDataBlock->pDataBlock, tagId); + + STagVal tagVal = {.cid = pTSchema->numOfCols + step, .type = pTagData->info.type}; + void* pData = colDataGetData(pTagData, rowId); + if (colDataIsNull_s(pTagData, rowId)) { + continue; + } else if (IS_VAR_DATA_TYPE(pTagData->info.type)) { + tagVal.nData = varDataLen(pData); + tagVal.pData = (uint8_t*)varDataVal(pData); + } else { + memcpy(&tagVal.i64, pData, pTagData->info.bytes); + } + taosArrayPush(tagArray, &tagVal); + } + } + pCreateTbReq->ctb.tagNum = TMAX(size - UD_TAG_COLUMN_INDEX, 1); + + STag* pTag = NULL; + tTagNew(tagArray, 1, false, &pTag); + tagArray = taosArrayDestroy(tagArray); + if (pTag == NULL) { + tdDestroySVCreateTbReq(pCreateTbReq); + code = TSDB_CODE_OUT_OF_MEMORY; + goto _end; + } + + pCreateTbReq->ctb.pTag = (uint8_t*)pTag; + + // set table 
name + if (!pDataBlock->info.parTbName[0]) { + SColumnInfoData* pGpIdColInfo = taosArrayGet(pDataBlock->pDataBlock, UD_GROUPID_COLUMN_INDEX); + + void* pGpIdData = colDataGetData(pGpIdColInfo, rowId); + pCreateTbReq->name = buildCtbNameByGroupId(stbFullName, *(uint64_t*)pGpIdData); + } else { + pCreateTbReq->name = taosStrdup(pDataBlock->info.parTbName); + } + + taosArrayPush(reqs.pArray, pCreateTbReq); + tqDebug("s-task:%s build create table:%s msg complete", pTask->id.idStr, pCreateTbReq->name); + } + + reqs.nReqs = taosArrayGetSize(reqs.pArray); + code = tqPutReqToQueue(pVnode, &reqs); + if (code != TSDB_CODE_SUCCESS) { + tqError("s-task:%s failed to send create table msg", pTask->id.idStr); + } + + _end: + taosArrayDestroy(tagArray); + taosArrayDestroyEx(crTblArray, (FDelete)tdDestroySVCreateTbReq); + return code; +} + +int32_t doBuildAndSendSubmitMsg(SVnode* pVnode, SStreamTask* pTask, SSubmitReq2* pReq, int32_t numOfBlocks) { + const char* id = pTask->id.idStr; + int32_t vgId = TD_VID(pVnode); + int32_t len = 0; + void* pBuf = NULL; + int32_t numOfFinalBlocks = taosArrayGetSize(pReq->aSubmitTbData); + + int32_t code = tqBuildSubmitReq(pReq, vgId, &pBuf, &len); + if (code != TSDB_CODE_SUCCESS) { + tqError("s-task:%s build submit msg failed, vgId:%d, code:%s", id, vgId, tstrerror(code)); + return code; + } + + SRpcMsg msg = {.msgType = TDMT_VND_SUBMIT, .pCont = pBuf, .contLen = len}; + code = tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg); + if (code == TSDB_CODE_SUCCESS) { + tqDebug("s-task:%s vgId:%d comp %d blocks into %d and send to dstTable(s) completed", id, vgId, numOfBlocks, + numOfFinalBlocks); + } else { + tqError("s-task:%s failed to put into write-queue since %s", id, terrstr()); + } + + pTask->sinkRecorder.numOfSubmit += 1; + + if ((pTask->sinkRecorder.numOfSubmit % 5000) == 0) { + SSinkTaskRecorder* pRec = &pTask->sinkRecorder; + double el = (taosGetTimestampMs() - pTask->taskExecInfo.start) / 1000.0; + tqInfo("s-task:%s vgId:%d write %" PRId64 
" blocks (%" PRId64 " rows) in %" PRId64 + " submit into dst table, duration:%.2f Sec.", + pTask->id.idStr, vgId, pRec->numOfBlocks, pRec->numOfRows, pRec->numOfSubmit, el); + } + + return TSDB_CODE_SUCCESS; +} + +// merge the new submit table block with the existed blocks +// if ts in the new data block overlap with existed one, replace it +int32_t doMergeExistedRows(SSubmitTbData* pExisted, const SSubmitTbData* pNew, const char* id) { + int32_t oldLen = taosArrayGetSize(pExisted->aRowP); + int32_t newLen = taosArrayGetSize(pNew->aRowP); + + int32_t j = 0, k = 0; + SArray* pFinal = taosArrayInit(oldLen + newLen, POINTER_BYTES); + if (pFinal == NULL) { + tqError("s-task:%s failed to prepare merge result datablock, code:%s", id, tstrerror(TSDB_CODE_OUT_OF_MEMORY)); + return TSDB_CODE_OUT_OF_MEMORY; + } + + while (j < newLen && k < oldLen) { + SRow* pNewRow = taosArrayGetP(pNew->aRowP, j); + SRow* pOldRow = taosArrayGetP(pExisted->aRowP, k); + if (pNewRow->ts <= pOldRow->ts) { + taosArrayPush(pFinal, &pNewRow); + j += 1; + + if (pNewRow->ts == pOldRow->ts) { + k += 1; + tRowDestroy(pOldRow); + } + } else { + taosArrayPush(pFinal, &pOldRow); + k += 1; } } - tqDebug("vgId:%d, s-task:%s write results completed", vgId, pTask->id.idStr); + while (j < newLen) { + SRow* pRow = taosArrayGetP(pNew->aRowP, j++); + taosArrayPush(pFinal, &pRow); + } -_end: - taosArrayDestroy(tagArray); - taosArrayDestroy(pVals); - taosArrayDestroyEx(crTblArray, (FDelete)tdDestroySVCreateTbReq); - // TODO: change + while (k < oldLen) { + SRow* pRow = taosArrayGetP(pExisted->aRowP, k++); + taosArrayPush(pFinal, &pRow); + } + + taosArrayDestroy(pNew->aRowP); + taosArrayDestroy(pExisted->aRowP); + pExisted->aRowP = pFinal; + + tqDebug("s-task:%s rows merged, final rows:%d, uid:%" PRId64 ", existed auto-create table:%d, new-block:%d", id, + (int32_t)taosArrayGetSize(pFinal), pExisted->uid, (pExisted->pCreateTbReq != NULL), (pNew->pCreateTbReq != NULL)); + return TSDB_CODE_SUCCESS; } -int32_t 
doSinkDeleteBlock(SVnode* pVnode, char* stbFullName, SSDataBlock* pDataBlock, SStreamTask* pTask, - int64_t suid) { +int32_t doBuildAndSendDeleteMsg(SVnode* pVnode, char* stbFullName, SSDataBlock* pDataBlock, SStreamTask* pTask, + int64_t suid) { SBatchDeleteReq deleteReq = {.suid = suid, .deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq))}; int32_t code = tqBuildDeleteReq(stbFullName, pDataBlock, &deleteReq, pTask->id.idStr); @@ -311,22 +371,25 @@ int32_t doSinkDeleteBlock(SVnode* pVnode, char* stbFullName, SSDataBlock* pDataB return TSDB_CODE_SUCCESS; } -static bool isValidDestChildTable(SMetaReader* pReader, int32_t vgId, char* ctbName, int64_t suid) { +bool isValidDstChildTable(SMetaReader* pReader, int32_t vgId, const char* ctbName, int64_t suid) { if (pReader->me.type != TSDB_CHILD_TABLE) { tqError("vgId:%d, failed to write into %s, since table type:%d incorrect", vgId, ctbName, pReader->me.type); + terrno = TSDB_CODE_TDB_INVALID_TABLE_TYPE; return false; } if (pReader->me.ctbEntry.suid != suid) { tqError("vgId:%d, failed to write into %s, since suid mismatch, expect suid:%" PRId64 ", actual:%" PRId64, vgId, ctbName, suid, pReader->me.ctbEntry.suid); + terrno = TSDB_CODE_TDB_TABLE_IN_OTHER_STABLE; return false; } + terrno = 0; return true; } -static SVCreateTbReq* buildAutoCreateTableReq(char* stbFullName, int64_t suid, int32_t numOfCols, SSDataBlock* pDataBlock) { +SVCreateTbReq* buildAutoCreateTableReq(char* stbFullName, int64_t suid, int32_t numOfCols, SSDataBlock* pDataBlock) { char* ctbName = pDataBlock->info.parTbName; SVCreateTbReq* pCreateTbReq = taosMemoryCalloc(1, sizeof(SVCreateStbReq)); @@ -383,14 +446,14 @@ static SVCreateTbReq* buildAutoCreateTableReq(char* stbFullName, int64_t suid, i return pCreateTbReq; } -static int32_t doPutIntoCache(SSHashObj* pSinkTableMap, STableSinkInfo* pTableSinkInfo, uint64_t groupId, uint64_t uid, - const char* id) { - pTableSinkInfo->uid = uid; +int32_t doPutIntoCache(SSHashObj* pSinkTableMap, STableSinkInfo* 
pTableSinkInfo, uint64_t groupId, const char* id) { + if (tSimpleHashGetSize(pSinkTableMap) > MAX_CACHE_TABLE_INFO_NUM) { + return TSDB_CODE_FAILED; + } - int32_t code = tqPutTableInfo(pSinkTableMap, groupId, pTableSinkInfo); + int32_t code = tSimpleHashPut(pSinkTableMap, &groupId, sizeof(uint64_t), &pTableSinkInfo, POINTER_BYTES); if (code != TSDB_CODE_SUCCESS) { taosMemoryFreeClear(pTableSinkInfo); - tqError("s-task:%s failed to put tableSinkInfo in to cache, code:%s", id, tstrerror(code)); } else { tqDebug("s-task:%s new dst table:%s(uid:%" PRIu64 ") added into cache, total:%d", id, pTableSinkInfo->name.data, pTableSinkInfo->uid, tSimpleHashGetSize(pSinkTableMap)); @@ -399,147 +462,72 @@ static int32_t doPutIntoCache(SSHashObj* pSinkTableMap, STableSinkInfo* pTableSi return code; } -int32_t doSinkResultBlock(SVnode* pVnode, int32_t blockIndex, char* stbFullName, int64_t suid, SSDataBlock* pDataBlock, - SStreamTask* pTask) { - int32_t numOfRows = pDataBlock->info.rows; - int32_t vgId = TD_VID(pVnode); - uint64_t groupId = pDataBlock->info.id.groupId; - STSchema* pTSchema = pTask->tbSink.pTSchema; - int32_t code = TSDB_CODE_SUCCESS; - void* pBuf = NULL; - SArray* pVals = NULL; - const char* id = pTask->id.idStr; +int32_t tqBuildSubmitReq(SSubmitReq2* pSubmitReq, int32_t vgId, void** pMsg, int32_t* msgLen) { + int32_t code = 0; + void* pBuf = NULL; + *msgLen = 0; - SSubmitTbData tbData = {.suid = suid, .uid = 0, .sver = pTSchema->version}; - tqDebug("s-task:%s sink data pipeline, build submit msg from %d-th resBlock, including %d rows, dst suid:%" PRId64, - id, blockIndex + 1, numOfRows, suid); + // encode + int32_t len = 0; + tEncodeSize(tEncodeSubmitReq, pSubmitReq, len, code); - tbData.aRowP = taosArrayInit(numOfRows, sizeof(SRow*)); - pVals = taosArrayInit(pTSchema->numOfCols, sizeof(SColVal)); + SEncoder encoder; + len += sizeof(SSubmitReq2Msg); - if (tbData.aRowP == NULL || pVals == NULL) { - taosArrayDestroy(tbData.aRowP); - taosArrayDestroy(pVals); + pBuf = 
rpcMallocCont(len); + if (NULL == pBuf) { + tDestroySubmitReq(pSubmitReq, TSDB_MSG_FLG_ENCODE); + return TSDB_CODE_OUT_OF_MEMORY; + } - code = TSDB_CODE_OUT_OF_MEMORY; - tqError("s-task:%s vgId:%d failed to prepare write stream res blocks, code:%s", id, vgId, tstrerror(code)); + ((SSubmitReq2Msg*)pBuf)->header.vgId = vgId; + ((SSubmitReq2Msg*)pBuf)->header.contLen = htonl(len); + ((SSubmitReq2Msg*)pBuf)->version = htobe64(1); + + tEncoderInit(&encoder, POINTER_SHIFT(pBuf, sizeof(SSubmitReq2Msg)), len - sizeof(SSubmitReq2Msg)); + if (tEncodeSubmitReq(&encoder, pSubmitReq) < 0) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + tqError("failed to encode submit req, code:%s, ignore and continue", terrstr()); + tEncoderClear(&encoder); + rpcFreeCont(pBuf); + tDestroySubmitReq(pSubmitReq, TSDB_MSG_FLG_ENCODE); return code; } - STableSinkInfo* pTableSinkInfo = NULL; - bool exist = tqGetTableInfo(pTask->tbSink.pTblInfo, groupId, &pTableSinkInfo); + tEncoderClear(&encoder); + tDestroySubmitReq(pSubmitReq, TSDB_MSG_FLG_ENCODE); - char* dstTableName = pDataBlock->info.parTbName; - if (exist) { - if (dstTableName[0] == 0) { - tstrncpy(dstTableName, pTableSinkInfo->name.data, pTableSinkInfo->name.len + 1); - tqDebug("s-task:%s vgId:%d, gropuId:%" PRIu64 " datablock table name is null, set name:%s", id, vgId, groupId, - dstTableName); - } else { - if (pTableSinkInfo->uid != 0) { - tqDebug("s-task:%s write %d rows into groupId:%" PRIu64 " dstTable:%s(uid:%" PRIu64 ")", id, numOfRows, groupId, - dstTableName, pTableSinkInfo->uid); - } else { - tqDebug("s-task:%s write %d rows into groupId:%" PRIu64 " dstTable:%s(not set uid yet for the secondary block)", - id, numOfRows, groupId, dstTableName); - } - } - } else { // not exist - if (dstTableName[0] == 0) { - memset(dstTableName, 0, TSDB_TABLE_NAME_LEN); - buildCtbNameByGroupIdImpl(stbFullName, groupId, dstTableName); - } + *msgLen = len; + *pMsg = pBuf; + return TSDB_CODE_SUCCESS; +} - int32_t nameLen = strlen(dstTableName); - pTableSinkInfo 
= taosMemoryCalloc(1, sizeof(STableSinkInfo) + nameLen); - - pTableSinkInfo->name.len = nameLen; - memcpy(pTableSinkInfo->name.data, dstTableName, nameLen); - tqDebug("s-task:%s build new sinkTableInfo to add cache, dstTable:%s", id, dstTableName); - } - - if (exist) { - tbData.uid = pTableSinkInfo->uid; - - if (tbData.uid == 0) { - tqDebug("s-task:%s cached tableInfo uid is invalid, acquire it from meta", id); - } - - while (pTableSinkInfo->uid == 0) { - // wait for the table to be created - SMetaReader mr = {0}; - metaReaderDoInit(&mr, pVnode->pMeta, 0); - - code = metaGetTableEntryByName(&mr, dstTableName); - if (code == 0) { // table alreay exists, check its type and uid - bool isValid = isValidDestChildTable(&mr, vgId, dstTableName, suid); - if (!isValid) { // not valid table, ignore it - metaReaderClear(&mr); - - taosArrayDestroy(tbData.aRowP); - taosArrayDestroy(pVals); - - return TSDB_CODE_SUCCESS; - } else { - tqDebug("s-task:%s set uid:%"PRIu64" for dstTable:%s from meta", id, mr.me.uid, pTableSinkInfo->name.data); - - tbData.uid = mr.me.uid; - pTableSinkInfo->uid = mr.me.uid; - metaReaderClear(&mr); - } - } else { // not exist, wait and retry - metaReaderClear(&mr); - taosMsleep(100); - tqDebug("s-task:%s wait for the table:%s ready before insert data", id, dstTableName); - } - } +int32_t tsAscendingSortFn(const void* p1, const void* p2) { + SRow* pRow1 = *(SRow**) p1; + SRow* pRow2 = *(SRow**) p2; + if (pRow1->ts == pRow2->ts) { + return 0; } else { - // todo: this check is not safe, and results in losing of submit message from WAL. - // The auto-create option will always set to be open for those submit messages, which arrive during the period - // the creating of the destination table, due to the absence of the user-specified table in TSDB. When scanning - // data from WAL, those submit messages, with auto-created table option, will be discarded expect the first, for - // those mismatched table uids. 
Only the FIRST table has the correct table uid, and those remain all have - // randomly generated false table uid in the WAL. - SMetaReader mr = {0}; - metaReaderDoInit(&mr, pVnode->pMeta, 0); + return pRow1->ts > pRow2->ts? 1:-1; + } +} - // table not in cache, let's try the extract it from tsdb meta - if (metaGetTableEntryByName(&mr, dstTableName) < 0) { - metaReaderClear(&mr); +int32_t doConvertRows(SSubmitTbData* pTableData, STSchema* pTSchema, SSDataBlock* pDataBlock, const char* id) { + int32_t numOfRows = pDataBlock->info.rows; + int32_t code = TSDB_CODE_SUCCESS; - tqDebug("s-task:%s stream write into table:%s, table auto created", id, dstTableName); + SArray* pVals = taosArrayInit(pTSchema->numOfCols, sizeof(SColVal)); + pTableData->aRowP = taosArrayInit(numOfRows, sizeof(SRow*)); - tbData.flags = SUBMIT_REQ_AUTO_CREATE_TABLE; - tbData.pCreateTbReq = buildAutoCreateTableReq(stbFullName, suid, pTSchema->numOfCols + 1, pDataBlock); - if (tbData.pCreateTbReq == NULL) { - tqError("s-task:%s failed to build auto create table req, code:%s", id, tstrerror(terrno)); - - taosArrayDestroy(tbData.aRowP); - taosArrayDestroy(pVals); - - return terrno; - } - - doPutIntoCache(pTask->tbSink.pTblInfo, pTableSinkInfo, groupId, 0, id); - } else { - bool isValid = isValidDestChildTable(&mr, vgId, dstTableName, suid); - if (!isValid) { - metaReaderClear(&mr); - taosMemoryFree(pTableSinkInfo); - taosArrayDestroy(tbData.aRowP); - taosArrayDestroy(pVals); - return TSDB_CODE_SUCCESS; - } else { - tbData.uid = mr.me.uid; - metaReaderClear(&mr); - - doPutIntoCache(pTask->tbSink.pTblInfo, pTableSinkInfo, groupId, tbData.uid, id); - } - } + if (pTableData->aRowP == NULL || pVals == NULL) { + pTableData->aRowP = taosArrayDestroy(pTableData->aRowP); + taosArrayDestroy(pVals); + code = TSDB_CODE_OUT_OF_MEMORY; + tqError("s-task:%s failed to prepare write stream res blocks, code:%s", id, tstrerror(code)); + return code; } - // rows for (int32_t j = 0; j < numOfRows; j++) { 
taosArrayClear(pVals); @@ -549,7 +537,7 @@ int32_t doSinkResultBlock(SVnode* pVnode, int32_t blockIndex, char* stbFullName, if (k == 0) { SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, dataIndex); void* colData = colDataGetData(pColData, j); - tqDebug("s-task:%s tq sink pipe2, row %d, col %d ts %" PRId64, id, j, k, *(int64_t*)colData); + tqDebug("s-task:%s sink row %d, col %d ts %" PRId64, id, j, k, *(int64_t*)colData); } if (IS_SET_NULL(pCol)) { @@ -565,7 +553,7 @@ int32_t doSinkResultBlock(SVnode* pVnode, int32_t blockIndex, char* stbFullName, void* colData = colDataGetData(pColData, j); if (IS_STR_DATA_TYPE(pCol->type)) { // address copy, no value - SValue sv = (SValue){.nData = varDataLen(colData), .pData = varDataVal(colData)}; + SValue sv = (SValue){.nData = varDataLen(colData), .pData = (uint8_t*) varDataVal(colData)}; SColVal cv = COL_VAL_VALUE(pCol->colId, pCol->type, sv); taosArrayPush(pVals, &cv); } else { @@ -582,69 +570,313 @@ int32_t doSinkResultBlock(SVnode* pVnode, int32_t blockIndex, char* stbFullName, SRow* pRow = NULL; code = tRowBuild(pVals, (STSchema*)pTSchema, &pRow); if (code != TSDB_CODE_SUCCESS) { - tDestroySubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE); - - taosArrayDestroy(tbData.aRowP); + tDestroySubmitTbData(pTableData, TSDB_MSG_FLG_ENCODE); + pTableData->aRowP = taosArrayDestroy(pTableData->aRowP); taosArrayDestroy(pVals); return code; } ASSERT(pRow); - taosArrayPush(tbData.aRowP, &pRow); - } - - SSubmitReq2 submitReq = {0}; - if (!(submitReq.aSubmitTbData = taosArrayInit(1, sizeof(SSubmitTbData)))) { - tDestroySubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE); - - taosArrayDestroy(tbData.aRowP); - taosArrayDestroy(pVals); - return TSDB_CODE_OUT_OF_MEMORY; - } - - taosArrayPush(submitReq.aSubmitTbData, &tbData); - - // encode - int32_t len = 0; - tEncodeSize(tEncodeSubmitReq, &submitReq, len, code); - - SEncoder encoder; - len += sizeof(SSubmitReq2Msg); - - pBuf = rpcMallocCont(len); - if (NULL == pBuf) { - 
tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); - taosArrayDestroy(tbData.aRowP); - taosArrayDestroy(pVals); - } - - ((SSubmitReq2Msg*)pBuf)->header.vgId = vgId; - ((SSubmitReq2Msg*)pBuf)->header.contLen = htonl(len); - ((SSubmitReq2Msg*)pBuf)->version = htobe64(1); - - tEncoderInit(&encoder, POINTER_SHIFT(pBuf, sizeof(SSubmitReq2Msg)), len - sizeof(SSubmitReq2Msg)); - if (tEncodeSubmitReq(&encoder, &submitReq) < 0) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - tqError("failed to encode submit req, code:%s, ignore and continue", terrstr()); - tEncoderClear(&encoder); - rpcFreeCont(pBuf); - tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); - - return code; - } - - tEncoderClear(&encoder); - tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); - - SRpcMsg msg = { .msgType = TDMT_VND_SUBMIT, .pCont = pBuf, .contLen = len }; - code = tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg); - - if(code == TSDB_CODE_SUCCESS) { - tqDebug("s-task:%s send submit msg to dstTable:%s, numOfRows:%d", id, pTableSinkInfo->name.data, numOfRows); - } else { - tqError("s-task:%s failed to put into write-queue since %s", id, terrstr()); + taosArrayPush(pTableData->aRowP, &pRow); } taosArrayDestroy(pVals); + return TSDB_CODE_SUCCESS; +} + +int32_t doWaitForDstTableCreated(SVnode* pVnode, SStreamTask* pTask, STableSinkInfo* pTableSinkInfo, + const char* dstTableName, int64_t* uid) { + int32_t vgId = TD_VID(pVnode); + int64_t suid = pTask->tbSink.stbUid; + const char* id = pTask->id.idStr; + + while (pTableSinkInfo->uid == 0) { + if (streamTaskShouldStop(&pTask->status)) { + tqDebug("s-task:%s task will stop, quit from waiting for table:%s create", id, dstTableName); + return TSDB_CODE_STREAM_EXEC_CANCELLED; + } + + // wait for the table to be created + SMetaReader mr = {0}; + metaReaderDoInit(&mr, pVnode->pMeta, 0); + + int32_t code = metaGetTableEntryByName(&mr, dstTableName); + if (code == 0) { // table already exists, check its type and uid + bool isValid = isValidDstChildTable(&mr, vgId, 
dstTableName, suid); + if (isValid) { // not valid table, ignore it + tqDebug("s-task:%s set uid:%" PRIu64 " for dstTable:%s from meta", id, mr.me.uid, pTableSinkInfo->name.data); + ASSERT(terrno == 0); + + // set the destination table uid + (*uid) = mr.me.uid; + pTableSinkInfo->uid = mr.me.uid; + } + + metaReaderClear(&mr); + return terrno; + } else { // not exist, wait and retry + metaReaderClear(&mr); + taosMsleep(100); + tqDebug("s-task:%s wait 100ms for the table:%s ready before insert data", id, dstTableName); + } + } + + return TSDB_CODE_SUCCESS; +} + +int32_t setDstTableDataUid(SVnode* pVnode, SStreamTask* pTask, SSDataBlock* pDataBlock, char* stbFullName, + SSubmitTbData* pTableData) { + uint64_t groupId = pDataBlock->info.id.groupId; + char* dstTableName = pDataBlock->info.parTbName; + int32_t numOfRows = pDataBlock->info.rows; + const char* id = pTask->id.idStr; + int64_t suid = pTask->tbSink.stbUid; + STSchema* pTSchema = pTask->tbSink.pTSchema; + int32_t vgId = TD_VID(pVnode); + STableSinkInfo* pTableSinkInfo = NULL; + + bool alreadyCached = tqGetTableInfo(pTask->tbSink.pTblInfo, groupId, &pTableSinkInfo); + + if (alreadyCached) { + if (dstTableName[0] == 0) { // data block does not set the destination table name + tstrncpy(dstTableName, pTableSinkInfo->name.data, pTableSinkInfo->name.len + 1); + tqDebug("s-task:%s vgId:%d, gropuId:%" PRIu64 " datablock table name is null, set name:%s", id, vgId, groupId, + dstTableName); + } else { + if (pTableSinkInfo->uid != 0) { + tqDebug("s-task:%s write %d rows into groupId:%" PRIu64 " dstTable:%s(uid:%" PRIu64 ")", id, numOfRows, groupId, + dstTableName, pTableSinkInfo->uid); + } else { + tqDebug("s-task:%s write %d rows into groupId:%" PRIu64 " dstTable:%s(not set uid yet for the secondary block)", + id, numOfRows, groupId, dstTableName); + } + } + } else { // this groupId has not been kept in cache yet + if (dstTableName[0] == 0) { + memset(dstTableName, 0, TSDB_TABLE_NAME_LEN); + 
buildCtbNameByGroupIdImpl(stbFullName, groupId, dstTableName); + } + + int32_t nameLen = strlen(dstTableName); + pTableSinkInfo = taosMemoryCalloc(1, sizeof(STableSinkInfo) + nameLen); + if (pTableSinkInfo == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + pTableSinkInfo->name.len = nameLen; + memcpy(pTableSinkInfo->name.data, dstTableName, nameLen); + tqDebug("s-task:%s build new sinkTableInfo to add cache, dstTable:%s", id, dstTableName); + } + + if (alreadyCached) { + pTableData->uid = pTableSinkInfo->uid; + + if (pTableData->uid == 0) { + tqDebug("s-task:%s cached tableInfo uid is invalid, acquire it from meta", id); + return doWaitForDstTableCreated(pVnode, pTask, pTableSinkInfo, dstTableName, &pTableData->uid); + } else { + tqDebug("s-task:%s set the dstTable uid from cache:%"PRId64, id, pTableData->uid); + } + } else { + // The auto-create option will always set to be open for those submit messages, which arrive during the period + // the creating of the destination table, due to the absence of the user-specified table in TSDB. When scanning + // data from WAL, those submit messages, with auto-created table option, will be discarded expect the first, for + // those mismatched table uids. Only the FIRST table has the correct table uid, and those remain all have + // randomly generated, but false table uid in the WAL. 
+ SMetaReader mr = {0}; + metaReaderDoInit(&mr, pVnode->pMeta, 0); + + // table not in cache, let's try the extract it from tsdb meta + if (metaGetTableEntryByName(&mr, dstTableName) < 0) { + metaReaderClear(&mr); + + tqDebug("s-task:%s stream write into table:%s, table auto created", id, dstTableName); + + pTableData->flags = SUBMIT_REQ_AUTO_CREATE_TABLE; + pTableData->pCreateTbReq = buildAutoCreateTableReq(stbFullName, suid, pTSchema->numOfCols + 1, pDataBlock); + if (pTableData->pCreateTbReq == NULL) { + tqError("s-task:%s failed to build auto create table req, code:%s", id, tstrerror(terrno)); + taosMemoryFree(pTableSinkInfo); + return terrno; + } + + pTableSinkInfo->uid = 0; + doPutIntoCache(pTask->tbSink.pTblInfo, pTableSinkInfo, groupId, id); + } else { + bool isValid = isValidDstChildTable(&mr, vgId, dstTableName, suid); + if (!isValid) { + metaReaderClear(&mr); + taosMemoryFree(pTableSinkInfo); + tqError("s-task:%s vgId:%d table:%s already exists, but not child table, stream results is discarded", id, vgId, + dstTableName); + return terrno; + } else { + pTableData->uid = mr.me.uid; + pTableSinkInfo->uid = mr.me.uid; + + metaReaderClear(&mr); + doPutIntoCache(pTask->tbSink.pTblInfo, pTableSinkInfo, groupId, id); + } + } + } + + return TSDB_CODE_SUCCESS; +} + +int32_t setDstTableDataPayload(SStreamTask* pTask, int32_t blockIndex, SSDataBlock* pDataBlock, + SSubmitTbData* pTableData) { + int32_t numOfRows = pDataBlock->info.rows; + const char* id = pTask->id.idStr; + + tqDebug("s-task:%s sink data pipeline, build submit msg from %dth resBlock, including %d rows, dst suid:%" PRId64, + id, blockIndex + 1, numOfRows, pTask->tbSink.stbUid); + char* dstTableName = pDataBlock->info.parTbName; + + // convert all rows + int32_t code = doConvertRows(pTableData, pTask->tbSink.pTSchema, pDataBlock, id); + if (code != TSDB_CODE_SUCCESS) { + tqError("s-task:%s failed to convert rows from result block, code:%s", id, tstrerror(terrno)); + return code; + } + + 
taosArraySort(pTableData->aRowP, tsAscendingSortFn); + tqDebug("s-task:%s build submit msg for dstTable:%s, numOfRows:%d", id, dstTableName, numOfRows); return code; -} \ No newline at end of file +} + +void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { + const SArray* pBlocks = (const SArray*)data; + SVnode* pVnode = (SVnode*)vnode; + int64_t suid = pTask->tbSink.stbUid; + char* stbFullName = pTask->tbSink.stbFullName; + STSchema* pTSchema = pTask->tbSink.pTSchema; + int32_t vgId = TD_VID(pVnode); + int32_t numOfBlocks = taosArrayGetSize(pBlocks); + int32_t code = TSDB_CODE_SUCCESS; + const char* id = pTask->id.idStr; + + if (pTask->taskExecInfo.start == 0) { + pTask->taskExecInfo.start = taosGetTimestampMs(); + } + + bool onlySubmitData = true; + for(int32_t i = 0; i < numOfBlocks; ++i) { + SSDataBlock* p = taosArrayGet(pBlocks, i); + if (p->info.type == STREAM_DELETE_RESULT || p->info.type == STREAM_CREATE_CHILD_TABLE) { + onlySubmitData = false; + break; + } + } + + if (!onlySubmitData) { + tqDebug("vgId:%d, s-task:%s write %d stream resBlock(s) into table, has delete block, submit one-by-one", vgId, id, + numOfBlocks); + + for(int32_t i = 0; i < numOfBlocks; ++i) { + if (streamTaskShouldStop(&pTask->status)) { + return; + } + + SSDataBlock* pDataBlock = taosArrayGet(pBlocks, i); + if (pDataBlock->info.type == STREAM_DELETE_RESULT) { + code = doBuildAndSendDeleteMsg(pVnode, stbFullName, pDataBlock, pTask, suid); + } else if (pDataBlock->info.type == STREAM_CREATE_CHILD_TABLE) { + code = doBuildAndSendCreateTableMsg(pVnode, stbFullName, pDataBlock, pTask, suid); + } else if (pDataBlock->info.type == STREAM_CHECKPOINT) { + continue; + } else { + pTask->sinkRecorder.numOfBlocks += 1; + + SSubmitReq2 submitReq = {.aSubmitTbData = taosArrayInit(1, sizeof(SSubmitTbData))}; + if (submitReq.aSubmitTbData == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + tqError("s-task:%s vgId:%d failed to prepare submit msg in sink task, code:%s", id, vgId, 
tstrerror(code)); + return; + } + + SSubmitTbData tbData = {.suid = suid, .uid = 0, .sver = pTSchema->version}; + code = setDstTableDataUid(pVnode, pTask, pDataBlock, stbFullName, &tbData); + if (code != TSDB_CODE_SUCCESS) { + continue; + } + + code = setDstTableDataPayload(pTask, i, pDataBlock, &tbData); + if (code != TSDB_CODE_SUCCESS) { + continue; + } + + taosArrayPush(submitReq.aSubmitTbData, &tbData); + code = doBuildAndSendSubmitMsg(pVnode, pTask, &submitReq, 1); + } + } + } else { + tqDebug("vgId:%d, s-task:%s write %d stream resBlock(s) into table, merge submit msg", vgId, id, numOfBlocks); + SHashObj* pTableIndexMap = taosHashInit(numOfBlocks, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK); + + SSubmitReq2 submitReq = {.aSubmitTbData = taosArrayInit(1, sizeof(SSubmitTbData))}; + if (submitReq.aSubmitTbData == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + tqError("s-task:%s vgId:%d failed to prepare submit msg in sink task, code:%s", id, vgId, tstrerror(code)); + taosHashCleanup(pTableIndexMap); + return; + } + + bool hasSubmit = false; + for (int32_t i = 0; i < numOfBlocks; i++) { + if (streamTaskShouldStop(&pTask->status)) { + return; + } + + SSDataBlock* pDataBlock = taosArrayGet(pBlocks, i); + if (pDataBlock->info.type == STREAM_CHECKPOINT) { + continue; + } + + hasSubmit = true; + pTask->sinkRecorder.numOfBlocks += 1; + uint64_t groupId = pDataBlock->info.id.groupId; + + SSubmitTbData tbData = {.suid = suid, .uid = 0, .sver = pTSchema->version}; + + int32_t* index = taosHashGet(pTableIndexMap, &groupId, sizeof(groupId)); + if (index == NULL) { // no data yet, append it + code = setDstTableDataUid(pVnode, pTask, pDataBlock, stbFullName, &tbData); + if (code != TSDB_CODE_SUCCESS) { + continue; + } + + code = setDstTableDataPayload(pTask, i, pDataBlock, &tbData); + if (code != TSDB_CODE_SUCCESS) { + continue; + } + + taosArrayPush(submitReq.aSubmitTbData, &tbData); + + int32_t size = 
(int32_t)taosArrayGetSize(submitReq.aSubmitTbData) - 1; + taosHashPut(pTableIndexMap, &groupId, sizeof(groupId), &size, sizeof(size)); + } else { + code = setDstTableDataPayload(pTask, i, pDataBlock, &tbData); + if (code != TSDB_CODE_SUCCESS) { + continue; + } + + SSubmitTbData* pExisted = taosArrayGet(submitReq.aSubmitTbData, *index); + code = doMergeExistedRows(pExisted, &tbData, id); + if (code != TSDB_CODE_SUCCESS) { + continue; + } + } + + pTask->sinkRecorder.numOfRows += pDataBlock->info.rows; + } + + taosHashCleanup(pTableIndexMap); + + if (hasSubmit) { + doBuildAndSendSubmitMsg(pVnode, pTask, &submitReq, numOfBlocks); + } else { + tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); + tqDebug("vgId:%d, s-task:%s write results completed", vgId, id); + } + } +} diff --git a/source/dnode/vnode/src/tq/tqStreamStateSnap.c b/source/dnode/vnode/src/tq/tqStreamStateSnap.c index 4a1b3961cd..b8d5c35dfb 100644 --- a/source/dnode/vnode/src/tq/tqStreamStateSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamStateSnap.c @@ -91,7 +91,7 @@ int32_t streamStateSnapRead(SStreamStateReader* pReader, uint8_t** ppData) { uint8_t* rowData = NULL; int64_t len; code = streamSnapRead(pReader->pReaderImpl, &rowData, &len); - if (rowData == NULL || len == 0) { + if (code != 0 || rowData == NULL || len == 0) { return code; } *ppData = taosMemoryMalloc(sizeof(SSnapDataHdr) + len); @@ -168,7 +168,7 @@ int32_t streamStateSnapWriterClose(SStreamStateWriter* pWriter, int8_t rollback) } int32_t streamStateRebuildFromSnap(SStreamStateWriter* pWriter, int64_t chkpId) { tqDebug("vgId:%d, vnode %s start to rebuild stream-state", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); - int32_t code = streamMetaReopen(pWriter->pTq->pStreamMeta, chkpId); + int32_t code = streamMetaReopen(pWriter->pTq->pStreamMeta); if (code == 0) { code = streamStateLoadTasks(pWriter); } diff --git a/source/dnode/vnode/src/tq/tqStreamTask.c b/source/dnode/vnode/src/tq/tqStreamTask.c index eb587b8be2..3cba4567fe 100644 --- 
a/source/dnode/vnode/src/tq/tqStreamTask.c +++ b/source/dnode/vnode/src/tq/tqStreamTask.c @@ -94,6 +94,9 @@ int32_t tqCheckAndRunStreamTask(STQ* pTq) { continue; } + pTask->taskExecInfo.init = taosGetTimestampMs(); + tqDebug("s-task:%s set the init ts:%"PRId64, pTask->id.idStr, pTask->taskExecInfo.init); + streamSetStatusNormal(pTask); streamTaskCheckDownstream(pTask); @@ -108,12 +111,12 @@ int32_t tqCheckAndRunStreamTaskAsync(STQ* pTq) { int32_t vgId = TD_VID(pTq->pVnode); SStreamMeta* pMeta = pTq->pStreamMeta; - taosWLockLatch(&pMeta->lock); +// taosWLockLatch(&pMeta->lock); int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); if (numOfTasks == 0) { - tqInfo("vgId:%d no stream tasks exist", vgId); - taosWUnLockLatch(&pMeta->lock); + tqDebug("vgId:%d no stream tasks existed to run", vgId); +// taosWUnLockLatch(&pMeta->lock); return 0; } @@ -121,7 +124,7 @@ int32_t tqCheckAndRunStreamTaskAsync(STQ* pTq) { if (pRunReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; tqError("vgId:%d failed to create msg to start wal scanning to launch stream tasks, code:%s", vgId, terrstr()); - taosWUnLockLatch(&pMeta->lock); +// taosWUnLockLatch(&pMeta->lock); return -1; } @@ -132,7 +135,7 @@ int32_t tqCheckAndRunStreamTaskAsync(STQ* pTq) { SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg); - taosWUnLockLatch(&pMeta->lock); +// taosWUnLockLatch(&pMeta->lock); return 0; } @@ -150,7 +153,7 @@ int32_t tqScanWalAsync(STQ* pTq, bool ckPause) { int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); if (numOfTasks == 0) { - tqInfo("vgId:%d no stream tasks exist", vgId); + tqDebug("vgId:%d no stream tasks existed to run", vgId); taosWUnLockLatch(&pMeta->lock); return 0; } @@ -163,7 +166,7 @@ int32_t tqScanWalAsync(STQ* pTq, bool ckPause) { return 0; } - int32_t numOfPauseTasks = pTq->pStreamMeta->pauseTaskNum; + int32_t numOfPauseTasks = pTq->pStreamMeta->numOfPausedTasks; if 
(ckPause && numOfTasks == numOfPauseTasks) { tqDebug("vgId:%d ignore all submit, all streams had been paused, reset the walScanCounter", vgId); @@ -198,8 +201,7 @@ int32_t tqStopStreamTasks(STQ* pTq) { int32_t vgId = TD_VID(pTq->pVnode); int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); - tqDebug("vgId:%d start to stop all %d stream task(s)", vgId, numOfTasks); - + tqDebug("vgId:%d stop all %d stream task(s)", vgId, numOfTasks); if (numOfTasks == 0) { return TSDB_CODE_SUCCESS; } @@ -229,63 +231,60 @@ int32_t tqStartStreamTasks(STQ* pTq) { int32_t vgId = TD_VID(pTq->pVnode); int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); - tqDebug("vgId:%d start to stop all %d stream task(s)", vgId, numOfTasks); + tqDebug("vgId:%d start all %d stream task(s)", vgId, numOfTasks); if (numOfTasks == 0) { return TSDB_CODE_SUCCESS; } - taosWLockLatch(&pMeta->lock); - for (int32_t i = 0; i < numOfTasks; ++i) { SStreamTaskId* pTaskId = taosArrayGet(pMeta->pTaskList, i); - int64_t key[2] = {pTaskId->streamId, pTaskId->taskId}; - SStreamTask** pTask = taosHashGet(pMeta->pTasks, key, sizeof(key)); + STaskId id = {.streamId = pTaskId->streamId, .taskId = pTaskId->taskId}; + SStreamTask** pTask = taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); int8_t status = (*pTask)->status.taskStatus; - if (status == TASK_STATUS__STOP) { + if (status == TASK_STATUS__STOP && (*pTask)->info.fillHistory != 1) { streamSetStatusNormal(*pTask); } } - taosWUnLockLatch(&pMeta->lock); return 0; } int32_t setWalReaderStartOffset(SStreamTask* pTask, int32_t vgId) { // seek the stored version and extract data from WAL int64_t firstVer = walReaderGetValidFirstVer(pTask->exec.pWalReader); - if (pTask->chkInfo.currentVer < firstVer) { + if (pTask->chkInfo.nextProcessVer < firstVer) { tqWarn("vgId:%d s-task:%s ver:%" PRId64 " earlier than the first ver of wal range %" PRId64 ", forward to %" PRId64, - vgId, pTask->id.idStr, pTask->chkInfo.currentVer, firstVer, firstVer); + vgId, pTask->id.idStr, 
pTask->chkInfo.nextProcessVer, firstVer, firstVer); - pTask->chkInfo.currentVer = firstVer; + pTask->chkInfo.nextProcessVer = firstVer; // todo need retry if failed - int32_t code = walReaderSeekVer(pTask->exec.pWalReader, pTask->chkInfo.currentVer); + int32_t code = walReaderSeekVer(pTask->exec.pWalReader, pTask->chkInfo.nextProcessVer); if (code != TSDB_CODE_SUCCESS) { return code; } // append the data for the stream - tqDebug("vgId:%d s-task:%s wal reader seek to ver:%" PRId64, vgId, pTask->id.idStr, pTask->chkInfo.currentVer); + tqDebug("vgId:%d s-task:%s wal reader seek to ver:%" PRId64, vgId, pTask->id.idStr, pTask->chkInfo.nextProcessVer); } else { int64_t currentVer = walReaderGetCurrentVer(pTask->exec.pWalReader); if (currentVer == -1) { // we only seek the read for the first time - int32_t code = walReaderSeekVer(pTask->exec.pWalReader, pTask->chkInfo.currentVer); + int32_t code = walReaderSeekVer(pTask->exec.pWalReader, pTask->chkInfo.nextProcessVer); if (code != TSDB_CODE_SUCCESS) { // no data in wal, quit return code; } // append the data for the stream tqDebug("vgId:%d s-task:%s wal reader initial seek to ver:%" PRId64, vgId, pTask->id.idStr, - pTask->chkInfo.currentVer); + pTask->chkInfo.nextProcessVer); } } int64_t skipToVer = walReaderGetSkipToVersion(pTask->exec.pWalReader); - if (skipToVer != 0 && skipToVer > pTask->chkInfo.currentVer) { + if (skipToVer != 0 && skipToVer > pTask->chkInfo.nextProcessVer) { int32_t code = walReaderSeekVer(pTask->exec.pWalReader, skipToVer); if (code != TSDB_CODE_SUCCESS) { // no data in wal, quit return code; @@ -304,16 +303,16 @@ void handleFillhistoryScanComplete(SStreamTask* pTask, int64_t ver) { if ((pTask->info.fillHistory == 1) && ver > pTask->dataRange.range.maxVer) { if (!pTask->status.appendTranstateBlock) { - qWarn("s-task:%s fill-history scan WAL, currentVer:%" PRId64 " reach the maximum ver:%" PRId64 + qWarn("s-task:%s fill-history scan WAL, nextProcessVer:%" PRId64 " out of the maximum ver:%" PRId64 ", 
not scan wal anymore, add transfer-state block into inputQ", id, ver, maxVer); - double el = (taosGetTimestampMs() - pTask->tsInfo.step2Start) / 1000.0; + double el = (taosGetTimestampMs() - pTask->taskExecInfo.step2Start) / 1000.0; qDebug("s-task:%s scan-history from WAL stage(step 2) ended, elapsed time:%.2fs", id, el); /*int32_t code = */streamTaskPutTranstateIntoInputQ(pTask); - /*int32_t code = */ streamSchedExec(pTask); + /*int32_t code = */streamSchedExec(pTask); } else { - qWarn("s-task:%s fill-history scan WAL, currentVer:%" PRId64 " reach the maximum ver:%" PRId64 ", not scan wal", + qWarn("s-task:%s fill-history scan WAL, nextProcessVer:%" PRId64 " out of the maximum ver:%" PRId64 ", not scan wal", id, ver, maxVer); } } @@ -371,7 +370,7 @@ int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta, bool* pScanIdle) { continue; } - if (streamQueueIsFull(pTask->inputInfo.queue->pQueue)) { + if (streamQueueIsFull(pTask->inputInfo.queue->pQueue, true)) { tqTrace("s-task:%s input queue is full, do nothing", pTask->id.idStr); streamMetaReleaseTask(pStreamMeta, pTask); continue; @@ -393,28 +392,26 @@ int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta, bool* pScanIdle) { continue; } - int32_t numOfItems = streamTaskGetInputQItems(pTask); + int32_t numOfItems = streamQueueGetNumOfItems(pTask->inputInfo.queue); int64_t maxVer = (pTask->info.fillHistory == 1) ? 
pTask->dataRange.range.maxVer : INT64_MAX; + taosThreadMutexLock(&pTask->lock); + + pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); + if (pTask->status.taskStatus != TASK_STATUS__NORMAL) { + tqDebug("s-task:%s not ready for submit block from wal, status:%s", pTask->id.idStr, pStatus); + taosThreadMutexUnlock(&pTask->lock); + streamMetaReleaseTask(pStreamMeta, pTask); + continue; + } + SStreamQueueItem* pItem = NULL; code = extractMsgFromWal(pTask->exec.pWalReader, (void**)&pItem, maxVer, pTask->id.idStr); if ((code != TSDB_CODE_SUCCESS || pItem == NULL) && (numOfItems == 0)) { // failed, continue handleFillhistoryScanComplete(pTask, walReaderGetCurrentVer(pTask->exec.pWalReader)); streamMetaReleaseTask(pStreamMeta, pTask); - continue; - } - - taosThreadMutexLock(&pTask->lock); - pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); - - if (pTask->status.taskStatus != TASK_STATUS__NORMAL) { - tqDebug("s-task:%s not ready for submit block from wal, status:%s", pTask->id.idStr, pStatus); taosThreadMutexUnlock(&pTask->lock); - streamMetaReleaseTask(pStreamMeta, pTask); - if (pItem != NULL) { - streamFreeQitem(pItem); - } continue; } @@ -423,12 +420,12 @@ int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta, bool* pScanIdle) { code = streamTaskPutDataIntoInputQ(pTask, pItem); if (code == TSDB_CODE_SUCCESS) { int64_t ver = walReaderGetCurrentVer(pTask->exec.pWalReader); - pTask->chkInfo.currentVer = ver; + pTask->chkInfo.nextProcessVer = ver; handleFillhistoryScanComplete(pTask, ver); tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", pTask->id.idStr, ver); } else { tqError("s-task:%s append input queue failed, too many in inputQ, ver:%" PRId64, pTask->id.idStr, - pTask->chkInfo.currentVer); + pTask->chkInfo.nextProcessVer); } } diff --git a/source/dnode/vnode/src/tq/tqStreamTaskSnap.c b/source/dnode/vnode/src/tq/tqStreamTaskSnap.c index 2d58a10e51..458e2c74ff 100644 --- 
a/source/dnode/vnode/src/tq/tqStreamTaskSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamTaskSnap.c @@ -165,7 +165,6 @@ struct SStreamTaskWriter { STQ* pTq; int64_t sver; int64_t ever; - TXN* txn; }; int32_t streamTaskSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamTaskWriter** ppWriter) { @@ -182,12 +181,6 @@ int32_t streamTaskSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamTa pWriter->sver = sver; pWriter->ever = ever; - if (tdbBegin(pTq->pStreamMeta->db, &pWriter->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) { - code = -1; - taosMemoryFree(pWriter); - goto _err; - } - *ppWriter = pWriter; tqDebug("vgId:%d, vnode stream-task snapshot writer opened", TD_VID(pTq->pVnode)); return code; @@ -204,54 +197,57 @@ int32_t streamTaskSnapWriterClose(SStreamTaskWriter* pWriter, int8_t rollback) { STQ* pTq = pWriter->pTq; tqDebug("vgId:%d, vnode stream-task snapshot writer closed", TD_VID(pTq->pVnode)); + + taosWLockLatch(&pTq->pStreamMeta->lock); if (rollback) { - tdbAbort(pWriter->pTq->pStreamMeta->db, pWriter->txn); + tdbAbort(pTq->pStreamMeta->db, pTq->pStreamMeta->txn); } else { - code = tdbCommit(pWriter->pTq->pStreamMeta->db, pWriter->txn); + code = tdbCommit(pTq->pStreamMeta->db, pTq->pStreamMeta->txn); if (code) goto _err; - code = tdbPostCommit(pWriter->pTq->pStreamMeta->db, pWriter->txn); + code = tdbPostCommit(pTq->pStreamMeta->db, pTq->pStreamMeta->txn); if (code) goto _err; } + if (tdbBegin(pTq->pStreamMeta->db, &pTq->pStreamMeta->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) { + code = -1; + goto _err; + } + + taosWUnLockLatch(&pTq->pStreamMeta->lock); taosMemoryFree(pWriter); - - // restore from metastore - // if (tqMetaRestoreHandle(pTq) < 0) { - // goto _err; - // } - return code; _err: tqError("vgId:%d, vnode stream-task snapshot writer failed to close since %s", TD_VID(pWriter->pTq->pVnode), tstrerror(code)); + taosWUnLockLatch(&pTq->pStreamMeta->lock); return code; - return 0; } int32_t 
streamTaskSnapWrite(SStreamTaskWriter* pWriter, uint8_t* pData, uint32_t nData) { int32_t code = 0; STQ* pTq = pWriter->pTq; - STqHandle handle; SSnapDataHdr* pHdr = (SSnapDataHdr*)pData; if (pHdr->type == SNAP_DATA_STREAM_TASK) { - SStreamTaskId task = {0}; + STaskId taskId = {0}; SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr)); - - code = tDecodeStreamTaskId(&decoder, &task); + code = tDecodeStreamTaskId(&decoder, &taskId); if (code < 0) { tDecoderClear(&decoder); goto _err; } tDecoderClear(&decoder); - // tdbTbInsert(TTB *pTb, const void *pKey, int keyLen, const void *pVal, int valLen, TXN *pTxn) - int64_t key[2] = {task.streamId, task.taskId}; + + int64_t key[2] = {taskId.streamId, taskId.taskId}; + taosWLockLatch(&pTq->pStreamMeta->lock); if (tdbTbUpsert(pTq->pStreamMeta->pTaskDb, key, sizeof(int64_t) << 1, (uint8_t*)pData + sizeof(SSnapDataHdr), - nData - sizeof(SSnapDataHdr), pWriter->txn) < 0) { + nData - sizeof(SSnapDataHdr), pTq->pStreamMeta->txn) < 0) { + taosWUnLockLatch(&pTq->pStreamMeta->lock); return -1; } + taosWUnLockLatch(&pTq->pStreamMeta->lock); } else if (pHdr->type == SNAP_DATA_STREAM_TASK_CHECKPOINT) { // do nothing } diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index f1b154a4ac..62ef06fec2 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -36,10 +36,15 @@ int32_t tqInitDataRsp(SMqDataRsp* pRsp, STqOffsetVal pOffset) { return 0; } -void tqUpdateNodeStage(STQ* pTq) { +void tqUpdateNodeStage(STQ* pTq, bool isLeader) { SSyncState state = syncGetState(pTq->pVnode->sync); - pTq->pStreamMeta->stage = state.term; - tqDebug("vgId:%d update the meta stage to be:%"PRId64, pTq->pStreamMeta->vgId, pTq->pStreamMeta->stage); + SStreamMeta* pMeta = pTq->pStreamMeta; + tqDebug("vgId:%d update the meta stage:%"PRId64", prev:%"PRId64" leader:%d", pMeta->vgId, state.term, pMeta->stage, isLeader); + pMeta->stage = 
state.term; + pMeta->leader = isLeader; + if (isLeader) { + streamMetaStartHb(pMeta); + } } static int32_t tqInitTaosxRsp(STaosxRsp* pRsp, STqOffsetVal pOffset) { @@ -437,6 +442,8 @@ int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStream taosArrayDestroy(pRes->uidList); *pRefBlock = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0); if (*pRefBlock == NULL) { + blockDataCleanup(pDelBlock); + taosMemoryFree(pDelBlock); return TSDB_CODE_OUT_OF_MEMORY; } diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit2.c b/source/dnode/vnode/src/tsdb/tsdbCommit2.c index d4cb63fb7b..c55e5f92ea 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit2.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit2.c @@ -185,29 +185,22 @@ static int32_t tsdbCommitTombData(SCommitter2 *committer) { } if (record->ekey < committer->ctx->minKey) { - goto _next; + // do nothing } else if (record->skey > committer->ctx->maxKey) { - committer->ctx->maxKey = TMIN(record->skey, committer->ctx->maxKey); - goto _next; + committer->ctx->nextKey = TMIN(record->skey, committer->ctx->nextKey); + } else { + if (record->ekey > committer->ctx->maxKey) { + committer->ctx->nextKey = TMIN(committer->ctx->nextKey, committer->ctx->maxKey + 1); + } + + record->skey = TMAX(record->skey, committer->ctx->minKey); + record->ekey = TMIN(record->ekey, committer->ctx->maxKey); + + numRecord++; + code = tsdbFSetWriteTombRecord(committer->writer, record); + TSDB_CHECK_CODE(code, lino, _exit); } - TSKEY maxKey = committer->ctx->maxKey; - if (record->ekey > committer->ctx->maxKey) { - maxKey = committer->ctx->maxKey + 1; - } - - if (record->ekey > committer->ctx->maxKey && committer->ctx->nextKey > maxKey) { - committer->ctx->nextKey = maxKey; - } - - record->skey = TMAX(record->skey, committer->ctx->minKey); - record->ekey = TMIN(record->ekey, maxKey); - - numRecord++; - code = tsdbFSetWriteTombRecord(committer->writer, record); - TSDB_CHECK_CODE(code, lino, _exit); - - _next: code = 
tsdbIterMergerNext(committer->tombIterMerger); TSDB_CHECK_CODE(code, lino, _exit); } diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c index dc5e3649cc..7e5eb2c553 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c @@ -980,9 +980,7 @@ static int32_t tsdbDataFileDoWriteTableOldData(SDataFileWriter *writer, const TS writer->ctx->brinBlkArray = NULL; writer->ctx->tbHasOldData = false; goto _exit; - } - - for (; writer->ctx->brinBlkArrayIdx < TARRAY2_SIZE(writer->ctx->brinBlkArray); writer->ctx->brinBlkArrayIdx++) { + } else { const SBrinBlk *brinBlk = TARRAY2_GET_PTR(writer->ctx->brinBlkArray, writer->ctx->brinBlkArrayIdx); if (brinBlk->minTbid.uid != writer->ctx->tbid->uid) { @@ -995,7 +993,6 @@ static int32_t tsdbDataFileDoWriteTableOldData(SDataFileWriter *writer, const TS writer->ctx->brinBlockIdx = 0; writer->ctx->brinBlkArrayIdx++; - break; } } @@ -1110,9 +1107,7 @@ static int32_t tsdbDataFileWriteTableDataBegin(SDataFileWriter *writer, const TA if (writer->ctx->brinBlkArrayIdx >= TARRAY2_SIZE(writer->ctx->brinBlkArray)) { writer->ctx->brinBlkArray = NULL; break; - } - - for (; writer->ctx->brinBlkArrayIdx < TARRAY2_SIZE(writer->ctx->brinBlkArray); writer->ctx->brinBlkArrayIdx++) { + } else { const SBrinBlk *brinBlk = TARRAY2_GET_PTR(writer->ctx->brinBlkArray, writer->ctx->brinBlkArrayIdx); code = tsdbDataFileReadBrinBlock(writer->ctx->reader, brinBlk, writer->ctx->brinBlock); @@ -1120,7 +1115,6 @@ static int32_t tsdbDataFileWriteTableDataBegin(SDataFileWriter *writer, const TA writer->ctx->brinBlockIdx = 0; writer->ctx->brinBlkArrayIdx++; - break; } } @@ -1251,9 +1245,7 @@ static int32_t tsdbDataFileDoWriteTombRecord(SDataFileWriter *writer, const STom if (writer->ctx->tombBlkArrayIdx >= TARRAY2_SIZE(writer->ctx->tombBlkArray)) { writer->ctx->hasOldTomb = false; break; - } - - for (; writer->ctx->tombBlkArrayIdx < 
TARRAY2_SIZE(writer->ctx->tombBlkArray); ++writer->ctx->tombBlkArrayIdx) { + } else { const STombBlk *tombBlk = TARRAY2_GET_PTR(writer->ctx->tombBlkArray, writer->ctx->tombBlkArrayIdx); code = tsdbDataFileReadTombBlock(writer->ctx->reader, tombBlk, writer->ctx->tombBlock); @@ -1261,7 +1253,6 @@ static int32_t tsdbDataFileDoWriteTombRecord(SDataFileWriter *writer, const STom writer->ctx->tombBlockIdx = 0; writer->ctx->tombBlkArrayIdx++; - break; } } diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index a997c3eea5..f43bb52d05 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -174,11 +174,17 @@ int32_t save_fs(const TFileSetArray *arr, const char *fname) { // fset cJSON *ajson = cJSON_AddArrayToObject(json, "fset"); - if (!ajson) TSDB_CHECK_CODE(code = TSDB_CODE_OUT_OF_MEMORY, lino, _exit); + if (!ajson) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } const STFileSet *fset; TARRAY2_FOREACH(arr, fset) { cJSON *item = cJSON_CreateObject(); - if (!item) TSDB_CHECK_CODE(code = TSDB_CODE_OUT_OF_MEMORY, lino, _exit); + if (!item) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } cJSON_AddItemToArray(ajson, item); code = tsdbTFileSetToJson(fset, item); @@ -231,7 +237,8 @@ static int32_t load_fs(STsdb *pTsdb, const char *fname, TFileSetArray *arr) { TSDB_CHECK_CODE(code, lino, _exit); } } else { - TSDB_CHECK_CODE(code = TSDB_CODE_FILE_CORRUPTED, lino, _exit); + code = TSDB_CODE_FILE_CORRUPTED; + TSDB_CHECK_CODE(code, lino, _exit); } _exit: @@ -312,7 +319,8 @@ static int32_t commit_edit(STFileSystem *fs) { int32_t code; int32_t lino; if ((code = taosRenameFile(current_t, current))) { - TSDB_CHECK_CODE(code = TAOS_SYSTEM_ERROR(code), lino, _exit); + code = TAOS_SYSTEM_ERROR(code); + TSDB_CHECK_CODE(code, lino, _exit); } code = apply_commit(fs); @@ -345,7 +353,8 @@ static int32_t abort_edit(STFileSystem *fs) { int32_t code; int32_t 
lino; if ((code = taosRemoveFile(fname))) { - TSDB_CHECK_CODE(code = TAOS_SYSTEM_ERROR(code), lino, _exit); + code = TAOS_SYSTEM_ERROR(code); + TSDB_CHECK_CODE(code, lino, _exit); } code = apply_abort(fs); @@ -398,7 +407,7 @@ static int32_t tsdbFSAddEntryToFileObjHash(STFileHash *hash, const char *fname) STFileHashEntry *entry = taosMemoryMalloc(sizeof(*entry)); if (entry == NULL) return TSDB_CODE_OUT_OF_MEMORY; - strcpy(entry->fname, fname); + strncpy(entry->fname, fname, TSDB_FILENAME_LEN); uint32_t idx = MurmurHash3_32(fname, strlen(fname)) % hash->numBucket; @@ -873,7 +882,7 @@ int32_t tsdbFSCreateCopySnapshot(STFileSystem *fs, TFileSetArray **fsetArr) { STFileSet *fset1; fsetArr[0] = taosMemoryMalloc(sizeof(TFileSetArray)); - if (fsetArr == NULL) return TSDB_CODE_OUT_OF_MEMORY; + if (fsetArr[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; TARRAY2_INIT(fsetArr[0]); diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.c b/source/dnode/vnode/src/tsdb/tsdbFSet2.c index 13ef7b3ba6..37c7e2ffc1 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSet2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.c @@ -46,7 +46,8 @@ static int32_t tsdbSttLvlInitEx(STsdb *pTsdb, const SSttLvl *lvl1, SSttLvl **lvl return code; } - TARRAY2_APPEND(lvl[0]->fobjArr, fobj); + code = TARRAY2_APPEND(lvl[0]->fobjArr, fobj); + if (code) return code; } return 0; } @@ -185,7 +186,8 @@ static int32_t tsdbJsonToSttLvl(STsdb *pTsdb, const cJSON *json, SSttLvl **lvl) return code; } - TARRAY2_APPEND(lvl[0]->fobjArr, fobj); + code = TARRAY2_APPEND(lvl[0]->fobjArr, fobj); + if (code) return code; } return 0; } @@ -263,7 +265,8 @@ int32_t tsdbJsonToTFileSet(STsdb *pTsdb, const cJSON *json, STFileSet **fset) { return code; } - TARRAY2_APPEND((*fset)->lvlArr, lvl); + code = TARRAY2_APPEND((*fset)->lvlArr, lvl); + if (code) return code; } } else { return TSDB_CODE_FILE_CORRUPTED; @@ -326,11 +329,12 @@ int32_t tsdbTFileSetEdit(STsdb *pTsdb, STFileSet *fset, const STFileOp *op) { STFileObj tfobj = {.f[0] = {.cid = 
op->of.cid}}, *tfobjp = &tfobj; STFileObj **fobjPtr = TARRAY2_SEARCH(lvl->fobjArr, &tfobjp, tsdbTFileObjCmpr, TD_EQ); - tfobjp = (fobjPtr ? *fobjPtr : NULL); - - ASSERT(tfobjp); - - tfobjp->f[0] = op->nf; + if (fobjPtr) { + tfobjp = *fobjPtr; + tfobjp->f[0] = op->nf; + } else { + tsdbError("file not found, cid:%" PRId64, op->of.cid); + } } else { fset->farr[op->nf.type]->f[0] = op->nf; } diff --git a/source/dnode/vnode/src/tsdb/tsdbFile2.c b/source/dnode/vnode/src/tsdb/tsdbFile2.c index 585316469a..3d8964d41b 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFile2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFile2.c @@ -42,8 +42,12 @@ static const struct { }; void remove_file(const char *fname) { - taosRemoveFile(fname); - tsdbInfo("file:%s is removed", fname); + int32_t code = taosRemoveFile(fname); + if (code) { + tsdbError("file:%s remove failed", fname); + } else { + tsdbInfo("file:%s is removed", fname); + } } static int32_t tfile_to_json(const STFile *file, cJSON *json) { diff --git a/source/dnode/vnode/src/tsdb/tsdbIter.c b/source/dnode/vnode/src/tsdb/tsdbIter.c index 9780cc6be6..447832108d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbIter.c +++ b/source/dnode/vnode/src/tsdb/tsdbIter.c @@ -356,7 +356,8 @@ static int32_t tsdbSttIterOpen(STsdbIter *iter) { } iter->sttData->sttBlkArrayIdx = 0; - tBlockDataCreate(iter->sttData->blockData); + code = tBlockDataCreate(iter->sttData->blockData); + if (code) return code; iter->sttData->blockDataIdx = 0; return tsdbSttIterNext(iter, NULL); @@ -381,7 +382,8 @@ static int32_t tsdbDataIterOpen(STsdbIter *iter) { iter->dataData->brinBlockIdx = 0; // SBlockData - tBlockDataCreate(iter->dataData->blockData); + code = tBlockDataCreate(iter->dataData->blockData); + if (code) return code; iter->dataData->blockDataIdx = 0; return tsdbDataIterNext(iter, NULL); diff --git a/source/dnode/vnode/src/tsdb/tsdbMergeTree.c b/source/dnode/vnode/src/tsdb/tsdbMergeTree.c index 4927b1539b..4705d95c0e 100644 --- 
a/source/dnode/vnode/src/tsdb/tsdbMergeTree.c +++ b/source/dnode/vnode/src/tsdb/tsdbMergeTree.c @@ -22,38 +22,6 @@ static void tLDataIterClose2(SLDataIter *pIter); // SLDataIter ================================================= -SSttBlockLoadInfo *tCreateLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols, - int32_t numOfSttTrigger) { - SSttBlockLoadInfo *pLoadInfo = taosMemoryCalloc(numOfSttTrigger, sizeof(SSttBlockLoadInfo)); - if (pLoadInfo == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; - } - - for (int32_t i = 0; i < numOfSttTrigger; ++i) { - pLoadInfo[i].blockIndex[0] = -1; - pLoadInfo[i].blockIndex[1] = -1; - pLoadInfo[i].currentLoadBlockIndex = 1; - - int32_t code = tBlockDataCreate(&pLoadInfo[i].blockData[0]); - if (code) { - terrno = code; - } - - code = tBlockDataCreate(&pLoadInfo[i].blockData[1]); - if (code) { - terrno = code; - } - - pLoadInfo[i].aSttBlk = taosArrayInit(4, sizeof(SSttBlk)); - pLoadInfo[i].pSchema = pSchema; - pLoadInfo[i].colIds = colList; - pLoadInfo[i].numOfCols = numOfCols; - } - - return pLoadInfo; -} - SSttBlockLoadInfo *tCreateOneLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols) { SSttBlockLoadInfo *pLoadInfo = taosMemoryCalloc(1, sizeof(SSttBlockLoadInfo)); if (pLoadInfo == NULL) { @@ -83,25 +51,6 @@ SSttBlockLoadInfo *tCreateOneLastBlockLoadInfo(STSchema *pSchema, int16_t *colLi return pLoadInfo; } -void resetLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo) { - for (int32_t i = 0; i < 1; ++i) { - pLoadInfo[i].currentLoadBlockIndex = 1; - pLoadInfo[i].blockIndex[0] = -1; - pLoadInfo[i].blockIndex[1] = -1; - - taosArrayClear(pLoadInfo[i].aSttBlk); - - pLoadInfo[i].cost.loadBlocks = 0; - pLoadInfo[i].cost.blockElapsedTime = 0; - pLoadInfo[i].cost.statisElapsedTime = 0; - pLoadInfo[i].cost.loadStatisBlocks = 0; - pLoadInfo[i].statisBlockIndex = -1; - tStatisBlockDestroy(pLoadInfo[i].statisBlock); - - pLoadInfo[i].sttBlockLoaded = false; - } -} - void 
getSttBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo, SSttBlockLoadCostInfo* pLoadCost) { for (int32_t i = 0; i < 1; ++i) { pLoadCost->blockElapsedTime += pLoadInfo[i].cost.blockElapsedTime; @@ -309,12 +258,6 @@ static int32_t binarySearchForStartRowIndex(uint64_t *uidList, int32_t num, uint } } -int32_t tLDataIterOpen(struct SLDataIter *pIter, SDataFReader *pReader, int32_t iStt, int8_t backward, uint64_t suid, - uint64_t uid, STimeWindow *pTimeWindow, SVersionRange *pRange, SSttBlockLoadInfo *pBlockLoadInfo, - const char *idStr, bool strictTimeRange) { - return 0; -} - static int32_t extractSttBlockInfo(SLDataIter *pIter, const TSttBlkArray *pArray, SSttBlockLoadInfo *pBlockLoadInfo, uint64_t suid) { if (TARRAY2_SIZE(pArray) <= 0) { @@ -767,50 +710,6 @@ static FORCE_INLINE int32_t tLDataIterDescCmprFn(const SRBTreeNode *p1, const SR return -1 * tLDataIterCmprFn(p1, p2); } -int32_t tMergeTreeOpen(SMergeTree *pMTree, int8_t backward, SDataFReader *pFReader, uint64_t suid, uint64_t uid, - STimeWindow *pTimeWindow, SVersionRange *pVerRange, SSttBlockLoadInfo *pBlockLoadInfo, - bool destroyLoadInfo, const char *idStr, bool strictTimeRange, SLDataIter *pLDataIter) { - int32_t code = TSDB_CODE_SUCCESS; - - pMTree->backward = backward; - pMTree->pIter = NULL; - pMTree->idStr = idStr; - - if (!pMTree->backward) { // asc - tRBTreeCreate(&pMTree->rbt, tLDataIterCmprFn); - } else { // desc - tRBTreeCreate(&pMTree->rbt, tLDataIterDescCmprFn); - } - - pMTree->pLoadInfo = pBlockLoadInfo; - pMTree->destroyLoadInfo = destroyLoadInfo; - pMTree->ignoreEarlierTs = false; - - for (int32_t i = 0; i < pFReader->pSet->nSttF; ++i) { // open all last file - memset(&pLDataIter[i], 0, sizeof(SLDataIter)); - code = tLDataIterOpen(&pLDataIter[i], pFReader, i, pMTree->backward, suid, uid, pTimeWindow, pVerRange, - &pMTree->pLoadInfo[i], pMTree->idStr, strictTimeRange); - if (code != TSDB_CODE_SUCCESS) { - goto _end; - } - - bool hasVal = tLDataIterNextRow(&pLDataIter[i], pMTree->idStr); - if 
(hasVal) { - tMergeTreeAddIter(pMTree, &pLDataIter[i]); - } else { - if (!pMTree->ignoreEarlierTs) { - pMTree->ignoreEarlierTs = pLDataIter[i].ignoreEarlierTs; - } - } - } - - return code; - -_end: - tMergeTreeClose(pMTree); - return code; -} - int32_t tMergeTreeOpen2(SMergeTree *pMTree, SMergeTreeConf *pConf) { int32_t code = TSDB_CODE_SUCCESS; diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c deleted file mode 100644 index c02cff3aa9..0000000000 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ /dev/null @@ -1,5611 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "osDef.h" -#include "tsdb.h" -#include "tsimplehash.h" - -#define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC) -#define getCurrentKeyInLastBlock(_r) ((_r)->currentKey) - -typedef enum { - READER_STATUS_SUSPEND = 0x1, - READER_STATUS_NORMAL = 0x2, -} EReaderStatus; - -typedef enum { - EXTERNAL_ROWS_PREV = 0x1, - EXTERNAL_ROWS_MAIN = 0x2, - EXTERNAL_ROWS_NEXT = 0x3, -} EContentData; -/* -typedef enum { - READ_MODE_COUNT_ONLY = 0x1, - READ_MODE_ALL, -} EReadMode; -*/ -typedef struct { - STbDataIter* iter; - int32_t index; - bool hasVal; -} SIterInfo; - -typedef struct { - int32_t numOfBlocks; - int32_t numOfLastFiles; -} SBlockNumber; - -typedef struct SBlockIndex { - int32_t ordinalIndex; - int64_t inFileOffset; - STimeWindow window; // todo replace it with overlap flag. 
-} SBlockIndex; - -typedef struct STableBlockScanInfo { - uint64_t uid; - TSKEY lastKey; - TSKEY lastKeyInStt; // last accessed key in stt - SMapData mapData; // block info (compressed) - SArray* pBlockList; // block data index list, SArray - SIterInfo iter; // mem buffer skip list iterator - SIterInfo iiter; // imem buffer skip list iterator - SArray* delSkyline; // delete info for this table - int32_t fileDelIndex; // file block delete index - int32_t sttBlockDelIndex; // delete index for last block - bool iterInit; // whether to initialize the in-memory skip list iterator or not -} STableBlockScanInfo; - -typedef struct SBlockOrderWrapper { - int64_t uid; - int64_t offset; -} SBlockOrderWrapper; - -typedef struct SBlockOrderSupporter { - SBlockOrderWrapper** pDataBlockInfo; - int32_t* indexPerTable; - int32_t* numOfBlocksPerTable; - int32_t numOfTables; -} SBlockOrderSupporter; - -typedef struct SIOCostSummary { - int64_t numOfBlocks; - double blockLoadTime; - double buildmemBlock; - int64_t headFileLoad; - double headFileLoadTime; - int64_t smaDataLoad; - double smaLoadTime; - int64_t sttStatisBlockLoad; - int64_t sttBlockLoad; - double sttBlockLoadTime; - int64_t composedBlocks; - double buildComposedBlockTime; - double createScanInfoList; - SSttBlockLoadCostInfo sttCost; - double initDelSkylineIterTime; -} SIOCostSummary; - -typedef struct SBlockLoadSuppInfo { - SArray* pColAgg; - SColumnDataAgg tsColAgg; - int16_t* colId; - int16_t* slotId; - int32_t numOfCols; - char** buildBuf; // build string tmp buffer, todo remove it later after all string format being updated. 
- bool smaValid; // the sma on all queried columns are activated -} SBlockLoadSuppInfo; - -typedef struct SLastBlockReader { - STimeWindow window; - SVersionRange verRange; - int32_t order; - uint64_t uid; - SMergeTree mergeTree; - SSttBlockLoadInfo* pInfo; - int64_t currentKey; -} SLastBlockReader; - -typedef struct SFilesetIter { - int32_t numOfFiles; // number of total files - int32_t index; // current accessed index in the list - SArray* pFileList; // data file list - int32_t order; - SLastBlockReader* pLastBlockReader; // last file block reader -} SFilesetIter; - -typedef struct SFileDataBlockInfo { - // index position in STableBlockScanInfo in order to check whether neighbor block overlaps with it - uint64_t uid; - int32_t tbBlockIdx; -} SFileDataBlockInfo; - -typedef struct SDataBlockIter { - int32_t numOfBlocks; - int32_t index; - SArray* blockList; // SArray - int32_t order; - SDataBlk block; // current SDataBlk data - SSHashObj* pTableMap; -} SDataBlockIter; - -typedef struct SFileBlockDumpInfo { - int32_t totalRows; - int32_t rowIndex; - int64_t lastKey; - bool allDumped; -} SFileBlockDumpInfo; - -typedef struct STableUidList { - uint64_t* tableUidList; // access table uid list in uid ascending order list - int32_t currentIndex; // index in table uid list -} STableUidList; - -typedef struct SReaderStatus { - bool loadFromFile; // check file stage - bool composedDataBlock; // the returned data block is a composed block or not - bool mapDataCleaned; // mapData has been cleaned up alreay or not - SSHashObj* pTableMap; // SHash - STableBlockScanInfo** pTableIter; // table iterator used in building in-memory buffer data blocks. - STableUidList uidList; // check tables in uid order, to avoid the repeatly load of blocks in STT. 
- SFileBlockDumpInfo fBlockDumpInfo; - SDFileSet* pCurrentFileset; // current opened file set - SBlockData fileBlockData; - SFilesetIter fileIter; - SDataBlockIter blockIter; - SLDataIter* pLDataIter; - SRowMerger merger; - SColumnInfoData* pPrimaryTsCol; // primary time stamp output col info data -} SReaderStatus; - -typedef struct SBlockInfoBuf { - int32_t currentIndex; - SArray* pData; - int32_t numPerBucket; - int32_t numOfTables; -} SBlockInfoBuf; - -typedef struct STsdbReaderAttr { - STSchema* pSchema; - EReadMode readMode; - uint64_t rowsNum; - STimeWindow window; - bool freeBlock; - SVersionRange verRange; - int16_t order; -} STsdbReaderAttr; - -typedef struct SResultBlockInfo { - SSDataBlock* pResBlock; - bool freeBlock; - int64_t capacity; -} SResultBlockInfo; - -struct STsdbReader { - STsdb* pTsdb; - SVersionRange verRange; - TdThreadMutex readerMutex; - EReaderStatus flag; - int32_t code; - uint64_t suid; - int16_t order; - EReadMode readMode; - uint64_t rowsNum; - STimeWindow window; // the primary query time window that applies to all queries - SResultBlockInfo resBlockInfo; - SReaderStatus status; - char* idStr; // query info handle, for debug purpose - int32_t type; // query type: 1. retrieve all data blocks, 2. 
retrieve direct prev|next rows - SBlockLoadSuppInfo suppInfo; - STsdbReadSnap* pReadSnap; - SIOCostSummary cost; - SHashObj** pIgnoreTables; - STSchema* pSchema; // the newest version schema - SSHashObj* pSchemaMap; // keep the retrieved schema info, to avoid the overhead by repeatly load schema - SDataFReader* pFileReader; // the file reader - SDelFReader* pDelFReader; // the del file reader - SArray* pDelIdx; // del file block index; - SBlockInfoBuf blockInfoBuf; - EContentData step; - STsdbReader* innerReader[2]; -}; - -static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter); -static int buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t endKey, int32_t capacity, - STsdbReader* pReader); -static TSDBROW* getValidMemRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pReader); -static int32_t doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pScanInfo, STsdbReader* pReader); -static int32_t doMergeRowsInLastBlock(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pScanInfo, int64_t ts, - SRowMerger* pMerger, SVersionRange* pVerRange, const char* id); -static int32_t doMergeRowsInBuf(SIterInfo* pIter, uint64_t uid, int64_t ts, SArray* pDelList, STsdbReader* pReader); -static int32_t doAppendRowFromTSRow(SSDataBlock* pBlock, STsdbReader* pReader, SRow* pTSRow, - STableBlockScanInfo* pScanInfo); -static int32_t doAppendRowFromFileBlock(SSDataBlock* pResBlock, STsdbReader* pReader, SBlockData* pBlockData, - int32_t rowIndex); -static void setComposedBlockFlag(STsdbReader* pReader, bool composed); -static bool hasBeenDropped(const SArray* pDelList, int32_t* index, int64_t key, int64_t ver, int32_t order, - SVersionRange* pVerRange); - -static int32_t doMergeMemTableMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, SArray* pDelList, - TSDBROW* pResRow, STsdbReader* pReader, bool* freeTSRow); -static int32_t doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* pBlockScanInfo, 
- STsdbReader* pReader, SRow** pTSRow); -static int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBlockScanInfo, int64_t key, - STsdbReader* pReader); - -static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, STbData* pMemTbData, - STbData* piMemTbData); -static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr, - int8_t* pLevel); -static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, const char* id); -static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader); -static int32_t doBuildDataBlock(STsdbReader* pReader); -static TSDBKEY getCurrentKeyInBuf(STableBlockScanInfo* pScanInfo, STsdbReader* pReader); -static bool hasDataInFileBlock(const SBlockData* pBlockData, const SFileBlockDumpInfo* pDumpInfo); -static void initBlockDumpInfo(STsdbReader* pReader, SDataBlockIter* pBlockIter); -static int32_t getInitialDelIndex(const SArray* pDelSkyline, int32_t order); - -static STableBlockScanInfo* getTableBlockScanInfo(SSHashObj* pTableMap, uint64_t uid, const char* id); - -static bool outOfTimeWindow(int64_t ts, STimeWindow* pWindow) { return (ts > pWindow->ekey) || (ts < pWindow->skey); } - -static int32_t setColumnIdSlotList(SBlockLoadSuppInfo* pSupInfo, SColumnInfo* pCols, const int32_t* pSlotIdList, - int32_t numOfCols) { - pSupInfo->smaValid = true; - pSupInfo->numOfCols = numOfCols; - pSupInfo->colId = taosMemoryMalloc(numOfCols * (sizeof(int16_t) * 2 + POINTER_BYTES)); - if (pSupInfo->colId == NULL) { - taosMemoryFree(pSupInfo->colId); - return TSDB_CODE_OUT_OF_MEMORY; - } - - pSupInfo->slotId = (int16_t*)((char*)pSupInfo->colId + (sizeof(int16_t) * numOfCols)); - pSupInfo->buildBuf = (char**)((char*)pSupInfo->slotId + (sizeof(int16_t) * numOfCols)); - for (int32_t i = 0; i < numOfCols; ++i) { - pSupInfo->colId[i] = pCols[i].colId; - pSupInfo->slotId[i] = pSlotIdList[i]; - - if (IS_VAR_DATA_TYPE(pCols[i].type)) { 
- pSupInfo->buildBuf[i] = taosMemoryMalloc(pCols[i].bytes); - } else { - pSupInfo->buildBuf[i] = NULL; - } - } - - return TSDB_CODE_SUCCESS; -} - -static int32_t updateBlockSMAInfo(STSchema* pSchema, SBlockLoadSuppInfo* pSupInfo) { - int32_t i = 0, j = 0; - - if (j < pSupInfo->numOfCols && PRIMARYKEY_TIMESTAMP_COL_ID == pSupInfo->colId[j]) { - j += 1; - } - - while (i < pSchema->numOfCols && j < pSupInfo->numOfCols) { - STColumn* pTCol = &pSchema->columns[i]; - if (pTCol->colId == pSupInfo->colId[j]) { - if (!IS_BSMA_ON(pTCol)) { - pSupInfo->smaValid = false; - return TSDB_CODE_SUCCESS; - } - - i += 1; - j += 1; - } else if (pTCol->colId < pSupInfo->colId[j]) { - // do nothing - i += 1; - } else { - return TSDB_CODE_INVALID_PARA; - } - } - - return TSDB_CODE_SUCCESS; -} - -static int32_t initBlockScanInfoBuf(SBlockInfoBuf* pBuf, int32_t numOfTables) { - int32_t num = numOfTables / pBuf->numPerBucket; - int32_t remainder = numOfTables % pBuf->numPerBucket; - if (pBuf->pData == NULL) { - pBuf->pData = taosArrayInit(num + 1, POINTER_BYTES); - } - - for (int32_t i = 0; i < num; ++i) { - char* p = taosMemoryCalloc(pBuf->numPerBucket, sizeof(STableBlockScanInfo)); - if (p == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - taosArrayPush(pBuf->pData, &p); - } - - if (remainder > 0) { - char* p = taosMemoryCalloc(remainder, sizeof(STableBlockScanInfo)); - if (p == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - taosArrayPush(pBuf->pData, &p); - } - - pBuf->numOfTables = numOfTables; - - return TSDB_CODE_SUCCESS; -} - -static int32_t ensureBlockScanInfoBuf(SBlockInfoBuf* pBuf, int32_t numOfTables) { - if (numOfTables <= pBuf->numOfTables) { - return TSDB_CODE_SUCCESS; - } - - if (pBuf->numOfTables > 0) { - STableBlockScanInfo** p = (STableBlockScanInfo**)taosArrayPop(pBuf->pData); - taosMemoryFree(*p); - pBuf->numOfTables /= pBuf->numPerBucket; - } - - int32_t num = (numOfTables - pBuf->numOfTables) / pBuf->numPerBucket; - int32_t remainder = (numOfTables - 
pBuf->numOfTables) % pBuf->numPerBucket; - if (pBuf->pData == NULL) { - pBuf->pData = taosArrayInit(num + 1, POINTER_BYTES); - } - - for (int32_t i = 0; i < num; ++i) { - char* p = taosMemoryCalloc(pBuf->numPerBucket, sizeof(STableBlockScanInfo)); - if (p == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - taosArrayPush(pBuf->pData, &p); - } - - if (remainder > 0) { - char* p = taosMemoryCalloc(remainder, sizeof(STableBlockScanInfo)); - if (p == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - taosArrayPush(pBuf->pData, &p); - } - - pBuf->numOfTables = numOfTables; - - return TSDB_CODE_SUCCESS; -} - -static void clearBlockScanInfoBuf(SBlockInfoBuf* pBuf) { - size_t num = taosArrayGetSize(pBuf->pData); - for (int32_t i = 0; i < num; ++i) { - char** p = taosArrayGet(pBuf->pData, i); - taosMemoryFree(*p); - } - - taosArrayDestroy(pBuf->pData); -} - -static void* getPosInBlockInfoBuf(SBlockInfoBuf* pBuf, int32_t index) { - int32_t bucketIndex = index / pBuf->numPerBucket; - char** pBucket = taosArrayGet(pBuf->pData, bucketIndex); - return (*pBucket) + (index % pBuf->numPerBucket) * sizeof(STableBlockScanInfo); -} - -static int32_t uidComparFunc(const void* p1, const void* p2) { - uint64_t pu1 = *(uint64_t*)p1; - uint64_t pu2 = *(uint64_t*)p2; - if (pu1 == pu2) { - return 0; - } else { - return (pu1 < pu2) ? 
-1 : 1; - } -} - -// NOTE: speedup the whole processing by preparing the buffer for STableBlockScanInfo in batch model -static SSHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, SBlockInfoBuf* pBuf, const STableKeyInfo* idList, - STableUidList* pUidList, int32_t numOfTables) { - // allocate buffer in order to load data blocks from file - // todo use simple hash instead, optimize the memory consumption - SSHashObj* pTableMap = tSimpleHashInit(numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); - if (pTableMap == NULL) { - return NULL; - } - - int64_t st = taosGetTimestampUs(); - initBlockScanInfoBuf(pBuf, numOfTables); - - pUidList->tableUidList = taosMemoryMalloc(numOfTables * sizeof(uint64_t)); - if (pUidList->tableUidList == NULL) { - tSimpleHashCleanup(pTableMap); - return NULL; - } - - pUidList->currentIndex = 0; - - for (int32_t j = 0; j < numOfTables; ++j) { - STableBlockScanInfo* pScanInfo = getPosInBlockInfoBuf(pBuf, j); - - pScanInfo->uid = idList[j].uid; - pUidList->tableUidList[j] = idList[j].uid; - - if (ASCENDING_TRAVERSE(pTsdbReader->order)) { - int64_t skey = pTsdbReader->window.skey; - pScanInfo->lastKey = (skey > INT64_MIN) ? (skey - 1) : skey; - pScanInfo->lastKeyInStt = skey; - } else { - int64_t ekey = pTsdbReader->window.ekey; - pScanInfo->lastKey = (ekey < INT64_MAX) ? 
(ekey + 1) : ekey; - pScanInfo->lastKeyInStt = ekey; - } - - tSimpleHashPut(pTableMap, &pScanInfo->uid, sizeof(uint64_t), &pScanInfo, POINTER_BYTES); - tsdbTrace("%p check table uid:%" PRId64 " from lastKey:%" PRId64 " %s", pTsdbReader, pScanInfo->uid, - pScanInfo->lastKey, pTsdbReader->idStr); - } - - taosSort(pUidList->tableUidList, numOfTables, sizeof(uint64_t), uidComparFunc); - - pTsdbReader->cost.createScanInfoList = (taosGetTimestampUs() - st) / 1000.0; - tsdbDebug("%p create %d tables scan-info, size:%.2f Kb, elapsed time:%.2f ms, %s", pTsdbReader, numOfTables, - (sizeof(STableBlockScanInfo) * numOfTables) / 1024.0, pTsdbReader->cost.createScanInfoList, - pTsdbReader->idStr); - - return pTableMap; -} - -static void resetAllDataBlockScanInfo(SSHashObj* pTableMap, int64_t ts, int32_t step) { - void* p = NULL; - int32_t iter = 0; - - while ((p = tSimpleHashIterate(pTableMap, p, &iter)) != NULL) { - STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; - - pInfo->iterInit = false; - pInfo->iter.hasVal = false; - pInfo->iiter.hasVal = false; - - if (pInfo->iter.iter != NULL) { - pInfo->iter.iter = tsdbTbDataIterDestroy(pInfo->iter.iter); - } - - if (pInfo->iiter.iter != NULL) { - pInfo->iiter.iter = tsdbTbDataIterDestroy(pInfo->iiter.iter); - } - - pInfo->delSkyline = taosArrayDestroy(pInfo->delSkyline); - pInfo->lastKey = ts; - pInfo->lastKeyInStt = ts + step; - } -} - -static void clearBlockScanInfo(STableBlockScanInfo* p) { - p->iterInit = false; - - p->iter.hasVal = false; - p->iiter.hasVal = false; - - if (p->iter.iter != NULL) { - p->iter.iter = tsdbTbDataIterDestroy(p->iter.iter); - } - - if (p->iiter.iter != NULL) { - p->iiter.iter = tsdbTbDataIterDestroy(p->iiter.iter); - } - - p->delSkyline = taosArrayDestroy(p->delSkyline); - p->pBlockList = taosArrayDestroy(p->pBlockList); - tMapDataClear(&p->mapData); -} - -static void destroyAllBlockScanInfo(SSHashObj* pTableMap) { - void* p = NULL; - int32_t iter = 0; - - while ((p = 
tSimpleHashIterate(pTableMap, p, &iter)) != NULL) { - clearBlockScanInfo(*(STableBlockScanInfo**)p); - } - - tSimpleHashCleanup(pTableMap); -} - -static bool isEmptyQueryTimeWindow(STimeWindow* pWindow) { return pWindow->skey > pWindow->ekey; } - -// Update the query time window according to the data time to live(TTL) information, in order to avoid to return -// the expired data to client, even it is queried already. -static STimeWindow updateQueryTimeWindow(STsdb* pTsdb, STimeWindow* pWindow) { - STsdbKeepCfg* pCfg = &pTsdb->keepCfg; - - int64_t now = taosGetTimestamp(pCfg->precision); - int64_t earilyTs = now - (tsTickPerMin[pCfg->precision] * pCfg->keep2) + 1; // needs to add one tick - - STimeWindow win = *pWindow; - if (win.skey < earilyTs) { - win.skey = earilyTs; - } - - return win; -} - -// init file iterator -static int32_t initFilesetIterator(SFilesetIter* pIter, SArray* aDFileSet, STsdbReader* pReader) { - size_t numOfFileset = taosArrayGetSize(aDFileSet); - - pIter->index = ASCENDING_TRAVERSE(pReader->order) ? 
-1 : numOfFileset; - pIter->order = pReader->order; - pIter->pFileList = aDFileSet; - pIter->numOfFiles = numOfFileset; - - if (pIter->pLastBlockReader == NULL) { - pIter->pLastBlockReader = taosMemoryCalloc(1, sizeof(struct SLastBlockReader)); - if (pIter->pLastBlockReader == NULL) { - int32_t code = TSDB_CODE_OUT_OF_MEMORY; - tsdbError("failed to prepare the last block iterator, since:%s %s", tstrerror(code), pReader->idStr); - return code; - } - } - - SLastBlockReader* pLReader = pIter->pLastBlockReader; - pLReader->order = pReader->order; - pLReader->window = pReader->window; - pLReader->verRange = pReader->verRange; - - pLReader->uid = 0; - tMergeTreeClose(&pLReader->mergeTree); - - if (pLReader->pInfo == NULL) { - // here we ignore the first column, which is always be the primary timestamp column - SBlockLoadSuppInfo* pInfo = &pReader->suppInfo; - - int32_t numOfStt = pReader->pTsdb->pVnode->config.sttTrigger; - pLReader->pInfo = tCreateLastBlockLoadInfo(pReader->pSchema, &pInfo->colId[1], pInfo->numOfCols - 1, numOfStt); - if (pLReader->pInfo == NULL) { - tsdbDebug("init fileset iterator failed, code:%s %s", tstrerror(terrno), pReader->idStr); - return terrno; - } - } - - tsdbDebug("init fileset iterator, total files:%d %s", pIter->numOfFiles, pReader->idStr); - return TSDB_CODE_SUCCESS; -} - -static int32_t filesetIteratorNext(SFilesetIter* pIter, STsdbReader* pReader, bool* hasNext) { - bool asc = ASCENDING_TRAVERSE(pIter->order); - int32_t step = asc ? 
1 : -1; - pIter->index += step; - int32_t code = 0; - - if ((asc && pIter->index >= pIter->numOfFiles) || ((!asc) && pIter->index < 0)) { - *hasNext = false; - return TSDB_CODE_SUCCESS; - } - - SIOCostSummary* pCost = &pReader->cost; - getSttBlockLoadInfo(pIter->pLastBlockReader->pInfo, &pCost->sttCost); - - pIter->pLastBlockReader->uid = 0; - tMergeTreeClose(&pIter->pLastBlockReader->mergeTree); - resetLastBlockLoadInfo(pIter->pLastBlockReader->pInfo); - - // check file the time range of coverage - STimeWindow win = {0}; - - while (1) { - if (pReader->pFileReader != NULL) { - tsdbDataFReaderClose(&pReader->pFileReader); - } - - pReader->status.pCurrentFileset = (SDFileSet*)taosArrayGet(pIter->pFileList, pIter->index); - - code = tsdbDataFReaderOpen(&pReader->pFileReader, pReader->pTsdb, pReader->status.pCurrentFileset); - if (code != TSDB_CODE_SUCCESS) { - goto _err; - } - - pReader->cost.headFileLoad += 1; - - int32_t fid = pReader->status.pCurrentFileset->fid; - tsdbFidKeyRange(fid, pReader->pTsdb->keepCfg.days, pReader->pTsdb->keepCfg.precision, &win.skey, &win.ekey); - - // current file are no longer overlapped with query time window, ignore remain files - if ((asc && win.skey > pReader->window.ekey) || (!asc && win.ekey < pReader->window.skey)) { - tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, %s", pReader, - pReader->window.skey, pReader->window.ekey, pReader->idStr); - *hasNext = false; - return TSDB_CODE_SUCCESS; - } - - if ((asc && (win.ekey < pReader->window.skey)) || ((!asc) && (win.skey > pReader->window.ekey))) { - pIter->index += step; - if ((asc && pIter->index >= pIter->numOfFiles) || ((!asc) && pIter->index < 0)) { - *hasNext = false; - return TSDB_CODE_SUCCESS; - } - continue; - } - - tsdbDebug("%p file found fid:%d for qrange:%" PRId64 "-%" PRId64 ", %s", pReader, fid, pReader->window.skey, - pReader->window.ekey, pReader->idStr); - *hasNext = true; - return TSDB_CODE_SUCCESS; - } - -_err: - *hasNext = 
false; - return code; -} - -static void resetDataBlockIterator(SDataBlockIter* pIter, int32_t order) { - pIter->order = order; - pIter->index = -1; - pIter->numOfBlocks = 0; - if (pIter->blockList == NULL) { - pIter->blockList = taosArrayInit(4, sizeof(SFileDataBlockInfo)); - } else { - taosArrayClear(pIter->blockList); - } -} - -static void cleanupDataBlockIterator(SDataBlockIter* pIter) { taosArrayDestroy(pIter->blockList); } - -static void initReaderStatus(SReaderStatus* pStatus) { - pStatus->pTableIter = NULL; - pStatus->loadFromFile = true; -} - -static SSDataBlock* createResBlock(SQueryTableDataCond* pCond, int32_t capacity) { - SSDataBlock* pResBlock = createDataBlock(); - if (pResBlock == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; - } - - for (int32_t i = 0; i < pCond->numOfCols; ++i) { - SColumnInfoData colInfo = {0}; - colInfo.info = pCond->colList[i]; - blockDataAppendColInfo(pResBlock, &colInfo); - } - - int32_t code = blockDataEnsureCapacity(pResBlock, capacity); - if (code != TSDB_CODE_SUCCESS) { - terrno = code; - taosMemoryFree(pResBlock); - return NULL; - } - return pResBlock; -} - -static int32_t tsdbInitReaderLock(STsdbReader* pReader) { - int32_t code = -1; - qTrace("tsdb/read: %p, pre-init read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - code = taosThreadMutexInit(&pReader->readerMutex, NULL); - - qTrace("tsdb/read: %p, post-init read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - return code; -} - -static int32_t tsdbUninitReaderLock(STsdbReader* pReader) { - int32_t code = -1; - qTrace("tsdb/read: %p, pre-uninit read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - code = taosThreadMutexDestroy(&pReader->readerMutex); - - qTrace("tsdb/read: %p, post-uninit read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - return code; -} - -static int32_t tsdbAcquireReader(STsdbReader* pReader) { - int32_t code = -1; - qTrace("tsdb/read: %p, pre-take read mutex: %p, 
code: %d", pReader, &pReader->readerMutex, code); - - code = taosThreadMutexLock(&pReader->readerMutex); - - qTrace("tsdb/read: %p, post-take read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - return code; -} - -static int32_t tsdbTryAcquireReader(STsdbReader* pReader) { - int32_t code = -1; - qTrace("tsdb/read: %p, pre-trytake read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - code = taosThreadMutexTryLock(&pReader->readerMutex); - - qTrace("tsdb/read: %p, post-trytake read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - return code; -} - -static int32_t tsdbReleaseReader(STsdbReader* pReader) { - int32_t code = -1; - qTrace("tsdb/read: %p, pre-untake read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - code = taosThreadMutexUnlock(&pReader->readerMutex); - - qTrace("tsdb/read: %p, post-untake read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - return code; -} - -void tsdbReleaseDataBlock(STsdbReader* pReader) { - SReaderStatus* pStatus = &pReader->status; - if (!pStatus->composedDataBlock) { - tsdbReleaseReader(pReader); - } -} - -static int32_t initResBlockInfo(SResultBlockInfo* pResBlockInfo, int64_t capacity, SSDataBlock* pResBlock, - SQueryTableDataCond* pCond) { - pResBlockInfo->capacity = capacity; - pResBlockInfo->pResBlock = pResBlock; - terrno = 0; - - if (pResBlockInfo->pResBlock == NULL) { - pResBlockInfo->freeBlock = true; - pResBlockInfo->pResBlock = createResBlock(pCond, pResBlockInfo->capacity); - } else { - pResBlockInfo->freeBlock = false; - } - - return terrno; -} - -static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, void** ppReader, int32_t capacity, - SSDataBlock* pResBlock, const char* idstr) { - int32_t code = 0; - int8_t level = 0; - STsdbReader* pReader = (STsdbReader*)taosMemoryCalloc(1, sizeof(*pReader)); - if (pReader == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _end; - } - - if (VND_IS_TSMA(pVnode)) { - 
tsdbDebug("vgId:%d, tsma is selected to query, %s", TD_VID(pVnode), idstr); - } - - initReaderStatus(&pReader->status); - - pReader->pTsdb = getTsdbByRetentions(pVnode, pCond->twindows.skey, pVnode->config.tsdbCfg.retentions, idstr, &level); - pReader->suid = pCond->suid; - pReader->order = pCond->order; - - pReader->idStr = (idstr != NULL) ? taosStrdup(idstr) : NULL; - pReader->verRange = getQueryVerRange(pVnode, pCond, idstr); - pReader->type = pCond->type; - pReader->window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows); - pReader->blockInfoBuf.numPerBucket = 1000; // 1000 tables per bucket - - code = initResBlockInfo(&pReader->resBlockInfo, capacity, pResBlock, pCond); - if (code != TSDB_CODE_SUCCESS) { - goto _end; - } - - if (pCond->numOfCols <= 0) { - tsdbError("vgId:%d, invalid column number %d in query cond, %s", TD_VID(pVnode), pCond->numOfCols, idstr); - code = TSDB_CODE_INVALID_PARA; - goto _end; - } - - // allocate buffer in order to load data blocks from file - SBlockLoadSuppInfo* pSup = &pReader->suppInfo; - pSup->pColAgg = taosArrayInit(pCond->numOfCols, sizeof(SColumnDataAgg)); - if (pSup->pColAgg == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _end; - } - - pSup->tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID; - setColumnIdSlotList(pSup, pCond->colList, pCond->pSlotList, pCond->numOfCols); - - code = tBlockDataCreate(&pReader->status.fileBlockData); - if (code != TSDB_CODE_SUCCESS) { - terrno = code; - goto _end; - } - - if (pReader->suppInfo.colId[0] != PRIMARYKEY_TIMESTAMP_COL_ID) { - tsdbError("the first column isn't primary timestamp, %d, %s", pReader->suppInfo.colId[0], pReader->idStr); - code = TSDB_CODE_INVALID_PARA; - goto _end; - } - - pReader->status.pPrimaryTsCol = taosArrayGet(pReader->resBlockInfo.pResBlock->pDataBlock, pSup->slotId[0]); - int32_t type = pReader->status.pPrimaryTsCol->info.type; - if (type != TSDB_DATA_TYPE_TIMESTAMP) { - tsdbError("the first column isn't primary timestamp in result block, actual: %s, 
%s", tDataTypes[type].name, - pReader->idStr); - code = TSDB_CODE_INVALID_PARA; - goto _end; - } - - tsdbInitReaderLock(pReader); - - *ppReader = pReader; - return code; - -_end: - tsdbReaderClose(pReader); - *ppReader = NULL; - return code; -} - -static int32_t doLoadBlockIndex(STsdbReader* pReader, SDataFReader* pFileReader, SArray* pIndexList) { - int64_t st = taosGetTimestampUs(); - LRUHandle* handle = NULL; - int32_t code = tsdbCacheGetBlockIdx(pFileReader->pTsdb->biCache, pFileReader, &handle); - if (code != TSDB_CODE_SUCCESS || handle == NULL) { - goto _end; - } - - int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); - - SArray* aBlockIdx = (SArray*)taosLRUCacheValue(pFileReader->pTsdb->biCache, handle); - size_t num = taosArrayGetSize(aBlockIdx); - if (num == 0) { - tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); - return TSDB_CODE_SUCCESS; - } - - // todo binary search to the start position - int64_t et1 = taosGetTimestampUs(); - - SBlockIdx* pBlockIdx = NULL; - STableUidList* pList = &pReader->status.uidList; - - int32_t i = 0, j = 0; - while (i < num && j < numOfTables) { - pBlockIdx = (SBlockIdx*)taosArrayGet(aBlockIdx, i); - if (pBlockIdx->suid != pReader->suid) { - i += 1; - continue; - } - - if (pBlockIdx->uid < pList->tableUidList[j]) { - i += 1; - continue; - } - - if (pBlockIdx->uid > pList->tableUidList[j]) { - j += 1; - continue; - } - - if (pBlockIdx->uid == pList->tableUidList[j]) { - // this block belongs to a table that is not queried. 
- STableBlockScanInfo* pScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, pBlockIdx->uid, pReader->idStr); - if (pScanInfo == NULL) { - tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); - return terrno; - } - - if (pScanInfo->pBlockList == NULL) { - pScanInfo->pBlockList = taosArrayInit(4, sizeof(SBlockIndex)); - } - - taosArrayPush(pIndexList, pBlockIdx); - - i += 1; - j += 1; - } - } - - int64_t et2 = taosGetTimestampUs(); - tsdbDebug("load block index for %d/%d tables completed, elapsed time:%.2f ms, set blockIdx:%.2f ms, size:%.2f Kb %s", - numOfTables, (int32_t)num, (et1 - st) / 1000.0, (et2 - et1) / 1000.0, num * sizeof(SBlockIdx) / 1024.0, - pReader->idStr); - - pReader->cost.headFileLoadTime += (et1 - st) / 1000.0; - -_end: - tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); - return code; -} - -static void doCleanupTableScanInfo(STableBlockScanInfo* pScanInfo) { - // reset the index in last block when handing a new file - tMapDataClear(&pScanInfo->mapData); - taosArrayClear(pScanInfo->pBlockList); -} - -static void cleanupTableScanInfo(SReaderStatus* pStatus) { - if (pStatus->mapDataCleaned) { - return; - } - - SSHashObj* pTableMap = pStatus->pTableMap; - STableBlockScanInfo** px = NULL; - int32_t iter = 0; - - while (1) { - px = tSimpleHashIterate(pTableMap, px, &iter); - if (px == NULL) { - break; - } - - doCleanupTableScanInfo(*px); - } - - pStatus->mapDataCleaned = true; -} - -static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, SBlockNumber* pBlockNum, - SArray* pTableScanInfoList) { - size_t sizeInDisk = 0; - size_t numOfTables = taosArrayGetSize(pIndexList); - - int64_t st = taosGetTimestampUs(); - cleanupTableScanInfo(&pReader->status); - - // set the flag for the new file - pReader->status.mapDataCleaned = false; - for (int32_t i = 0; i < numOfTables; ++i) { - SBlockIdx* pBlockIdx = taosArrayGet(pIndexList, i); - STableBlockScanInfo* pScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, 
pBlockIdx->uid, pReader->idStr); - if (pScanInfo == NULL) { - return terrno; - } - - tMapDataReset(&pScanInfo->mapData); - tsdbReadDataBlk(pReader->pFileReader, pBlockIdx, &pScanInfo->mapData); - taosArrayEnsureCap(pScanInfo->pBlockList, pScanInfo->mapData.nItem); - - sizeInDisk += pScanInfo->mapData.nData; - - int32_t step = ASCENDING_TRAVERSE(pReader->order) ? 1 : -1; - STimeWindow w = pReader->window; - if (ASCENDING_TRAVERSE(pReader->order)) { - w.skey = pScanInfo->lastKey + step; - } else { - w.ekey = pScanInfo->lastKey + step; - } - - if (isEmptyQueryTimeWindow(&w)) { - continue; - } - - SDataBlk block = {0}; - for (int32_t j = 0; j < pScanInfo->mapData.nItem; ++j) { - tGetDataBlk(pScanInfo->mapData.pData + pScanInfo->mapData.aOffset[j], &block); - - // 1. time range check - // if (block.minKey.ts > pReader->window.ekey || block.maxKey.ts < pReader->window.skey) { - if (block.minKey.ts > w.ekey || block.maxKey.ts < w.skey) { - continue; - } - - // 2. version range check - if (block.minVer > pReader->verRange.maxVer || block.maxVer < pReader->verRange.minVer) { - continue; - } - - SBlockIndex bIndex = {.ordinalIndex = j, .inFileOffset = block.aSubBlock->offset}; - bIndex.window = (STimeWindow){.skey = block.minKey.ts, .ekey = block.maxKey.ts}; - - void* p1 = taosArrayPush(pScanInfo->pBlockList, &bIndex); - if (p1 == NULL) { - tMapDataClear(&pScanInfo->mapData); - return TSDB_CODE_OUT_OF_MEMORY; - } - - pBlockNum->numOfBlocks += 1; - } - - if (taosArrayGetSize(pScanInfo->pBlockList) > 0) { - taosArrayPush(pTableScanInfoList, &pScanInfo); - } - } - - pBlockNum->numOfLastFiles = pReader->pFileReader->pSet->nSttF; - int32_t total = pBlockNum->numOfLastFiles + pBlockNum->numOfBlocks; - - double el = (taosGetTimestampUs() - st) / 1000.0; - tsdbDebug( - "load block of %ld tables completed, blocks:%d in %d tables, last-files:%d, block-info-size:%.2f Kb, elapsed " - "time:%.2f ms %s", - numOfTables, pBlockNum->numOfBlocks, (int32_t)taosArrayGetSize(pTableScanInfoList), 
pBlockNum->numOfLastFiles, - sizeInDisk / 1000.0, el, pReader->idStr); - - pReader->cost.numOfBlocks += total; - pReader->cost.headFileLoadTime += el; - - return TSDB_CODE_SUCCESS; -} - -static void setBlockAllDumped(SFileBlockDumpInfo* pDumpInfo, int64_t maxKey, int32_t order) { - int32_t step = ASCENDING_TRAVERSE(order) ? 1 : -1; - pDumpInfo->allDumped = true; - pDumpInfo->lastKey = maxKey + step; -} - -static int32_t doCopyColVal(SColumnInfoData* pColInfoData, int32_t rowIndex, int32_t colIndex, SColVal* pColVal, - SBlockLoadSuppInfo* pSup) { - if (IS_VAR_DATA_TYPE(pColVal->type)) { - if (!COL_VAL_IS_VALUE(pColVal)) { - colDataSetNULL(pColInfoData, rowIndex); - } else { - varDataSetLen(pSup->buildBuf[colIndex], pColVal->value.nData); - if (pColVal->value.nData > pColInfoData->info.bytes) { - tsdbWarn("column cid:%d actual data len %d is bigger than schema len %d", pColVal->cid, pColVal->value.nData, - pColInfoData->info.bytes); - return TSDB_CODE_TDB_INVALID_TABLE_SCHEMA_VER; - } - if (pColVal->value.nData > 0) { // pData may be null, if nData is 0 - memcpy(varDataVal(pSup->buildBuf[colIndex]), pColVal->value.pData, pColVal->value.nData); - } - - colDataSetVal(pColInfoData, rowIndex, pSup->buildBuf[colIndex], false); - } - } else { - colDataSetVal(pColInfoData, rowIndex, (const char*)&pColVal->value, !COL_VAL_IS_VALUE(pColVal)); - } - - return TSDB_CODE_SUCCESS; -} - -static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter) { - size_t num = taosArrayGetSize(pBlockIter->blockList); - if (num == 0) { - ASSERT(pBlockIter->numOfBlocks == num); - return NULL; - } - - SFileDataBlockInfo* pBlockInfo = taosArrayGet(pBlockIter->blockList, pBlockIter->index); - return pBlockInfo; -} - -static SDataBlk* getCurrentBlock(SDataBlockIter* pBlockIter) { return &pBlockIter->block; } - -static int doBinarySearchKey(TSKEY* keyList, int num, int pos, TSKEY key, int order) { - // start end position - int s, e; - s = pos; - - // check - ASSERT(pos >= 0 && pos < num 
&& num > 0); - if (order == TSDB_ORDER_ASC) { - // find the first position which is smaller than the key - e = num - 1; - if (key < keyList[pos]) return -1; - while (1) { - // check can return - if (key >= keyList[e]) return e; - if (key <= keyList[s]) return s; - if (e - s <= 1) return s; - - // change start or end position - int mid = s + (e - s + 1) / 2; - if (keyList[mid] > key) - e = mid; - else if (keyList[mid] < key) - s = mid; - else - return mid; - } - } else { // DESC - // find the first position which is bigger than the key - e = 0; - if (key > keyList[pos]) return -1; - while (1) { - // check can return - if (key <= keyList[e]) return e; - if (key >= keyList[s]) return s; - if (s - e <= 1) return s; - - // change start or end position - int mid = s - (s - e + 1) / 2; - if (keyList[mid] < key) - e = mid; - else if (keyList[mid] > key) - s = mid; - else - return mid; - } - } -} - -static int32_t getEndPosInDataBlock(STsdbReader* pReader, SBlockData* pBlockData, SDataBlk* pBlock, int32_t pos) { - // NOTE: reverse the order to find the end position in data block - int32_t endPos = -1; - bool asc = ASCENDING_TRAVERSE(pReader->order); - - if (asc && pReader->window.ekey >= pBlock->maxKey.ts) { - endPos = pBlock->nRow - 1; - } else if (!asc && pReader->window.skey <= pBlock->minKey.ts) { - endPos = 0; - } else { - int64_t key = asc ? 
pReader->window.ekey : pReader->window.skey; - endPos = doBinarySearchKey(pBlockData->aTSKEY, pBlock->nRow, pos, key, pReader->order); - } - - if ((pReader->verRange.maxVer >= pBlock->minVer && pReader->verRange.maxVer < pBlock->maxVer) || - (pReader->verRange.minVer <= pBlock->maxVer && pReader->verRange.minVer > pBlock->minVer)) { - int32_t i = endPos; - - if (asc) { - for (; i >= 0; --i) { - if (pBlockData->aVersion[i] <= pReader->verRange.maxVer) { - break; - } - } - } else { - for (; i < pBlock->nRow; ++i) { - if (pBlockData->aVersion[i] >= pReader->verRange.minVer) { - break; - } - } - } - - endPos = i; - } - - return endPos; -} - -static void copyPrimaryTsCol(const SBlockData* pBlockData, SFileBlockDumpInfo* pDumpInfo, SColumnInfoData* pColData, - int32_t dumpedRows, bool asc) { - if (asc) { - memcpy(pColData->pData, &pBlockData->aTSKEY[pDumpInfo->rowIndex], dumpedRows * sizeof(int64_t)); - } else { - int32_t startIndex = pDumpInfo->rowIndex - dumpedRows + 1; - memcpy(pColData->pData, &pBlockData->aTSKEY[startIndex], dumpedRows * sizeof(int64_t)); - - // todo: opt perf by extract the loop - // reverse the array list - int32_t mid = dumpedRows >> 1u; - int64_t* pts = (int64_t*)pColData->pData; - for (int32_t j = 0; j < mid; ++j) { - int64_t t = pts[j]; - pts[j] = pts[dumpedRows - j - 1]; - pts[dumpedRows - j - 1] = t; - } - } -} - -// a faster version of copy procedure. -static void copyNumericCols(const SColData* pData, SFileBlockDumpInfo* pDumpInfo, SColumnInfoData* pColData, - int32_t dumpedRows, bool asc) { - uint8_t* p = NULL; - if (asc) { - p = pData->pData + tDataTypes[pData->type].bytes * pDumpInfo->rowIndex; - } else { - int32_t startIndex = pDumpInfo->rowIndex - dumpedRows + 1; - p = pData->pData + tDataTypes[pData->type].bytes * startIndex; - } - - int32_t step = asc ? 1 : -1; - - // make sure it is aligned to 8bit, the allocated memory address is aligned to 256bit - // ASSERT((((uint64_t)pColData->pData) & (0x8 - 1)) == 0); - - // 1. 
copy data in a batch model - memcpy(pColData->pData, p, dumpedRows * tDataTypes[pData->type].bytes); - - // 2. reverse the array list in case of descending order scan data block - if (!asc) { - switch (pColData->info.type) { - case TSDB_DATA_TYPE_TIMESTAMP: - case TSDB_DATA_TYPE_DOUBLE: - case TSDB_DATA_TYPE_BIGINT: - case TSDB_DATA_TYPE_UBIGINT: { - int32_t mid = dumpedRows >> 1u; - int64_t* pts = (int64_t*)pColData->pData; - for (int32_t j = 0; j < mid; ++j) { - int64_t t = pts[j]; - pts[j] = pts[dumpedRows - j - 1]; - pts[dumpedRows - j - 1] = t; - } - break; - } - - case TSDB_DATA_TYPE_BOOL: - case TSDB_DATA_TYPE_TINYINT: - case TSDB_DATA_TYPE_UTINYINT: { - int32_t mid = dumpedRows >> 1u; - int8_t* pts = (int8_t*)pColData->pData; - for (int32_t j = 0; j < mid; ++j) { - int8_t t = pts[j]; - pts[j] = pts[dumpedRows - j - 1]; - pts[dumpedRows - j - 1] = t; - } - break; - } - - case TSDB_DATA_TYPE_SMALLINT: - case TSDB_DATA_TYPE_USMALLINT: { - int32_t mid = dumpedRows >> 1u; - int16_t* pts = (int16_t*)pColData->pData; - for (int32_t j = 0; j < mid; ++j) { - int64_t t = pts[j]; - pts[j] = pts[dumpedRows - j - 1]; - pts[dumpedRows - j - 1] = t; - } - break; - } - - case TSDB_DATA_TYPE_FLOAT: - case TSDB_DATA_TYPE_INT: - case TSDB_DATA_TYPE_UINT: { - int32_t mid = dumpedRows >> 1u; - int32_t* pts = (int32_t*)pColData->pData; - for (int32_t j = 0; j < mid; ++j) { - int32_t t = pts[j]; - pts[j] = pts[dumpedRows - j - 1]; - pts[dumpedRows - j - 1] = t; - } - break; - } - } - } - - // 3. 
if the null value exists, check items one-by-one - if (pData->flag != HAS_VALUE) { - int32_t rowIndex = 0; - - for (int32_t j = pDumpInfo->rowIndex; rowIndex < dumpedRows; j += step, rowIndex++) { - uint8_t v = tColDataGetBitValue(pData, j); - if (v == 0 || v == 1) { - colDataSetNull_f(pColData->nullbitmap, rowIndex); - pColData->hasNull = true; - } - } - } -} - -static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader) { - SReaderStatus* pStatus = &pReader->status; - SDataBlockIter* pBlockIter = &pStatus->blockIter; - SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - - SBlockData* pBlockData = &pStatus->fileBlockData; - SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); - SDataBlk* pBlock = getCurrentBlock(pBlockIter); - SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; - int32_t numOfOutputCols = pSupInfo->numOfCols; - int32_t code = TSDB_CODE_SUCCESS; - - SColVal cv = {0}; - int64_t st = taosGetTimestampUs(); - bool asc = ASCENDING_TRAVERSE(pReader->order); - int32_t step = asc ? 1 : -1; - - // no data exists, return directly. - if (pBlockData->nRow == 0 || pBlockData->aTSKEY == 0) { - tsdbWarn("%p no need to copy since no data in blockData, table uid:%" PRIu64 " has been dropped, %s", pReader, - pBlockInfo->uid, pReader->idStr); - pResBlock->info.rows = 0; - return 0; - } - - // row index of dump info remain the initial position, let's find the appropriate start position. 
- if ((pDumpInfo->rowIndex == 0 && asc) || (pDumpInfo->rowIndex == pBlock->nRow - 1 && (!asc))) { - if (asc && pReader->window.skey <= pBlock->minKey.ts && pReader->verRange.minVer <= pBlock->minVer) { - // pDumpInfo->rowIndex = 0; - } else if (!asc && pReader->window.ekey >= pBlock->maxKey.ts && pReader->verRange.maxVer >= pBlock->maxVer) { - // pDumpInfo->rowIndex = pBlock->nRow - 1; - } else { // find the appropriate the start position in current block, and set it to be the current rowIndex - int32_t pos = asc ? pBlock->nRow - 1 : 0; - int32_t order = asc ? TSDB_ORDER_DESC : TSDB_ORDER_ASC; - int64_t key = asc ? pReader->window.skey : pReader->window.ekey; - pDumpInfo->rowIndex = doBinarySearchKey(pBlockData->aTSKEY, pBlock->nRow, pos, key, order); - - if (pDumpInfo->rowIndex < 0) { - tsdbError( - "%p failed to locate the start position in current block, global index:%d, table index:%d, brange:%" PRId64 - "-%" PRId64 ", minVer:%" PRId64 ", maxVer:%" PRId64 " %s", - pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, pBlock->minVer, - pBlock->maxVer, pReader->idStr); - return TSDB_CODE_INVALID_PARA; - } - - ASSERT(pReader->verRange.minVer <= pBlock->maxVer && pReader->verRange.maxVer >= pBlock->minVer); - - // find the appropriate start position that satisfies the version requirement. 
- if ((pReader->verRange.maxVer >= pBlock->minVer && pReader->verRange.maxVer < pBlock->maxVer) || - (pReader->verRange.minVer <= pBlock->maxVer && pReader->verRange.minVer > pBlock->minVer)) { - int32_t i = pDumpInfo->rowIndex; - if (asc) { - for (; i < pBlock->nRow; ++i) { - if (pBlockData->aVersion[i] >= pReader->verRange.minVer) { - break; - } - } - } else { - for (; i >= 0; --i) { - if (pBlockData->aVersion[i] <= pReader->verRange.maxVer) { - break; - } - } - } - - pDumpInfo->rowIndex = i; - } - } - } - - // time window check - int32_t endIndex = getEndPosInDataBlock(pReader, pBlockData, pBlock, pDumpInfo->rowIndex); - if (endIndex == -1) { - setBlockAllDumped(pDumpInfo, pReader->window.ekey, pReader->order); - return TSDB_CODE_SUCCESS; - } - - endIndex += step; - int32_t dumpedRows = asc ? (endIndex - pDumpInfo->rowIndex) : (pDumpInfo->rowIndex - endIndex); - if (dumpedRows > pReader->resBlockInfo.capacity) { // output buffer check - dumpedRows = pReader->resBlockInfo.capacity; - } else if (dumpedRows <= 0) { // no qualified rows in current data block, abort directly. 
- setBlockAllDumped(pDumpInfo, pReader->window.ekey, pReader->order); - return TSDB_CODE_SUCCESS; - } - - int32_t i = 0; - int32_t rowIndex = 0; - - SColumnInfoData* pColData = taosArrayGet(pResBlock->pDataBlock, pSupInfo->slotId[i]); - if (pSupInfo->colId[i] == PRIMARYKEY_TIMESTAMP_COL_ID) { - copyPrimaryTsCol(pBlockData, pDumpInfo, pColData, dumpedRows, asc); - i += 1; - } - - int32_t colIndex = 0; - int32_t num = pBlockData->nColData; - while (i < numOfOutputCols && colIndex < num) { - rowIndex = 0; - - SColData* pData = tBlockDataGetColDataByIdx(pBlockData, colIndex); - if (pData->cid < pSupInfo->colId[i]) { - colIndex += 1; - } else if (pData->cid == pSupInfo->colId[i]) { - pColData = taosArrayGet(pResBlock->pDataBlock, pSupInfo->slotId[i]); - - if (pData->flag == HAS_NONE || pData->flag == HAS_NULL || pData->flag == (HAS_NULL | HAS_NONE)) { - colDataSetNNULL(pColData, 0, dumpedRows); - } else { - if (IS_MATHABLE_TYPE(pColData->info.type)) { - copyNumericCols(pData, pDumpInfo, pColData, dumpedRows, asc); - } else { // varchar/nchar type - for (int32_t j = pDumpInfo->rowIndex; rowIndex < dumpedRows; j += step) { - tColDataGetValue(pData, j, &cv); - code = doCopyColVal(pColData, rowIndex++, i, &cv, pSupInfo); - if (code) { - return code; - } - } - } - } - - colIndex += 1; - i += 1; - } else { // the specified column does not exist in file block, fill with null data - pColData = taosArrayGet(pResBlock->pDataBlock, pSupInfo->slotId[i]); - colDataSetNNULL(pColData, 0, dumpedRows); - i += 1; - } - } - - // fill the mis-matched columns with null value - while (i < numOfOutputCols) { - pColData = taosArrayGet(pResBlock->pDataBlock, pSupInfo->slotId[i]); - colDataSetNNULL(pColData, 0, dumpedRows); - i += 1; - } - - pResBlock->info.dataLoad = 1; - pResBlock->info.rows = dumpedRows; - pDumpInfo->rowIndex += step * dumpedRows; - - // check if current block are all handled - if (pDumpInfo->rowIndex >= 0 && pDumpInfo->rowIndex < pBlock->nRow) { - int64_t ts = 
pBlockData->aTSKEY[pDumpInfo->rowIndex]; - if (outOfTimeWindow(ts, &pReader->window)) { // the remain data has out of query time window, ignore current block - setBlockAllDumped(pDumpInfo, ts, pReader->order); - } - } else { - int64_t ts = asc ? pBlock->maxKey.ts : pBlock->minKey.ts; - setBlockAllDumped(pDumpInfo, ts, pReader->order); - } - - double elapsedTime = (taosGetTimestampUs() - st) / 1000.0; - pReader->cost.blockLoadTime += elapsedTime; - - int32_t unDumpedRows = asc ? pBlock->nRow - pDumpInfo->rowIndex : pDumpInfo->rowIndex + 1; - tsdbDebug("%p copy file block to sdatablock, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64 - ", rows:%d, remain:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", uid:%" PRIu64 " elapsed time:%.2f ms, %s", - pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, dumpedRows, - unDumpedRows, pBlock->minVer, pBlock->maxVer, pBlockInfo->uid, elapsedTime, pReader->idStr); - - return TSDB_CODE_SUCCESS; -} - -static FORCE_INLINE STSchema* getTableSchemaImpl(STsdbReader* pReader, uint64_t uid) { - ASSERT(pReader->pSchema == NULL); - - int32_t code = metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->suid, uid, -1, &pReader->pSchema); - if (code != TSDB_CODE_SUCCESS || pReader->pSchema == NULL) { - terrno = code; - tsdbError("failed to get table schema, uid:%" PRIu64 ", it may have been dropped, ver:-1, %s", uid, pReader->idStr); - return NULL; - } - - code = tsdbRowMergerInit(&pReader->status.merger, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - terrno = code; - tsdbError("failed to init merger, code:%s, %s", tstrerror(code), pReader->idStr); - return NULL; - } - - return pReader->pSchema; -} - -static int32_t doLoadFileBlockData(STsdbReader* pReader, SDataBlockIter* pBlockIter, SBlockData* pBlockData, - uint64_t uid) { - int32_t code = 0; - STSchema* pSchema = pReader->pSchema; - int64_t st = taosGetTimestampUs(); - - tBlockDataReset(pBlockData); - - if (pReader->pSchema == 
NULL) { - pSchema = getTableSchemaImpl(pReader, uid); - if (pSchema == NULL) { - tsdbDebug("%p table uid:%" PRIu64 " has been dropped, no data existed, %s", pReader, uid, pReader->idStr); - return code; - } - } - - SBlockLoadSuppInfo* pSup = &pReader->suppInfo; - TABLEID tid = {.suid = pReader->suid, .uid = uid}; - code = tBlockDataInit(pBlockData, &tid, pSchema, &pSup->colId[1], pSup->numOfCols - 1); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - - SDataBlk* pBlock = getCurrentBlock(pBlockIter); - code = tsdbReadDataBlock(pReader->pFileReader, pBlock, pBlockData); - if (code != TSDB_CODE_SUCCESS) { - tsdbError("%p error occurs in loading file block, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64 - ", rows:%d, code:%s %s", - pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, pBlock->nRow, - tstrerror(code), pReader->idStr); - return code; - } - - double elapsedTime = (taosGetTimestampUs() - st) / 1000.0; - - tsdbDebug("%p load file block into buffer, global index:%d, index in table block list:%d, brange:%" PRId64 "-%" PRId64 - ", rows:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", elapsed time:%.2f ms, %s", - pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, pBlock->nRow, - pBlock->minVer, pBlock->maxVer, elapsedTime, pReader->idStr); - - pReader->cost.blockLoadTime += elapsedTime; - pDumpInfo->allDumped = false; - - return TSDB_CODE_SUCCESS; -} - -static void cleanupBlockOrderSupporter(SBlockOrderSupporter* pSup) { - taosMemoryFreeClear(pSup->numOfBlocksPerTable); - taosMemoryFreeClear(pSup->indexPerTable); - - for (int32_t i = 0; i < pSup->numOfTables; ++i) { - SBlockOrderWrapper* pBlockInfo = pSup->pDataBlockInfo[i]; - taosMemoryFreeClear(pBlockInfo); - } - - taosMemoryFreeClear(pSup->pDataBlockInfo); -} - -static int32_t 
initBlockOrderSupporter(SBlockOrderSupporter* pSup, int32_t numOfTables) { - pSup->numOfBlocksPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables); - pSup->indexPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables); - pSup->pDataBlockInfo = taosMemoryCalloc(1, POINTER_BYTES * numOfTables); - - if (pSup->numOfBlocksPerTable == NULL || pSup->indexPerTable == NULL || pSup->pDataBlockInfo == NULL) { - cleanupBlockOrderSupporter(pSup); - return TSDB_CODE_OUT_OF_MEMORY; - } - - return TSDB_CODE_SUCCESS; -} - -static int32_t fileDataBlockOrderCompar(const void* pLeft, const void* pRight, void* param) { - int32_t leftIndex = *(int32_t*)pLeft; - int32_t rightIndex = *(int32_t*)pRight; - - SBlockOrderSupporter* pSupporter = (SBlockOrderSupporter*)param; - - int32_t leftTableBlockIndex = pSupporter->indexPerTable[leftIndex]; - int32_t rightTableBlockIndex = pSupporter->indexPerTable[rightIndex]; - - if (leftTableBlockIndex > pSupporter->numOfBlocksPerTable[leftIndex]) { - /* left block is empty */ - return 1; - } else if (rightTableBlockIndex > pSupporter->numOfBlocksPerTable[rightIndex]) { - /* right block is empty */ - return -1; - } - - SBlockOrderWrapper* pLeftBlock = &pSupporter->pDataBlockInfo[leftIndex][leftTableBlockIndex]; - SBlockOrderWrapper* pRightBlock = &pSupporter->pDataBlockInfo[rightIndex][rightTableBlockIndex]; - - return pLeftBlock->offset > pRightBlock->offset ? 
1 : -1; -} - -static int32_t doSetCurrentBlock(SDataBlockIter* pBlockIter, const char* idStr) { - SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); - if (pBlockInfo != NULL) { - STableBlockScanInfo* pScanInfo = getTableBlockScanInfo(pBlockIter->pTableMap, pBlockInfo->uid, idStr); - if (pScanInfo == NULL) { - return terrno; - } - - SBlockIndex* pIndex = taosArrayGet(pScanInfo->pBlockList, pBlockInfo->tbBlockIdx); - tMapDataGetItemByIdx(&pScanInfo->mapData, pIndex->ordinalIndex, &pBlockIter->block, tGetDataBlk); - } - -#if 0 - qDebug("check file block, table uid:%"PRIu64" index:%d offset:%"PRId64", ", pScanInfo->uid, *mapDataIndex, pBlockIter->block.aSubBlock[0].offset); -#endif - - return TSDB_CODE_SUCCESS; -} - -static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t numOfBlocks, - SArray* pTableList) { - bool asc = ASCENDING_TRAVERSE(pReader->order); - - SBlockOrderSupporter sup = {0}; - pBlockIter->numOfBlocks = numOfBlocks; - taosArrayClear(pBlockIter->blockList); - pBlockIter->pTableMap = pReader->status.pTableMap; - - // access data blocks according to the offset of each block in asc/desc order. 
- int32_t numOfTables = taosArrayGetSize(pTableList); - - int64_t st = taosGetTimestampUs(); - int32_t code = initBlockOrderSupporter(&sup, numOfTables); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - int32_t cnt = 0; - - for (int32_t i = 0; i < numOfTables; ++i) { - STableBlockScanInfo* pTableScanInfo = taosArrayGetP(pTableList, i); - ASSERT(pTableScanInfo->pBlockList != NULL && taosArrayGetSize(pTableScanInfo->pBlockList) > 0); - - size_t num = taosArrayGetSize(pTableScanInfo->pBlockList); - sup.numOfBlocksPerTable[sup.numOfTables] = num; - - char* buf = taosMemoryMalloc(sizeof(SBlockOrderWrapper) * num); - if (buf == NULL) { - cleanupBlockOrderSupporter(&sup); - return TSDB_CODE_OUT_OF_MEMORY; - } - - sup.pDataBlockInfo[sup.numOfTables] = (SBlockOrderWrapper*)buf; - - for (int32_t k = 0; k < num; ++k) { - SBlockIndex* pIndex = taosArrayGet(pTableScanInfo->pBlockList, k); - sup.pDataBlockInfo[sup.numOfTables][k] = - (SBlockOrderWrapper){.uid = pTableScanInfo->uid, .offset = pIndex->inFileOffset}; - cnt++; - } - - sup.numOfTables += 1; - } - - if (numOfBlocks != cnt && sup.numOfTables != numOfTables) { - cleanupBlockOrderSupporter(&sup); - return TSDB_CODE_INVALID_PARA; - } - - // since there is only one table qualified, blocks are not sorted - if (sup.numOfTables == 1) { - for (int32_t i = 0; i < numOfBlocks; ++i) { - SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[0][i].uid, .tbBlockIdx = i}; - taosArrayPush(pBlockIter->blockList, &blockInfo); - } - - int64_t et = taosGetTimestampUs(); - tsdbDebug("%p create blocks info struct completed for one table, %d blocks not sorted, elapsed time:%.2f ms %s", - pReader, numOfBlocks, (et - st) / 1000.0, pReader->idStr); - - pBlockIter->index = asc ? 
0 : (numOfBlocks - 1); - cleanupBlockOrderSupporter(&sup); - doSetCurrentBlock(pBlockIter, pReader->idStr); - return TSDB_CODE_SUCCESS; - } - - tsdbDebug("%p create data blocks info struct completed, %d blocks in %d tables %s", pReader, cnt, sup.numOfTables, - pReader->idStr); - - SMultiwayMergeTreeInfo* pTree = NULL; - - uint8_t ret = tMergeTreeCreate(&pTree, sup.numOfTables, &sup, fileDataBlockOrderCompar); - if (ret != TSDB_CODE_SUCCESS) { - cleanupBlockOrderSupporter(&sup); - return TSDB_CODE_OUT_OF_MEMORY; - } - - int32_t numOfTotal = 0; - while (numOfTotal < cnt) { - int32_t pos = tMergeTreeGetChosenIndex(pTree); - int32_t index = sup.indexPerTable[pos]++; - - SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[pos][index].uid, .tbBlockIdx = index}; - taosArrayPush(pBlockIter->blockList, &blockInfo); - - // set data block index overflow, in order to disable the offset comparator - if (sup.indexPerTable[pos] >= sup.numOfBlocksPerTable[pos]) { - sup.indexPerTable[pos] = sup.numOfBlocksPerTable[pos] + 1; - } - - numOfTotal += 1; - tMergeTreeAdjust(pTree, tMergeTreeGetAdjustIndex(pTree)); - } - - int64_t et = taosGetTimestampUs(); - tsdbDebug("%p %d data blocks access order completed, elapsed time:%.2f ms %s", pReader, numOfBlocks, - (et - st) / 1000.0, pReader->idStr); - cleanupBlockOrderSupporter(&sup); - taosMemoryFree(pTree); - - pBlockIter->index = asc ? 0 : (numOfBlocks - 1); - doSetCurrentBlock(pBlockIter, pReader->idStr); - - return TSDB_CODE_SUCCESS; -} - -static bool blockIteratorNext(SDataBlockIter* pBlockIter, const char* idStr) { - bool asc = ASCENDING_TRAVERSE(pBlockIter->order); - - int32_t step = asc ? 1 : -1; - if ((pBlockIter->index >= pBlockIter->numOfBlocks - 1 && asc) || (pBlockIter->index <= 0 && (!asc))) { - return false; - } - - pBlockIter->index += step; - doSetCurrentBlock(pBlockIter, idStr); - - return true; -} - -/** - * This is an two rectangles overlap cases. 
- */ -static int32_t dataBlockPartiallyRequired(STimeWindow* pWindow, SVersionRange* pVerRange, SDataBlk* pBlock) { - return (pWindow->ekey < pBlock->maxKey.ts && pWindow->ekey >= pBlock->minKey.ts) || - (pWindow->skey > pBlock->minKey.ts && pWindow->skey <= pBlock->maxKey.ts) || - (pVerRange->minVer > pBlock->minVer && pVerRange->minVer <= pBlock->maxVer) || - (pVerRange->maxVer < pBlock->maxVer && pVerRange->maxVer >= pBlock->minVer); -} - -static bool getNeighborBlockOfSameTable(SFileDataBlockInfo* pBlockInfo, STableBlockScanInfo* pTableBlockScanInfo, - int32_t* nextIndex, int32_t order, SBlockIndex* pBlockIndex) { - bool asc = ASCENDING_TRAVERSE(order); - if (asc && pBlockInfo->tbBlockIdx >= taosArrayGetSize(pTableBlockScanInfo->pBlockList) - 1) { - return false; - } - - if (!asc && pBlockInfo->tbBlockIdx == 0) { - return false; - } - - int32_t step = asc ? 1 : -1; - *nextIndex = pBlockInfo->tbBlockIdx + step; - *pBlockIndex = *(SBlockIndex*)taosArrayGet(pTableBlockScanInfo->pBlockList, *nextIndex); - // tMapDataGetItemByIdx(&pTableBlockScanInfo->mapData, pIndex->ordinalIndex, pBlock, tGetDataBlk); - return true; -} - -static int32_t findFileBlockInfoIndex(SDataBlockIter* pBlockIter, SFileDataBlockInfo* pFBlockInfo) { - int32_t step = ASCENDING_TRAVERSE(pBlockIter->order) ? 
1 : -1; - int32_t index = pBlockIter->index; - - while (index < pBlockIter->numOfBlocks && index >= 0) { - SFileDataBlockInfo* pFBlock = taosArrayGet(pBlockIter->blockList, index); - if (pFBlock->uid == pFBlockInfo->uid && pFBlock->tbBlockIdx == pFBlockInfo->tbBlockIdx) { - return index; - } - - index += step; - } - - return -1; -} - -static int32_t setFileBlockActiveInBlockIter(SDataBlockIter* pBlockIter, int32_t index, int32_t step) { - if (index < 0 || index >= pBlockIter->numOfBlocks) { - return -1; - } - - SFileDataBlockInfo fblock = *(SFileDataBlockInfo*)taosArrayGet(pBlockIter->blockList, index); - pBlockIter->index += step; - - if (index != pBlockIter->index) { - taosArrayRemove(pBlockIter->blockList, index); - taosArrayInsert(pBlockIter->blockList, pBlockIter->index, &fblock); - - SFileDataBlockInfo* pBlockInfo = taosArrayGet(pBlockIter->blockList, pBlockIter->index); - ASSERT(pBlockInfo->uid == fblock.uid && pBlockInfo->tbBlockIdx == fblock.tbBlockIdx); - } - - doSetCurrentBlock(pBlockIter, ""); - return TSDB_CODE_SUCCESS; -} - -// todo: this attribute could be acquired during extractin the global ordered block list. -static bool overlapWithNeighborBlock(SDataBlk* pBlock, SBlockIndex* pNeighborBlockIndex, int32_t order) { - // it is the last block in current file, no chance to overlap with neighbor blocks. 
- if (ASCENDING_TRAVERSE(order)) { - return pBlock->maxKey.ts == pNeighborBlockIndex->window.skey; - } else { - return pBlock->minKey.ts == pNeighborBlockIndex->window.ekey; - } -} - -static bool bufferDataInFileBlockGap(int32_t order, TSDBKEY key, SDataBlk* pBlock) { - bool ascScan = ASCENDING_TRAVERSE(order); - - return (ascScan && (key.ts != TSKEY_INITIAL_VAL && key.ts <= pBlock->minKey.ts)) || - (!ascScan && (key.ts != TSKEY_INITIAL_VAL && key.ts >= pBlock->maxKey.ts)); -} - -static bool keyOverlapFileBlock(TSDBKEY key, SDataBlk* pBlock, SVersionRange* pVerRange) { - return (key.ts >= pBlock->minKey.ts && key.ts <= pBlock->maxKey.ts) && (pBlock->maxVer >= pVerRange->minVer) && - (pBlock->minVer <= pVerRange->maxVer); -} - -static bool doCheckforDatablockOverlap(STableBlockScanInfo* pBlockScanInfo, const SDataBlk* pBlock, - int32_t startIndex) { - size_t num = taosArrayGetSize(pBlockScanInfo->delSkyline); - - for (int32_t i = startIndex; i < num; i += 1) { - TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, i); - if (p->ts >= pBlock->minKey.ts && p->ts <= pBlock->maxKey.ts) { - if (p->version >= pBlock->minVer) { - return true; - } - } else if (p->ts < pBlock->minKey.ts) { // p->ts < pBlock->minKey.ts - if (p->version >= pBlock->minVer) { - if (i < num - 1) { - TSDBKEY* pnext = taosArrayGet(pBlockScanInfo->delSkyline, i + 1); - if (pnext->ts >= pBlock->minKey.ts) { - return true; - } - } else { // it must be the last point - ASSERT(p->version == 0); - } - } - } else { // (p->ts > pBlock->maxKey.ts) { - return false; - } - } - - return false; -} - -static bool overlapWithDelSkyline(STableBlockScanInfo* pBlockScanInfo, const SDataBlk* pBlock, int32_t order) { - if (pBlockScanInfo->delSkyline == NULL) { - return false; - } - - // ts is not overlap - TSDBKEY* pFirst = taosArrayGet(pBlockScanInfo->delSkyline, 0); - TSDBKEY* pLast = taosArrayGetLast(pBlockScanInfo->delSkyline); - if (pBlock->minKey.ts > pLast->ts || pBlock->maxKey.ts < pFirst->ts) { - return 
false; - } - - // version is not overlap - if (ASCENDING_TRAVERSE(order)) { - return doCheckforDatablockOverlap(pBlockScanInfo, pBlock, pBlockScanInfo->fileDelIndex); - } else { - int32_t index = pBlockScanInfo->fileDelIndex; - while (1) { - TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, index); - if (p->ts > pBlock->minKey.ts && index > 0) { - index -= 1; - } else { // find the first point that is smaller than the minKey.ts of dataBlock. - if (p->ts == pBlock->minKey.ts && p->version < pBlock->maxVer && index > 0) { - index -= 1; - } - break; - } - } - - return doCheckforDatablockOverlap(pBlockScanInfo, pBlock, index); - } -} - -typedef struct { - bool overlapWithNeighborBlock; - bool hasDupTs; - bool overlapWithDelInfo; - bool overlapWithLastBlock; - bool overlapWithKeyInBuf; - bool partiallyRequired; - bool moreThanCapcity; -} SDataBlockToLoadInfo; - -static void getBlockToLoadInfo(SDataBlockToLoadInfo* pInfo, SFileDataBlockInfo* pBlockInfo, SDataBlk* pBlock, - STableBlockScanInfo* pScanInfo, TSDBKEY keyInBuf, SLastBlockReader* pLastBlockReader, - STsdbReader* pReader) { - int32_t neighborIndex = 0; - SBlockIndex bIndex = {0}; - - bool hasNeighbor = getNeighborBlockOfSameTable(pBlockInfo, pScanInfo, &neighborIndex, pReader->order, &bIndex); - - // overlap with neighbor - if (hasNeighbor) { - pInfo->overlapWithNeighborBlock = overlapWithNeighborBlock(pBlock, &bIndex, pReader->order); - } - - // has duplicated ts of different version in this block - pInfo->hasDupTs = (pBlock->nSubBlock == 1) ? 
pBlock->hasDup : true; - pInfo->overlapWithDelInfo = overlapWithDelSkyline(pScanInfo, pBlock, pReader->order); - - if (hasDataInLastBlock(pLastBlockReader)) { - int64_t tsLast = getCurrentKeyInLastBlock(pLastBlockReader); - pInfo->overlapWithLastBlock = !(pBlock->maxKey.ts < tsLast || pBlock->minKey.ts > tsLast); - } - - pInfo->moreThanCapcity = pBlock->nRow > pReader->resBlockInfo.capacity; - pInfo->partiallyRequired = dataBlockPartiallyRequired(&pReader->window, &pReader->verRange, pBlock); - pInfo->overlapWithKeyInBuf = keyOverlapFileBlock(keyInBuf, pBlock, &pReader->verRange); -} - -// 1. the version of all rows should be less than the endVersion -// 2. current block should not overlap with next neighbor block -// 3. current timestamp should not be overlap with each other -// 4. output buffer should be large enough to hold all rows in current block -// 5. delete info should not overlap with current block data -// 6. current block should not contain the duplicated ts -static bool fileBlockShouldLoad(STsdbReader* pReader, SFileDataBlockInfo* pBlockInfo, SDataBlk* pBlock, - STableBlockScanInfo* pScanInfo, TSDBKEY keyInBuf, SLastBlockReader* pLastBlockReader) { - SDataBlockToLoadInfo info = {0}; - getBlockToLoadInfo(&info, pBlockInfo, pBlock, pScanInfo, keyInBuf, pLastBlockReader, pReader); - - bool loadDataBlock = - (info.overlapWithNeighborBlock || info.hasDupTs || info.partiallyRequired || info.overlapWithKeyInBuf || - info.moreThanCapcity || info.overlapWithDelInfo || info.overlapWithLastBlock); - - // log the reason why load the datablock for profile - if (loadDataBlock) { - tsdbDebug("%p uid:%" PRIu64 - " need to load the datablock, overlapneighbor:%d, hasDup:%d, partiallyRequired:%d, " - "overlapWithKey:%d, greaterThanBuf:%d, overlapWithDel:%d, overlapWithlastBlock:%d, %s", - pReader, pBlockInfo->uid, info.overlapWithNeighborBlock, info.hasDupTs, info.partiallyRequired, - info.overlapWithKeyInBuf, info.moreThanCapcity, info.overlapWithDelInfo, 
info.overlapWithLastBlock, - pReader->idStr); - } - - return loadDataBlock; -} - -static bool isCleanFileDataBlock(STsdbReader* pReader, SFileDataBlockInfo* pBlockInfo, SDataBlk* pBlock, - STableBlockScanInfo* pScanInfo, TSDBKEY keyInBuf, SLastBlockReader* pLastBlockReader) { - SDataBlockToLoadInfo info = {0}; - getBlockToLoadInfo(&info, pBlockInfo, pBlock, pScanInfo, keyInBuf, pLastBlockReader, pReader); - bool isCleanFileBlock = !(info.overlapWithNeighborBlock || info.hasDupTs || info.overlapWithKeyInBuf || - info.overlapWithDelInfo || info.overlapWithLastBlock); - return isCleanFileBlock; -} - -static int32_t buildDataBlockFromBuf(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, int64_t endKey) { - if (!(pBlockScanInfo->iiter.hasVal || pBlockScanInfo->iter.hasVal)) { - return TSDB_CODE_SUCCESS; - } - - SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; - - int64_t st = taosGetTimestampUs(); - int32_t code = buildDataBlockFromBufImpl(pBlockScanInfo, endKey, pReader->resBlockInfo.capacity, pReader); - - blockDataUpdateTsWindow(pBlock, pReader->suppInfo.slotId[0]); - pBlock->info.id.uid = pBlockScanInfo->uid; - - setComposedBlockFlag(pReader, true); - - double elapsedTime = (taosGetTimestampUs() - st) / 1000.0; - tsdbDebug("%p build data block from cache completed, elapsed time:%.2f ms, numOfRows:%" PRId64 ", brange:%" PRId64 - " - %" PRId64 ", uid:%" PRIu64 ", %s", - pReader, elapsedTime, pBlock->info.rows, pBlock->info.window.skey, pBlock->info.window.ekey, - pBlockScanInfo->uid, pReader->idStr); - - pReader->cost.buildmemBlock += elapsedTime; - return code; -} - -static bool tryCopyDistinctRowFromFileBlock(STsdbReader* pReader, SBlockData* pBlockData, int64_t key, - SFileBlockDumpInfo* pDumpInfo, bool* copied) { - // opt version - // 1. it is not a border point - // 2. 
the direct next point is not an duplicated timestamp - int32_t code = TSDB_CODE_SUCCESS; - - *copied = false; - bool asc = (pReader->order == TSDB_ORDER_ASC); - if ((pDumpInfo->rowIndex < pDumpInfo->totalRows - 1 && asc) || (pDumpInfo->rowIndex > 0 && (!asc))) { - int32_t step = pReader->order == TSDB_ORDER_ASC ? 1 : -1; - - int64_t nextKey = pBlockData->aTSKEY[pDumpInfo->rowIndex + step]; - if (nextKey != key) { // merge is not needed - code = doAppendRowFromFileBlock(pReader->resBlockInfo.pResBlock, pReader, pBlockData, pDumpInfo->rowIndex); - if (code) { - return code; - } - pDumpInfo->rowIndex += step; - *copied = true; - } - } - - return code; -} - -static bool nextRowFromLastBlocks(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pScanInfo, - SVersionRange* pVerRange) { - int32_t step = ASCENDING_TRAVERSE(pLastBlockReader->order) ? 1 : -1; - - while (1) { - bool hasVal = tMergeTreeNext(&pLastBlockReader->mergeTree); - if (!hasVal) { // the next value will be the accessed key in stt - pScanInfo->lastKeyInStt += step; - return false; - } - - TSDBROW* pRow = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - int64_t key = pRow->pBlockData->aTSKEY[pRow->iRow]; - int64_t ver = pRow->pBlockData->aVersion[pRow->iRow]; - - pLastBlockReader->currentKey = key; - pScanInfo->lastKeyInStt = key; - - if (!hasBeenDropped(pScanInfo->delSkyline, &pScanInfo->sttBlockDelIndex, key, ver, pLastBlockReader->order, - pVerRange)) { - return true; - } - } -} - -static bool tryCopyDistinctRowFromSttBlock(TSDBROW* fRow, SLastBlockReader* pLastBlockReader, - STableBlockScanInfo* pScanInfo, int64_t ts, STsdbReader* pReader, - bool* copied) { - int32_t code = TSDB_CODE_SUCCESS; - - *copied = false; - - bool hasVal = nextRowFromLastBlocks(pLastBlockReader, pScanInfo, &pReader->verRange); - if (hasVal) { - int64_t next1 = getCurrentKeyInLastBlock(pLastBlockReader); - if (next1 != ts) { - code = doAppendRowFromFileBlock(pReader->resBlockInfo.pResBlock, pReader, fRow->pBlockData, 
fRow->iRow); - if (code) { - return code; - } - - *copied = true; - return code; - } - } else { - code = doAppendRowFromFileBlock(pReader->resBlockInfo.pResBlock, pReader, fRow->pBlockData, fRow->iRow); - if (code) { - return code; - } - - *copied = true; - return code; - } - - return code; -} - -static FORCE_INLINE STSchema* doGetSchemaForTSRow(int32_t sversion, STsdbReader* pReader, uint64_t uid) { - // always set the newest schema version in pReader->pSchema - if (pReader->pSchema == NULL) { - STSchema* ps = getTableSchemaImpl(pReader, uid); - if (ps == NULL) { - return NULL; - } - } - - if (pReader->pSchema && sversion == pReader->pSchema->version) { - return pReader->pSchema; - } - - void** p = tSimpleHashGet(pReader->pSchemaMap, &sversion, sizeof(sversion)); - if (p != NULL) { - return *(STSchema**)p; - } - - STSchema* ptr = NULL; - int32_t code = metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->suid, uid, sversion, &ptr); - if (code != TSDB_CODE_SUCCESS) { - terrno = code; - return NULL; - } else { - code = tSimpleHashPut(pReader->pSchemaMap, &sversion, sizeof(sversion), &ptr, POINTER_BYTES); - if (code != TSDB_CODE_SUCCESS) { - terrno = code; - return NULL; - } - return ptr; - } -} - -static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, TSDBROW* pRow, - SIterInfo* pIter, int64_t key, SLastBlockReader* pLastBlockReader) { - SRowMerger* pMerger = &pReader->status.merger; - SRow* pTSRow = NULL; - SBlockData* pBlockData = &pReader->status.fileBlockData; - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - - int64_t tsLast = INT64_MIN; - if (hasDataInLastBlock(pLastBlockReader)) { - tsLast = getCurrentKeyInLastBlock(pLastBlockReader); - } - - TSDBKEY k = TSDBROW_KEY(pRow); - TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); - - // merge is not initialized yet, due to the fact that the pReader->pSchema is not initialized - if (pMerger->pArray == NULL) { - 
ASSERT(pReader->pSchema == NULL); - STSchema* ps = getTableSchemaImpl(pReader, pBlockScanInfo->uid); - if (ps == NULL) { - return terrno; - } - } - - int64_t minKey = 0; - if (pReader->order == TSDB_ORDER_ASC) { - minKey = INT64_MAX; // chosen the minimum value - if (minKey > tsLast && hasDataInLastBlock(pLastBlockReader)) { - minKey = tsLast; - } - - if (minKey > k.ts) { - minKey = k.ts; - } - - if (minKey > key && hasDataInFileBlock(pBlockData, pDumpInfo)) { - minKey = key; - } - } else { - minKey = INT64_MIN; - if (minKey < tsLast && hasDataInLastBlock(pLastBlockReader)) { - minKey = tsLast; - } - - if (minKey < k.ts) { - minKey = k.ts; - } - - if (minKey < key && hasDataInFileBlock(pBlockData, pDumpInfo)) { - minKey = key; - } - } - - // todo remove init - bool init = false; - - // ASC: file block ---> last block -----> imem -----> mem - // DESC: mem -----> imem -----> last block -----> file block - if (pReader->order == TSDB_ORDER_ASC) { - if (minKey == key) { - init = true; - int32_t code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); - } - - if (minKey == tsLast) { - TSDBROW* fRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - if (init) { - tsdbRowMergerAdd(pMerger, fRow1, NULL); - } else { - init = true; - int32_t code = tsdbRowMergerAdd(pMerger, fRow1, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->verRange, pReader->idStr); - } - - if (minKey == k.ts) { - STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); - if (pSchema == NULL) { - return terrno; - } - if (init) { - tsdbRowMergerAdd(pMerger, pRow, pSchema); - } else { - init = true; - int32_t code = tsdbRowMergerAdd(pMerger, pRow, pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - int32_t code = 
doMergeRowsInBuf(pIter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - } else { - if (minKey == k.ts) { - init = true; - STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); - if (pSchema == NULL) { - return terrno; - } - - int32_t code = tsdbRowMergerAdd(pMerger, pRow, pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doMergeRowsInBuf(pIter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); - if (code != TSDB_CODE_SUCCESS || pMerger->pTSchema == NULL) { - return code; - } - } - - if (minKey == tsLast) { - TSDBROW* fRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - if (init) { - tsdbRowMergerAdd(pMerger, fRow1, NULL); - } else { - init = true; - int32_t code = tsdbRowMergerAdd(pMerger, fRow1, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->verRange, pReader->idStr); - } - - if (minKey == key) { - if (init) { - tsdbRowMergerAdd(pMerger, &fRow, NULL); - } else { - init = true; - int32_t code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); - } - } - - int32_t code = tsdbRowMergerGetRow(pMerger, &pTSRow); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); - - taosMemoryFree(pTSRow); - tsdbRowMergerClear(pMerger); - - return code; -} - -static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, STsdbReader* pReader, - STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData, - bool mergeBlockData) { - SRowMerger* pMerger = &pReader->status.merger; - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - - int64_t tsLastBlock = 
getCurrentKeyInLastBlock(pLastBlockReader); - bool copied = false; - int32_t code = TSDB_CODE_SUCCESS; - SRow* pTSRow = NULL; - TSDBROW* pRow = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - - // create local variable to hold the row value - TSDBROW fRow = {.iRow = pRow->iRow, .type = TSDBROW_COL_FMT, .pBlockData = pRow->pBlockData}; - - tsdbTrace("fRow ptr:%p, %d, uid:%" PRIu64 ", %s", pRow->pBlockData, pRow->iRow, pLastBlockReader->uid, - pReader->idStr); - - // only last block exists - if ((!mergeBlockData) || (tsLastBlock != pBlockData->aTSKEY[pDumpInfo->rowIndex])) { - code = tryCopyDistinctRowFromSttBlock(&fRow, pLastBlockReader, pBlockScanInfo, tsLastBlock, pReader, &copied); - if (code) { - return code; - } - - if (copied) { - pBlockScanInfo->lastKey = tsLastBlock; - return TSDB_CODE_SUCCESS; - } else { - code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - TSDBROW* pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - tsdbRowMergerAdd(pMerger, pRow1, NULL); - doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLastBlock, pMerger, &pReader->verRange, - pReader->idStr); - - code = tsdbRowMergerGetRow(pMerger, &pTSRow); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); - - taosMemoryFree(pTSRow); - tsdbRowMergerClear(pMerger); - - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - } else { // not merge block data - code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLastBlock, pMerger, &pReader->verRange, pReader->idStr); - - // merge with block data if ts == key - if (tsLastBlock == pBlockData->aTSKEY[pDumpInfo->rowIndex]) { - doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); - } - - code = tsdbRowMergerGetRow(pMerger, &pTSRow); - if (code 
!= TSDB_CODE_SUCCESS) { - return code; - } - - code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); - - taosMemoryFree(pTSRow); - tsdbRowMergerClear(pMerger); - - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - - return TSDB_CODE_SUCCESS; -} - -static int32_t mergeFileBlockAndLastBlock(STsdbReader* pReader, SLastBlockReader* pLastBlockReader, int64_t key, - STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData) { - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - SRowMerger* pMerger = &pReader->status.merger; - - // merge is not initialized yet, due to the fact that the pReader->pSchema is not initialized - if (pMerger->pArray == NULL) { - ASSERT(pReader->pSchema == NULL); - STSchema* ps = getTableSchemaImpl(pReader, pBlockScanInfo->uid); - if (ps == NULL) { - return terrno; - } - } - - if (hasDataInFileBlock(pBlockData, pDumpInfo)) { - // no last block available, only data block exists - if (!hasDataInLastBlock(pLastBlockReader)) { - return mergeRowsInFileBlocks(pBlockData, pBlockScanInfo, key, pReader); - } - - // row in last file block - TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); - int64_t ts = getCurrentKeyInLastBlock(pLastBlockReader); - ASSERT(ts >= key); - - if (ASCENDING_TRAVERSE(pReader->order)) { - if (key < ts) { // imem, mem are all empty, file blocks (data blocks and last block) exist - return mergeRowsInFileBlocks(pBlockData, pBlockScanInfo, key, pReader); - } else if (key == ts) { - SRow* pTSRow = NULL; - int32_t code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); - - TSDBROW* pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - tsdbRowMergerAdd(pMerger, pRow1, NULL); - - doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, ts, pMerger, &pReader->verRange, pReader->idStr); - - code = tsdbRowMergerGetRow(pMerger, 
&pTSRow); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); - - taosMemoryFree(pTSRow); - tsdbRowMergerClear(pMerger); - return code; - } else { - return TSDB_CODE_SUCCESS; - } - } else { // desc order - return doMergeFileBlockAndLastBlock(pLastBlockReader, pReader, pBlockScanInfo, pBlockData, true); - } - } else { // only last block exists - return doMergeFileBlockAndLastBlock(pLastBlockReader, pReader, pBlockScanInfo, NULL, false); - } -} - -static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData, - SLastBlockReader* pLastBlockReader) { - SRowMerger* pMerger = &pReader->status.merger; - SRow* pTSRow = NULL; - int32_t code = TSDB_CODE_SUCCESS; - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - SArray* pDelList = pBlockScanInfo->delSkyline; - - TSDBROW* pRow = getValidMemRow(&pBlockScanInfo->iter, pDelList, pReader); - TSDBROW* piRow = getValidMemRow(&pBlockScanInfo->iiter, pDelList, pReader); - - int64_t tsLast = INT64_MIN; - if (hasDataInLastBlock(pLastBlockReader)) { - tsLast = getCurrentKeyInLastBlock(pLastBlockReader); - } - - int64_t key = hasDataInFileBlock(pBlockData, pDumpInfo) ? 
pBlockData->aTSKEY[pDumpInfo->rowIndex] : INT64_MIN; - - TSDBKEY k = TSDBROW_KEY(pRow); - TSDBKEY ik = TSDBROW_KEY(piRow); - STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); - if (pSchema == NULL) { - return code; - } - - STSchema* piSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(piRow), pReader, pBlockScanInfo->uid); - if (piSchema == NULL) { - return code; - } - - // merge is not initialized yet, due to the fact that the pReader->pSchema is not initialized - if (pMerger->pArray == NULL) { - ASSERT(pReader->pSchema == NULL); - STSchema* ps = getTableSchemaImpl(pReader, pBlockScanInfo->uid); - if (ps == NULL) { - return terrno; - } - } - - int64_t minKey = 0; - if (ASCENDING_TRAVERSE(pReader->order)) { - minKey = INT64_MAX; // let's find the minimum - if (minKey > k.ts) { - minKey = k.ts; - } - - if (minKey > ik.ts) { - minKey = ik.ts; - } - - if (minKey > key && hasDataInFileBlock(pBlockData, pDumpInfo)) { - minKey = key; - } - - if (minKey > tsLast && hasDataInLastBlock(pLastBlockReader)) { - minKey = tsLast; - } - } else { - minKey = INT64_MIN; // let find the maximum ts value - if (minKey < k.ts) { - minKey = k.ts; - } - - if (minKey < ik.ts) { - minKey = ik.ts; - } - - if (minKey < key && hasDataInFileBlock(pBlockData, pDumpInfo)) { - minKey = key; - } - - if (minKey < tsLast && hasDataInLastBlock(pLastBlockReader)) { - minKey = tsLast; - } - } - - bool init = false; - - // ASC: file block -----> last block -----> imem -----> mem - // DESC: mem -----> imem -----> last block -----> file block - if (ASCENDING_TRAVERSE(pReader->order)) { - if (minKey == key) { - init = true; - TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); - code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); - } - - if (minKey == tsLast) { - TSDBROW* pRow1 = 
tMergeTreeGetRow(&pLastBlockReader->mergeTree); - if (init) { - tsdbRowMergerAdd(pMerger, pRow1, NULL); - } else { - init = true; - code = tsdbRowMergerAdd(pMerger, pRow1, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - - doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->verRange, pReader->idStr); - } - - if (minKey == ik.ts) { - if (init) { - tsdbRowMergerAdd(pMerger, piRow, piSchema); - } else { - init = true; - code = tsdbRowMergerAdd(pMerger, piRow, piSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - - code = doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - - if (minKey == k.ts) { - if (init) { - tsdbRowMergerAdd(pMerger, pRow, pSchema); - } else { - // STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); - code = tsdbRowMergerAdd(pMerger, pRow, pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - code = doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - } else { - if (minKey == k.ts) { - init = true; - code = tsdbRowMergerAdd(pMerger, pRow, pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - - if (minKey == ik.ts) { - if (init) { - tsdbRowMergerAdd(pMerger, piRow, piSchema); - } else { - init = true; - code = tsdbRowMergerAdd(pMerger, piRow, piSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - code = doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - - if (minKey == tsLast) { 
- TSDBROW* pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - if (init) { - tsdbRowMergerAdd(pMerger, pRow1, NULL); - } else { - init = true; - code = tsdbRowMergerAdd(pMerger, pRow1, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->verRange, pReader->idStr); - } - - if (minKey == key) { - TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); - if (!init) { - code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } else { - tsdbRowMergerAdd(pMerger, &fRow, NULL); - } - doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); - } - } - - code = tsdbRowMergerGetRow(pMerger, &pTSRow); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); - - taosMemoryFree(pTSRow); - tsdbRowMergerClear(pMerger); - return code; -} - -static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader) { - if (pBlockScanInfo->iterInit) { - return TSDB_CODE_SUCCESS; - } - - int32_t code = TSDB_CODE_SUCCESS; - - TSDBKEY startKey = {0}; - if (ASCENDING_TRAVERSE(pReader->order)) { - // startKey = (TSDBKEY){.ts = pReader->window.skey, .version = pReader->verRange.minVer}; - startKey = (TSDBKEY){.ts = pBlockScanInfo->lastKey + 1, .version = pReader->verRange.minVer}; - } else { - // startKey = (TSDBKEY){.ts = pReader->window.ekey, .version = pReader->verRange.maxVer}; - startKey = (TSDBKEY){.ts = pBlockScanInfo->lastKey - 1, .version = pReader->verRange.maxVer}; - } - - int32_t backward = (!ASCENDING_TRAVERSE(pReader->order)); - int64_t st = 0; - - STbData* d = NULL; - if (pReader->pReadSnap->pMem != NULL) { - d = tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->suid, pBlockScanInfo->uid); - if (d != NULL) { - code = tsdbTbDataIterCreate(d, &startKey, backward, 
&pBlockScanInfo->iter.iter); - if (code == TSDB_CODE_SUCCESS) { - pBlockScanInfo->iter.hasVal = (tsdbTbDataIterGet(pBlockScanInfo->iter.iter) != NULL); - - tsdbDebug("%p uid:%" PRIu64 ", check data in mem from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64 - "-%" PRId64 " %s", - pReader, pBlockScanInfo->uid, startKey.ts, pReader->order, d->minKey, d->maxKey, pReader->idStr); - } else { - tsdbError("%p uid:%" PRIu64 ", failed to create iterator for imem, code:%s, %s", pReader, pBlockScanInfo->uid, - tstrerror(code), pReader->idStr); - return code; - } - } - } else { - tsdbDebug("%p uid:%" PRIu64 ", no data in mem, %s", pReader, pBlockScanInfo->uid, pReader->idStr); - } - - STbData* di = NULL; - if (pReader->pReadSnap->pIMem != NULL) { - di = tsdbGetTbDataFromMemTable(pReader->pReadSnap->pIMem, pReader->suid, pBlockScanInfo->uid); - if (di != NULL) { - code = tsdbTbDataIterCreate(di, &startKey, backward, &pBlockScanInfo->iiter.iter); - if (code == TSDB_CODE_SUCCESS) { - pBlockScanInfo->iiter.hasVal = (tsdbTbDataIterGet(pBlockScanInfo->iiter.iter) != NULL); - - tsdbDebug("%p uid:%" PRIu64 ", check data in imem from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64 - "-%" PRId64 " %s", - pReader, pBlockScanInfo->uid, startKey.ts, pReader->order, di->minKey, di->maxKey, pReader->idStr); - } else { - tsdbError("%p uid:%" PRIu64 ", failed to create iterator for mem, code:%s, %s", pReader, pBlockScanInfo->uid, - tstrerror(code), pReader->idStr); - return code; - } - } - } else { - tsdbDebug("%p uid:%" PRIu64 ", no data in imem, %s", pReader, pBlockScanInfo->uid, pReader->idStr); - } - - st = taosGetTimestampUs(); - initDelSkylineIterator(pBlockScanInfo, pReader, d, di); - pReader->cost.initDelSkylineIterTime += (taosGetTimestampUs() - st) / 1000.0; - - pBlockScanInfo->iterInit = true; - return TSDB_CODE_SUCCESS; -} - -static bool isValidFileBlockRow(SBlockData* pBlockData, SFileBlockDumpInfo* pDumpInfo, - STableBlockScanInfo* pBlockScanInfo, STsdbReader* 
pReader) { - // it is an multi-table data block - if (pBlockData->aUid != NULL) { - uint64_t uid = pBlockData->aUid[pDumpInfo->rowIndex]; - if (uid != pBlockScanInfo->uid) { // move to next row - return false; - } - } - - // check for version and time range - int64_t ver = pBlockData->aVersion[pDumpInfo->rowIndex]; - if (ver > pReader->verRange.maxVer || ver < pReader->verRange.minVer) { - return false; - } - - int64_t ts = pBlockData->aTSKEY[pDumpInfo->rowIndex]; - if (ts > pReader->window.ekey || ts < pReader->window.skey) { - return false; - } - - if (hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->fileDelIndex, ts, ver, pReader->order, - &pReader->verRange)) { - return false; - } - - return true; -} - -static bool initLastBlockReader(SLastBlockReader* pLBlockReader, STableBlockScanInfo* pScanInfo, STsdbReader* pReader) { - // the last block reader has been initialized for this table. - if (pLBlockReader->uid == pScanInfo->uid) { - return hasDataInLastBlock(pLBlockReader); - } - - if (pLBlockReader->uid != 0) { - tMergeTreeClose(&pLBlockReader->mergeTree); - } - - initMemDataIterator(pScanInfo, pReader); - pLBlockReader->uid = pScanInfo->uid; - - STimeWindow w = pLBlockReader->window; - if (ASCENDING_TRAVERSE(pLBlockReader->order)) { - w.skey = pScanInfo->lastKeyInStt; - } else { - w.ekey = pScanInfo->lastKeyInStt; - } - - tsdbDebug("init last block reader, window:%" PRId64 "-%" PRId64 ", uid:%" PRIu64 ", %s", w.skey, w.ekey, - pScanInfo->uid, pReader->idStr); - int32_t code = tMergeTreeOpen(&pLBlockReader->mergeTree, (pLBlockReader->order == TSDB_ORDER_DESC), - pReader->pFileReader, pReader->suid, pScanInfo->uid, &w, &pLBlockReader->verRange, - pLBlockReader->pInfo, false, pReader->idStr, false, pReader->status.pLDataIter); - if (code != TSDB_CODE_SUCCESS) { - return false; - } - - return nextRowFromLastBlocks(pLBlockReader, pScanInfo, &pReader->verRange); -} - -static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader) { return 
pLastBlockReader->mergeTree.pIter != NULL; } - -bool hasDataInFileBlock(const SBlockData* pBlockData, const SFileBlockDumpInfo* pDumpInfo) { - if ((pBlockData->nRow > 0) && (pBlockData->nRow != pDumpInfo->totalRows)) { - return false; // this is an invalid result. - } - return pBlockData->nRow > 0 && (!pDumpInfo->allDumped); -} - -int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBlockScanInfo, int64_t key, - STsdbReader* pReader) { - SRowMerger* pMerger = &pReader->status.merger; - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - bool copied = false; - int32_t code = tryCopyDistinctRowFromFileBlock(pReader, pBlockData, key, pDumpInfo, &copied); - if (code) { - return code; - } - - // merge is not initialized yet, due to the fact that the pReader->pSchema is not initialized - if (pMerger->pArray == NULL) { - ASSERT(pReader->pSchema == NULL); - STSchema* ps = getTableSchemaImpl(pReader, pBlockScanInfo->uid); - if (ps == NULL) { - return terrno; - } - } - - if (copied) { - pBlockScanInfo->lastKey = key; - return TSDB_CODE_SUCCESS; - } else { - TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); - - SRow* pTSRow = NULL; - code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); - code = tsdbRowMergerGetRow(pMerger, &pTSRow); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); - - taosMemoryFree(pTSRow); - tsdbRowMergerClear(pMerger); - return code; - } -} - -static int32_t buildComposedDataBlockImpl(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, - SBlockData* pBlockData, SLastBlockReader* pLastBlockReader) { - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - - TSDBROW *pRow = NULL, *piRow = NULL; - int64_t key = (pBlockData->nRow > 0 && 
(!pDumpInfo->allDumped)) ? pBlockData->aTSKEY[pDumpInfo->rowIndex] : INT64_MIN; - if (pBlockScanInfo->iter.hasVal) { - pRow = getValidMemRow(&pBlockScanInfo->iter, pBlockScanInfo->delSkyline, pReader); - } - - if (pBlockScanInfo->iiter.hasVal) { - piRow = getValidMemRow(&pBlockScanInfo->iiter, pBlockScanInfo->delSkyline, pReader); - } - - // two levels of mem-table does contain the valid rows - if (pRow != NULL && piRow != NULL) { - return doMergeMultiLevelRows(pReader, pBlockScanInfo, pBlockData, pLastBlockReader); - } - - // imem + file + last block - if (pBlockScanInfo->iiter.hasVal) { - return doMergeBufAndFileRows(pReader, pBlockScanInfo, piRow, &pBlockScanInfo->iiter, key, pLastBlockReader); - } - - // mem + file + last block - if (pBlockScanInfo->iter.hasVal) { - return doMergeBufAndFileRows(pReader, pBlockScanInfo, pRow, &pBlockScanInfo->iter, key, pLastBlockReader); - } - - // files data blocks + last block - return mergeFileBlockAndLastBlock(pReader, pLastBlockReader, key, pBlockScanInfo, pBlockData); -} - -static int32_t loadNeighborIfOverlap(SFileDataBlockInfo* pBlockInfo, STableBlockScanInfo* pBlockScanInfo, - STsdbReader* pReader, bool* loadNeighbor) { - int32_t code = TSDB_CODE_SUCCESS; - int32_t step = ASCENDING_TRAVERSE(pReader->order) ? 1 : -1; - int32_t nextIndex = -1; - SBlockIndex nxtBIndex = {0}; - - *loadNeighbor = false; - SDataBlk* pBlock = getCurrentBlock(&pReader->status.blockIter); - - bool hasNeighbor = getNeighborBlockOfSameTable(pBlockInfo, pBlockScanInfo, &nextIndex, pReader->order, &nxtBIndex); - if (!hasNeighbor) { // do nothing - return code; - } - - if (overlapWithNeighborBlock(pBlock, &nxtBIndex, pReader->order)) { // load next block - SReaderStatus* pStatus = &pReader->status; - SDataBlockIter* pBlockIter = &pStatus->blockIter; - - // 1. 
find the next neighbor block in the scan block list - SFileDataBlockInfo fb = {.uid = pBlockInfo->uid, .tbBlockIdx = nextIndex}; - int32_t neighborIndex = findFileBlockInfoIndex(pBlockIter, &fb); - - // 2. remove it from the scan block list - setFileBlockActiveInBlockIter(pBlockIter, neighborIndex, step); - - // 3. load the neighbor block, and set it to be the currently accessed file data block - code = doLoadFileBlockData(pReader, pBlockIter, &pStatus->fileBlockData, pBlockInfo->uid); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - // 4. check the data values - initBlockDumpInfo(pReader, pBlockIter); - *loadNeighbor = true; - } - - return code; -} - -static void updateComposedBlockInfo(STsdbReader* pReader, double el, STableBlockScanInfo* pBlockScanInfo) { - SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; - - pResBlock->info.id.uid = (pBlockScanInfo != NULL) ? pBlockScanInfo->uid : 0; - pResBlock->info.dataLoad = 1; - blockDataUpdateTsWindow(pResBlock, pReader->suppInfo.slotId[0]); - - setComposedBlockFlag(pReader, true); - - pReader->cost.composedBlocks += 1; - pReader->cost.buildComposedBlockTime += el; -} - -static int32_t buildComposedDataBlock(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - - SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; - - SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); - SLastBlockReader* pLastBlockReader = pReader->status.fileIter.pLastBlockReader; - - bool asc = ASCENDING_TRAVERSE(pReader->order); - int64_t st = taosGetTimestampUs(); - int32_t step = asc ? 
1 : -1; - double el = 0; - SDataBlk* pBlock = getCurrentBlock(&pReader->status.blockIter); - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - - STableBlockScanInfo* pBlockScanInfo = NULL; - if (pBlockInfo != NULL) { - if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pBlockInfo->uid, sizeof(pBlockInfo->uid))) { - setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->order); - return code; - } - - pBlockScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, pBlockInfo->uid, pReader->idStr); - if (pBlockScanInfo == NULL) { - goto _end; - } - - TSDBKEY keyInBuf = getCurrentKeyInBuf(pBlockScanInfo, pReader); - - // it is a clean block, load it directly - if (isCleanFileDataBlock(pReader, pBlockInfo, pBlock, pBlockScanInfo, keyInBuf, pLastBlockReader) && - pBlock->nRow <= pReader->resBlockInfo.capacity) { - if (asc || (!hasDataInLastBlock(pLastBlockReader) && (pBlock->maxKey.ts > keyInBuf.ts))) { - code = copyBlockDataToSDataBlock(pReader); - if (code) { - goto _end; - } - - // record the last key value - pBlockScanInfo->lastKey = asc ? 
pBlock->maxKey.ts : pBlock->minKey.ts; - goto _end; - } - } - } else { // file blocks not exist - pBlockScanInfo = *pReader->status.pTableIter; - if (pReader->pIgnoreTables && - taosHashGet(*pReader->pIgnoreTables, &pBlockScanInfo->uid, sizeof(pBlockScanInfo->uid))) { - setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->order); - return code; - } - } - - SBlockData* pBlockData = &pReader->status.fileBlockData; - - while (1) { - bool hasBlockData = false; - { - while (pBlockData->nRow > 0 && - pBlockData->uid == pBlockScanInfo->uid) { // find the first qualified row in data block - if (isValidFileBlockRow(pBlockData, pDumpInfo, pBlockScanInfo, pReader)) { - hasBlockData = true; - break; - } - - pDumpInfo->rowIndex += step; - - pBlock = getCurrentBlock(&pReader->status.blockIter); - if (pDumpInfo->rowIndex >= pBlock->nRow || pDumpInfo->rowIndex < 0) { - pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); // NOTE: get the new block info - - // continue check for the next file block if the last ts in the current block - // is overlapped with the next neighbor block - bool loadNeighbor = false; - code = loadNeighborIfOverlap(pBlockInfo, pBlockScanInfo, pReader, &loadNeighbor); - if ((!loadNeighbor) || (code != 0)) { - setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->order); - break; - } - } - } - } - - // no data in last block and block, no need to proceed. 
- if (hasBlockData == false) { - break; - } - - code = buildComposedDataBlockImpl(pReader, pBlockScanInfo, pBlockData, pLastBlockReader); - if (code) { - goto _end; - } - - // currently loaded file data block is consumed - if ((pBlockData->nRow > 0) && (pDumpInfo->rowIndex >= pBlockData->nRow || pDumpInfo->rowIndex < 0)) { - pBlock = getCurrentBlock(&pReader->status.blockIter); - setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->order); - break; - } - - if (pResBlock->info.rows >= pReader->resBlockInfo.capacity) { - break; - } - } - -_end: - el = (taosGetTimestampUs() - st) / 1000.0; - updateComposedBlockInfo(pReader, el, pBlockScanInfo); - - if (pResBlock->info.rows > 0) { - tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 " rows:%" PRId64 - ", elapsed time:%.2f ms %s", - pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, - pResBlock->info.rows, el, pReader->idStr); - } - - return code; -} - -void setComposedBlockFlag(STsdbReader* pReader, bool composed) { pReader->status.composedDataBlock = composed; } - -int32_t getInitialDelIndex(const SArray* pDelSkyline, int32_t order) { - if (pDelSkyline == NULL) { - return 0; - } - - return ASCENDING_TRAVERSE(order) ? 
0 : taosArrayGetSize(pDelSkyline) - 1; -} - -int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, STbData* pMemTbData, - STbData* piMemTbData) { - if (pBlockScanInfo->delSkyline != NULL) { - return TSDB_CODE_SUCCESS; - } - - int32_t code = 0; - SArray* pDelData = taosArrayInit(4, sizeof(SDelData)); - - SDelFile* pDelFile = pReader->pReadSnap->fs.pDelFile; - if (pDelFile && taosArrayGetSize(pReader->pDelIdx) > 0) { - SDelIdx idx = {.suid = pReader->suid, .uid = pBlockScanInfo->uid}; - SDelIdx* pIdx = taosArraySearch(pReader->pDelIdx, &idx, tCmprDelIdx, TD_EQ); - - if (pIdx != NULL) { - code = tsdbReadDelDatav1(pReader->pDelFReader, pIdx, pDelData, pReader->verRange.maxVer); - } - if (code != TSDB_CODE_SUCCESS) { - goto _err; - } - } - - SDelData* p = NULL; - if (pMemTbData != NULL) { - p = pMemTbData->pHead; - while (p) { - if (p->version <= pReader->verRange.maxVer) { - taosArrayPush(pDelData, p); - } - - p = p->pNext; - } - } - - if (piMemTbData != NULL) { - p = piMemTbData->pHead; - while (p) { - if (p->version <= pReader->verRange.maxVer) { - taosArrayPush(pDelData, p); - } - p = p->pNext; - } - } - - if (taosArrayGetSize(pDelData) > 0) { - pBlockScanInfo->delSkyline = taosArrayInit(4, sizeof(TSDBKEY)); - code = tsdbBuildDeleteSkyline(pDelData, 0, (int32_t)(taosArrayGetSize(pDelData) - 1), pBlockScanInfo->delSkyline); - } - - taosArrayDestroy(pDelData); - int32_t index = getInitialDelIndex(pBlockScanInfo->delSkyline, pReader->order); - - pBlockScanInfo->iter.index = index; - pBlockScanInfo->iiter.index = index; - pBlockScanInfo->fileDelIndex = index; - pBlockScanInfo->sttBlockDelIndex = index; - - return code; - -_err: - taosArrayDestroy(pDelData); - return code; -} - -TSDBKEY getCurrentKeyInBuf(STableBlockScanInfo* pScanInfo, STsdbReader* pReader) { - bool asc = ASCENDING_TRAVERSE(pReader->order); - // TSKEY initialVal = asc? 
TSKEY_MIN:TSKEY_MAX; - - TSDBKEY key = {.ts = TSKEY_INITIAL_VAL}, ikey = {.ts = TSKEY_INITIAL_VAL}; - - bool hasKey = false, hasIKey = false; - TSDBROW* pRow = getValidMemRow(&pScanInfo->iter, pScanInfo->delSkyline, pReader); - if (pRow != NULL) { - hasKey = true; - key = TSDBROW_KEY(pRow); - } - - TSDBROW* pIRow = getValidMemRow(&pScanInfo->iiter, pScanInfo->delSkyline, pReader); - if (pIRow != NULL) { - hasIKey = true; - ikey = TSDBROW_KEY(pIRow); - } - - if (hasKey) { - if (hasIKey) { // has data in mem & imem - if (asc) { - return key.ts <= ikey.ts ? key : ikey; - } else { - return key.ts <= ikey.ts ? ikey : key; - } - } else { // no data in imem - return key; - } - } else { - // no data in mem & imem, return the initial value - // only imem has data, return ikey - return ikey; - } -} - -static int32_t moveToNextFile(STsdbReader* pReader, SBlockNumber* pBlockNum, SArray* pTableList) { - SReaderStatus* pStatus = &pReader->status; - pBlockNum->numOfBlocks = 0; - pBlockNum->numOfLastFiles = 0; - - size_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); - SArray* pIndexList = taosArrayInit(numOfTables, sizeof(SBlockIdx)); - - while (1) { - // only check here, since the iterate data in memory is very fast. 
- if (pReader->code != TSDB_CODE_SUCCESS) { - tsdbWarn("tsdb reader is stopped ASAP, code:%s, %s", strerror(pReader->code), pReader->idStr); - taosArrayDestroy(pIndexList); - return pReader->code; - } - - bool hasNext = false; - int32_t code = filesetIteratorNext(&pStatus->fileIter, pReader, &hasNext); - if (code != TSDB_CODE_SUCCESS) { - taosArrayDestroy(pIndexList); - return code; - } - - if (!hasNext) { // no data files on disk - break; - } - - taosArrayClear(pIndexList); - code = doLoadBlockIndex(pReader, pReader->pFileReader, pIndexList); - if (code != TSDB_CODE_SUCCESS) { - taosArrayDestroy(pIndexList); - return code; - } - - if (taosArrayGetSize(pIndexList) > 0 || pReader->pFileReader->pSet->nSttF > 0) { - code = doLoadFileBlock(pReader, pIndexList, pBlockNum, pTableList); - if (code != TSDB_CODE_SUCCESS) { - taosArrayDestroy(pIndexList); - return code; - } - - if (pBlockNum->numOfBlocks + pBlockNum->numOfLastFiles > 0) { - break; - } - } - - // no blocks in current file, try next files - } - - taosArrayDestroy(pIndexList); - - if (pReader->pReadSnap != NULL) { - SDelFile* pDelFile = pReader->pReadSnap->fs.pDelFile; - if (pReader->pDelFReader == NULL && pDelFile != NULL) { - int32_t code = tsdbDelFReaderOpen(&pReader->pDelFReader, pDelFile, pReader->pTsdb); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - pReader->pDelIdx = taosArrayInit(4, sizeof(SDelIdx)); - if (pReader->pDelIdx == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - return code; - } - - code = tsdbReadDelIdx(pReader->pDelFReader, pReader->pDelIdx); - if (code != TSDB_CODE_SUCCESS) { - taosArrayDestroy(pReader->pDelIdx); - return code; - } - } - } - - return TSDB_CODE_SUCCESS; -} - -static void resetTableListIndex(SReaderStatus* pStatus) { - STableUidList* pList = &pStatus->uidList; - - pList->currentIndex = 0; - uint64_t uid = pList->tableUidList[0]; - pStatus->pTableIter = tSimpleHashGet(pStatus->pTableMap, &uid, sizeof(uid)); -} - -static bool moveToNextTable(STableUidList* 
pOrderedCheckInfo, SReaderStatus* pStatus) { - pOrderedCheckInfo->currentIndex += 1; - if (pOrderedCheckInfo->currentIndex >= tSimpleHashGetSize(pStatus->pTableMap)) { - pStatus->pTableIter = NULL; - return false; - } - - uint64_t uid = pOrderedCheckInfo->tableUidList[pOrderedCheckInfo->currentIndex]; - pStatus->pTableIter = tSimpleHashGet(pStatus->pTableMap, &uid, sizeof(uid)); - return (pStatus->pTableIter != NULL); -} - -static int32_t doLoadLastBlockSequentially(STsdbReader* pReader) { - SReaderStatus* pStatus = &pReader->status; - SLastBlockReader* pLastBlockReader = pStatus->fileIter.pLastBlockReader; - STableUidList* pUidList = &pStatus->uidList; - int32_t code = TSDB_CODE_SUCCESS; - - if (tSimpleHashGetSize(pStatus->pTableMap) == 0) { - return TSDB_CODE_SUCCESS; - } - - SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; - - while (1) { - if (pReader->code != TSDB_CODE_SUCCESS) { - tsdbWarn("tsdb reader is stopped ASAP, code:%s, %s", strerror(pReader->code), pReader->idStr); - return pReader->code; - } - - // load the last data block of current table - STableBlockScanInfo* pScanInfo = *(STableBlockScanInfo**)pStatus->pTableIter; - if (pScanInfo == NULL) { - tsdbError("table Iter is null, invalid pScanInfo, try next table %s", pReader->idStr); - bool hasNexTable = moveToNextTable(pUidList, pStatus); - if (!hasNexTable) { - return TSDB_CODE_SUCCESS; - } - - continue; - } - - if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pScanInfo->uid, sizeof(pScanInfo->uid))) { - // reset the index in last block when handing a new file - doCleanupTableScanInfo(pScanInfo); - pStatus->mapDataCleaned = true; - - bool hasNexTable = moveToNextTable(pUidList, pStatus); - if (!hasNexTable) { - return TSDB_CODE_SUCCESS; - } - - continue; - } - - // reset the index in last block when handing a new file - doCleanupTableScanInfo(pScanInfo); - pStatus->mapDataCleaned = true; - - bool hasDataInLastFile = initLastBlockReader(pLastBlockReader, pScanInfo, 
pReader); - if (!hasDataInLastFile) { - bool hasNexTable = moveToNextTable(pUidList, pStatus); - if (!hasNexTable) { - return TSDB_CODE_SUCCESS; - } - - continue; - } - - int64_t st = taosGetTimestampUs(); - while (1) { - bool hasBlockLData = hasDataInLastBlock(pLastBlockReader); - - // no data in last block and block, no need to proceed. - if (hasBlockLData == false) { - break; - } - - code = buildComposedDataBlockImpl(pReader, pScanInfo, &pReader->status.fileBlockData, pLastBlockReader); - if (code) { - return code; - } - - if (pResBlock->info.rows >= pReader->resBlockInfo.capacity) { - break; - } - } - - double el = (taosGetTimestampUs() - st) / 1000.0; - updateComposedBlockInfo(pReader, el, pScanInfo); - - if (pResBlock->info.rows > 0) { - tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 " rows:%" PRId64 - ", elapsed time:%.2f ms %s", - pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, - pResBlock->info.rows, el, pReader->idStr); - return TSDB_CODE_SUCCESS; - } - - // current table is exhausted, let's try next table - bool hasNexTable = moveToNextTable(pUidList, pStatus); - if (!hasNexTable) { - return TSDB_CODE_SUCCESS; - } - } -} - -static int32_t doBuildDataBlock(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - - SReaderStatus* pStatus = &pReader->status; - SDataBlockIter* pBlockIter = &pStatus->blockIter; - STableBlockScanInfo* pScanInfo = NULL; - SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); - SLastBlockReader* pLastBlockReader = pReader->status.fileIter.pLastBlockReader; - SDataBlk* pBlock = getCurrentBlock(pBlockIter); - - if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pBlockInfo->uid, sizeof(pBlockInfo->uid))) { - setBlockAllDumped(&pStatus->fBlockDumpInfo, pBlock->maxKey.ts, pReader->order); - return code; - } - - if (pReader->code != TSDB_CODE_SUCCESS) { - return pReader->code; - } - - pScanInfo = 
getTableBlockScanInfo(pReader->status.pTableMap, pBlockInfo->uid, pReader->idStr); - if (pScanInfo == NULL) { - return terrno; - } - - initLastBlockReader(pLastBlockReader, pScanInfo, pReader); - TSDBKEY keyInBuf = getCurrentKeyInBuf(pScanInfo, pReader); - - if (fileBlockShouldLoad(pReader, pBlockInfo, pBlock, pScanInfo, keyInBuf, pLastBlockReader)) { - code = doLoadFileBlockData(pReader, pBlockIter, &pStatus->fileBlockData, pScanInfo->uid); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - // build composed data block - code = buildComposedDataBlock(pReader); - } else if (bufferDataInFileBlockGap(pReader->order, keyInBuf, pBlock)) { - // data in memory that are earlier than current file block - // rows in buffer should be less than the file block in asc, greater than file block in desc - int64_t endKey = (ASCENDING_TRAVERSE(pReader->order)) ? pBlock->minKey.ts : pBlock->maxKey.ts; - code = buildDataBlockFromBuf(pReader, pScanInfo, endKey); - } else { - if (hasDataInLastBlock(pLastBlockReader) && !ASCENDING_TRAVERSE(pReader->order)) { - // only return the rows in last block - int64_t tsLast = getCurrentKeyInLastBlock(pLastBlockReader); - ASSERT(tsLast >= pBlock->maxKey.ts); - - SBlockData* pBData = &pReader->status.fileBlockData; - tBlockDataReset(pBData); - - SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; - tsdbDebug("load data in last block firstly, due to desc scan data, %s", pReader->idStr); - - int64_t st = taosGetTimestampUs(); - - while (1) { - bool hasBlockLData = hasDataInLastBlock(pLastBlockReader); - - // no data in last block and block, no need to proceed. 
- if (hasBlockLData == false) { - break; - } - - code = buildComposedDataBlockImpl(pReader, pScanInfo, &pReader->status.fileBlockData, pLastBlockReader); - if (code) { - return code; - } - - if (pResBlock->info.rows >= pReader->resBlockInfo.capacity) { - break; - } - } - - double el = (taosGetTimestampUs() - st) / 1000.0; - updateComposedBlockInfo(pReader, el, pScanInfo); - - if (pResBlock->info.rows > 0) { - tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 " rows:%" PRId64 - ", elapsed time:%.2f ms %s", - pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, - pResBlock->info.rows, el, pReader->idStr); - } - } else { // whole block is required, return it directly - SDataBlockInfo* pInfo = &pReader->resBlockInfo.pResBlock->info; - pInfo->rows = pBlock->nRow; - pInfo->id.uid = pScanInfo->uid; - pInfo->dataLoad = 0; - pInfo->window = (STimeWindow){.skey = pBlock->minKey.ts, .ekey = pBlock->maxKey.ts}; - setComposedBlockFlag(pReader, false); - setBlockAllDumped(&pStatus->fBlockDumpInfo, pBlock->maxKey.ts, pReader->order); - - // update the last key for the corresponding table - pScanInfo->lastKey = ASCENDING_TRAVERSE(pReader->order) ? pInfo->window.ekey : pInfo->window.skey; - tsdbDebug("%p uid:%" PRIu64 - " clean file block retrieved from file, global index:%d, " - "table index:%d, rows:%d, brange:%" PRId64 "-%" PRId64 ", %s", - pReader, pScanInfo->uid, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlock->nRow, pBlock->minKey.ts, - pBlock->maxKey.ts, pReader->idStr); - } - } - - return (pReader->code != TSDB_CODE_SUCCESS) ? 
pReader->code : code; -} - -static int32_t doSumFileBlockRows(STsdbReader* pReader, SDataFReader* pFileReader) { - int64_t st = taosGetTimestampUs(); - LRUHandle* handle = NULL; - int32_t code = tsdbCacheGetBlockIdx(pFileReader->pTsdb->biCache, pFileReader, &handle); - if (code != TSDB_CODE_SUCCESS || handle == NULL) { - goto _end; - } - - int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); - - SArray* aBlockIdx = (SArray*)taosLRUCacheValue(pFileReader->pTsdb->biCache, handle); - size_t num = taosArrayGetSize(aBlockIdx); - if (num == 0) { - tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); - return TSDB_CODE_SUCCESS; - } - - SBlockIdx* pBlockIdx = NULL; - for (int32_t i = 0; i < num; ++i) { - pBlockIdx = (SBlockIdx*)taosArrayGet(aBlockIdx, i); - if (pBlockIdx->suid != pReader->suid) { - continue; - } - - STableBlockScanInfo** p = tSimpleHashGet(pReader->status.pTableMap, &pBlockIdx->uid, sizeof(pBlockIdx->uid)); - if (p == NULL) { - continue; - } - - STableBlockScanInfo* pScanInfo = *p; - tMapDataReset(&pScanInfo->mapData); - tsdbReadDataBlk(pReader->pFileReader, pBlockIdx, &pScanInfo->mapData); - - SDataBlk block = {0}; - for (int32_t j = 0; j < pScanInfo->mapData.nItem; ++j) { - tGetDataBlk(pScanInfo->mapData.pData + pScanInfo->mapData.aOffset[j], &block); - pReader->rowsNum += block.nRow; - } - } - -_end: - tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); - return code; -} - -static int32_t doSumSttBlockRows(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - SLastBlockReader* pLastBlockReader = pReader->status.fileIter.pLastBlockReader; - SSttBlockLoadInfo* pBlockLoadInfo = NULL; - - for (int32_t i = 0; i < pReader->pFileReader->pSet->nSttF; ++i) { // open all last file - pBlockLoadInfo = &pLastBlockReader->pInfo[i]; - - code = tsdbReadSttBlk(pReader->pFileReader, i, pBlockLoadInfo->aSttBlk); - if (code) { - return code; - } - - size_t size = taosArrayGetSize(pBlockLoadInfo->aSttBlk); - if (size >= 1) { - SSttBlk* 
pStart = taosArrayGet(pBlockLoadInfo->aSttBlk, 0); - SSttBlk* pEnd = taosArrayGet(pBlockLoadInfo->aSttBlk, size - 1); - - // all identical - if (pStart->suid == pEnd->suid) { - if (pStart->suid != pReader->suid) { - // no qualified stt block existed - taosArrayClear(pBlockLoadInfo->aSttBlk); - continue; - } - for (int32_t j = 0; j < size; ++j) { - SSttBlk* p = taosArrayGet(pBlockLoadInfo->aSttBlk, j); - pReader->rowsNum += p->nRow; - } - } else { - for (int32_t j = 0; j < size; ++j) { - SSttBlk* p = taosArrayGet(pBlockLoadInfo->aSttBlk, j); - uint64_t s = p->suid; - if (s < pReader->suid) { - continue; - } - - if (s == pReader->suid) { - pReader->rowsNum += p->nRow; - } else if (s > pReader->suid) { - break; - } - } - } - } - } - - return code; -} - -static int32_t readRowsCountFromFiles(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - - while (1) { - bool hasNext = false; - code = filesetIteratorNext(&pReader->status.fileIter, pReader, &hasNext); - if (code) { - return code; - } - - if (!hasNext) { // no data files on disk - break; - } - - code = doSumFileBlockRows(pReader, pReader->pFileReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doSumSttBlockRows(pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - - pReader->status.loadFromFile = false; - - return code; -} - -static int32_t readRowsCountFromMem(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - int64_t memNum = 0, imemNum = 0; - if (pReader->pReadSnap->pMem != NULL) { - tsdbMemTableCountRows(pReader->pReadSnap->pMem, pReader->status.pTableMap, &memNum); - } - - if (pReader->pReadSnap->pIMem != NULL) { - tsdbMemTableCountRows(pReader->pReadSnap->pIMem, pReader->status.pTableMap, &imemNum); - } - - pReader->rowsNum += memNum + imemNum; - - return code; -} - -static int32_t buildBlockFromBufferSequentially(STsdbReader* pReader) { - SReaderStatus* pStatus = &pReader->status; - STableUidList* pUidList = &pStatus->uidList; - - while (1) { - if 
(pReader->code != TSDB_CODE_SUCCESS) { - tsdbWarn("tsdb reader is stopped ASAP, code:%s, %s", strerror(pReader->code), pReader->idStr); - return pReader->code; - } - - STableBlockScanInfo** pBlockScanInfo = pStatus->pTableIter; - if (pReader->pIgnoreTables && - taosHashGet(*pReader->pIgnoreTables, &(*pBlockScanInfo)->uid, sizeof((*pBlockScanInfo)->uid))) { - bool hasNexTable = moveToNextTable(pUidList, pStatus); - if (!hasNexTable) { - return TSDB_CODE_SUCCESS; - } - pBlockScanInfo = pStatus->pTableIter; - } - - initMemDataIterator(*pBlockScanInfo, pReader); - - int64_t endKey = (ASCENDING_TRAVERSE(pReader->order)) ? INT64_MAX : INT64_MIN; - int32_t code = buildDataBlockFromBuf(pReader, *pBlockScanInfo, endKey); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - if (pReader->resBlockInfo.pResBlock->info.rows > 0) { - return TSDB_CODE_SUCCESS; - } - - // current table is exhausted, let's try next table - bool hasNexTable = moveToNextTable(pUidList, pStatus); - if (!hasNexTable) { - return TSDB_CODE_SUCCESS; - } - } -} - -// set the correct start position in case of the first/last file block, according to the query time window -static void initBlockDumpInfo(STsdbReader* pReader, SDataBlockIter* pBlockIter) { - int64_t lastKey = ASCENDING_TRAVERSE(pReader->order) ? INT64_MIN : INT64_MAX; - SDataBlk* pBlock = getCurrentBlock(pBlockIter); - SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); - if (pBlockInfo) { - STableBlockScanInfo* pScanInfo = tSimpleHashGet(pBlockIter->pTableMap, &pBlockInfo->uid, sizeof(pBlockInfo->uid)); - if (pScanInfo) { - lastKey = pScanInfo->lastKey; - } - } - SReaderStatus* pStatus = &pReader->status; - - SFileBlockDumpInfo* pDumpInfo = &pStatus->fBlockDumpInfo; - - pDumpInfo->totalRows = pBlock->nRow; - pDumpInfo->allDumped = false; - pDumpInfo->rowIndex = ASCENDING_TRAVERSE(pReader->order) ? 
0 : pBlock->nRow - 1; - pDumpInfo->lastKey = lastKey; -} - -static int32_t initForFirstBlockInFile(STsdbReader* pReader, SDataBlockIter* pBlockIter) { - SBlockNumber num = {0}; - SArray* pTableList = taosArrayInit(40, POINTER_BYTES); - - int32_t code = moveToNextFile(pReader, &num, pTableList); - if (code != TSDB_CODE_SUCCESS) { - taosArrayDestroy(pTableList); - return code; - } - - // all data files are consumed, try data in buffer - if (num.numOfBlocks + num.numOfLastFiles == 0) { - pReader->status.loadFromFile = false; - taosArrayDestroy(pTableList); - return code; - } - - // initialize the block iterator for a new fileset - if (num.numOfBlocks > 0) { - code = initBlockIterator(pReader, pBlockIter, num.numOfBlocks, pTableList); - } else { // no block data, only last block exists - tBlockDataReset(&pReader->status.fileBlockData); - resetDataBlockIterator(pBlockIter, pReader->order); - resetTableListIndex(&pReader->status); - } - - // set the correct start position according to the query time window - initBlockDumpInfo(pReader, pBlockIter); - taosArrayDestroy(pTableList); - return code; -} - -static bool fileBlockPartiallyRead(SFileBlockDumpInfo* pDumpInfo, bool asc) { - return (!pDumpInfo->allDumped) && - ((pDumpInfo->rowIndex > 0 && asc) || (pDumpInfo->rowIndex < (pDumpInfo->totalRows - 1) && (!asc))); -} - -typedef enum { - TSDB_READ_RETURN = 0x1, - TSDB_READ_CONTINUE = 0x2, -} ERetrieveType; - -static ERetrieveType doReadDataFromLastFiles(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; - SDataBlockIter* pBlockIter = &pReader->status.blockIter; - - while (1) { - terrno = 0; - - code = doLoadLastBlockSequentially(pReader); - if (code != TSDB_CODE_SUCCESS) { - terrno = code; - return TSDB_READ_RETURN; - } - - if (pResBlock->info.rows > 0) { - return TSDB_READ_RETURN; - } - - // all data blocks are checked in this last block file, now let's try the next file - ASSERT(pReader->status.pTableIter 
== NULL); - code = initForFirstBlockInFile(pReader, pBlockIter); - - // error happens or all the data files are completely checked - if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) { - terrno = code; - return TSDB_READ_RETURN; - } - - if (pBlockIter->numOfBlocks > 0) { // there are data blocks existed. - return TSDB_READ_CONTINUE; - } else { // all blocks in data file are checked, let's check the data in last files - resetTableListIndex(&pReader->status); - } - } -} - -static int32_t buildBlockFromFiles(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - bool asc = ASCENDING_TRAVERSE(pReader->order); - - SDataBlockIter* pBlockIter = &pReader->status.blockIter; - SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; - - if (pBlockIter->numOfBlocks == 0) { - // let's try to extract data from stt files. - ERetrieveType type = doReadDataFromLastFiles(pReader); - if (type == TSDB_READ_RETURN) { - return terrno; - } - - code = doBuildDataBlock(pReader); - if (code != TSDB_CODE_SUCCESS || pResBlock->info.rows > 0) { - return code; - } - } - - while (1) { - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - - if (fileBlockPartiallyRead(pDumpInfo, asc)) { // file data block is partially loaded - code = buildComposedDataBlock(pReader); - } else { - // current block are exhausted, try the next file block - if (pDumpInfo->allDumped) { - // try next data block in current file - bool hasNext = blockIteratorNext(&pReader->status.blockIter, pReader->idStr); - if (hasNext) { // check for the next block in the block accessed order list - initBlockDumpInfo(pReader, pBlockIter); - } else { - // all data blocks in files are checked, let's check the data in last files. 
- ASSERT(pReader->status.pCurrentFileset->nSttF > 0); - - // data blocks in current file are exhausted, let's try the next file now - SBlockData* pBlockData = &pReader->status.fileBlockData; - if (pBlockData->uid != 0) { - tBlockDataClear(pBlockData); - } - - tBlockDataReset(pBlockData); - resetDataBlockIterator(pBlockIter, pReader->order); - resetTableListIndex(&pReader->status); - - ERetrieveType type = doReadDataFromLastFiles(pReader); - if (type == TSDB_READ_RETURN) { - return terrno; - } - } - } - - code = doBuildDataBlock(pReader); - } - - if (code != TSDB_CODE_SUCCESS || pResBlock->info.rows > 0) { - return code; - } - } -} - -static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idStr, - int8_t* pLevel) { - if (VND_IS_RSMA(pVnode)) { - int8_t level = 0; - int8_t precision = pVnode->config.tsdbCfg.precision; - int64_t now = taosGetTimestamp(precision); - int64_t offset = tsQueryRsmaTolerance * ((precision == TSDB_TIME_PRECISION_MILLI) ? 1L - : (precision == TSDB_TIME_PRECISION_MICRO) ? 1000L - : 1000000L); - - for (int8_t i = 0; i < TSDB_RETENTION_MAX; ++i) { - SRetention* pRetention = retentions + level; - if (pRetention->keep <= 0) { - if (level > 0) { - --level; - } - break; - } - if ((now - pRetention->keep) <= (winSKey + offset)) { - break; - } - ++level; - } - - const char* str = (idStr != NULL) ? 
idStr : ""; - - if (level == TSDB_RETENTION_L0) { - *pLevel = TSDB_RETENTION_L0; - tsdbDebug("vgId:%d, rsma level %d is selected to query %s", TD_VID(pVnode), TSDB_RETENTION_L0, str); - return VND_RSMA0(pVnode); - } else if (level == TSDB_RETENTION_L1) { - *pLevel = TSDB_RETENTION_L1; - tsdbDebug("vgId:%d, rsma level %d is selected to query %s", TD_VID(pVnode), TSDB_RETENTION_L1, str); - return VND_RSMA1(pVnode); - } else { - *pLevel = TSDB_RETENTION_L2; - tsdbDebug("vgId:%d, rsma level %d is selected to query %s", TD_VID(pVnode), TSDB_RETENTION_L2, str); - return VND_RSMA2(pVnode); - } - } - - return VND_TSDB(pVnode); -} - -SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, const char* id) { - int64_t startVer = (pCond->startVersion == -1) ? 0 : pCond->startVersion; - - int64_t endVer = 0; - if (pCond->endVersion == -1) { - // user not specified end version, set current maximum version of vnode as the endVersion - endVer = pVnode->state.applied; - } else { - endVer = (pCond->endVersion > pVnode->state.applied) ? pVnode->state.applied : pCond->endVersion; - } - - tsdbDebug("queried verRange:%" PRId64 "-%" PRId64 ", revised query verRange:%" PRId64 "-%" PRId64 ", %s", - pCond->startVersion, pCond->endVersion, startVer, endVer, id); - - return (SVersionRange){.minVer = startVer, .maxVer = endVer}; -} - -bool hasBeenDropped(const SArray* pDelList, int32_t* index, int64_t key, int64_t ver, int32_t order, - SVersionRange* pVerRange) { - if (pDelList == NULL) { - return false; - } - - size_t num = taosArrayGetSize(pDelList); - bool asc = ASCENDING_TRAVERSE(order); - int32_t step = asc ? 
1 : -1; - - if (asc) { - if (*index >= num - 1) { - TSDBKEY* last = taosArrayGetLast(pDelList); - ASSERT(key >= last->ts); - - if (key > last->ts) { - return false; - } else if (key == last->ts) { - TSDBKEY* prev = taosArrayGet(pDelList, num - 2); - return (prev->version >= ver && prev->version <= pVerRange->maxVer && prev->version >= pVerRange->minVer); - } - } else { - TSDBKEY* pCurrent = taosArrayGet(pDelList, *index); - TSDBKEY* pNext = taosArrayGet(pDelList, (*index) + 1); - - if (key < pCurrent->ts) { - return false; - } - - if (pCurrent->ts <= key && pNext->ts >= key && pCurrent->version >= ver && - pVerRange->maxVer >= pCurrent->version) { - return true; - } - - while (pNext->ts <= key && (*index) < num - 1) { - (*index) += 1; - - if ((*index) < num - 1) { - pCurrent = taosArrayGet(pDelList, *index); - pNext = taosArrayGet(pDelList, (*index) + 1); - - // it is not a consecutive deletion range, ignore it - if (pCurrent->version == 0 && pNext->version > 0) { - continue; - } - - if (pCurrent->ts <= key && pNext->ts >= key && pCurrent->version >= ver && - pVerRange->maxVer >= pCurrent->version) { - return true; - } - } - } - - return false; - } - } else { - if (*index <= 0) { - TSDBKEY* pFirst = taosArrayGet(pDelList, 0); - - if (key < pFirst->ts) { - return false; - } else if (key == pFirst->ts) { - return pFirst->version >= ver; - } else { - ASSERT(0); - } - } else { - TSDBKEY* pCurrent = taosArrayGet(pDelList, *index); - TSDBKEY* pPrev = taosArrayGet(pDelList, (*index) - 1); - - if (key > pCurrent->ts) { - return false; - } - - if (pPrev->ts <= key && pCurrent->ts >= key && pPrev->version >= ver) { - return true; - } - - while (pPrev->ts >= key && (*index) > 1) { - (*index) += step; - - if ((*index) >= 1) { - pCurrent = taosArrayGet(pDelList, *index); - pPrev = taosArrayGet(pDelList, (*index) - 1); - - // it is not a consecutive deletion range, ignore it - if (pCurrent->version > 0 && pPrev->version == 0) { - continue; - } - - if (pPrev->ts <= key && 
pCurrent->ts >= key && pPrev->version >= ver) { - return true; - } - } - } - - return false; - } - } - - return false; -} - -TSDBROW* getValidMemRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pReader) { - if (!pIter->hasVal) { - return NULL; - } - - TSDBROW* pRow = tsdbTbDataIterGet(pIter->iter); - TSDBKEY key = TSDBROW_KEY(pRow); - - if (outOfTimeWindow(key.ts, &pReader->window)) { - pIter->hasVal = false; - return NULL; - } - - // it is a valid data version - if ((key.version <= pReader->verRange.maxVer && key.version >= pReader->verRange.minVer) && - (!hasBeenDropped(pDelList, &pIter->index, key.ts, key.version, pReader->order, &pReader->verRange))) { - return pRow; - } - - while (1) { - pIter->hasVal = tsdbTbDataIterNext(pIter->iter); - if (!pIter->hasVal) { - return NULL; - } - - pRow = tsdbTbDataIterGet(pIter->iter); - - key = TSDBROW_KEY(pRow); - if (outOfTimeWindow(key.ts, &pReader->window)) { - pIter->hasVal = false; - return NULL; - } - - if (key.version <= pReader->verRange.maxVer && key.version >= pReader->verRange.minVer && - (!hasBeenDropped(pDelList, &pIter->index, key.ts, key.version, pReader->order, &pReader->verRange))) { - return pRow; - } - } -} - -int32_t doMergeRowsInBuf(SIterInfo* pIter, uint64_t uid, int64_t ts, SArray* pDelList, STsdbReader* pReader) { - SRowMerger* pMerger = &pReader->status.merger; - - while (1) { - pIter->hasVal = tsdbTbDataIterNext(pIter->iter); - if (!pIter->hasVal) { - break; - } - - // data exists but not valid - TSDBROW* pRow = getValidMemRow(pIter, pDelList, pReader); - if (pRow == NULL) { - break; - } - - // ts is not identical, quit - TSDBKEY k = TSDBROW_KEY(pRow); - if (k.ts != ts) { - break; - } - - if (pRow->type == TSDBROW_ROW_FMT) { - STSchema* pTSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, uid); - if (pTSchema == NULL) { - return terrno; - } - - tsdbRowMergerAdd(pMerger, pRow, pTSchema); - } else { // column format - tsdbRowMergerAdd(pMerger, pRow, NULL); - } - } - - return 
TSDB_CODE_SUCCESS; -} - -static int32_t doMergeRowsInFileBlockImpl(SBlockData* pBlockData, int32_t rowIndex, int64_t key, SRowMerger* pMerger, - SVersionRange* pVerRange, int32_t step) { - while (rowIndex < pBlockData->nRow && rowIndex >= 0 && pBlockData->aTSKEY[rowIndex] == key) { - if (pBlockData->aVersion[rowIndex] > pVerRange->maxVer || pBlockData->aVersion[rowIndex] < pVerRange->minVer) { - rowIndex += step; - continue; - } - - TSDBROW fRow = tsdbRowFromBlockData(pBlockData, rowIndex); - tsdbRowMergerAdd(pMerger, &fRow, NULL); - rowIndex += step; - } - - return rowIndex; -} - -typedef enum { - CHECK_FILEBLOCK_CONT = 0x1, - CHECK_FILEBLOCK_QUIT = 0x2, -} CHECK_FILEBLOCK_STATE; - -static int32_t checkForNeighborFileBlock(STsdbReader* pReader, STableBlockScanInfo* pScanInfo, SDataBlk* pBlock, - SFileDataBlockInfo* pFBlock, SRowMerger* pMerger, int64_t key, - CHECK_FILEBLOCK_STATE* state) { - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - SBlockData* pBlockData = &pReader->status.fileBlockData; - bool asc = ASCENDING_TRAVERSE(pReader->order); - - *state = CHECK_FILEBLOCK_QUIT; - int32_t step = ASCENDING_TRAVERSE(pReader->order) ? 1 : -1; - - bool loadNeighbor = true; - int32_t code = loadNeighborIfOverlap(pFBlock, pScanInfo, pReader, &loadNeighbor); - - if (loadNeighbor && (code == TSDB_CODE_SUCCESS)) { - pDumpInfo->rowIndex = - doMergeRowsInFileBlockImpl(pBlockData, pDumpInfo->rowIndex, key, pMerger, &pReader->verRange, step); - if ((pDumpInfo->rowIndex >= pDumpInfo->totalRows && asc) || (pDumpInfo->rowIndex < 0 && !asc)) { - *state = CHECK_FILEBLOCK_CONT; - } - } - - return code; -} - -int32_t doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pScanInfo, STsdbReader* pReader) { - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - - SRowMerger* pMerger = &pReader->status.merger; - bool asc = ASCENDING_TRAVERSE(pReader->order); - int64_t key = pBlockData->aTSKEY[pDumpInfo->rowIndex]; - int32_t step = asc ? 
1 : -1; - - pDumpInfo->rowIndex += step; - if ((pDumpInfo->rowIndex <= pBlockData->nRow - 1 && asc) || (pDumpInfo->rowIndex >= 0 && !asc)) { - pDumpInfo->rowIndex = - doMergeRowsInFileBlockImpl(pBlockData, pDumpInfo->rowIndex, key, pMerger, &pReader->verRange, step); - } - - // all rows are consumed, let's try next file block - if ((pDumpInfo->rowIndex >= pBlockData->nRow && asc) || (pDumpInfo->rowIndex < 0 && !asc)) { - while (1) { - CHECK_FILEBLOCK_STATE st; - - SFileDataBlockInfo* pFileBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); - SDataBlk* pCurrentBlock = getCurrentBlock(&pReader->status.blockIter); - if (pFileBlockInfo == NULL) { - st = CHECK_FILEBLOCK_QUIT; - break; - } - - checkForNeighborFileBlock(pReader, pScanInfo, pCurrentBlock, pFileBlockInfo, pMerger, key, &st); - if (st == CHECK_FILEBLOCK_QUIT) { - break; - } - } - } - - return TSDB_CODE_SUCCESS; -} - -int32_t doMergeRowsInLastBlock(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pScanInfo, int64_t ts, - SRowMerger* pMerger, SVersionRange* pVerRange, const char* idStr) { - while (nextRowFromLastBlocks(pLastBlockReader, pScanInfo, pVerRange)) { - int64_t next1 = getCurrentKeyInLastBlock(pLastBlockReader); - if (next1 == ts) { - TSDBROW* pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - tsdbRowMergerAdd(pMerger, pRow1, NULL); - } else { - tsdbTrace("uid:%" PRIu64 " last del index:%d, del range:%d, lastKeyInStt:%" PRId64 ", %s", pScanInfo->uid, - pScanInfo->sttBlockDelIndex, (int32_t)taosArrayGetSize(pScanInfo->delSkyline), pScanInfo->lastKeyInStt, - idStr); - break; - } - } - - return TSDB_CODE_SUCCESS; -} - -int32_t doMergeMemTableMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, SArray* pDelList, TSDBROW* pResRow, - STsdbReader* pReader, bool* freeTSRow) { - TSDBROW* pNextRow = NULL; - TSDBROW current = *pRow; - - { // if the timestamp of the next valid row has a different ts, return current row directly - pIter->hasVal = tsdbTbDataIterNext(pIter->iter); - - if 
(!pIter->hasVal) { - *pResRow = *pRow; - *freeTSRow = false; - return TSDB_CODE_SUCCESS; - } else { // has next point in mem/imem - pNextRow = getValidMemRow(pIter, pDelList, pReader); - if (pNextRow == NULL) { - *pResRow = current; - *freeTSRow = false; - return TSDB_CODE_SUCCESS; - } - - if (TSDBROW_TS(¤t) != TSDBROW_TS(pNextRow)) { - *pResRow = current; - *freeTSRow = false; - return TSDB_CODE_SUCCESS; - } - } - } - - terrno = 0; - int32_t code = 0; - - // start to merge duplicated rows - if (current.type == TSDBROW_ROW_FMT) { - // get the correct schema for data in memory - STSchema* pTSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(¤t), pReader, uid); - if (pTSchema == NULL) { - return terrno; - } - - code = tsdbRowMergerAdd(&pReader->status.merger, ¤t, pTSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - STSchema* pTSchema1 = doGetSchemaForTSRow(TSDBROW_SVERSION(pNextRow), pReader, uid); - if (pTSchema1 == NULL) { - return terrno; - } - - tsdbRowMergerAdd(&pReader->status.merger, pNextRow, pTSchema1); - } else { // let's merge rows in file block - code = tsdbRowMergerAdd(&pReader->status.merger, ¤t, pReader->pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - tsdbRowMergerAdd(&pReader->status.merger, pNextRow, NULL); - } - - code = doMergeRowsInBuf(pIter, uid, TSDBROW_TS(¤t), pDelList, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = tsdbRowMergerGetRow(&pReader->status.merger, &pResRow->pTSRow); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - pResRow->type = TSDBROW_ROW_FMT; - tsdbRowMergerClear(&pReader->status.merger); - *freeTSRow = true; - - return TSDB_CODE_SUCCESS; -} - -int32_t doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, - SRow** pTSRow) { - SRowMerger* pMerger = &pReader->status.merger; - - TSDBKEY k = TSDBROW_KEY(pRow); - TSDBKEY ik = TSDBROW_KEY(piRow); - STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), 
pReader, pBlockScanInfo->uid); - if (pSchema == NULL) { - return terrno; - } - - STSchema* piSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(piRow), pReader, pBlockScanInfo->uid); - if (piSchema == NULL) { - return terrno; - } - - if (ASCENDING_TRAVERSE(pReader->order)) { // ascending order imem --> mem - int32_t code = tsdbRowMergerAdd(&pReader->status.merger, piRow, piSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - tsdbRowMergerAdd(&pReader->status.merger, pRow, pSchema); - code = doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - } else { - int32_t code = tsdbRowMergerAdd(&pReader->status.merger, pRow, pSchema); - if (code != TSDB_CODE_SUCCESS || pMerger->pTSchema == NULL) { - return code; - } - - code = doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - tsdbRowMergerAdd(&pReader->status.merger, piRow, piSchema); - code = doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - - int32_t code = tsdbRowMergerGetRow(pMerger, pTSRow); - tsdbRowMergerClear(pMerger); - return code; -} - -int32_t tsdbGetNextRowInMem(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, TSDBROW* pResRow, int64_t endKey, - bool* freeTSRow) { - TSDBROW* pRow = getValidMemRow(&pBlockScanInfo->iter, pBlockScanInfo->delSkyline, pReader); - TSDBROW* piRow = getValidMemRow(&pBlockScanInfo->iiter, pBlockScanInfo->delSkyline, pReader); - SArray* pDelList = pBlockScanInfo->delSkyline; - uint64_t uid = pBlockScanInfo->uid; - - // todo refactor - bool asc = 
ASCENDING_TRAVERSE(pReader->order); - if (pBlockScanInfo->iter.hasVal) { - TSDBKEY k = TSDBROW_KEY(pRow); - if ((k.ts >= endKey && asc) || (k.ts <= endKey && !asc)) { - pRow = NULL; - } - } - - if (pBlockScanInfo->iiter.hasVal) { - TSDBKEY k = TSDBROW_KEY(piRow); - if ((k.ts >= endKey && asc) || (k.ts <= endKey && !asc)) { - piRow = NULL; - } - } - - if (pBlockScanInfo->iter.hasVal && pBlockScanInfo->iiter.hasVal && pRow != NULL && piRow != NULL) { - TSDBKEY k = TSDBROW_KEY(pRow); - TSDBKEY ik = TSDBROW_KEY(piRow); - - int32_t code = TSDB_CODE_SUCCESS; - if (ik.ts != k.ts) { - if (((ik.ts < k.ts) && asc) || ((ik.ts > k.ts) && (!asc))) { // ik.ts < k.ts - code = doMergeMemTableMultiRows(piRow, uid, &pBlockScanInfo->iiter, pDelList, pResRow, pReader, freeTSRow); - } else if (((k.ts < ik.ts) && asc) || ((k.ts > ik.ts) && (!asc))) { - code = doMergeMemTableMultiRows(pRow, uid, &pBlockScanInfo->iter, pDelList, pResRow, pReader, freeTSRow); - } - } else { // ik.ts == k.ts - *freeTSRow = true; - pResRow->type = TSDBROW_ROW_FMT; - code = doMergeMemIMemRows(pRow, piRow, pBlockScanInfo, pReader, &pResRow->pTSRow); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - - return code; - } - - if (pBlockScanInfo->iter.hasVal && pRow != NULL) { - return doMergeMemTableMultiRows(pRow, pBlockScanInfo->uid, &pBlockScanInfo->iter, pDelList, pResRow, pReader, - freeTSRow); - } - - if (pBlockScanInfo->iiter.hasVal && piRow != NULL) { - return doMergeMemTableMultiRows(piRow, uid, &pBlockScanInfo->iiter, pDelList, pResRow, pReader, freeTSRow); - } - - return TSDB_CODE_SUCCESS; -} - -int32_t doAppendRowFromTSRow(SSDataBlock* pBlock, STsdbReader* pReader, SRow* pTSRow, STableBlockScanInfo* pScanInfo) { - int32_t outputRowIndex = pBlock->info.rows; - int64_t uid = pScanInfo->uid; - int32_t code = TSDB_CODE_SUCCESS; - - SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; - STSchema* pSchema = doGetSchemaForTSRow(pTSRow->sver, pReader, uid); - if (pSchema == NULL) { - return terrno; - } 
- - SColVal colVal = {0}; - int32_t i = 0, j = 0; - - if (pSupInfo->colId[i] == PRIMARYKEY_TIMESTAMP_COL_ID) { - SColumnInfoData* pColData = taosArrayGet(pBlock->pDataBlock, pSupInfo->slotId[i]); - ((int64_t*)pColData->pData)[outputRowIndex] = pTSRow->ts; - i += 1; - } - - while (i < pSupInfo->numOfCols && j < pSchema->numOfCols) { - col_id_t colId = pSupInfo->colId[i]; - - if (colId == pSchema->columns[j].colId) { - SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, pSupInfo->slotId[i]); - - tRowGet(pTSRow, pSchema, j, &colVal); - code = doCopyColVal(pColInfoData, outputRowIndex, i, &colVal, pSupInfo); - if (code) { - return code; - } - i += 1; - j += 1; - } else if (colId < pSchema->columns[j].colId) { - SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, pSupInfo->slotId[i]); - - colDataSetNULL(pColInfoData, outputRowIndex); - i += 1; - } else if (colId > pSchema->columns[j].colId) { - j += 1; - } - } - - // set null value since current column does not exist in the "pSchema" - while (i < pSupInfo->numOfCols) { - SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, pSupInfo->slotId[i]); - colDataSetNULL(pColInfoData, outputRowIndex); - i += 1; - } - - pBlock->info.dataLoad = 1; - pBlock->info.rows += 1; - pScanInfo->lastKey = pTSRow->ts; - return TSDB_CODE_SUCCESS; -} - -int32_t doAppendRowFromFileBlock(SSDataBlock* pResBlock, STsdbReader* pReader, SBlockData* pBlockData, - int32_t rowIndex) { - int32_t i = 0, j = 0; - int32_t outputRowIndex = pResBlock->info.rows; - int32_t code = TSDB_CODE_SUCCESS; - - SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; - ((int64_t*)pReader->status.pPrimaryTsCol->pData)[outputRowIndex] = pBlockData->aTSKEY[rowIndex]; - i += 1; - - SColVal cv = {0}; - int32_t numOfInputCols = pBlockData->nColData; - int32_t numOfOutputCols = pSupInfo->numOfCols; - - while (i < numOfOutputCols && j < numOfInputCols) { - SColData* pData = tBlockDataGetColDataByIdx(pBlockData, j); - if (pData->cid < 
pSupInfo->colId[i]) { - j += 1; - continue; - } - - SColumnInfoData* pCol = TARRAY_GET_ELEM(pResBlock->pDataBlock, pSupInfo->slotId[i]); - if (pData->cid == pSupInfo->colId[i]) { - tColDataGetValue(pData, rowIndex, &cv); - code = doCopyColVal(pCol, outputRowIndex, i, &cv, pSupInfo); - if (code) { - return code; - } - j += 1; - } else if (pData->cid > pCol->info.colId) { - // the specified column does not exist in file block, fill with null data - colDataSetNULL(pCol, outputRowIndex); - } - - i += 1; - } - - while (i < numOfOutputCols) { - SColumnInfoData* pCol = taosArrayGet(pResBlock->pDataBlock, pSupInfo->slotId[i]); - colDataSetNULL(pCol, outputRowIndex); - i += 1; - } - - pResBlock->info.dataLoad = 1; - pResBlock->info.rows += 1; - return TSDB_CODE_SUCCESS; -} - -int32_t buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t endKey, int32_t capacity, - STsdbReader* pReader) { - SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; - int32_t code = TSDB_CODE_SUCCESS; - - do { - // SRow* pTSRow = NULL; - TSDBROW row = {.type = -1}; - bool freeTSRow = false; - tsdbGetNextRowInMem(pBlockScanInfo, pReader, &row, endKey, &freeTSRow); - if (row.type == -1) { - break; - } - - if (row.type == TSDBROW_ROW_FMT) { - code = doAppendRowFromTSRow(pBlock, pReader, row.pTSRow, pBlockScanInfo); - - if (freeTSRow) { - taosMemoryFree(row.pTSRow); - } - - if (code) { - return code; - } - } else { - code = doAppendRowFromFileBlock(pBlock, pReader, row.pBlockData, row.iRow); - if (code) { - break; - } - } - - // no data in buffer, return immediately - if (!(pBlockScanInfo->iter.hasVal || pBlockScanInfo->iiter.hasVal)) { - break; - } - - if (pBlock->info.rows >= capacity) { - break; - } - } while (1); - - return code; -} - -// TODO refactor: with createDataBlockScanInfo -int32_t tsdbSetTableList(STsdbReader* pReader, const void* pTableList, int32_t num) { - int32_t size = tSimpleHashGetSize(pReader->status.pTableMap); - - STableBlockScanInfo** p = NULL; - int32_t iter 
= 0; - - while ((p = tSimpleHashIterate(pReader->status.pTableMap, p, &iter)) != NULL) { - clearBlockScanInfo(*p); - } - - if (size < num) { - int32_t code = ensureBlockScanInfoBuf(&pReader->blockInfoBuf, num); - if (code) { - return code; - } - - char* p1 = taosMemoryRealloc(pReader->status.uidList.tableUidList, sizeof(uint64_t) * num); - if (p1 == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - pReader->status.uidList.tableUidList = (uint64_t*)p1; - } - - tSimpleHashClear(pReader->status.pTableMap); - STableUidList* pUidList = &pReader->status.uidList; - pUidList->currentIndex = 0; - - STableKeyInfo* pList = (STableKeyInfo*)pTableList; - for (int32_t i = 0; i < num; ++i) { - STableBlockScanInfo* pInfo = getPosInBlockInfoBuf(&pReader->blockInfoBuf, i); - pInfo->uid = pList[i].uid; - pUidList->tableUidList[i] = pList[i].uid; - - // todo extract method - if (ASCENDING_TRAVERSE(pReader->order)) { - int64_t skey = pReader->window.skey; - pInfo->lastKey = (skey > INT64_MIN) ? (skey - 1) : skey; - pInfo->lastKeyInStt = skey; - } else { - int64_t ekey = pReader->window.ekey; - pInfo->lastKey = (ekey < INT64_MAX) ? 
(ekey + 1) : ekey; - pInfo->lastKeyInStt = ekey; - } - - tSimpleHashPut(pReader->status.pTableMap, &pInfo->uid, sizeof(uint64_t), &pInfo, POINTER_BYTES); - } - - return TDB_CODE_SUCCESS; -} - -void* tsdbGetIdx(SMeta* pMeta) { - if (pMeta == NULL) { - return NULL; - } - return metaGetIdx(pMeta); -} - -void* tsdbGetIvtIdx(SMeta* pMeta) { - if (pMeta == NULL) { - return NULL; - } - return metaGetIvtIdx(pMeta); -} - -uint64_t tsdbGetReaderMaxVersion(STsdbReader* pReader) { return pReader->verRange.maxVer; } - -static int32_t doOpenReaderImpl(STsdbReader* pReader) { - SReaderStatus* pStatus = &pReader->status; - SDataBlockIter* pBlockIter = &pStatus->blockIter; - - initFilesetIterator(&pStatus->fileIter, pReader->pReadSnap->fs.aDFileSet, pReader); - resetDataBlockIterator(&pStatus->blockIter, pReader->order); - - int32_t code = TSDB_CODE_SUCCESS; - if (pStatus->fileIter.numOfFiles == 0) { - pStatus->loadFromFile = false; - } else if (READ_MODE_COUNT_ONLY == pReader->readMode) { - // DO NOTHING - } else { - code = initForFirstBlockInFile(pReader, pBlockIter); - } - - if (!pStatus->loadFromFile) { - resetTableListIndex(pStatus); - } - - return code; -} - -static void freeSchemaFunc(void* param) { - void** p = (void**)param; - taosMemoryFreeClear(*p); -} - -// ====================================== EXPOSED APIs ====================================== -int32_t tsdbReaderOpen(void* pVnode, SQueryTableDataCond* pCond, void* pTableList, int32_t numOfTables, - SSDataBlock* pResBlock, void** ppReader, const char* idstr, bool countOnly, - SHashObj** pIgnoreTables) { - STimeWindow window = pCond->twindows; - SVnodeCfg* pConf = &(((SVnode*)pVnode)->config); - - int32_t capacity = pConf->tsdbCfg.maxRows; - if (pResBlock != NULL) { - blockDataEnsureCapacity(pResBlock, capacity); - } - - int32_t code = tsdbReaderCreate(pVnode, pCond, ppReader, capacity, pResBlock, idstr); - if (code != TSDB_CODE_SUCCESS) { - goto _err; - } - - // check for query time window - STsdbReader* pReader = 
*ppReader; - if (isEmptyQueryTimeWindow(&pReader->window) && pCond->type == TIMEWINDOW_RANGE_CONTAINED) { - tsdbDebug("%p query window not overlaps with the data set, no result returned, %s", pReader, pReader->idStr); - return TSDB_CODE_SUCCESS; - } - - if (pCond->type == TIMEWINDOW_RANGE_EXTERNAL) { - // update the SQueryTableDataCond to create inner reader - int32_t order = pCond->order; - if (order == TSDB_ORDER_ASC) { - pCond->twindows.ekey = window.skey - 1; - pCond->twindows.skey = INT64_MIN; - pCond->order = TSDB_ORDER_DESC; - } else { - pCond->twindows.skey = window.ekey + 1; - pCond->twindows.ekey = INT64_MAX; - pCond->order = TSDB_ORDER_ASC; - } - - // here we only need one more row, so the capacity is set to be ONE. - code = tsdbReaderCreate(pVnode, pCond, (void**)&((STsdbReader*)pReader)->innerReader[0], 1, pResBlock, idstr); - if (code != TSDB_CODE_SUCCESS) { - goto _err; - } - - if (order == TSDB_ORDER_ASC) { - pCond->twindows.skey = window.ekey + 1; - pCond->twindows.ekey = INT64_MAX; - } else { - pCond->twindows.skey = INT64_MIN; - pCond->twindows.ekey = window.ekey - 1; - } - pCond->order = order; - - code = tsdbReaderCreate(pVnode, pCond, (void**)&((STsdbReader*)pReader)->innerReader[1], 1, pResBlock, idstr); - if (code != TSDB_CODE_SUCCESS) { - goto _err; - } - } - - // NOTE: the endVersion in pCond is the data version not schema version, so pCond->endVersion is not correct here. - // no valid error code set in metaGetTbTSchema, so let's set the error code here. - // we should proceed in case of tmq processing. 
- if (pCond->suid != 0) { - pReader->pSchema = metaGetTbTSchema(pReader->pTsdb->pVnode->pMeta, pReader->suid, -1, 1); - if (pReader->pSchema == NULL) { - tsdbError("failed to get table schema, suid:%" PRIu64 ", ver:-1, %s", pReader->suid, pReader->idStr); - } - } else if (numOfTables > 0) { - STableKeyInfo* pKey = pTableList; - pReader->pSchema = metaGetTbTSchema(pReader->pTsdb->pVnode->pMeta, pKey->uid, -1, 1); - if (pReader->pSchema == NULL) { - tsdbError("failed to get table schema, uid:%" PRIu64 ", ver:-1, %s", pKey->uid, pReader->idStr); - } - } - - if (pReader->pSchema != NULL) { - tsdbRowMergerInit(&pReader->status.merger, pReader->pSchema); - } - - pReader->pSchemaMap = tSimpleHashInit(8, taosFastHash); - if (pReader->pSchemaMap == NULL) { - tsdbError("failed init schema hash for reader %s", pReader->idStr); - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - - tSimpleHashSetFreeFp(pReader->pSchemaMap, freeSchemaFunc); - if (pReader->pSchema != NULL) { - code = updateBlockSMAInfo(pReader->pSchema, &pReader->suppInfo); - if (code != TSDB_CODE_SUCCESS) { - goto _err; - } - } - - STsdbReader* p = (pReader->innerReader[0] != NULL) ? 
pReader->innerReader[0] : pReader; - pReader->status.pTableMap = - createDataBlockScanInfo(p, &pReader->blockInfoBuf, pTableList, &pReader->status.uidList, numOfTables); - if (pReader->status.pTableMap == NULL) { - *ppReader = NULL; - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - - pReader->status.pLDataIter = taosMemoryCalloc(pConf->sttTrigger, sizeof(SLDataIter)); - if (pReader->status.pLDataIter == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - - pReader->flag = READER_STATUS_SUSPEND; - - if (countOnly) { - pReader->readMode = READ_MODE_COUNT_ONLY; - } - - pReader->pIgnoreTables = pIgnoreTables; - - tsdbDebug("%p total numOfTable:%d, window:%" PRId64 " - %" PRId64 ", verRange:%" PRId64 " - %" PRId64 - " in this query %s", - pReader, numOfTables, pReader->window.skey, pReader->window.ekey, pReader->verRange.minVer, - pReader->verRange.maxVer, pReader->idStr); - - return code; - -_err: - tsdbError("failed to create data reader, code:%s %s", tstrerror(code), idstr); - tsdbReaderClose(*ppReader); - *ppReader = NULL; // reset the pointer value. 
- return code; -} - -static void clearSharedPtr(STsdbReader* p) { - p->status.pLDataIter = NULL; - p->status.pTableMap = NULL; - p->status.uidList.tableUidList = NULL; - p->pReadSnap = NULL; - p->pSchema = NULL; - p->pSchemaMap = NULL; -} - -static void setSharedPtr(STsdbReader* pDst, const STsdbReader* pSrc) { - pDst->status.pTableMap = pSrc->status.pTableMap; - pDst->status.pLDataIter = pSrc->status.pLDataIter; - pDst->status.uidList = pSrc->status.uidList; - pDst->pSchema = pSrc->pSchema; - pDst->pSchemaMap = pSrc->pSchemaMap; - pDst->pReadSnap = pSrc->pReadSnap; - - if (pDst->pSchema) { - tsdbRowMergerInit(&pDst->status.merger, pDst->pSchema); - } -} - -void tsdbReaderClose(STsdbReader* pReader) { - if (pReader == NULL) { - return; - } - - tsdbAcquireReader(pReader); - - { - if (pReader->innerReader[0] != NULL || pReader->innerReader[1] != NULL) { - STsdbReader* p = pReader->innerReader[0]; - clearSharedPtr(p); - - p = pReader->innerReader[1]; - clearSharedPtr(p); - - tsdbReaderClose(pReader->innerReader[0]); - tsdbReaderClose(pReader->innerReader[1]); - } - } - - SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; - - taosArrayDestroy(pSupInfo->pColAgg); - for (int32_t i = 0; i < pSupInfo->numOfCols; ++i) { - if (pSupInfo->buildBuf[i] != NULL) { - taosMemoryFreeClear(pSupInfo->buildBuf[i]); - } - } - - if (pReader->resBlockInfo.freeBlock) { - pReader->resBlockInfo.pResBlock = blockDataDestroy(pReader->resBlockInfo.pResBlock); - } - - taosMemoryFree(pSupInfo->colId); - tBlockDataDestroy(&pReader->status.fileBlockData); - cleanupDataBlockIterator(&pReader->status.blockIter); - - size_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); - if (pReader->status.pTableMap != NULL) { - destroyAllBlockScanInfo(pReader->status.pTableMap); - clearBlockScanInfoBuf(&pReader->blockInfoBuf); - } - - if (pReader->pFileReader != NULL) { - tsdbDataFReaderClose(&pReader->pFileReader); - } - - if (pReader->pDelFReader != NULL) { - 
tsdbDelFReaderClose(&pReader->pDelFReader); - } - - if (pReader->pDelIdx != NULL) { - taosArrayDestroy(pReader->pDelIdx); - pReader->pDelIdx = NULL; - } - - qTrace("tsdb/reader-close: %p, untake snapshot", pReader); - tsdbUntakeReadSnap(pReader, pReader->pReadSnap, true); - pReader->pReadSnap = NULL; - - tsdbReleaseReader(pReader); - - tsdbUninitReaderLock(pReader); - - taosMemoryFreeClear(pReader->status.pLDataIter); - taosMemoryFreeClear(pReader->status.uidList.tableUidList); - SIOCostSummary* pCost = &pReader->cost; - - SFilesetIter* pFilesetIter = &pReader->status.fileIter; - if (pFilesetIter->pLastBlockReader != NULL) { - SLastBlockReader* pLReader = pFilesetIter->pLastBlockReader; - tMergeTreeClose(&pLReader->mergeTree); - - getSttBlockLoadInfo(pLReader->pInfo, &pCost->sttCost); - - pLReader->pInfo = destroyLastBlockLoadInfo(pLReader->pInfo); - taosMemoryFree(pLReader); - } - - tsdbDebug( - "%p :io-cost summary: head-file:%" PRIu64 ", head-file time:%.2f ms, SMA:%" PRId64 - " SMA-time:%.2f ms, fileBlocks:%" PRId64 - ", fileBlocks-load-time:%.2f ms, " - "build in-memory-block-time:%.2f ms, lastBlocks:%" PRId64 ", lastBlocks-time:%.2f ms, composed-blocks:%" PRId64 - ", composed-blocks-time:%.2fms, STableBlockScanInfo size:%.2f Kb, createTime:%.2f ms,initDelSkylineIterTime:%.2f " - "ms, %s", - pReader, pCost->headFileLoad, pCost->headFileLoadTime, pCost->smaDataLoad, pCost->smaLoadTime, pCost->numOfBlocks, - pCost->blockLoadTime, pCost->buildmemBlock, pCost->sttBlockLoad, pCost->sttBlockLoadTime, pCost->composedBlocks, - pCost->buildComposedBlockTime, numOfTables * sizeof(STableBlockScanInfo) / 1000.0, pCost->createScanInfoList, - pCost->initDelSkylineIterTime, pReader->idStr); - - taosMemoryFree(pReader->idStr); - - tsdbRowMergerCleanup(&pReader->status.merger); - taosMemoryFree(pReader->pSchema); - - tSimpleHashCleanup(pReader->pSchemaMap); - taosMemoryFreeClear(pReader); -} - -int32_t tsdbReaderSuspend(STsdbReader* pReader) { - int32_t code = 0; - - // save 
reader's base state & reset top state to be reconstructed from base state - SReaderStatus* pStatus = &pReader->status; - STableBlockScanInfo* pBlockScanInfo = NULL; - - if (pStatus->loadFromFile) { - SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); - if (pBlockInfo != NULL) { - pBlockScanInfo = getTableBlockScanInfo(pStatus->pTableMap, pBlockInfo->uid, pReader->idStr); - if (pBlockScanInfo == NULL) { - goto _err; - } - } else { - pBlockScanInfo = *pStatus->pTableIter; - } - - tsdbDataFReaderClose(&pReader->pFileReader); - - // resetDataBlockScanInfo excluding lastKey - STableBlockScanInfo** p = NULL; - int32_t iter = 0; - - while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) { - STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; - - pInfo->iterInit = false; - pInfo->iter.hasVal = false; - pInfo->iiter.hasVal = false; - - if (pInfo->iter.iter != NULL) { - pInfo->iter.iter = tsdbTbDataIterDestroy(pInfo->iter.iter); - } - - if (pInfo->iiter.iter != NULL) { - pInfo->iiter.iter = tsdbTbDataIterDestroy(pInfo->iiter.iter); - } - - pInfo->delSkyline = taosArrayDestroy(pInfo->delSkyline); - } - } else { - // resetDataBlockScanInfo excluding lastKey - STableBlockScanInfo** p = NULL; - int32_t iter = 0; - - while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) { - STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; - - pInfo->iterInit = false; - pInfo->iter.hasVal = false; - pInfo->iiter.hasVal = false; - - if (pInfo->iter.iter != NULL) { - pInfo->iter.iter = tsdbTbDataIterDestroy(pInfo->iter.iter); - } - - if (pInfo->iiter.iter != NULL) { - pInfo->iiter.iter = tsdbTbDataIterDestroy(pInfo->iiter.iter); - } - - pInfo->delSkyline = taosArrayDestroy(pInfo->delSkyline); - } - - pBlockScanInfo = pStatus->pTableIter == NULL ? 
NULL : *pStatus->pTableIter; - if (pBlockScanInfo) { - // save lastKey to restore memory iterator - STimeWindow w = pReader->resBlockInfo.pResBlock->info.window; - pBlockScanInfo->lastKey = ASCENDING_TRAVERSE(pReader->order) ? w.ekey : w.skey; - - // reset current current table's data block scan info, - pBlockScanInfo->iterInit = false; - pBlockScanInfo->iter.hasVal = false; - pBlockScanInfo->iiter.hasVal = false; - if (pBlockScanInfo->iter.iter != NULL) { - pBlockScanInfo->iter.iter = tsdbTbDataIterDestroy(pBlockScanInfo->iter.iter); - } - - if (pBlockScanInfo->iiter.iter != NULL) { - pBlockScanInfo->iiter.iter = tsdbTbDataIterDestroy(pBlockScanInfo->iiter.iter); - } - - pBlockScanInfo->pBlockList = taosArrayDestroy(pBlockScanInfo->pBlockList); - tMapDataClear(&pBlockScanInfo->mapData); - // TODO: keep skyline for reuse - pBlockScanInfo->delSkyline = taosArrayDestroy(pBlockScanInfo->delSkyline); - } - } - - tsdbUntakeReadSnap(pReader, pReader->pReadSnap, false); - pReader->pReadSnap = NULL; - pReader->flag = READER_STATUS_SUSPEND; - - tsdbDebug("reader: %p suspended uid %" PRIu64 " in this query %s", pReader, pBlockScanInfo ? 
pBlockScanInfo->uid : 0, - pReader->idStr); - return code; - -_err: - tsdbError("failed to suspend data reader, code:%s %s", tstrerror(code), pReader->idStr); - return code; -} - -static int32_t tsdbSetQueryReseek(void* pQHandle) { - int32_t code = 0; - STsdbReader* pReader = pQHandle; - - code = tsdbTryAcquireReader(pReader); - if (code == 0) { - if (pReader->flag == READER_STATUS_SUSPEND) { - tsdbReleaseReader(pReader); - return code; - } - - tsdbReaderSuspend(pReader); - - tsdbReleaseReader(pReader); - - return code; - } else if (code == EBUSY) { - return TSDB_CODE_VND_QUERY_BUSY; - } else { - terrno = TAOS_SYSTEM_ERROR(code); - return TSDB_CODE_FAILED; - } -} - -int32_t tsdbReaderResume(STsdbReader* pReader) { - int32_t code = 0; - - STableBlockScanInfo** pBlockScanInfo = pReader->status.pTableIter; - - // restore reader's state - // task snapshot - int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); - if (numOfTables > 0) { - qTrace("tsdb/reader: %p, take snapshot", pReader); - code = tsdbTakeReadSnap(pReader, tsdbSetQueryReseek, &pReader->pReadSnap); - if (code != TSDB_CODE_SUCCESS) { - goto _err; - } - - if (pReader->type == TIMEWINDOW_RANGE_CONTAINED) { - code = doOpenReaderImpl(pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } else { - STsdbReader* pPrevReader = pReader->innerReader[0]; - STsdbReader* pNextReader = pReader->innerReader[1]; - - // we need only one row - pPrevReader->resBlockInfo.capacity = 1; - setSharedPtr(pPrevReader, pReader); - - pNextReader->resBlockInfo.capacity = 1; - setSharedPtr(pNextReader, pReader); - - code = doOpenReaderImpl(pPrevReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - } - - pReader->flag = READER_STATUS_NORMAL; - tsdbDebug("reader: %p resumed uid %" PRIu64 ", numOfTable:%" PRId32 ", in this query %s", pReader, - pBlockScanInfo ? 
(*pBlockScanInfo)->uid : 0, numOfTables, pReader->idStr); - return code; - -_err: - tsdbError("failed to resume data reader, code:%s %s", tstrerror(code), pReader->idStr); - return code; -} - -static bool tsdbReadRowsCountOnly(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; - - if (pReader->status.loadFromFile == false) { - return false; - } - - code = readRowsCountFromFiles(pReader); - if (code != TSDB_CODE_SUCCESS) { - return false; - } - - code = readRowsCountFromMem(pReader); - if (code != TSDB_CODE_SUCCESS) { - return false; - } - - pBlock->info.rows = pReader->rowsNum; - pBlock->info.id.uid = 0; - pBlock->info.dataLoad = 0; - - pReader->rowsNum = 0; - - return pBlock->info.rows > 0; -} - -static int32_t doTsdbNextDataBlock(STsdbReader* pReader, bool* hasNext) { - int32_t code = TSDB_CODE_SUCCESS; - - // cleanup the data that belongs to the previous data block - SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; - blockDataCleanup(pBlock); - - *hasNext = false; - - SReaderStatus* pStatus = &pReader->status; - if (tSimpleHashGetSize(pStatus->pTableMap) == 0) { - return code; - } - - if (READ_MODE_COUNT_ONLY == pReader->readMode) { - return tsdbReadRowsCountOnly(pReader); - } - - if (pStatus->loadFromFile) { - code = buildBlockFromFiles(pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - if (pBlock->info.rows <= 0) { - resetTableListIndex(&pReader->status); - code = buildBlockFromBufferSequentially(pReader); - } - } else { // no data in files, let's try the buffer - code = buildBlockFromBufferSequentially(pReader); - } - - *hasNext = pBlock->info.rows > 0; - - return code; -} - -int32_t tsdbNextDataBlock(STsdbReader* pReader, bool* hasNext) { - int32_t code = TSDB_CODE_SUCCESS; - - *hasNext = false; - - if (isEmptyQueryTimeWindow(&pReader->window) || pReader->step == EXTERNAL_ROWS_NEXT || - pReader->code != TSDB_CODE_SUCCESS) { - return (pReader->code != TSDB_CODE_SUCCESS) ? 
pReader->code : code; - } - - SReaderStatus* pStatus = &pReader->status; - - code = tsdbAcquireReader(pReader); - qTrace("tsdb/read: %p, take read mutex, code: %d", pReader, code); - - if (pReader->flag == READER_STATUS_SUSPEND) { - code = tsdbReaderResume(pReader); - if (code != TSDB_CODE_SUCCESS) { - tsdbReleaseReader(pReader); - return code; - } - } - - if (pReader->innerReader[0] != NULL && pReader->step == 0) { - code = doTsdbNextDataBlock(pReader->innerReader[0], hasNext); - if (code) { - tsdbReleaseReader(pReader); - return code; - } - - pReader->step = EXTERNAL_ROWS_PREV; - if (*hasNext) { - pStatus = &pReader->innerReader[0]->status; - if (pStatus->composedDataBlock) { - qTrace("tsdb/read: %p, unlock read mutex", pReader); - tsdbReleaseReader(pReader); - } - - return code; - } - } - - if (pReader->step == EXTERNAL_ROWS_PREV) { - // prepare for the main scan - code = doOpenReaderImpl(pReader); - int32_t step = 1; - resetAllDataBlockScanInfo(pReader->status.pTableMap, pReader->innerReader[0]->window.ekey, step); - - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - pReader->step = EXTERNAL_ROWS_MAIN; - } - - code = doTsdbNextDataBlock(pReader, hasNext); - if (code != TSDB_CODE_SUCCESS) { - tsdbReleaseReader(pReader); - return code; - } - - if (*hasNext) { - if (pStatus->composedDataBlock) { - qTrace("tsdb/read: %p, unlock read mutex", pReader); - tsdbReleaseReader(pReader); - } - - return code; - } - - if (pReader->step == EXTERNAL_ROWS_MAIN && pReader->innerReader[1] != NULL) { - // prepare for the next row scan - int32_t step = -1; - code = doOpenReaderImpl(pReader->innerReader[1]); - resetAllDataBlockScanInfo(pReader->innerReader[1]->status.pTableMap, pReader->window.ekey, step); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - code = doTsdbNextDataBlock(pReader->innerReader[1], hasNext); - if (code != TSDB_CODE_SUCCESS) { - tsdbReleaseReader(pReader); - return code; - } - - pReader->step = EXTERNAL_ROWS_NEXT; - if (*hasNext) { - pStatus = 
&pReader->innerReader[1]->status; - if (pStatus->composedDataBlock) { - qTrace("tsdb/read: %p, unlock read mutex", pReader); - tsdbReleaseReader(pReader); - } - - return code; - } - } - - qTrace("tsdb/read: %p, unlock read mutex", pReader); - tsdbReleaseReader(pReader); - - return code; -} - -static bool doFillNullColSMA(SBlockLoadSuppInfo* pSup, int32_t numOfRows, int32_t numOfCols, SColumnDataAgg* pTsAgg) { - bool hasNullSMA = false; - // do fill all null column value SMA info - int32_t i = 0, j = 0; - int32_t size = (int32_t)taosArrayGetSize(pSup->pColAgg); - taosArrayInsert(pSup->pColAgg, 0, pTsAgg); - size++; - - while (j < numOfCols && i < size) { - SColumnDataAgg* pAgg = taosArrayGet(pSup->pColAgg, i); - if (pAgg->colId == pSup->colId[j]) { - i += 1; - j += 1; - } else if (pAgg->colId < pSup->colId[j]) { - i += 1; - } else if (pSup->colId[j] < pAgg->colId) { - if (pSup->colId[j] != PRIMARYKEY_TIMESTAMP_COL_ID) { - SColumnDataAgg nullColAgg = {.colId = pSup->colId[j], .numOfNull = numOfRows}; - taosArrayInsert(pSup->pColAgg, i, &nullColAgg); - i += 1; - size++; - hasNullSMA = true; - } - j += 1; - } - } - - while (j < numOfCols) { - if (pSup->colId[j] != PRIMARYKEY_TIMESTAMP_COL_ID) { - SColumnDataAgg nullColAgg = {.colId = pSup->colId[j], .numOfNull = numOfRows}; - taosArrayInsert(pSup->pColAgg, i, &nullColAgg); - i += 1; - hasNullSMA = true; - } - j++; - } - - return hasNullSMA; -} - -int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SSDataBlock* pDataBlock, bool* allHave, bool* hasNullSMA) { - SColumnDataAgg*** pBlockSMA = &pDataBlock->pBlockAgg; - - int32_t code = 0; - *allHave = false; - *pBlockSMA = NULL; - - if (pReader->type == TIMEWINDOW_RANGE_EXTERNAL) { - return TSDB_CODE_SUCCESS; - } - - // there is no statistics data for composed block - if (pReader->status.composedDataBlock || (!pReader->suppInfo.smaValid)) { - return TSDB_CODE_SUCCESS; - } - - SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(&pReader->status.blockIter); - 
SBlockLoadSuppInfo* pSup = &pReader->suppInfo; - - if (pReader->resBlockInfo.pResBlock->info.id.uid != pFBlock->uid) { - return TSDB_CODE_SUCCESS; - } - - int64_t st = taosGetTimestampUs(); - - SDataBlk* pBlock = getCurrentBlock(&pReader->status.blockIter); - if (tDataBlkHasSma(pBlock)) { - code = tsdbReadBlockSma(pReader->pFileReader, pBlock, pSup->pColAgg); - if (code != TSDB_CODE_SUCCESS) { - tsdbDebug("vgId:%d, failed to load block SMA for uid %" PRIu64 ", code:%s, %s", 0, pFBlock->uid, tstrerror(code), - pReader->idStr); - return code; - } - } else { - *pBlockSMA = NULL; - return TSDB_CODE_SUCCESS; - } - - *allHave = true; - - // always load the first primary timestamp column data - SColumnDataAgg* pTsAgg = &pSup->tsColAgg; - - pTsAgg->numOfNull = 0; - pTsAgg->colId = PRIMARYKEY_TIMESTAMP_COL_ID; - pTsAgg->min = pReader->resBlockInfo.pResBlock->info.window.skey; - pTsAgg->max = pReader->resBlockInfo.pResBlock->info.window.ekey; - - // update the number of NULL data rows - size_t numOfCols = pSup->numOfCols; - - // ensure capacity - if (pDataBlock->pDataBlock) { - size_t colsNum = taosArrayGetSize(pDataBlock->pDataBlock); - taosArrayEnsureCap(pSup->pColAgg, colsNum); - } - - SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; - if (pResBlock->pBlockAgg == NULL) { - size_t num = taosArrayGetSize(pResBlock->pDataBlock); - pResBlock->pBlockAgg = taosMemoryCalloc(num, POINTER_BYTES); - } - - // do fill all null column value SMA info - if (doFillNullColSMA(pSup, pBlock->nRow, numOfCols, pTsAgg)) { - *hasNullSMA = true; - return TSDB_CODE_SUCCESS; - } - size_t size = taosArrayGetSize(pSup->pColAgg); - - int32_t i = 0, j = 0; - while (j < numOfCols && i < size) { - SColumnDataAgg* pAgg = taosArrayGet(pSup->pColAgg, i); - if (pAgg->colId == pSup->colId[j]) { - pResBlock->pBlockAgg[pSup->slotId[j]] = pAgg; - i += 1; - j += 1; - } else if (pAgg->colId < pSup->colId[j]) { - i += 1; - } else if (pSup->colId[j] < pAgg->colId) { - pResBlock->pBlockAgg[pSup->slotId[j]] 
= NULL; - *allHave = false; - j += 1; - } - } - - *pBlockSMA = pResBlock->pBlockAgg; - pReader->cost.smaDataLoad += 1; - - double elapsedTime = (taosGetTimestampUs() - st) / 1000.0; - pReader->cost.smaLoadTime += elapsedTime; - - tsdbDebug("vgId:%d, succeed to load block SMA for uid %" PRIu64 ", %s", 0, pFBlock->uid, pReader->idStr); - return code; -} - -STableBlockScanInfo* getTableBlockScanInfo(SSHashObj* pTableMap, uint64_t uid, const char* id) { - STableBlockScanInfo** p = tSimpleHashGet(pTableMap, &uid, sizeof(uid)); - if (p == NULL || *p == NULL) { - terrno = TSDB_CODE_INVALID_PARA; - int32_t size = tSimpleHashGetSize(pTableMap); - tsdbError("failed to locate the uid:%" PRIu64 " in query table uid list, total tables:%d, %s", uid, size, id); - return NULL; - } - - return *p; -} - -static SSDataBlock* doRetrieveDataBlock(STsdbReader* pReader) { - SReaderStatus* pStatus = &pReader->status; - int32_t code = TSDB_CODE_SUCCESS; - SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pStatus->blockIter); - - if (pReader->code != TSDB_CODE_SUCCESS) { - return NULL; - } - - STableBlockScanInfo* pBlockScanInfo = getTableBlockScanInfo(pStatus->pTableMap, pBlockInfo->uid, pReader->idStr); - if (pBlockScanInfo == NULL) { - return NULL; - } - - code = doLoadFileBlockData(pReader, &pStatus->blockIter, &pStatus->fileBlockData, pBlockScanInfo->uid); - if (code != TSDB_CODE_SUCCESS) { - tBlockDataDestroy(&pStatus->fileBlockData); - terrno = code; - return NULL; - } - - code = copyBlockDataToSDataBlock(pReader); - if (code != TSDB_CODE_SUCCESS) { - tBlockDataDestroy(&pStatus->fileBlockData); - terrno = code; - return NULL; - } - - return pReader->resBlockInfo.pResBlock; -} - -SSDataBlock* tsdbRetrieveDataBlock(STsdbReader* pReader, SArray* pIdList) { - STsdbReader* pTReader = pReader; - if (pReader->type == TIMEWINDOW_RANGE_EXTERNAL) { - if (pReader->step == EXTERNAL_ROWS_PREV) { - pTReader = pReader->innerReader[0]; - } else if (pReader->step == EXTERNAL_ROWS_NEXT) { - 
pTReader = pReader->innerReader[1]; - } - } - - SReaderStatus* pStatus = &pTReader->status; - if (pStatus->composedDataBlock) { - return pTReader->resBlockInfo.pResBlock; - } - - SSDataBlock* ret = doRetrieveDataBlock(pTReader); - - qTrace("tsdb/read-retrieve: %p, unlock read mutex", pReader); - tsdbReleaseReader(pReader); - - return ret; -} - -int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) { - int32_t code = TSDB_CODE_SUCCESS; - - qTrace("tsdb/reader-reset: %p, take read mutex", pReader); - tsdbAcquireReader(pReader); - - if (pReader->flag == READER_STATUS_SUSPEND) { - code = tsdbReaderResume(pReader); - if (code != TSDB_CODE_SUCCESS) { - tsdbReleaseReader(pReader); - return code; - } - } - - if (isEmptyQueryTimeWindow(&pReader->window) || pReader->pReadSnap == NULL) { - tsdbDebug("tsdb reader reset return %p, %s", pReader->pReadSnap, pReader->idStr); - tsdbReleaseReader(pReader); - return TSDB_CODE_SUCCESS; - } - - SReaderStatus* pStatus = &pReader->status; - SDataBlockIter* pBlockIter = &pStatus->blockIter; - - pReader->order = pCond->order; - pReader->type = TIMEWINDOW_RANGE_CONTAINED; - pStatus->loadFromFile = true; - pStatus->pTableIter = NULL; - pReader->window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows); - - // allocate buffer in order to load data blocks from file - memset(&pReader->suppInfo.tsColAgg, 0, sizeof(SColumnDataAgg)); - - pReader->suppInfo.tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID; - tsdbDataFReaderClose(&pReader->pFileReader); - - int32_t numOfTables = tSimpleHashGetSize(pStatus->pTableMap); - - initFilesetIterator(&pStatus->fileIter, pReader->pReadSnap->fs.aDFileSet, pReader); - resetDataBlockIterator(pBlockIter, pReader->order); - resetTableListIndex(&pReader->status); - - bool asc = ASCENDING_TRAVERSE(pReader->order); - int32_t step = asc ? 1 : -1; - int64_t ts = asc ? 
pReader->window.skey - 1 : pReader->window.ekey + 1; - resetAllDataBlockScanInfo(pStatus->pTableMap, ts, step); - - // no data in files, let's try buffer in memory - if (pStatus->fileIter.numOfFiles == 0) { - pStatus->loadFromFile = false; - resetTableListIndex(pStatus); - } else { - code = initForFirstBlockInFile(pReader, pBlockIter); - if (code != TSDB_CODE_SUCCESS) { - tsdbError("%p reset reader failed, numOfTables:%d, query range:%" PRId64 " - %" PRId64 " in query %s", pReader, - numOfTables, pReader->window.skey, pReader->window.ekey, pReader->idStr); - - tsdbReleaseReader(pReader); - return code; - } - } - - tsdbDebug("%p reset reader, suid:%" PRIu64 ", numOfTables:%d, skey:%" PRId64 ", query range:%" PRId64 " - %" PRId64 - " in query %s", - pReader, pReader->suid, numOfTables, pCond->twindows.skey, pReader->window.skey, pReader->window.ekey, - pReader->idStr); - - tsdbReleaseReader(pReader); - - return code; -} - -static int32_t getBucketIndex(int32_t startRow, int32_t bucketRange, int32_t numOfRows, int32_t numOfBucket) { - if (numOfRows < startRow) { - return 0; - } - int32_t bucketIndex = ((numOfRows - startRow) / bucketRange); - if (bucketIndex == numOfBucket) { - bucketIndex -= 1; - } - return bucketIndex; -} - -int32_t tsdbGetFileBlocksDistInfo(STsdbReader* pReader, STableBlockDistInfo* pTableBlockInfo) { - int32_t code = TSDB_CODE_SUCCESS; - pTableBlockInfo->totalSize = 0; - pTableBlockInfo->totalRows = 0; - pTableBlockInfo->numOfVgroups = 1; - - const int32_t numOfBuckets = 20.0; - - // find the start data block in file - tsdbAcquireReader(pReader); - if (pReader->flag == READER_STATUS_SUSPEND) { - code = tsdbReaderResume(pReader); - if (code != TSDB_CODE_SUCCESS) { - tsdbReleaseReader(pReader); - return code; - } - } - SReaderStatus* pStatus = &pReader->status; - - STsdbCfg* pc = &pReader->pTsdb->pVnode->config.tsdbCfg; - pTableBlockInfo->defMinRows = pc->minRows; - pTableBlockInfo->defMaxRows = pc->maxRows; - - int32_t bucketRange = 
ceil(((double)(pc->maxRows - pc->minRows)) / numOfBuckets); - - pTableBlockInfo->numOfFiles += 1; - - int32_t numOfTables = (int32_t)tSimpleHashGetSize(pStatus->pTableMap); - int defaultRows = 4096; - - SDataBlockIter* pBlockIter = &pStatus->blockIter; - pTableBlockInfo->numOfFiles += pStatus->fileIter.numOfFiles; - - if (pBlockIter->numOfBlocks > 0) { - pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks; - } - - pTableBlockInfo->numOfTables = numOfTables; - bool hasNext = (pBlockIter->numOfBlocks > 0); - - while (true) { - if (hasNext) { - SDataBlk* pBlock = getCurrentBlock(pBlockIter); - - int32_t numOfRows = pBlock->nRow; - pTableBlockInfo->totalRows += numOfRows; - - if (numOfRows > pTableBlockInfo->maxRows) { - pTableBlockInfo->maxRows = numOfRows; - } - - if (numOfRows < pTableBlockInfo->minRows) { - pTableBlockInfo->minRows = numOfRows; - } - - if (numOfRows < defaultRows) { - pTableBlockInfo->numOfSmallBlocks += 1; - } - - pTableBlockInfo->totalSize += pBlock->aSubBlock[0].szBlock; - - int32_t bucketIndex = getBucketIndex(pTableBlockInfo->defMinRows, bucketRange, numOfRows, numOfBuckets); - pTableBlockInfo->blockRowsHisto[bucketIndex]++; - - hasNext = blockIteratorNext(&pStatus->blockIter, pReader->idStr); - } else { - code = initForFirstBlockInFile(pReader, pBlockIter); - if ((code != TSDB_CODE_SUCCESS) || (pStatus->loadFromFile == false)) { - break; - } - - pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks; - hasNext = (pBlockIter->numOfBlocks > 0); - } - - // tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %s", pReader, numOfBlocks, numOfTables, - // pReader->pFileGroup->fid, pReader->idStr); - } - tsdbReleaseReader(pReader); - return code; -} - -int64_t tsdbGetNumOfRowsInMemTable(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; - int64_t rows = 0; - - SReaderStatus* pStatus = &pReader->status; - tsdbAcquireReader(pReader); - if (pReader->flag == READER_STATUS_SUSPEND) { - code = tsdbReaderResume(pReader); - if (code 
!= TSDB_CODE_SUCCESS) { - tsdbReleaseReader(pReader); - return code; - } - } - - int32_t iter = 0; - pStatus->pTableIter = tSimpleHashIterate(pStatus->pTableMap, NULL, &iter); - - while (pStatus->pTableIter != NULL) { - STableBlockScanInfo* pBlockScanInfo = *(STableBlockScanInfo**)pStatus->pTableIter; - - STbData* d = NULL; - if (pReader->pReadSnap->pMem != NULL) { - d = tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->suid, pBlockScanInfo->uid); - if (d != NULL) { - rows += tsdbGetNRowsInTbData(d); - } - } - - STbData* di = NULL; - if (pReader->pReadSnap->pIMem != NULL) { - di = tsdbGetTbDataFromMemTable(pReader->pReadSnap->pIMem, pReader->suid, pBlockScanInfo->uid); - if (di != NULL) { - rows += tsdbGetNRowsInTbData(di); - } - } - - // current table is exhausted, let's try the next table - pStatus->pTableIter = tSimpleHashIterate(pStatus->pTableMap, pStatus->pTableIter, &iter); - } - - tsdbReleaseReader(pReader); - - return rows; -} - -int32_t tsdbGetTableSchema(void* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid) { - SMetaReader mr = {0}; - metaReaderDoInit(&mr, ((SVnode*)pVnode)->pMeta, 0); - int32_t code = metaReaderGetTableEntryByUidCache(&mr, uid); - if (code != TSDB_CODE_SUCCESS) { - terrno = TSDB_CODE_TDB_INVALID_TABLE_ID; - metaReaderClear(&mr); - return terrno; - } - - *suid = 0; - - // only child table and ordinary table is allowed, super table is not allowed. 
- if (mr.me.type == TSDB_CHILD_TABLE) { - tDecoderClear(&mr.coder); - *suid = mr.me.ctbEntry.suid; - code = metaReaderGetTableEntryByUidCache(&mr, *suid); - if (code != TSDB_CODE_SUCCESS) { - terrno = TSDB_CODE_TDB_INVALID_TABLE_ID; - metaReaderClear(&mr); - return terrno; - } - } else if (mr.me.type == TSDB_NORMAL_TABLE) { // do nothing - } else { - terrno = TSDB_CODE_INVALID_PARA; - metaReaderClear(&mr); - return terrno; - } - - metaReaderClear(&mr); - - // get the newest table schema version - code = metaGetTbTSchemaEx(((SVnode*)pVnode)->pMeta, *suid, uid, -1, pSchema); - return code; -} - -int32_t tsdbTakeReadSnap(STsdbReader* pReader, _query_reseek_func_t reseek, STsdbReadSnap** ppSnap) { - int32_t code = 0; - STsdb* pTsdb = pReader->pTsdb; - SVersionRange* pRange = &pReader->verRange; - - // alloc - STsdbReadSnap* pSnap = (STsdbReadSnap*)taosMemoryCalloc(1, sizeof(*pSnap)); - if (pSnap == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; - } - - // lock - taosThreadRwlockRdlock(&pTsdb->rwLock); - - // take snapshot - if (pTsdb->mem && (pRange->minVer <= pTsdb->mem->maxVer && pRange->maxVer >= pTsdb->mem->minVer)) { - pSnap->pMem = pTsdb->mem; - pSnap->pNode = taosMemoryMalloc(sizeof(*pSnap->pNode)); - if (pSnap->pNode == NULL) { - taosThreadRwlockUnlock(&pTsdb->rwLock); - code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; - } - pSnap->pNode->pQHandle = pReader; - pSnap->pNode->reseek = reseek; - - tsdbRefMemTable(pTsdb->mem, pSnap->pNode); - } - - if (pTsdb->imem && (pRange->minVer <= pTsdb->imem->maxVer && pRange->maxVer >= pTsdb->imem->minVer)) { - pSnap->pIMem = pTsdb->imem; - pSnap->pINode = taosMemoryMalloc(sizeof(*pSnap->pINode)); - if (pSnap->pINode == NULL) { - taosThreadRwlockUnlock(&pTsdb->rwLock); - code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; - } - pSnap->pINode->pQHandle = pReader; - pSnap->pINode->reseek = reseek; - - tsdbRefMemTable(pTsdb->imem, pSnap->pINode); - } - - // fs - code = tsdbFSRef(pTsdb, &pSnap->fs); - if (code) { - 
taosThreadRwlockUnlock(&pTsdb->rwLock); - goto _exit; - } - - // unlock - taosThreadRwlockUnlock(&pTsdb->rwLock); - - tsdbTrace("vgId:%d, take read snapshot", TD_VID(pTsdb->pVnode)); - -_exit: - if (code) { - *ppSnap = NULL; - if (pSnap) { - if (pSnap->pNode) taosMemoryFree(pSnap->pNode); - if (pSnap->pINode) taosMemoryFree(pSnap->pINode); - taosMemoryFree(pSnap); - } - } else { - *ppSnap = pSnap; - } - return code; -} - -void tsdbUntakeReadSnap(STsdbReader* pReader, STsdbReadSnap* pSnap, bool proactive) { - STsdb* pTsdb = pReader->pTsdb; - - if (pSnap) { - if (pSnap->pMem) { - tsdbUnrefMemTable(pSnap->pMem, pSnap->pNode, proactive); - } - - if (pSnap->pIMem) { - tsdbUnrefMemTable(pSnap->pIMem, pSnap->pINode, proactive); - } - - tsdbFSUnref(pTsdb, &pSnap->fs); - if (pSnap->pNode) taosMemoryFree(pSnap->pNode); - if (pSnap->pINode) taosMemoryFree(pSnap->pINode); - taosMemoryFree(pSnap); - } - tsdbTrace("vgId:%d, untake read snapshot", TD_VID(pTsdb->pVnode)); -} - -// if failed, do nothing -void tsdbReaderSetId(STsdbReader* pReader, const char* idstr) { - taosMemoryFreeClear(pReader->idStr); - pReader->idStr = taosStrdup(idstr); -} - -void tsdbReaderSetCloseFlag(STsdbReader* pReader) { /*pReader->code = TSDB_CODE_TSC_QUERY_CANCELLED;*/ } diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index fa3e00ce0f..e2123df3a8 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -452,6 +452,9 @@ static int32_t doLoadBlockIndex(STsdbReader* pReader, SDataFileReader* pFileRead const TBrinBlkArray* pBlkArray = NULL; int32_t code = tsdbDataFileReadBrinBlk(pFileReader, &pBlkArray); + if (code != TSDB_CODE_SUCCESS) { + return code; + } #if 0 LRUHandle* handle = NULL; @@ -1117,8 +1120,8 @@ static bool getNeighborBlockOfSameTable(SDataBlockIter* pBlockIter, SFileDataBlo // *nextIndex = pBlockInfo->tbBlockIdx + step; // *pBlockIndex = *(SBlockIndex*)taosArrayGet(pTableBlockScanInfo->pBlockList, 
*nextIndex); STableDataBlockIdx* pTableDataBlockIdx = taosArrayGet(pTableBlockScanInfo->pBlockIdxList, pBlockInfo->tbBlockIdx + step); - SBrinRecord* p = taosArrayGet(pBlockIter->blockList, pTableDataBlockIdx->globalIndex); - memcpy(pRecord, p, sizeof(SBrinRecord)); + SFileDataBlockInfo* p = taosArrayGet(pBlockIter->blockList, pTableDataBlockIdx->globalIndex); + memcpy(pRecord, &p->record, sizeof(SBrinRecord)); *nextIndex = pBlockInfo->tbBlockIdx + step; @@ -1504,6 +1507,12 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* if (ps == NULL) { return terrno; } + + int32_t code = tsdbRowMergerInit(pMerger, ps); + if (code != TSDB_CODE_SUCCESS) { + tsdbError("failed to init row merger, code:%s", tstrerror(code)); + return code; + } } int64_t minKey = 0; @@ -1535,15 +1544,11 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* } } - // todo remove init - bool init = false; - // ASC: file block ---> last block -----> imem -----> mem // DESC: mem -----> imem -----> last block -----> file block if (pReader->info.order == TSDB_ORDER_ASC) { if (minKey == key) { - init = true; - int32_t code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + int32_t code = tsdbRowMergerAdd(pMerger, &fRow, NULL); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -1552,47 +1557,44 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* if (minKey == tsLast) { TSDBROW* fRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - if (init) { - tsdbRowMergerAdd(pMerger, fRow1, NULL); - } else { - init = true; - int32_t code = tsdbRowMergerAdd(pMerger, fRow1, pReader->info.pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } + int32_t code = tsdbRowMergerAdd(pMerger, fRow1, NULL); + if (code != TSDB_CODE_SUCCESS) { + return code; } doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange, pReader->idStr); } if (minKey == k.ts) { - STSchema* pSchema = 
doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); - if (pSchema == NULL) { - return terrno; - } - if (init) { - tsdbRowMergerAdd(pMerger, pRow, pSchema); - } else { - init = true; - int32_t code = tsdbRowMergerAdd(pMerger, pRow, pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; + STSchema* pTSchema = NULL; + if (pRow->type == TSDBROW_ROW_FMT) { + pTSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); + if (pTSchema == NULL) { + return terrno; } } - int32_t code = doMergeRowsInBuf(pIter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); + + int32_t code = tsdbRowMergerAdd(pMerger, pRow, pTSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doMergeRowsInBuf(pIter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); if (code != TSDB_CODE_SUCCESS) { return code; } } } else { if (minKey == k.ts) { - init = true; - STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); - if (pSchema == NULL) { - return terrno; + STSchema* pTSchema = NULL; + if (pRow->type == TSDBROW_ROW_FMT) { + pTSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); + if (pTSchema == NULL) { + return terrno; + } } - int32_t code = tsdbRowMergerAdd(pMerger, pRow, pSchema); + int32_t code = tsdbRowMergerAdd(pMerger, pRow, pTSchema); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -1605,28 +1607,18 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* if (minKey == tsLast) { TSDBROW* fRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - if (init) { - tsdbRowMergerAdd(pMerger, fRow1, NULL); - } else { - init = true; - int32_t code = tsdbRowMergerAdd(pMerger, fRow1, pReader->info.pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } + int32_t code = tsdbRowMergerAdd(pMerger, fRow1, NULL); + if (code != TSDB_CODE_SUCCESS) { + return code; } doMergeRowsInLastBlock(pLastBlockReader, 
pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange, pReader->idStr); } if (minKey == key) { - if (init) { - tsdbRowMergerAdd(pMerger, &fRow, NULL); - } else { - init = true; - int32_t code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } + int32_t code = tsdbRowMergerAdd(pMerger, &fRow, NULL); + if (code != TSDB_CODE_SUCCESS) { + return code; } doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); } @@ -1674,7 +1666,7 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, pBlockScanInfo->lastKey = tsLastBlock; return TSDB_CODE_SUCCESS; } else { - code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + code = tsdbRowMergerAdd(pMerger, &fRow, NULL); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -1699,7 +1691,7 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, } } } else { // not merge block data - code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + code = tsdbRowMergerAdd(pMerger, &fRow, NULL); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -1742,6 +1734,12 @@ static int32_t mergeFileBlockAndLastBlock(STsdbReader* pReader, SLastBlockReader if (ps == NULL) { return terrno; } + + int32_t code = tsdbRowMergerInit(pMerger, ps); + if (code != TSDB_CODE_SUCCESS) { + tsdbError("failed to init row merger, code:%s", tstrerror(code)); + return code; + } } if (hasDataInFileBlock(pBlockData, pDumpInfo)) { @@ -1768,7 +1766,7 @@ static int32_t mergeFileBlockAndLastBlock(STsdbReader* pReader, SLastBlockReader } // the following for key == tsLast SRow* pTSRow = NULL; - int32_t code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + int32_t code = tsdbRowMergerAdd(pMerger, &fRow, NULL); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -1776,7 +1774,10 @@ static int32_t mergeFileBlockAndLastBlock(STsdbReader* pReader, SLastBlockReader doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); TSDBROW* 
pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - tsdbRowMergerAdd(pMerger, pRow1, NULL); + code = tsdbRowMergerAdd(pMerger, pRow1, NULL); + if (code != TSDB_CODE_SUCCESS) { + return code; + } doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange, pReader->idStr); @@ -1816,14 +1817,21 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* TSDBKEY k = TSDBROW_KEY(pRow); TSDBKEY ik = TSDBROW_KEY(piRow); - STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); - if (pSchema == NULL) { - return code; + + STSchema* pSchema = NULL; + if (pRow->type == TSDBROW_ROW_FMT) { + pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); + if (pSchema == NULL) { + return terrno; + } } - STSchema* piSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(piRow), pReader, pBlockScanInfo->uid); - if (piSchema == NULL) { - return code; + STSchema* piSchema = NULL; + if (piRow->type == TSDBROW_ROW_FMT) { + piSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(piRow), pReader, pBlockScanInfo->uid); + if (piSchema == NULL) { + return code; + } } // merge is not initialized yet, due to the fact that the pReader->info.pSchema is not initialized @@ -1833,6 +1841,12 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* if (ps == NULL) { return terrno; } + + code = tsdbRowMergerInit(pMerger, ps); + if (code != TSDB_CODE_SUCCESS) { + tsdbError("failed to init row merger, code:%s", tstrerror(code)); + return code; + } } int64_t minKey = 0; @@ -1872,15 +1886,12 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* } } - bool init = false; - // ASC: file block -----> last block -----> imem -----> mem // DESC: mem -----> imem -----> last block -----> file block if (ASCENDING_TRAVERSE(pReader->info.order)) { if (minKey == key) { - init = true; TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); 
- code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + code = tsdbRowMergerAdd(pMerger, &fRow, NULL); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -1890,14 +1901,9 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* if (minKey == tsLast) { TSDBROW* pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - if (init) { - tsdbRowMergerAdd(pMerger, pRow1, NULL); - } else { - init = true; - code = tsdbRowMergerAdd(pMerger, pRow1, pReader->info.pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } + code = tsdbRowMergerAdd(pMerger, pRow1, NULL); + if (code != TSDB_CODE_SUCCESS) { + return code; } doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange, @@ -1905,14 +1911,9 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* } if (minKey == ik.ts) { - if (init) { - tsdbRowMergerAdd(pMerger, piRow, piSchema); - } else { - init = true; - code = tsdbRowMergerAdd(pMerger, piRow, piSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } + code = tsdbRowMergerAdd(pMerger, piRow, piSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; } code = doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, pReader); @@ -1922,15 +1923,11 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* } if (minKey == k.ts) { - if (init) { - tsdbRowMergerAdd(pMerger, pRow, pSchema); - } else { - // STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); - code = tsdbRowMergerAdd(pMerger, pRow, pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } + code = tsdbRowMergerAdd(pMerger, pRow, pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; } + code = doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); if (code != TSDB_CODE_SUCCESS) { return code; @@ -1938,7 +1935,6 @@ static int32_t 
doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* } } else { if (minKey == k.ts) { - init = true; code = tsdbRowMergerAdd(pMerger, pRow, pSchema); if (code != TSDB_CODE_SUCCESS) { return code; @@ -1951,15 +1947,11 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* } if (minKey == ik.ts) { - if (init) { - tsdbRowMergerAdd(pMerger, piRow, piSchema); - } else { - init = true; - code = tsdbRowMergerAdd(pMerger, piRow, piSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } + code = tsdbRowMergerAdd(pMerger, piRow, piSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; } + code = doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, pReader); if (code != TSDB_CODE_SUCCESS) { return code; @@ -1968,29 +1960,22 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* if (minKey == tsLast) { TSDBROW* pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - if (init) { - tsdbRowMergerAdd(pMerger, pRow1, NULL); - } else { - init = true; - code = tsdbRowMergerAdd(pMerger, pRow1, pReader->info.pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } + code = tsdbRowMergerAdd(pMerger, pRow1, NULL); + if (code != TSDB_CODE_SUCCESS) { + return code; } + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange, pReader->idStr); } if (minKey == key) { TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); - if (!init) { - code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } else { - tsdbRowMergerAdd(pMerger, &fRow, NULL); + code = tsdbRowMergerAdd(pMerger, &fRow, NULL); + if (code != TSDB_CODE_SUCCESS) { + return code; } + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); } } @@ -2184,6 +2169,12 @@ int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBloc if (ps == NULL) { return terrno; } 
+ + code = tsdbRowMergerInit(pMerger, ps); + if (code != TSDB_CODE_SUCCESS) { + tsdbError("failed to init row merger, code:%s", tstrerror(code)); + return code; + } } if (copied) { @@ -2193,7 +2184,7 @@ int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBloc TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); SRow* pTSRow = NULL; - code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + code = tsdbRowMergerAdd(pMerger, &fRow, NULL); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -2772,6 +2763,7 @@ static int32_t doSumFileBlockRows(STsdbReader* pReader, SDataFReader* pFileReade goto _end; } +#if 0 int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); SArray* aBlockIdx = (SArray*)taosLRUCacheValue(pFileReader->pTsdb->biCache, handle); @@ -2800,6 +2792,7 @@ static int32_t doSumFileBlockRows(STsdbReader* pReader, SDataFReader* pFileReade // pReader->rowsNum += block.nRow; // } } +#endif _end: tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); @@ -3328,16 +3321,15 @@ int32_t doMergeRowsInBuf(SIterInfo* pIter, uint64_t uid, int64_t ts, SArray* pDe break; } + STSchema* pTSchema = NULL; if (pRow->type == TSDBROW_ROW_FMT) { - STSchema* pTSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, uid); + pTSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, uid); if (pTSchema == NULL) { return terrno; } - - tsdbRowMergerAdd(pMerger, pRow, pTSchema); - } else { // column format - tsdbRowMergerAdd(pMerger, pRow, NULL); } + + tsdbRowMergerAdd(pMerger, pRow, pTSchema); } return TSDB_CODE_SUCCESS; @@ -3473,31 +3465,30 @@ int32_t doMergeMemTableMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, int32_t code = 0; // start to merge duplicated rows - if (current.type == TSDBROW_ROW_FMT) { - // get the correct schema for data in memory - STSchema* pTSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(¤t), pReader, uid); + STSchema* pTSchema = NULL; + if (current.type == TSDBROW_ROW_FMT) { // get 
the correct schema for row-wise data in memory + pTSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(¤t), pReader, uid); if (pTSchema == NULL) { return terrno; } + } - code = tsdbRowMergerAdd(&pReader->status.merger, ¤t, pTSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } + code = tsdbRowMergerAdd(&pReader->status.merger, ¤t, pTSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } - STSchema* pTSchema1 = doGetSchemaForTSRow(TSDBROW_SVERSION(pNextRow), pReader, uid); + STSchema* pTSchema1 = NULL; + if (pNextRow->type == TSDBROW_ROW_FMT) { // get the correct schema for row-wise data in memory + pTSchema1 = doGetSchemaForTSRow(TSDBROW_SVERSION(pNextRow), pReader, uid); if (pTSchema1 == NULL) { return terrno; } + } - tsdbRowMergerAdd(&pReader->status.merger, pNextRow, pTSchema1); - } else { // let's merge rows in file block - code = tsdbRowMergerAdd(&pReader->status.merger, ¤t, pReader->info.pSchema); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - tsdbRowMergerAdd(&pReader->status.merger, pNextRow, NULL); + code = tsdbRowMergerAdd(&pReader->status.merger, pNextRow, pTSchema1); + if (code != TSDB_CODE_SUCCESS) { + return code; } code = doMergeRowsInBuf(pIter, uid, TSDBROW_TS(¤t), pDelList, pReader); @@ -3523,14 +3514,21 @@ int32_t doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* p TSDBKEY k = TSDBROW_KEY(pRow); TSDBKEY ik = TSDBROW_KEY(piRow); - STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); - if (pSchema == NULL) { - return terrno; + + STSchema* pSchema = NULL; + if (pRow->type == TSDBROW_ROW_FMT) { + pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); + if (pSchema == NULL) { + return terrno; + } } - STSchema* piSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(piRow), pReader, pBlockScanInfo->uid); - if (piSchema == NULL) { - return terrno; + STSchema* piSchema = NULL; + if (piRow->type == TSDBROW_ROW_FMT) { + piSchema = 
doGetSchemaForTSRow(TSDBROW_SVERSION(piRow), pReader, pBlockScanInfo->uid); + if (piSchema == NULL) { + return terrno; + } } if (ASCENDING_TRAVERSE(pReader->info.order)) { // ascending order imem --> mem @@ -4460,7 +4458,11 @@ static void doFillNullColSMA(SBlockLoadSuppInfo* pSup, int32_t numOfRows, int32_ // do fill all null column value SMA info int32_t i = 0, j = 0; int32_t size = (int32_t)TARRAY2_SIZE(&pSup->colAggArray); - TARRAY2_INSERT_PTR(&pSup->colAggArray, 0, pTsAgg); + int32_t code = TARRAY2_INSERT_PTR(&pSup->colAggArray, 0, pTsAgg); + if (code != TSDB_CODE_SUCCESS) { + return; + } + size++; while (j < numOfCols && i < size) { @@ -4473,7 +4475,11 @@ static void doFillNullColSMA(SBlockLoadSuppInfo* pSup, int32_t numOfRows, int32_ } else if (pSup->colId[j] < pAgg->colId) { if (pSup->colId[j] != PRIMARYKEY_TIMESTAMP_COL_ID) { SColumnDataAgg nullColAgg = {.colId = pSup->colId[j], .numOfNull = numOfRows}; - TARRAY2_INSERT_PTR(&pSup->colAggArray, i, &nullColAgg); + code = TARRAY2_INSERT_PTR(&pSup->colAggArray, i, &nullColAgg); + if (code != TSDB_CODE_SUCCESS) { + return; + } + i += 1; size++; } @@ -4484,7 +4490,11 @@ static void doFillNullColSMA(SBlockLoadSuppInfo* pSup, int32_t numOfRows, int32_ while (j < numOfCols) { if (pSup->colId[j] != PRIMARYKEY_TIMESTAMP_COL_ID) { SColumnDataAgg nullColAgg = {.colId = pSup->colId[j], .numOfNull = numOfRows}; - TARRAY2_INSERT_PTR(&pSup->colAggArray, i, &nullColAgg); + code = TARRAY2_INSERT_PTR(&pSup->colAggArray, i, &nullColAgg); + if (code != TSDB_CODE_SUCCESS) { + return; + } + i += 1; } j++; @@ -4842,7 +4852,7 @@ int64_t tsdbGetNumOfRowsInMemTable2(STsdbReader* pReader) { return rows; } -int32_t tsdbGetTableSchema2(void* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid) { +int32_t tsdbGetTableSchema(void* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid) { SMetaReader mr = {0}; metaReaderDoInit(&mr, ((SVnode*)pVnode)->pMeta, 0); int32_t code = metaReaderGetTableEntryByUidCache(&mr, uid); @@ -4898,10 
+4908,10 @@ int32_t tsdbTakeReadSnap2(STsdbReader* pReader, _query_reseek_func_t reseek, STs pSnap->pMem = pTsdb->mem; pSnap->pNode = taosMemoryMalloc(sizeof(*pSnap->pNode)); if (pSnap->pNode == NULL) { - taosThreadRwlockUnlock(&pTsdb->rwLock); code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; } + pSnap->pNode->pQHandle = pReader; pSnap->pNode->reseek = reseek; @@ -4912,10 +4922,10 @@ int32_t tsdbTakeReadSnap2(STsdbReader* pReader, _query_reseek_func_t reseek, STs pSnap->pIMem = pTsdb->imem; pSnap->pINode = taosMemoryMalloc(sizeof(*pSnap->pINode)); if (pSnap->pINode == NULL) { - taosThreadRwlockUnlock(&pTsdb->rwLock); code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; } + pSnap->pINode->pQHandle = pReader; pSnap->pINode->reseek = reseek; @@ -4924,18 +4934,16 @@ int32_t tsdbTakeReadSnap2(STsdbReader* pReader, _query_reseek_func_t reseek, STs // fs code = tsdbFSCreateRefSnapshot(pTsdb->pFS, &pSnap->pfSetArray); - if (code) { - taosThreadRwlockUnlock(&pTsdb->rwLock); - goto _exit; + if (code == TSDB_CODE_SUCCESS) { + tsdbTrace("vgId:%d, take read snapshot", TD_VID(pTsdb->pVnode)); } - // unlock +_exit: taosThreadRwlockUnlock(&pTsdb->rwLock); - tsdbTrace("vgId:%d, take read snapshot", TD_VID(pTsdb->pVnode)); + if (code != TSDB_CODE_SUCCESS) { + tsdbError("vgId:%d take read snapshot failed, code:%s", TD_VID(pTsdb->pVnode), tstrerror(code)); -_exit: - if (code) { *ppSnap = NULL; if (pSnap) { if (pSnap->pNode) taosMemoryFree(pSnap->pNode); @@ -4979,4 +4987,4 @@ void tsdbReaderSetId2(STsdbReader* pReader, const char* idstr) { pReader->status.fileIter.pLastBlockReader->mergeTree.idStr = pReader->idStr; } -void tsdbReaderSetCloseFlag2(STsdbReader* pReader) { pReader->code = TSDB_CODE_TSC_QUERY_CANCELLED; } +void tsdbReaderSetCloseFlag(STsdbReader* pReader) { /*pReader->code = TSDB_CODE_TSC_QUERY_CANCELLED;*/ } diff --git a/source/dnode/vnode/src/tsdb/tsdbReadUtil.c b/source/dnode/vnode/src/tsdb/tsdbReadUtil.c index 1f3c8b54ec..74eb1c7302 100644 --- 
a/source/dnode/vnode/src/tsdb/tsdbReadUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbReadUtil.c @@ -273,7 +273,12 @@ SBrinRecord* getNextBrinRecord(SBrinRecordIter* pIter) { pIter->pCurrentBlk = taosArrayGet(pIter->pBrinBlockList, pIter->blockIndex); tBrinBlockClear(&pIter->block); - tsdbDataFileReadBrinBlock(pIter->pReader, pIter->pCurrentBlk, &pIter->block); + int32_t code = tsdbDataFileReadBrinBlock(pIter->pReader, pIter->pCurrentBlk, &pIter->block); + if (code != TSDB_CODE_SUCCESS) { + tsdbError("failed to read brinBlock from file, code:%s", tstrerror(code)); + return NULL; + } + pIter->recordIndex = -1; } diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 8601248a69..ed4257b86d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -857,6 +857,9 @@ static int32_t tsdbSnapWriteTombRecord(STsdbSnapWriter* writer, const STombRecor } else { break; } + + code = tsdbIterMergerNext(writer->ctx->tombIterMerger); + TSDB_CHECK_CODE(code, lino, _exit); } if (record->suid == INT64_MAX) { diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index 733f073ce9..6df2044612 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -414,20 +414,7 @@ int32_t vnodeSnapWriterClose(SVSnapWriter *pWriter, int8_t rollback, SSnapshot * if (pWriter->pStreamTaskWriter) { code = streamTaskSnapWriterClose(pWriter->pStreamTaskWriter, rollback); if (code) goto _exit; - } - - if (pWriter->pStreamStateWriter) { - code = streamStateSnapWriterClose(pWriter->pStreamStateWriter, rollback); - if (code) goto _exit; - - code = streamStateRebuildFromSnap(pWriter->pStreamStateWriter, 0); - pWriter->pStreamStateWriter = NULL; - if (code) goto _exit; - } - - if (pWriter->pStreamTaskWriter) { - code = streamTaskSnapWriterClose(pWriter->pStreamTaskWriter, rollback); - if (code) goto _exit; + 
pWriter->pStreamTaskWriter = NULL; } if (pWriter->pStreamStateWriter) { diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 220b30090a..20d2faa7f0 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -160,7 +160,7 @@ static int32_t vnodePreProcessDropTtlMsg(SVnode *pVnode, SRpcMsg *pMsg) { } { // find expired uids - tbUids = taosArrayInit(8, sizeof(int64_t)); + tbUids = taosArrayInit(8, sizeof(tb_uid_t)); if (tbUids == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); @@ -756,7 +756,7 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) case TDMT_VND_STREAM_TASK_CHECK: return tqProcessStreamTaskCheckReq(pVnode->pTq, pMsg); case TDMT_VND_STREAM_TASK_CHECK_RSP: - return tqProcessStreamTaskCheckRsp(pVnode->pTq, 0, pMsg); + return tqProcessStreamTaskCheckRsp(pVnode->pTq, pMsg); case TDMT_STREAM_RETRIEVE: return tqProcessTaskRetrieveReq(pVnode->pTq, pMsg); case TDMT_STREAM_RETRIEVE_RSP: @@ -1441,8 +1441,11 @@ static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t ver, void *pReq, in SColData *pColData = (SColData *)taosArrayGet(pSubmitTbData->aCol, 0); TSKEY *aKey = (TSKEY *)(pColData->pData); + vDebug("vgId:%d submit %d rows data, uid:%"PRId64, TD_VID(pVnode), pColData->nVal, pSubmitTbData->uid); for (int32_t iRow = 0; iRow < pColData->nVal; iRow++) { + vDebug("vgId:%d uid:%"PRId64" ts:%"PRId64, TD_VID(pVnode), pSubmitTbData->uid, aKey[iRow]); + if (aKey[iRow] < minKey || aKey[iRow] > maxKey || (iRow > 0 && aKey[iRow] <= aKey[iRow - 1])) { code = TSDB_CODE_INVALID_MSG; vError("vgId:%d %s failed since %s, version:%" PRId64, TD_VID(pVnode), __func__, tstrerror(terrno), ver); @@ -1454,10 +1457,13 @@ static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t ver, void *pReq, in int32_t nRow = TARRAY_SIZE(pSubmitTbData->aRowP); SRow **aRow = (SRow **)TARRAY_DATA(pSubmitTbData->aRowP); + vDebug("vgId:%d submit %d rows data, 
uid:%"PRId64, TD_VID(pVnode), nRow, pSubmitTbData->uid); for (int32_t iRow = 0; iRow < nRow; ++iRow) { + vDebug("vgId:%d uid:%"PRId64" ts:%"PRId64, TD_VID(pVnode), pSubmitTbData->uid, aRow[iRow]->ts); + if (aRow[iRow]->ts < minKey || aRow[iRow]->ts > maxKey || (iRow > 0 && aRow[iRow]->ts <= aRow[iRow - 1]->ts)) { code = TSDB_CODE_INVALID_MSG; - vError("vgId:%d %s failed since %s, version:%" PRId64, TD_VID(pVnode), __func__, tstrerror(terrno), ver); + vError("vgId:%d %s failed since %s, version:%" PRId64, TD_VID(pVnode), __func__, tstrerror(code), ver); goto _exit; } } @@ -1562,6 +1568,8 @@ static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t ver, void *pReq, in } else { // create table failed if (terrno != TSDB_CODE_TDB_TABLE_ALREADY_EXIST) { code = terrno; + vError("vgId:%d failed to create table:%s, code:%s", TD_VID(pVnode), pSubmitTbData->pCreateTbReq->name, + tstrerror(terrno)); goto _exit; } terrno = 0; @@ -1864,7 +1872,6 @@ static int32_t vnodeProcessDeleteReq(SVnode *pVnode, int64_t ver, void *pReq, in tDecoderInit(pCoder, pReq, len); tDecodeDeleteRes(pCoder, pRes); - ASSERT(taosArrayGetSize(pRes->uidList) == 0 || (pRes->skey != 0 && pRes->ekey != 0)); for (int32_t iUid = 0; iUid < taosArrayGetSize(pRes->uidList); iUid++) { uint64_t uid = *(uint64_t *)taosArrayGet(pRes->uidList, iUid); @@ -1938,6 +1945,10 @@ static int32_t vnodeProcessDropIndexReq(SVnode *pVnode, int64_t ver, void *pReq, extern int32_t vnodeProcessCompactVnodeReqImpl(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessCompactVnodeReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp) { + if (!pVnode->restored) { + vInfo("vgId:%d, ignore compact req during restoring. 
ver:%" PRId64, TD_VID(pVnode), ver); + return 0; + } return vnodeProcessCompactVnodeReqImpl(pVnode, ver, pReq, len, pRsp); } diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 43850ebfee..42acdd2b40 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -549,23 +549,34 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) ASSERT(commitIdx == vnodeSyncAppliedIndex(pFsm)); walApplyVer(pVnode->pWal, commitIdx); - pVnode->restored = true; - if (vnodeIsRoleLeader(pVnode)) { - vInfo("vgId:%d, sync restore finished, start to launch stream tasks", vgId); + if (pVnode->pTq->pStreamMeta->taskWillbeLaunched) { + vInfo("vgId:%d, sync restore finished, stream tasks will be launched by other thread", vgId); + return; + } + taosWLockLatch(&pVnode->pTq->pStreamMeta->lock); + if (pVnode->pTq->pStreamMeta->taskWillbeLaunched) { + vInfo("vgId:%d, sync restore finished, stream tasks will be launched by other thread", vgId); + taosWUnLockLatch(&pVnode->pTq->pStreamMeta->lock); + return; + } + + if (vnodeIsRoleLeader(pVnode)) { // start to restore all stream tasks if (tsDisableStream) { - vInfo("vgId:%d, not launch stream tasks, since stream tasks are disabled", vgId); + vInfo("vgId:%d, sync restore finished, not launch stream tasks, since stream tasks are disabled", vgId); } else { - vInfo("vgId:%d start to launch stream tasks", pVnode->config.vgId); + vInfo("vgId:%d sync restore finished, start to launch stream tasks", pVnode->config.vgId); tqStartStreamTasks(pVnode->pTq); tqCheckAndRunStreamTaskAsync(pVnode->pTq); } } else { vInfo("vgId:%d, sync restore finished, not launch stream tasks since not leader", vgId); } + + taosWUnLockLatch(&pVnode->pTq->pStreamMeta->lock); } static void vnodeBecomeFollower(const SSyncFSM *pFsm) { @@ -580,7 +591,10 @@ static void vnodeBecomeFollower(const SSyncFSM *pFsm) { } taosThreadMutexUnlock(&pVnode->lock); - 
tqStopStreamTasks(pVnode->pTq); + if (pVnode->pTq) { + tqUpdateNodeStage(pVnode->pTq, false); + tqStopStreamTasks(pVnode->pTq); + } } static void vnodeBecomeLearner(const SSyncFSM *pFsm) { @@ -599,7 +613,7 @@ static void vnodeBecomeLearner(const SSyncFSM *pFsm) { static void vnodeBecomeLeader(const SSyncFSM *pFsm) { SVnode *pVnode = pFsm->data; if (pVnode->pTq) { - tqUpdateNodeStage(pVnode->pTq); + tqUpdateNodeStage(pVnode->pTq, true); } vDebug("vgId:%d, become leader", pVnode->config.vgId); } diff --git a/source/libs/catalog/src/ctgCache.c b/source/libs/catalog/src/ctgCache.c index 44de83b7ef..979085c397 100644 --- a/source/libs/catalog/src/ctgCache.c +++ b/source/libs/catalog/src/ctgCache.c @@ -760,12 +760,14 @@ int32_t ctgGetCachedStbNameFromSuid(SCatalog* pCtg, char* dbFName, uint64_t suid char *stb = taosHashAcquire(dbCache->stbCache, &suid, sizeof(suid)); if (NULL == stb) { ctgDebug("stb 0x%" PRIx64 " not in cache, dbFName:%s", suid, dbFName); + ctgReleaseDBCache(pCtg, dbCache); return TSDB_CODE_SUCCESS; } *stbName = taosStrdup(stb); taosHashRelease(dbCache->stbCache, stb); + ctgReleaseDBCache(pCtg, dbCache); return TSDB_CODE_SUCCESS; } diff --git a/source/libs/command/src/command.c b/source/libs/command/src/command.c index 991e2013f5..8b868ffde4 100644 --- a/source/libs/command/src/command.c +++ b/source/libs/command/src/command.c @@ -308,10 +308,11 @@ static void setCreateDBResultIntoDataBlock(SSDataBlock* pBlock, char* dbName, ch if (retentions) { len += sprintf(buf2 + VARSTR_HEADER_SIZE + len, " RETENTIONS %s", retentions); - taosMemoryFree(retentions); } } + taosMemoryFree(retentions); + (varDataLen(buf2)) = len; colDataSetVal(pCol2, 0, buf2, false); diff --git a/source/libs/executor/src/dataInserter.c b/source/libs/executor/src/dataInserter.c index 38d82f05fb..e47cbb7eba 100644 --- a/source/libs/executor/src/dataInserter.c +++ b/source/libs/executor/src/dataInserter.c @@ -446,7 +446,7 @@ int32_t createDataInserter(SDataSinkManager* pManager, const 
SDataSinkNode* pDat taosThreadMutexInit(&inserter->mutex, NULL); if (NULL == inserter->pDataBlocks) { terrno = TSDB_CODE_OUT_OF_MEMORY; - return TSDB_CODE_OUT_OF_MEMORY; + goto _return; } inserter->fullOrderColList = pInserterNode->pCols->length == inserter->pSchema->numOfCols; diff --git a/source/libs/executor/src/dynqueryctrloperator.c b/source/libs/executor/src/dynqueryctrloperator.c index f2ed4ba618..8fc46e0239 100755 --- a/source/libs/executor/src/dynqueryctrloperator.c +++ b/source/libs/executor/src/dynqueryctrloperator.c @@ -151,14 +151,21 @@ static void updatePostJoinCurrTableInfo(SStbJoinDynCtrlInfo* pStbJoin) static int32_t buildGroupCacheOperatorParam(SOperatorParam** ppRes, int32_t downstreamIdx, int32_t vgId, int64_t tbUid, bool needCache, SOperatorParam* pChild) { *ppRes = taosMemoryMalloc(sizeof(SOperatorParam)); if (NULL == *ppRes) { + freeOperatorParam(pChild, OP_GET_PARAM); return TSDB_CODE_OUT_OF_MEMORY; } if (pChild) { (*ppRes)->pChildren = taosArrayInit(1, POINTER_BYTES); - if (NULL == *ppRes) { + if (NULL == (*ppRes)->pChildren) { + freeOperatorParam(pChild, OP_GET_PARAM); + freeOperatorParam(*ppRes, OP_GET_PARAM); + *ppRes = NULL; return TSDB_CODE_OUT_OF_MEMORY; } if (NULL == taosArrayPush((*ppRes)->pChildren, &pChild)) { + freeOperatorParam(pChild, OP_GET_PARAM); + freeOperatorParam(*ppRes, OP_GET_PARAM); + *ppRes = NULL; return TSDB_CODE_OUT_OF_MEMORY; } } else { @@ -167,6 +174,8 @@ static int32_t buildGroupCacheOperatorParam(SOperatorParam** ppRes, int32_t down SGcOperatorParam* pGc = taosMemoryMalloc(sizeof(SGcOperatorParam)); if (NULL == pGc) { + freeOperatorParam(*ppRes, OP_GET_PARAM); + *ppRes = NULL; return TSDB_CODE_OUT_OF_MEMORY; } @@ -193,6 +202,7 @@ static int32_t buildGroupCacheNotifyOperatorParam(SOperatorParam** ppRes, int32_ SGcNotifyOperatorParam* pGc = taosMemoryMalloc(sizeof(SGcNotifyOperatorParam)); if (NULL == pGc) { + freeOperatorParam(*ppRes, OP_NOTIFY_PARAM); return TSDB_CODE_OUT_OF_MEMORY; } @@ -248,6 +258,7 @@ static 
int32_t buildBatchExchangeOperatorParam(SOperatorParam** ppRes, int32_t d SExchangeOperatorBatchParam* pExc = taosMemoryMalloc(sizeof(SExchangeOperatorBatchParam)); if (NULL == pExc) { + taosMemoryFreeClear(*ppRes); return TSDB_CODE_OUT_OF_MEMORY; } @@ -255,6 +266,7 @@ static int32_t buildBatchExchangeOperatorParam(SOperatorParam** ppRes, int32_t d pExc->pBatchs = tSimpleHashInit(tSimpleHashGetSize(pVg), taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT)); if (NULL == pExc->pBatchs) { taosMemoryFree(pExc); + taosMemoryFreeClear(*ppRes); return TSDB_CODE_OUT_OF_MEMORY; } tSimpleHashSetFreeFp(pExc->pBatchs, freeExchangeGetBasicOperatorParam); @@ -288,21 +300,36 @@ static int32_t buildBatchExchangeOperatorParam(SOperatorParam** ppRes, int32_t d static int32_t buildMergeJoinOperatorParam(SOperatorParam** ppRes, bool initParam, SOperatorParam* pChild0, SOperatorParam* pChild1) { *ppRes = taosMemoryMalloc(sizeof(SOperatorParam)); if (NULL == *ppRes) { + freeOperatorParam(pChild0, OP_GET_PARAM); + freeOperatorParam(pChild1, OP_GET_PARAM); return TSDB_CODE_OUT_OF_MEMORY; } (*ppRes)->pChildren = taosArrayInit(2, POINTER_BYTES); if (NULL == *ppRes) { + freeOperatorParam(pChild0, OP_GET_PARAM); + freeOperatorParam(pChild1, OP_GET_PARAM); + freeOperatorParam(*ppRes, OP_GET_PARAM); + *ppRes = NULL; return TSDB_CODE_OUT_OF_MEMORY; } if (NULL == taosArrayPush((*ppRes)->pChildren, &pChild0)) { + freeOperatorParam(pChild0, OP_GET_PARAM); + freeOperatorParam(pChild1, OP_GET_PARAM); + freeOperatorParam(*ppRes, OP_GET_PARAM); + *ppRes = NULL; return TSDB_CODE_OUT_OF_MEMORY; } if (NULL == taosArrayPush((*ppRes)->pChildren, &pChild1)) { + freeOperatorParam(pChild1, OP_GET_PARAM); + freeOperatorParam(*ppRes, OP_GET_PARAM); + *ppRes = NULL; return TSDB_CODE_OUT_OF_MEMORY; } SSortMergeJoinOperatorParam* pJoin = taosMemoryMalloc(sizeof(SSortMergeJoinOperatorParam)); if (NULL == pJoin) { + freeOperatorParam(*ppRes, OP_GET_PARAM); + *ppRes = NULL; return TSDB_CODE_OUT_OF_MEMORY; } @@ -318,16 
+345,28 @@ static int32_t buildMergeJoinOperatorParam(SOperatorParam** ppRes, bool initPara static int32_t buildMergeJoinNotifyOperatorParam(SOperatorParam** ppRes, SOperatorParam* pChild0, SOperatorParam* pChild1) { *ppRes = taosMemoryMalloc(sizeof(SOperatorParam)); if (NULL == *ppRes) { + freeOperatorParam(pChild0, OP_NOTIFY_PARAM); + freeOperatorParam(pChild1, OP_NOTIFY_PARAM); return TSDB_CODE_OUT_OF_MEMORY; } (*ppRes)->pChildren = taosArrayInit(2, POINTER_BYTES); if (NULL == *ppRes) { + taosMemoryFreeClear(*ppRes); + freeOperatorParam(pChild0, OP_NOTIFY_PARAM); + freeOperatorParam(pChild1, OP_NOTIFY_PARAM); return TSDB_CODE_OUT_OF_MEMORY; } if (pChild0 && NULL == taosArrayPush((*ppRes)->pChildren, &pChild0)) { + freeOperatorParam(*ppRes, OP_NOTIFY_PARAM); + freeOperatorParam(pChild0, OP_NOTIFY_PARAM); + freeOperatorParam(pChild1, OP_NOTIFY_PARAM); + *ppRes = NULL; return TSDB_CODE_OUT_OF_MEMORY; } if (pChild1 && NULL == taosArrayPush((*ppRes)->pChildren, &pChild1)) { + freeOperatorParam(*ppRes, OP_NOTIFY_PARAM); + freeOperatorParam(pChild1, OP_NOTIFY_PARAM); + *ppRes = NULL; return TSDB_CODE_OUT_OF_MEMORY; } @@ -420,13 +459,34 @@ static int32_t buildSeqStbJoinOperatorParam(SDynQueryCtrlOperatorInfo* pInfo, SS if (TSDB_CODE_SUCCESS == code) { code = buildGroupCacheOperatorParam(&pGcParam0, 0, *leftVg, *leftUid, pPost->leftNeedCache, pSrcParam0); + pSrcParam0 = NULL; } if (TSDB_CODE_SUCCESS == code) { code = buildGroupCacheOperatorParam(&pGcParam1, 1, *rightVg, *rightUid, pPost->rightNeedCache, pSrcParam1); + pSrcParam1 = NULL; } if (TSDB_CODE_SUCCESS == code) { code = buildMergeJoinOperatorParam(ppParam, pSrcParam0 ? 
true : false, pGcParam0, pGcParam1); } + if (TSDB_CODE_SUCCESS != code) { + if (pSrcParam0) { + freeOperatorParam(pSrcParam0, OP_GET_PARAM); + } + if (pSrcParam1) { + freeOperatorParam(pSrcParam1, OP_GET_PARAM); + } + if (pGcParam0) { + freeOperatorParam(pGcParam0, OP_GET_PARAM); + } + if (pGcParam1) { + freeOperatorParam(pGcParam1, OP_GET_PARAM); + } + if (*ppParam) { + freeOperatorParam(*ppParam, OP_GET_PARAM); + *ppParam = NULL; + } + } + return code; } @@ -488,7 +548,7 @@ static void handleSeqJoinCurrRetrieveEnd(SOperatorInfo* pOperator, SStbJoinDynCt if (pPost->leftNeedCache) { uint32_t* num = tSimpleHashGet(pStbJoin->ctx.prev.leftCache, &pPost->leftCurrUid, sizeof(pPost->leftCurrUid)); - if (--(*num) <= 0) { + if (num && --(*num) <= 0) { tSimpleHashRemove(pStbJoin->ctx.prev.leftCache, &pPost->leftCurrUid, sizeof(pPost->leftCurrUid)); notifySeqJoinTableCacheEnd(pOperator, pPost, true); } diff --git a/source/libs/executor/src/eventwindowoperator.c b/source/libs/executor/src/eventwindowoperator.c index d61034c26e..3cfd0ab582 100644 --- a/source/libs/executor/src/eventwindowoperator.c +++ b/source/libs/executor/src/eventwindowoperator.c @@ -277,7 +277,7 @@ int32_t eventWindowAggImpl(SOperatorInfo* pOperator, SEventWindowOperatorInfo* p SFilterColumnParam param2 = {.numOfCols = taosArrayGetSize(pBlock->pDataBlock), .pDataBlock = pBlock->pDataBlock}; code = filterSetDataFromSlotId(pInfo->pEndCondInfo, ¶m2); if (code != TSDB_CODE_SUCCESS) { - return code; + goto _return; } int32_t status2 = 0; @@ -331,10 +331,12 @@ int32_t eventWindowAggImpl(SOperatorInfo* pOperator, SEventWindowOperatorInfo* p } } +_return: + colDataDestroy(ps); taosMemoryFree(ps); colDataDestroy(pe); taosMemoryFree(pe); - return TSDB_CODE_SUCCESS; + return code; } diff --git a/source/libs/executor/src/groupcacheoperator.c b/source/libs/executor/src/groupcacheoperator.c index ff771ccc8c..d20ad1d67c 100755 --- a/source/libs/executor/src/groupcacheoperator.c +++ 
b/source/libs/executor/src/groupcacheoperator.c @@ -223,6 +223,9 @@ static int32_t acquireFdFromFileCtx(SGcFileCacheCtx* pFileCtx, int32_t fileId, S SGroupCacheFileInfo newFile = {0}; taosHashPut(pFileCtx->pCacheFile, &fileId, sizeof(fileId), &newFile, sizeof(newFile)); pTmp = taosHashGet(pFileCtx->pCacheFile, &fileId, sizeof(fileId)); + if (NULL == pTmp) { + return TSDB_CODE_OUT_OF_MEMORY; + } } if (pTmp->deleted) { @@ -287,7 +290,7 @@ static int32_t saveBlocksToDisk(SGroupCacheOperatorInfo* pGCache, SGcDownstreamC if (deleted) { qTrace("FileId:%d-%d-%d already be deleted, skip write", - pCtx->id, pGroup->vgId, pHead->basic.fileId); + pCtx->id, pGroup ? pGroup->vgId : GROUP_CACHE_DEFAULT_VGID, pHead->basic.fileId); int64_t blkId = pHead->basic.blkId; pHead = pHead->next; @@ -337,7 +340,9 @@ static int32_t addBlkToDirtyBufList(SGroupCacheOperatorInfo* pGCache, SGcDownstr return TSDB_CODE_OUT_OF_MEMORY; } pBufInfo = taosHashGet(pCache->pDirtyBlk, &pBufInfo->basic.blkId, sizeof(pBufInfo->basic.blkId)); - + if (NULL == pBufInfo) { + return TSDB_CODE_OUT_OF_MEMORY; + } int32_t code = TSDB_CODE_SUCCESS; SGcBlkBufInfo* pWriteHead = NULL; @@ -378,6 +383,10 @@ static int32_t addBlkToDirtyBufList(SGroupCacheOperatorInfo* pGCache, SGcDownstr static FORCE_INLINE void chkRemoveVgroupCurrFile(SGcFileCacheCtx* pFileCtx, int32_t downstreamIdx, int32_t vgId) { SGroupCacheFileInfo* pFileInfo = taosHashGet(pFileCtx->pCacheFile, &pFileCtx->fileId, sizeof(pFileCtx->fileId)); + if (NULL == pFileInfo) { + return; + } + if (0 == pFileInfo->groupNum) { removeGroupCacheFile(pFileInfo); @@ -711,6 +720,9 @@ static int32_t addFileRefTableNum(SGcFileCacheCtx* pFileCtx, int32_t fileId, int newFile.groupNum = 1; taosHashPut(pFileCtx->pCacheFile, &fileId, sizeof(fileId), &newFile, sizeof(newFile)); pTmp = taosHashGet(pFileCtx->pCacheFile, &fileId, sizeof(fileId)); + if (NULL == pTmp) { + return TSDB_CODE_OUT_OF_MEMORY; + } } else { pTmp->groupNum++; } @@ -786,6 +798,9 @@ static int32_t 
addNewGroupData(struct SOperatorInfo* pOperator, SOperatorParam* } *ppGrp = taosHashGet(pGrpHash, &uid, sizeof(uid)); + if (NULL == *ppGrp) { + return TSDB_CODE_OUT_OF_MEMORY; + } initNewGroupData(pCtx, *ppGrp, pParam->downstreamIdx, vgId, pGCache->batchFetch, pGcParam->needCache); qError("new group %" PRIu64 " initialized, downstreamIdx:%d, vgId:%d, needCache:%d", uid, pParam->downstreamIdx, vgId, pGcParam->needCache); diff --git a/source/libs/executor/src/groupoperator.c b/source/libs/executor/src/groupoperator.c index fb2204eae8..f5cabd823c 100644 --- a/source/libs/executor/src/groupoperator.c +++ b/source/libs/executor/src/groupoperator.c @@ -1132,7 +1132,8 @@ static SSDataBlock* doStreamHashPartition(SOperatorInfo* pOperator) { } break; case STREAM_CREATE_CHILD_TABLE: case STREAM_RETRIEVE: - case STREAM_CHECKPOINT: { + case STREAM_CHECKPOINT: + case STREAM_GET_ALL: { return pBlock; } default: diff --git a/source/libs/executor/src/hashjoinoperator.c b/source/libs/executor/src/hashjoinoperator.c index d57f9f9f90..f382283d27 100755 --- a/source/libs/executor/src/hashjoinoperator.c +++ b/source/libs/executor/src/hashjoinoperator.c @@ -636,12 +636,14 @@ static int32_t addRowToHashImpl(SHJoinOperatorInfo* pJoin, SGroupData* pGroup, S int32_t code = getValBufFromPages(pJoin->pRowBufs, getHJoinValBufSize(pTable, rowIdx), &pTable->valData, pRow); if (code) { + taosMemoryFree(pRow); return code; } if (NULL == pGroup) { pRow->next = NULL; if (tSimpleHashPut(pJoin->pKeyHash, pTable->keyData, keyLen, &group, sizeof(group))) { + taosMemoryFree(pRow); return TSDB_CODE_OUT_OF_MEMORY; } } else { diff --git a/source/libs/executor/src/mergejoinoperator.c b/source/libs/executor/src/mergejoinoperator.c index 1b286f9bdd..2348a3c97b 100644 --- a/source/libs/executor/src/mergejoinoperator.c +++ b/source/libs/executor/src/mergejoinoperator.c @@ -711,6 +711,11 @@ static bool mergeJoinGetNextTimestamp(SOperatorInfo* pOperator, int64_t* pLeftTs } } } + + if (NULL == pJoinInfo->pLeft || 
NULL == pJoinInfo->pRight) { + setMergeJoinDone(pOperator); + return false; + } // only the timestamp match support for ordinary table SColumnInfoData* pLeftCol = taosArrayGet(pJoinInfo->pLeft->pDataBlock, pJoinInfo->leftCol.slotId); diff --git a/source/libs/executor/src/projectoperator.c b/source/libs/executor/src/projectoperator.c index 00b246afad..ab7a15eacd 100644 --- a/source/libs/executor/src/projectoperator.c +++ b/source/libs/executor/src/projectoperator.c @@ -293,7 +293,8 @@ SSDataBlock* doProjectOperation(SOperatorInfo* pOperator) { // for stream interval if (pBlock->info.type == STREAM_RETRIEVE || pBlock->info.type == STREAM_DELETE_RESULT || - pBlock->info.type == STREAM_DELETE_DATA || pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { + pBlock->info.type == STREAM_DELETE_DATA || pBlock->info.type == STREAM_CREATE_CHILD_TABLE || + pBlock->info.type == STREAM_CHECKPOINT) { return pBlock; } diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index e96ed87d3e..474128007a 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2257,7 +2257,7 @@ FETCH_NEXT_BLOCK: int32_t current = pInfo->validBlockIndex++; SPackedData* pSubmit = taosArrayGet(pInfo->pBlockLists, current); - qDebug("set %d/%d as the input submit block, %s", current, totalBlocks, id); + qDebug("set %d/%d as the input submit block, %s", current + 1, totalBlocks, id); if (pAPI->tqReaderFn.tqReaderSetSubmitMsg(pInfo->tqReader, pSubmit->msgStr, pSubmit->msgLen, pSubmit->ver) < 0) { qError("submit msg messed up when initializing stream submit block %p, current %d/%d, %s", pSubmit, current, totalBlocks, id); continue; @@ -2883,7 +2883,7 @@ static EDealRes tagScanRewriteTagColumn(SNode** pNode, void* pContext) { } -static void tagScanFilterByTagCond(SArray* aUidTags, SNode* pTagCond, SArray* aFilterIdxs, void* pVnode, SStorageAPI* pAPI, STagScanInfo* pInfo) { +static int32_t tagScanFilterByTagCond(SArray* aUidTags, 
SNode* pTagCond, SArray* aFilterIdxs, void* pVnode, SStorageAPI* pAPI, STagScanInfo* pInfo) { int32_t code = 0; int32_t numOfTables = taosArrayGetSize(aUidTags); @@ -2894,9 +2894,15 @@ static void tagScanFilterByTagCond(SArray* aUidTags, SNode* pTagCond, SArray* aF SDataType type = {.type = TSDB_DATA_TYPE_BOOL, .bytes = sizeof(bool)}; SScalarParam output = {0}; - tagScanCreateResultData(&type, numOfTables, &output); + code = tagScanCreateResultData(&type, numOfTables, &output); + if (TSDB_CODE_SUCCESS != code) { + return code; + } - scalarCalculate(pTagCond, pBlockList, &output); + code = scalarCalculate(pTagCond, pBlockList, &output); + if (TSDB_CODE_SUCCESS != code) { + return code; + } bool* result = (bool*)output.columnData->pData; for (int32_t i = 0 ; i < numOfTables; ++i) { @@ -2911,7 +2917,7 @@ static void tagScanFilterByTagCond(SArray* aUidTags, SNode* pTagCond, SArray* aF blockDataDestroy(pResBlock); taosArrayDestroy(pBlockList); - + return TSDB_CODE_SUCCESS; } static void tagScanFillOneCellWithTag(SOperatorInfo* pOperator, const STUidTagInfo* pUidTagInfo, SExprInfo* pExprInfo, SColumnInfoData* pColInfo, int rowIndex, const SStorageAPI* pAPI, void* pVnode) { @@ -3024,7 +3030,11 @@ static SSDataBlock* doTagScanFromCtbIdx(SOperatorInfo* pOperator) { bool ignoreFilterIdx = true; if (pInfo->pTagCond != NULL) { ignoreFilterIdx = false; - tagScanFilterByTagCond(aUidTags, pInfo->pTagCond, aFilterIdxs, pInfo->readHandle.vnode, pAPI, pInfo); + int32_t code = tagScanFilterByTagCond(aUidTags, pInfo->pTagCond, aFilterIdxs, pInfo->readHandle.vnode, pAPI, pInfo); + if (TSDB_CODE_SUCCESS != code) { + pOperator->pTaskInfo->code = code; + T_LONG_JMP(pOperator->pTaskInfo->env, code); + } } else { ignoreFilterIdx = true; } diff --git a/source/libs/executor/src/tfill.c b/source/libs/executor/src/tfill.c index 4e0dff9d4f..44d39392a2 100644 --- a/source/libs/executor/src/tfill.c +++ b/source/libs/executor/src/tfill.c @@ -578,9 +578,8 @@ int64_t 
getNumOfResultsAfterFillGap(SFillInfo* pFillInfo, TSKEY ekey, int32_t ma SColumnInfoData* pCol = taosArrayGet(pFillInfo->pSrcBlock->pDataBlock, pFillInfo->srcTsSlotId); int64_t* tsList = (int64_t*)pCol->pData; TSKEY lastKey = tsList[pFillInfo->numOfRows - 1]; - numOfRes = taosTimeCountInterval(lastKey, pFillInfo->currentKey, pFillInfo->interval.sliding, - pFillInfo->interval.slidingUnit, pFillInfo->interval.precision); - numOfRes += 1; + numOfRes = taosTimeCountIntervalForFill(lastKey, pFillInfo->currentKey, pFillInfo->interval.sliding, + pFillInfo->interval.slidingUnit, pFillInfo->interval.precision, pFillInfo->order); ASSERT(numOfRes >= numOfRows); } else { // reach the end of data if ((ekey1 < pFillInfo->currentKey && FILL_IS_ASC_FILL(pFillInfo)) || @@ -588,9 +587,8 @@ int64_t getNumOfResultsAfterFillGap(SFillInfo* pFillInfo, TSKEY ekey, int32_t ma return 0; } - numOfRes = taosTimeCountInterval(ekey1, pFillInfo->currentKey, pFillInfo->interval.sliding, - pFillInfo->interval.slidingUnit, pFillInfo->interval.precision); - numOfRes += 1; + numOfRes = taosTimeCountIntervalForFill(ekey1, pFillInfo->currentKey, pFillInfo->interval.sliding, + pFillInfo->interval.slidingUnit, pFillInfo->interval.precision, pFillInfo->order); } return (numOfRes > maxNumOfRows) ? 
maxNumOfRows : numOfRes; diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 6c4a780dfb..287c824540 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -904,6 +904,7 @@ static int32_t getPageBufIncForRow(SSDataBlock* blk, int32_t row, int32_t rowIdx } static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockOrderInfo* order, SArray* aExtSrc) { + int32_t code = TSDB_CODE_SUCCESS; int pgHeaderSz = sizeof(int32_t) + sizeof(int32_t) * taosArrayGetSize(pHandle->pDataBlock->pDataBlock); int32_t rowCap = blockDataGetCapacityInRow(pHandle->pDataBlock, pHandle->pageSize, pgHeaderSz); blockDataEnsureCapacity(pHandle->pDataBlock, rowCap); @@ -930,7 +931,13 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO SArray* aPgId = taosArrayInit(8, sizeof(int32_t)); SMultiwayMergeTreeInfo* pTree = NULL; - tMergeTreeCreate(&pTree, taosArrayGetSize(aBlk), &sup, blockCompareTsFn); + code = tMergeTreeCreate(&pTree, taosArrayGetSize(aBlk), &sup, blockCompareTsFn); + if (TSDB_CODE_SUCCESS != code) { + taosMemoryFree(sup.aRowIdx); + taosMemoryFree(sup.aTs); + + return code; + } int32_t nRows = 0; int32_t nMergedRows = 0; bool mergeLimitReached = false; @@ -1054,7 +1061,14 @@ static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { tSimpleHashClear(mUidBlk); int64_t p = taosGetTimestampUs(); - sortBlocksToExtSource(pHandle, aBlkSort, pOrder, aExtSrc); + code = sortBlocksToExtSource(pHandle, aBlkSort, pOrder, aExtSrc); + if (code != TSDB_CODE_SUCCESS) { + tSimpleHashCleanup(mUidBlk); + taosArrayDestroy(aBlkSort); + taosArrayDestroy(aExtSrc); + return code; + } + int64_t el = taosGetTimestampUs() - p; pHandle->sortElapsed += el; diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index 0057df7902..aad08fc337 100644 --- a/source/libs/function/src/builtins.c +++ b/source/libs/function/src/builtins.c @@ -1786,7 +1786,7 @@ 
static int32_t translateDiff(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { } uint8_t colType = ((SExprNode*)nodesListGetNode(pFunc->pParameterList, 0))->resType.type; - if (!IS_SIGNED_NUMERIC_TYPE(colType) && !IS_FLOAT_TYPE(colType) && TSDB_DATA_TYPE_BOOL != colType && + if (!IS_INTEGER_TYPE(colType) && !IS_FLOAT_TYPE(colType) && TSDB_DATA_TYPE_BOOL != colType && !IS_TIMESTAMP_TYPE(colType)) { return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName); } @@ -1815,6 +1815,8 @@ static int32_t translateDiff(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { uint8_t resType; if (IS_SIGNED_NUMERIC_TYPE(colType) || IS_TIMESTAMP_TYPE(colType) || TSDB_DATA_TYPE_BOOL == colType) { resType = TSDB_DATA_TYPE_BIGINT; + } else if (IS_UNSIGNED_NUMERIC_TYPE(colType)) { + resType = TSDB_DATA_TYPE_UBIGINT; } else { resType = TSDB_DATA_TYPE_DOUBLE; } diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index bcbb3af950..108a641c08 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -2714,16 +2714,20 @@ static int32_t doSetPrevVal(SDiffInfo* pDiffInfo, int32_t type, const char* pv, case TSDB_DATA_TYPE_BOOL: pDiffInfo->prev.i64 = *(bool*)pv ? 
1 : 0; break; + case TSDB_DATA_TYPE_UTINYINT: case TSDB_DATA_TYPE_TINYINT: pDiffInfo->prev.i64 = *(int8_t*)pv; break; + case TSDB_DATA_TYPE_UINT: case TSDB_DATA_TYPE_INT: pDiffInfo->prev.i64 = *(int32_t*)pv; break; + case TSDB_DATA_TYPE_USMALLINT: case TSDB_DATA_TYPE_SMALLINT: pDiffInfo->prev.i64 = *(int16_t*)pv; break; case TSDB_DATA_TYPE_TIMESTAMP: + case TSDB_DATA_TYPE_UBIGINT: case TSDB_DATA_TYPE_BIGINT: pDiffInfo->prev.i64 = *(int64_t*)pv; break; @@ -2745,6 +2749,7 @@ static int32_t doHandleDiff(SDiffInfo* pDiffInfo, int32_t type, const char* pv, int64_t ts) { pDiffInfo->prevTs = ts; switch (type) { + case TSDB_DATA_TYPE_UINT: case TSDB_DATA_TYPE_INT: { int32_t v = *(int32_t*)pv; int64_t delta = v - pDiffInfo->prev.i64; // direct previous may be null @@ -2758,6 +2763,7 @@ static int32_t doHandleDiff(SDiffInfo* pDiffInfo, int32_t type, const char* pv, break; } case TSDB_DATA_TYPE_BOOL: + case TSDB_DATA_TYPE_UTINYINT: case TSDB_DATA_TYPE_TINYINT: { int8_t v = *(int8_t*)pv; int64_t delta = v - pDiffInfo->prev.i64; // direct previous may be null @@ -2769,6 +2775,7 @@ static int32_t doHandleDiff(SDiffInfo* pDiffInfo, int32_t type, const char* pv, pDiffInfo->prev.i64 = v; break; } + case TSDB_DATA_TYPE_USMALLINT: case TSDB_DATA_TYPE_SMALLINT: { int16_t v = *(int16_t*)pv; int64_t delta = v - pDiffInfo->prev.i64; // direct previous may be null @@ -2781,6 +2788,7 @@ static int32_t doHandleDiff(SDiffInfo* pDiffInfo, int32_t type, const char* pv, break; } case TSDB_DATA_TYPE_TIMESTAMP: + case TSDB_DATA_TYPE_UBIGINT: case TSDB_DATA_TYPE_BIGINT: { int64_t v = *(int64_t*)pv; int64_t delta = v - pDiffInfo->prev.i64; // direct previous may be null diff --git a/source/libs/function/src/udfd.c b/source/libs/function/src/udfd.c index bd459af9f5..c8eb7580ed 100644 --- a/source/libs/function/src/udfd.c +++ b/source/libs/function/src/udfd.c @@ -69,7 +69,7 @@ const char *udfdCPluginUdfInitLoadInitDestoryFuncs(SUdfCPluginCtx *udfCtx, const void 
udfdCPluginUdfInitLoadAggFuncs(SUdfCPluginCtx *udfCtx, const char *udfName) { char processFuncName[TSDB_FUNC_NAME_LEN] = {0}; - strncpy(processFuncName, udfName, sizeof(processFuncName)); + snprintf(processFuncName, sizeof(processFuncName), "%s", udfName); uv_dlsym(&udfCtx->lib, processFuncName, (void **)(&udfCtx->aggProcFunc)); char startFuncName[TSDB_FUNC_NAME_LEN + 7] = {0}; @@ -103,7 +103,7 @@ int32_t udfdCPluginUdfInit(SScriptUdfInfo *udf, void **pUdfCtx) { if (udf->funcType == UDF_FUNC_TYPE_SCALAR) { char processFuncName[TSDB_FUNC_NAME_LEN] = {0}; - strncpy(processFuncName, udfName, sizeof(processFuncName)); + snprintf(processFuncName, sizeof(processFuncName), "%s", udfName); uv_dlsym(&udfCtx->lib, processFuncName, (void **)(&udfCtx->scalarProcFunc)); } else if (udf->funcType == UDF_FUNC_TYPE_AGG) { udfdCPluginUdfInitLoadAggFuncs(udfCtx, udfName); diff --git a/source/libs/parser/src/parInsertUtil.c b/source/libs/parser/src/parInsertUtil.c index 5ae2cf12c9..79e305989b 100644 --- a/source/libs/parser/src/parInsertUtil.c +++ b/source/libs/parser/src/parInsertUtil.c @@ -683,8 +683,10 @@ int rawBlockBindData(SQuery* query, STableMeta* pTableMeta, void* data, SVCreate pStart += BitmapLen(numOfRows); } char* pData = pStart; - - tColDataAddValueByDataBlock(pCol, pColSchema->type, pColSchema->bytes, numOfRows, offset, pData); + ret = tColDataAddValueByDataBlock(pCol, pColSchema->type, pColSchema->bytes, numOfRows, offset, pData); + if(ret != 0){ + goto end; + } fields += sizeof(int8_t) + sizeof(int32_t); if (needChangeLength) { pStart += htonl(colLength[j]); @@ -712,7 +714,10 @@ int rawBlockBindData(SQuery* query, STableMeta* pTableMeta, void* data, SVCreate char* pData = pStart; SColData* pCol = taosArrayGet(pTableCxt->pData->aCol, j); - tColDataAddValueByDataBlock(pCol, pColSchema->type, pColSchema->bytes, numOfRows, offset, pData); + ret = tColDataAddValueByDataBlock(pCol, pColSchema->type, pColSchema->bytes, numOfRows, offset, pData); + if(ret != 0){ + goto end; + 
} fields += sizeof(int8_t) + sizeof(int32_t); if (needChangeLength) { pStart += htonl(colLength[i]); @@ -729,7 +734,10 @@ int rawBlockBindData(SQuery* query, STableMeta* pTableMeta, void* data, SVCreate for (int c = 0; c < boundInfo->numOfBound; ++c) { if( boundInfo->pColIndex[c] != -1){ SColData* pCol = taosArrayGet(pTableCxt->pData->aCol, c); - tColDataAddValueByDataBlock(pCol, 0, 0, numOfRows, NULL, NULL); + ret = tColDataAddValueByDataBlock(pCol, 0, 0, numOfRows, NULL, NULL); + if(ret != 0){ + goto end; + } }else{ boundInfo->pColIndex[c] = c; // restore for next block } diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 476df19ad0..2d3710be70 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -7108,8 +7108,8 @@ static int32_t createLastTsSelectStmt(char* pDb, char* pTable, STableMeta* pMeta return TSDB_CODE_OUT_OF_MEMORY; } - strcpy(col->tableAlias, pTable); - strcpy(col->colName, pMeta->schema[0].name); + tstrncpy(col->tableAlias, pTable, tListLen(col->tableAlias)); + tstrncpy(col->colName, pMeta->schema[0].name, tListLen(col->colName)); SNodeList* pParamterList = nodesMakeList(); if (NULL == pParamterList) { nodesDestroyNode((SNode*)col); @@ -7118,7 +7118,6 @@ static int32_t createLastTsSelectStmt(char* pDb, char* pTable, STableMeta* pMeta int32_t code = nodesListStrictAppend(pParamterList, (SNode*)col); if (code) { - nodesDestroyNode((SNode*)col); nodesDestroyList(pParamterList); return code; } @@ -7136,7 +7135,6 @@ static int32_t createLastTsSelectStmt(char* pDb, char* pTable, STableMeta* pMeta } code = nodesListStrictAppend(pProjectionList, pFunc); if (code) { - nodesDestroyNode(pFunc); nodesDestroyList(pProjectionList); return code; } diff --git a/source/libs/planner/src/planLogicCreater.c b/source/libs/planner/src/planLogicCreater.c index d460c8074d..96d253494d 100644 --- a/source/libs/planner/src/planLogicCreater.c +++ 
b/source/libs/planner/src/planLogicCreater.c @@ -46,8 +46,8 @@ static void setColumnInfo(SFunctionNode* pFunc, SColumnNode* pCol, bool isPartit pCol->colType = COLUMN_TYPE_TBNAME; SValueNode* pVal = (SValueNode*)nodesListGetNode(pFunc->pParameterList, 0); if (pVal) { - strcpy(pCol->tableName, pVal->literal); - strcpy(pCol->tableAlias, pVal->literal); + snprintf(pCol->tableName, sizeof(pCol->tableName), "%s", pVal->literal); + snprintf(pCol->tableAlias, sizeof(pCol->tableAlias), "%s", pVal->literal); } break; case FUNCTION_TYPE_WSTART: @@ -531,6 +531,9 @@ static int32_t createJoinLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSelect if (TSDB_CODE_SUCCESS == code) { code = nodesListStrictAppend(pJoin->node.pChildren, (SNode*)pLeft); } + if (TSDB_CODE_SUCCESS != code) { + pLeft = NULL; + } } SLogicNode* pRight = NULL; @@ -584,7 +587,7 @@ static int32_t createJoinLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSelect } } - if (NULL == pJoin->node.pTargets) { + if (NULL == pJoin->node.pTargets && NULL != pLeft) { pJoin->node.pTargets = nodesCloneList(pLeft->pTargets); if (NULL == pJoin->node.pTargets) { code = TSDB_CODE_OUT_OF_MEMORY; diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 7b2cd71677..1b32cdb17b 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -812,7 +812,7 @@ static bool pushDownCondOptIsColEqualOnCond(SJoinLogicNode* pJoin, SNode* pCond, return false; } SOperatorNode* pOper = (SOperatorNode*)pCond; - if (QUERY_NODE_COLUMN != nodeType(pOper->pLeft) || QUERY_NODE_COLUMN != nodeType(pOper->pRight)) { + if (QUERY_NODE_COLUMN != nodeType(pOper->pLeft) || NULL == pOper->pRight || QUERY_NODE_COLUMN != nodeType(pOper->pRight)) { return false; } SColumnNode* pLeft = (SColumnNode*)(pOper->pLeft); @@ -3221,8 +3221,11 @@ int32_t stbJoinOptAddFuncToScanNode(char* funcName, SScanLogicNode* pScan) { SFunctionNode* pUidFunc = createFunction(funcName, NULL); 
snprintf(pUidFunc->node.aliasName, sizeof(pUidFunc->node.aliasName), "%s.%p", pUidFunc->functionName, pUidFunc); - nodesListStrictAppend(pScan->pScanPseudoCols, (SNode *)pUidFunc); - return createColumnByRewriteExpr((SNode*)pUidFunc, &pScan->node.pTargets); + int32_t code = nodesListStrictAppend(pScan->pScanPseudoCols, (SNode *)pUidFunc); + if (TSDB_CODE_SUCCESS == code) { + code = createColumnByRewriteExpr((SNode*)pUidFunc, &pScan->node.pTargets); + } + return code; } @@ -3369,12 +3372,7 @@ static int32_t stbJoinOptCreateTableScanNodes(SLogicNode* pJoin, SNodeList** ppL pScan->scanType = SCAN_TYPE_TABLE; } - if (TSDB_CODE_SUCCESS == code) { - *ppList = pList; - } else { - nodesDestroyList(pList); - *ppList = NULL; - } + *ppList = pList; return code; } @@ -3478,12 +3476,15 @@ static int32_t stbJoinOptCreateMergeJoinNode(SLogicNode* pOrig, SLogicNode* pChi FOREACH(pNode, pJoin->node.pChildren) { ERASE_NODE(pJoin->node.pChildren); } - nodesListStrictAppend(pJoin->node.pChildren, (SNode *)pChild); - pChild->pParent = (SLogicNode*)pJoin; + int32_t code = nodesListStrictAppend(pJoin->node.pChildren, (SNode *)pChild); + if (TSDB_CODE_SUCCESS == code) { + pChild->pParent = (SLogicNode*)pJoin; + *ppLogic = (SLogicNode*)pJoin; + } else { + nodesDestroyNode((SNode*)pJoin); + } - *ppLogic = (SLogicNode*)pJoin; - - return TSDB_CODE_SUCCESS; + return code; } static int32_t stbJoinOptCreateDynQueryCtrlNode(SLogicNode* pRoot, SLogicNode* pPrev, SLogicNode* pPost, bool* srcScan, SLogicNode** ppDynNode) { @@ -3523,11 +3524,18 @@ static int32_t stbJoinOptCreateDynQueryCtrlNode(SLogicNode* pRoot, SLogicNode* p nodesListStrictAppend(pDynCtrl->stbJoin.pUidList, nodesListGetNode(pHJoin->node.pTargets, 2)); nodesListStrictAppend(pDynCtrl->stbJoin.pVgList, nodesListGetNode(pHJoin->node.pTargets, 1)); nodesListStrictAppend(pDynCtrl->stbJoin.pVgList, nodesListGetNode(pHJoin->node.pTargets, 3)); - + if (TSDB_CODE_SUCCESS == code) { - nodesListStrictAppend(pDynCtrl->node.pChildren, 
(SNode*)pPrev); - nodesListStrictAppend(pDynCtrl->node.pChildren, (SNode*)pPost); - pDynCtrl->node.pTargets = nodesCloneList(pPost->pTargets); + code = nodesListStrictAppend(pDynCtrl->node.pChildren, (SNode*)pPrev); + if (TSDB_CODE_SUCCESS == code) { + code = nodesListStrictAppend(pDynCtrl->node.pChildren, (SNode*)pPost); + } + if (TSDB_CODE_SUCCESS == code) { + pDynCtrl->node.pTargets = nodesCloneList(pPost->pTargets); + if (!pDynCtrl->node.pTargets) { + code = TSDB_CODE_OUT_OF_MEMORY; + } + } } if (TSDB_CODE_SUCCESS == code) { diff --git a/source/libs/planner/src/planPhysiCreater.c b/source/libs/planner/src/planPhysiCreater.c index 5628f5cf0f..e92e70b757 100644 --- a/source/libs/planner/src/planPhysiCreater.c +++ b/source/libs/planner/src/planPhysiCreater.c @@ -1025,11 +1025,7 @@ static int32_t createGroupCachePhysiNode(SPhysiPlanContext* pCxt, SNodeList* pCh } */ - if (TSDB_CODE_SUCCESS == code) { - *pPhyNode = (SPhysiNode*)pGrpCache; - } else { - nodesDestroyNode((SNode*)pGrpCache); - } + *pPhyNode = (SPhysiNode*)pGrpCache; return code; } @@ -1059,6 +1055,8 @@ static int32_t updateDynQueryCtrlStbJoinInfo(SPhysiPlanContext* pCxt, SNodeList* } pDynCtrl->stbJoin.batchFetch = pLogicNode->stbJoin.batchFetch; } + nodesDestroyList(pVgList); + nodesDestroyList(pUidList); return code; } diff --git a/source/libs/scalar/src/scalar.c b/source/libs/scalar/src/scalar.c index cc6be68c85..3e003234cf 100644 --- a/source/libs/scalar/src/scalar.c +++ b/source/libs/scalar/src/scalar.c @@ -1671,6 +1671,9 @@ static int32_t sclGetJsonOperatorResType(SOperatorNode *pOp) { } static int32_t sclGetBitwiseOperatorResType(SOperatorNode *pOp) { + if (!pOp->pLeft || !pOp->pRight) { + return TSDB_CODE_TSC_INVALID_OPERATION; + } SDataType ldt = ((SExprNode *)(pOp->pLeft))->resType; SDataType rdt = ((SExprNode *)(pOp->pRight))->resType; if(TSDB_DATA_TYPE_VARBINARY == ldt.type || TSDB_DATA_TYPE_VARBINARY == rdt.type){ diff --git a/source/libs/scalar/src/sclvector.c 
b/source/libs/scalar/src/sclvector.c index e12c62ad87..c5789a65ca 100644 --- a/source/libs/scalar/src/sclvector.c +++ b/source/libs/scalar/src/sclvector.c @@ -329,6 +329,7 @@ static FORCE_INLINE void varToVarbinary(char *buf, SScalarParam *pOut, int32_t r if (t == NULL) { sclError("Out of memory"); terrno = TSDB_CODE_OUT_OF_MEMORY; + taosMemoryFree(data); return; } varDataSetLen(t, size); diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index bb81582a2d..64df8e2f44 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -29,17 +29,24 @@ extern "C" { #define ONE_MB_F (1048576.0) #define SIZE_IN_MB(_v) ((_v) / ONE_MB_F) -typedef struct { +typedef struct SStreamGlobalEnv { int8_t inited; void* timer; } SStreamGlobalEnv; -typedef struct { +typedef struct SStreamContinueExecInfo { SEpSet epset; int32_t taskId; SRpcMsg msg; } SStreamContinueExecInfo; +struct STokenBucket { + int32_t capacity; // total capacity + int64_t fillTimestamp;// fill timestamp + int32_t numOfToken; // total available tokens + int32_t rate; // number of token per second +}; + extern SStreamGlobalEnv streamEnv; extern int32_t streamBackendId; extern int32_t streamBackendCfWrapperId; @@ -77,6 +84,17 @@ int32_t streamNotifyUpstreamContinue(SStreamTask* pTask); int32_t streamTaskFillHistoryFinished(SStreamTask* pTask); int32_t streamTransferStateToStreamTask(SStreamTask* pTask); +int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t cap, int32_t rate); + +SStreamQueue* streamQueueOpen(int64_t cap); +void streamQueueClose(SStreamQueue* pQueue, int32_t taskId); +void streamQueueProcessSuccess(SStreamQueue* queue); +void streamQueueProcessFail(SStreamQueue* queue); +void* streamQueueNextItem(SStreamQueue* pQueue); +void streamFreeQitem(SStreamQueueItem* data); + +STaskId extractStreamTaskKey(const SStreamTask* pTask); + #ifdef __cplusplus } #endif diff --git a/source/libs/stream/src/stream.c 
b/source/libs/stream/src/stream.c index 1f93498557..5a7e14c629 100644 --- a/source/libs/stream/src/stream.c +++ b/source/libs/stream/src/stream.c @@ -83,7 +83,6 @@ static void streamSchedByTimer(void* param, void* tmrId) { atomic_store_8(&pTask->schedInfo.status, TASK_TRIGGER_STATUS__INACTIVE); pTrigger->pBlock->info.type = STREAM_GET_ALL; if (streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pTrigger) < 0) { - taosFreeQitem(pTrigger); taosTmrReset(streamSchedByTimer, (int32_t)pTask->info.triggerParam, pTask, streamEnv.timer, &pTask->schedInfo.pTimer); return; } @@ -109,14 +108,12 @@ int32_t streamSetupScheduleTrigger(SStreamTask* pTask) { } int32_t streamSchedExec(SStreamTask* pTask) { - int8_t schedStatus = atomic_val_compare_exchange_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE, - TASK_SCHED_STATUS__WAITING); - + int8_t schedStatus = streamTaskSetSchedStatusWait(pTask); if (schedStatus == TASK_SCHED_STATUS__INACTIVE) { SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); if (pRunReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + /*int8_t status = */streamTaskSetSchedStatusInActive(pTask); qError("failed to create msg to aunch s-task:%s, reason out of memory", pTask->id.idStr); return -1; } @@ -178,7 +175,7 @@ static int32_t streamTaskAppendInputBlocks(SStreamTask* pTask, const SStreamDisp } int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pRsp) { - SStreamDataBlock* pData = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0); + SStreamDataBlock* pData = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, sizeof(SStreamDataBlock)); int8_t status = TASK_INPUT_STATUS__NORMAL; // enqueue @@ -213,29 +210,6 @@ int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq, return status == TASK_INPUT_STATUS__NORMAL ? 
0 : -1; } -int32_t streamTaskOutputResultBlock(SStreamTask* pTask, SStreamDataBlock* pBlock) { - int32_t code = 0; - int32_t type = pTask->outputInfo.type; - if (type == TASK_OUTPUT__TABLE) { - pTask->tbSink.tbSinkFunc(pTask, pTask->tbSink.vnode, pBlock->blocks); - destroyStreamDataBlock(pBlock); - } else if (type == TASK_OUTPUT__SMA) { - pTask->smaSink.smaSink(pTask->smaSink.vnode, pTask->smaSink.smaId, pBlock->blocks); - destroyStreamDataBlock(pBlock); - } else { - ASSERT(type == TASK_OUTPUT__FIXED_DISPATCH || type == TASK_OUTPUT__SHUFFLE_DISPATCH); - code = taosWriteQitem(pTask->outputInfo.queue->pQueue, pBlock); - if (code != 0) { - qError("s-task:%s failed to put res into outputQ", pTask->id.idStr); - } - - streamDispatchStreamBlock(pTask); - return code; - } - - return 0; -} - int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pRsp, bool exec) { qDebug("s-task:%s receive dispatch msg from taskId:0x%x(vgId:%d), msgLen:%" PRId64, pTask->id.idStr, pReq->upstreamTaskId, pReq->upstreamNodeId, pReq->totalLen); @@ -280,8 +254,11 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S } tDeleteStreamDispatchReq(pReq); - streamSchedExec(pTask); + int8_t schedStatus = streamTaskSetSchedStatusWait(pTask); + if (schedStatus == TASK_SCHED_STATUS__INACTIVE) { + streamTryExec(pTask); + } return 0; } diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 82fa21ea40..7b3c99acfb 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -178,6 +178,10 @@ void bkdMgtDestroy(SBackendManager* bm) { taosHashCleanup(bm->pSstTbl[0]); taosHashCleanup(bm->pSstTbl[1]); + + taosMemoryFree(bm->pCurrent); + taosMemoryFree(bm->pManifest); + taosMemoryFree(bm); } @@ -239,7 +243,7 @@ int32_t bkdMgtGetDelta(SBackendManager* bm, int64_t chkpId, SArray* list) { continue; } if (strlen(name) >= sstLen && strncmp(name + 
strlen(name) - 4, pSST, sstLen) == 0) { - char* p = taosStrdup(name); + // char* p = taosStrdup(name); taosHashPut(bm->pSstTbl[1 - bm->idx], name, strlen(name), &dummy, sizeof(dummy)); continue; } @@ -267,7 +271,7 @@ int32_t bkdMgtGetDelta(SBackendManager* bm, int64_t chkpId, SArray* list) { taosArrayClearP(bm->pDel, taosMemoryFree); taosHashClear(bm->pSstTbl[1 - bm->idx]); bm->update = 0; - + taosCloseDir(&pDir); return code; } @@ -280,6 +284,8 @@ int32_t bkdMgtGetDelta(SBackendManager* bm, int64_t chkpId, SArray* list) { taosHashClear(bm->pSstTbl[bm->idx]); bm->idx = 1 - bm->idx; + taosCloseDir(&pDir); + return 0; } @@ -287,8 +293,8 @@ int32_t bkdMgtDumpTo(SBackendManager* bm, char* dname) { int32_t code = 0; int32_t len = bm->len + 128; - char* dstBuf = taosMemoryCalloc(1, len); char* srcBuf = taosMemoryCalloc(1, len); + char* dstBuf = taosMemoryCalloc(1, len); char* srcDir = taosMemoryCalloc(1, len); char* dstDir = taosMemoryCalloc(1, len); @@ -297,12 +303,16 @@ int32_t bkdMgtDumpTo(SBackendManager* bm, char* dname) { sprintf(dstDir, "%s%s%s", bm->path, TD_DIRSEP, dname); if (!taosDirExist(srcDir)) { - return 0; + qError("failed to dump srcDir %s, reason: not exist such dir", srcDir); + code = -1; + goto _ERROR; } code = taosMkDir(dstDir); if (code != 0) { - return code; + terrno = TAOS_SYSTEM_ERROR(errno); + qError("failed to mkdir srcDir %s, reason: %s", dstDir, terrstr()); + goto _ERROR; } // clear current file @@ -353,6 +363,7 @@ int32_t bkdMgtDumpTo(SBackendManager* bm, char* dname) { taosArrayClearP(bm->pAdd, taosMemoryFree); taosArrayClearP(bm->pDel, taosMemoryFree); +_ERROR: taosMemoryFree(srcBuf); taosMemoryFree(dstBuf); taosMemoryFree(srcDir); @@ -388,7 +399,11 @@ int32_t copyFiles(const char* src, const char* dst) { char* dstName = taosMemoryCalloc(1, dLen + 64); TdDirPtr pDir = taosOpenDir(src); - if (pDir == NULL) return 0; + if (pDir == NULL) { + taosMemoryFree(srcName); + taosMemoryFree(dstName); + return -1; + } TdDirEntryPtr de = NULL; while 
((de = taosReadDir(pDir)) != NULL) { @@ -1483,7 +1498,11 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t rocksdb_options_set_block_based_table_factory((rocksdb_options_t*)cfOpts[i], tableOpt); params[i].tableOpt = tableOpt; - int idx = streamStateGetCfIdx(NULL, funcname); + int idx = streamStateGetCfIdx(NULL, funcname); + if (idx < 0 || idx >= sizeof(ginitDict) / sizeof(ginitDict[0])) { + qError("failed to open cf"); + return -1; + } SCfInit* cfPara = &ginitDict[idx]; rocksdb_comparator_t* compare = @@ -1591,7 +1610,6 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t return 0; } int streamStateOpenBackend(void* backend, SStreamState* pState) { - // qInfo("start to open state %p on backend %p 0x%" PRIx64 "-%d", pState, backend, pState->streamId, pState->taskId); taosAcquireRef(streamBackendId, pState->streamBackendRid); SBackendWrapper* handle = backend; SBackendCfWrapper* pBackendCfWrapper = taosMemoryCalloc(1, sizeof(SBackendCfWrapper)); @@ -1713,25 +1731,23 @@ int streamStateGetCfIdx(SStreamState* pState, const char* funcName) { if (pState != NULL && idx != -1) { SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; rocksdb_column_family_handle_t* cf = NULL; - taosThreadRwlockRdlock(&wrapper->rwLock); + taosThreadRwlockWrlock(&wrapper->rwLock); cf = wrapper->pHandle[idx]; - taosThreadRwlockUnlock(&wrapper->rwLock); if (cf == NULL) { char buf[128] = {0}; GEN_COLUMN_FAMILY_NAME(buf, wrapper->idstr, ginitDict[idx].key); char* err = NULL; - - taosThreadRwlockWrlock(&wrapper->rwLock); cf = rocksdb_create_column_family(wrapper->rocksdb, wrapper->cfOpts[idx], buf, &err); if (err != NULL) { idx = -1; - qError("failed to to open cf, %p %s_%s, reason:%s", pState, wrapper->idstr, funcName, err); + qError("failed to open cf, %p %s_%s, reason:%s", pState, wrapper->idstr, funcName, err); taosMemoryFree(err); } else { + qDebug("succ to to open cf, %p %s_%s", pState, wrapper->idstr, funcName); 
wrapper->pHandle[idx] = cf; } - taosThreadRwlockUnlock(&wrapper->rwLock); } + taosThreadRwlockUnlock(&wrapper->rwLock); } return idx; @@ -2316,6 +2332,7 @@ int32_t streamStateSessionGetKVByCur_rocksdb(SStreamStateCur* pCur, SSessionKey* char* val = NULL; int32_t len = decodeValueFunc((void*)vval, vLen, NULL, &val); if (len < 0) { + taosMemoryFree(val); return -1; } @@ -2799,6 +2816,7 @@ char* streamDefaultIterVal_rocksdb(void* iter, int32_t* len) { const char* val = rocksdb_iter_value(pCur->iter, (size_t*)&vlen); *len = decodeValueFunc((void*)val, vlen, NULL, &ret); if (*len < 0) { + taosMemoryFree(ret); return NULL; } diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index baf319d014..a48f74ce86 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -125,7 +125,6 @@ static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpoint taosMemoryFree(pBlock); if (streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pChkpoint) < 0) { - taosFreeQitem(pChkpoint); return TSDB_CODE_OUT_OF_MEMORY; } @@ -183,8 +182,7 @@ int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBloc taosWLockLatch(&pMeta->lock); if (pMeta->chkptNotReadyTasks == 0) { - pMeta->chkptNotReadyTasks = streamMetaGetNumOfStreamTasks(pMeta); - pMeta->totalTasks = pMeta->chkptNotReadyTasks; + pMeta->chkptNotReadyTasks = pMeta->numOfStreamTasks; } taosWUnLockLatch(&pMeta->lock); @@ -267,11 +265,13 @@ int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId) { int64_t keys[2]; for (int32_t i = 0; i < taosArrayGetSize(pMeta->pTaskList); ++i) { - SStreamTaskId* pId = taosArrayGet(pMeta->pTaskList, i); - keys[0] = pId->streamId; - keys[1] = pId->taskId; + STaskId* pId = taosArrayGet(pMeta->pTaskList, i); + SStreamTask** ppTask = taosHashGet(pMeta->pTasksMap, pId, sizeof(*pId)); + if (ppTask == NULL) { + continue; + } - SStreamTask* p = 
*(SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + SStreamTask* p = *ppTask; if (p->info.fillHistory == 1) { continue; } @@ -287,7 +287,7 @@ int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId) { streamTaskOpenAllUpstreamInput(p); // open inputQ for all upstream tasks qDebug("vgId:%d s-task:%s level:%d commit task status after checkpoint completed, checkpointId:%" PRId64 ", Ver(saved):%" PRId64 " currentVer:%" PRId64 ", status to be normal, prev:%s", - pMeta->vgId, p->id.idStr, p->info.taskLevel, checkpointId, p->chkInfo.checkpointVer, p->chkInfo.currentVer, + pMeta->vgId, p->id.idStr, p->info.taskLevel, checkpointId, p->chkInfo.checkpointVer, p->chkInfo.nextProcessVer, streamGetTaskStatusStr(prev)); } @@ -314,15 +314,13 @@ int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { if (remain == 0) { // all tasks are ready qDebug("s-task:%s is ready for checkpoint", pTask->id.idStr); - pMeta->totalTasks = 0; - streamBackendDoCheckpoint(pMeta, pTask->checkpointingId); streamSaveAllTaskStatus(pMeta, pTask->checkpointingId); qDebug("vgId:%d vnode wide checkpoint completed, save all tasks status, checkpointId:%" PRId64, pMeta->vgId, pTask->checkpointingId); } else { qDebug("vgId:%d vnode wide tasks not reach checkpoint ready status, ready s-task:%s, not ready:%d/%d", pMeta->vgId, - pTask->id.idStr, remain, pMeta->totalTasks); + pTask->id.idStr, remain, pMeta->numOfStreamTasks); } // send check point response to upstream task diff --git a/source/libs/stream/src/streamData.c b/source/libs/stream/src/streamData.c index ea3e4f5985..00bf631d74 100644 --- a/source/libs/stream/src/streamData.c +++ b/source/libs/stream/src/streamData.c @@ -115,28 +115,16 @@ SStreamDataSubmit* streamDataSubmitNew(SPackedData* pData, int32_t type) { return NULL; } - pDataSubmit->dataRef = (int32_t*)taosMemoryMalloc(sizeof(int32_t)); - if (pDataSubmit->dataRef == NULL) { - taosFreeQitem(pDataSubmit); - return NULL; - } - pDataSubmit->ver = pData->ver; 
pDataSubmit->submit = *pData; - *pDataSubmit->dataRef = 1; // initialize the reference count to be 1 pDataSubmit->type = type; return pDataSubmit; } void streamDataSubmitDestroy(SStreamDataSubmit* pDataSubmit) { - int32_t ref = atomic_sub_fetch_32(pDataSubmit->dataRef, 1); - ASSERT(ref >= 0 && pDataSubmit->type == STREAM_INPUT__DATA_SUBMIT); - - if (ref == 0) { - taosMemoryFree(pDataSubmit->submit.msgStr); - taosMemoryFree(pDataSubmit->dataRef); - } + ASSERT(pDataSubmit->type == STREAM_INPUT__DATA_SUBMIT); + taosMemoryFree(pDataSubmit->submit.msgStr); } SStreamMergedSubmit* streamMergedSubmitNew() { @@ -146,11 +134,8 @@ SStreamMergedSubmit* streamMergedSubmitNew() { } pMerged->submits = taosArrayInit(0, sizeof(SPackedData)); - pMerged->dataRefs = taosArrayInit(0, sizeof(void*)); - - if (pMerged->dataRefs == NULL || pMerged->submits == NULL) { + if (pMerged->submits == NULL) { taosArrayDestroy(pMerged->submits); - taosArrayDestroy(pMerged->dataRefs); taosFreeQitem(pMerged); return NULL; } @@ -160,9 +145,10 @@ SStreamMergedSubmit* streamMergedSubmitNew() { } int32_t streamMergeSubmit(SStreamMergedSubmit* pMerged, SStreamDataSubmit* pSubmit) { - taosArrayPush(pMerged->dataRefs, &pSubmit->dataRef); taosArrayPush(pMerged->submits, &pSubmit->submit); - pMerged->ver = pSubmit->ver; + if (pSubmit->ver > pMerged->ver) { + pMerged->ver = pSubmit->ver; + } return 0; } @@ -222,18 +208,10 @@ void streamFreeQitem(SStreamQueueItem* data) { int32_t sz = taosArrayGetSize(pMerge->submits); for (int32_t i = 0; i < sz; i++) { - int32_t* pRef = taosArrayGetP(pMerge->dataRefs, i); - int32_t ref = atomic_sub_fetch_32(pRef, 1); - ASSERT(ref >= 0); - - if (ref == 0) { - SPackedData* pSubmit = (SPackedData*)taosArrayGet(pMerge->submits, i); - taosMemoryFree(pSubmit->msgStr); - taosMemoryFree(pRef); - } + SPackedData* pSubmit = (SPackedData*)taosArrayGet(pMerge->submits, i); + taosMemoryFree(pSubmit->msgStr); } taosArrayDestroy(pMerge->submits); - taosArrayDestroy(pMerge->dataRefs); 
taosFreeQitem(pMerge); } else if (type == STREAM_INPUT__REF_DATA_BLOCK) { SStreamRefDataBlock* pRefBlock = (SStreamRefDataBlock*)data; diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 916cc6e9ee..4d5234a68c 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -498,9 +498,10 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { ASSERT((pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH)); const char* id = pTask->id.idStr; - int32_t numOfElems = taosQueueItemSize(pTask->outputInfo.queue->pQueue); + int32_t numOfElems = streamQueueGetNumOfItems(pTask->outputInfo.queue); if (numOfElems > 0) { - qDebug("s-task:%s try to dispatch intermediate block to downstream, elem in outputQ:%d", id, numOfElems); + double size = SIZE_IN_MB(taosQueueMemorySize(pTask->outputInfo.queue->pQueue)); + qDebug("s-task:%s start to dispatch intermediate block to downstream, elem in outputQ:%d, size:%.2fMiB", id, numOfElems, size); } // to make sure only one dispatch is running @@ -1001,10 +1002,17 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i // so the TASK_INPUT_STATUS_BLOCKED is rsp if (pRsp->inputStatus == TASK_INPUT_STATUS__BLOCKED) { pTask->inputInfo.status = TASK_INPUT_STATUS__BLOCKED; // block the input of current task, to push pressure to upstream - pTask->msgInfo.blockingTs = taosGetTimestampMs(); // record the blocking start time + double el = 0; + if (pTask->msgInfo.blockingTs == 0) { + pTask->msgInfo.blockingTs = taosGetTimestampMs(); // record the blocking start time + } else { + el = (taosGetTimestampMs() - pTask->msgInfo.blockingTs) / 1000.0; + } + int8_t ref = atomic_add_fetch_8(&pTask->status.timerActive, 1); - qError("s-task:%s inputQ of downstream task:0x%x is full, time:%" PRId64 " wait for %dms and retry dispatch data, ref:%d", - id, pRsp->downstreamTaskId, 
pTask->msgInfo.blockingTs, DISPATCH_RETRY_INTERVAL_MS, ref); + qError("s-task:%s inputQ of downstream task:0x%x is full, time:%" PRId64 + " wait for %dms and retry dispatch data, total wait:%.2fSec ref:%d", + id, pRsp->downstreamTaskId, pTask->msgInfo.blockingTs, DISPATCH_RETRY_INTERVAL_MS, el, ref); streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS); } else { // pipeline send data in output queue // this message has been sent successfully, let's try next one. diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index ff667fa778..91c46c8ad9 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -32,33 +32,58 @@ bool streamTaskShouldPause(const SStreamStatus* pStatus) { return (status == TASK_STATUS__PAUSE); } +static int32_t doOutputResultBlockImpl(SStreamTask* pTask, SStreamDataBlock* pBlock) { + int32_t code = 0; + int32_t type = pTask->outputInfo.type; + if (type == TASK_OUTPUT__TABLE) { + pTask->tbSink.tbSinkFunc(pTask, pTask->tbSink.vnode, pBlock->blocks); + destroyStreamDataBlock(pBlock); + } else if (type == TASK_OUTPUT__SMA) { + pTask->smaSink.smaSink(pTask->smaSink.vnode, pTask->smaSink.smaId, pBlock->blocks); + destroyStreamDataBlock(pBlock); + } else { + ASSERT(type == TASK_OUTPUT__FIXED_DISPATCH || type == TASK_OUTPUT__SHUFFLE_DISPATCH); + code = streamTaskPutDataIntoOutputQ(pTask, pBlock); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + streamDispatchStreamBlock(pTask); + return code; + } + + return 0; +} + static int32_t doDumpResult(SStreamTask* pTask, SStreamQueueItem* pItem, SArray* pRes, int32_t size, int64_t* totalSize, int32_t* totalBlocks) { int32_t numOfBlocks = taosArrayGetSize(pRes); - if (numOfBlocks > 0) { - SStreamDataBlock* pStreamBlocks = createStreamBlockFromResults(pItem, pTask, size, pRes); - if (pStreamBlocks == NULL) { - qError("s-task:%s failed to create result stream data block, code:%s", pTask->id.idStr, tstrerror(terrno)); - 
taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); - return -1; - } - - qDebug("s-task:%s dump stream result data blocks, num:%d, size:%.2fMiB", pTask->id.idStr, numOfBlocks, - SIZE_IN_MB(size)); - - int32_t code = streamTaskOutputResultBlock(pTask, pStreamBlocks); - if (code == TSDB_CODE_UTIL_QUEUE_OUT_OF_MEMORY) { // back pressure and record position - destroyStreamDataBlock(pStreamBlocks); - return -1; - } - - *totalSize += size; - *totalBlocks += numOfBlocks; - } else { + if (numOfBlocks == 0) { taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); + return TSDB_CODE_SUCCESS; } - return TSDB_CODE_SUCCESS; + SStreamDataBlock* pStreamBlocks = createStreamBlockFromResults(pItem, pTask, size, pRes); + if (pStreamBlocks == NULL) { + qError("s-task:%s failed to create result stream data block, code:%s", pTask->id.idStr, tstrerror(terrno)); + taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); + return TSDB_CODE_OUT_OF_MEMORY; + } + + qDebug("s-task:%s dump stream result data blocks, num:%d, size:%.2fMiB", pTask->id.idStr, numOfBlocks, + SIZE_IN_MB(size)); + + int32_t code = doOutputResultBlockImpl(pTask, pStreamBlocks); + if (code != TSDB_CODE_SUCCESS) { // back pressure and record position + //code == TSDB_CODE_UTIL_QUEUE_OUT_OF_MEMORY + destroyStreamDataBlock(pStreamBlocks); + return code; + } + + *totalSize += size; + *totalBlocks += numOfBlocks; + + return code; } static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, int64_t* totalSize, @@ -84,7 +109,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i } if (pTask->inputInfo.status == TASK_INPUT_STATUS__BLOCKED) { - qWarn("s-task:%s downstream task inputQ blocked, idle for 1sec and retry", pTask->id.idStr); + qWarn("s-task:%s downstream task inputQ blocked, idle for 1sec and retry exec task", pTask->id.idStr); taosMsleep(1000); continue; } @@ -164,17 +189,19 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i return code; 
} -int32_t streamScanExec(SStreamTask* pTask, int32_t batchSize) { +int32_t streamScanHistoryData(SStreamTask* pTask) { ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); + int32_t code = TSDB_CODE_SUCCESS; void* exec = pTask->exec.pExecutor; bool finished = false; + int32_t outputBatchSize = 100; qSetStreamOpOpen(exec); while (!finished) { if (streamTaskShouldPause(&pTask->status)) { - double el = (taosGetTimestampMs() - pTask->tsInfo.step1Start) / 1000.0; + double el = (taosGetTimestampMs() - pTask->taskExecInfo.step1Start) / 1000.0; qDebug("s-task:%s paused from the scan-history task, elapsed time:%.2fsec", pTask->id.idStr, el); break; } @@ -217,8 +244,8 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSize) { block.info.childId = pTask->info.selfChildId; taosArrayPush(pRes, &block); - if ((++numOfBlocks) >= batchSize) { - qDebug("s-task:%s scan exec numOfBlocks:%d, output limit:%d reached", pTask->id.idStr, numOfBlocks, batchSize); + if ((++numOfBlocks) >= outputBatchSize) { + qDebug("s-task:%s scan exec numOfBlocks:%d, output limit:%d reached", pTask->id.idStr, numOfBlocks, outputBatchSize); break; } } @@ -234,7 +261,7 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSize) { qRes->type = STREAM_INPUT__DATA_BLOCK; qRes->blocks = pRes; - code = streamTaskOutputResultBlock(pTask, qRes); + code = doOutputResultBlockImpl(pTask, qRes); if (code == TSDB_CODE_UTIL_QUEUE_OUT_OF_MEMORY) { taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); taosFreeQitem(qRes); @@ -248,13 +275,6 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSize) { return 0; } -int32_t streamTaskGetInputQItems(const SStreamTask* pTask) { - int32_t numOfItems1 = taosQueueItemSize(pTask->inputInfo.queue->pQueue); - int32_t numOfItems2 = taosQallItemSize(pTask->inputInfo.queue->qall); - - return numOfItems1 + numOfItems2; -} - // wait for the stream task to be idle static void waitForTaskIdle(SStreamTask* pTask, SStreamTask* pStreamTask) { const char* id = 
pTask->id.idStr; @@ -280,10 +300,10 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { qError( "s-task:%s failed to find related stream task:0x%x, it may have been destroyed or closed, destroy the related " "fill-history task", - pTask->id.idStr, pTask->streamTaskId.taskId); + pTask->id.idStr, (int32_t) pTask->streamTaskId.taskId); // 1. free it and remove fill-history task from disk meta-store - streamMetaUnregisterTask(pMeta, pTask->id.streamId, pTask->id.taskId); + streamBuildAndSendDropTaskMsg(pStreamTask, pMeta->vgId, &pTask->id); // 2. save to disk taosWLockLatch(&pMeta->lock); @@ -338,17 +358,19 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { streamTaskReleaseState(pTask); streamTaskReloadState(pStreamTask); - // 3. clear the link between fill-history task and stream task info - pStreamTask->historyTaskId.taskId = 0; - - // 4. resume the state of stream task, after this function, the stream task will run immidately. But it can not be + // 3. resume the state of stream task, after this function, the stream task will run immidately. But it can not be // pause, since the pause allowed attribute is not set yet. streamTaskResumeFromHalt(pStreamTask); qDebug("s-task:%s fill-history task set status to be dropping, save the state into disk", pTask->id.idStr); - // 5. free it and remove fill-history task from disk meta-store - streamMetaUnregisterTask(pMeta, pTask->id.streamId, pTask->id.taskId); + // 4. free it and remove fill-history task from disk meta-store +// streamMetaUnregisterTask(pMeta, pTask->id.streamId, pTask->id.taskId); + streamBuildAndSendDropTaskMsg(pStreamTask, pMeta->vgId, &pTask->id); + + // 5. clear the link between fill-history task and stream task info + pStreamTask->historyTaskId.taskId = 0; + pStreamTask->historyTaskId.streamId = 0; // 6. 
save to disk taosWLockLatch(&pMeta->lock); @@ -388,6 +410,8 @@ int32_t streamTransferStateToStreamTask(SStreamTask* pTask) { if (level == TASK_LEVEL__AGG || level == TASK_LEVEL__SOURCE) { // do transfer task operator states. code = streamDoTransferStateToStreamTask(pTask); + } else { // drop fill-history task + streamBuildAndSendDropTaskMsg(pTask, pTask->pMeta->vgId, &pTask->id); } return code; @@ -483,16 +507,12 @@ int32_t streamProcessTranstateBlock(SStreamTask* pTask, SStreamDataBlock* pBlock } } else { // non-dispatch task, do task state transfer directly streamFreeQitem((SStreamQueueItem*)pBlock); - if (level != TASK_LEVEL__SINK) { - qDebug("s-task:%s non-dispatch task, start to transfer state directly", id); - ASSERT(pTask->info.fillHistory == 1); - code = streamTransferStateToStreamTask(pTask); + qDebug("s-task:%s non-dispatch task, start to transfer state directly", id); + ASSERT(pTask->info.fillHistory == 1); + code = streamTransferStateToStreamTask(pTask); - if (code != TSDB_CODE_SUCCESS) { - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); - } - } else { - qDebug("s-task:%s sink task does not transfer state", id); + if (code != TSDB_CODE_SUCCESS) { + /*int8_t status = */ streamTaskSetSchedStatusInActive(pTask); } } @@ -541,7 +561,7 @@ int32_t streamExecForAll(SStreamTask* pTask) { if (type == STREAM_INPUT__DATA_BLOCK) { qDebug("s-task:%s sink task start to sink %d blocks", id, numOfBlocks); - streamTaskOutputResultBlock(pTask, (SStreamDataBlock*)pInput); + doOutputResultBlockImpl(pTask, (SStreamDataBlock*)pInput); continue; } } @@ -563,11 +583,11 @@ int32_t streamExecForAll(SStreamTask* pTask) { SIZE_IN_MB(resSize), totalBlocks); // update the currentVer if processing the submit blocks. 
- ASSERT(pTask->chkInfo.checkpointVer <= pTask->chkInfo.currentVer && ver >= pTask->chkInfo.checkpointVer); + ASSERT(pTask->chkInfo.checkpointVer <= pTask->chkInfo.nextProcessVer && ver >= pTask->chkInfo.checkpointVer); if (ver != pTask->chkInfo.checkpointVer) { - qDebug("s-task:%s update checkpointVer(unsaved) from %" PRId64 " to %" PRId64, pTask->id.idStr, - pTask->chkInfo.checkpointVer, ver); + qDebug("s-task:%s update checkpointVer(unsaved) from %" PRId64 " to %" PRId64 " , currentVer:%" PRId64, + pTask->id.idStr, pTask->chkInfo.checkpointVer, ver, pTask->chkInfo.nextProcessVer); pTask->chkInfo.checkpointVer = ver; } @@ -576,7 +596,6 @@ int32_t streamExecForAll(SStreamTask* pTask) { // todo other thread may change the status // do nothing after sync executor state to storage backend, untill the vnode-level checkpoint is completed. if (type == STREAM_INPUT__CHECKPOINT) { -// ASSERT(pTask->status.taskStatus == TASK_STATUS__CK); qDebug("s-task:%s checkpoint block received, set the status:%s", pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus)); streamTaskBuildCheckpoint(pTask); @@ -596,27 +615,28 @@ bool streamTaskIsIdle(const SStreamTask* pTask) { int32_t streamTryExec(SStreamTask* pTask) { // this function may be executed by multi-threads, so status check is required. 
- int8_t schedStatus = - atomic_val_compare_exchange_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__WAITING, TASK_SCHED_STATUS__ACTIVE); - const char* id = pTask->id.idStr; + int8_t schedStatus = streamTaskSetSchedStatusActive(pTask); if (schedStatus == TASK_SCHED_STATUS__WAITING) { - int32_t code = streamExecForAll(pTask); - if (code < 0) { // todo this status shoudl be removed - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__FAILED); - return -1; - } + while (1) { + int32_t code = streamExecForAll(pTask); + if (code < 0) { // todo this status shoudl be removed + atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__FAILED); + return -1; + } -// streamTaskBuildCheckpoint(pTask); + taosThreadMutexLock(&pTask->lock); + if (taosQueueEmpty(pTask->inputInfo.queue->pQueue) || streamTaskShouldStop(&pTask->status) || + streamTaskShouldPause(&pTask->status)) { + atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + taosThreadMutexUnlock(&pTask->lock); - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); - qDebug("s-task:%s exec completed, status:%s, sched-status:%d", id, streamGetTaskStatusStr(pTask->status.taskStatus), - pTask->status.schedStatus); - - if (!(taosQueueEmpty(pTask->inputInfo.queue->pQueue) || streamTaskShouldStop(&pTask->status) || - streamTaskShouldPause(&pTask->status))) { - streamSchedExec(pTask); + qDebug("s-task:%s exec completed, status:%s, sched-status:%d", id, + streamGetTaskStatusStr(pTask->status.taskStatus), pTask->status.schedStatus); + return 0; + } + taosThreadMutexUnlock(&pTask->lock); } } else { qDebug("s-task:%s already started to exec by other thread, status:%s, sched-status:%d", id, diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 5bc21286d7..31bf5a482b 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -36,13 +36,20 @@ static void metaHbToMnode(void* param, void* tmrId); static void 
streamMetaClear(SStreamMeta* pMeta); static int32_t streamMetaBegin(SStreamMeta* pMeta); static void streamMetaCloseImpl(void* arg); -static void extractStreamTaskKey(int64_t* pKey, const SStreamTask* pTask); typedef struct { TdThreadMutex mutex; SHashObj* pTable; } SMetaRefMgt; +struct SMetaHbInfo { + tmr_h hbTmr; + int32_t stopFlag; + int32_t tickCounter; + int32_t hbCount; + int64_t hbStart; +}; + SMetaRefMgt gMetaRefMgt; void metaRefMgtInit(); @@ -129,14 +136,23 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF if (tdbTbOpen("checkpoint.db", sizeof(int32_t), -1, NULL, pMeta->db, &pMeta->pCheckpointDb, 0) < 0) { goto _err; } - if (streamMetaBegin(pMeta) < 0) { goto _err; } _hash_fn_t fp = taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR); - pMeta->pTasks = taosHashInit(64, fp, true, HASH_NO_LOCK); - if (pMeta->pTasks == NULL) { + pMeta->pTasksMap = taosHashInit(64, fp, true, HASH_NO_LOCK); + if (pMeta->pTasksMap == NULL) { + goto _err; + } + + pMeta->pUpdateTaskSet = taosHashInit(64, fp, false, HASH_NO_LOCK); + if (pMeta->pUpdateTaskSet == NULL) { + goto _err; + } + + pMeta->pHbInfo = taosMemoryCalloc(1, sizeof(SMetaHbInfo)); + if (pMeta->pHbInfo == NULL) { goto _err; } @@ -160,9 +176,9 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF metaRefMgtAdd(pMeta->vgId, pRid); - pMeta->hbInfo.hbTmr = taosTmrStart(metaHbToMnode, META_HB_CHECK_INTERVAL, pRid, streamEnv.timer); - pMeta->hbInfo.tickCounter = 0; - pMeta->hbInfo.stopFlag = 0; + pMeta->pHbInfo->hbTmr = taosTmrStart(metaHbToMnode, META_HB_CHECK_INTERVAL, pRid, streamEnv.timer); + pMeta->pHbInfo->tickCounter = 0; + pMeta->pHbInfo->stopFlag = 0; pMeta->pTaskBackendUnique = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); @@ -188,22 +204,21 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF taosInitRWLatch(&pMeta->lock); taosThreadMutexInit(&pMeta->backendMutex, NULL); - 
pMeta->pauseTaskNum = 0; - + pMeta->numOfPausedTasks = 0; + pMeta->numOfStreamTasks = 0; qInfo("vgId:%d open stream meta successfully, latest checkpoint:%" PRId64 ", stage:%" PRId64, vgId, pMeta->chkpId, stage); return pMeta; _err: taosMemoryFree(pMeta->path); - if (pMeta->pTasks) taosHashCleanup(pMeta->pTasks); + if (pMeta->pTasksMap) taosHashCleanup(pMeta->pTasksMap); if (pMeta->pTaskList) taosArrayDestroy(pMeta->pTaskList); if (pMeta->pTaskDb) tdbTbClose(pMeta->pTaskDb); if (pMeta->pCheckpointDb) tdbTbClose(pMeta->pCheckpointDb); if (pMeta->db) tdbClose(pMeta->db); - - // taosThreadMutexDestroy(&pMeta->backendMutex); - // taosThreadRwlockDestroy(&pMeta->lock); + if (pMeta->pHbInfo) taosMemoryFreeClear(pMeta->pHbInfo); + if (pMeta->pUpdateTaskSet) taosHashCleanup(pMeta->pUpdateTaskSet); taosMemoryFree(pMeta); @@ -211,7 +226,7 @@ _err: return NULL; } -int32_t streamMetaReopen(SStreamMeta* pMeta, int64_t chkpId) { +int32_t streamMetaReopen(SStreamMeta* pMeta) { streamMetaClear(pMeta); pMeta->streamBackendRid = -1; @@ -257,7 +272,7 @@ int32_t streamMetaReopen(SStreamMeta* pMeta, int64_t chkpId) { void streamMetaClear(SStreamMeta* pMeta) { void* pIter = NULL; - while ((pIter = taosHashIterate(pMeta->pTasks, pIter)) != NULL) { + while ((pIter = taosHashIterate(pMeta->pTasksMap, pIter)) != NULL) { SStreamTask* p = *(SStreamTask**)pIter; // release the ref by timer @@ -273,7 +288,7 @@ void streamMetaClear(SStreamMeta* pMeta) { taosRemoveRef(streamBackendId, pMeta->streamBackendRid); - taosHashClear(pMeta->pTasks); + taosHashClear(pMeta->pTasksMap); taosHashClear(pMeta->pTaskBackendUnique); taosArrayClear(pMeta->pTaskList); @@ -314,9 +329,11 @@ void streamMetaCloseImpl(void* arg) { taosArrayDestroy(pMeta->chkpSaved); taosArrayDestroy(pMeta->chkpInUse); - taosHashCleanup(pMeta->pTasks); + taosHashCleanup(pMeta->pTasksMap); taosHashCleanup(pMeta->pTaskBackendUnique); + taosHashCleanup(pMeta->pUpdateTaskSet); + taosMemoryFree(pMeta->pHbInfo); taosMemoryFree(pMeta->path); 
taosThreadMutexDestroy(&pMeta->backendMutex); @@ -343,10 +360,8 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { tEncodeStreamTask(&encoder, pTask); tEncoderClear(&encoder); - int64_t key[2] = {0}; - extractStreamTaskKey(key, pTask); - - if (tdbTbUpsert(pMeta->pTaskDb, key, STREAM_TASK_KEY_LEN, buf, len, pMeta->txn) < 0) { + int64_t id[2] = {pTask->id.streamId, pTask->id.taskId}; + if (tdbTbUpsert(pMeta->pTaskDb, id, STREAM_TASK_KEY_LEN, buf, len, pMeta->txn) < 0) { qError("s-task:%s save to disk failed, code:%s", pTask->id.idStr, tstrerror(terrno)); return -1; } @@ -355,18 +370,14 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { return 0; } -void extractStreamTaskKey(int64_t* pKey, const SStreamTask* pTask) { - pKey[0] = pTask->id.streamId; - pKey[1] = pTask->id.taskId; -} - -int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int64_t* pKey) { - int32_t code = tdbTbDelete(pMeta->pTaskDb, pKey, STREAM_TASK_KEY_LEN, pMeta->txn); +int32_t streamMetaRemoveTask(SStreamMeta* pMeta, STaskId* pTaskId) { + int64_t key[2] = {pTaskId->streamId, pTaskId->taskId}; + int32_t code = tdbTbDelete(pMeta->pTaskDb, key, STREAM_TASK_KEY_LEN, pMeta->txn); if (code != 0) { - qError("vgId:%d failed to remove task:0x%x from metastore, code:%s", pMeta->vgId, (int32_t)pKey[1], + qError("vgId:%d failed to remove task:0x%x from metastore, code:%s", pMeta->vgId, (int32_t) pTaskId->taskId, tstrerror(terrno)); } else { - qDebug("vgId:%d remove task:0x%x from metastore", pMeta->vgId, (int32_t)pKey[1]); + qDebug("vgId:%d remove task:0x%x from metastore", pMeta->vgId, (int32_t) pTaskId->taskId); } return code; @@ -376,8 +387,8 @@ int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int64_t* pKey) { int32_t streamMetaRegisterTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask, bool* pAdded) { *pAdded = false; - int64_t keys[2] = {pTask->id.streamId, pTask->id.taskId}; - void* p = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + STaskId id = {.streamId = 
pTask->id.streamId, .taskId = pTask->id.taskId}; + void* p = taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (p == NULL) { if (pMeta->expandFunc(pMeta->ahandle, pTask, ver) < 0) { tFreeStreamTask(pTask); @@ -399,14 +410,18 @@ int32_t streamMetaRegisterTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTa return 0; } - taosHashPut(pMeta->pTasks, keys, sizeof(keys), &pTask, POINTER_BYTES); + taosHashPut(pMeta->pTasksMap, &id, sizeof(id), &pTask, POINTER_BYTES); + if (pTask->info.fillHistory == 0) { + atomic_add_fetch_32(&pMeta->numOfStreamTasks, 1); + } + *pAdded = true; return 0; } int32_t streamMetaGetNumOfTasks(SStreamMeta* pMeta) { - size_t size = taosHashGetSize(pMeta->pTasks); - ASSERT(taosArrayGetSize(pMeta->pTaskList) == taosHashGetSize(pMeta->pTasks)); + size_t size = taosHashGetSize(pMeta->pTasksMap); + ASSERT(taosArrayGetSize(pMeta->pTaskList) == taosHashGetSize(pMeta->pTasksMap)); return (int32_t)size; } @@ -414,10 +429,12 @@ int32_t streamMetaGetNumOfStreamTasks(SStreamMeta* pMeta) { int32_t num = 0; size_t size = taosArrayGetSize(pMeta->pTaskList); for (int32_t i = 0; i < size; ++i) { - SStreamTaskId* pId = taosArrayGet(pMeta->pTaskList, i); - int64_t keys[2] = {pId->streamId, pId->taskId}; + STaskId* pId = taosArrayGet(pMeta->pTaskList, i); + SStreamTask** p = taosHashGet(pMeta->pTasksMap, pId, sizeof(*pId)); + if (p == NULL) { + continue; + } - SStreamTask** p = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); if ((*p)->info.fillHistory == 0) { num += 1; } @@ -429,8 +446,8 @@ int32_t streamMetaGetNumOfStreamTasks(SStreamMeta* pMeta) { SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId) { taosRLockLatch(&pMeta->lock); - int64_t keys[2] = {streamId, taskId}; - SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + STaskId id = {.streamId = streamId, .taskId = taskId}; + SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (ppTask != NULL) { if 
(!streamTaskShouldStop(&(*ppTask)->status)) { int32_t ref = atomic_add_fetch_32(&(*ppTask)->refCnt, 1); @@ -473,12 +490,12 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t // pre-delete operation taosWLockLatch(&pMeta->lock); - int64_t keys[2] = {streamId, taskId}; - SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + STaskId id = {.streamId = streamId, .taskId = taskId}; + SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (ppTask) { pTask = *ppTask; if (streamTaskShouldPause(&pTask->status)) { - int32_t num = atomic_sub_fetch_32(&pMeta->pauseTaskNum, 1); + int32_t num = atomic_sub_fetch_32(&pMeta->numOfPausedTasks, 1); qInfo("vgId:%d s-task:%s drop stream task. pause task num:%d", pMeta->vgId, pTask->id.idStr, num); } atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING); @@ -494,7 +511,7 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t while (1) { taosRLockLatch(&pMeta->lock); - ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (ppTask) { if ((*ppTask)->status.timerActive == 0) { @@ -513,9 +530,19 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t // let's do delete of stream task taosWLockLatch(&pMeta->lock); - ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (ppTask) { - taosHashRemove(pMeta->pTasks, keys, sizeof(keys)); + // it is an fill-history task, remove the related stream task's id that points to it + if ((*ppTask)->info.fillHistory == 1) { + STaskId streamTaskId = {.streamId = (*ppTask)->streamTaskId.streamId, .taskId = (*ppTask)->streamTaskId.taskId}; + SStreamTask** ppStreamTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &streamTaskId, sizeof(streamTaskId)); + if 
(ppStreamTask != NULL) { + (*ppStreamTask)->historyTaskId.taskId = 0; + (*ppStreamTask)->historyTaskId.streamId = 0; + } + } + + taosHashRemove(pMeta->pTasksMap, &id, sizeof(id)); atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING); ASSERT(pTask->status.timerActive == 0); @@ -528,7 +555,7 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t streamMetaReleaseTask(pMeta, pTask); } - streamMetaRemoveTask(pMeta, keys); + streamMetaRemoveTask(pMeta, &id); streamMetaReleaseTask(pMeta, pTask); } else { qDebug("vgId:%d failed to find the task:0x%x, it may have been dropped already", pMeta->vgId, taskId); @@ -539,10 +566,13 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t } int32_t streamMetaBegin(SStreamMeta* pMeta) { + taosWLockLatch(&pMeta->lock); if (tdbBegin(pMeta->db, &pMeta->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { + taosWUnLockLatch(&pMeta->lock); return -1; } + taosWUnLockLatch(&pMeta->lock); return 0; } @@ -614,8 +644,8 @@ static void doClear(void* pKey, void* pVal, TBC* pCur, SArray* pRecycleList) { int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) { TBC* pCur = NULL; - qInfo("vgId:%d load stream tasks from meta files", pMeta->vgId); + if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) { qError("vgId:%d failed to open stream meta, code:%s", pMeta->vgId, tstrerror(terrno)); return -1; @@ -626,7 +656,7 @@ int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) { void* pVal = NULL; int32_t vLen = 0; SDecoder decoder; - SArray* pRecycleList = taosArrayInit(4, STREAM_TASK_KEY_LEN); + SArray* pRecycleList = taosArrayInit(4, sizeof(STaskId)); tdbTbcMoveToFirst(pCur); while (tdbTbcNext(pCur, &pKey, &kLen, &pVal, &vLen) == 0) { @@ -653,18 +683,17 @@ int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) { int32_t taskId = pTask->id.taskId; tFreeStreamTask(pTask); - int64_t key[2] = {0}; - extractStreamTaskKey(key, pTask); + STaskId id = 
extractStreamTaskKey(pTask); - taosArrayPush(pRecycleList, key); + taosArrayPush(pRecycleList, &id); int32_t total = taosArrayGetSize(pRecycleList); qDebug("s-task:0x%x is already dropped, add into recycle list, total:%d", taskId, total); continue; } // do duplicate task check. - int64_t keys[2] = {pTask->id.streamId, pTask->id.taskId}; - void* p = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; + void* p = taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (p == NULL) { // pTask->chkInfo.checkpointVer may be 0, when a follower is become a leader // In this case, we try not to start fill-history task anymore. @@ -682,20 +711,22 @@ int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) { continue; } - streamTaskResetUpstreamStageInfo(pTask); - if (taosHashPut(pMeta->pTasks, keys, sizeof(keys), &pTask, sizeof(void*)) < 0) { + if (taosHashPut(pMeta->pTasksMap, &id, sizeof(id), &pTask, POINTER_BYTES) < 0) { doClear(pKey, pVal, pCur, pRecycleList); tFreeStreamTask(pTask); return -1; } + if (pTask->info.fillHistory == 0) { + atomic_add_fetch_32(&pMeta->numOfStreamTasks, 1); + } + if (streamTaskShouldPause(&pTask->status)) { - atomic_add_fetch_32(&pMeta->pauseTaskNum, 1); + atomic_add_fetch_32(&pMeta->numOfPausedTasks, 1); } ASSERT(pTask->status.downstreamReady == 0); } - qInfo("vgId:%d pause task num:%d", pMeta->vgId, pMeta->pauseTaskNum); tdbFree(pKey); tdbFree(pVal); @@ -706,13 +737,14 @@ int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) { if (taosArrayGetSize(pRecycleList) > 0) { for (int32_t i = 0; i < taosArrayGetSize(pRecycleList); ++i) { - int64_t* pId = taosArrayGet(pRecycleList, i); + STaskId* pId = taosArrayGet(pRecycleList, i); streamMetaRemoveTask(pMeta, pId); } } int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); - qDebug("vgId:%d load %d tasks into meta from disk completed", pMeta->vgId, numOfTasks); + qDebug("vgId:%d load %d tasks into meta from disk completed, streamTask:%d, 
paused:%d", pMeta->vgId, numOfTasks, + pMeta->numOfStreamTasks, pMeta->numOfPausedTasks); taosArrayDestroy(pRecycleList); return 0; } @@ -724,8 +756,8 @@ int32_t tEncodeStreamHbMsg(SEncoder* pEncoder, const SStreamHbMsg* pReq) { for (int32_t i = 0; i < pReq->numOfTasks; ++i) { STaskStatusEntry* ps = taosArrayGet(pReq->pTaskStatus, i); - if (tEncodeI64(pEncoder, ps->streamId) < 0) return -1; - if (tEncodeI32(pEncoder, ps->taskId) < 0) return -1; + if (tEncodeI64(pEncoder, ps->id.streamId) < 0) return -1; + if (tEncodeI32(pEncoder, ps->id.taskId) < 0) return -1; if (tEncodeI32(pEncoder, ps->status) < 0) return -1; } tEndEncode(pEncoder); @@ -740,8 +772,11 @@ int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pReq) { pReq->pTaskStatus = taosArrayInit(pReq->numOfTasks, sizeof(STaskStatusEntry)); for (int32_t i = 0; i < pReq->numOfTasks; ++i) { STaskStatusEntry hb = {0}; - if (tDecodeI64(pDecoder, &hb.streamId) < 0) return -1; - if (tDecodeI32(pDecoder, &hb.taskId) < 0) return -1; + if (tDecodeI64(pDecoder, &hb.id.streamId) < 0) return -1; + int32_t taskId = 0; + if (tDecodeI32(pDecoder, &taskId) < 0) return -1; + + hb.id.taskId = taskId; if (tDecodeI32(pDecoder, &hb.status) < 0) return -1; taosArrayPush(pReq->pTaskStatus, &hb); @@ -751,9 +786,8 @@ int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pReq) { return 0; } -static bool readyToSendHb(SMetaHbInfo* pInfo) { - if ((++pInfo->tickCounter) >= META_HB_SEND_IDLE_COUNTER) { - // reset the counter +static bool enoughTimeDuration(SMetaHbInfo* pInfo) { + if ((++pInfo->tickCounter) >= META_HB_SEND_IDLE_COUNTER) { // reset the counter pInfo->tickCounter = 0; return true; } @@ -763,48 +797,61 @@ static bool readyToSendHb(SMetaHbInfo* pInfo) { void metaHbToMnode(void* param, void* tmrId) { int64_t rid = *(int64_t*)param; - SStreamHbMsg hbMsg = {0}; SStreamMeta* pMeta = taosAcquireRef(streamMetaId, rid); if (pMeta == NULL) { return; } // need to stop, stop now - if (pMeta->hbInfo.stopFlag == 
STREAM_META_WILL_STOP) { - pMeta->hbInfo.stopFlag = STREAM_META_OK_TO_STOP; + if (pMeta->pHbInfo->stopFlag == STREAM_META_WILL_STOP) { + pMeta->pHbInfo->stopFlag = STREAM_META_OK_TO_STOP; qDebug("vgId:%d jump out of meta timer", pMeta->vgId); taosReleaseRef(streamMetaId, rid); return; } - if (!readyToSendHb(&pMeta->hbInfo)) { - taosTmrReset(metaHbToMnode, META_HB_CHECK_INTERVAL, param, streamEnv.timer, &pMeta->hbInfo.hbTmr); + // not leader not send msg + if (!pMeta->leader) { + qInfo("vgId:%d follower not send hb to mnode", pMeta->vgId); + taosReleaseRef(streamMetaId, rid); + pMeta->pHbInfo->hbStart = 0; + return; + } + + // set the hb start time + if (pMeta->pHbInfo->hbStart == 0) { + pMeta->pHbInfo->hbStart = taosGetTimestampMs(); + } + + if (!enoughTimeDuration(pMeta->pHbInfo)) { + taosTmrReset(metaHbToMnode, META_HB_CHECK_INTERVAL, param, streamEnv.timer, &pMeta->pHbInfo->hbTmr); taosReleaseRef(streamMetaId, rid); return; } + qDebug("vgId:%d build stream task hb, leader:%d", pMeta->vgId, pMeta->leader); + + SStreamHbMsg hbMsg = {0}; taosRLockLatch(&pMeta->lock); int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); SEpSet epset = {0}; bool hasValEpset = false; - hbMsg.vgId = pMeta->vgId; hbMsg.pTaskStatus = taosArrayInit(numOfTasks, sizeof(STaskStatusEntry)); for (int32_t i = 0; i < numOfTasks; ++i) { - SStreamTaskId* pId = taosArrayGet(pMeta->pTaskList, i); - int64_t keys[2] = {pId->streamId, pId->taskId}; - SStreamTask** pTask = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + STaskId* pId = taosArrayGet(pMeta->pTaskList, i); + SStreamTask** pTask = taosHashGet(pMeta->pTasksMap, pId, sizeof(*pId)); if ((*pTask)->info.fillHistory == 1) { continue; } - STaskStatusEntry entry = {.streamId = pId->streamId, .taskId = pId->taskId, .status = (*pTask)->status.taskStatus}; + STaskStatusEntry entry = {.id = *pId, .status = (*pTask)->status.taskStatus}; taosArrayPush(hbMsg.pTaskStatus, &entry); - if (i == 0) { + if (!hasValEpset) { epsetAssign(&epset, 
&(*pTask)->info.mnodeEpset); hasValEpset = true; } @@ -848,12 +895,17 @@ void metaHbToMnode(void* param, void* tmrId) { initRpcMsg(&msg, TDMT_MND_STREAM_HEARTBEAT, buf, tlen); msg.info.noResp = 1; - qDebug("vgId:%d, build and send hb to mnode", pMeta->vgId); + pMeta->pHbInfo->hbCount += 1; + + qDebug("vgId:%d, build and send hb to mnode, numOfTasks:%d total:%d", pMeta->vgId, hbMsg.numOfTasks, + pMeta->pHbInfo->hbCount); tmsgSendReq(&epset, &msg); + } else { + qDebug("vgId:%d no tasks and no mnd epset, not send stream hb to mnode", pMeta->vgId); } taosArrayDestroy(hbMsg.pTaskStatus); - taosTmrReset(metaHbToMnode, META_HB_CHECK_INTERVAL, param, streamEnv.timer, &pMeta->hbInfo.hbTmr); + taosTmrReset(metaHbToMnode, META_HB_CHECK_INTERVAL, param, streamEnv.timer, &pMeta->pHbInfo->hbTmr); taosReleaseRef(streamMetaId, rid); } @@ -864,7 +916,7 @@ static bool hasStreamTaskInTimer(SStreamMeta* pMeta) { void* pIter = NULL; while (1) { - pIter = taosHashIterate(pMeta->pTasks, pIter); + pIter = taosHashIterate(pMeta->pTasksMap, pIter); if (pIter == NULL) { break; } @@ -882,12 +934,14 @@ static bool hasStreamTaskInTimer(SStreamMeta* pMeta) { void streamMetaNotifyClose(SStreamMeta* pMeta) { int32_t vgId = pMeta->vgId; - qDebug("vgId:%d notify all stream tasks that the vnode is closing", vgId); + qDebug("vgId:%d notify all stream tasks that the vnode is closing. 
isLeader:%d startHb%" PRId64 ", totalHb:%d", vgId, + pMeta->leader, pMeta->pHbInfo->hbStart, pMeta->pHbInfo->hbCount); + taosWLockLatch(&pMeta->lock); void* pIter = NULL; while (1) { - pIter = taosHashIterate(pMeta->pTasks, pIter); + pIter = taosHashIterate(pMeta->pTasksMap, pIter); if (pIter == NULL) { break; } @@ -900,10 +954,12 @@ void streamMetaNotifyClose(SStreamMeta* pMeta) { taosWUnLockLatch(&pMeta->lock); // wait for the stream meta hb function stopping - pMeta->hbInfo.stopFlag = STREAM_META_WILL_STOP; - while (pMeta->hbInfo.stopFlag != STREAM_META_OK_TO_STOP) { - taosMsleep(100); - qDebug("vgId:%d wait for meta to stop timer", pMeta->vgId); + if (pMeta->leader) { + pMeta->pHbInfo->stopFlag = STREAM_META_WILL_STOP; + while (pMeta->pHbInfo->stopFlag != STREAM_META_OK_TO_STOP) { + taosMsleep(100); + qDebug("vgId:%d wait for meta to stop timer", pMeta->vgId); + } } qDebug("vgId:%d start to check all tasks", vgId); @@ -917,3 +973,10 @@ void streamMetaNotifyClose(SStreamMeta* pMeta) { int64_t el = taosGetTimestampMs() - st; qDebug("vgId:%d all stream tasks are not in timer, continue close, elapsed time:%" PRId64 " ms", pMeta->vgId, el); } + +void streamMetaStartHb(SStreamMeta* pMeta) { + int64_t* pRid = taosMemoryMalloc(sizeof(int64_t)); + metaRefMgtAdd(pMeta->vgId, pRid); + *pRid = pMeta->rid; + metaHbToMnode(pRid, NULL); +} diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index 34b0a00639..6aaea2ce24 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -15,10 +15,11 @@ #include "streamInt.h" -#define MAX_STREAM_EXEC_BATCH_NUM 32 -#define MIN_STREAM_EXEC_BATCH_NUM 4 -#define STREAM_TASK_INPUT_QUEUE_CAPACITY 20480 -#define STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE (30) +#define MAX_STREAM_EXEC_BATCH_NUM 32 +#define MIN_STREAM_EXEC_BATCH_NUM 4 +#define STREAM_TASK_QUEUE_CAPACITY 20480 +#define STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE (30) +#define 
STREAM_TASK_OUTPUT_QUEUE_CAPACITY_IN_SIZE (50) // todo refactor: // read data from input queue @@ -29,6 +30,8 @@ typedef struct SQueueReader { int32_t waitDuration; // maximum wait time to format several block into a batch to process, unit: ms } SQueueReader; +static bool streamTaskHasAvailableToken(STokenBucket* pBucket); + static void streamQueueCleanup(SStreamQueue* pQueue) { void* qItem = NULL; while ((qItem = streamQueueNextItem(pQueue)) != NULL) { @@ -157,10 +160,22 @@ SStreamQueueRes streamQueueGetRes(SStreamQueue1* pQueue) { } #endif -bool streamQueueIsFull(const STaosQueue* pQueue) { - bool isFull = taosQueueItemSize((STaosQueue*) pQueue) >= STREAM_TASK_INPUT_QUEUE_CAPACITY; - double size = SIZE_IN_MB(taosQueueMemorySize((STaosQueue*) pQueue)); - return (isFull || size >= STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE); +bool streamQueueIsFull(const STaosQueue* pQueue, bool inputQ) { + bool isFull = taosQueueItemSize((STaosQueue*)pQueue) >= STREAM_TASK_QUEUE_CAPACITY; + if (isFull) { + return true; + } + + int32_t threahold = (inputQ) ? 
STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE : STREAM_TASK_OUTPUT_QUEUE_CAPACITY_IN_SIZE; + double size = SIZE_IN_MB(taosQueueMemorySize((STaosQueue*)pQueue)); + return (size >= threahold); +} + +int32_t streamQueueGetNumOfItems(const SStreamQueue* pQueue) { + int32_t numOfItems1 = taosQueueItemSize(pQueue->pQueue); + int32_t numOfItems2 = taosQallItemSize(pQueue->qall); + + return numOfItems1 + numOfItems2; } int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks) { @@ -175,6 +190,13 @@ int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInpu return TSDB_CODE_SUCCESS; } + STokenBucket* pBucket = pTask->pTokenBucket; + if (!streamTaskHasAvailableToken(pBucket)) { // no available token in th bucket, ignore this execution +// qInfo("s-task:%s no available token for sink, capacity:%d, rate:%d token/sec, quit", pTask->id.idStr, +// pBucket->capacity, pBucket->rate); + return TSDB_CODE_SUCCESS; + } + SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputInfo.queue); if (qItem == NULL) { qDebug("===stream===break batchSize:%d, %s", *numOfBlocks, id); @@ -258,15 +280,15 @@ int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInpu int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) { int8_t type = pItem->type; STaosQueue* pQueue = pTask->inputInfo.queue->pQueue; - int32_t total = taosQueueItemSize(pQueue) + 1; - double size = SIZE_IN_MB(taosQueueMemorySize(pQueue)); + int32_t total = streamQueueGetNumOfItems(pTask->inputInfo.queue) + 1; if (type == STREAM_INPUT__DATA_SUBMIT) { SStreamDataSubmit* px = (SStreamDataSubmit*)pItem; - if ((pTask->info.taskLevel == TASK_LEVEL__SOURCE) && streamQueueIsFull(pQueue)) { - qError( + if ((pTask->info.taskLevel == TASK_LEVEL__SOURCE) && streamQueueIsFull(pQueue, true)) { + double size = SIZE_IN_MB(taosQueueMemorySize(pQueue)); + qTrace( "s-task:%s inputQ is full, capacity(size:%d num:%dMiB), current(blocks:%d, 
size:%.2fMiB) stop to push data", - pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total, size); + pTask->id.idStr, STREAM_TASK_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total, size); streamDataSubmitDestroy(px); taosFreeQitem(pItem); return -1; @@ -282,32 +304,50 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) return code; } + double size = SIZE_IN_MB(taosQueueMemorySize(pQueue)); + // use the local variable to avoid the pItem be freed by other threads, since it has been put into queue already. qDebug("s-task:%s submit enqueue msgLen:%d ver:%" PRId64 ", total in queue:%d, size:%.2fMiB", pTask->id.idStr, msgLen, ver, total, size + SIZE_IN_MB(msgLen)); } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE || type == STREAM_INPUT__REF_DATA_BLOCK) { - if (streamQueueIsFull(pQueue)) { - qError("s-task:%s input queue is full, capacity:%d size:%d MiB, current(blocks:%d, size:%.2fMiB) abort", - pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total, size); + if (streamQueueIsFull(pQueue, true)) { + double size = SIZE_IN_MB(taosQueueMemorySize(pQueue)); + + qTrace("s-task:%s input queue is full, capacity:%d size:%d MiB, current(blocks:%d, size:%.2fMiB) abort", + pTask->id.idStr, STREAM_TASK_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total, size); destroyStreamDataBlock((SStreamDataBlock*)pItem); return -1; } - qDebug("s-task:%s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, total, size); int32_t code = taosWriteQitem(pQueue, pItem); if (code != TSDB_CODE_SUCCESS) { destroyStreamDataBlock((SStreamDataBlock*)pItem); return code; } + + double size = SIZE_IN_MB(taosQueueMemorySize(pQueue)); + qDebug("s-task:%s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, total, size); } else if (type == STREAM_INPUT__CHECKPOINT || type == 
STREAM_INPUT__CHECKPOINT_TRIGGER || type == STREAM_INPUT__TRANS_STATE) { - taosWriteQitem(pQueue, pItem); + int32_t code = taosWriteQitem(pQueue, pItem); + if (code != TSDB_CODE_SUCCESS) { + taosFreeQitem(pItem); + return code; + } + + double size = SIZE_IN_MB(taosQueueMemorySize(pQueue)); qDebug("s-task:%s level:%d %s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, pTask->info.taskLevel, streamGetBlockTypeStr(type), total, size); } else if (type == STREAM_INPUT__GET_RES) { // use the default memory limit, refactor later. - taosWriteQitem(pQueue, pItem); + int32_t code = taosWriteQitem(pQueue, pItem); + if (code != TSDB_CODE_SUCCESS) { + taosFreeQitem(pItem); + return code; + } + + double size = SIZE_IN_MB(taosQueueMemorySize(pQueue)); qDebug("s-task:%s data res enqueue, current(blocks:%d, size:%.2fMiB)", pTask->id.idStr, total, size); } else { ASSERT(0); @@ -320,3 +360,76 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) return 0; } + +// the result should be put into the outputQ in any cases, otherwise, the result may be lost +int32_t streamTaskPutDataIntoOutputQ(SStreamTask* pTask, SStreamDataBlock* pBlock) { + STaosQueue* pQueue = pTask->outputInfo.queue->pQueue; + + while (streamQueueIsFull(pQueue, false)) { + if (streamTaskShouldStop(&pTask->status)) { + qInfo("s-task:%s discard result block due to task stop", pTask->id.idStr); + return TSDB_CODE_STREAM_EXEC_CANCELLED; + } + + int32_t total = streamQueueGetNumOfItems(pTask->outputInfo.queue); + double size = SIZE_IN_MB(taosQueueMemorySize(pQueue)); + // let's wait for there are enough space to hold this result pBlock + qDebug("s-task:%s outputQ is full, wait for 500ms and retry, outputQ items:%d, size:%.2fMiB", pTask->id.idStr, + total, size); + taosMsleep(500); + } + + int32_t code = taosWriteQitem(pQueue, pBlock); + + int32_t total = streamQueueGetNumOfItems(pTask->outputInfo.queue); + double size = SIZE_IN_MB(taosQueueMemorySize(pQueue)); + if (code 
!= 0) { + qError("s-task:%s failed to put res into outputQ, outputQ items:%d, size:%.2fMiB code:%s, result lost", + pTask->id.idStr, total + 1, size, tstrerror(code)); + } else { + qInfo("s-task:%s data put into outputQ, outputQ items:%d, size:%.2fMiB", pTask->id.idStr, total, size); + } + + return TSDB_CODE_SUCCESS; +} + +int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t cap, int32_t rate) { + if (cap < 50 || rate < 50 || pBucket == NULL) { + qError("failed to init sink task bucket, cap:%d, rate:%d", cap, rate); + return TSDB_CODE_INVALID_PARA; + } + + pBucket->capacity = cap; + pBucket->rate = rate; + pBucket->numOfToken = cap; + pBucket->fillTimestamp = taosGetTimestampMs(); + return TSDB_CODE_SUCCESS; +} + +static void fillBucket(STokenBucket* pBucket) { + int64_t now = taosGetTimestampMs(); + int64_t delta = now - pBucket->fillTimestamp; + ASSERT(pBucket->numOfToken >= 0); + + int32_t inc = (delta / 1000.0) * pBucket->rate; + if (inc > 0) { + if ((pBucket->numOfToken + inc) < pBucket->capacity) { + pBucket->numOfToken += inc; + } else { + pBucket->numOfToken = pBucket->capacity; + } + + pBucket->fillTimestamp = now; + qDebug("new token available, current:%d, inc:%d ts:%"PRId64, pBucket->numOfToken, inc, now); + } +} + +bool streamTaskHasAvailableToken(STokenBucket* pBucket) { + fillBucket(pBucket); + if (pBucket->numOfToken > 0) { + --pBucket->numOfToken; + return true; + } else { + return false; + } +} \ No newline at end of file diff --git a/source/libs/stream/src/streamRecover.c b/source/libs/stream/src/streamRecover.c index 743d87e938..d28ec85dd5 100644 --- a/source/libs/stream/src/streamRecover.c +++ b/source/libs/stream/src/streamRecover.c @@ -21,8 +21,7 @@ typedef struct SStreamTaskRetryInfo { SStreamMeta* pMeta; - int32_t taskId; - int64_t streamId; + STaskId id; } SStreamTaskRetryInfo; static int32_t streamSetParamForScanHistory(SStreamTask* pTask); @@ -30,12 +29,19 @@ static void streamTaskSetRangeStreamCalc(SStreamTask* pTask); static 
int32_t initScanHistoryReq(SStreamTask* pTask, SStreamScanHistoryReq* pReq, int8_t igUntreated); static void streamTaskSetReady(SStreamTask* pTask, int32_t numOfReqs) { + if (pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY && pTask->info.taskLevel != TASK_LEVEL__SOURCE) { + pTask->numOfWaitingUpstream = taosArrayGetSize(pTask->pUpstreamInfoList); + qDebug("s-task:%s level:%d task wait for %d upstream tasks complete scan-history procedure, status:%s", + pTask->id.idStr, pTask->info.taskLevel, pTask->numOfWaitingUpstream, + streamGetTaskStatusStr(pTask->status.taskStatus)); + } + ASSERT(pTask->status.downstreamReady == 0); pTask->status.downstreamReady = 1; - int64_t el = (taosGetTimestampMs() - pTask->tsInfo.init); - qDebug("s-task:%s all %d downstream ready, init completed, elapsed time:%dms, task status:%s", - pTask->id.idStr, numOfReqs, (int32_t) el, streamGetTaskStatusStr(pTask->status.taskStatus)); + int64_t el = (taosGetTimestampMs() - pTask->taskExecInfo.init); + qDebug("s-task:%s all %d downstream ready, init completed, elapsed time:%"PRId64"ms, task status:%s", + pTask->id.idStr, numOfReqs, el, streamGetTaskStatusStr(pTask->status.taskStatus)); } int32_t streamStartScanHistoryAsync(SStreamTask* pTask, int8_t igUntreated) { @@ -67,6 +73,7 @@ const char* streamGetTaskStatusStr(int32_t status) { case TASK_STATUS__CK: return "check-point"; case TASK_STATUS__DROPPING: return "dropping"; case TASK_STATUS__STOP: return "stop"; + case TASK_STATUS__UNINIT: return "uninitialized"; default:return ""; } } @@ -97,11 +104,8 @@ int32_t streamTaskLaunchScanHistory(SStreamTask* pTask) { streamSetParamForScanHistory(pTask); streamTaskEnablePause(pTask); } - - streamTaskScanHistoryPrepare(pTask); } else if (pTask->info.taskLevel == TASK_LEVEL__SINK) { qDebug("s-task:%s sink task do nothing to handle scan-history", pTask->id.idStr); - streamTaskScanHistoryPrepare(pTask); } return 0; } @@ -241,6 +245,7 @@ static void doProcessDownstreamReadyRsp(SStreamTask* pTask, int32_t 
numOfReqs) { ASSERT(pTask->historyTaskId.taskId == 0); } else { qDebug("s-task:%s downstream tasks are ready, now ready for data from wal, status:%s", id, str); + streamTaskEnablePause(pTask); } } @@ -367,10 +372,6 @@ int32_t initScanHistoryReq(SStreamTask* pTask, SStreamScanHistoryReq* pReq, int8 return 0; } -int32_t streamSourceScanHistoryData(SStreamTask* pTask) { - return streamScanExec(pTask, 100); -} - int32_t streamTaskPutTranstateIntoInputQ(SStreamTask* pTask) { SStreamDataBlock* pTranstate = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, sizeof(SSDataBlock)); if (pTranstate == NULL) { @@ -402,15 +403,6 @@ int32_t streamTaskPutTranstateIntoInputQ(SStreamTask* pTask) { return TSDB_CODE_SUCCESS; } -// agg -int32_t streamTaskScanHistoryPrepare(SStreamTask* pTask) { - pTask->numOfWaitingUpstream = taosArrayGetSize(pTask->pUpstreamInfoList); - qDebug("s-task:%s level:%d task wait for %d upstream tasks complete scan-history procedure, status:%s", - pTask->id.idStr, pTask->info.taskLevel, pTask->numOfWaitingUpstream, - streamGetTaskStatusStr(pTask->status.taskStatus)); - return 0; -} - int32_t streamAggUpstreamScanHistoryFinish(SStreamTask* pTask) { void* exec = pTask->exec.pExecutor; if (pTask->info.fillHistory && qRestoreStreamOperatorOption(exec) < 0) { @@ -489,7 +481,7 @@ int32_t streamProcessScanHistoryFinishRsp(SStreamTask* pTask) { // execute in the scan history complete call back msg, ready to process data from inputQ streamSetStatusNormal(pTask); - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + streamTaskSetSchedStatusInActive(pTask); taosWLockLatch(&pMeta->lock); streamMetaSaveTask(pMeta, pTask); @@ -509,7 +501,8 @@ int32_t streamProcessScanHistoryFinishRsp(SStreamTask* pTask) { static void checkFillhistoryTaskStatus(SStreamTask* pTask, SStreamTask* pHTask) { pHTask->dataRange.range.minVer = 0; - pHTask->dataRange.range.maxVer = pTask->chkInfo.currentVer; + // the query version range should be limited to the already 
processed data + pHTask->dataRange.range.maxVer = pTask->chkInfo.nextProcessVer - 1; if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { qDebug("s-task:%s set the launch condition for fill-history s-task:%s, window:%" PRId64 " - %" PRId64 @@ -528,12 +521,10 @@ static void tryLaunchHistoryTask(void* param, void* tmrId) { SStreamTaskRetryInfo* pInfo = param; SStreamMeta* pMeta = pInfo->pMeta; - qDebug("s-task:0x%x in timer to launch related history task", pInfo->taskId); + qDebug("s-task:0x%x in timer to launch related history task", (int32_t) pInfo->id.taskId); taosWLockLatch(&pMeta->lock); - int64_t keys[2] = {pInfo->streamId, pInfo->taskId}; - - SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &pInfo->id, sizeof(pInfo->id)); if (ppTask) { ASSERT((*ppTask)->status.timerActive >= 1); @@ -549,7 +540,7 @@ static void tryLaunchHistoryTask(void* param, void* tmrId) { } taosWUnLockLatch(&pMeta->lock); - SStreamTask* pTask = streamMetaAcquireTask(pMeta, pInfo->streamId, pInfo->taskId); + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pInfo->id.streamId, pInfo->id.taskId); if (pTask != NULL) { ASSERT(pTask->status.timerActive >= 1); @@ -560,7 +551,7 @@ static void tryLaunchHistoryTask(void* param, void* tmrId) { qWarn( "s-task:%s vgId:%d status:%s failed to launch history task:0x%x, since it may not be built, or may have been " "destroyed, or should stop", - pTask->id.idStr, pMeta->vgId, pStatus, pTask->historyTaskId.taskId); + pTask->id.idStr, pMeta->vgId, pStatus, (int32_t) pTask->historyTaskId.taskId); taosTmrReset(tryLaunchHistoryTask, 100, pInfo, streamEnv.timer, &pTask->launchTaskTimer); streamMetaReleaseTask(pMeta, pTask); @@ -576,7 +567,7 @@ static void tryLaunchHistoryTask(void* param, void* tmrId) { atomic_sub_fetch_8(&pTask->status.timerActive, 1); streamMetaReleaseTask(pMeta, pTask); } else { - qError("s-task:0x%x failed to load task, it may have been 
destroyed", pInfo->taskId); + qError("s-task:0x%x failed to load task, it may have been destroyed", (int32_t) pInfo->id.taskId); } taosMemoryFree(pInfo); @@ -595,17 +586,15 @@ int32_t streamLaunchFillHistoryTask(SStreamTask* pTask) { qDebug("s-task:%s start to launch related fill-history task:0x%" PRIx64 "-0x%x", pTask->id.idStr, pTask->historyTaskId.streamId, hTaskId); - int64_t keys[2] = {pTask->historyTaskId.streamId, hTaskId}; - // Set the execute conditions, including the query time window and the version range - SStreamTask** pHTask = taosHashGet(pMeta->pTasks, keys, sizeof(keys)); + SStreamTask** pHTask = taosHashGet(pMeta->pTasksMap, &pTask->historyTaskId, sizeof(pTask->historyTaskId)); if (pHTask == NULL) { qWarn("s-task:%s vgId:%d failed to launch history task:0x%x, since it is not built yet", pTask->id.idStr, pMeta->vgId, hTaskId); SStreamTaskRetryInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamTaskRetryInfo)); - pInfo->taskId = pTask->id.taskId; - pInfo->streamId = pTask->id.streamId; + pInfo->id.taskId = pTask->id.taskId; + pInfo->id.streamId = pTask->id.streamId; pInfo->pMeta = pTask->pMeta; if (pTask->launchTaskTimer == NULL) { @@ -831,7 +820,7 @@ void streamTaskPause(SStreamTask* pTask, SStreamMeta* pMeta) { } if(pTask->info.taskLevel == TASK_LEVEL__SINK) { - int32_t num = atomic_add_fetch_32(&pMeta->pauseTaskNum, 1); + int32_t num = atomic_add_fetch_32(&pMeta->numOfPausedTasks, 1); qInfo("vgId:%d s-task:%s pause stream sink task. pause task num:%d", pMeta->vgId, pTask->id.idStr, num); return; } @@ -865,7 +854,7 @@ void streamTaskPause(SStreamTask* pTask, SStreamMeta* pMeta) { atomic_store_8(&pTask->status.keepTaskStatus, pTask->status.taskStatus); atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE); - int32_t num = atomic_add_fetch_32(&pMeta->pauseTaskNum, 1); + int32_t num = atomic_add_fetch_32(&pMeta->numOfPausedTasks, 1); qInfo("vgId:%d s-task:%s pause stream task. 
pause task num:%d", pMeta->vgId, pTask->id.idStr, num); taosWUnLockLatch(&pMeta->lock); @@ -885,10 +874,10 @@ void streamTaskResume(SStreamTask* pTask, SStreamMeta* pMeta) { if (status == TASK_STATUS__PAUSE) { pTask->status.taskStatus = pTask->status.keepTaskStatus; pTask->status.keepTaskStatus = TASK_STATUS__NORMAL; - int32_t num = atomic_sub_fetch_32(&pMeta->pauseTaskNum, 1); + int32_t num = atomic_sub_fetch_32(&pMeta->numOfPausedTasks, 1); qInfo("vgId:%d s-task:%s resume from pause, status:%s. pause task num:%d", pMeta->vgId, pTask->id.idStr, streamGetTaskStatusStr(status), num); } else if (pTask->info.taskLevel == TASK_LEVEL__SINK) { - int32_t num = atomic_sub_fetch_32(&pMeta->pauseTaskNum, 1); + int32_t num = atomic_sub_fetch_32(&pMeta->numOfPausedTasks, 1); qInfo("vgId:%d s-task:%s sink task.resume from pause, status:%s. pause task num:%d", pMeta->vgId, pTask->id.idStr, streamGetTaskStatusStr(status), num); } else { qError("s-task:%s not in pause, failed to resume, status:%s", pTask->id.idStr, streamGetTaskStatusStr(status)); diff --git a/source/libs/stream/src/streamSnapshot.c b/source/libs/stream/src/streamSnapshot.c index 8a4500dd86..feb127e313 100644 --- a/source/libs/stream/src/streamSnapshot.c +++ b/source/libs/stream/src/streamSnapshot.c @@ -343,6 +343,7 @@ int32_t streamSnapRead(SStreamSnapReader* pReader, uint8_t** ppData, int64_t* si uint8_t* buf = taosMemoryCalloc(1, sizeof(SStreamSnapBlockHdr) + kBlockSize); int64_t nread = taosPReadFile(pHandle->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pHandle->offset); if (nread == -1) { + taosMemoryFree(buf); code = TAOS_SYSTEM_ERROR(terrno); qError("%s snap failed to read snap, file name:%s, type:%d,reason:%s", STREAM_STATE_TRANSFER, item->name, item->type, tstrerror(code)); diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 711dbf65e7..ba8578f98e 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -97,9 +97,12 @@ 
int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { if (tEncodeI8(pEncoder, pTask->info.fillHistory) < 0) return -1; if (tEncodeI64(pEncoder, pTask->historyTaskId.streamId)) return -1; - if (tEncodeI32(pEncoder, pTask->historyTaskId.taskId)) return -1; + int32_t taskId = pTask->historyTaskId.taskId; + if (tEncodeI32(pEncoder, taskId)) return -1; + if (tEncodeI64(pEncoder, pTask->streamTaskId.streamId)) return -1; - if (tEncodeI32(pEncoder, pTask->streamTaskId.taskId)) return -1; + taskId = pTask->streamTaskId.taskId; + if (tEncodeI32(pEncoder, taskId)) return -1; if (tEncodeU64(pEncoder, pTask->dataRange.range.minVer)) return -1; if (tEncodeU64(pEncoder, pTask->dataRange.range.maxVer)) return -1; @@ -141,6 +144,8 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { } int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { + int32_t taskId = 0; + if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeI64(pDecoder, &pTask->ver) < 0) return -1; if (pTask->ver != SSTREAM_TASK_VER) return -1; @@ -165,9 +170,12 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tDecodeI8(pDecoder, &pTask->info.fillHistory) < 0) return -1; if (tDecodeI64(pDecoder, &pTask->historyTaskId.streamId)) return -1; - if (tDecodeI32(pDecoder, &pTask->historyTaskId.taskId)) return -1; + if (tDecodeI32(pDecoder, &taskId)) return -1; + pTask->historyTaskId.taskId = taskId; + if (tDecodeI64(pDecoder, &pTask->streamTaskId.streamId)) return -1; - if (tDecodeI32(pDecoder, &pTask->streamTaskId.taskId)) return -1; + if (tDecodeI32(pDecoder, &taskId)) return -1; + pTask->streamTaskId.taskId = taskId; if (tDecodeU64(pDecoder, &pTask->dataRange.range.minVer)) return -1; if (tDecodeU64(pDecoder, &pTask->dataRange.range.maxVer)) return -1; @@ -251,15 +259,19 @@ int32_t tDecodeStreamTaskChkInfo(SDecoder* pDecoder, SCheckpointInfo* pChkpInfo) tEndDecode(pDecoder); return 0; } -int32_t tDecodeStreamTaskId(SDecoder* pDecoder, 
SStreamTaskId* pTaskId) { + +int32_t tDecodeStreamTaskId(SDecoder* pDecoder, STaskId* pTaskId) { int64_t ver; if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeI64(pDecoder, &ver) < 0) return -1; if (ver != SSTREAM_TASK_VER) return -1; if (tDecodeI64(pDecoder, &pTaskId->streamId) < 0) return -1; - if (tDecodeI32(pDecoder, &pTaskId->taskId) < 0) return -1; + int32_t taskId = 0; + if (tDecodeI32(pDecoder, &taskId) < 0) return -1; + + pTaskId->taskId = taskId; tEndDecode(pDecoder); return 0; } @@ -277,7 +289,20 @@ static void freeUpstreamItem(void* p) { void tFreeStreamTask(SStreamTask* pTask) { int32_t taskId = pTask->id.taskId; - qDebug("free s-task:0x%x, %p, state:%p", taskId, pTask, pTask->pState); + STaskExecStatisInfo* pStatis = &pTask->taskExecInfo; + + qDebug("start to free s-task:0x%x, %p, state:%p, status:%s", taskId, pTask, pTask->pState, + streamGetTaskStatusStr(pTask->status.taskStatus)); + + qDebug("s-task:0x%x exec info: create:%" PRId64 ", init:%" PRId64 ", start:%" PRId64 + ", updateCount:%d latestUpdate:%" PRId64 ", latestCheckPoint:%" PRId64 ", ver:%" PRId64 + " nextProcessVer:%" PRId64, + taskId, pStatis->created, pStatis->init, pStatis->start, pStatis->updateCount, pStatis->latestUpdateTs, + pTask->chkInfo.checkpointId, pTask->chkInfo.checkpointVer, pTask->chkInfo.nextProcessVer); + + if (pStatis->created == 0 || pStatis->init == 0 || pStatis->start == 0) { + int32_t k = 1; + } // remove the ref by timer while (pTask->status.timerActive > 0) { @@ -332,7 +357,6 @@ void tFreeStreamTask(SStreamTask* pTask) { } pTask->pReadyMsgList = taosArrayDestroy(pTask->pReadyMsgList); - taosThreadMutexDestroy(&pTask->lock); if (pTask->msgInfo.pData != NULL) { destroyStreamDataBlock(pTask->msgInfo.pData); pTask->msgInfo.pData = NULL; @@ -356,6 +380,7 @@ void tFreeStreamTask(SStreamTask* pTask) { pTask->pUpstreamInfoList = NULL; } + taosMemoryFree(pTask->pTokenBucket); taosThreadMutexDestroy(&pTask->lock); taosMemoryFree(pTask); @@ -372,20 +397,41 @@ int32_t 
streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, i if (pTask->inputInfo.queue == NULL || pTask->outputInfo.queue == NULL) { qError("s-task:%s failed to prepare the input/output queue, initialize task failed", pTask->id.idStr); - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } - pTask->tsInfo.init = taosGetTimestampMs(); + pTask->taskExecInfo.created = taosGetTimestampMs(); pTask->inputInfo.status = TASK_INPUT_STATUS__NORMAL; pTask->outputInfo.status = TASK_OUTPUT_STATUS__NORMAL; pTask->pMeta = pMeta; - pTask->chkInfo.currentVer = ver; + pTask->chkInfo.nextProcessVer = ver; pTask->dataRange.range.maxVer = ver; pTask->dataRange.range.minVer = ver; pTask->pMsgCb = pMsgCb; - taosThreadMutexInit(&pTask->lock, NULL); + pTask->pTokenBucket = taosMemoryCalloc(1, sizeof(STokenBucket)); + if (pTask->pTokenBucket == NULL) { + qError("s-task:%s failed to prepare the tokenBucket, code:%s", pTask->id.idStr, tstrerror(TSDB_CODE_OUT_OF_MEMORY)); + return TSDB_CODE_OUT_OF_MEMORY; + } + + streamTaskInitTokenBucket(pTask->pTokenBucket, 50, 50); + + TdThreadMutexAttr attr = {0}; + int code = taosThreadMutexAttrInit(&attr); + if (code != 0) { + qError("s-task:%s initElapsed mutex attr failed, code:%s", pTask->id.idStr, tstrerror(code)); + return code; + } + + code = taosThreadMutexAttrSetType(&attr, PTHREAD_MUTEX_RECURSIVE); + if (code != 0) { + qError("s-task:%s set mutex attr recursive, code:%s", pTask->id.idStr, tstrerror(code)); + return code; + } + + taosThreadMutexInit(&pTask->lock, &attr); streamTaskOpenAllUpstreamInput(pTask); return TSDB_CODE_SUCCESS; @@ -444,7 +490,8 @@ void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpS SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamInfoList, i); if (pInfo->nodeId == nodeId) { epsetAssign(&pInfo->epSet, pEpSet); - qDebug("s-task:0x%x update the upstreamInfo, nodeId:%d newEpset:%s", pTask->id.taskId, nodeId, buf); + qDebug("s-task:0x%x update the upstreamInfo taskId:0x%x(nodeId:%d) 
newEpset:%s", pTask->id.taskId, + pInfo->taskId, nodeId, buf); break; } } @@ -474,7 +521,8 @@ void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SE if (pVgInfo->vgId == nodeId) { epsetAssign(&pVgInfo->epSet, pEpSet); - qDebug("s-task:0x%x update the dispatch info, nodeId:%d newEpset:%s", pTask->id.taskId, nodeId, buf); + qDebug("s-task:0x%x update the dispatch info, task:0x%x(nodeId:%d) newEpset:%s", pTask->id.taskId, + pVgInfo->taskId, nodeId, buf); break; } } @@ -482,7 +530,8 @@ void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SE STaskDispatcherFixedEp* pDispatcher = &pTask->fixedEpDispatcher; if (pDispatcher->nodeId == nodeId) { epsetAssign(&pDispatcher->epSet, pEpSet); - qDebug("s-task:0x%x update the dispatch info, nodeId:%d newEpSet:%s", pTask->id.taskId, nodeId, buf); + qDebug("s-task:0x%x update the dispatch info, task:0x%x(nodeId:%d) newEpSet:%s", pTask->id.taskId, + pDispatcher->taskId, nodeId, buf); } } else { // do nothing @@ -531,6 +580,16 @@ int32_t doUpdateTaskEpset(SStreamTask* pTask, int32_t nodeId, SEpSet* pEpSet) { } int32_t streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList) { + STaskExecStatisInfo* p = &pTask->taskExecInfo; + + int32_t numOfNodes = taosArrayGetSize(pNodeList); + int64_t prevTs = p->latestUpdateTs; + + p->latestUpdateTs = taosGetTimestampMs(); + p->updateCount += 1; + qDebug("s-task:%s update task nodeEp epset, updatedNodes:%d, updateCount:%d, prevTs:%" PRId64, pTask->id.idStr, + numOfNodes, p->updateCount, prevTs); + for (int32_t i = 0; i < taosArrayGetSize(pNodeList); ++i) { SNodeUpdateInfo* pInfo = taosArrayGet(pNodeList, i); doUpdateTaskEpset(pTask, pInfo->nodeId, &pInfo->newEp); @@ -551,3 +610,63 @@ void streamTaskResetUpstreamStageInfo(SStreamTask* pTask) { qDebug("s-task:%s reset all upstream tasks stage info", pTask->id.idStr); } + +int8_t streamTaskSetSchedStatusWait(SStreamTask* pTask) { + taosThreadMutexLock(&pTask->lock); + int8_t status = 
pTask->status.schedStatus; + if (status == TASK_SCHED_STATUS__INACTIVE) { + pTask->status.schedStatus = TASK_SCHED_STATUS__WAITING; + } + taosThreadMutexUnlock(&pTask->lock); + + return status; +} + +int8_t streamTaskSetSchedStatusActive(SStreamTask* pTask) { + taosThreadMutexLock(&pTask->lock); + int8_t status = pTask->status.schedStatus; + if (status == TASK_SCHED_STATUS__WAITING) { + pTask->status.schedStatus = TASK_SCHED_STATUS__ACTIVE; + } + taosThreadMutexUnlock(&pTask->lock); + + return status; +} + +int8_t streamTaskSetSchedStatusInActive(SStreamTask* pTask) { + taosThreadMutexLock(&pTask->lock); + int8_t status = pTask->status.schedStatus; + ASSERT(status == TASK_SCHED_STATUS__WAITING || status == TASK_SCHED_STATUS__ACTIVE || + status == TASK_SCHED_STATUS__INACTIVE); + pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; + taosThreadMutexUnlock(&pTask->lock); + + return status; +} + +int32_t streamBuildAndSendDropTaskMsg(SStreamTask* pTask, int32_t vgId, SStreamTaskId* pTaskId) { + SVDropStreamTaskReq *pReq = rpcMallocCont(sizeof(SVDropStreamTaskReq)); + if (pReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + pReq->head.vgId = vgId; + pReq->taskId = pTaskId->taskId; + pReq->streamId = pTaskId->streamId; + + SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_DROP, .pCont = pReq, .contLen = sizeof(SVDropStreamTaskReq)}; + int32_t code = tmsgPutToQueue(pTask->pMsgCb, WRITE_QUEUE, &msg); + if (code != TSDB_CODE_SUCCESS) { + qError("vgId:%d failed to send drop task:0x%x msg, code:%s", vgId, pTaskId->taskId, tstrerror(code)); + return code; + } + + qDebug("vgId:%d build and send drop table:0x%x msg", vgId, pTaskId->taskId); + return code; +} + +STaskId extractStreamTaskKey(const SStreamTask* pTask) { + STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; + return id; +} \ No newline at end of file diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 1ff266ad8f..edecfcb2bc 100644 --- 
a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -2562,7 +2562,11 @@ int32_t syncNodeRebuildAndCopyIfExist(SSyncNode* ths, int32_t oldtotalReplicaNum ths->logReplMgrs[i]->matchIndex, ths->logReplMgrs[i]->endIndex); } - SSyncLogReplMgr oldLogReplMgrs[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA] = {0}; + SSyncLogReplMgr* oldLogReplMgrs = NULL; + int64_t length = sizeof(SSyncLogReplMgr) * (TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA); + oldLogReplMgrs = taosMemoryMalloc(length); + if (NULL == oldLogReplMgrs) return -1; + memset(oldLogReplMgrs, 0, length); for(int i = 0; i < oldtotalReplicaNum; i++){ oldLogReplMgrs[i] = *(ths->logReplMgrs[i]); @@ -2643,6 +2647,8 @@ int32_t syncNodeRebuildAndCopyIfExist(SSyncNode* ths, int32_t oldtotalReplicaNum } } + taosMemoryFree(oldLogReplMgrs); + return 0; } diff --git a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index 9299651999..b167f2ecb6 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -59,7 +59,7 @@ SSyncLogStore* logStoreCreate(SSyncNode* pSyncNode) { ASSERT(pData->pWal != NULL); taosThreadMutexInit(&(pData->mutex), NULL); - pData->pWalHandle = walOpenReader(pData->pWal, NULL); + pData->pWalHandle = walOpenReader(pData->pWal, NULL, 0); ASSERT(pData->pWalHandle != NULL); pLogStore->syncLogUpdateCommitIndex = raftLogUpdateCommitIndex; diff --git a/source/libs/tdb/src/db/tdbBtree.c b/source/libs/tdb/src/db/tdbBtree.c index a48bae002e..7a62b38b16 100644 --- a/source/libs/tdb/src/db/tdbBtree.c +++ b/source/libs/tdb/src/db/tdbBtree.c @@ -2200,10 +2200,15 @@ int tdbBtcDelete(SBTC *pBtc) { tdbOsFree(pCell); if (pPage->nOverflow > 0) { - tdbDebug("tdb/btc-delete: btree balance after update cell, pPage/nOverflow: %p/%d.", pPage, - pPage->nOverflow); + tdbDebug("tdb/btc-delete: btree balance after update cell, pPage/nOverflow/pgno: %p/%d/%" PRIu32 ".", pPage, + pPage->nOverflow, TDB_PAGE_PGNO(pPage)); - pBtc->iPage = iPage; + 
tdbPagerReturnPage(pBtc->pBt->pPager, pBtc->pPage, pBtc->pTxn); + while (--pBtc->iPage != iPage) { + tdbPagerReturnPage(pBtc->pBt->pPager, pBtc->pgStack[pBtc->iPage], pBtc->pTxn); + } + + // pBtc->iPage = iPage; pBtc->pPage = pPage; ret = tdbBtreeBalance(pBtc); if (ret < 0) { diff --git a/source/libs/tdb/src/db/tdbPCache.c b/source/libs/tdb/src/db/tdbPCache.c index 262f3d27e6..5b8c407e95 100644 --- a/source/libs/tdb/src/db/tdbPCache.c +++ b/source/libs/tdb/src/db/tdbPCache.c @@ -295,7 +295,7 @@ static SPage *tdbPCacheFetchImpl(SPCache *pCache, const SPgid *pPgid, TXN *pTxn) } // 1. pPage == NULL - // 2. pPage && pPage->isLocal == 0 && !TDB_TXN_IS_WRITE(pTxn) + // 2. pPage && !pPage->isLocal == 0 && !TDB_TXN_IS_WRITE(pTxn) pPageH = pPage; pPage = NULL; diff --git a/source/libs/transport/src/transSvr.c b/source/libs/transport/src/transSvr.c index a53830723c..3117bbf00e 100644 --- a/source/libs/transport/src/transSvr.c +++ b/source/libs/transport/src/transSvr.c @@ -395,7 +395,7 @@ static void uvOnPipeWriteCb(uv_write_t* req, int status) { if (status == 0) { tTrace("success to dispatch conn to work thread"); } else { - tError("fail to dispatch conn to work thread"); + tError("fail to dispatch conn to work thread, code:%s", uv_strerror(status)); } if (!uv_is_closing((uv_handle_t*)req->data)) { uv_close((uv_handle_t*)req->data, uvFreeCb); diff --git a/source/libs/wal/src/walRead.c b/source/libs/wal/src/walRead.c index d9e43e4324..2eee04a27a 100644 --- a/source/libs/wal/src/walRead.c +++ b/source/libs/wal/src/walRead.c @@ -16,7 +16,7 @@ #include "taoserror.h" #include "walInt.h" -SWalReader *walOpenReader(SWal *pWal, SWalFilterCond *cond) { +SWalReader *walOpenReader(SWal *pWal, SWalFilterCond *cond, int64_t id) { SWalReader *pReader = taosMemoryCalloc(1, sizeof(SWalReader)); if (pReader == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -24,7 +24,7 @@ SWalReader *walOpenReader(SWal *pWal, SWalFilterCond *cond) { } pReader->pWal = pWal; - pReader->readerId = tGenIdPI64(); + 
pReader->readerId = (id != 0)? id:tGenIdPI64(); pReader->pIdxFile = NULL; pReader->pLogFile = NULL; pReader->curVersion = -1; @@ -75,6 +75,7 @@ int32_t walNextValidMsg(SWalReader *pReader) { terrno = TSDB_CODE_WAL_LOG_NOT_EXIST; return -1; } + while (fetchVer <= appliedVer) { if (walFetchHead(pReader, fetchVer) < 0) { return -1; @@ -257,9 +258,9 @@ int32_t walFetchHead(SWalReader *pRead, int64_t ver) { bool seeked = false; wDebug("vgId:%d, try to fetch ver %" PRId64 ", first ver:%" PRId64 ", commit ver:%" PRId64 ", last ver:%" PRId64 - ", applied ver:%" PRId64, + ", applied ver:%" PRId64", 0x%"PRIx64, pRead->pWal->cfg.vgId, ver, pRead->pWal->vers.firstVer, pRead->pWal->vers.commitVer, pRead->pWal->vers.lastVer, - pRead->pWal->vers.appliedVer); + pRead->pWal->vers.appliedVer, pRead->readerId); // TODO: valid ver if (ver > pRead->pWal->vers.commitVer) { @@ -297,7 +298,8 @@ int32_t walFetchHead(SWalReader *pRead, int64_t ver) { code = walValidHeadCksum(pRead->pHead); if (code != 0) { - wError("vgId:%d, unexpected wal log index:%" PRId64 ", since head checksum not passed", pRead->pWal->cfg.vgId, ver); + wError("vgId:%d, unexpected wal log index:%" PRId64 ", since head checksum not passed, 0x%"PRIx64, pRead->pWal->cfg.vgId, ver, + pRead->readerId); terrno = TSDB_CODE_WAL_FILE_CORRUPTED; return -1; } @@ -307,9 +309,9 @@ int32_t walFetchHead(SWalReader *pRead, int64_t ver) { int32_t walSkipFetchBody(SWalReader *pRead) { wDebug("vgId:%d, skip fetch body %" PRId64 ", first ver:%" PRId64 ", commit ver:%" PRId64 ", last ver:%" PRId64 - ", applied ver:%" PRId64, + ", applied ver:%" PRId64", 0x%"PRIx64, pRead->pWal->cfg.vgId, pRead->pHead->head.version, pRead->pWal->vers.firstVer, pRead->pWal->vers.commitVer, - pRead->pWal->vers.lastVer, pRead->pWal->vers.appliedVer); + pRead->pWal->vers.lastVer, pRead->pWal->vers.appliedVer, pRead->readerId); int64_t code = taosLSeekFile(pRead->pLogFile, pRead->pHead->head.bodyLen, SEEK_CUR); if (code < 0) { @@ -324,11 +326,13 @@ int32_t 
walSkipFetchBody(SWalReader *pRead) { int32_t walFetchBody(SWalReader *pRead) { SWalCont *pReadHead = &pRead->pHead->head; int64_t ver = pReadHead->version; + int32_t vgId = pRead->pWal->cfg.vgId; + int64_t id = pRead->readerId; wDebug("vgId:%d, fetch body %" PRId64 ", first ver:%" PRId64 ", commit ver:%" PRId64 ", last ver:%" PRId64 - ", applied ver:%" PRId64, - pRead->pWal->cfg.vgId, ver, pRead->pWal->vers.firstVer, pRead->pWal->vers.commitVer, pRead->pWal->vers.lastVer, - pRead->pWal->vers.appliedVer); + ", applied ver:%" PRId64 ", 0x%" PRIx64, + vgId, ver, pRead->pWal->vers.firstVer, pRead->pWal->vers.commitVer, pRead->pWal->vers.lastVer, + pRead->pWal->vers.appliedVer, id); if (pRead->capacity < pReadHead->bodyLen) { SWalCkHead *ptr = (SWalCkHead *)taosMemoryRealloc(pRead->pHead, sizeof(SWalCkHead) + pReadHead->bodyLen); @@ -344,26 +348,25 @@ int32_t walFetchBody(SWalReader *pRead) { if (pReadHead->bodyLen != taosReadFile(pRead->pLogFile, pReadHead->body, pReadHead->bodyLen)) { if (pReadHead->bodyLen < 0) { terrno = TAOS_SYSTEM_ERROR(errno); - wError("vgId:%d, wal fetch body error:%" PRId64 ", read request index:%" PRId64 ", since %s", - pRead->pWal->cfg.vgId, pReadHead->version, ver, tstrerror(terrno)); + wError("vgId:%d, wal fetch body error:%" PRId64 ", read request index:%" PRId64 ", since %s, 0x%"PRIx64, + vgId, pReadHead->version, ver, tstrerror(terrno), id); } else { - wError("vgId:%d, wal fetch body error:%" PRId64 ", read request index:%" PRId64 ", since file corrupted", - pRead->pWal->cfg.vgId, pReadHead->version, ver); + wError("vgId:%d, wal fetch body error:%" PRId64 ", read request index:%" PRId64 ", since file corrupted, 0x%"PRIx64, + vgId, pReadHead->version, ver, id); terrno = TSDB_CODE_WAL_FILE_CORRUPTED; } return -1; } if (pReadHead->version != ver) { - wError("vgId:%d, wal fetch body error, index:%" PRId64 ", read request index:%" PRId64, pRead->pWal->cfg.vgId, - pReadHead->version, ver); + wError("vgId:%d, wal fetch body error, index:%" 
PRId64 ", read request index:%" PRId64", 0x%"PRIx64, vgId, + pReadHead->version, ver, id); terrno = TSDB_CODE_WAL_FILE_CORRUPTED; return -1; } if (walValidBodyCksum(pRead->pHead) != 0) { - wError("vgId:%d, wal fetch body error, index:%" PRId64 ", since body checksum not passed", pRead->pWal->cfg.vgId, - ver); + wError("vgId:%d, wal fetch body error, index:%" PRId64 ", since body checksum not passed, 0x%" PRIx64, vgId, ver, id); terrno = TSDB_CODE_WAL_FILE_CORRUPTED; return -1; } diff --git a/source/libs/wal/test/walMetaTest.cpp b/source/libs/wal/test/walMetaTest.cpp index 0784db917a..70d8921be3 100644 --- a/source/libs/wal/test/walMetaTest.cpp +++ b/source/libs/wal/test/walMetaTest.cpp @@ -326,7 +326,7 @@ TEST_F(WalCleanDeleteEnv, roll) { TEST_F(WalKeepEnv, readHandleRead) { walResetEnv(); int code; - SWalReader* pRead = walOpenReader(pWal, NULL); + SWalReader* pRead = walOpenReader(pWal, NULL, 0); ASSERT(pRead != NULL); int i; @@ -387,7 +387,7 @@ TEST_F(WalRetentionEnv, repairMeta1) { ASSERT_EQ(pWal->vers.lastVer, 99); - SWalReader* pRead = walOpenReader(pWal, NULL); + SWalReader* pRead = walOpenReader(pWal, NULL, 0); ASSERT(pRead != NULL); for (int i = 0; i < 1000; i++) { diff --git a/source/os/src/osFile.c b/source/os/src/osFile.c index 1177ff562e..d5cc272726 100644 --- a/source/os/src/osFile.c +++ b/source/os/src/osFile.c @@ -819,14 +819,38 @@ int64_t taosGetLineFile(TdFilePtr pFile, char **__restrict ptrBuf) { return -1; } #ifdef WINDOWS - *ptrBuf = taosMemoryMalloc(1024); + size_t bufferSize = 512; + *ptrBuf = taosMemoryMalloc(bufferSize); if (*ptrBuf == NULL) return -1; - if (fgets(*ptrBuf, 1023, pFile->fp) == NULL) { - taosMemoryFreeClear(*ptrBuf); - return -1; + + size_t bytesRead = 0; + size_t totalBytesRead = 0; + + while (1) { + char *result = fgets(*ptrBuf + totalBytesRead, bufferSize - totalBytesRead, pFile->fp); + if (result == NULL) { + taosMemoryFreeClear(*ptrBuf); + return -1; + } + bytesRead = strlen(*ptrBuf + totalBytesRead); + totalBytesRead += 
bytesRead; + + if (totalBytesRead < bufferSize - 1 || (*ptrBuf)[totalBytesRead - 1] == '\n') { + break; + } + + bufferSize += 512; + void* newBuf = taosMemoryRealloc(*ptrBuf, bufferSize); + if (newBuf == NULL) { + taosMemoryFreeClear(*ptrBuf); + return -1; + } + + *ptrBuf = newBuf; } - (*ptrBuf)[1023] = 0; - return strlen(*ptrBuf); + + (*ptrBuf)[totalBytesRead] = '\0'; + return totalBytesRead; #else size_t len = 0; return getline(ptrBuf, &len, pFile->fp); @@ -904,10 +928,16 @@ int32_t taosCompressFile(char *srcFileName, char *destFileName) { goto cmp_end; } - dstFp = gzdopen(pFile->fd, "wb6f"); + // Both gzclose() and fclose() will close the associated fd, so they need to have different fds. + FileFd gzFd = dup(pFile->fd); + if (gzFd < 0) { + ret = -4; + goto cmp_end; + } + dstFp = gzdopen(gzFd, "wb6f"); if (dstFp == NULL) { ret = -3; - taosCloseFile(&pFile); + close(gzFd); goto cmp_end; } @@ -932,3 +962,12 @@ cmp_end: return ret; } + +int32_t taosSetFileHandlesLimit() { +#ifdef WINDOWS + const int max_handles = 8192; + int res = _setmaxstdio(max_handles); + return res == max_handles ? 
0 : -1; +#endif + return 0; +} diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 278b23b5a9..461ccc5dce 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -651,8 +651,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_TMQ_SAME_COMMITTED_VALUE, "Same committed valu // stream TAOS_DEFINE_ERROR(TSDB_CODE_STREAM_TASK_NOT_EXIST, "Stream task not exist") -TAOS_DEFINE_ERROR(TSDB_CODE_STREAM_BACKPRESSURE_OUT_OF_QUEUE,"Out of memory in stream queue") - +TAOS_DEFINE_ERROR(TSDB_CODE_STREAM_EXEC_CANCELLED, "Stream task exec cancelled") // TDLite TAOS_DEFINE_ERROR(TSDB_CODE_TDLITE_IVLD_OPEN_FLAGS, "Invalid TDLite open flags") TAOS_DEFINE_ERROR(TSDB_CODE_TDLITE_IVLD_OPEN_DIR, "Invalid TDLite open directory") diff --git a/tests/docs-examples-test/python.sh b/tests/docs-examples-test/python.sh index 2a44ee7552..5de7261e02 100644 --- a/tests/docs-examples-test/python.sh +++ b/tests/docs-examples-test/python.sh @@ -86,7 +86,7 @@ pip3 install kafka-python python3 kafka_example_consumer.py # 21 -pip3 install taos-ws-py +pip3 install taos-ws-py==0.2.6 python3 conn_websocket_pandas.py # 22 diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index cd37462cde..f1e3058058 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -95,10 +95,10 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/slimit.py -Q 3 ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/slimit.py -Q 4 -,,y,system-test,./pytest.sh python3 ./test.py -f 3-enterprise/restore/restoreDnode.py -N 5 -M 3 -,,y,system-test,./pytest.sh python3 ./test.py -f 3-enterprise/restore/restoreVnode.py -N 5 -M 3 -,,y,system-test,./pytest.sh python3 ./test.py -f 3-enterprise/restore/restoreMnode.py -N 5 -M 3 -,,y,system-test,./pytest.sh python3 ./test.py -f 3-enterprise/restore/restoreQnode.py -N 5 -M 3 +,,y,system-test,./pytest.sh python3 ./test.py -f 3-enterprise/restore/restoreDnode.py -N 5 -M 3 -i False +,,y,system-test,./pytest.sh python3 
./test.py -f 3-enterprise/restore/restoreVnode.py -N 5 -M 3 -i False +,,y,system-test,./pytest.sh python3 ./test.py -f 3-enterprise/restore/restoreMnode.py -N 5 -M 3 -i False +,,y,system-test,./pytest.sh python3 ./test.py -f 3-enterprise/restore/restoreQnode.py -N 5 -M 3 -i False ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/create_wrong_topic.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/dropDbR3ConflictTransaction.py -N 3 @@ -156,8 +156,8 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/raw_block_interface_test.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/stbTagFilter-multiCtb.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqSubscribeStb-r3.py -N 5 -,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmq3mnodeSwitch.py -N 6 -M 3 -,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmq3mnodeSwitch.py -N 6 -M 3 -n 3 +,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmq3mnodeSwitch.py -N 6 -M 3 -i True +,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmq3mnodeSwitch.py -N 6 -M 3 -n 3 -i True ,,y,system-test,./pytest.sh python3 ./test.py -f 99-TDcase/TD-19201.py ,,y,system-test,./pytest.sh python3 ./test.py -f 99-TDcase/TD-21561.py ,,y,system-test,./pytest.sh python3 ./test.py -f 99-TDcase/TS-3404.py @@ -818,7 +818,7 @@ ,,y,script,./test.sh -f tsim/dnode/balance3.sim ,,y,script,./test.sh -f tsim/vnode/replica3_many.sim ,,y,script,./test.sh -f tsim/stable/metrics_idx.sim -,,y,script,./test.sh -f tsim/db/alter_replica_13.sim +# ,,y,script,./test.sh -f tsim/db/alter_replica_13.sim ,,y,script,./test.sh -f tsim/sync/3Replica1VgElect.sim ,,y,script,./test.sh -f tsim/sync/3Replica5VgElect.sim ,,n,script,./test.sh -f tsim/valgrind/checkError6.sim @@ -831,7 +831,7 @@ ,,y,script,./test.sh -f tsim/user/privilege_table.sim ,,y,script,./test.sh -f tsim/user/privilege_create_db.sim ,,y,script,./test.sh -f tsim/db/alter_option.sim -,,y,script,./test.sh -f tsim/db/alter_replica_31.sim +# ,,y,script,./test.sh 
-f tsim/db/alter_replica_31.sim ,,y,script,./test.sh -f tsim/db/basic1.sim ,,y,script,./test.sh -f tsim/db/basic2.sim ,,y,script,./test.sh -f tsim/db/basic3.sim diff --git a/tests/parallel_test/run_case.sh b/tests/parallel_test/run_case.sh index 206f99ff3d..94928e74ae 100755 --- a/tests/parallel_test/run_case.sh +++ b/tests/parallel_test/run_case.sh @@ -81,6 +81,11 @@ pip3 list|grep taospy pip3 uninstall taospy -y pip3 install --default-timeout=120 taospy==2.7.10 +#define taos-ws-py 0.2.8 +pip3 list|grep taos-ws-py +pip3 uninstall taos-ws-py -y +pip3 install --default-timeout=120 taos-ws-py==0.2.8 + $TIMEOUT_CMD $cmd RET=$? echo "cmd exit code: $RET" diff --git a/tests/pytest/util/cluster.py b/tests/pytest/util/cluster.py index a6e3530dc9..7c653f9f2e 100644 --- a/tests/pytest/util/cluster.py +++ b/tests/pytest/util/cluster.py @@ -52,8 +52,8 @@ class ConfigureyCluster: dnode.addExtraCfg("secondEp", f"{hostname}:{startPort_sec}") # configure dnoe of independent mnodes - if num <= self.mnodeNums and self.mnodeNums != 0 and independentMnode == "True" : - tdLog.info("set mnode supportVnodes 0") + if num <= self.mnodeNums and self.mnodeNums != 0 and independentMnode == True : + tdLog.info(f"set mnode:{num} supportVnodes 0") dnode.addExtraCfg("supportVnodes", 0) # print(dnode) self.dnodes.append(dnode) diff --git a/tests/pytest/util/dnodes.py b/tests/pytest/util/dnodes.py index cd873ca0f2..c4fc1ce654 100644 --- a/tests/pytest/util/dnodes.py +++ b/tests/pytest/util/dnodes.py @@ -668,7 +668,7 @@ class TDDnodes: self.testCluster = False self.valgrind = 0 self.asan = False - self.killValgrind = 1 + self.killValgrind = 0 def init(self, path, remoteIP = ""): binPath = self.dnodes[0].getPath() + "/../../../" @@ -775,9 +775,41 @@ class TDDnodes: tdLog.info("execute finished") return + def killProcesser(self, processerName): + if platform.system().lower() == 'windows': + killCmd = ("wmic process where name=\"%s.exe\" call terminate > NUL 2>&1" % processerName) + psCmd = ("wmic 
process where name=\"%s.exe\" | findstr \"%s.exe\"" % (processerName, processerName)) + else: + killCmd = ( + "ps -ef|grep -w %s| grep -v grep | awk '{print $2}' | xargs kill -TERM > /dev/null 2>&1" + % processerName + ) + psCmd = ("ps -ef|grep -w %s| grep -v grep | awk '{print $2}'" % processerName) + + processID = "" + + try: + processID = subprocess.check_output(psCmd, shell=True) + while processID: + os.system(killCmd) + time.sleep(1) + try: + processID = subprocess.check_output(psCmd, shell=True) + except Exception as err: + processID = "" + tdLog.debug('**** kill pid warn: {err}') + except Exception as err: + processID = "" + tdLog.debug(f'**** find pid warn: {err}') + + + def stopAll(self): tdLog.info("stop all dnodes, asan:%d" % self.asan) - distro_id = distro.id() + if platform.system().lower() != 'windows': + distro_id = distro.id() + else: + distro_id = "not alpine" if self.asan and distro_id != "alpine": tdLog.info("execute script: %s" % self.stopDnodesPath) os.system(self.stopDnodesPath) @@ -792,7 +824,6 @@ class TDDnodes: if (distro_id == "alpine"): - print(distro_id) psCmd = "ps -ef | grep -w taosd | grep 'root' | grep -v grep| grep -v defunct | awk '{print $2}' | xargs" processID = subprocess.check_output(psCmd, shell=True).decode("utf-8").strip() while(processID): @@ -803,36 +834,9 @@ class TDDnodes: processID = subprocess.check_output( psCmd, shell=True).decode("utf-8").strip() elif platform.system().lower() == 'windows': - psCmd = "for /f %a in ('wmic process where \"name='taosd.exe'\" get processId ^| xargs echo ^| awk '{print $2}' ^&^& echo aa') do @(ps | grep %a | awk '{print $1}' | xargs)" - processID = subprocess.check_output(psCmd, shell=True).decode("utf-8").strip() - while(processID): - print(f"pid of taosd.exe:{processID}") - killCmd = "kill -9 %s > nul 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8").strip() - - psCmd = "for /f %a in ('wmic process where 
\"name='tmq_sim.exe'\" get processId ^| xargs echo ^| awk '{print $2}' ^&^& echo aa') do @(ps | grep %a | awk '{print $1}' | xargs)" - processID = subprocess.check_output(psCmd, shell=True).decode("utf-8").strip() - while(processID): - print(f"pid of tmq_sim.exe:{processID}") - killCmd = "kill -9 %s > nul 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8").strip() - - psCmd = "for /f %a in ('wmic process where \"name='taosBenchmark.exe'\" get processId ^| xargs echo ^| awk '{print $2}' ^&^& echo aa') do @(ps | grep %a | awk '{print $1}' | xargs)" - processID = subprocess.check_output(psCmd, shell=True).decode("utf-8").strip() - while(processID): - print(f"pid of taosBenchmark.exe:{processID}") - killCmd = "kill -9 %s > nul 2>&1" % processID - os.system(killCmd) - time.sleep(1) - processID = subprocess.check_output( - psCmd, shell=True).decode("utf-8").strip() - + self.killProcesser("taosd") + self.killProcesser("tmq_sim") + self.killProcesser("taosBenchmark") else: psCmd = "ps -ef | grep -w taosd | grep 'root' | grep -v grep| grep -v defunct | awk '{print $2}' | xargs" processID = subprocess.check_output(psCmd, shell=True).decode("utf-8").strip() @@ -849,7 +853,6 @@ class TDDnodes: time.sleep(1) processID = subprocess.check_output( psCmd, shell=True).decode("utf-8").strip() - if self.killValgrind == 1: psCmd = "ps -ef|grep -w valgrind.bin| grep -v grep | awk '{print $2}' | xargs" processID = subprocess.check_output(psCmd, shell=True).decode("utf-8").strip() diff --git a/tests/script/wtest.bat b/tests/script/wtest.bat index 88ae703b7c..7321dbfd05 100644 --- a/tests/script/wtest.bat +++ b/tests/script/wtest.bat @@ -67,7 +67,7 @@ set "FILE_NAME=testSuite.sim" if "%1" == "-f" set "FILE_NAME=%2" set FILE_NAME=%FILE_NAME:/=\% -start cmd /k "timeout /t 600 /NOBREAK && taskkill /f /im tsim.exe & exit /b" +start cmd /k "timeout /t 800 /NOBREAK && taskkill /f /im tsim.exe & exit /b" rem echo 
FILE_NAME: %FILE_NAME% echo ExcuteCmd: %tsim% -c %CFG_DIR% -f %FILE_NAME% diff --git a/tests/system-test/0-others/show.py b/tests/system-test/0-others/show.py index 4ef323db22..75d7116e03 100644 --- a/tests/system-test/0-others/show.py +++ b/tests/system-test/0-others/show.py @@ -210,6 +210,27 @@ class TDTestCase: licences_info = tdSql.queryResult tdSql.checkEqual(grants_info,licences_info) + def show_column_name(self): + tdSql.execute("create database db;") + tdSql.execute("use db;") + tdSql.execute("create table ta(ts timestamp, name nchar(16), age int , address int);") + tdSql.execute("insert into ta values(now, 'jack', 19, 23);") + + colName1 = ["ts","name","age","address"] + colName2 = tdSql.getColNameList("select last(*) from ta;") + for i in range(len(colName1)): + if colName2[i] != f"last({colName1[i]})": + tdLog.exit(f"column name is different. {colName2} != last({colName1[i]} ") + return + + # alter option + tdSql.execute("alter local 'keepColumnName' '1';") + colName3 = tdSql.getColNameList("select last(*) from ta;") + for col in colName3: + if colName1 != colName3: + tdLog.exit(f"column name is different. 
colName1= {colName1} colName2={colName3}") + return + def run(self): self.check_gitinfo() self.show_base() @@ -218,6 +239,7 @@ class TDTestCase: self.show_create_sql() self.show_create_sysdb_sql() self.show_create_systb_sql() + self.show_column_name() def stop(self): tdSql.close() diff --git a/tests/system-test/0-others/splitVGroup.py b/tests/system-test/0-others/splitVGroup.py index 9fd00892e4..ed80505ce2 100644 --- a/tests/system-test/0-others/splitVGroup.py +++ b/tests/system-test/0-others/splitVGroup.py @@ -198,7 +198,7 @@ class TDTestCase: # init def init(self, conn, logSql, replicaVar=1): - seed = time.clock_gettime(time.CLOCK_REALTIME) + seed = time.time() % 10000 random.seed(seed) self.replicaVar = int(replicaVar) tdLog.debug(f"start to excute {__file__}") diff --git a/tests/system-test/0-others/timeRangeWise.py b/tests/system-test/0-others/timeRangeWise.py index a7dc18aa82..5ef5aa4a75 100644 --- a/tests/system-test/0-others/timeRangeWise.py +++ b/tests/system-test/0-others/timeRangeWise.py @@ -210,7 +210,7 @@ class TDTestCase: # init def init(self, conn, logSql, replicaVar=1): - seed = time.clock_gettime(time.CLOCK_REALTIME) + seed = time.time() % 10000 random.seed(seed) self.replicaVar = int(replicaVar) tdLog.debug(f"start to excute {__file__}") diff --git a/tests/system-test/0-others/ttl.py b/tests/system-test/0-others/ttl.py new file mode 100644 index 0000000000..7de309c135 --- /dev/null +++ b/tests/system-test/0-others/ttl.py @@ -0,0 +1,37 @@ +import time +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * + + +class TDTestCase: + updatecfgDict = {'ttlUnit': 1, "ttlPushInterval": 1, "ttlChangeOnWrite": 0} + + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), True) + self.ttl = 5 + self.dbname = "test" + + def check_ttl_result(self): + tdSql.execute(f'create database {self.dbname}') + 
tdSql.execute(f'create table {self.dbname}.t1(ts timestamp, c1 int)') + tdSql.execute(f'create table {self.dbname}.t2(ts timestamp, c1 int) ttl {self.ttl}') + tdSql.query(f'show {self.dbname}.tables') + tdSql.checkRows(2) + tdSql.execute(f'flush database {self.dbname}') + time.sleep(self.ttl + 2) + tdSql.query(f'show {self.dbname}.tables') + tdSql.checkRows(1) + + def run(self): + self.check_ttl_result() + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/0-others/ttlChangeOnWrite.py b/tests/system-test/0-others/ttlChangeOnWrite.py new file mode 100644 index 0000000000..16c6585e07 --- /dev/null +++ b/tests/system-test/0-others/ttlChangeOnWrite.py @@ -0,0 +1,63 @@ +import time +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * + + +class TDTestCase: + updatecfgDict = {'ttlUnit': 1, "ttlPushInterval": 3, "ttlChangeOnWrite": 1, "trimVDbIntervalSec": 360, + "ttlFlushThreshold": 100, "ttlBatchDropNum": 10} + + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), True) + self.ttl = 5 + self.tables = 100 + self.dbname = "test" + + def check_batch_drop_num(self): + tdSql.execute(f'create database {self.dbname} vgroups 1') + tdSql.execute(f'use {self.dbname}') + tdSql.execute(f'create table stb(ts timestamp, c1 int) tags(t1 int)') + for i in range(self.tables): + tdSql.execute(f'create table t{i} using stb tags({i}) ttl {self.ttl}') + + tdSql.execute(f'flush database {self.dbname}') + time.sleep(self.ttl + self.updatecfgDict['ttlPushInterval'] + 1) + tdSql.query('show tables') + tdSql.checkRows(90) + + def check_ttl_result(self): + tdSql.execute(f'drop database if exists {self.dbname}') + tdSql.execute(f'create database {self.dbname}') + tdSql.execute(f'create table 
{self.dbname}.t1(ts timestamp, c1 int)') + tdSql.execute(f'create table {self.dbname}.t2(ts timestamp, c1 int) ttl {self.ttl}') + tdSql.query(f'show {self.dbname}.tables') + tdSql.checkRows(2) + + tdSql.execute(f'flush database {self.dbname}') + time.sleep(self.ttl - 1) + tdSql.execute(f'insert into {self.dbname}.t2 values(now, 1)'); + + tdSql.execute(f'flush database {self.dbname}') + time.sleep(self.ttl - 1) + tdSql.query(f'show {self.dbname}.tables') + tdSql.checkRows(2) + + tdSql.execute(f'flush database {self.dbname}') + time.sleep(self.ttl * 2) + tdSql.query(f'show {self.dbname}.tables') + tdSql.checkRows(1) + + def run(self): + self.check_batch_drop_num() + self.check_ttl_result() + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/0-others/user_privilege_multi_users.py b/tests/system-test/0-others/user_privilege_multi_users.py index 8812f42e7b..53ff136e63 100644 --- a/tests/system-test/0-others/user_privilege_multi_users.py +++ b/tests/system-test/0-others/user_privilege_multi_users.py @@ -107,6 +107,7 @@ class TDTestCase: tdLog.debug("case passed") else: tdLog.exit("The privilege number in information_schema.ins_user_privileges is incorrect") + tdSql.query("select * from information_schema.ins_columns where db_name='{self.dbname}';") def stop(self): # remove the privilege diff --git a/tests/system-test/1-insert/delete_data.py b/tests/system-test/1-insert/delete_data.py index aaad723b89..ffeb9e23a9 100644 --- a/tests/system-test/1-insert/delete_data.py +++ b/tests/system-test/1-insert/delete_data.py @@ -14,6 +14,7 @@ import random import string +import time from numpy import logspace from util import constant @@ -298,13 +299,37 @@ class TDTestCase: tdSql.query(f'select {func}(*) from {self.stbname}') tdSql.execute(f'drop table {self.stbname}') tdSql.execute(f'drop database {self.dbname}') + + def 
FIX_TS_3987(self): + tdSql.execute("create database db duration 1d vgroups 1;") + tdSql.execute("use db;") + tdSql.execute("create table t (ts timestamp, a int);") + tdSql.execute("insert into t values (1694681045000, 1);") + tdSql.execute("select * from t;") + tdSql.execute("flush database db;") + tdSql.execute("select * from t;") + tdSql.execute("delete from t where ts = 1694681045000;") + tdSql.execute("select * from t;") + tdSql.execute("insert into t values (1694581045000, 2);") + tdSql.execute("select * from t;") + tdSql.execute("flush database db;") + tdSql.query("select * from t;") + time.sleep(5) + tdSql.query("select * from t;") + + tdSql.checkRows(1) + tdSql.checkData(0, 0, 1694581045000) + tdSql.checkData(0, 1, 2) + def run(self): + self.FIX_TS_3987() self.delete_data_ntb() self.delete_data_ctb() self.delete_data_stb() tdDnodes.stoptaosd(1) tdDnodes.starttaosd(1) self.delete_data_ntb() + def stop(self): tdSql.close() tdLog.success("%s successfully executed" % __file__) diff --git a/tests/system-test/1-insert/precisionUS.py b/tests/system-test/1-insert/precisionUS.py index 1b41d66010..d634149297 100644 --- a/tests/system-test/1-insert/precisionUS.py +++ b/tests/system-test/1-insert/precisionUS.py @@ -220,7 +220,7 @@ class TDTestCase: # init def init(self, conn, logSql, replicaVar=1): - seed = time.clock_gettime(time.CLOCK_REALTIME) + seed = time.time() % 10000 random.seed(seed) self.replicaVar = int(replicaVar) tdLog.debug(f"start to excute {__file__}") diff --git a/tests/system-test/1-insert/table_param_ttl.py b/tests/system-test/1-insert/table_param_ttl.py index 6cc978a76c..f36a49a1d7 100644 --- a/tests/system-test/1-insert/table_param_ttl.py +++ b/tests/system-test/1-insert/table_param_ttl.py @@ -35,6 +35,7 @@ class TDTestCase: tdSql.execute(f'create table db.{self.ntbname}_{i} (ts timestamp,c0 int) ttl {self.ttl_param}') tdSql.query(f'show db.tables') tdSql.checkRows(self.tbnum) + tdSql.execute(f'flush database db') 
sleep(self.updatecfgDict['ttlUnit']*self.ttl_param+self.updatecfgDict['ttlPushInterval'] + 1) tdSql.query(f'show db.tables') tdSql.checkRows(0) @@ -42,6 +43,7 @@ class TDTestCase: tdSql.execute(f'create table db.{self.ntbname}_{i} (ts timestamp,c0 int) ttl {self.default_ttl}') for i in range(int(self.tbnum/2)): tdSql.execute(f'alter table db.{self.ntbname}_{i} ttl {self.modify_ttl}') + tdSql.execute(f'flush database db') sleep(self.updatecfgDict['ttlUnit']*self.modify_ttl+self.updatecfgDict['ttlPushInterval'] + 1) tdSql.query(f'show db.tables') tdSql.checkRows(self.tbnum - int(self.tbnum/2)) @@ -54,6 +56,7 @@ class TDTestCase: tdSql.execute(f'create table db.{self.stbname}_{i} using db.{self.stbname} tags({i}) ttl {self.ttl_param}') tdSql.query(f'show db.tables') tdSql.checkRows(self.tbnum) + tdSql.execute(f'flush database db') sleep(self.updatecfgDict['ttlUnit']*self.ttl_param+self.updatecfgDict['ttlPushInterval'] + 1) tdSql.query(f'show db.tables') tdSql.checkRows(0) @@ -63,6 +66,7 @@ class TDTestCase: tdSql.checkRows(self.tbnum) for i in range(int(self.tbnum/2)): tdSql.execute(f'alter table db.{self.stbname}_{i} ttl {self.modify_ttl}') + tdSql.execute(f'flush database db') sleep(self.updatecfgDict['ttlUnit']*self.modify_ttl+self.updatecfgDict['ttlPushInterval'] + 1) tdSql.query(f'show db.tables') tdSql.checkRows(self.tbnum - int(self.tbnum/2)) @@ -75,6 +79,7 @@ class TDTestCase: tdSql.execute(f'insert into db.{self.stbname}_{i} using db.{self.stbname} tags({i}) ttl {self.ttl_param} values(now,1)') tdSql.query(f'show db.tables') tdSql.checkRows(self.tbnum) + tdSql.execute(f'flush database db') sleep(self.updatecfgDict['ttlUnit']*self.ttl_param+self.updatecfgDict['ttlPushInterval'] + 1) tdSql.query(f'show db.tables') tdSql.checkRows(0) diff --git a/tests/system-test/2-query/To_unixtimestamp.py b/tests/system-test/2-query/To_unixtimestamp.py index 424ebff6c5..975abfa8b0 100644 --- a/tests/system-test/2-query/To_unixtimestamp.py +++ 
b/tests/system-test/2-query/To_unixtimestamp.py @@ -99,7 +99,7 @@ class TDTestCase: tdSql.query(f"select to_unixtimestamp('1970-01-01 08:00:00+08:00', 0);") tdSql.checkEqual(tdSql.queryResult[0][0], 0) tdSql.query(f"select to_unixtimestamp('1970-01-01 00:00:00', 1);") - tdSql.checkData(0, 0, '1970-01-01 00:00:00') + tdSql.checkData(0, 0, datetime.datetime(1970, 1, 1, 0, 0, 0)) tdSql.error(f"select to_unixtimestamp('1970-01-01 08:00:00+08:00', 2);") tdSql.error(f"select to_unixtimestamp('1970-01-01 08:00:00+08:00', 1.5);") tdSql.error(f"select to_unixtimestamp('1970-01-01 08:00:00+08:00', 'abc');") diff --git a/tests/system-test/2-query/diff.py b/tests/system-test/2-query/diff.py index c6f233eefa..0a2f750f93 100644 --- a/tests/system-test/2-query/diff.py +++ b/tests/system-test/2-query/diff.py @@ -16,6 +16,9 @@ class TDTestCase: self.perfix = 'dev' self.tables = 10 + def check_result(self): + for i in range(self.rowNum): + tdSql.checkData(i, 0, 1); def run(self): tdSql.prepare() @@ -179,11 +182,6 @@ class TDTestCase: tdSql.error(f"select diff(col8) from {dbname}.stb_1") tdSql.error(f"select diff(col9) from {dbname}.stb") tdSql.error(f"select diff(col9) from {dbname}.stb_1") - tdSql.error(f"select diff(col11) from {dbname}.stb_1") - tdSql.error(f"select diff(col12) from {dbname}.stb_1") - tdSql.error(f"select diff(col13) from {dbname}.stb_1") - tdSql.error(f"select diff(col14) from {dbname}.stb_1") - tdSql.error(f"select diff(col14) from {dbname}.stb_1") tdSql.error(f"select diff(col1,col1,col1) from {dbname}.stb_1") tdSql.error(f"select diff(col1,1,col1) from {dbname}.stb_1") tdSql.error(f"select diff(col1,col1,col) from {dbname}.stb_1") @@ -217,6 +215,22 @@ class TDTestCase: tdSql.query(f"select diff(col6) from {dbname}.stb_1") tdSql.checkRows(10) + tdSql.query(f"select diff(col11) from {dbname}.stb_1") + tdSql.checkRows(10) + self.check_result() + + tdSql.query(f"select diff(col12) from {dbname}.stb_1") + tdSql.checkRows(10) + self.check_result() + + 
tdSql.query(f"select diff(col13) from {dbname}.stb_1") + tdSql.checkRows(10) + self.check_result() + + tdSql.query(f"select diff(col14) from {dbname}.stb_1") + tdSql.checkRows(10) + self.check_result() + tdSql.execute(f'''create table {dbname}.stb1(ts timestamp, col1 tinyint, col2 smallint, col3 int, col4 bigint, col5 float, col6 double, col7 bool, col8 binary(20), col9 nchar(20), col11 tinyint unsigned, col12 smallint unsigned, col13 int unsigned, col14 bigint unsigned) tags(loc nchar(20))''') tdSql.execute(f"create table {dbname}.stb1_1 using {dbname}.stb tags('shanghai')") diff --git a/tests/system-test/2-query/fill_with_group.py b/tests/system-test/2-query/fill_with_group.py index 393102c8ed..b442647ff4 100644 --- a/tests/system-test/2-query/fill_with_group.py +++ b/tests/system-test/2-query/fill_with_group.py @@ -137,6 +137,12 @@ class TDTestCase: sql = "select _wstart, _wend, count(ts), sum(c1) from meters where ts > '2018-11-25 00:00:00.000' and ts < '2018-11-26 00:00:00.00' interval(1d) fill(NULL) order by _wstart desc" tdSql.query(sql) tdSql.checkRows(1) + sql = "select _wstart, count(*) from meters where ts > '2018-08-20 00:00:00.000' and ts < '2018-09-30 00:00:00.000' interval(9d) fill(NULL) order by _wstart desc;" + tdSql.query(sql) + tdSql.checkRows(6) + sql = "select _wstart, count(*) from meters where ts > '2018-08-20 00:00:00.000' and ts < '2018-09-30 00:00:00.000' interval(9d) fill(NULL) order by _wstart;" + tdSql.query(sql) + tdSql.checkRows(6) def run(self): self.prepareTestEnv() diff --git a/tests/system-test/2-query/orderBy.py b/tests/system-test/2-query/orderBy.py index fed1651b3a..fa447cbca4 100644 --- a/tests/system-test/2-query/orderBy.py +++ b/tests/system-test/2-query/orderBy.py @@ -220,7 +220,7 @@ class TDTestCase: # init def init(self, conn, logSql, replicaVar=1): - seed = time.clock_gettime(time.CLOCK_REALTIME) + seed = time.time() % 10000 random.seed(seed) self.replicaVar = int(replicaVar) tdLog.debug(f"start to excute {__file__}") 
diff --git a/tests/system-test/2-query/smaBasic.py b/tests/system-test/2-query/smaBasic.py index c221a70605..a82d190e2b 100644 --- a/tests/system-test/2-query/smaBasic.py +++ b/tests/system-test/2-query/smaBasic.py @@ -269,7 +269,7 @@ class TDTestCase: # init def init(self, conn, logSql, replicaVar=1): - seed = time.clock_gettime(time.CLOCK_REALTIME) + seed = time.time() % 10000 random.seed(seed) self.replicaVar = int(replicaVar) tdLog.debug(f"start to excute {__file__}") diff --git a/tests/system-test/2-query/stbJoin.py b/tests/system-test/2-query/stbJoin.py index 677704648c..6eb95349fe 100644 --- a/tests/system-test/2-query/stbJoin.py +++ b/tests/system-test/2-query/stbJoin.py @@ -112,6 +112,18 @@ class TDTestCase: tdSql.query(f"select a.* from sta a join stb b on a.tg1 != b.tg1 and a.ts=b.ts;") tdSql.checkRows(36) + tdSql.query(f"select a.* from sta a join stb b on a.ts=b.ts and a.ts is null;") + tdSql.checkRows(0) + + tdSql.query(f"select a.* from sta a join stb b on a.ts=b.ts and a.ts is not null;") + tdSql.checkRows(48) + + tdSql.query(f"select a.* from sta a ,stb b where a.ts=b.ts and a.ts is null;") + tdSql.checkRows(0) + + tdSql.query(f"select a.* from sta a ,stb b where a.ts=b.ts and a.ts is not null;") + tdSql.checkRows(48) + # tdSql.checkData(0,1,10) tdSql.error(f"select a.* from sta a join stb b on a.tg1=b.tg1 where a.ts=b.ts or a.tg2=b.tg2;") diff --git a/tests/system-test/2-query/union.py b/tests/system-test/2-query/union.py index 9086d7754d..547ab07eb0 100644 --- a/tests/system-test/2-query/union.py +++ b/tests/system-test/2-query/union.py @@ -249,6 +249,9 @@ class TDTestCase: tdSql.checkRows(14) tdSql.query(f"select derivative(c1, 1s, 0) from (select * from {dbname}.t1 union select * from {dbname}.t1 order by ts)") tdSql.checkRows(11) + tdSql.query(f"select count(*) from {dbname}.t1 as a join {dbname}.t1 as b on a.ts = b.ts and a.ts is null") + tdSql.checkRows(1) + tdSql.checkData(0, 0, 0) tdSql.error(f"select first(c1) from (select * from 
{dbname}.t1 union select * from {dbname}.t1)") tdSql.error(f"select last(c1) from (select * from {dbname}.t1 union select * from {dbname}.t1)") diff --git a/tests/system-test/6-cluster/5dnode3mnodeRecreateMnode.py b/tests/system-test/6-cluster/5dnode3mnodeRecreateMnode.py index 650bc347aa..7af5982dec 100644 --- a/tests/system-test/6-cluster/5dnode3mnodeRecreateMnode.py +++ b/tests/system-test/6-cluster/5dnode3mnodeRecreateMnode.py @@ -166,22 +166,23 @@ class TDTestCase: nodePort = 6030 + i*100 newTdSql=tdCom.newTdSql(port=nodePort) + + tdDnodes[1].stoptaosd() + dataPath = tdDnodes[1].dataDir os.system(f"rm -rf {dataPath}/*") os.system(f"rm -rf {dataPath}/.runing") - tdDnodes[1].stoptaosd() tdDnodes[1].starttaosd() sleep(5) for i in range(6): nodePort = 6030 + i*100 newTdSql=tdCom.newTdSql(port=nodePort) + tdDnodes[0].stoptaosd() dataPath = tdDnodes[0].dataDir os.system(f"rm -rf {dataPath}/*") os.system(f"rm -rf {dataPath}/.runing") - - tdDnodes[0].stoptaosd() tdDnodes[0].starttaosd() sleep(5) for i in range(6): diff --git a/tests/system-test/6-cluster/clusterCommonCheck.py b/tests/system-test/6-cluster/clusterCommonCheck.py index 5e5568c5c5..85d670a325 100644 --- a/tests/system-test/6-cluster/clusterCommonCheck.py +++ b/tests/system-test/6-cluster/clusterCommonCheck.py @@ -245,9 +245,12 @@ class ClusterComCheck: tdLog.exit(f"vgroup number of {db_name} is not correct") if self.db_replica == 1 : if tdSql.queryResult[0][4] == 'leader' and tdSql.queryResult[1][4] == 'leader' and tdSql.queryResult[last_number][4] == 'leader': - ready_time= (count + 1) - tdLog.success(f"all vgroups of {db_name} are leaders in {count + 1} s") - return True + tdSql.query(f"select `replica` from information_schema.ins_databases where `name`='{db_name}';") + print("db replica :",tdSql.queryResult[0][0]) + if tdSql.queryResult[0][0] == db_replica: + ready_time= (count + 1) + tdLog.success(f"all vgroups with replica {self.db_replica} of {db_name} are leaders in {count + 1} s") + return True 
count+=1 elif self.db_replica == 3 : vgroup_status_first=[tdSql.queryResult[0][4],tdSql.queryResult[0][6],tdSql.queryResult[0][8]] @@ -255,13 +258,16 @@ class ClusterComCheck: vgroup_status_last=[tdSql.queryResult[last_number][4],tdSql.queryResult[last_number][6],tdSql.queryResult[last_number][8]] if vgroup_status_first.count('leader') == 1 and vgroup_status_first.count('follower') == 2: if vgroup_status_last.count('leader') == 1 and vgroup_status_last.count('follower') == 2: - ready_time= (count + 1) - tdLog.success(f"elections of {db_name}.vgroups are ready in {ready_time} s") - return True + tdSql.query(f"select `replica` from information_schema.ins_databases where `name`='{db_name}';") + print("db replica :",tdSql.queryResult[0][0]) + if tdSql.queryResult[0][0] == db_replica: + ready_time= (count + 1) + tdLog.success(f"elections of {db_name}.vgroups with replica {self.db_replica} are ready in {ready_time} s") + return True count+=1 else: tdLog.debug(tdSql.queryResult) - tdLog.notice(f"elections of {db_name} all vgroups are failed in {count} s ") + tdLog.notice(f"elections of {db_name} all vgroups with replica {self.db_replica} are failed in {count} s ") caller = inspect.getframeinfo(inspect.stack()[1][0]) args = (caller.filename, caller.lineno) tdLog.exit("%s(%d) failed " % args) diff --git a/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertLessDataAlterRep3to1to3.py b/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertLessDataAlterRep3to1to3.py index aa0c7a0177..9ab47764c8 100644 --- a/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertLessDataAlterRep3to1to3.py +++ b/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertLessDataAlterRep3to1to3.py @@ -177,7 +177,7 @@ class TDTestCase: tdSql.query("select count(*) from %s"%stableName) tdSql.checkData(0,0,rowsPerStb) - clusterComCheck.check_vgroups_status(vgroup_numbers=paraDict["vgroups"],db_replica=replica1,db_name=paraDict["dbName"],count_number=40) + 
clusterComCheck.check_vgroups_status(vgroup_numbers=paraDict["vgroups"],db_replica=replica1,db_name=paraDict["dbName"],count_number=100) sleep(5) tdLog.info(f"show transactions;alter database db0_0 replica {replica3};") TdSqlEx.execute(f'show transactions;') diff --git a/tests/system-test/7-tmq/tmqMaxGroupIds.py b/tests/system-test/7-tmq/tmqMaxGroupIds.py index 3bdba8bb6c..5049ee5bd7 100644 --- a/tests/system-test/7-tmq/tmqMaxGroupIds.py +++ b/tests/system-test/7-tmq/tmqMaxGroupIds.py @@ -206,7 +206,7 @@ class TDTestCase: while (1): tdSql.query('show subscriptions;') subscribeNum = tdSql.queryRows - tdLog.info(" get subscriptions count: %d"%(subscribeNum)) + tdLog.info(" get subscriptions count: %d, expected:%d"%(subscribeNum, expectSubscribeNum)) if subscribeNum == expectSubscribeNum: flag = 1 break diff --git a/tests/system-test/8-stream/pause_resume_test.py b/tests/system-test/8-stream/pause_resume_test.py index f5f1cf07fa..421f499a3d 100644 --- a/tests/system-test/8-stream/pause_resume_test.py +++ b/tests/system-test/8-stream/pause_resume_test.py @@ -11,9 +11,12 @@ class TDTestCase: tdLog.debug("start to execute %s" % __file__) tdSql.init(conn.cursor(), logSql) self.tdCom = tdCom + self.date_time = 1694054350870 + self.interval = 15 def pause_resume_test(self, interval, partition="tbname", delete=False, fill_history_value=None, pause=True, resume=True, ignore_untreated=False): tdLog.info(f"*** testing stream pause+resume: interval: {interval}, partition: {partition}, delete: {delete}, fill_history: {fill_history_value}, ignore_untreated: {ignore_untreated} ***") + date_time = self.date_time if_exist_value_list = [None, True] if_exist = random.choice(if_exist_value_list) reverse_check = True if ignore_untreated else False @@ -54,20 +57,20 @@ class TDTestCase: self.tdCom.create_stream(stream_name=f'{self.ctb_name}{self.tdCom.stream_suffix}', des_table=self.tdCom.ctb_stream_des_table, source_sql=f'select _wstart AS wstart, {self.tdCom.stb_source_select_str} from 
{self.ctb_name} {partition_elm} interval({self.tdCom.dataDict["interval"]}s)', trigger_mode="at_once", subtable_value=ctb_subtable_value, fill_history_value=fill_history_value) self.tdCom.create_stream(stream_name=f'{self.tb_name}{self.tdCom.stream_suffix}', des_table=self.tdCom.tb_stream_des_table, source_sql=f'select _wstart AS wstart, {self.tdCom.tb_source_select_str} from {self.tb_name} {partition_elm} interval({self.tdCom.dataDict["interval"]}s)', trigger_mode="at_once", subtable_value=tb_subtable_value, fill_history_value=fill_history_value) for i in range(range_count): - ts_value = str(self.tdCom.date_time+self.tdCom.dataDict["interval"])+f'+{i*10}s' + ts_value = str(date_time+self.tdCom.dataDict["interval"])+f'+{i*10}s' ts_cast_delete_value = self.tdCom.time_cast(ts_value) self.tdCom.sinsert_rows(tbname=self.ctb_name, ts_value=ts_value) if self.tdCom.update and i%2 == 0: self.tdCom.sinsert_rows(tbname=self.ctb_name, ts_value=ts_value) if self.delete and i%2 != 0: self.tdCom.sdelete_rows(tbname=self.ctb_name, start_ts=ts_cast_delete_value) - self.tdCom.date_time += 1 + date_time += 1 self.tdCom.sinsert_rows(tbname=self.tb_name, ts_value=ts_value) if self.tdCom.update and i%2 == 0: self.tdCom.sinsert_rows(tbname=self.tb_name, ts_value=ts_value) if self.delete and i%2 != 0: self.tdCom.sdelete_rows(tbname=self.tb_name, start_ts=ts_cast_delete_value) - self.tdCom.date_time += 1 + date_time += 1 if partition: partition_elm = f'partition by {partition}' else: @@ -140,8 +143,8 @@ class TDTestCase: for delete in [True, False]: for fill_history_value in [0, 1]: # pause/resume - self.pause_resume_test(interval=random.randint(10, 15), partition="tbname", ignore_untreated=False, fill_history_value=fill_history_value, delete=delete) - self.pause_resume_test(interval=random.randint(10, 15), partition="tbname", ignore_untreated=True, fill_history_value=fill_history_value, delete=delete) + self.pause_resume_test(interval=self.interval, partition="tbname", 
ignore_untreated=False, fill_history_value=fill_history_value, delete=delete) + self.pause_resume_test(interval=self.interval, partition="tbname", ignore_untreated=True, fill_history_value=fill_history_value, delete=delete) # self.pause_resume_test(interval=random.randint(10, 15), partition="tbname", resume=False, fill_history_value=fill_history_value, delete=delete) def stop(self): @@ -151,4 +154,4 @@ class TDTestCase: event = threading.Event() tdCases.addLinux(__file__, TDTestCase()) -tdCases.addWindows(__file__, TDTestCase()) \ No newline at end of file +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/8-stream/scalar_function.py b/tests/system-test/8-stream/scalar_function.py index 56537e2f54..3bc44a7dc7 100644 --- a/tests/system-test/8-stream/scalar_function.py +++ b/tests/system-test/8-stream/scalar_function.py @@ -6,7 +6,8 @@ from util.cases import * from util.common import * class TDTestCase: - updatecfgDict = {'debugFlag': 135, 'asynclog': 0} + updatecfgDict = {'vdebugFlag': 143, 'qdebugflag':135, 'tqdebugflag':135, 'udebugflag':135, 'rpcdebugflag':135, + 'asynclog': 0} def init(self, conn, logSql, replicaVar=1): self.replicaVar = int(replicaVar) tdLog.debug("start to execute %s" % __file__) diff --git a/tests/system-test/test.py b/tests/system-test/test.py index 1c50e5bbbe..6813530a5c 100644 --- a/tests/system-test/test.py +++ b/tests/system-test/test.py @@ -35,6 +35,7 @@ from util.taosadapter import * import taos import taosrest +import taosws def checkRunTimeError(): import win32gui @@ -105,12 +106,13 @@ if __name__ == "__main__": queryPolicy = 1 createDnodeNums = 1 restful = False + websocket = False replicaVar = 1 asan = False - independentMnode = True + independentMnode = False previousCluster = False - opts, args = getopt.gnu_getopt(sys.argv[1:], 'f:p:m:l:scghrd:k:e:N:M:Q:C:RD:n:i:aP', [ - 'file=', 'path=', 'master', 'logSql', 'stop', 'cluster', 'valgrind', 'help', 'restart', 'updateCfgDict', 'killv', 
'execCmd','dnodeNums','mnodeNums','queryPolicy','createDnodeNums','restful','adaptercfgupdate','replicaVar','independentMnode','previous']) + opts, args = getopt.gnu_getopt(sys.argv[1:], 'f:p:m:l:scghrd:k:e:N:M:Q:C:RWD:n:i:aP', [ + 'file=', 'path=', 'master', 'logSql', 'stop', 'cluster', 'valgrind', 'help', 'restart', 'updateCfgDict', 'killv', 'execCmd','dnodeNums','mnodeNums','queryPolicy','createDnodeNums','restful','websocket','adaptercfgupdate','replicaVar','independentMnode','previous']) for key, value in opts: if key in ['-h', '--help']: tdLog.printNoPrefix( @@ -131,6 +133,7 @@ if __name__ == "__main__": tdLog.printNoPrefix('-Q set queryPolicy in one dnode') tdLog.printNoPrefix('-C create Dnode Numbers in one cluster') tdLog.printNoPrefix('-R restful realization form') + tdLog.printNoPrefix('-W websocket connection') tdLog.printNoPrefix('-D taosadapter update cfg dict ') tdLog.printNoPrefix('-n the number of replicas') tdLog.printNoPrefix('-i independentMnode Mnode') @@ -177,7 +180,7 @@ if __name__ == "__main__": sys.exit(0) if key in ['-k', '--killValgrind']: - killValgrind = 0 + killValgrind = 1 if key in ['-e', '--execCmd']: try: @@ -199,10 +202,13 @@ if __name__ == "__main__": createDnodeNums = value if key in ['-i', '--independentMnode']: - independentMnode = False + independentMnode = value if key in ['-R', '--restful']: restful = True + + if key in ['-W', '--websocket']: + websocket = True if key in ['-a', '--asan']: asan = True @@ -224,7 +230,7 @@ if __name__ == "__main__": # do exeCmd command # if not execCmd == "": - if restful: + if restful or websocket: tAdapter.init(deployPath) else: tdDnodes.init(deployPath) @@ -263,7 +269,7 @@ if __name__ == "__main__": if valgrind: time.sleep(2) - if restful: + if restful or websocket: toBeKilled = "taosadapter" # killCmd = "ps -ef|grep -w %s| grep -v grep | awk '{print $2}' | xargs kill -TERM > /dev/null 2>&1" % toBeKilled @@ -358,7 +364,7 @@ if __name__ == "__main__": tdDnodes.deploy(1,updateCfgDict) 
tdDnodes.start(1) tdCases.logSql(logSql) - if restful: + if restful or websocket: tAdapter.deploy(adapter_cfg_dict) tAdapter.start() @@ -366,6 +372,8 @@ if __name__ == "__main__": queryPolicy=int(queryPolicy) if restful: conn = taosrest.connect(url=f"http://{host}:6041",timezone="utc") + elif websocket: + conn = taosws.connect(f"taosws://root:taosdata@{host}:6041") else: conn = taos.connect(host,config=tdDnodes.getSimCfgPath()) @@ -395,14 +403,16 @@ if __name__ == "__main__": tdDnodes.starttaosd(dnode.index) tdCases.logSql(logSql) - if restful: + if restful or websocket: tAdapter.deploy(adapter_cfg_dict) tAdapter.start() - if not restful: - conn = taos.connect(host,config=tdDnodes.getSimCfgPath()) - else: + if restful: conn = taosrest.connect(url=f"http://{host}:6041",timezone="utc") + elif websocket: + conn = taosws.connect(f"taosws://root:taosdata@{host}:6041") + else: + conn = taos.connect(host,config=tdDnodes.getSimCfgPath()) # tdLog.info(tdDnodes.getSimCfgPath(),host) if createDnodeNums == 1: createDnodeNums=dnodeNums @@ -419,6 +429,8 @@ if __name__ == "__main__": queryPolicy=int(queryPolicy) if restful: conn = taosrest.connect(url=f"http://{host}:6041",timezone="utc") + elif websocket: + conn = taosws.connect(f"taosws://root:taosdata@{host}:6041") else: conn = taos.connect(host,config=tdDnodes.getSimCfgPath()) @@ -438,10 +450,12 @@ if __name__ == "__main__": if ucase is not None and hasattr(ucase, 'noConn') and ucase.noConn == True: conn = None else: - if not restful: - conn = taos.connect(host="%s"%(host), config=tdDnodes.sim.getCfgDir()) + if restful: + conn = taosrest.connect(url=f"http://{host}:6041",timezone="utc") + elif websocket: + conn = taosws.connect(f"taosws://root:taosdata@{host}:6041") else: - conn = taosrest.connect(url=f"http://{host}:6041",timezone="utc") + conn = taos.connect(host=f"{host}", config=tdDnodes.getSimCfgPath()) if testCluster: tdLog.info("Procedures for testing cluster") @@ -451,10 +465,12 @@ if __name__ == "__main__": 
tdCases.runOneCluster(fileName) else: tdLog.info("Procedures for testing self-deployment") - if not restful: - conn = taos.connect(host,config=tdDnodes.getSimCfgPath()) + if restful: + conn = taosrest.connect(url=f"http://{host}:6041",timezone="utc") + elif websocket: + conn = taosws.connect(f"taosws://root:taosdata@{host}:6041") else: - conn = taosrest.connect(url=f"http://{host}:6041",timezone="utc") + conn = taos.connect(host=f"{host}", config=tdDnodes.getSimCfgPath()) if fileName == "all": tdCases.runAllWindows(conn) @@ -470,10 +486,12 @@ if __name__ == "__main__": tdDnodes.stopAll() tdDnodes.start(1) time.sleep(1) - if not restful: - conn = taos.connect( host, config=tdDnodes.getSimCfgPath()) - else: + if restful: conn = taosrest.connect(url=f"http://{host}:6041",timezone="utc") + elif websocket: + conn = taosws.connect(f"taosws://root:taosdata@{host}:6041") + else: + conn = taos.connect(host=f"{host}", config=tdDnodes.getSimCfgPath()) tdLog.info("Procedures for tdengine deployed in %s" % (host)) tdLog.info("query test after taosd restart") tdCases.runOneWindows(conn, sp[0] + "_" + "restart.py", replicaVar) @@ -505,7 +523,7 @@ if __name__ == "__main__": except: pass - if restful: + if restful or websocket: tAdapter.init(deployPath, masterIp) tAdapter.stop(force_kill=True) @@ -515,16 +533,18 @@ if __name__ == "__main__": tdDnodes.start(1) tdCases.logSql(logSql) - if restful: + if restful or websocket: tAdapter.deploy(adapter_cfg_dict) tAdapter.start() if queryPolicy != 1: queryPolicy=int(queryPolicy) - if not restful: - conn = taos.connect(host,config=tdDnodes.getSimCfgPath()) - else: + if restful: conn = taosrest.connect(url=f"http://{host}:6041",timezone="utc") + elif websocket: + conn = taosws.connect(f"taosws://root:taosdata@{host}:6041") + else: + conn = taos.connect(host=f"{host}", config=tdDnodes.getSimCfgPath()) # tdSql.init(conn.cursor()) # tdSql.execute("create qnode on dnode 1") # tdSql.execute('alter local "queryPolicy" "%d"'%queryPolicy) @@ -567,15 
+587,17 @@ if __name__ == "__main__": tdDnodes.starttaosd(dnode.index) tdCases.logSql(logSql) - if restful: + if restful or websocket: tAdapter.deploy(adapter_cfg_dict) tAdapter.start() # create taos connect - if not restful: - conn = taos.connect(host,config=tdDnodes.getSimCfgPath()) + if restful: + conn = taosrest.connect(url=f"http://{host}:6041",timezone="utc") + elif websocket: + conn = taosws.connect(f"taosws://root:taosdata@{host}:6041") else: - conn = taosrest.connect(url=f"http://{host}:6041",timezone="utc") + conn = taos.connect(host=f"{host}", config=tdDnodes.getSimCfgPath()) print(tdDnodes.getSimCfgPath(),host) if createDnodeNums == 1: createDnodeNums=dnodeNums @@ -595,8 +617,10 @@ if __name__ == "__main__": queryPolicy=int(queryPolicy) if restful: conn = taosrest.connect(url=f"http://{host}:6041",timezone="utc") + elif websocket: + conn = taosws.connect(f"taosws://root:taosdata@{host}:6041") else: - conn = taos.connect(host,config=tdDnodes.getSimCfgPath()) + conn = taos.connect(host=f"{host}", config=tdDnodes.getSimCfgPath()) cursor = conn.cursor() cursor.execute("create qnode on dnode 1") @@ -621,10 +645,12 @@ if __name__ == "__main__": tdCases.runOneCluster(fileName) else: tdLog.info("Procedures for testing self-deployment") - if not restful: - conn = taos.connect(host,config=tdDnodes.getSimCfgPath()) + if restful: + conn = taosrest.connect(url=f"http://{host}:6041",timezone="utc") + elif websocket: + conn = taosws.connect(f"taosws://root:taosdata@{host}:6041") else: - conn = taosrest.connect(url=f"http://{host}:6041",timezone="utc") + conn = taos.connect(host=f"{host}", config=tdDnodes.getSimCfgPath()) if fileName == "all": tdCases.runAllLinux(conn) @@ -641,10 +667,12 @@ if __name__ == "__main__": tdDnodes.stopAll() tdDnodes.start(1) time.sleep(1) - if not restful: - conn = taos.connect( host, config=tdDnodes.getSimCfgPath()) - else: + if restful: conn = taosrest.connect(url=f"http://{host}:6041",timezone="utc") + elif websocket: + conn = 
taosws.connect(f"taosws://root:taosdata@{host}:6041") + else: + conn = taos.connect(host=f"{host}", config=tdDnodes.getSimCfgPath()) tdLog.info("Procedures for tdengine deployed in %s" % (host)) tdLog.info("query test after taosd restart") tdCases.runOneLinux(conn, sp[0] + "_" + "restart.py", replicaVar) diff --git a/tools/shell/src/shellWebsocket.c b/tools/shell/src/shellWebsocket.c index ff2e5efdd4..e83ceff099 100644 --- a/tools/shell/src/shellWebsocket.c +++ b/tools/shell/src/shellWebsocket.c @@ -260,7 +260,7 @@ void shellRunSingleCommandWebsocketImp(char *command) { WS_RES* res; for (int reconnectNum = 0; reconnectNum < 2; reconnectNum++) { - if (!shell.ws_conn && shell_conn_ws_server(0)) { + if (!shell.ws_conn && shell_conn_ws_server(0) || shell.stop_query) { return; } st = taosGetTimestampUs();