diff --git a/Jenkinsfile2 b/Jenkinsfile2 index 8ddee6dbbd..1ab6a4a8cd 100644 --- a/Jenkinsfile2 +++ b/Jenkinsfile2 @@ -355,7 +355,7 @@ pipeline { } parallel { stage('check docs') { - agent{label " slave1_47 || slave1_48 || slave1_49 || slave1_52 || worker03 || slave215 || slave217 || slave219 || Mac_catalina "} + agent{label " slave1_47 || slave1_48 || slave1_49 || slave1_50 || slave1_52 || slave1_59 || slave1_63 || worker03 || slave215 || slave217 || slave219 || Mac_catalina "} steps { check_docs() } @@ -401,7 +401,7 @@ pipeline { } } stage('linux test') { - agent{label " slave1_47 || slave1_48 || slave1_49 || slave1_52 || worker03 || slave215 || slave217 || slave219 "} + agent{label " slave1_47 || slave1_48 || slave1_49 || slave1_50 || slave1_52 || slave1_59 || slave1_63 || worker03 || slave215 || slave217 || slave219 "} options { skipDefaultCheckout() } when { changeRequest() diff --git a/cmake/cmake.define b/cmake/cmake.define index 7710c071eb..7db6baafab 100644 --- a/cmake/cmake.define +++ b/cmake/cmake.define @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.0) -set(CMAKE_VERBOSE_MAKEFILE FALSE) +set(CMAKE_VERBOSE_MAKEFILE TRUE) set(TD_BUILD_TAOSA_INTERNAL FALSE) #set output directory @@ -159,6 +159,7 @@ ELSE () CHECK_C_COMPILER_FLAG("-mavx2" COMPILER_SUPPORT_AVX2) CHECK_C_COMPILER_FLAG("-mavx512f" COMPILER_SUPPORT_AVX512F) CHECK_C_COMPILER_FLAG("-mavx512vbmi" COMPILER_SUPPORT_AVX512BMI) + CHECK_C_COMPILER_FLAG("-mavx512vl" COMPILER_SUPPORT_AVX512VL) IF (COMPILER_SUPPORT_SSE42) SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.2") @@ -166,11 +167,11 @@ ELSE () ENDIF() IF ("${SIMD_SUPPORT}" MATCHES "true") - IF (COMPILER_SUPPORT_FMA) + IF (COMPILER_SUPPORT_FMA) SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma") - ENDIF() - IF (COMPILER_SUPPORT_AVX) + ENDIF() + IF (COMPILER_SUPPORT_AVX) SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx") ENDIF() @@ -183,7 +184,13 @@ ELSE () IF (COMPILER_SUPPORT_AVX512F AND COMPILER_SUPPORT_AVX512BMI) SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512vbmi") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512vbmi") - MESSAGE(STATUS "avx512 supported by gcc") + MESSAGE(STATUS "avx512f/avx512bmi supported by compiler") + ENDIF() + + IF (COMPILER_SUPPORT_AVX512VL) + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512vl") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512vl") + MESSAGE(STATUS "avx512vl supported by compiler") ENDIF() ENDIF() diff --git a/cmake/cmake.options b/cmake/cmake.options index d34c34dd89..e12de3e4e6 100644 --- a/cmake/cmake.options +++ b/cmake/cmake.options @@ -151,6 +151,7 @@ IF(${BUILD_S3}) IF(${BUILD_WITH_S3}) +add_definitions(-DUSE_S3) option(BUILD_WITH_COS "If build with cos" OFF) ELSE () diff --git a/docs/en/05-get-started/03-package.md b/docs/en/05-get-started/03-package.md index 3e3c04682f..d34df2c970 100644 --- a/docs/en/05-get-started/03-package.md +++ b/docs/en/05-get-started/03-package.md @@ -243,7 +243,7 @@ You can use the TDengine CLI to monitor your TDengine deployment and execute ad taos ``` -The TDengine CLI displays a welcome message and version information to indicate that its connection to the TDengine service was successful. If an error message is displayed, see the [FAQ](/train-faq/faq) for troubleshooting information. At the following prompt, you can execute SQL commands. +The TDengine CLI displays a welcome message and version information to indicate that its connection to the TDengine service was successful. 
If an error message is displayed, see the [FAQ](../../train-faq/faq) for troubleshooting information. At the following prompt, you can execute SQL commands. ```cmd taos> diff --git a/docs/en/05-get-started/_pkg_install.mdx b/docs/en/05-get-started/_pkg_install.mdx index 32d7c1f376..2372d2ff26 100644 --- a/docs/en/05-get-started/_pkg_install.mdx +++ b/docs/en/05-get-started/_pkg_install.mdx @@ -10,7 +10,7 @@ Between official releases, beta versions may be released that contain new featur -For information about installing TDengine, see [Install and Uninstall](/operation/pkg-install). +For information about installing TDengine, see [Install and Uninstall](../../operation/pkg-install). For information about TDengine releases, see [All Downloads](https://tdengine.com/all-downloads) diff --git a/docs/en/05-get-started/index.md b/docs/en/05-get-started/index.md index cc3b4826dd..5c4ab59f75 100644 --- a/docs/en/05-get-started/index.md +++ b/docs/en/05-get-started/index.md @@ -12,7 +12,7 @@ import StackOverflowSVG from './stackoverflow.svg' You can install and run TDengine on Linux/Windows/macOS machines as well as Docker containers. You can also deploy TDengine as a managed service with TDengine Cloud. -The full package of TDengine includes the TDengine Server (`taosd`), TDengine Client (`taosc`), taosAdapter for connecting with third-party systems and providing a RESTful interface, a command-line interface, and some tools. In addition to connectors for multiple languages, TDengine also provides a [RESTful interface](/reference/rest-api) through [taosAdapter](/reference/taosadapter). +The full package of TDengine includes the TDengine Server (`taosd`), TDengine Client (`taosc`), taosAdapter for connecting with third-party systems and providing a RESTful interface, a command-line interface, and some tools. In addition to connectors for multiple languages, TDengine also provides a [RESTful interface](../reference/rest-api) through [taosAdapter](../reference/taosadapter). ```mdx-code-block import DocCardList from '@theme/DocCardList'; diff --git a/docs/en/07-develop/01-connect/_connect_java.mdx b/docs/en/07-develop/01-connect/_connect_java.mdx index 538e27fcc3..fda86f2221 100644 --- a/docs/en/07-develop/01-connect/_connect_java.mdx +++ b/docs/en/07-develop/01-connect/_connect_java.mdx @@ -12,4 +12,4 @@ When using REST connection, the feature of bulk pulling can be enabled if the si {{#include docs/examples/java/src/main/java/com/taos/example/WSConnectExample.java:main}} ``` -More configuration about connection, please refer to [Java Connector](/reference/connector/java) +More configuration about connection, please refer to [Java Connector](../../reference/connector/java) diff --git a/docs/en/07-develop/01-connect/index.md b/docs/en/07-develop/01-connect/index.md index 3ca44783c4..3f09f9fb6a 100644 --- a/docs/en/07-develop/01-connect/index.md +++ b/docs/en/07-develop/01-connect/index.md @@ -22,7 +22,7 @@ import VerifyLinux from "../../14-reference/03-connector/_verify_linux.mdx"; import VerifyWindows from "../../14-reference/03-connector/_verify_windows.mdx"; import VerifyMacOS from "../../14-reference/03-connector/_verify_macos.mdx"; -Any application running on any platform can access TDengine through the REST API provided by TDengine. For information, see [REST API](/reference/rest-api/). Applications can also use the connectors for various programming languages, including C/C++, Java, Python, Go, Node.js, C#, and Rust, to access TDengine. 
These connectors support connecting to TDengine clusters using both native interfaces (taosc). Some connectors also support connecting over a REST interface. Community developers have also contributed several unofficial connectors, such as the ADO.NET connector, the Lua connector, and the PHP connector. +Any application running on any platform can access TDengine through the REST API provided by TDengine. For information, see [REST API](../../reference/rest-api/). Applications can also use the connectors for various programming languages, including C/C++, Java, Python, Go, Node.js, C#, and Rust, to access TDengine. These connectors support connecting to TDengine clusters using both native interfaces (taosc). Some connectors also support connecting over a REST interface. Community developers have also contributed several unofficial connectors, such as the ADO.NET connector, the Lua connector, and the PHP connector. ## Establish Connection @@ -36,7 +36,7 @@ For REST and native connections, connectors provide similar APIs for performing Key differences: 3. The REST connection is more accessible with cross-platform support, however it results in a 30% performance downgrade. -1. The TDengine client driver (taosc) has the highest performance with all the features of TDengine like [Parameter Binding](/reference/connector/cpp#parameter-binding-api), [Subscription](/reference/connector/cpp#subscription-and-consumption-api), etc. +1. The TDengine client driver (taosc) has the highest performance with all the features of TDengine like [Parameter Binding](../../reference/connector/cpp#parameter-binding-api), [Subscription](../../reference/connector/cpp#subscription-and-consumption-api), etc. ## Install Client Driver taosc diff --git a/docs/en/07-develop/02-model/index.mdx b/docs/en/07-develop/02-model/index.mdx index 4524a66a41..a18e1932ab 100644 --- a/docs/en/07-develop/02-model/index.mdx +++ b/docs/en/07-develop/02-model/index.mdx @@ -3,9 +3,9 @@ title: Data Model description: This document describes the data model of TDengine. --- -The data model employed by TDengine is similar to that of a relational database. You have to create databases and tables. You must design the data model based on your own business and application requirements. You should design the [STable](/concept/#super-table-stable) (an abbreviation for super table) schema to fit your data. This chapter will explain the big picture without getting into syntactical details. +The data model employed by TDengine is similar to that of a relational database. You have to create databases and tables. You must design the data model based on your own business and application requirements. You should design the [STable](../../concept/#super-table-stable) (an abbreviation for super table) schema to fit your data. This chapter will explain the big picture without getting into syntactical details. -Note: before you read this chapter, please make sure you have already read through [Key Concepts](/concept/), since TDengine introduces new concepts like "one table for one [data collection point](/concept/#data-collection-point)" and "[super table](/concept/#super-table-stable)". +Note: before you read this chapter, please make sure you have already read through [Key Concepts](../../concept/), since TDengine introduces new concepts like "one table for one [data collection point](../../concept/#data-collection-point)" and "[super table](../../concept/#super-table-stable)". 
## Create Database @@ -22,7 +22,7 @@ In the above SQL statement: - a new data file will be created every 10 days - the size of the write cache pool on each VNode is 16 MB - the number of vgroups is 100 -- WAL is enabled but fsync is disabled For more details please refer to [Database](/taos-sql/database). +- WAL is enabled but fsync is disabled For more details please refer to [Database](../../taos-sql/database). After creating a database, the current database in use can be switched using SQL command `USE`. For example the SQL statement below switches the current database to `power`. @@ -41,13 +41,13 @@ Without the current database specified, table name must be preceded with the cor ## Create STable -In a time-series application, there may be multiple kinds of data collection points. For example, in the electrical power system there are meters, transformers, bus bars, switches, etc. For easy and efficient aggregation of multiple tables, one STable needs to be created for each kind of data collection point. For example, for the meters in [table 1](/concept/#model_table1), the SQL statement below can be used to create the super table. +In a time-series application, there may be multiple kinds of data collection points. For example, in the electrical power system there are meters, transformers, bus bars, switches, etc. For easy and efficient aggregation of multiple tables, one STable needs to be created for each kind of data collection point. For example, for the meters in [table 1](../../concept/#model_table1), the SQL statement below can be used to create the super table. ```sql CREATE STABLE meters (ts timestamp, current float, voltage int, phase float) TAGS (location binary(64), groupId int); ``` -Similar to creating a regular table, when creating a STable, the name and schema need to be provided. In the STable schema, the first column must always be a timestamp (like ts in the example), and the other columns (like current, voltage and phase in the example) are the data collected. The remaining columns can [contain data of type](/taos-sql/data-type/) integer, float, double, string etc. In addition, the schema for tags, like location and groupId in the example, must be provided. The tag type can be integer, float, string, etc. Tags are essentially the static properties of a data collection point. For example, properties like the location, device type, device group ID, manager ID are tags. Tags in the schema can be added, removed or updated. Please refer to [STable](/taos-sql/stable) for more details. +Similar to creating a regular table, when creating a STable, the name and schema need to be provided. In the STable schema, the first column must always be a timestamp (like ts in the example), and the other columns (like current, voltage and phase in the example) are the data collected. The remaining columns can [contain data of type](../../taos-sql/data-type/) integer, float, double, string etc. In addition, the schema for tags, like location and groupId in the example, must be provided. The tag type can be integer, float, string, etc. Tags are essentially the static properties of a data collection point. For example, properties like the location, device type, device group ID, manager ID are tags. Tags in the schema can be added, removed or updated. Please refer to [STable](../../taos-sql/stable) for more details. For each kind of data collection point, a corresponding STable must be created. There may be many STables in an application. 
For electrical power system, we need to create a STable respectively for meters, transformers, busbars, switches. There may be multiple kinds of data collection points on a single device, for example there may be one data collection point for electrical data like current and voltage and another data collection point for environmental data like temperature, humidity and wind direction. Multiple STables are required for these kinds of devices. @@ -61,7 +61,7 @@ A specific table needs to be created for each data collection point. Similar to CREATE TABLE d1001 USING meters TAGS ("California.SanFrancisco", 2); ``` -In the above SQL statement, "d1001" is the table name, "meters" is the STable name, followed by the value of tag "Location" and the value of tag "groupId", which are "California.SanFrancisco" and "2" respectively in the example. The tag values can be updated after the table is created. Please refer to [Tables](/taos-sql/table) for details. +In the above SQL statement, "d1001" is the table name, "meters" is the STable name, followed by the value of tag "Location" and the value of tag "groupId", which are "California.SanFrancisco" and "2" respectively in the example. The tag values can be updated after the table is created. Please refer to [Tables](../../taos-sql/table) for details. It's suggested to use the globally unique ID of a data collection point as the table name. For example the device serial number could be used as a unique ID. If a unique ID doesn't exist, multiple IDs that are not globally unique can be combined to form a globally unique ID. It's not recommended to use a globally unique ID as tag value. @@ -75,7 +75,7 @@ INSERT INTO d1001 USING meters TAGS ("California.SanFrancisco", 2) VALUES (now, In the above SQL statement, a row with value `(now, 10.2, 219, 0.32)` will be inserted into table "d1001". If table "d1001" doesn't exist, it will be created automatically using STable "meters" as template with tag value `"California.SanFrancisco", 2`. -For more details please refer to [Create Table Automatically](/taos-sql/insert#automatically-create-table-when-inserting). +For more details please refer to [Create Table Automatically](../../taos-sql/insert#automatically-create-table-when-inserting). ## Single Column vs Multiple Column diff --git a/docs/en/07-develop/03-insert-data/01-sql-writing.mdx b/docs/en/07-develop/03-insert-data/01-sql-writing.mdx index 4d1b67e451..1e719283cc 100644 --- a/docs/en/07-develop/03-insert-data/01-sql-writing.mdx +++ b/docs/en/07-develop/03-insert-data/01-sql-writing.mdx @@ -33,7 +33,7 @@ The below SQL statement is used to insert one row into table "d1001". INSERT INTO d1001 VALUES (ts1, 10.3, 219, 0.31); ``` -`ts1` is Unix timestamp, the timestamps which is larger than the difference between current time and KEEP in config is only allowed. For further detail, refer to [TDengine SQL insert timestamp section](/taos-sql/insert). +`ts1` is Unix timestamp, the timestamps which is larger than the difference between current time and KEEP in config is only allowed. For further detail, refer to [TDengine SQL insert timestamp section](../../../taos-sql/insert). ### Insert Multiple Rows @@ -43,7 +43,7 @@ Multiple rows can be inserted in a single SQL statement. The example below inser INSERT INTO d1001 VALUES (ts2, 10.2, 220, 0.23) (ts2, 10.3, 218, 0.25); ``` -`ts1` and `ts2` is Unix timestamp, the timestamps which is larger than the difference between current time and KEEP in config is only allowed. 
For further detail, refer to [TDengine SQL insert timestamp section](/taos-sql/insert). +`ts1` and `ts2` is Unix timestamp, the timestamps which is larger than the difference between current time and KEEP in config is only allowed. For further detail, refer to [TDengine SQL insert timestamp section](../../../taos-sql/insert). ### Insert into Multiple Tables @@ -53,9 +53,9 @@ Data can be inserted into multiple tables in the same SQL statement. The example INSERT INTO d1001 VALUES (ts1, 10.3, 219, 0.31) (ts2, 12.6, 218, 0.33) d1002 VALUES (ts3, 12.3, 221, 0.31); ``` -`ts1`, `ts2` and `ts3` is Unix timestamp, the timestamps which is larger than the difference between current time and KEEP in config is only allowed. For further detail, refer to [TDengine SQL insert timestamp section](/taos-sql/insert). +`ts1`, `ts2` and `ts3` is Unix timestamp, the timestamps which is larger than the difference between current time and KEEP in config is only allowed. For further detail, refer to [TDengine SQL insert timestamp section](../../../taos-sql/insert). -For more details about `INSERT` please refer to [INSERT](/taos-sql/insert). +For more details about `INSERT` please refer to [INSERT](../../../taos-sql/insert). :::info diff --git a/docs/en/07-develop/03-insert-data/20-kafka-writting.mdx b/docs/en/07-develop/03-insert-data/20-kafka-writting.mdx index 89ca10b669..e8de010d5b 100644 --- a/docs/en/07-develop/03-insert-data/20-kafka-writting.mdx +++ b/docs/en/07-develop/03-insert-data/20-kafka-writting.mdx @@ -35,7 +35,7 @@ bin/kafka-topics.sh --bootstrap-server=localhost:9092 --describe ## Insert into TDengine -We can write data into TDengine via SQL or Schemaless. For more information, please refer to [Insert Using SQL](/develop/insert-data/sql-writing/) or [High Performance Writing](/develop/insert-data/high-volume/) or [Schemaless Writing](/reference/schemaless/). +We can write data into TDengine via SQL or Schemaless. For more information, please refer to [Insert Using SQL](../sql-writing/) or [High Performance Writing](../high-volume/) or [Schemaless Writing](../../../reference/schemaless/). ## Examples diff --git a/docs/en/07-develop/03-insert-data/30-influxdb-line.mdx b/docs/en/07-develop/03-insert-data/30-influxdb-line.mdx index bd430d5973..b90b92510a 100644 --- a/docs/en/07-develop/03-insert-data/30-influxdb-line.mdx +++ b/docs/en/07-develop/03-insert-data/30-influxdb-line.mdx @@ -46,7 +46,7 @@ meters,location=California.LosAngeles,groupid=2 current=13.4,voltage=223,phase=0 ::: -For more details please refer to [InfluxDB Line Protocol](https://docs.influxdata.com/influxdb/v2.0/reference/syntax/line-protocol/) and [TDengine Schemaless](/reference/schemaless/#Schemaless-Line-Protocol) +For more details please refer to [InfluxDB Line Protocol](https://docs.influxdata.com/influxdb/v2.0/reference/syntax/line-protocol/) and [TDengine Schemaless](../../../reference/schemaless/#Schemaless-Line-Protocol) ## Examples diff --git a/docs/en/07-develop/04-query-data/index.mdx b/docs/en/07-develop/04-query-data/index.mdx index 5a05d22d85..413baf6b2c 100644 --- a/docs/en/07-develop/04-query-data/index.mdx +++ b/docs/en/07-develop/04-query-data/index.mdx @@ -128,7 +128,7 @@ For more information, see [Aggregate by Window](../../taos-sql/distinguished). ### Query -In the section describing [Insert](/develop/insert-data/sql-writing), a database named `power` is created and some data are inserted into STable `meters`. Below sample code demonstrates how to query the data in this STable. 
+In the section describing [Insert](../insert-data/sql-writing), a database named `power` is created and some data are inserted into STable `meters`. Below sample code demonstrates how to query the data in this STable. diff --git a/docs/en/07-develop/index.md b/docs/en/07-develop/index.md index 8f80b82b97..4ed5e8c19f 100644 --- a/docs/en/07-develop/index.md +++ b/docs/en/07-develop/index.md @@ -6,7 +6,7 @@ description: This document describes how to use the various components of TDengi Before creating an application to process time-series data with TDengine, consider the following: 1. Choose the method to connect to TDengine. TDengine offers a REST API that can be used with any programming language. It also has connectors for a variety of languages. -2. Design the data model based on your own use cases. Consider the main [concepts](/concept/) of TDengine, including "one table per data collection point" and the supertable. Learn about static labels, collected metrics, and subtables. Depending on the characteristics of your data and your requirements, you decide to create one or more databases and design a supertable schema that fit your data. +2. Design the data model based on your own use cases. Consider the main [concepts](../concept/) of TDengine, including "one table per data collection point" and the supertable. Learn about static labels, collected metrics, and subtables. Depending on the characteristics of your data and your requirements, you decide to create one or more databases and design a supertable schema that fit your data. 3. Decide how you will insert data. TDengine supports writing using standard SQL, but also supports schemaless writing, so that data can be written directly without creating tables manually. 4. Based on business requirements, find out what SQL query statements need to be written. You may be able to repurpose any existing SQL. 5. If you want to run real-time analysis based on time series data, including various dashboards, use the TDengine stream processing component instead of deploying complex systems such as Spark or Flink. @@ -14,7 +14,7 @@ Before creating an application to process time-series data with TDengine, consid 7. In many use cases (such as fleet management), the application needs to obtain the latest status of each data collection point. It is recommended that you use the cache function of TDengine instead of deploying Redis separately. 8. If you find that the SQL functions of TDengine cannot meet your requirements, then you can use user-defined functions to solve the problem. -This section is organized in the order described above. For ease of understanding, TDengine provides sample code for each supported programming language for each function. If you want to learn more about the use of SQL, please read the [SQL manual](/taos-sql/). For a more in-depth understanding of the use of each connector, please read the [Connector Reference Guide](/reference/connector/). If you also want to integrate TDengine with third-party systems, such as Grafana, please refer to the [third-party tools](/third-party/). +This section is organized in the order described above. For ease of understanding, TDengine provides sample code for each supported programming language for each function. If you want to learn more about the use of SQL, please read the [SQL manual](../taos-sql/). For a more in-depth understanding of the use of each connector, please read the [Connector Reference Guide](../reference/connector/). 
If you also want to integrate TDengine with third-party systems, such as Grafana, please refer to the [third-party tools](../third-party/). If you encounter any problems during the development process, please click ["Submit an issue"](https://github.com/taosdata/TDengine/issues/new/choose) at the bottom of each page and submit it on GitHub right away. diff --git a/docs/en/10-deployment/01-deploy.md b/docs/en/10-deployment/01-deploy.md index 4a6a3aae41..6e7e0d1b7c 100644 --- a/docs/en/10-deployment/01-deploy.md +++ b/docs/en/10-deployment/01-deploy.md @@ -72,7 +72,7 @@ For all the dnodes in a TDengine cluster, the below parameters must be configure ## Start Cluster -The first dnode can be started following the instructions in [Get Started](/get-started/). Then TDengine CLI `taos` can be launched to execute command `show dnodes`, the output is as following for example: +The first dnode can be started following the instructions in [Get Started](../../get-started/). Then TDengine CLI `taos` can be launched to execute the command `show dnodes`; the output is as follows, for example: ``` taos> show dnodes; @@ -90,7 +90,7 @@ From the above output, it is shown that the end point of the started dnode is "h There are a few steps necessary to add other dnodes in the cluster. -Second, we can start `taosd` as instructed in [Get Started](/get-started/). +Second, we can start `taosd` as instructed in [Get Started](../../get-started/). Then, on the first dnode i.e. h1.tdengine.com in our example, use TDengine CLI `taos` to execute the following command: diff --git a/docs/en/12-taos-sql/26-udf.md b/docs/en/12-taos-sql/26-udf.md index b533b98b3d..f86b535927 100644 --- a/docs/en/12-taos-sql/26-udf.md +++ b/docs/en/12-taos-sql/26-udf.md @@ -53,7 +53,7 @@ CREATE AGGREGATE FUNCTION function_name AS library_path OUTPUTTYPE output_type [ CREATE AGGREGATE FUNCTION l2norm AS "/home/taos/udf_example/libl2norm.so" OUTPUTTYPE DOUBLE bufsize 64; ``` -For more information about user-defined functions, see [User-Defined Functions](/develop/udf). +For more information about user-defined functions, see [User-Defined Functions](../../develop/udf). ## Manage UDF diff --git a/docs/en/13-operation/10-monitor.md b/docs/en/13-operation/10-monitor.md index f1be4c5fd3..009db425a4 100644 --- a/docs/en/13-operation/10-monitor.md +++ b/docs/en/13-operation/10-monitor.md @@ -41,7 +41,7 @@ Launch `TDinsight.sh` with the command above and restart Grafana, then open Dash ## log database -The data of tdinsight dashboard is stored in `log` database (default. You can change it in taoskeeper's config file. For more infrmation, please reference to [taoskeeper document](/reference/taosKeeper)). The taoskeeper will create log database on taoskeeper startup. +The data of the TDinsight dashboard is stored in the `log` database by default (you can change this in taoskeeper's config file; for more information, please refer to the [taoskeeper document](../../reference/taosKeeper)). taosKeeper creates the log database on startup.
### cluster\_info table diff --git a/docs/en/14-reference/02-rest-api/02-rest-api.mdx b/docs/en/14-reference/02-rest-api/02-rest-api.mdx index 4da987213c..8f8e966f8d 100644 --- a/docs/en/14-reference/02-rest-api/02-rest-api.mdx +++ b/docs/en/14-reference/02-rest-api/02-rest-api.mdx @@ -514,4 +514,4 @@ Response body: ## Reference -[taosAdapter](/reference/taosadapter/) +[taosAdapter](../taosadapter/) diff --git a/docs/en/14-reference/03-connector/03-cpp.mdx b/docs/en/14-reference/03-connector/03-cpp.mdx index f6ebf0fe47..27adb58c12 100644 --- a/docs/en/14-reference/03-connector/03-cpp.mdx +++ b/docs/en/14-reference/03-connector/03-cpp.mdx @@ -24,7 +24,7 @@ The dynamic libraries for the TDengine client driver are located in. ## Supported platforms -Please refer to [list of supported platforms](/reference/connector#supported-platforms) +Please refer to [list of supported platforms](../#supported-platforms) ## Supported versions @@ -32,7 +32,7 @@ The version number of the TDengine client driver and the version number of the T ## Installation Steps -Please refer to the [Installation Steps](/reference/connector#installation-steps) for TDengine client driver installation +Please refer to the [Installation Steps](../#installation-steps) for TDengine client driver installation ## Establishing a connection @@ -394,7 +394,7 @@ The specific functions related to the interface are as follows (see also the [pr ### Schemaless Writing API -In addition to writing data using the SQL method or the parameter binding API, writing can also be done using schemaless writing, which eliminates the need to create a super table/data sub-table structure in advance and writes the data directly. The TDengine system automatically creates and maintains the required table structure based on the written data content. The use of schemaless writing is described in the chapter [Schemaless Writing](/reference/schemaless/), and the C/C++ API used with it is described here. +In addition to writing data using the SQL method or the parameter binding API, writing can also be done using schemaless writing, which eliminates the need to create a super table/data sub-table structure in advance and writes the data directly. The TDengine system automatically creates and maintains the required table structure based on the written data content. The use of schemaless writing is described in the chapter [Schemaless Writing](../../schemaless/), and the C/C++ API used with it is described here. - `TAOS_RES* taos_schemaless_insert(TAOS* taos, const char* lines[], int numLines, int protocol, int precision)` diff --git a/docs/en/14-reference/03-connector/04-java.mdx b/docs/en/14-reference/03-connector/04-java.mdx index f770ce0d5d..039395cc30 100644 --- a/docs/en/14-reference/03-connector/04-java.mdx +++ b/docs/en/14-reference/03-connector/04-java.mdx @@ -148,7 +148,7 @@ TDengine currently supports timestamp, number, character, Boolean type, and the **Note**: Only TAG supports JSON types Due to historical reasons, the BINARY type data in TDengine is not truly binary data and is no longer recommended for use. Please use VARBINARY type instead. -GEOMETRY type is binary data in little endian byte order, which complies with the WKB specification. For detailed information, please refer to [Data Type] (/tao-sql/data-type/#Data Types) +GEOMETRY type is binary data in little endian byte order, which complies with the WKB specification. 
For detailed information, please refer to [Data Type](../../../taos-sql/data-type/) For WKB specifications, please refer to [Well Known Binary (WKB)]( https://libgeos.org/specifications/wkb/ ) For Java connector, the jts library can be used to easily create GEOMETRY type objects, serialize them, and write them to TDengine. Here is an example [Geometry example](https://github.com/taosdata/TDengine/blob/3.0/examples/JDBC/JDBCDemo/src/main/java/com/taosdata/example/GeometryDemo.java) @@ -160,7 +160,7 @@ For Java connector, the jts library can be used to easily create GEOMETRY type o Before using Java Connector to connect to the database, the following conditions are required. - Java 1.8 or above runtime environment and Maven 3.6 or above installed -- TDengine client driver installed (required for native connections, not required for REST connections), please refer to [Installing Client Driver](/reference/connector#Install-Client-Driver) +- TDengine client driver installed (required for native connections, not required for REST connections), please refer to [Installing Client Driver](../#Install-Client-Driver) ### Install the connectors @@ -368,7 +368,7 @@ The configuration parameters in properties are as follows. - TSDBDriver.PROPERTY_KEY_MESSAGE_WAIT_TIMEOUT: message transmission timeout in milliseconds, the default value is 60000 ms. It only takes effect when using JDBC REST connection and batchfetch is true. - TSDBDriver.PROPERTY_KEY_USE_SSL: connecting Securely Using SSL. true: using SSL connection, false: not using SSL connection. It only takes effect when using JDBC REST connection. - TSDBDriver.HTTP_POOL_SIZE: size of REST concurrent requests. The default value is 20. - For JDBC native connections, you can specify other parameters, such as log level, SQL length, etc., by specifying URL and Properties. For more detailed configuration, please refer to [Client Configuration](/reference/config/#Client-Only). + For JDBC native connections, you can specify other parameters, such as log level, SQL length, etc., by specifying URL and Properties. For more detailed configuration, please refer to [Client Configuration](../../config/#Client-Only). ### Priority of configuration parameters diff --git a/docs/en/14-reference/03-connector/05-go.mdx b/docs/en/14-reference/03-connector/05-go.mdx index a0be7a4a02..33f7a93439 100644 --- a/docs/en/14-reference/03-connector/05-go.mdx +++ b/docs/en/14-reference/03-connector/05-go.mdx @@ -74,7 +74,7 @@ If it is a TDengine error, you can get the error code and error information in t ### Pre-installation preparation * Install Go development environment (Go 1.14 and above, GCC 4.8.5 and above) -* If you use the native connector, please install the TDengine client driver. Please refer to [Install Client Driver](/reference/connector/#install-client-driver) for specific steps +* If you use the native connector, please install the TDengine client driver. Please refer to [Install Client Driver](../#install-client-driver) for specific steps Configure the environment variables and check the command. diff --git a/docs/en/14-reference/03-connector/06-rust.mdx b/docs/en/14-reference/03-connector/06-rust.mdx index 5a44b161cb..0981df6724 100644 --- a/docs/en/14-reference/03-connector/06-rust.mdx +++ b/docs/en/14-reference/03-connector/06-rust.mdx @@ -80,7 +80,7 @@ Note: Only TAG supports JSON types ### Pre-installation preparation * Install the Rust development toolchain -* If using the native connection, please install the TDengine client driver. 
Please refer to [install client driver](/reference/connector#install-client-driver) +* If using the native connection, please install the TDengine client driver. Please refer to [install client driver](../#install-client-driver) ### Install the connectors diff --git a/docs/en/14-reference/03-connector/07-python.mdx b/docs/en/14-reference/03-connector/07-python.mdx index b699019b44..ccc270d3be 100644 --- a/docs/en/14-reference/03-connector/07-python.mdx +++ b/docs/en/14-reference/03-connector/07-python.mdx @@ -7,7 +7,7 @@ description: This document describes taospy, the TDengine Python connector. import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; -`taospy` is the official Python connector for TDengine. taospy provides a rich API that makes it easy for Python applications to use TDengine. `taospy` wraps both the [native interface](/reference/connector/cpp) and [REST interface](/reference/rest-api) of TDengine, which correspond to the `taos` and `taosrest` modules of the `taospy` package, respectively. +`taospy` is the official Python connector for TDengine. taospy provides a rich API that makes it easy for Python applications to use TDengine. `taospy` wraps both the [native interface](../cpp) and [REST interface](../../rest-api) of TDengine, which correspond to the `taos` and `taosrest` modules of the `taospy` package, respectively. In addition to wrapping the native and REST interfaces, `taospy` also provides a set of programming interfaces that conforms to the [Python Data Access Specification (PEP 249)](https://peps.python.org/pep-0249/). It is easy to integrate `taospy` with many third-party tools, such as [SQLAlchemy](https://www.sqlalchemy.org/) and [pandas](https://pandas.pydata.org/). `taos-ws-py` is an optional package to enable using WebSocket to connect TDengine. @@ -17,7 +17,7 @@ The direct connection to the server using the native interface provided by the c The source code for the Python connector is hosted on [GitHub](https://github.com/taosdata/taos-connector-python). ## Supported platforms -- The [supported platforms](/reference/connector/#supported-platforms) for the native connection are the same as the ones supported by the TDengine client. +- The [supported platforms](../#supported-platforms) for the native connection are the same as the ones supported by the TDengine client. - REST connections are supported on all platforms that can run Python. ### Supported features @@ -95,7 +95,7 @@ TDengine currently supports timestamp, number, character, Boolean type, and the 1. Install Python. The recent taospy package requires Python 3.6.2+. The earlier versions of taospy require Python 3.7+. The taos-ws-py package requires Python 3.7+. If Python is not available on your system, refer to the [Python BeginnersGuide](https://wiki.python.org/moin/BeginnersGuide/Download) to install it. 2. Install [pip](https://pypi.org/project/pip/). In most cases, the Python installer comes with the pip utility. If not, please refer to [pip documentation](https://pip.pypa.io/en/stable/installation/) to install it. -If you use a native connection, you will also need to [Install Client Driver](/reference/connector#Install-Client-Driver). The client install package includes the TDengine client dynamic link library (`libtaos.so` or `taos.dll`) and the TDengine CLI. +If you use a native connection, you will also need to [Install Client Driver](../#Install-Client-Driver). 
The client install package includes the TDengine client dynamic link library (`libtaos.so` or `taos.dll`) and the TDengine CLI. ### Install via pip @@ -444,7 +444,7 @@ The best practice for TaosCursor is to create a cursor at the beginning of a que ##### Use of the RestClient class -The `RestClient` class is a direct wrapper for the [REST API](/reference/rest-api). It contains only a `sql()` method for executing arbitrary SQL statements and returning the result. +The `RestClient` class is a direct wrapper for the [REST API](../../rest-api). It contains only a `sql()` method for executing arbitrary SQL statements and returning the result. ```python title="Use of RestClient" {{#include docs/examples/python/rest_client_example.py}} @@ -501,7 +501,7 @@ The queried results can only be fetched once. For example, only one of `fetch_al -The `RestClient` class is a direct wrapper for the [REST API](/reference/rest-api). It contains only a `sql()` method for executing arbitrary SQL statements and returning the result. +The `RestClient` class is a direct wrapper for the [REST API](../../rest-api). It contains only a `sql()` method for executing arbitrary SQL statements and returning the result. ```python {{#include docs/examples/python/rest_client_example.py}} @@ -561,7 +561,7 @@ The `TaosConnection` class and the `TaosResult` class already implement all the ##### Use of the RestClient class -The `RestClient` class is a direct wrapper for the [REST API](/reference/rest-api). It contains only a `sql()` method for executing arbitrary SQL statements and returning the result. +The `RestClient` class is a direct wrapper for the [REST API](../../rest-api). It contains only a `sql()` method for executing arbitrary SQL statements and returning the result. ```python title="Use of RestClient" {{#include docs/examples/python/rest_client_with_req_id_example.py}} diff --git a/docs/en/14-reference/03-connector/08-node.mdx b/docs/en/14-reference/03-connector/08-node.mdx index a02bcddfd6..bed06477f1 100644 --- a/docs/en/14-reference/03-connector/08-node.mdx +++ b/docs/en/14-reference/03-connector/08-node.mdx @@ -28,7 +28,7 @@ The REST connector supports all platforms that can run Node.js. ## Version support -Please refer to [version support list](/reference/connector#version-support) +Please refer to [version support list](../#version-support) ## Supported features @@ -58,7 +58,7 @@ Please refer to [version support list](/reference/connector#version-support) ### Pre-installation preparation - Install the Node.js development environment -- If you are using the REST connector, skip this step. However, if you use the native connector, please install the TDengine client driver. Please refer to [Install Client Driver](/reference/connector#Install-Client-Driver) for more details. We use [node-gyp](https://github.com/nodejs/node-gyp) to interact with TDengine instances and also need to install some dependencies mentioned below depending on the specific OS. +- If you are using the REST connector, skip this step. However, if you use the native connector, please install the TDengine client driver. Please refer to [Install Client Driver](../#Install-Client-Driver) for more details. We use [node-gyp](https://github.com/nodejs/node-gyp) to interact with TDengine instances and also need to install some dependencies mentioned below depending on the specific OS. 
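For the `RestClient` wrapper covered in the Python connector changes above, a minimal, hypothetical usage sketch is shown below; the endpoint and credentials are assumptions (taosAdapter at `localhost:6041` with the default `root`/`taosdata` account), not values taken from the docs above:

```python
# Sketch: run a SQL statement through taosrest's RestClient.
# Assumes taosAdapter at localhost:6041 and the default credentials.
from taosrest import RestClient

client = RestClient("http://localhost:6041", user="root", password="taosdata")
result = client.sql("SELECT SERVER_VERSION()")  # parsed REST response
print(result)
```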
diff --git a/docs/en/14-reference/03-connector/09-csharp.mdx b/docs/en/14-reference/03-connector/09-csharp.mdx index 203d44fe02..282be3af6b 100644 --- a/docs/en/14-reference/03-connector/09-csharp.mdx +++ b/docs/en/14-reference/03-connector/09-csharp.mdx @@ -36,7 +36,7 @@ Please note TDengine does not support 32bit Windows any more. ## Version support -Please refer to [version support list](/reference/connector#version-support) +Please refer to [version support list](../#version-support) ## Supported features @@ -69,7 +69,7 @@ Please refer to [version support list](/reference/connector#version-support) * Install the [.NET SDK](https://dotnet.microsoft.com/download) * [Nuget Client](https://docs.microsoft.com/en-us/nuget/install-nuget-client-tools) (optional installation) -* Install TDengine client driver, please refer to [Install client driver](/reference/connector/#install-client-driver) for details +* Install TDengine client driver, please refer to [Install client driver](../#install-client-driver) for details ### Install `TDengine.Connector` diff --git a/docs/en/14-reference/03-connector/80-php.mdx b/docs/en/14-reference/03-connector/80-php.mdx index b3c2065b6e..bff9e8e5d5 100644 --- a/docs/en/14-reference/03-connector/80-php.mdx +++ b/docs/en/14-reference/03-connector/80-php.mdx @@ -40,7 +40,7 @@ Because the version of TDengine client driver is tightly associated with that of ### Install TDengine Client Driver -Regarding how to install TDengine client driver please refer to [Install Client Driver](/reference/connector#installation-steps) +Regarding how to install TDengine client driver please refer to [Install Client Driver](../#installation-steps) ### Install php-tdengine diff --git a/docs/en/14-reference/03-connector/_preparation.mdx b/docs/en/14-reference/03-connector/_preparation.mdx index 25b78ec134..99887ac36b 100644 --- a/docs/en/14-reference/03-connector/_preparation.mdx +++ b/docs/en/14-reference/03-connector/_preparation.mdx @@ -2,7 +2,7 @@ :::info -Since the TDengine client driver is written in C, using the native connection requires loading the client driver shared library file, which is usually included in the TDengine installer. You can install either standard TDengine server installation package or [TDengine client installation package](/get-started/). For Windows development, you need to install the corresponding Windows client, please refer to [Install TDengine](../../get-started/package). +Since the TDengine client driver is written in C, using the native connection requires loading the client driver shared library file, which is usually included in the TDengine installer. You can install either standard TDengine server installation package or [TDengine client installation package](../../get-started/). For Windows development, you need to install the corresponding Windows client, please refer to [Install TDengine](../../get-started/package). - libtaos.so: After successful installation of TDengine on a Linux system, the dependent Linux version of the client driver `libtaos.so` file will be automatically linked to `/usr/lib/libtaos.so`, which is included in the Linux scannable path and does not need to be specified separately. - taos.dll: After installing the client on Windows, the dependent Windows version of the client driver taos.dll file will be automatically copied to the system default search path C:/Windows/System32, again without the need to specify it separately. 
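Since the native connection depends on the client driver library described above (`libtaos.so` on Linux, `taos.dll` on Windows), a quick way to confirm the library is resolvable on a host is to try loading it. This is only a sketch and assumes a Linux machine with the driver linked into the default library path:

```python
# Sketch: check that the native TDengine client driver can be loaded.
# Assumes Linux with libtaos.so on the default library search path;
# on Windows, use "taos.dll" instead.
import ctypes

try:
    ctypes.CDLL("libtaos.so")
    print("TDengine client driver found")
except OSError as exc:
    print(f"Client driver not loadable; install the TDengine client package first: {exc}")
```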
diff --git a/docs/en/14-reference/04-taosadapter.md b/docs/en/14-reference/04-taosadapter.md index c75598b0df..a9330d21c7 100644 --- a/docs/en/14-reference/04-taosadapter.md +++ b/docs/en/14-reference/04-taosadapter.md @@ -186,7 +186,7 @@ See [example/config/taosadapter.toml](https://github.com/taosdata/taosadapter/bl ### TDengine RESTful interface -You can use any client that supports the http protocol to write data to or query data from TDengine by accessing the REST interface address `http://:6041/rest/sql`. See the [official documentation](/reference/rest-api/) for details. +You can use any client that supports the http protocol to write data to or query data from TDengine by accessing the REST interface address `http://:6041/rest/sql`. See the [official documentation](../rest-api/) for details. ### InfluxDB @@ -202,7 +202,7 @@ Support InfluxDB query parameters as follows. - `precision` The time precision used by TDengine - `u` TDengine user name - `p` TDengine password -- `ttl` The time to live of automatically created sub-table. This value cannot be updated. TDengine will use the ttl value of the first data of sub-table to create sub-table. For more information, please refer [Create Table](/taos-sql/table/#create-table) +- `ttl` The time to live of automatically created sub-table. This value cannot be updated. TDengine will use the ttl value of the first data of sub-table to create sub-table. For more information, please refer [Create Table](../../taos-sql/table/#create-table) Note: InfluxDB token authorization is not supported at present. Only Basic authorization and query parameter validation are supported. Example: curl --request POST http://127.0.0.1:6041/influxdb/v1/write?db=test --user "root:taosdata" --data-binary "measurement,host=host1 field1=2i,field2=2.0 1577836800000000000" diff --git a/docs/en/14-reference/06-taosdump.md b/docs/en/14-reference/06-taosdump.md index c07465a97c..5ec26df1fa 100644 --- a/docs/en/14-reference/06-taosdump.md +++ b/docs/en/14-reference/06-taosdump.md @@ -31,7 +31,7 @@ There are two ways to install taosdump: 2. backup multiple specified databases: use `-D db1,db2,... ` parameters; 3. back up some super or normal tables in the specified database: use `dbname stbname1 stbname2 tbname1 tbname2 ... ` parameters. Note that the first parameter of this input sequence is the database name, and only one database is supported. The second and subsequent parameters are the names of super or normal tables in that database, separated by spaces. 4. back up the system log database: TDengine clusters usually contain a system database named `log`. The data in this database is the data that TDengine runs itself, and the taosdump will not back up the log database by default. If users need to back up the log database, users can use the `-a` or `-allow-sys` command-line parameter. -5. Loose mode backup: taosdump version 1.4.1 onwards provides `-n` and `-L` parameters for backing up data without using escape characters and "loose" mode, which can reduce the number of backups if table names, column names, tag names do not use escape characters. This can also reduce the backup data time and backup data footprint. If you are unsure about using `-n` and `-L` conditions, please use the default parameters for "strict" mode backup. See the [official documentation](/taos-sql/escape) for a description of escaped characters. +5. 
Loose mode backup: taosdump version 1.4.1 onwards provides `-n` and `-L` parameters for backing up data without using escape characters and "loose" mode, which can reduce the number of backups if table names, column names, tag names do not use escape characters. This can also reduce the backup data time and backup data footprint. If you are unsure about using `-n` and `-L` conditions, please use the default parameters for "strict" mode backup. See the [official documentation](../../taos-sql/escape) for a description of escaped characters. :::tip - taosdump versions after 1.4.1 provide the `-I` argument for parsing Avro file schema and data. If users specify `-s` then only taosdump will parse schema. diff --git a/docs/en/14-reference/08-taos-shell.md b/docs/en/14-reference/08-taos-shell.md index 7e0433a8b2..8512f5b59d 100644 --- a/docs/en/14-reference/08-taos-shell.md +++ b/docs/en/14-reference/08-taos-shell.md @@ -8,7 +8,7 @@ The TDengine command-line interface (hereafter referred to as `TDengine CLI`) is ## Installation -If executed on the TDengine server-side, there is no need for additional installation steps to install TDengine CLI as it is already included and installed automatically. To run TDengine CLI in an environment where no TDengine server is running, the TDengine client installation package needs to be installed first. For details, please refer to [Connector](/reference/connector/). +If executed on the TDengine server-side, there is no need for additional installation steps to install TDengine CLI as it is already included and installed automatically. To run TDengine CLI in an environment where no TDengine server is running, the TDengine client installation package needs to be installed first. For details, please refer to [Connector](../connector/). ## Execution @@ -18,7 +18,7 @@ To access the TDengine CLI, you can execute `taos` command-line utility from a t taos ``` -TDengine CLI will display a welcome message and version information if it successfully connected to the TDengine service. If it fails, TDengine CLI will print an error message. See [FAQ](/train-faq/faq) to solve the problem of terminal connection failure to the server. The TDengine CLI prompts as follows: +TDengine CLI will display a welcome message and version information if it successfully connected to the TDengine service. If it fails, TDengine CLI will print an error message. See [FAQ](../../train-faq/faq) to solve the problem of terminal connection failure to the server. The TDengine CLI prompts as follows: ```cmd taos> diff --git a/docs/en/14-reference/12-config/index.md b/docs/en/14-reference/12-config/index.md index ac2364a65d..65c48f9190 100755 --- a/docs/en/14-reference/12-config/index.md +++ b/docs/en/14-reference/12-config/index.md @@ -87,7 +87,7 @@ Ensure that your firewall rules do not block TCP port 6042 on any host in the c | Protocol | Default Port | Description | How to configure | | :------- | :----------- | :-------------------------------------------------------------------------------------------------------- | :--------------------------------------------------------------------------------------------- | | TCP | 6030 | Communication between client and server. In a multi-node cluster, communication between nodes. 
serverPort | -| TCP | 6041 | REST connection between client and server | Prior to 2.4.0.0: serverPort+11; After 2.4.0.0 refer to [taosAdapter](/reference/taosadapter/) | +| TCP | 6041 | REST connection between client and server | Prior to 2.4.0.0: serverPort+11; After 2.4.0.0 refer to [taosAdapter](../taosadapter/) | | TCP | 6043 | Service Port of taosKeeper | The parameter of taosKeeper | | TCP | 6044 | Data access port for StatsD | Configurable through taosAdapter parameters. | | UDP | 6045 | Data access for statsd | Configurable through taosAdapter parameters. | diff --git a/docs/en/14-reference/13-schemaless/13-schemaless.md b/docs/en/14-reference/13-schemaless/13-schemaless.md index eb336f4633..9b001ee79c 100644 --- a/docs/en/14-reference/13-schemaless/13-schemaless.md +++ b/docs/en/14-reference/13-schemaless/13-schemaless.md @@ -116,7 +116,7 @@ You can configure smlChildTableName in taos.cfg to specify table names, for exam 10. Taos.cfg adds the configuration of smlTsDefaultName (with a string value), which only works on the client side. After configuration, the time column name of the schemaless automatic table creation can be set through this configuration. If not configured, defaults to _ts. 11. Super table name or child table name are case sensitive. :::tip -All processing logic of schemaless will still follow TDengine's underlying restrictions on data structures, such as the total length of each row of data cannot exceed 48 KB(64 KB since version 3.0.5.0) and the total length of a tag value cannot exceed 16 KB. See [TDengine SQL Boundary Limits](/taos-sql/limit) for specific constraints in this area. +All processing logic of schemaless will still follow TDengine's underlying restrictions on data structures, such as the total length of each row of data cannot exceed 48 KB(64 KB since version 3.0.5.0) and the total length of a tag value cannot exceed 16 KB. See [TDengine SQL Boundary Limits](../../taos-sql/limit) for specific constraints in this area. ::: ## Time resolution recognition diff --git a/docs/en/20-third-party/02-prometheus.md b/docs/en/20-third-party/02-prometheus.md index bfdd3d015e..e6bd855939 100644 --- a/docs/en/20-third-party/02-prometheus.md +++ b/docs/en/20-third-party/02-prometheus.md @@ -16,7 +16,7 @@ Prometheus data can be stored in TDengine via the `remote_write` interface with To write Prometheus data to TDengine requires the following preparations. - The TDengine cluster is deployed and functioning properly -- taosAdapter is installed and running properly. Please refer to the [taosAdapter manual](/reference/taosadapter) for details. +- taosAdapter is installed and running properly. Please refer to the [taosAdapter manual](../../reference/taosadapter) for details. - Prometheus has been installed. Please refer to the [official documentation](https://prometheus.io/docs/prometheus/latest/installation/) for installing Prometheus ## Configuration steps diff --git a/docs/en/20-third-party/03-telegraf.md b/docs/en/20-third-party/03-telegraf.md index 7e99b84eab..900262f51f 100644 --- a/docs/en/20-third-party/03-telegraf.md +++ b/docs/en/20-third-party/03-telegraf.md @@ -14,7 +14,7 @@ Telegraf's data can be written to TDengine by simply adding the output configura To write Telegraf data to TDengine requires the following preparations. - The TDengine cluster is deployed and functioning properly -- taosAdapter is installed and running properly. Please refer to the [taosAdapter manual](/reference/taosadapter) for details. 
+- taosAdapter is installed and running properly. Please refer to the [taosAdapter manual](../../reference/taosadapter) for details. - Telegraf has been installed. Please refer to the [official documentation](https://docs.influxdata.com/telegraf/v1.22/install/) for Telegraf installation. - Telegraf collects the running status measurements of current system. You can enable [input plugins](https://docs.influxdata.com/telegraf/v1.22/plugins/) to insert [other formats](https://docs.influxdata.com/telegraf/v1.24/data_formats/input/) data to Telegraf then forward to TDengine. @@ -73,6 +73,6 @@ Query OK, 3 row(s) in set (0.013269s) - TDengine take influxdb format data and create unique ID for table names by the rule. The user can configure `smlChildTableName` parameter to generate specified table names if he/she needs. And he/she also need to insert data with specified data format. -For example, Add `smlChildTableName=tname` in the taos.cfg file. Insert data `st,tname=cpu1,t1=4 c1=3 1626006833639000000` then the table name will be cpu1. If there are multiple lines has same tname but different tag_set, the first line's tag_set will be used to automatically creating table and ignore other lines. Please refer to [TDengine Schemaless](/reference/schemaless/#Schemaless-Line-Protocol) +For example, Add `smlChildTableName=tname` in the taos.cfg file. Insert data `st,tname=cpu1,t1=4 c1=3 1626006833639000000` then the table name will be cpu1. If there are multiple lines has same tname but different tag_set, the first line's tag_set will be used to automatically creating table and ignore other lines. Please refer to [TDengine Schemaless](../../reference/schemaless/#Schemaless-Line-Protocol) ::: diff --git a/docs/en/20-third-party/05-collectd.md b/docs/en/20-third-party/05-collectd.md index d8c8e7f81d..5672852cd0 100644 --- a/docs/en/20-third-party/05-collectd.md +++ b/docs/en/20-third-party/05-collectd.md @@ -15,7 +15,7 @@ You can write the data collected by collectd to TDengine by simply modifying the Writing collectd data to the TDengine requires several preparations. - The TDengine cluster is deployed and running properly -- taosAdapter is installed and running, please refer to [taosAdapter's manual](/reference/taosadapter) for details +- taosAdapter is installed and running, please refer to [taosAdapter's manual](../../reference/taosadapter) for details - collectd has been installed. Please refer to the [official documentation](https://collectd.org/download.shtml) to install collectd ## Configuration steps diff --git a/docs/en/20-third-party/07-icinga2.md b/docs/en/20-third-party/07-icinga2.md index 540aae8689..43cd9b83ec 100644 --- a/docs/en/20-third-party/07-icinga2.md +++ b/docs/en/20-third-party/07-icinga2.md @@ -14,7 +14,7 @@ You can write the data collected by icinga2 to TDengine by simply modifying the To write icinga2 data to TDengine requires the following preparations. - The TDengine cluster is deployed and working properly -- taosAdapter is installed and running properly. Please refer to the [taosAdapter manual](/reference/taosadapter) for details. +- taosAdapter is installed and running properly. Please refer to the [taosAdapter manual](../../reference/taosadapter) for details. - icinga2 has been installed. 
Please refer to the [official documentation](https://icinga.com/docs/icinga-2/latest/doc/02-installation/) for icinga2 installation ## Configuration steps diff --git a/docs/en/20-third-party/08-tcollector.md b/docs/en/20-third-party/08-tcollector.md index f1c0ecd44d..83d01dda24 100644 --- a/docs/en/20-third-party/08-tcollector.md +++ b/docs/en/20-third-party/08-tcollector.md @@ -14,7 +14,7 @@ You can write the data collected by TCollector to TDengine by simply changing th To write data to the TDengine via TCollector requires the following preparations. - The TDengine cluster has been deployed and is working properly -- taosAdapter is installed and running properly. Please refer to the [taosAdapter manual](/reference/taosadapter) for details. +- taosAdapter is installed and running properly. Please refer to the [taosAdapter manual](../../reference/taosadapter) for details. - TCollector has been installed. Please refer to [official documentation](http://opentsdb.net/docs/build/html/user_guide/utilities/tcollector.html#installation-of-tcollector) for TCollector installation ## Configuration steps diff --git a/docs/en/20-third-party/09-emq-broker.md b/docs/en/20-third-party/09-emq-broker.md index 9dab6e52c9..7ca6cd4aa7 100644 --- a/docs/en/20-third-party/09-emq-broker.md +++ b/docs/en/20-third-party/09-emq-broker.md @@ -82,7 +82,7 @@ Edit the resource configuration to add the key/value pairing for Authorization. Basic cm9vdDp0YW9zZGF0YQ== ``` -Please refer to the [ TDengine REST API documentation ](/reference/rest-api/) for the authorization in details. +Please refer to the [ TDengine REST API documentation ](../../reference/rest-api/) for the authorization in details. Enter the rule engine replacement template in the message body: diff --git a/docs/en/20-third-party/11-kafka.md b/docs/en/20-third-party/11-kafka.md index b865c00bc3..cac4f5f604 100644 --- a/docs/en/20-third-party/11-kafka.md +++ b/docs/en/20-third-party/11-kafka.md @@ -94,7 +94,7 @@ The output as bellow: The role of the TDengine Sink Connector is to synchronize the data of the specified topic to TDengine. Users do not need to create databases and super tables in advance. The name of the target database can be specified manually (see the configuration parameter connection.database), or it can be generated according to specific rules (see the configuration parameter connection.database.prefix). -TDengine Sink Connector internally uses TDengine [modeless write interface](/reference/connector/cpp#modeless write-api) to write data to TDengine, currently supports data in three formats: [InfluxDB line protocol format](/develop /insert-data/influxdb-line), [OpenTSDB Telnet protocol format](/develop/insert-data/opentsdb-telnet), and [OpenTSDB JSON protocol format](/develop/insert-data/opentsdb-json). +TDengine Sink Connector internally uses TDengine [modeless write interface](../../reference/connector/cpp#modeless write-api) to write data to TDengine, currently supports data in three formats: [InfluxDB line protocol format](../../develop/insert-data/influxdb-line), [OpenTSDB Telnet protocol format](../../develop/insert-data/opentsdb-telnet), and [OpenTSDB JSON protocol format](../../develop/insert-data/opentsdb-json). The following example synchronizes the data of the topic meters to the target database power. The data format is the InfluxDB Line protocol format. @@ -213,7 +213,7 @@ If you see the above data, the synchronization is successful. 
If not, check the The role of the TDengine Source Connector is to push all the data of a specific TDengine database after a particular time to Kafka. The implementation principle of TDengine Source Connector is to first pull historical data in batches and then synchronize incremental data with the strategy of the regular query. At the same time, the changes in the table will be monitored, and the newly added table can be automatically synchronized. If Kafka Connect is restarted, synchronization will resume where it left off. -TDengine Source Connector will convert the data in TDengine data table into [InfluxDB Line protocol format](/develop/insert-data/influxdb-line/) or [OpenTSDB JSON protocol format](/develop/insert-data/opentsdb-json ) and then write to Kafka. +TDengine Source Connector will convert the data in TDengine data table into [InfluxDB Line protocol format](../../develop/insert-data/influxdb-line/) or [OpenTSDB JSON protocol format](../../develop/insert-data/opentsdb-json ) and then write to Kafka. The following sample program synchronizes the data in the database test to the topic tdengine-test-meters. diff --git a/docs/en/20-third-party/_deploytaosadapter.mdx b/docs/en/20-third-party/_deploytaosadapter.mdx index 840ca7640a..f8ee53af62 100644 --- a/docs/en/20-third-party/_deploytaosadapter.mdx +++ b/docs/en/20-third-party/_deploytaosadapter.mdx @@ -14,4 +14,4 @@ Check the running status of taosAdapter. systemctl status taosadapter ``` -taosAdapter Please refer to the `taosadapter --help` command output and [reference documentation](/reference/taosadapter) for detailed configuration parameters and usage of taosAdapter. +taosAdapter Please refer to the `taosadapter --help` command output and [reference documentation](../../reference/taosadapter) for detailed configuration parameters and usage of taosAdapter. 
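
All of the third-party integration pages touched above (Prometheus, Telegraf, collectd, icinga2, TCollector, EMQX, Kafka Connect) route their writes through taosAdapter's REST interface on TCP port 6041, the same port listed in the port table earlier in this change. A minimal sketch of a post-deployment sanity check is shown below; the local host, the default `root:taosdata` credentials, and the probe SQL are illustrative assumptions, not part of this diff.

```bash
# Minimal sketch, assuming taosAdapter runs locally with the default root:taosdata credentials.
# Queries the REST endpoint on port 6041; a JSON response indicates the adapter is reachable.
curl -u root:taosdata -d "select server_version()" http://localhost:6041/rest/sql
```

If this probe fails, recheck `systemctl status taosadapter` and the adapter configuration before wiring up any of the data collectors described in these pages.
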
diff --git a/docs/en/25-application/01-telegraf.md b/docs/en/25-application/01-telegraf.md index 1e3325b2b2..a6db826fa3 100644 --- a/docs/en/25-application/01-telegraf.md +++ b/docs/en/25-application/01-telegraf.md @@ -41,7 +41,7 @@ Download and install the [latest version of TDengine](https://docs.tdengine.com/ ### Install Grafana Plugin and Configure Data Source -Please refer to [Install Grafana Plugin and Configure Data Source](/third-party/grafana/#install-grafana-plugin-and-configure-data-source) +Please refer to [Install Grafana Plugin and Configure Data Source](../../third-party/grafana/#install-grafana-plugin-and-configure-data-source) ### Modify /etc/telegraf/telegraf.conf diff --git a/docs/en/25-application/02-collectd.md b/docs/en/25-application/02-collectd.md index ee1e944928..2c198dcdf5 100644 --- a/docs/en/25-application/02-collectd.md +++ b/docs/en/25-application/02-collectd.md @@ -44,7 +44,7 @@ Download and install the [latest version of TDengine](https://docs.tdengine.com/ ### Install Grafana Plugin and Configure Data Source -Please refer to [Install Grafana Plugin and Configure Data Source](/third-party/grafana/#install-grafana-plugin-and-configure-data-source) +Please refer to [Install Grafana Plugin and Configure Data Source](../../third-party/grafana/#install-grafana-plugin-and-configure-data-source) ### Configure collectd diff --git a/docs/en/25-application/_03-immigrate.md b/docs/en/25-application/_03-immigrate.md index 457a40614e..cdb3d5591c 100644 --- a/docs/en/25-application/_03-immigrate.md +++ b/docs/en/25-application/_03-immigrate.md @@ -70,7 +70,7 @@ You can use collectd and push the data to taosAdapter utilizing the write_tsdb p - **Tuning the Dashboard system** -After writing the data to TDengine, you can configure Grafana to visualize the data written to TDengine. To obtain and use the Grafana plugin provided by TDengine, please refer to [Links to other tools](/third-party/grafana). +After writing the data to TDengine, you can configure Grafana to visualize the data written to TDengine. To obtain and use the Grafana plugin provided by TDengine, please refer to [Links to other tools](../../third-party/grafana). TDengine provides two sets of Dashboard templates by default, and users only need to import the templates from the Grafana directory into Grafana to activate their use. @@ -396,7 +396,7 @@ Hard disk writing performance has little effect on TDengine. The TDengine writin ### Computational resource estimates -Due to the characteristics of IoT data, when the frequency of data generation is consistent, the writing process of TDengine maintains a relatively fixed amount of resource consumption (computing and storage). According to the [TDengine Operation and Maintenance Guide](/operation/) description, the system consumes less than 1 CPU core at 22,000 writes per second. +Due to the characteristics of IoT data, when the frequency of data generation is consistent, the writing process of TDengine maintains a relatively fixed amount of resource consumption (computing and storage). According to the [TDengine Operation and Maintenance Guide](../../operation/) description, the system consumes less than 1 CPU core at 22,000 writes per second. In estimating the CPU resources consumed by the query, assuming that the application requires the database to provide 10,000 QPS, the CPU time consumed by each query is about 1 ms. The query provided by each core per second is 1,000 QPS, which satisfies 10,000 QPS. The query request requires at least 10 cores. 
For the system as a whole system to have less than 50% CPU load, the entire cluster needs twice as many cores i.e. 20 cores. diff --git a/include/common/cos.h b/include/common/cos.h index 21b645f604..c6b159c1da 100644 --- a/include/common/cos.h +++ b/include/common/cos.h @@ -43,6 +43,7 @@ int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, int32_t s3GetObjectsByPrefix(const char *prefix, const char *path); void s3EvictCache(const char *path, long object_size); long s3Size(const char *object_name); +int32_t s3GetObjectToFile(const char *object_name, char *fileName); #ifdef __cplusplus } diff --git a/include/common/tglobal.h b/include/common/tglobal.h index 58517a5db0..33cfada338 100644 --- a/include/common/tglobal.h +++ b/include/common/tglobal.h @@ -75,12 +75,15 @@ extern int32_t tsElectInterval; extern int32_t tsHeartbeatInterval; extern int32_t tsHeartbeatTimeout; +// vnode +extern int64_t tsVndCommitMaxIntervalMs; + // snode extern int32_t tsRsyncPort; -extern char tsCheckpointBackupDir[]; +extern char tsCheckpointBackupDir[]; // vnode checkpoint -extern char tsSnodeAddress[]; //127.0.0.1:873 +extern char tsSnodeAddress[]; // 127.0.0.1:873 // mnode extern int64_t tsMndSdbWriteDelta; @@ -104,8 +107,8 @@ extern int32_t tsMonitorMaxLogs; extern bool tsMonitorComp; // audit -extern bool tsEnableAudit; -extern bool tsEnableAuditCreateTable; +extern bool tsEnableAudit; +extern bool tsEnableAuditCreateTable; // telem extern bool tsEnableTelem; @@ -113,9 +116,9 @@ extern int32_t tsTelemInterval; extern char tsTelemServer[]; extern uint16_t tsTelemPort; extern bool tsEnableCrashReport; -extern char *tsTelemUri; -extern char *tsClientCrashReportUri; -extern char *tsSvrCrashReportUri; +extern char * tsTelemUri; +extern char * tsClientCrashReportUri; +extern char * tsSvrCrashReportUri; // query buffer management extern int32_t tsQueryBufferSize; // maximum allowed usage buffer size in MB for each data node during query processing diff --git a/include/common/tgrant.h b/include/common/tgrant.h index a5f3ab2e3f..f06fca8014 100644 --- a/include/common/tgrant.h +++ b/include/common/tgrant.h @@ -31,8 +31,6 @@ extern "C" { #endif #define GRANT_HEART_BEAT_MIN 2 -#define GRANT_ACTIVE_CODE "activeCode" -#define GRANT_C_ACTIVE_CODE "cActiveCode" typedef enum { TSDB_GRANT_ALL, @@ -52,11 +50,6 @@ typedef enum { TSDB_GRANT_TABLE, } EGrantType; -typedef struct { - int64_t grantedTime; - int64_t connGrantedTime; -} SGrantedInfo; - int32_t grantCheck(EGrantType grant); int32_t grantAlterActiveCode(int32_t did, const char* old, const char* newer, char* out, int8_t type); diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 95526222dc..afa0fa2a6e 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -186,6 +186,7 @@ enum { // WARN: new msg should be appended to segment tail TD_DEF_MSG_TYPE(TDMT_MND_RESUME_STREAM, "resume-stream", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHECKPOINT_TIMER, "stream-checkpoint-tmr", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_STREAM_BEGIN_CHECKPOINT, "stream-begin-checkpoint", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE, "stream-checkpoint-remain", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_STREAM_NODECHANGE_CHECK, "stream-nodechange-check", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_TRIM_DB_TIMER, "trim-db-tmr", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_GRANT_NOTIFY, "grant-notify", NULL, NULL) diff --git a/include/libs/nodes/querynodes.h b/include/libs/nodes/querynodes.h index 0c98820408..1956201422 100644 --- 
a/include/libs/nodes/querynodes.h +++ b/include/libs/nodes/querynodes.h @@ -481,6 +481,7 @@ typedef struct SVnodeModifyOpStmt { SHashObj* pSubTableHashObj; // SHashObj SHashObj* pTableNameHashObj; // set of table names for refreshing meta, sync mode SHashObj* pDbFNameHashObj; // set of db names for refreshing meta, sync mode + SHashObj* pTableCxtHashObj; // temp SHashObj for single request SArray* pVgDataBlocks; // SArray SVCreateTbReq* pCreateTblReq; TdFilePtr fp; diff --git a/include/libs/stream/streamState.h b/include/libs/stream/streamState.h index d0a2b311ee..ba90c0dc7a 100644 --- a/include/libs/stream/streamState.h +++ b/include/libs/stream/streamState.h @@ -35,6 +35,7 @@ int32_t streamStateBegin(SStreamState* pState); int32_t streamStateCommit(SStreamState* pState); void streamStateDestroy(SStreamState* pState, bool remove); int32_t streamStateDeleteCheckPoint(SStreamState* pState, TSKEY mark); +int32_t streamStateDelTaskDb(SStreamState* pState); int32_t streamStateFuncPut(SStreamState* pState, const SWinKey* key, const void* value, int32_t vLen); int32_t streamStateFuncGet(SStreamState* pState, const SWinKey* key, void** ppVal, int32_t* pVLen); @@ -133,4 +134,4 @@ char* streamStateIntervalDump(SStreamState* pState); } #endif -#endif /* ifndef _STREAM_STATE_H_ */ +#endif /* ifndef _STREAM_STATE_H_ */ \ No newline at end of file diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index e97f3e0192..7f65ef8358 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -58,7 +58,9 @@ typedef struct SStreamTask SStreamTask; typedef struct SStreamQueue SStreamQueue; typedef struct SStreamTaskSM SStreamTaskSM; -#define SSTREAM_TASK_VER 2 +#define SSTREAM_TASK_VER 2 +#define SSTREAM_TASK_INCOMPATIBLE_VER 1 +#define SSTREAM_TASK_NEED_CONVERT_VER 2 enum { STREAM_STATUS__NORMAL = 0, @@ -110,6 +112,7 @@ typedef enum { TASK_LEVEL__SOURCE = 1, TASK_LEVEL__AGG, TASK_LEVEL__SINK, + TASK_LEVEL_SMA, } ETASK_LEVEL; enum { @@ -304,11 +307,16 @@ typedef struct SStreamTaskId { typedef struct SCheckpointInfo { int64_t startTs; int64_t checkpointId; - int64_t checkpointVer; // latest checkpointId version - int64_t processedVer; // already processed ver, that has generated results version. 
+ + int64_t checkpointVer; // latest checkpointId version + int64_t processedVer; int64_t nextProcessVer; // current offset in WAL, not serialize it int64_t failedId; // record the latest failed checkpoint id + int64_t checkpointingId; + int32_t downstreamAlignNum; + int32_t checkpointNotReadyTasks; bool dispatchCheckpointTrigger; + int64_t msgVer; } SCheckpointInfo; typedef struct SStreamStatus { @@ -447,12 +455,11 @@ struct SStreamTask { int64_t checkReqId; SArray* checkReqIds; // shuffle int32_t refCnt; - int64_t checkpointingId; - int32_t checkpointAlignCnt; - int32_t checkpointNotReadyTasks; int32_t transferStateAlignCnt; struct SStreamMeta* pMeta; SSHashObj* pNameMap; + void* pBackend; + int64_t backendRefId; char reserve[256]; }; @@ -490,20 +497,25 @@ typedef struct SStreamMeta { int32_t walScanCounter; void* streamBackend; int64_t streamBackendRid; - SHashObj* pTaskBackendUnique; + SHashObj* pTaskDbUnique; TdThreadMutex backendMutex; SMetaHbInfo* pHbInfo; STaskUpdateInfo updateInfo; + SHashObj* pUpdateTaskSet; int32_t numOfStreamTasks; // this value should be increased when a new task is added into the meta int32_t numOfPausedTasks; - int32_t chkptNotReadyTasks; int64_t rid; int64_t chkpId; + int32_t chkpCap; SArray* chkpSaved; SArray* chkpInUse; - int32_t chkpCap; SRWLatch chkpDirLock; + + void* qHandle; + int32_t pauseTaskNum; + + void* bkdChkptMgt; } SStreamMeta; int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo); @@ -659,7 +671,7 @@ int32_t tDecodeStreamCheckpointReadyMsg(SDecoder* pDecoder, SStreamCheckpointRea typedef struct STaskStatusEntry { STaskId id; int32_t status; - int32_t statusLastDuration; // to record the last duration of current status + int32_t statusLastDuration; // to record the last duration of current status int64_t stage; int32_t nodeId; int64_t verStart; // start version in WAL, only valid for source task @@ -668,10 +680,12 @@ typedef struct STaskStatusEntry { int32_t relatedHTask; // has related fill-history task int64_t activeCheckpointId; // current active checkpoint id bool checkpointFailed; // denote if the checkpoint is failed or not + bool inputQChanging; // inputQ is changing or not + int64_t inputQUnchangeCounter; double inputQUsed; // in MiB double inputRate; - double sinkQuota; // existed quota size for sink task - double sinkDataSize; // sink to dst data size + double sinkQuota; // existed quota size for sink task + double sinkDataSize; // sink to dst data size } STaskStatusEntry; typedef struct SStreamHbMsg { @@ -832,8 +846,10 @@ int32_t streamMetaReopen(SStreamMeta* pMeta); void streamMetaInitBackend(SStreamMeta* pMeta); int32_t streamMetaCommit(SStreamMeta* pMeta); int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta); +int32_t streamMetaReloadAllTasks(SStreamMeta* pMeta); int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta); void streamMetaNotifyClose(SStreamMeta* pMeta); +int32_t streamTaskSetDb(SStreamMeta* pMeta, void* pTask, char* key); void streamMetaStartHb(SStreamMeta* pMeta); bool streamMetaTaskInTimer(SStreamMeta* pMeta); int32_t streamMetaUpdateTaskDownstreamStatus(SStreamTask* pTask, int64_t startTs, int64_t endTs, bool succ); @@ -855,8 +871,10 @@ int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHa int32_t buildCheckpointSourceRsp(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SRpcMsg* pMsg, int8_t isSucceed); +SStreamTaskSM* streamCreateStateMachine(SStreamTask* pTask); +void* streamDestroyStateMachine(SStreamTaskSM* pSM); #ifdef __cplusplus } #endif 
-#endif /* ifndef _STREAM_H_ */ +#endif /* ifndef _STREAM_H_ */ \ No newline at end of file diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 0256a496df..6ab06d06a3 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -558,7 +558,6 @@ int32_t* taosGetErrno(); #define TSDB_CODE_GRANT_GEN_IVLD_KEY TAOS_DEF_ERROR_CODE(0, 0x0812) #define TSDB_CODE_GRANT_GEN_APP_LIMIT TAOS_DEF_ERROR_CODE(0, 0x0813) #define TSDB_CODE_GRANT_GEN_ENC_IVLD_KLEN TAOS_DEF_ERROR_CODE(0, 0x0814) -#define TSDB_CODE_GRANT_PAR_IVLD_DIST TAOS_DEF_ERROR_CODE(0, 0x0815) // sync // #define TSDB_CODE_SYN_INVALID_CONFIG TAOS_DEF_ERROR_CODE(0, 0x0900) // 2.x diff --git a/include/util/tcompression.h b/include/util/tcompression.h index ab0c22fc9b..75ddbb12e7 100644 --- a/include/util/tcompression.h +++ b/include/util/tcompression.h @@ -139,6 +139,8 @@ int32_t getWordLength(char type); int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements, char *const output, const char type); int32_t tsDecompressFloatImplAvx512(const char *const input, const int32_t nelements, char *const output); int32_t tsDecompressFloatImplAvx2(const char *const input, const int32_t nelements, char *const output); +int32_t tsDecompressTimestampAvx512(const char* const input, const int32_t nelements, char *const output, bool bigEndian); +int32_t tsDecompressTimestampAvx2(const char* const input, const int32_t nelements, char *const output, bool bigEndian); /************************************************************************* * STREAM COMPRESSION diff --git a/packaging/tools/set_core.sh b/packaging/tools/set_core.sh index db95aeb343..084c4465f9 100755 --- a/packaging/tools/set_core.sh +++ b/packaging/tools/set_core.sh @@ -38,3 +38,4 @@ source /etc/profile ${csudo}mkdir -p ${corePath} ||: ${csudo}sysctl -w kernel.core_pattern=${corePath}/core-%e-%p ||: ${csudo}echo "${corePath}/core-%e-%p" | ${csudo}tee /proc/sys/kernel/core_pattern ||: +${csudo}echo "kernel.core_pattern = ${corePath}/core_%e-%p" >> /etc/sysctl.conf ||: diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index addf0aa629..e78783cf3c 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -126,9 +126,9 @@ void queryCallback(void* param, void* res, int32_t code) { taos_fetch_raw_block_a(res, fetchCallback, param); } -void createNewTable(TAOS* pConn, int32_t index) { +void createNewTable(TAOS* pConn, int32_t index, int32_t numOfRows, int64_t startTs, const char* pVarchar) { char str[1024] = {0}; - sprintf(str, "create table tu%d using st2 tags(%d)", index, index); + sprintf(str, "create table if not exists tu%d using st2 tags(%d)", index, index); TAOS_RES* pRes = taos_query(pConn, str); if (taos_errno(pRes) != 0) { @@ -136,22 +136,43 @@ void createNewTable(TAOS* pConn, int32_t index) { } taos_free_result(pRes); - for (int32_t i = 0; i < 10000; i += 20) { - char sql[1024] = {0}; - sprintf(sql, - "insert into tu%d values(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)" - "(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)" - "(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)" - "(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)", - index, i, i, i + 1, i + 1, i + 2, i + 2, i + 3, i + 3, i + 4, i + 4, i + 5, i + 5, i + 6, i + 6, i + 7, - i + 7, i + 8, i + 8, i + 9, i + 9, i + 10, i + 10, i + 11, i + 11, i + 12, i + 12, i + 13, i + 13, i + 14, - i + 14, i + 15, i + 15, i + 16, i + 16, i + 17, i + 17, i + 
18, i + 18, i + 19, i + 19); - TAOS_RES* p = taos_query(pConn, sql); - if (taos_errno(p) != 0) { - printf("failed to insert data, reason:%s\n", taos_errstr(p)); - } + if (startTs == 0) { + for (int32_t i = 0; i < numOfRows; i += 20) { + char sql[1024] = {0}; + sprintf(sql, + "insert into tu%d values(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)" + "(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)" + "(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)" + "(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)", + index, i, i, i + 1, i + 1, i + 2, i + 2, i + 3, i + 3, i + 4, i + 4, i + 5, i + 5, i + 6, i + 6, i + 7, + i + 7, i + 8, i + 8, i + 9, i + 9, i + 10, i + 10, i + 11, i + 11, i + 12, i + 12, i + 13, i + 13, i + 14, + i + 14, i + 15, i + 15, i + 16, i + 16, i + 17, i + 17, i + 18, i + 18, i + 19, i + 19); + TAOS_RES* p = taos_query(pConn, sql); + if (taos_errno(p) != 0) { + printf("failed to insert data, reason:%s\n", taos_errstr(p)); + } - taos_free_result(p); + taos_free_result(p); + } + } else { + for (int32_t i = 0; i < numOfRows; i += 20) { + char sql[1024*50] = {0}; + sprintf(sql, + "insert into tu%d values(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, " + "%d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, " + "'%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')", + index, startTs, i, pVarchar, startTs + 1, i + 1, pVarchar, startTs + 2, i + 2, pVarchar, startTs + 3, i + 3, pVarchar, startTs + 4, i + 4, + pVarchar, startTs + 5, i + 5, pVarchar, startTs + 6, i + 6, pVarchar, startTs + 7, i + 7, pVarchar, startTs + 8, i + 8, pVarchar, startTs + 9, i + 9, + pVarchar, startTs + 10, i + 10, pVarchar, startTs + 11, i + 11, pVarchar, startTs + 12, i + 12, pVarchar, startTs + 13, i + 13, pVarchar, startTs + 14, + i + 14, pVarchar, startTs + 15, i + 15, pVarchar, startTs + 16, i + 16, pVarchar, startTs + 17, i + 17, pVarchar, startTs + 18, i + 18, + pVarchar, startTs + 19, i + 19, pVarchar); + TAOS_RES* p = taos_query(pConn, sql); + if (taos_errno(p) != 0) { + printf("failed to insert data, reason:%s\n", taos_errstr(p)); + } + + taos_free_result(p); + } } } @@ -808,14 +829,7 @@ TEST(clientCase, projection_query_tables) { TAOS_RES* pRes = taos_query(pConn, "use abc1"); taos_free_result(pRes); - pRes = taos_query(pConn, "create stable st1 (ts timestamp, k int) tags(a int)"); - if (taos_errno(pRes) != 0) { - printf("failed to create table tu, reason:%s\n", taos_errstr(pRes)); - } - - taos_free_result(pRes); - - pRes = taos_query(pConn, "create stable st2 (ts timestamp, k int) tags(a int)"); + pRes = taos_query(pConn, "create stable st2 (ts timestamp, k int, f varchar(4096)) tags(a int)"); if (taos_errno(pRes) != 0) { printf("failed to create table tu, reason:%s\n", taos_errstr(pRes)); } @@ -828,28 +842,32 @@ TEST(clientCase, projection_query_tables) { taos_free_result(pRes); int64_t start = 1685959190000; + const char* pstr = + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefgh" + "ijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnop" + "qrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwx" + 
"yzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdef" + "ghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz!@#$%^&&*&^^%$#@!qQWERTYUIOPASDFGHJKL:" + "QWERTYUIOP{}"; - int32_t code = -1; - for(int32_t i = 0; i < 1000000; ++i) { - char t[512] = {0}; + for(int32_t i = 0; i < 10000; ++i) { + char str[1024] = {0}; + sprintf(str, "create table if not exists tu%d using st2 tags(%d)", i, i); - sprintf(t, "insert into t1 values(now, %d)", i); - while(1) { - void* p = taos_query(pConn, t); - code = taos_errno(p); - taos_free_result(p); - if (code != 0) { - printf("insert data error, retry\n"); - } else { - break; - } + TAOS_RES* px = taos_query(pConn, str); + if (taos_errno(px) != 0) { + printf("failed to create table tu, reason:%s\n", taos_errstr(pRes)); + } + taos_free_result(px); + } + + for(int32_t j = 0; j < 5000; ++j) { + start += 20; + for (int32_t i = 0; i < 10000; ++i) { + createNewTable(pConn, i, 20, start, pstr); } } - for (int32_t i = 0; i < 1; ++i) { - printf("create table :%d\n", i); - createNewTable(pConn, i); - } // // pRes = taos_query(pConn, "select * from tu"); // if (taos_errno(pRes) != 0) { diff --git a/source/common/src/cos.c b/source/common/src/cos.c index 2a334a34b8..7c8676e9f5 100644 --- a/source/common/src/cos.c +++ b/source/common/src/cos.c @@ -86,7 +86,7 @@ typedef struct { char err_msg[128]; S3Status status; uint64_t content_length; - char *buf; + char * buf; int64_t buf_pos; } TS3SizeCBD; @@ -270,7 +270,7 @@ typedef struct list_parts_callback_data { typedef struct MultipartPartData { put_object_callback_data put_object_data; int seq; - UploadManager *manager; + UploadManager * manager; } MultipartPartData; static int putObjectDataCallback(int bufferSize, char *buffer, void *callbackData) { @@ -317,7 +317,7 @@ S3Status MultipartResponseProperiesCallback(const S3ResponseProperties *properti MultipartPartData *data = (MultipartPartData *)callbackData; int seq = data->seq; - const char *etag = properties->eTag; + const char * etag = properties->eTag; data->manager->etags[seq - 1] = strdup(etag); data->manager->next_etags_pos = seq; return S3StatusOK; @@ -450,10 +450,10 @@ int32_t s3PutObjectFromFile2(const char *file, const char *object) { int32_t code = 0; const char *key = object; // const char *uploadId = 0; - const char *filename = 0; + const char * filename = 0; uint64_t contentLength = 0; - const char *cacheControl = 0, *contentType = 0, *md5 = 0; - const char *contentDispositionFilename = 0, *contentEncoding = 0; + const char * cacheControl = 0, *contentType = 0, *md5 = 0; + const char * contentDispositionFilename = 0, *contentEncoding = 0; int64_t expires = -1; S3CannedAcl cannedAcl = S3CannedAclPrivate; int metaPropertiesCount = 0; @@ -467,6 +467,7 @@ int32_t s3PutObjectFromFile2(const char *file, const char *object) { // data.infileFD = NULL; // data.noStatus = noStatus; + // uError("ERROR: %s stat file %s: ", __func__, file); if (taosStatFile(file, &contentLength, NULL, NULL) < 0) { uError("ERROR: %s Failed to stat file %s: ", __func__, file); code = TAOS_SYSTEM_ERROR(errno); @@ -647,7 +648,7 @@ typedef struct list_bucket_callback_data { char nextMarker[1024]; int keyCount; int allDetails; - SArray *objectArray; + SArray * objectArray; } list_bucket_callback_data; static S3Status listBucketCallback(int isTruncated, const char *nextMarker, int contentsCount, @@ -692,11 +693,11 @@ static void s3FreeObjectKey(void *pItem) { static SArray *getListByPrefix(const char *prefix) { 
S3BucketContext bucketContext = {0, tsS3BucketName, protocolG, uriStyleG, tsS3AccessKeyId, tsS3AccessKeySecret, - 0, awsRegionG}; + 0, awsRegionG}; S3ListBucketHandler listBucketHandler = {{&responsePropertiesCallbackNull, &responseCompleteCallback}, &listBucketCallback}; - const char *marker = 0, *delimiter = 0; + const char * marker = 0, *delimiter = 0; int maxkeys = 0, allDetails = 0; list_bucket_callback_data data; data.objectArray = taosArrayInit(32, sizeof(void *)); @@ -737,7 +738,7 @@ static SArray *getListByPrefix(const char *prefix) { void s3DeleteObjects(const char *object_name[], int nobject) { S3BucketContext bucketContext = {0, tsS3BucketName, protocolG, uriStyleG, tsS3AccessKeyId, tsS3AccessKeySecret, - 0, awsRegionG}; + 0, awsRegionG}; S3ResponseHandler responseHandler = {0, &responseCompleteCallback}; for (int i = 0; i < nobject; ++i) { @@ -788,7 +789,7 @@ int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, const char *ifMatch = 0, *ifNotMatch = 0; S3BucketContext bucketContext = {0, tsS3BucketName, protocolG, uriStyleG, tsS3AccessKeyId, tsS3AccessKeySecret, - 0, awsRegionG}; + 0, awsRegionG}; S3GetConditions getConditions = {ifModifiedSince, ifNotModifiedSince, ifMatch, ifNotMatch}; S3GetObjectHandler getObjectHandler = {{&responsePropertiesCallback, &responseCompleteCallback}, &getObjectDataCallback}; @@ -826,7 +827,7 @@ int32_t s3GetObjectToFile(const char *object_name, char *fileName) { const char *ifMatch = 0, *ifNotMatch = 0; S3BucketContext bucketContext = {0, tsS3BucketName, protocolG, uriStyleG, tsS3AccessKeyId, tsS3AccessKeySecret, - 0, awsRegionG}; + 0, awsRegionG}; S3GetConditions getConditions = {ifModifiedSince, ifNotModifiedSince, ifMatch, ifNotMatch}; S3GetObjectHandler getObjectHandler = {{&responsePropertiesCallbackNull, &responseCompleteCallback}, &getObjectCallback}; @@ -857,7 +858,7 @@ int32_t s3GetObjectsByPrefix(const char *prefix, const char *path) { if (objectArray == NULL) return -1; for (size_t i = 0; i < taosArrayGetSize(objectArray); i++) { - char *object = taosArrayGetP(objectArray, i); + char * object = taosArrayGetP(objectArray, i); const char *tmp = strchr(object, '/'); tmp = (tmp == NULL) ? 
object : tmp + 1; char fileName[PATH_MAX] = {0}; @@ -948,12 +949,12 @@ static void s3InitRequestOptions(cos_request_options_t *options, int is_cname) { int32_t s3PutObjectFromFile(const char *file_str, const char *object_str) { int32_t code = 0; - cos_pool_t *p = NULL; + cos_pool_t * p = NULL; int is_cname = 0; - cos_status_t *s = NULL; + cos_status_t * s = NULL; cos_request_options_t *options = NULL; cos_string_t bucket, object, file; - cos_table_t *resp_headers; + cos_table_t * resp_headers; // int traffic_limit = 0; cos_pool_create(&p, NULL); @@ -984,14 +985,14 @@ int32_t s3PutObjectFromFile(const char *file_str, const char *object_str) { int32_t s3PutObjectFromFile2(const char *file_str, const char *object_str) { int32_t code = 0; - cos_pool_t *p = NULL; + cos_pool_t * p = NULL; int is_cname = 0; - cos_status_t *s = NULL; - cos_request_options_t *options = NULL; + cos_status_t * s = NULL; + cos_request_options_t * options = NULL; cos_string_t bucket, object, file; - cos_table_t *resp_headers; + cos_table_t * resp_headers; int traffic_limit = 0; - cos_table_t *headers = NULL; + cos_table_t * headers = NULL; cos_resumable_clt_params_t *clt_params = NULL; cos_pool_create(&p, NULL); @@ -1024,11 +1025,11 @@ int32_t s3PutObjectFromFile2(const char *file_str, const char *object_str) { } void s3DeleteObjectsByPrefix(const char *prefix_str) { - cos_pool_t *p = NULL; + cos_pool_t * p = NULL; cos_request_options_t *options = NULL; int is_cname = 0; cos_string_t bucket; - cos_status_t *s = NULL; + cos_status_t * s = NULL; cos_string_t prefix; cos_pool_create(&p, NULL); @@ -1043,10 +1044,10 @@ void s3DeleteObjectsByPrefix(const char *prefix_str) { } void s3DeleteObjects(const char *object_name[], int nobject) { - cos_pool_t *p = NULL; + cos_pool_t * p = NULL; int is_cname = 0; cos_string_t bucket; - cos_table_t *resp_headers = NULL; + cos_table_t * resp_headers = NULL; cos_request_options_t *options = NULL; cos_list_t object_list; cos_list_t deleted_object_list; @@ -1080,14 +1081,14 @@ void s3DeleteObjects(const char *object_name[], int nobject) { bool s3Exists(const char *object_name) { bool ret = false; - cos_pool_t *p = NULL; + cos_pool_t * p = NULL; int is_cname = 0; - cos_status_t *s = NULL; - cos_request_options_t *options = NULL; + cos_status_t * s = NULL; + cos_request_options_t * options = NULL; cos_string_t bucket; cos_string_t object; - cos_table_t *resp_headers; - cos_table_t *headers = NULL; + cos_table_t * resp_headers; + cos_table_t * headers = NULL; cos_object_exist_status_e object_exist; cos_pool_create(&p, NULL); @@ -1114,15 +1115,15 @@ bool s3Exists(const char *object_name) { bool s3Get(const char *object_name, const char *path) { bool ret = false; - cos_pool_t *p = NULL; + cos_pool_t * p = NULL; int is_cname = 0; - cos_status_t *s = NULL; + cos_status_t * s = NULL; cos_request_options_t *options = NULL; cos_string_t bucket; cos_string_t object; cos_string_t file; - cos_table_t *resp_headers = NULL; - cos_table_t *headers = NULL; + cos_table_t * resp_headers = NULL; + cos_table_t * headers = NULL; int traffic_limit = 0; //创建内存池 @@ -1158,15 +1159,15 @@ bool s3Get(const char *object_name, const char *path) { int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t block_size, bool check, uint8_t **ppBlock) { (void)check; int32_t code = 0; - cos_pool_t *p = NULL; + cos_pool_t * p = NULL; int is_cname = 0; - cos_status_t *s = NULL; + cos_status_t * s = NULL; cos_request_options_t *options = NULL; cos_string_t bucket; cos_string_t object; - cos_table_t *resp_headers; 
- cos_table_t *headers = NULL; - cos_buf_t *content = NULL; + cos_table_t * resp_headers; + cos_table_t * headers = NULL; + cos_buf_t * content = NULL; // cos_string_t file; // int traffic_limit = 0; char range_buf[64]; @@ -1260,7 +1261,7 @@ void s3EvictCache(const char *path, long object_size) { terrno = TAOS_SYSTEM_ERROR(errno); vError("failed to open %s since %s", dir_name, terrstr()); } - SArray *evict_files = taosArrayInit(16, sizeof(SEvictFile)); + SArray * evict_files = taosArrayInit(16, sizeof(SEvictFile)); tdbDirEntryPtr pDirEntry; while ((pDirEntry = taosReadDir(pDir)) != NULL) { char *name = taosGetDirEntryName(pDirEntry); @@ -1302,13 +1303,13 @@ void s3EvictCache(const char *path, long object_size) { long s3Size(const char *object_name) { long size = 0; - cos_pool_t *p = NULL; + cos_pool_t * p = NULL; int is_cname = 0; - cos_status_t *s = NULL; + cos_status_t * s = NULL; cos_request_options_t *options = NULL; cos_string_t bucket; cos_string_t object; - cos_table_t *resp_headers = NULL; + cos_table_t * resp_headers = NULL; //创建内存池 cos_pool_create(&p, NULL); @@ -1354,5 +1355,6 @@ int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, void s3EvictCache(const char *path, long object_size) {} long s3Size(const char *object_name) { return 0; } int32_t s3GetObjectsByPrefix(const char *prefix, const char *path) { return 0; } +int32_t s3GetObjectToFile(const char *object_name, char *fileName) { return 0; } #endif diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index d9aecd96d7..ced68c134e 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -107,7 +107,7 @@ bool tsEnableTelem = true; int32_t tsTelemInterval = 43200; char tsTelemServer[TSDB_FQDN_LEN] = "telemetry.tdengine.com"; uint16_t tsTelemPort = 80; -char *tsTelemUri = "/report"; +char * tsTelemUri = "/report"; #ifdef TD_ENTERPRISE bool tsEnableCrashReport = false; @@ -354,16 +354,24 @@ static int32_t taosLoadCfg(SConfig *pCfg, const char **envCmd, const char *input char cfgFile[PATH_MAX + 100] = {0}; taosExpandDir(inputCfgDir, cfgDir, PATH_MAX); - char lastC = cfgDir[strlen(cfgDir) - 1]; + char lastC = cfgDir[strlen(cfgDir) - 1]; char *tdDirsep = TD_DIRSEP; if (lastC == '\\' || lastC == '/') { tdDirsep = ""; } if (taosIsDir(cfgDir)) { #ifdef CUS_PROMPT - snprintf(cfgFile, sizeof(cfgFile), "%s" "%s" "%s.cfg", cfgDir, tdDirsep, CUS_PROMPT); + snprintf(cfgFile, sizeof(cfgFile), + "%s" + "%s" + "%s.cfg", + cfgDir, tdDirsep, CUS_PROMPT); #else - snprintf(cfgFile, sizeof(cfgFile), "%s" "%s" "taos.cfg", cfgDir, tdDirsep); + snprintf(cfgFile, sizeof(cfgFile), + "%s" + "%s" + "taos.cfg", + cfgDir, tdDirsep); #endif } else { tstrncpy(cfgFile, cfgDir, sizeof(cfgDir)); @@ -729,6 +737,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddBool(pCfg, "disableStream", tsDisableStream, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; if (cfgAddInt64(pCfg, "streamBufferSize", tsStreamBufferSize, 0, INT64_MAX, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "checkpointInterval", tsStreamCheckpointInterval, 60, 1200, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; @@ -1347,7 +1356,7 @@ void taosCleanupCfg() { typedef struct { const char *optionName; - void *optionVar; + void * optionVar; } OptionNameAndVar; static int32_t taosCfgSetOption(OptionNameAndVar *pOptions, int32_t optionSize, SConfigItem *pItem, bool isDebugflag) { @@ -1360,7 +1369,7 @@ static int32_t taosCfgSetOption(OptionNameAndVar *pOptions, int32_t optionSize, switch 
(pItem->dtype) { case CFG_DTYPE_BOOL: { int32_t flag = pItem->i32; - bool *pVar = pOptions[d].optionVar; + bool * pVar = pOptions[d].optionVar; uInfo("%s set from %d to %d", optName, *pVar, flag); *pVar = flag; terrno = TSDB_CODE_SUCCESS; diff --git a/source/dnode/mnode/impl/inc/mndCluster.h b/source/dnode/mnode/impl/inc/mndCluster.h index 2b59d9dbf5..e33ffdb372 100644 --- a/source/dnode/mnode/impl/inc/mndCluster.h +++ b/source/dnode/mnode/impl/inc/mndCluster.h @@ -27,8 +27,6 @@ void mndCleanupCluster(SMnode *pMnode); int32_t mndGetClusterName(SMnode *pMnode, char *clusterName, int32_t len); int64_t mndGetClusterId(SMnode *pMnode); int64_t mndGetClusterCreateTime(SMnode *pMnode); -int32_t mndGetClusterGrantedInfo(SMnode *pMnode, SGrantedInfo *pInfo); -int32_t mndSetClusterGrantedInfo(SMnode *pMnode, SGrantedInfo *pInfo); int64_t mndGetClusterUpTime(SMnode *pMnode); #ifdef __cplusplus diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index 27fef1e81e..08c0aec46a 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -192,8 +192,6 @@ typedef struct { int64_t createdTime; int64_t updateTime; int32_t upTime; - int64_t grantedTime; - int64_t connGrantedTime; } SClusterObj; typedef struct { @@ -700,6 +698,11 @@ typedef struct { } SStreamObj; +typedef struct SStreamSeq { + char name[24]; + uint64_t seq; + SRWLatch lock; +} SStreamSeq; int32_t tEncodeSStreamObj(SEncoder* pEncoder, const SStreamObj* pObj); int32_t tDecodeSStreamObj(SDecoder* pDecoder, SStreamObj* pObj, int32_t sver); void tFreeStreamObj(SStreamObj* pObj); @@ -731,14 +734,13 @@ typedef struct { int8_t type; int32_t numOfCols; SSchema* pSchema; - SRWLatch lock; + SRWLatch lock; } SViewObj; int32_t tEncodeSViewObj(SEncoder* pEncoder, const SViewObj* pObj); int32_t tDecodeSViewObj(SDecoder* pDecoder, SViewObj* pObj, int32_t sver); void tFreeSViewObj(SViewObj* pObj); - #ifdef __cplusplus } #endif diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index 244a6d08dd..e3ac4fd6fc 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ b/source/dnode/mnode/impl/inc/mndStream.h @@ -28,18 +28,28 @@ typedef struct SStreamTransInfo { const char *name; } SStreamTransInfo; +// time to generated the checkpoint, if now() - checkpointTs >= tsCheckpointInterval, this checkpoint will be discard +// to avoid too many checkpoints for a taskk in the waiting list +typedef struct SCheckpointCandEntry { + char * pName; + int64_t streamId; + int64_t checkpointTs; + int64_t checkpointId; +} SCheckpointCandEntry; + typedef struct SStreamTransMgmt { SHashObj *pDBTrans; + SHashObj *pWaitingList; // stream id list, of which timed checkpoint failed to be issued due to the trans conflict. 
} SStreamTransMgmt; typedef struct SStreamExecInfo { - SArray *pNodeList; - int64_t ts; // snapshot ts - SStreamTransMgmt transMgmt; - int64_t activeCheckpoint; // active check point id - SHashObj * pTaskMap; - SArray * pTaskList; - TdThreadMutex lock; + SArray * pNodeList; + int64_t ts; // snapshot ts + SStreamTransMgmt transMgmt; + int64_t activeCheckpoint; // active check point id + SHashObj * pTaskMap; + SArray * pTaskList; + TdThreadMutex lock; } SStreamExecInfo; extern SStreamExecInfo execInfo; @@ -51,8 +61,9 @@ void mndReleaseStream(SMnode *pMnode, SStreamObj *pStream); int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb); int32_t mndPersistStream(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); -int32_t mndStreamRegisterTrans(STrans* pTrans, const char* pName, const char* pSrcDb, const char* pDstDb); -bool streamTransConflictOtherTrans(SMnode *pMnode, const char *pSrcDb, const char *pDstDb); +int32_t mndStreamRegisterTrans(STrans *pTrans, const char *pName, const char *pSrcDb, const char *pDstDb); +int32_t mndAddtoCheckpointWaitingList(SStreamObj *pStream, int64_t checkpointId); +bool streamTransConflictOtherTrans(SMnode *pMnode, const char *pSrcDb, const char *pDstDb, bool lock); // for sma // TODO refactor diff --git a/source/dnode/mnode/impl/src/mndCluster.c b/source/dnode/mnode/impl/src/mndCluster.c index 26c678b513..4c799e1e1e 100644 --- a/source/dnode/mnode/impl/src/mndCluster.c +++ b/source/dnode/mnode/impl/src/mndCluster.c @@ -19,7 +19,7 @@ #include "mndTrans.h" #define CLUSTER_VER_NUMBE 1 -#define CLUSTER_RESERVE_SIZE 44 +#define CLUSTER_RESERVE_SIZE 60 int64_t tsExpireTime = 0; static SSdbRaw *mndClusterActionEncode(SClusterObj *pCluster); @@ -112,19 +112,6 @@ int64_t mndGetClusterCreateTime(SMnode *pMnode) { return createTime; } -int32_t mndGetClusterGrantedInfo(SMnode *pMnode, SGrantedInfo *pInfo) { - void *pIter = NULL; - SClusterObj *pCluster = mndAcquireCluster(pMnode, &pIter); - if (pCluster != NULL) { - pInfo->grantedTime = pCluster->grantedTime; - pInfo->connGrantedTime = pCluster->connGrantedTime; - mndReleaseCluster(pMnode, pCluster, pIter); - return 0; - } - - return -1; -} - static int32_t mndGetClusterUpTimeImp(SClusterObj *pCluster) { #if 0 int32_t upTime = taosGetTimestampSec() - pCluster->updateTime / 1000; @@ -159,8 +146,6 @@ static SSdbRaw *mndClusterActionEncode(SClusterObj *pCluster) { SDB_SET_INT64(pRaw, dataPos, pCluster->updateTime, _OVER) SDB_SET_BINARY(pRaw, dataPos, pCluster->name, TSDB_CLUSTER_ID_LEN, _OVER) SDB_SET_INT32(pRaw, dataPos, pCluster->upTime, _OVER) - SDB_SET_INT64(pRaw, dataPos, pCluster->grantedTime, _OVER) - SDB_SET_INT64(pRaw, dataPos, pCluster->connGrantedTime, _OVER) SDB_SET_RESERVE(pRaw, dataPos, CLUSTER_RESERVE_SIZE, _OVER) terrno = 0; @@ -201,8 +186,6 @@ static SSdbRow *mndClusterActionDecode(SSdbRaw *pRaw) { SDB_GET_INT64(pRaw, dataPos, &pCluster->updateTime, _OVER) SDB_GET_BINARY(pRaw, dataPos, pCluster->name, TSDB_CLUSTER_ID_LEN, _OVER) SDB_GET_INT32(pRaw, dataPos, &pCluster->upTime, _OVER) - SDB_GET_INT64(pRaw, dataPos, &pCluster->grantedTime, _OVER); - SDB_GET_INT64(pRaw, dataPos, &pCluster->connGrantedTime, _OVER); SDB_GET_RESERVE(pRaw, dataPos, CLUSTER_RESERVE_SIZE, _OVER) terrno = 0; @@ -235,8 +218,6 @@ static int32_t mndClusterActionUpdate(SSdb *pSdb, SClusterObj *pOld, SClusterObj mTrace("cluster:%" PRId64 ", perform update action, old row:%p new row:%p, uptime from %d to %d", pOld->id, pOld, pNew, pOld->upTime, pNew->upTime); pOld->upTime = pNew->upTime; - pOld->grantedTime = 
pNew->grantedTime; - pOld->connGrantedTime = pNew->connGrantedTime; pOld->updateTime = taosGetTimestampMs(); return 0; } @@ -378,44 +359,3 @@ static int32_t mndProcessUptimeTimer(SRpcMsg *pReq) { mndTransDrop(pTrans); return 0; } - -int32_t mndSetClusterGrantedInfo(SMnode *pMnode, SGrantedInfo *pInfo) { - SClusterObj clusterObj = {0}; - void *pIter = NULL; - SClusterObj *pCluster = mndAcquireCluster(pMnode, &pIter); - if (pCluster != NULL) { - if (pCluster->grantedTime >= pInfo->grantedTime && pCluster->connGrantedTime >= pInfo->connGrantedTime) { - mndReleaseCluster(pMnode, pCluster, pIter); - return 0; - } - memcpy(&clusterObj, pCluster, sizeof(SClusterObj)); - if (pCluster->grantedTime < pInfo->grantedTime) clusterObj.grantedTime = pInfo->grantedTime; - if (pCluster->connGrantedTime < pInfo->connGrantedTime) clusterObj.connGrantedTime = pInfo->connGrantedTime; - mndReleaseCluster(pMnode, pCluster, pIter); - } - - if (clusterObj.id <= 0) { - mError("can't get cluster info while update granted info"); - return -1; - } - - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_NOTHING, NULL, "granted-info"); - if (pTrans == NULL) return -1; - - SSdbRaw *pCommitRaw = mndClusterActionEncode(&clusterObj); - if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { - mError("trans:%d, failed to append commit log since %s", pTrans->id, terrstr()); - mndTransDrop(pTrans); - return -1; - } - (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); - - if (mndTransPrepare(pMnode, pTrans) != 0) { - mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); - mndTransDrop(pTrans); - return -1; - } - - mndTransDrop(pTrans); - return 0; -} \ No newline at end of file diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index b0bffcc83e..e224aceec2 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -790,9 +790,7 @@ static int32_t mndConfigDnode(SMnode *pMnode, SRpcMsg *pReq, SMCfgDnodeReq *pCfg if (cfgAll) { // alter all dnodes: if (!failRecord) failRecord = taosArrayInit(1, sizeof(int32_t)); if (failRecord) taosArrayPush(failRecord, &pDnode->id); - if (0 == cfgAllErr || cfgAllErr == TSDB_CODE_GRANT_PAR_IVLD_ACTIVE) { - cfgAllErr = terrno; // output 1st or more specific error - } + if (0 == cfgAllErr) cfgAllErr = terrno; // output 1st terrno. } } else { terrno = 0; // no action for dup active code @@ -808,9 +806,7 @@ static int32_t mndConfigDnode(SMnode *pMnode, SRpcMsg *pReq, SMCfgDnodeReq *pCfg if (cfgAll) { if (!failRecord) failRecord = taosArrayInit(1, sizeof(int32_t)); if (failRecord) taosArrayPush(failRecord, &pDnode->id); - if (0 == cfgAllErr || cfgAllErr == TSDB_CODE_GRANT_PAR_IVLD_ACTIVE) { - cfgAllErr = terrno; // output 1st or more specific error - } + if (0 == cfgAllErr) cfgAllErr = terrno; } } else { terrno = 0; @@ -1287,12 +1283,7 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { strcpy(dcfgReq.config, "supportvnodes"); snprintf(dcfgReq.value, TSDB_DNODE_VALUE_LEN, "%d", flag); - } else if (strncasecmp(cfgReq.config, GRANT_ACTIVE_CODE, 10) == 0 || - strncasecmp(cfgReq.config, GRANT_C_ACTIVE_CODE, 11) == 0) { - if (cfgReq.dnodeId != -1) { - terrno = TSDB_CODE_INVALID_CFG; - goto _err_out; - } + } else if (strncasecmp(cfgReq.config, "activeCode", 10) == 0 || strncasecmp(cfgReq.config, "cActiveCode", 11) == 0) { int8_t opt = strncasecmp(cfgReq.config, "a", 1) == 0 ? DND_ACTIVE_CODE : DND_CONN_ACTIVE_CODE; int8_t index = opt == DND_ACTIVE_CODE ? 
10 : 11; if (' ' != cfgReq.config[index] && 0 != cfgReq.config[index]) { @@ -1310,11 +1301,12 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { goto _err_out; } - strcpy(dcfgReq.config, opt == DND_ACTIVE_CODE ? GRANT_ACTIVE_CODE : GRANT_C_ACTIVE_CODE); + strcpy(dcfgReq.config, opt == DND_ACTIVE_CODE ? "activeCode" : "cActiveCode"); snprintf(dcfgReq.value, TSDB_DNODE_VALUE_LEN, "%s", cfgReq.value); - if ((terrno = mndConfigDnode(pMnode, pReq, &cfgReq, opt)) != 0) { + if (mndConfigDnode(pMnode, pReq, &cfgReq, opt) != 0) { mError("dnode:%d, failed to config activeCode since %s", cfgReq.dnodeId, terrstr()); + terrno = TSDB_CODE_INVALID_CFG; goto _err_out; } tFreeSMCfgDnodeReq(&cfgReq); diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 8d00dfefb6..fdfec610d9 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -146,6 +146,15 @@ static void mndStreamCheckpointTick(SMnode *pMnode, int64_t sec) { } } +static void mndStreamCheckpointRemain(SMnode* pMnode) { + int32_t contLen = 0; + void *pReq = mndBuildCheckpointTickMsg(&contLen, 0); + if (pReq != NULL) { + SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE, .pCont = pReq, .contLen = contLen}; + tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); + } +} + static void mndStreamCheckNode(SMnode* pMnode) { int32_t contLen = 0; void *pReq = mndBuildTimerMsg(&contLen); @@ -286,6 +295,10 @@ static void *mndThreadFp(void *param) { mndStreamCheckpointTick(pMnode, sec); } + if (sec % 5 == 0) { + mndStreamCheckpointRemain(pMnode); + } + if (sec % tsStreamNodeCheckInterval == 0) { mndStreamCheckNode(pMnode); } diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 235703428f..21969cc404 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -18,23 +18,21 @@ #include "mndDb.h" #include "mndDnode.h" #include "mndMnode.h" -#include "mndSnode.h" #include "mndPrivilege.h" #include "mndScheduler.h" #include "mndShow.h" +#include "mndSnode.h" #include "mndStb.h" -#include "mndTopic.h" #include "mndTrans.h" -#include "mndUser.h" #include "mndVgroup.h" #include "osMemory.h" #include "parser.h" #include "tmisce.h" #include "tname.h" -#define MND_STREAM_VER_NUMBER 4 -#define MND_STREAM_RESERVE_SIZE 64 -#define MND_STREAM_MAX_NUM 60 +#define MND_STREAM_VER_NUMBER 4 +#define MND_STREAM_RESERVE_SIZE 64 +#define MND_STREAM_MAX_NUM 60 #define MND_STREAM_CHECKPOINT_NAME "stream-checkpoint" #define MND_STREAM_PAUSE_NAME "stream-pause" @@ -65,6 +63,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq); static int32_t mndProcessDropStreamReq(SRpcMsg *pReq); static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq); static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq); +static int32_t mndProcessStreamCheckpointInCandid(SRpcMsg *pReq); static int32_t mndProcessStreamHb(SRpcMsg *pReq); static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows); static void mndCancelGetNextStream(SMnode *pMnode, void *pIter); @@ -83,7 +82,7 @@ static SStreamObj *mndGetStreamObj(SMnode *pMnode, int64_t streamId); static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList); -static STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, const char *name, const char* pMsg); +static STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, const 
char *name, const char *pMsg); static int32_t mndPersistTransLog(SStreamObj *pStream, STrans *pTrans); static void initTransAction(STransAction *pAction, void *pCont, int32_t contLen, int32_t msgType, const SEpSet *pEpset, int32_t retryCode); @@ -91,12 +90,19 @@ static int32_t createStreamUpdateTrans(SStreamObj *pStream, SVgroupChangeInfo *p static void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode); static void saveStreamTasksInfo(SStreamObj *pStream, SStreamExecInfo *pExecNode); static int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot); -static int32_t killActiveCheckpointTrans(SMnode *pMnode, const char* pDbName, size_t len); +static int32_t killActiveCheckpointTrans(SMnode *pMnode, const char *pDbName, size_t len); static int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList); +static void freeCheckpointCandEntry(void *); static SSdbRaw *mndStreamActionEncode(SStreamObj *pStream); static SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw); +SSdbRaw * mndStreamSeqActionEncode(SStreamObj *pStream); +SSdbRow * mndStreamSeqActionDecode(SSdbRaw *pRaw); +static int32_t mndStreamSeqActionInsert(SSdb *pSdb, SStreamSeq *pStream); +static int32_t mndStreamSeqActionDelete(SSdb *pSdb, SStreamSeq *pStream); +static int32_t mndStreamSeqActionUpdate(SSdb *pSdb, SStreamSeq *pOldStream, SStreamSeq *pNewStream); + int32_t mndInitStream(SMnode *pMnode) { SSdbTable table = { .sdbType = SDB_STREAM, @@ -107,6 +113,15 @@ int32_t mndInitStream(SMnode *pMnode) { .updateFp = (SdbUpdateFp)mndStreamActionUpdate, .deleteFp = (SdbDeleteFp)mndStreamActionDelete, }; + SSdbTable tableSeq = { + .sdbType = SDB_STREAM_SEQ, + .keyType = SDB_KEY_BINARY, + .encodeFp = (SdbEncodeFp)mndStreamSeqActionEncode, + .decodeFp = (SdbDecodeFp)mndStreamSeqActionDecode, + .insertFp = (SdbInsertFp)mndStreamSeqActionInsert, + .updateFp = (SdbUpdateFp)mndStreamSeqActionUpdate, + .deleteFp = (SdbDeleteFp)mndStreamSeqActionDelete, + }; mndSetMsgHandle(pMnode, TDMT_MND_CREATE_STREAM, mndProcessCreateStreamReq); mndSetMsgHandle(pMnode, TDMT_MND_DROP_STREAM, mndProcessDropStreamReq); @@ -123,6 +138,7 @@ int32_t mndInitStream(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_VND_STREAM_CHECK_POINT_SOURCE_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHECKPOINT_TIMER, mndProcessStreamCheckpointTmr); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_BEGIN_CHECKPOINT, mndProcessStreamDoCheckpoint); + mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE, mndProcessStreamCheckpointInCandid); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_HEARTBEAT, mndProcessStreamHb); mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_REPORT_CHECKPOINT, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_NODECHANGE_CHECK, mndProcessNodeCheckReq); @@ -141,14 +157,23 @@ int32_t mndInitStream(SMnode *pMnode) { execInfo.pTaskList = taosArrayInit(4, sizeof(STaskId)); execInfo.pTaskMap = taosHashInit(64, fn, true, HASH_NO_LOCK); execInfo.transMgmt.pDBTrans = taosHashInit(32, fn, true, HASH_NO_LOCK); + execInfo.transMgmt.pWaitingList = taosHashInit(32, fn, true, HASH_NO_LOCK); + taosHashSetFreeFp(execInfo.transMgmt.pWaitingList, freeCheckpointCandEntry); - return sdbSetTable(pMnode->pSdb, table); + if (sdbSetTable(pMnode->pSdb, table) != 0) { + return -1; + } + if (sdbSetTable(pMnode->pSdb, tableSeq) != 0) { + return -1; + } + return 0; } void mndCleanupStream(SMnode *pMnode) { taosArrayDestroy(execInfo.pTaskList); taosHashCleanup(execInfo.pTaskMap); taosHashCleanup(execInfo.transMgmt.pDBTrans); + 
taosHashCleanup(execInfo.transMgmt.pWaitingList); taosThreadMutexDestroy(&execInfo.lock); mDebug("mnd stream exec info cleanup"); } @@ -195,7 +220,8 @@ STREAM_ENCODE_OVER: return NULL; } - mTrace("stream:%s, encode to raw:%p, row:%p", pStream->name, pRaw, pStream); + mTrace("stream:%s, encode to raw:%p, row:%p, checkpoint:%" PRId64 "", pStream->name, pRaw, pStream, + pStream->checkpointId); return pRaw; } @@ -248,7 +274,8 @@ STREAM_DECODE_OVER: return NULL; } - mTrace("stream:%s, decode from raw:%p, row:%p", pStream->name, pRaw, pStream); + mTrace("stream:%s, decode from raw:%p, row:%p, checkpoint:%" PRId64 "", pStream->name, pRaw, pStream, + pStream->checkpointId); return pRow; } @@ -274,6 +301,8 @@ static int32_t mndStreamActionUpdate(SSdb *pSdb, SStreamObj *pOldStream, SStream pOldStream->status = pNewStream->status; pOldStream->updateTime = pNewStream->updateTime; + pOldStream->checkpointId = pNewStream->checkpointId; + pOldStream->checkpointFreq = pNewStream->checkpointFreq; taosWUnLockLatch(&pOldStream->lock); return 0; @@ -308,6 +337,12 @@ static void mndShowStreamStatus(char *dst, SStreamObj *pStream) { } } +SSdbRaw *mndStreamSeqActionEncode(SStreamObj *pStream) { return NULL; } +SSdbRow *mndStreamSeqActionDecode(SSdbRaw *pRaw) { return NULL; } +int32_t mndStreamSeqActionInsert(SSdb *pSdb, SStreamSeq *pStream) { return 0; } +int32_t mndStreamSeqActionDelete(SSdb *pSdb, SStreamSeq *pStream) { return 0; } +int32_t mndStreamSeqActionUpdate(SSdb *pSdb, SStreamSeq *pOldStream, SStreamSeq *pNewStream) { return 0; } + static void mndShowStreamTrigger(char *dst, SStreamObj *pStream) { int8_t trigger = pStream->conf.trigger; if (trigger == STREAM_TRIGGER_AT_ONCE) { @@ -686,7 +721,7 @@ _OVER: return -1; } -static int32_t mndPersistTaskDropReq(SMnode* pMnode, STrans *pTrans, SStreamTask *pTask) { +static int32_t mndPersistTaskDropReq(SMnode *pMnode, STrans *pTrans, SStreamTask *pTask) { SVDropStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVDropStreamTaskReq)); if (pReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -698,11 +733,11 @@ static int32_t mndPersistTaskDropReq(SMnode* pMnode, STrans *pTrans, SStreamTask pReq->streamId = pTask->id.streamId; STransAction action = {0}; - SEpSet epset = {0}; - if(pTask->info.nodeId == SNODE_HANDLE){ - SSnodeObj* pObj = mndAcquireSnode(pMnode, pTask->info.nodeId); + SEpSet epset = {0}; + if (pTask->info.nodeId == SNODE_HANDLE) { + SSnodeObj *pObj = mndAcquireSnode(pMnode, pTask->info.nodeId); addEpIntoEpSet(&epset, pObj->pDnode->fqdn, pObj->pDnode->port); - }else{ + } else { SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId); epset = mndGetVgroupEpset(pMnode, pVgObj); mndReleaseVgroup(pMnode, pVgObj); @@ -736,7 +771,7 @@ int32_t mndDropStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream) static int32_t checkForNumOfStreams(SMnode *pMnode, SStreamObj *pStreamObj) { // check for number of existed tasks int32_t numOfStream = 0; SStreamObj *pStream = NULL; - void *pIter = NULL; + void * pIter = NULL; while ((pIter = sdbFetch(pMnode->pSdb, SDB_STREAM, pIter, (void **)&pStream)) != NULL) { if (pStream->sourceDbUid == pStreamObj->sourceDbUid) { @@ -748,13 +783,15 @@ static int32_t checkForNumOfStreams(SMnode *pMnode, SStreamObj *pStreamObj) { / if (numOfStream > MND_STREAM_MAX_NUM) { mError("too many streams, no more than %d for each database", MND_STREAM_MAX_NUM); sdbCancelFetch(pMnode->pSdb, pIter); - return TSDB_CODE_MND_TOO_MANY_STREAMS; + terrno = TSDB_CODE_MND_TOO_MANY_STREAMS; + return terrno; } if (pStream->targetStbUid == 
pStreamObj->targetStbUid) { mError("Cannot write the same stable as other stream:%s", pStream->name); sdbCancelFetch(pMnode->pSdb, pIter); - return TSDB_CODE_MND_INVALID_TARGET_TABLE; + terrno = TSDB_CODE_MND_TOO_MANY_STREAMS; + return terrno; } } @@ -762,13 +799,14 @@ static int32_t checkForNumOfStreams(SMnode *pMnode, SStreamObj *pStreamObj) { / } static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { - SMnode * pMnode = pReq->info.node; - int32_t code = -1; - SStreamObj * pStream = NULL; - SDbObj * pDb = NULL; - SCMCreateStreamReq createStreamReq = {0}; - SStreamObj streamObj = {0}; + SMnode *pMnode = pReq->info.node; + SStreamObj *pStream = NULL; + SStreamObj streamObj = {0}; + char * sql = NULL; + int32_t sqlLen = 0; + terrno = TSDB_CODE_SUCCESS; + SCMCreateStreamReq createStreamReq = {0}; if (tDeserializeSCMCreateStreamReq(pReq->pCont, pReq->contLen, &createStreamReq) != 0) { terrno = TSDB_CODE_INVALID_MSG; goto _OVER; @@ -789,7 +827,6 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { if (pStream != NULL) { if (createStreamReq.igExists) { mInfo("stream:%s, already exist, ignore exist is set", createStreamReq.name); - code = 0; goto _OVER; } else { terrno = TSDB_CODE_MND_STREAM_ALREADY_EXIST; @@ -799,9 +836,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { goto _OVER; } - char* sql = NULL; - int32_t sqlLen = 0; - if(createStreamReq.sql != NULL){ + if (createStreamReq.sql != NULL) { sqlLen = strlen(createStreamReq.sql); sql = taosMemoryMalloc(sqlLen + 1); memset(sql, 0, sqlLen + 1); @@ -814,8 +849,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { goto _OVER; } - code = checkForNumOfStreams(pMnode, &streamObj); - if (code != TSDB_CODE_SUCCESS) { + if (checkForNumOfStreams(pMnode, &streamObj) < 0) { goto _OVER; } @@ -878,8 +912,6 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { saveStreamTasksInfo(&streamObj, &execInfo); taosThreadMutexUnlock(&execInfo.lock); - code = TSDB_CODE_ACTION_IN_PROGRESS; - SName dbname = {0}; tNameFromString(&dbname, createStreamReq.sourceDB, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); @@ -888,26 +920,26 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { // reuse this function for stream if (sql != NULL && sqlLen > 0) { - auditRecord(pReq, pMnode->clusterId, "createStream", dbname.dbname, name.dbname, sql, - sqlLen); - } - else{ + auditRecord(pReq, pMnode->clusterId, "createStream", dbname.dbname, name.dbname, sql, sqlLen); + } else { char detail[1000] = {0}; sprintf(detail, "dbname:%s, stream name:%s", dbname.dbname, name.dbname); auditRecord(pReq, pMnode->clusterId, "createStream", dbname.dbname, name.dbname, detail, strlen(detail)); } + _OVER: - if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { + if (terrno != TSDB_CODE_SUCCESS && terrno != TSDB_CODE_ACTION_IN_PROGRESS) { mError("stream:%s, failed to create since %s", createStreamReq.name, terrstr()); } mndReleaseStream(pMnode, pStream); tFreeSCMCreateStreamReq(&createStreamReq); tFreeStreamObj(&streamObj); - if(sql != NULL){ + if (sql != NULL) { taosMemoryFreeClear(sql); } - return code; + + return terrno; } int64_t mndStreamGenChkpId(SMnode *pMnode) { @@ -920,8 +952,11 @@ int64_t mndStreamGenChkpId(SMnode *pMnode) { if (pIter == NULL) break; maxChkpId = TMAX(maxChkpId, pStream->checkpointId); + mDebug("stream %p checkpoint %" PRId64 "", pStream, pStream->checkpointId); sdbRelease(pSdb, pStream); } + + mDebug("generated checkpoint %" PRId64 "", maxChkpId + 1); return maxChkpId + 1; } @@ -941,6 +976,22 @@ static int32_t 
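The `checkForNumOfStreams` and `mndProcessCreateStreamReq` changes above switch from threading a separate `code` variable to reporting failures through the thread-local `terrno` and returning that same value. A rough standalone analogue of the convention (made-up names and error value, not the real TDengine definitions):

```c
/* Rough analogue of the terrno convention: helpers record the error in a
 * thread-local variable and callers return that same value. Names and the
 * error constant are made up for illustration. */
#include <stdio.h>

static _Thread_local int g_lastError = 0;      /* stand-in for terrno */
enum { ERR_TOO_MANY_STREAMS = 0x03F0 };        /* stand-in error code */

static int checkStreamCount(int numOfStream, int maxStreams) {
  if (numOfStream > maxStreams) {
    g_lastError = ERR_TOO_MANY_STREAMS;        /* record the reason */
    return g_lastError;                        /* and surface it to the caller */
  }
  return 0;
}

int main(void) {
  if (checkStreamCount(70, 60) != 0) {
    printf("create stream failed, error=0x%x\n", g_lastError);
  }
  return 0;
}
```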
mndProcessStreamCheckpointTmr(SRpcMsg *pReq) { return 0; } +static int32_t mndProcessStreamRemainChkptTmr(SRpcMsg *pReq) { + SMnode *pMnode = pReq->info.node; + SSdb * pSdb = pMnode->pSdb; + if (sdbGetSize(pSdb, SDB_STREAM) <= 0) { + return 0; + } + + SMStreamDoCheckpointMsg *pMsg = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg)); + pMsg->checkpointId = 0; + + int32_t size = sizeof(SMStreamDoCheckpointMsg); + SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE, .pCont = pMsg, .contLen = size}; + tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); + return 0; +} + static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, int32_t nodeId, int64_t checkpointId, int64_t streamId, int32_t taskId) { SStreamCheckpointSourceReq req = {0}; @@ -983,107 +1034,104 @@ static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, in return 0; } -// static int32_t mndProcessStreamCheckpointTrans(SMnode *pMnode, SStreamObj *pStream, int64_t checkpointId) { -// int64_t timestampMs = taosGetTimestampMs(); -// if (timestampMs - pStream->checkpointFreq < tsStreamCheckpointInterval * 1000) { -// return -1; -// } -// STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, NULL, "stream-checkpoint"); -// if (pTrans == NULL) return -1; -// mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); -// if (mndTrancCheckConflict(pMnode, pTrans) != 0) { -// mError("failed to checkpoint of stream name%s, checkpointId: %" PRId64 ", reason:%s", pStream->name, -// checkpointId, -// tstrerror(TSDB_CODE_MND_TRANS_CONFLICT)); -// mndTransDrop(pTrans); -// return -1; -// } -// mDebug("start to trigger checkpoint for stream:%s, checkpoint: %" PRId64 "", pStream->name, checkpointId); -// atomic_store_64(&pStream->currentTick, 1); -// taosWLockLatch(&pStream->lock); -// // 1. 
redo action: broadcast checkpoint source msg for all source vg -// int32_t totLevel = taosArrayGetSize(pStream->tasks); -// for (int32_t i = 0; i < totLevel; i++) { -// SArray *pLevel = taosArrayGetP(pStream->tasks, i); -// SStreamTask *pTask = taosArrayGetP(pLevel, 0); -// if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { -// int32_t sz = taosArrayGetSize(pLevel); -// for (int32_t j = 0; j < sz; j++) { -// SStreamTask *pTask = taosArrayGetP(pLevel, j); -// /*A(pTask->info.nodeId > 0);*/ -// SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId); -// if (pVgObj == NULL) { -// taosWUnLockLatch(&pStream->lock); -// mndTransDrop(pTrans); -// return -1; -// } +static int32_t mndProcessStreamCheckpointTrans(SMnode *pMnode, SStreamObj *pStream, int64_t checkpointId) { + int32_t code = -1; + int64_t timestampMs = taosGetTimestampMs(); + if (timestampMs - pStream->checkpointFreq < tsStreamCheckpointInterval * 1000) { + return -1; + } -// void *buf; -// int32_t tlen; -// if (mndBuildStreamCheckpointSourceReq2(&buf, &tlen, pTask->info.nodeId, checkpointId, pTask->id.streamId, -// pTask->id.taskId) < 0) { -// mndReleaseVgroup(pMnode, pVgObj); -// taosWUnLockLatch(&pStream->lock); -// mndTransDrop(pTrans); -// return -1; -// } + bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb, true); + if (conflict) { + mndAddtoCheckpointWaitingList(pStream, checkpointId); + mWarn("checkpoint conflict with other trans in %s, ignore the checkpoint for stream:%s %" PRIx64, pStream->sourceDb, + pStream->name, pStream->uid); + return -1; + } -// STransAction action = {0}; -// action.epSet = mndGetVgroupEpset(pMnode, pVgObj); -// action.pCont = buf; -// action.contLen = tlen; -// action.msgType = TDMT_VND_STREAM_CHECK_POINT_SOURCE; + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, NULL, MND_STREAM_CHECKPOINT_NAME); + if (pTrans == NULL) { + return -1; + } -// mndReleaseVgroup(pMnode, pVgObj); + mndStreamRegisterTrans(pTrans, MND_STREAM_CHECKPOINT_NAME, pStream->sourceDb, pStream->targetDb); -// if (mndTransAppendRedoAction(pTrans, &action) != 0) { -// taosMemoryFree(buf); -// taosWUnLockLatch(&pStream->lock); -// mndReleaseStream(pMnode, pStream); -// mndTransDrop(pTrans); -// return -1; -// } -// } -// } -// } -// // 2. reset tick -// pStream->checkpointFreq = checkpointId; -// pStream->checkpointId = checkpointId; -// pStream->checkpointFreq = taosGetTimestampMs(); -// atomic_store_64(&pStream->currentTick, 0); -// // 3. 
commit log: stream checkpoint info -// pStream->version = pStream->version + 1; -// taosWUnLockLatch(&pStream->lock); + mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); + if (mndTrancCheckConflict(pMnode, pTrans) != 0) { + mError("failed to checkpoint of stream name%s, checkpointId: %" PRId64 ", reason:%s", pStream->name, checkpointId, + tstrerror(TSDB_CODE_MND_TRANS_CONFLICT)); + goto _ERR; + } -// // // code condtion + mDebug("start to trigger checkpoint for stream:%s, checkpoint: %" PRId64 "", pStream->name, checkpointId); -// SSdbRaw *pCommitRaw = mndStreamActionEncode(pStream); -// if (pCommitRaw == NULL) { -// mError("failed to prepare trans rebalance since %s", terrstr()); -// goto _ERR; -// } -// if (mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { -// sdbFreeRaw(pCommitRaw); -// mError("failed to prepare trans rebalance since %s", terrstr()); -// goto _ERR; -// } -// if (sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY) != 0) { -// sdbFreeRaw(pCommitRaw); -// mError("failed to prepare trans rebalance since %s", terrstr()); -// goto _ERR; -// } + taosWLockLatch(&pStream->lock); + pStream->currentTick = 1; -// if (mndTransPrepare(pMnode, pTrans) != 0) { -// mError("failed to prepare trans rebalance since %s", terrstr()); -// goto _ERR; -// } -// mndTransDrop(pTrans); -// return 0; -// _ERR: -// mndTransDrop(pTrans); -// return -1; -// } + // 1. redo action: broadcast checkpoint source msg for all source vg + int32_t totLevel = taosArrayGetSize(pStream->tasks); + for (int32_t i = 0; i < totLevel; i++) { + SArray * pLevel = taosArrayGetP(pStream->tasks, i); + SStreamTask *p = taosArrayGetP(pLevel, 0); + + if (p->info.taskLevel == TASK_LEVEL__SOURCE) { + int32_t sz = taosArrayGetSize(pLevel); + for (int32_t j = 0; j < sz; j++) { + SStreamTask *pTask = taosArrayGetP(pLevel, j); + + SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId); + if (pVgObj == NULL) { + taosWUnLockLatch(&pStream->lock); + goto _ERR; + } + + void * buf; + int32_t tlen; + if (mndBuildStreamCheckpointSourceReq2(&buf, &tlen, pTask->info.nodeId, checkpointId, pTask->id.streamId, + pTask->id.taskId) < 0) { + mndReleaseVgroup(pMnode, pVgObj); + taosWUnLockLatch(&pStream->lock); + goto _ERR; + } + + STransAction act = {0}; + SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj); + mndReleaseVgroup(pMnode, pVgObj); + + initTransAction(&act, buf, tlen, TDMT_VND_STREAM_CHECK_POINT_SOURCE, &epset, TSDB_CODE_SYN_PROPOSE_NOT_READY); + if (mndTransAppendRedoAction(pTrans, &act) != 0) { + taosMemoryFree(buf); + taosWUnLockLatch(&pStream->lock); + goto _ERR; + } + } + } + } + + // 2. reset tick + pStream->checkpointId = checkpointId; + pStream->checkpointFreq = taosGetTimestampMs(); + pStream->currentTick = 0; + + // 3. 
commit log: stream checkpoint info + pStream->version = pStream->version + 1; + taosWUnLockLatch(&pStream->lock); + + if ((code = mndPersistTransLog(pStream, pTrans)) != TSDB_CODE_SUCCESS) { + return code; + } + + if ((code = mndTransPrepare(pMnode, pTrans)) != TSDB_CODE_SUCCESS) { + mError("failed to prepare trans rebalance since %s", terrstr()); + goto _ERR; + } + + code = 0; +_ERR: + mndTransDrop(pTrans); + return code; +} static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream, SMnode *pMnode, int64_t chkptId) { taosWLockLatch(&pStream->lock); @@ -1159,23 +1207,7 @@ static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream return 0; } -static const char *mndGetStreamDB(SMnode *pMnode) { - SSdb * pSdb = pMnode->pSdb; - SStreamObj *pStream = NULL; - void * pIter = NULL; - - pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); - if (pIter == NULL) { - return NULL; - } - - const char *p = taosStrdup(pStream->sourceDb); - mndReleaseStream(pMnode, pStream); - sdbCancelFetch(pSdb, pIter); - return p; -} - -static int32_t initStreamNodeList(SMnode* pMnode) { +static int32_t initStreamNodeList(SMnode *pMnode) { if (execInfo.pNodeList == NULL || (taosArrayGetSize(execInfo.pNodeList) == 0)) { execInfo.pNodeList = taosArrayDestroy(execInfo.pNodeList); execInfo.pNodeList = extractNodeListFromStream(pMnode); @@ -1184,11 +1216,11 @@ static int32_t initStreamNodeList(SMnode* pMnode) { return taosArrayGetSize(execInfo.pNodeList); } -static bool taskNodeIsUpdated(SMnode* pMnode) { +static bool taskNodeIsUpdated(SMnode *pMnode) { // check if the node update happens or not taosThreadMutexLock(&execInfo.lock); - int32_t numOfNodes = initStreamNodeList(pMnode); + int32_t numOfNodes = initStreamNodeList(pMnode); if (numOfNodes == 0) { mDebug("stream task node change checking done, no vgroups exist, do nothing"); execInfo.ts = taosGetTimestampSec(); @@ -1228,6 +1260,38 @@ static bool taskNodeIsUpdated(SMnode* pMnode) { return nodeUpdated; } +static int32_t mndCheckNodeStatus(SMnode *pMnode) { + bool ready = true; + int64_t ts = taosGetTimestampSec(); + if (taskNodeIsUpdated(pMnode)) { + return -1; + } + + taosThreadMutexLock(&execInfo.lock); + if (taosArrayGetSize(execInfo.pNodeList) == 0) { + mDebug("stream task node change checking done, no vgroups exist, do nothing"); + execInfo.ts = ts; + } + + for (int32_t i = 0; i < taosArrayGetSize(execInfo.pTaskList); ++i) { + STaskId * p = taosArrayGet(execInfo.pTaskList, i); + STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, p, sizeof(*p)); + if (pEntry == NULL) { + continue; + } + + if (pEntry->status != TASK_STATUS__READY) { + mDebug("s-task:0x%" PRIx64 "-0x%x (nodeId:%d) status:%s not ready, checkpoint msg not issued", + pEntry->id.streamId, (int32_t)pEntry->id.taskId, 0, streamTaskGetStatusStr(pEntry->status)); + ready = false; + break; + } + } + + taosThreadMutexUnlock(&execInfo.lock); + return ready ? 
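The reinstated `mndProcessStreamCheckpointTrans` above first checks for a conflicting stream transaction on the same databases; if one exists, the request is parked via `mndAddtoCheckpointWaitingList` instead of failing outright and is retried later by `mndProcessStreamCheckpointInCandid`. A small sketch of that defer-on-conflict control flow, with stubs standing in for the real mnode calls:

```c
/* Sketch of the defer-on-conflict flow: if another transaction already owns
 * the database, queue the checkpoint request for a later retry instead of
 * starting a conflicting transaction. Stubs replace the real mnode calls. */
#include <stdbool.h>
#include <stdio.h>

static bool otherTransRunning(const char *db) {      /* stand-in for the conflict check */
  return db[0] == 'b';                               /* pretend db "b..." is busy */
}

static void addToWaitingList(const char *stream, long checkpointId) {
  printf("parked checkpoint %ld for stream %s\n", checkpointId, stream);
}

static int triggerCheckpoint(const char *stream, const char *db, long checkpointId) {
  if (otherTransRunning(db)) {
    addToWaitingList(stream, checkpointId);          /* retried by the candidate handler */
    return -1;
  }
  printf("start checkpoint trans %ld for stream %s\n", checkpointId, stream);
  /* ... build redo actions per source task, append commit log, prepare ... */
  return 0;
}

int main(void) {
  triggerCheckpoint("s1", "a_db", 100);              /* runs immediately */
  triggerCheckpoint("s2", "b_db", 100);              /* deferred */
  return 0;
}
```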
0 : -1; +} + static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { SMnode * pMnode = pReq->info.node; SSdb * pSdb = pMnode->pSdb; @@ -1235,90 +1299,65 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { SStreamObj *pStream = NULL; int32_t code = 0; - // check if the node update happens or not - bool updated = taskNodeIsUpdated(pMnode); - if (updated) { - mWarn("checkpoint ignore, stream task nodes update detected"); - return -1; - } - - { // check if all tasks are in TASK_STATUS__READY status - bool ready = true; - taosThreadMutexLock(&execInfo.lock); - - // no streams exists, abort - int32_t numOfTasks = taosArrayGetSize(execInfo.pTaskList); - if (numOfTasks <= 0) { - taosThreadMutexUnlock(&execInfo.lock); - return 0; - } - - for (int32_t i = 0; i < taosArrayGetSize(execInfo.pTaskList); ++i) { - STaskId * p = taosArrayGet(execInfo.pTaskList, i); - STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, p, sizeof(*p)); - if (pEntry == NULL) { - continue; - } - - if (pEntry->status != TASK_STATUS__READY) { - mDebug("s-task:0x%" PRIx64 "-0x%x (nodeId:%d) status:%s not ready, checkpoint msg not issued", - pEntry->id.streamId, (int32_t)pEntry->id.taskId, 0, streamTaskGetStatusStr(pEntry->status)); - ready = false; - break; - } - } - taosThreadMutexUnlock(&execInfo.lock); - if (!ready) { - return 0; - } + if ((code = mndCheckNodeStatus(pMnode)) != 0) { + return code; } SMStreamDoCheckpointMsg *pMsg = (SMStreamDoCheckpointMsg *)pReq->pCont; - int64_t checkpointId = pMsg->checkpointId; - - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, NULL, MND_STREAM_CHECKPOINT_NAME); - if (pTrans == NULL) { - mError("failed to trigger checkpoint, reason: %s", tstrerror(TSDB_CODE_OUT_OF_MEMORY)); - return -1; - } - - mDebug("start to trigger checkpoint, checkpointId: %" PRId64, checkpointId); - - const char *pDb = mndGetStreamDB(pMnode); - mndTransSetDbName(pTrans, pDb, pDb); - mndStreamRegisterTrans(pTrans, MND_STREAM_CHECKPOINT_NAME, pDb, pDb); - taosMemoryFree((void *)pDb); - - if (mndTransCheckConflict(pMnode, pTrans) != 0) { - mError("failed to trigger checkpoint, checkpointId: %" PRId64 ", reason:%s", checkpointId, - tstrerror(TSDB_CODE_MND_TRANS_CONFLICT)); - mndTransDrop(pTrans); - return -1; - } - - while (1) { - pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); - if (pIter == NULL) break; - - code = mndAddStreamCheckpointToTrans(pTrans, pStream, pMnode, checkpointId); + while ((pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream)) != NULL) { + code = mndProcessStreamCheckpointTrans(pMnode, pStream, pMsg->checkpointId); sdbRelease(pSdb, pStream); if (code == -1) { break; } } - if (code == 0) { - if (mndTransPrepare(pMnode, pTrans) != 0) { - mError("failed to prepare trans rebalance since %s", terrstr()); + return code; +} + +static int32_t mndProcessStreamCheckpointInCandid(SRpcMsg *pReq) { + SMnode *pMnode = pReq->info.node; + void * pIter = NULL; + int32_t code = 0; + + taosThreadMutexLock(&execInfo.lock); + int32_t num = taosHashGetSize(execInfo.transMgmt.pWaitingList); + taosThreadMutexUnlock(&execInfo.lock); + if (num == 0) { + return code; + } + + if ((code = mndCheckNodeStatus(pMnode)) != 0) { + return code; + } + + SArray *pList = taosArrayInit(4, sizeof(int64_t)); + while ((pIter = taosHashIterate(execInfo.transMgmt.pWaitingList, pIter)) != NULL) { + SCheckpointCandEntry *pEntry = pIter; + + SStreamObj *ps = mndAcquireStream(pMnode, pEntry->pName); + if (ps == NULL) { + continue; + } + mDebug("start to launch 
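The new `mndCheckNodeStatus` above gates checkpointing: it bails out while a node update is pending, then scans every registered task and requires `TASK_STATUS__READY` before any checkpoint message is issued. A stripped-down sketch of that all-or-nothing readiness scan, with illustrative types and status values:

```c
/* Stripped-down readiness gate: the checkpoint is only issued when every
 * known task reports READY. Types and status values are illustrative. */
#include <stdbool.h>
#include <stdio.h>

enum { TASK_READY = 1, TASK_SCAN_HISTORY = 2, TASK_HALT = 3 };

typedef struct {
  long taskId;
  int  status;
} TaskEntry;

static bool allTasksReady(const TaskEntry *tasks, int num) {
  for (int i = 0; i < num; ++i) {
    if (tasks[i].status != TASK_READY) {
      printf("task 0x%lx not ready, checkpoint msg not issued\n", tasks[i].taskId);
      return false;
    }
  }
  return true;
}

int main(void) {
  TaskEntry tasks[] = {{0x1, TASK_READY}, {0x2, TASK_SCAN_HISTORY}};
  printf("ready: %s\n", allTasksReady(tasks, 2) ? "yes" : "no");
  return 0;
}
```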
checkpoint for stream:%s %" PRIx64 " in candidate list", pEntry->pName, pEntry->streamId); + + code = mndProcessStreamCheckpointTrans(pMnode, ps, pEntry->checkpointId); + mndReleaseStream(pMnode, ps); + + if (code == TSDB_CODE_SUCCESS) { + taosArrayPush(pList, &pEntry->streamId); } } - mndTransDrop(pTrans); + for (int32_t i = 0; i < taosArrayGetSize(pList); ++i) { + int64_t *pId = taosArrayGet(pList, i); - // only one trans here - taosThreadMutexLock(&execInfo.lock); - execInfo.activeCheckpoint = checkpointId; - taosThreadMutexUnlock(&execInfo.lock); + taosHashRemove(execInfo.transMgmt.pWaitingList, pId, sizeof(*pId)); + } + + int32_t remain = taosHashGetSize(execInfo.transMgmt.pWaitingList); + mDebug("%d in candidate list generated checkpoint, remaining:%d", (int32_t)taosArrayGetSize(pList), remain); + taosArrayDestroy(pList); return code; } @@ -1355,7 +1394,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { } // check if it is conflict with other trans in both sourceDb and targetDb. - bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb); + bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb, true); if (conflict) { sdbRelease(pMnode->pSdb, pStream); tFreeMDropStreamReq(&dropReq); @@ -1556,7 +1595,7 @@ static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pB char dstStr[20] = {0}; STR_TO_VARSTR(dstStr, sinkQuota) pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetVal(pColInfo, numOfRows, (const char*) dstStr, false); + colDataSetVal(pColInfo, numOfRows, (const char *)dstStr, false); char scanHistoryIdle[20 + VARSTR_HEADER_SIZE] = {0}; strcpy(scanHistoryIdle, "100a"); @@ -1564,7 +1603,7 @@ static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pB memset(dstStr, 0, tListLen(dstStr)); STR_TO_VARSTR(dstStr, scanHistoryIdle) pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetVal(pColInfo, numOfRows, (const char*) dstStr, false); + colDataSetVal(pColInfo, numOfRows, (const char *)dstStr, false); numOfRows++; sdbRelease(pSdb, pStream); @@ -1579,7 +1618,7 @@ static void mndCancelGetNextStream(SMnode *pMnode, void *pIter) { sdbCancelFetch(pSdb, pIter); } -static void setTaskAttrInResBlock(SStreamObj* pStream, SStreamTask* pTask, SSDataBlock* pBlock, int32_t numOfRows) { +static void setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SSDataBlock *pBlock, int32_t numOfRows) { SColumnInfoData *pColInfo; int32_t cols = 0; @@ -1633,7 +1672,7 @@ static void setTaskAttrInResBlock(SStreamObj* pStream, SStreamTask* pTask, SSDat colDataSetVal(pColInfo, numOfRows, (const char *)level, false); // status - char status[20 + VARSTR_HEADER_SIZE] = {0}; + char status[20 + VARSTR_HEADER_SIZE] = {0}; STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; STaskStatusEntry *pe = taosHashGet(execInfo.pTaskMap, &id, sizeof(id)); @@ -1684,7 +1723,7 @@ static void setTaskAttrInResBlock(SStreamObj* pStream, SStreamTask* pTask, SSDat colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); } -static int32_t getNumOfTasks(SArray* pTaskList) { +static int32_t getNumOfTasks(SArray *pTaskList) { int32_t numOfLevels = taosArrayGetSize(pTaskList); int32_t count = 0; @@ -1742,7 +1781,7 @@ static void mndCancelGetNextStreamTask(SMnode *pMnode, void *pIter) { sdbCancelFetch(pSdb, pIter); } -static int32_t mndPauseStreamTask(SMnode* pMnode, STrans *pTrans, SStreamTask *pTask) { +static int32_t mndPauseStreamTask(SMnode *pMnode, STrans 
*pTrans, SStreamTask *pTask) { SVPauseStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVPauseStreamTaskReq)); if (pReq == NULL) { mError("failed to malloc in pause stream, size:%" PRIzu ", code:%s", sizeof(SVPauseStreamTaskReq), @@ -1768,7 +1807,7 @@ static int32_t mndPauseStreamTask(SMnode* pMnode, STrans *pTrans, SStreamTask *p return 0; } -int32_t mndPauseAllStreamTasks(SMnode* pMnode, STrans *pTrans, SStreamObj *pStream) { +int32_t mndPauseAllStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream) { SArray *tasks = pStream->tasks; int32_t size = taosArrayGetSize(tasks); @@ -1838,7 +1877,7 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { } // check if it is conflict with other trans in both sourceDb and targetDb. - bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb); + bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb, true); if (conflict) { sdbRelease(pMnode->pSdb, pStream); return -1; @@ -1896,7 +1935,7 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { return TSDB_CODE_ACTION_IN_PROGRESS; } -static int32_t mndResumeStreamTask(STrans *pTrans, SMnode* pMnode, SStreamTask *pTask, int8_t igUntreated) { +static int32_t mndResumeStreamTask(STrans *pTrans, SMnode *pMnode, SStreamTask *pTask, int8_t igUntreated) { SVResumeStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVResumeStreamTaskReq)); if (pReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -1920,7 +1959,7 @@ static int32_t mndResumeStreamTask(STrans *pTrans, SMnode* pMnode, SStreamTask * return 0; } -int32_t mndResumeAllStreamTasks(STrans *pTrans, SMnode* pMnode, SStreamObj *pStream, int8_t igUntreated) { +int32_t mndResumeAllStreamTasks(STrans *pTrans, SMnode *pMnode, SStreamObj *pStream, int8_t igUntreated) { int32_t size = taosArrayGetSize(pStream->tasks); for (int32_t i = 0; i < size; i++) { SArray *pTasks = taosArrayGetP(pStream->tasks, i); @@ -1973,7 +2012,7 @@ static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) { } // check if it is conflict with other trans in both sourceDb and targetDb. - bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb); + bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb, true); if (conflict) { sdbRelease(pMnode->pSdb, pStream); return -1; @@ -2157,8 +2196,8 @@ static bool isNodeEpsetChanged(const SEpSet *pPrevEpset, const SEpSet *pCurrent) // 1. increase the replica does not affect the stream process. // 2. decreasing the replica may affect the stream task execution in the way that there is one or more running stream // tasks on the will be removed replica. -// 3. vgroup redistribution is an combination operation of first increase replica and then decrease replica. So we will -// handle it as mentioned in 1 & 2 items. +// 3. vgroup redistribution is an combination operation of first increase replica and then decrease replica. So we +// will handle it as mentioned in 1 & 2 items. 
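The `mndProcessStreamCheckpointInCandid` handler a few hunks above drains the waiting list: it iterates the hash, retries each parked checkpoint, records the stream ids that succeeded in a temporary array, and only removes them from the hash after the iteration finishes. A minimal sketch of that collect-then-remove idiom, with a plain array standing in for the hash:

```c
/* Collect-then-remove: mark successful entries during the scan and delete
 * them afterwards, so the container is never mutated while being iterated.
 * A fixed array stands in for the waiting-list hash table. */
#include <stdbool.h>
#include <stdio.h>

typedef struct {
  long streamId;
  long checkpointId;
  bool used;
} WaitEntry;

static int retryCheckpoint(long streamId, long checkpointId) {
  (void)checkpointId;
  return (streamId % 2 == 0) ? 0 : -1;   /* pretend even streams succeed */
}

int main(void) {
  WaitEntry list[] = {{2, 100, true}, {3, 100, true}, {4, 100, true}};
  long done[3];
  int  numDone = 0;

  for (int i = 0; i < 3; ++i) {                          /* 1. scan, do not mutate */
    if (retryCheckpoint(list[i].streamId, list[i].checkpointId) == 0) {
      done[numDone++] = list[i].streamId;
    }
  }
  for (int i = 0; i < numDone; ++i) {                    /* 2. remove afterwards */
    for (int j = 0; j < 3; ++j) {
      if (list[j].used && list[j].streamId == done[i]) list[j].used = false;
    }
  }

  int remain = 0;
  for (int j = 0; j < 3; ++j) remain += list[j].used ? 1 : 0;
  printf("%d launched, %d remaining in waiting list\n", numDone, remain);
  return 0;
}
```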
static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList) { SVgroupChangeInfo info = { .pUpdateNodeList = taosArrayInit(4, sizeof(SNodeUpdateInfo)), @@ -2186,10 +2225,8 @@ static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pP epsetAssign(&updateInfo.prevEp, &pPrevEntry->epset); epsetAssign(&updateInfo.newEp, &pCurrent->epset); taosArrayPush(info.pUpdateNodeList, &updateInfo); - - } - if(pCurrent->nodeId != SNODE_HANDLE){ + if (pCurrent->nodeId != SNODE_HANDLE) { SVgObj *pVgroup = mndAcquireVgroup(pMnode, pCurrent->nodeId); taosHashPut(info.pDBMap, pVgroup->dbName, strlen(pVgroup->dbName), NULL, 0); mndReleaseVgroup(pMnode, pVgroup); @@ -2402,7 +2439,6 @@ static int32_t doRemoveTasks(SStreamExecInfo *pExecNode, STaskId *pRemovedId) { if (p == NULL) { return TSDB_CODE_SUCCESS; } - taosHashRemove(pExecNode->pTaskMap, pRemovedId, sizeof(*pRemovedId)); for (int32_t k = 0; k < taosArrayGetSize(pExecNode->pTaskList); ++k) { @@ -2440,7 +2476,7 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { STaskId * pId = taosArrayGet(execInfo.pTaskList, i); STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, pId, sizeof(*pId)); - if(pEntry->nodeId == SNODE_HANDLE) continue; + if (pEntry->nodeId == SNODE_HANDLE) continue; bool existed = taskNodeExists(pNodeSnapshot, pEntry->nodeId); if (!existed) { @@ -2457,9 +2493,9 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { (int32_t)taosArrayGetSize(execInfo.pTaskList)); int32_t size = taosArrayGetSize(pNodeSnapshot); - SArray* pValidNodeEntryList = taosArrayInit(4, sizeof(SNodeEntry)); - for(int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeList); ++i) { - SNodeEntry* p = taosArrayGet(execInfo.pNodeList, i); + SArray *pValidNodeEntryList = taosArrayInit(4, sizeof(SNodeEntry)); + for (int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeList); ++i) { + SNodeEntry *p = taosArrayGet(execInfo.pNodeList, i); for (int32_t j = 0; j < size; ++j) { SNodeEntry *pEntry = taosArrayGet(pNodeSnapshot, j); @@ -2478,13 +2514,14 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { return 0; } -static void killAllCheckpointTrans(SMnode* pMnode, SVgroupChangeInfo* pChangeInfo) { - void* pIter = NULL; - while((pIter = taosHashIterate(pChangeInfo->pDBMap, pIter)) != NULL) { - char* pDb = (char*) pIter; +// kill all trans in the dst DB +static void killAllCheckpointTrans(SMnode *pMnode, SVgroupChangeInfo *pChangeInfo) { + void *pIter = NULL; + while ((pIter = taosHashIterate(pChangeInfo->pDBMap, pIter)) != NULL) { + char *pDb = (char *)pIter; size_t len = 0; - void* pKey = taosHashGetKey(pDb, &len); + void * pKey = taosHashGetKey(pDb, &len); killActiveCheckpointTrans(pMnode, pKey, len); } } @@ -2633,7 +2670,7 @@ void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) { ASSERT(taosHashGetSize(pExecNode->pTaskMap) == taosArrayGetSize(pExecNode->pTaskList)); } -STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, const char *name, const char* pMsg) { +STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, const char *name, const char *pMsg) { STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, name); if (pTrans == NULL) { mError("failed to build trans:%s, reason: %s", name, tstrerror(TSDB_CODE_OUT_OF_MEMORY)); @@ -2641,7 +2678,7 @@ STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, const return NULL; } - mDebug("s-task:0x%"PRIx64" start to 
build trans %s", pStream->uid, pMsg); + mDebug("s-task:0x%" PRIx64 " start to build trans %s", pStream->uid, pMsg); mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); if (mndTransCheckConflict(pMnode, pTrans) != 0) { @@ -2677,6 +2714,7 @@ int32_t createStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) { terrno = TSDB_CODE_OUT_OF_MEMORY; mError("failed to malloc in reset stream, size:%" PRIzu ", code:%s", sizeof(SVResetStreamTaskReq), tstrerror(TSDB_CODE_OUT_OF_MEMORY)); + taosWUnLockLatch(&pStream->lock); return terrno; } @@ -2720,9 +2758,9 @@ int32_t createStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) { return TSDB_CODE_ACTION_IN_PROGRESS; } -int32_t killActiveCheckpointTrans(SMnode *pMnode, const char* pDBName, size_t len) { +int32_t killActiveCheckpointTrans(SMnode *pMnode, const char *pDBName, size_t len) { // data in the hash table will be removed automatically, no need to remove it here. - SStreamTransInfo* pTransInfo = taosHashGet(execInfo.transMgmt.pDBTrans, pDBName, len); + SStreamTransInfo *pTransInfo = taosHashGet(execInfo.transMgmt.pDBTrans, pDBName, len); if (pTransInfo == NULL) { return TSDB_CODE_SUCCESS; } @@ -2733,7 +2771,7 @@ int32_t killActiveCheckpointTrans(SMnode *pMnode, const char* pDBName, size_t le return TSDB_CODE_SUCCESS; } - STrans* pTrans = mndAcquireTrans(pMnode, pTransInfo->transId); + STrans *pTrans = mndAcquireTrans(pMnode, pTransInfo->transId); if (pTrans != NULL) { mInfo("kill checkpoint transId:%d in Db:%s", pTransInfo->transId, pDBName); mndKillTrans(pMnode, pTrans); @@ -2744,7 +2782,7 @@ int32_t killActiveCheckpointTrans(SMnode *pMnode, const char* pDBName, size_t le } static int32_t mndResetStatusFromCheckpoint(SMnode *pMnode, int32_t transId) { - STrans* pTrans = mndAcquireTrans(pMnode, transId); + STrans *pTrans = mndAcquireTrans(pMnode, transId); if (pTrans != NULL) { mInfo("kill checkpoint transId:%d to reset task status", transId); mndKillTrans(pMnode, pTrans); @@ -2761,10 +2799,10 @@ static int32_t mndResetStatusFromCheckpoint(SMnode *pMnode, int32_t transId) { break; } - bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb); + bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb, false); if (conflict) { - mError("stream:%s other trans exists in DB:%s & %s failed to start reset-status trans", - pStream->name, pStream->sourceDb, pStream->targetDb); + mError("stream:%s other trans exists in DB:%s & %s failed to start reset-status trans", pStream->name, + pStream->sourceDb, pStream->targetDb); continue; } @@ -2775,11 +2813,10 @@ static int32_t mndResetStatusFromCheckpoint(SMnode *pMnode, int32_t transId) { return code; } } - return 0; } -static SStreamTask* mndGetStreamTask(STaskId* pId, SStreamObj* pStream) { +static SStreamTask *mndGetStreamTask(STaskId *pId, SStreamObj *pStream) { for (int32_t i = 0; i < taosArrayGetSize(pStream->tasks); i++) { SArray *pLevel = taosArrayGetP(pStream->tasks, i); @@ -2797,7 +2834,7 @@ static SStreamTask* mndGetStreamTask(STaskId* pId, SStreamObj* pStream) { static bool needDropRelatedFillhistoryTask(STaskStatusEntry *pTaskEntry, SStreamExecInfo *pExecNode) { if (pTaskEntry->status == TASK_STATUS__STREAM_SCAN_HISTORY && pTaskEntry->statusLastDuration >= 10) { - if (fabs(pTaskEntry->inputQUsed) <= DBL_EPSILON) { + if (!pTaskEntry->inputQChanging && pTaskEntry->inputQUnchangeCounter > 10) { int32_t numOfReady = 0; int32_t numOfTotal = 0; for (int32_t k = 0; k < taosArrayGetSize(pExecNode->pTaskList); ++k) 
{ @@ -2834,7 +2871,7 @@ static bool needDropRelatedFillhistoryTask(STaskStatusEntry *pTaskEntry, SStream static int32_t mndDropRelatedFillhistoryTask(SMnode *pMnode, STaskStatusEntry *pTaskEntry, SStreamObj *pStream) { SStreamTask *pTask = mndGetStreamTask(&pTaskEntry->id, pStream); if (pTask == NULL) { - mError("failed to get the stream task:0x%x, may have been dropped", (int32_t) pTaskEntry->id.taskId); + mError("failed to get the stream task:0x%x, may have been dropped", (int32_t)pTaskEntry->id.taskId); return -1; } @@ -2869,12 +2906,12 @@ int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList) { mInfo("set node expired for %d nodes", num); for (int k = 0; k < num; ++k) { - int32_t* pVgId = taosArrayGet(pNodeList, k); + int32_t *pVgId = taosArrayGet(pNodeList, k); mInfo("set node expired for nodeId:%d, total:%d", *pVgId, num); int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeList); for (int i = 0; i < numOfNodes; ++i) { - SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, i); + SNodeEntry *pNodeEntry = taosArrayGet(execInfo.pNodeList, i); if (pNodeEntry->nodeId == *pVgId) { mInfo("vgId:%d expired for some stream tasks, needs update nodeEp", *pVgId); @@ -2887,13 +2924,13 @@ int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList) { return TSDB_CODE_SUCCESS; } -static void updateStageInfo(STaskStatusEntry* pTaskEntry, int64_t stage) { +static void updateStageInfo(STaskStatusEntry *pTaskEntry, int64_t stage) { int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeList); - for(int32_t j = 0; j < numOfNodes; ++j) { - SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, j); + for (int32_t j = 0; j < numOfNodes; ++j) { + SNodeEntry *pNodeEntry = taosArrayGet(execInfo.pNodeList, j); if (pNodeEntry->nodeId == pTaskEntry->nodeId) { - mInfo("vgId:%d stage updated from %"PRId64 " to %"PRId64 ", nodeUpdate trigger by s-task:0x%" PRIx64, pTaskEntry->nodeId, - pTaskEntry->stage, stage, pTaskEntry->id.taskId); + mInfo("vgId:%d stage updated from %" PRId64 " to %" PRId64 ", nodeUpdate trigger by s-task:0x%" PRIx64, + pTaskEntry->nodeId, pTaskEntry->stage, stage, pTaskEntry->id.taskId); pNodeEntry->stageUpdated = true; pTaskEntry->stage = stage; @@ -2940,6 +2977,7 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { bool snodeChanged = false; for (int32_t i = 0; i < req.numOfTasks; ++i) { STaskStatusEntry *p = taosArrayGet(req.pTaskStatus, i); + STaskStatusEntry *pTaskEntry = taosHashGet(execInfo.pTaskMap, &p->id, sizeof(p->id)); if (pTaskEntry == NULL) { mError("s-task:0x%" PRIx64 " not found in mnode task list", p->id.taskId); @@ -2948,8 +2986,20 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { if (pTaskEntry->stage != p->stage && pTaskEntry->stage != -1) { updateStageInfo(pTaskEntry, p->stage); - if(pTaskEntry->nodeId == SNODE_HANDLE) snodeChanged = true; + if (pTaskEntry->nodeId == SNODE_HANDLE) snodeChanged = true; } else { + // task is idle for more than 50 sec. 
+ if (fabs(pTaskEntry->inputQUsed - p->inputQUsed) <= DBL_EPSILON) { + if (!pTaskEntry->inputQChanging) { + pTaskEntry->inputQUnchangeCounter++; + } else { + pTaskEntry->inputQChanging = false; + } + } else { + pTaskEntry->inputQChanging = true; + pTaskEntry->inputQUnchangeCounter = 0; + } + streamTaskStatusCopy(pTaskEntry, p); if (p->activeCheckpointId != 0) { if (activeCheckpointId != 0) { @@ -2976,7 +3026,7 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { if (p->status == TASK_STATUS__STREAM_SCAN_HISTORY) { bool drop = needDropRelatedFillhistoryTask(pTaskEntry, &execInfo); - if(drop) { + if (drop) { SStreamObj *pStreamObj = mndGetStreamObj(pMnode, pTaskEntry->id.streamId); if (pStreamObj == NULL) { mError("failed to acquire the streamObj:0x%" PRIx64 " it may have been dropped", pStreamObj->uid); @@ -3013,9 +3063,13 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { return TSDB_CODE_SUCCESS; } +void freeCheckpointCandEntry(void *param) { + SCheckpointCandEntry *pEntry = param; + taosMemoryFreeClear(pEntry->pName); +} SStreamObj *mndGetStreamObj(SMnode *pMnode, int64_t streamId) { - void *pIter = NULL; - SSdb *pSdb = pMnode->pSdb; + void * pIter = NULL; + SSdb * pSdb = pMnode->pSdb; SStreamObj *pStream = NULL; while ((pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream)) != NULL) { @@ -3026,4 +3080,4 @@ SStreamObj *mndGetStreamObj(SMnode *pMnode, int64_t streamId) { } return NULL; -} \ No newline at end of file +} diff --git a/source/dnode/mnode/impl/src/mndStreamTrans.c b/source/dnode/mnode/impl/src/mndStreamTrans.c index 2345de290a..fa36d69d6e 100644 --- a/source/dnode/mnode/impl/src/mndStreamTrans.c +++ b/source/dnode/mnode/impl/src/mndStreamTrans.c @@ -13,8 +13,8 @@ * along with this program. If not, see . */ -#include "mndTrans.h" #include "mndStream.h" +#include "mndTrans.h" typedef struct SKeyInfo { void* pKey; @@ -35,17 +35,15 @@ int32_t mndStreamRegisterTrans(STrans* pTrans, const char* pName, const char* pS } int32_t clearFinishedTrans(SMnode* pMnode) { - SArray* pList = taosArrayInit(4, sizeof(SKeyInfo)); size_t keyLen = 0; + SArray* pList = taosArrayInit(4, sizeof(SKeyInfo)); + void* pIter = NULL; - taosThreadMutexLock(&execInfo.lock); - - void* pIter = NULL; while ((pIter = taosHashIterate(execInfo.transMgmt.pDBTrans, pIter)) != NULL) { - SStreamTransInfo *pEntry = (SStreamTransInfo *)pIter; - STrans* pTrans = mndAcquireTrans(pMnode, pEntry->transId); + SStreamTransInfo* pEntry = (SStreamTransInfo*)pIter; // let's clear the finished trans + STrans* pTrans = mndAcquireTrans(pMnode, pEntry->transId); if (pTrans == NULL) { void* pKey = taosHashGetKey(pEntry, &keyLen); // key is the name of src/dst db name @@ -60,46 +58,76 @@ int32_t clearFinishedTrans(SMnode* pMnode) { } size_t num = taosArrayGetSize(pList); - for(int32_t i = 0; i < num; ++i) { + for (int32_t i = 0; i < num; ++i) { SKeyInfo* pKey = taosArrayGet(pList, i); taosHashRemove(execInfo.transMgmt.pDBTrans, pKey->pKey, pKey->keyLen); } - mDebug("clear %d finished stream-trans, remained:%d", (int32_t) num, taosHashGetSize(execInfo.transMgmt.pDBTrans)); - taosThreadMutexUnlock(&execInfo.lock); + mDebug("clear %d finished stream-trans, remained:%d", (int32_t)num, taosHashGetSize(execInfo.transMgmt.pDBTrans)); terrno = TSDB_CODE_SUCCESS; taosArrayDestroy(pList); return 0; } -bool streamTransConflictOtherTrans(SMnode* pMnode, const char* pSrcDb, const char* pDstDb) { - clearFinishedTrans(pMnode); +bool streamTransConflictOtherTrans(SMnode* pMnode, const char* pSrcDb, const char* pDstDb, bool lock) { + if (lock) { + 
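The heartbeat handler above now tracks whether a task's input queue usage changed between consecutive reports: an unchanged value bumps `inputQUnchangeCounter`, any movement resets it, and `needDropRelatedFillhistoryTask` earlier in the patch treats a long run of unchanged reports (counter above 10) on a scan-history task as idle. A small standalone sketch of that change-detection counter, with simplified field names:

```c
/* Change-detection counter: successive heartbeat samples that leave the
 * queue usage unchanged increment a counter; any movement resets it. A task
 * is treated as idle once the counter passes a threshold (10 here, as in
 * needDropRelatedFillhistoryTask). Field names are simplified. */
#include <float.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct {
  double lastQUsed;
  bool   changing;
  int    unchangedCount;
} IdleTracker;

static void onHeartbeat(IdleTracker *t, double qUsed) {
  if (fabs(t->lastQUsed - qUsed) <= DBL_EPSILON) {
    if (!t->changing) t->unchangedCount++;   /* another quiet interval */
    else t->changing = false;                /* first quiet interval after activity */
  } else {
    t->changing = true;                      /* queue moved, start over */
    t->unchangedCount = 0;
  }
  t->lastQUsed = qUsed;
}

static bool looksIdle(const IdleTracker *t) { return !t->changing && t->unchangedCount > 10; }

int main(void) {
  IdleTracker t = {0};
  for (int i = 0; i < 15; ++i) onHeartbeat(&t, 0.0);   /* 15 quiet heartbeats */
  printf("idle: %s\n", looksIdle(&t) ? "yes" : "no");
  return 0;
}
```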
taosThreadMutexLock(&execInfo.lock); + } - taosThreadMutexLock(&execInfo.lock); int32_t num = taosHashGetSize(execInfo.transMgmt.pDBTrans); if (num <= 0) { - taosThreadMutexUnlock(&execInfo.lock); + if (lock) { + taosThreadMutexUnlock(&execInfo.lock); + } return false; } + clearFinishedTrans(pMnode); + SStreamTransInfo *pEntry = taosHashGet(execInfo.transMgmt.pDBTrans, pSrcDb, strlen(pSrcDb)); if (pEntry != NULL) { - taosThreadMutexUnlock(&execInfo.lock); + if (lock) { + taosThreadMutexUnlock(&execInfo.lock); + } mWarn("conflict with other transId:%d in Db:%s, trans:%s", pEntry->transId, pSrcDb, pEntry->name); return true; } pEntry = taosHashGet(execInfo.transMgmt.pDBTrans, pDstDb, strlen(pDstDb)); if (pEntry != NULL) { - taosThreadMutexUnlock(&execInfo.lock); + if (lock) { + taosThreadMutexUnlock(&execInfo.lock); + } mWarn("conflict with other transId:%d in Db:%s, trans:%s", pEntry->transId, pSrcDb, pEntry->name); return true; } - taosThreadMutexUnlock(&execInfo.lock); + if (lock) { + taosThreadMutexUnlock(&execInfo.lock); + } + return false; } +int32_t mndAddtoCheckpointWaitingList(SStreamObj* pStream, int64_t checkpointId) { + SCheckpointCandEntry* pEntry = taosHashGet(execInfo.transMgmt.pWaitingList, &pStream->uid, sizeof(pStream->uid)); + if (pEntry == NULL) { + SCheckpointCandEntry entry = {.streamId = pStream->uid, + .checkpointTs = taosGetTimestampMs(), + .checkpointId = checkpointId, + .pName = taosStrdup(pStream->name)}; + taosHashPut(execInfo.transMgmt.pWaitingList, &pStream->uid, sizeof(pStream->uid), &entry, sizeof(entry)); + int32_t size = taosHashGetSize(execInfo.transMgmt.pWaitingList); + mDebug("stream:%" PRIx64 " add into waiting list due to conflict, ts:%" PRId64 " , checkpointId: %" PRId64 + ", total in waitingList:%d", + pStream->uid, entry.checkpointTs, checkpointId, size); + } else { + mDebug("stream:%" PRIx64 " ts:%" PRId64 ", checkpointId:%" PRId64 " already in waiting list, no need to add into", + pStream->uid, pEntry->checkpointTs, checkpointId); + } + + return TSDB_CODE_SUCCESS; +} diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index c9756ef814..7749decf91 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -834,7 +834,7 @@ int32_t mndTransCheckConflict(SMnode *pMnode, STrans *pTrans) { if (mndCheckTransConflict(pMnode, pTrans)) { terrno = TSDB_CODE_MND_TRANS_CONFLICT; mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); - return -1; + return terrno; } return 0; diff --git a/source/dnode/mnode/impl/src/mndUser.c b/source/dnode/mnode/impl/src/mndUser.c index 66abfd6bc1..0e3b544508 100644 --- a/source/dnode/mnode/impl/src/mndUser.c +++ b/source/dnode/mnode/impl/src/mndUser.c @@ -708,9 +708,8 @@ SSdbRaw *mndUserActionEncode(SUserObj *pUser) { int32_t numOfAlterViews = taosHashGetSize(pUser->alterViews); int32_t numOfTopics = taosHashGetSize(pUser->topics); int32_t numOfUseDbs = taosHashGetSize(pUser->useDbs); - int32_t size = sizeof(SUserObj) + USER_RESERVE_SIZE + - (numOfReadDbs + numOfWriteDbs + numOfUseDbs) * TSDB_DB_FNAME_LEN + numOfTopics * TSDB_TOPIC_FNAME_LEN + - ipWhiteReserve; + int32_t size = sizeof(SUserObj) + USER_RESERVE_SIZE + (numOfReadDbs + numOfWriteDbs) * TSDB_DB_FNAME_LEN + + numOfTopics * TSDB_TOPIC_FNAME_LEN + ipWhiteReserve; char *stb = taosHashIterate(pUser->readTbs, NULL); while (stb != NULL) { @@ -720,7 +719,7 @@ SSdbRaw *mndUserActionEncode(SUserObj *pUser) { size += keyLen; size_t valueLen = 0; - valueLen = strlen(stb); + 
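`streamTransConflictOtherTrans` above gains a `lock` parameter so call sites can decide whether the helper takes `execInfo.lock` itself (drop, pause, resume, and the checkpoint path pass `true`) or leaves lock handling to the caller (`mndResetStatusFromCheckpoint` passes `false`). A compact sketch of that optional-locking pattern with a pthread mutex and illustrative names:

```c
/* Optional-locking pattern: a helper takes the mutex only when the caller
 * asks it to, so it can also be invoked from paths that manage the lock
 * themselves without deadlocking. Names are illustrative. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t gLock = PTHREAD_MUTEX_INITIALIZER;
static int gActiveTrans = 1;                 /* pretend one stream trans is running */

static bool conflictWithOtherTrans(bool lock) {
  if (lock) pthread_mutex_lock(&gLock);
  bool conflict = gActiveTrans > 0;          /* the real check inspects a hash by db name */
  if (lock) pthread_mutex_unlock(&gLock);
  return conflict;
}

int main(void) {
  /* standalone caller: let the helper lock */
  printf("conflict (helper locks): %d\n", conflictWithOtherTrans(true));

  /* caller that already holds the lock: pass false to avoid re-locking */
  pthread_mutex_lock(&gLock);
  printf("conflict (caller holds lock): %d\n", conflictWithOtherTrans(false));
  pthread_mutex_unlock(&gLock);
  return 0;
}
```

A recursive mutex or split locked/unlocked variants would be alternative designs; the flag keeps each call site explicit about who owns the lock.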
valueLen = strlen(stb) + 1; size += sizeof(int32_t); size += valueLen; stb = taosHashIterate(pUser->readTbs, stb); @@ -734,7 +733,7 @@ SSdbRaw *mndUserActionEncode(SUserObj *pUser) { size += keyLen; size_t valueLen = 0; - valueLen = strlen(stb); + valueLen = strlen(stb) + 1; size += sizeof(int32_t); size += valueLen; stb = taosHashIterate(pUser->writeTbs, stb); @@ -748,7 +747,7 @@ SSdbRaw *mndUserActionEncode(SUserObj *pUser) { size += keyLen; size_t valueLen = 0; - valueLen = strlen(stb); + valueLen = strlen(stb) + 1; size += sizeof(int32_t); size += valueLen; stb = taosHashIterate(pUser->alterTbs, stb); @@ -762,7 +761,7 @@ SSdbRaw *mndUserActionEncode(SUserObj *pUser) { size += keyLen; size_t valueLen = 0; - valueLen = strlen(stb); + valueLen = strlen(stb) + 1; size += sizeof(int32_t); size += valueLen; stb = taosHashIterate(pUser->readViews, stb); @@ -776,7 +775,7 @@ SSdbRaw *mndUserActionEncode(SUserObj *pUser) { size += keyLen; size_t valueLen = 0; - valueLen = strlen(stb); + valueLen = strlen(stb) + 1; size += sizeof(int32_t); size += valueLen; stb = taosHashIterate(pUser->writeViews, stb); @@ -790,11 +789,21 @@ SSdbRaw *mndUserActionEncode(SUserObj *pUser) { size += keyLen; size_t valueLen = 0; - valueLen = strlen(stb); + valueLen = strlen(stb) + 1; size += sizeof(int32_t); size += valueLen; stb = taosHashIterate(pUser->alterViews, stb); - } + } + + int32_t *useDb = taosHashIterate(pUser->useDbs, NULL); + while (useDb != NULL) { + size_t keyLen = 0; + void *key = taosHashGetKey(useDb, &keyLen); + size += sizeof(int32_t); + size += keyLen; + size += sizeof(int32_t); + useDb = taosHashIterate(pUser->useDbs, useDb); + } SSdbRaw *pRaw = sdbAllocRaw(SDB_USER, USER_VER_NUMBER, size); if (pRaw == NULL) goto _OVER; @@ -925,7 +934,7 @@ SSdbRaw *mndUserActionEncode(SUserObj *pUser) { stb = taosHashIterate(pUser->alterViews, stb); } - int32_t *useDb = taosHashIterate(pUser->useDbs, NULL); + useDb = taosHashIterate(pUser->useDbs, NULL); while (useDb != NULL) { size_t keyLen = 0; void *key = taosHashGetKey(useDb, &keyLen); diff --git a/source/dnode/mnode/sdb/inc/sdb.h b/source/dnode/mnode/sdb/inc/sdb.h index ddde645fae..0a20dcfd09 100644 --- a/source/dnode/mnode/sdb/inc/sdb.h +++ b/source/dnode/mnode/sdb/inc/sdb.h @@ -149,7 +149,8 @@ typedef enum { SDB_FUNC = 20, SDB_IDX = 21, SDB_VIEW = 22, - SDB_MAX = 23 + SDB_STREAM_SEQ = 23, + SDB_MAX = 24 } ESdbType; typedef struct SSdbRaw { @@ -169,11 +170,11 @@ typedef struct SSdbRow { } SSdbRow; typedef struct SSdb { - SMnode *pMnode; - SWal *pWal; + SMnode * pMnode; + SWal * pWal; int64_t sync; - char *currDir; - char *tmpDir; + char * currDir; + char * tmpDir; int64_t commitIndex; int64_t commitTerm; int64_t commitConfig; @@ -183,7 +184,7 @@ typedef struct SSdb { int64_t tableVer[SDB_MAX]; int64_t maxId[SDB_MAX]; EKeyType keyTypes[SDB_MAX]; - SHashObj *hashObjs[SDB_MAX]; + SHashObj * hashObjs[SDB_MAX]; TdThreadRwlock locks[SDB_MAX]; SdbInsertFp insertFps[SDB_MAX]; SdbUpdateFp updateFps[SDB_MAX]; @@ -198,25 +199,25 @@ typedef struct SSdb { typedef struct SSdbIter { TdFilePtr file; int64_t total; - char *name; + char * name; } SSdbIter; typedef struct { - ESdbType sdbType; - EKeyType keyType; - SdbDeployFp deployFp; - SdbEncodeFp encodeFp; - SdbDecodeFp decodeFp; - SdbInsertFp insertFp; - SdbUpdateFp updateFp; - SdbDeleteFp deleteFp; + ESdbType sdbType; + EKeyType keyType; + SdbDeployFp deployFp; + SdbEncodeFp encodeFp; + SdbDecodeFp decodeFp; + SdbInsertFp insertFp; + SdbUpdateFp updateFp; + SdbDeleteFp deleteFp; SdbValidateFp validateFp; } SSdbTable; 
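The `mndUserActionEncode` changes above size each serialized table and view privilege value as `strlen(stb) + 1` and add the previously missing size accounting for the `useDbs` hash, so the raw buffer reserves room for the length prefix plus the terminating NUL that the encode step presumably writes. A tiny sketch of why the terminator must be counted when pre-sizing a buffer:

```c
/* Sizing sketch: when a buffer is pre-sized from string lengths, each value
 * needs strlen(s) + 1 bytes (plus its length prefix) so the terminating NUL
 * written later actually fits. Illustrative stand-in for the sdb raw sizing. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static size_t sizeForValue(const char *s) {
  return sizeof(int32_t)      /* length prefix */
       + strlen(s) + 1;       /* payload including the terminating NUL */
}

int main(void) {
  const char *vals[] = {"db1.stb1", "db2.stb2"};
  size_t total = 0;
  for (int i = 0; i < 2; ++i) total += sizeForValue(vals[i]);
  printf("reserve %zu bytes for %d values\n", total, 2);
  return 0;
}
```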
typedef struct SSdbOpt { const char *path; - SMnode *pMnode; - SWal *pWal; + SMnode * pMnode; + SWal * pWal; int64_t sync; } SSdbOpt; @@ -393,7 +394,7 @@ int32_t sdbGetRawSoftVer(SSdbRaw *pRaw, int8_t *sver); int32_t sdbGetRawTotalSize(SSdbRaw *pRaw); SSdbRow *sdbAllocRow(int32_t objSize); -void *sdbGetRowObj(SSdbRow *pRow); +void * sdbGetRowObj(SSdbRow *pRow); void sdbFreeRow(SSdb *pSdb, SSdbRow *pRow, bool callFunc); int32_t sdbStartRead(SSdb *pSdb, SSdbIter **ppIter, int64_t *index, int64_t *term, int64_t *config); diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 72ef6d14bd..380be1dd38 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -13,31 +13,31 @@ * along with this program. If not, see . */ -#include "rsync.h" #include "executor.h" +#include "rsync.h" #include "sndInt.h" #include "tqCommon.h" #include "tuuid.h" #define sndError(...) \ - do { \ + do { \ if (sndDebugFlag & DEBUG_ERROR) { \ taosPrintLog("SND ERROR ", DEBUG_ERROR, sndDebugFlag, __VA_ARGS__); \ - } \ + } \ } while (0) -#define sndInfo(...) \ +#define sndInfo(...) \ do { \ - if (sndDebugFlag & DEBUG_INFO) { \ + if (sndDebugFlag & DEBUG_INFO) { \ taosPrintLog("SND INFO ", DEBUG_INFO, sndDebugFlag, __VA_ARGS__); \ } \ } while (0) #define sndDebug(...) \ - do { \ + do { \ if (sndDebugFlag & DEBUG_DEBUG) { \ taosPrintLog("SND ", DEBUG_DEBUG, sndDebugFlag, __VA_ARGS__); \ - } \ + } \ } while (0) int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer) { @@ -46,10 +46,11 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer if (code != TSDB_CODE_SUCCESS) { return code; } + pTask->pBackend = NULL; streamTaskOpenAllUpstreamInput(pTask); - SStreamTask* pSateTask = pTask; + SStreamTask *pSateTask = pTask; SStreamTask task = {0}; if (pTask->info.fillHistory) { task.id.streamId = pTask->streamTaskId.streamId; @@ -84,7 +85,7 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer streamTaskResetUpstreamStageInfo(pTask); streamSetupScheduleTrigger(pTask); - SCheckpointInfo* pChkInfo = &pTask->chkInfo; + SCheckpointInfo *pChkInfo = &pTask->chkInfo; // checkpoint ver is the kept version, handled data should be the next version. 
if (pTask->chkInfo.checkpointId != 0) { pTask->chkInfo.nextProcessVer = pTask->chkInfo.checkpointVer + 1; @@ -92,7 +93,7 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer); } - char* p = NULL; + char *p = NULL; streamTaskGetStatus(pTask, &p); if (pTask->info.fillHistory) { @@ -194,7 +195,7 @@ int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) { int32_t sndProcessWriteMsg(SSnode *pSnode, SRpcMsg *pMsg, SRpcMsg *pRsp) { switch (pMsg->msgType) { case TDMT_STREAM_TASK_DEPLOY: { - void *pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + void * pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t len = pMsg->contLen - sizeof(SMsgHead); return tqStreamTaskProcessDeployReq(pSnode->pMeta, -1, pReq, len, true, true); } diff --git a/source/dnode/vnode/CMakeLists.txt b/source/dnode/vnode/CMakeLists.txt index 9aeb14cd60..114051f02b 100644 --- a/source/dnode/vnode/CMakeLists.txt +++ b/source/dnode/vnode/CMakeLists.txt @@ -14,6 +14,8 @@ set( "src/vnd/vnodeSnapshot.c" "src/vnd/vnodeRetention.c" "src/vnd/vnodeInitApi.c" + "src/vnd/vnodeAsync.c" + "src/vnd/vnodeHash.c" # meta "src/meta/metaOpen.c" diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index ca9d22a987..88362239f5 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -309,7 +309,12 @@ int32_t tsdbTakeReadSnap2(STsdbReader *pReader, _query_reseek_func_t reseek, STs void tsdbUntakeReadSnap2(STsdbReader *pReader, STsdbReadSnap *pSnap, bool proactive); // tsdbMerge.c ============================================================================================== -int32_t tsdbSchedMerge(STsdb *tsdb, int32_t fid); +typedef struct { + STsdb *tsdb; + int32_t fid; +} SMergeArg; + +int32_t tsdbMerge(void *arg); // tsdbDiskData ============================================================================================== int32_t tDiskDataBuilderCreate(SDiskDataBuilder **ppBuilder); diff --git a/source/dnode/vnode/src/inc/vnd.h b/source/dnode/vnode/src/inc/vnd.h index 55b62dfe48..4036200d73 100644 --- a/source/dnode/vnode/src/inc/vnd.h +++ b/source/dnode/vnode/src/inc/vnd.h @@ -48,9 +48,32 @@ int32_t vnodeCheckCfg(const SVnodeCfg*); int32_t vnodeEncodeConfig(const void* pObj, SJson* pJson); int32_t vnodeDecodeConfig(const SJson* pJson, void* pObj); +// vnodeAsync.c +typedef struct SVAsync SVAsync; + +typedef enum { + EVA_PRIORITY_HIGH = 0, + EVA_PRIORITY_NORMAL, + EVA_PRIORITY_LOW, +} EVAPriority; + +#define VNODE_ASYNC_VALID_CHANNEL_ID(channelId) ((channelId) > 0) +#define VNODE_ASYNC_VALID_TASK_ID(taskId) ((taskId) > 0) + +int32_t vnodeAsyncInit(SVAsync** async, char* label); +int32_t vnodeAsyncDestroy(SVAsync** async); +int32_t vnodeAChannelInit(SVAsync* async, int64_t* channelId); +int32_t vnodeAChannelDestroy(SVAsync* async, int64_t channelId, bool waitRunning); +int32_t vnodeAsync(SVAsync* async, EVAPriority priority, int32_t (*execute)(void*), void (*complete)(void*), void* arg, + int64_t* taskId); +int32_t vnodeAsyncC(SVAsync* async, int64_t channelId, EVAPriority priority, int32_t (*execute)(void*), + void (*complete)(void*), void* arg, int64_t* taskId); +int32_t vnodeAWait(SVAsync* async, int64_t taskId); +int32_t vnodeACancel(SVAsync* async, int64_t taskId); +int32_t vnodeAsyncSetWorkers(SVAsync* async, int32_t numWorkers); + // vnodeModule.c -int vnodeScheduleTask(int (*execute)(void*), void* arg); -int vnodeScheduleTaskEx(int tpid, int (*execute)(void*), 
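In the tsdb.h hunk above, `tsdbSchedMerge(STsdb*, int32_t)` becomes `tsdbMerge(void *arg)` taking an `SMergeArg`, which lines up with the new vnodeAsync interface declared just after it, where tasks are `int32_t (*execute)(void*)` callbacks. A minimal standalone illustration of packing typed arguments into a struct for a `void*`-based task entry point; the scheduler here is just a direct call, and the real code presumably hands the callback to vnodeAsync:

```c
/* Packing typed arguments into a struct so a task fits a void*-based execute
 * callback, as SMergeArg does for tsdbMerge. The scheduler here is a direct
 * call; path and fid values are made up for illustration. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct {
  const char *tsdbPath;   /* stand-in for STsdb* */
  int32_t     fid;        /* file set id to merge */
} MergeArg;

static int32_t doMerge(void *arg) {
  MergeArg *ma = arg;
  printf("merging fid %d under %s\n", ma->fid, ma->tsdbPath);
  free(ma);               /* task owns its argument once scheduled */
  return 0;
}

/* stand-in for handing the task to an async framework */
static int32_t schedule(int32_t (*execute)(void *), void *arg) { return execute(arg); }

int main(void) {
  MergeArg *ma = malloc(sizeof(*ma));
  if (ma == NULL) return 1;
  ma->tsdbPath = "/var/lib/taos/vnode2/tsdb";
  ma->fid = 1742;
  return schedule(doMerge, ma);
}
```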
void* arg); +extern SVAsync* vnodeAsyncHandle[2]; // vnodeBufPool.c typedef struct SVBufPoolNode SVBufPoolNode; @@ -110,7 +133,7 @@ int32_t vnodeAsyncCommit(SVnode* pVnode); bool vnodeShouldRollback(SVnode* pVnode); // vnodeSync.c -int32_t vnodeSyncOpen(SVnode *pVnode, char *path, int32_t vnodeVersion); +int32_t vnodeSyncOpen(SVnode* pVnode, char* path, int32_t vnodeVersion); int32_t vnodeSyncStart(SVnode* pVnode); void vnodeSyncPreClose(SVnode* pVnode); void vnodeSyncPostClose(SVnode* pVnode); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 473329bfee..8a4cbb5fd0 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -93,7 +93,11 @@ typedef struct SQueryNode SQueryNode; #define VNODE_RSMA2_DIR "rsma2" #define VNODE_TQ_STREAM "stream" +#if SUSPEND_RESUME_TEST // only for test purpose +#define VNODE_BUFPOOL_SEGMENTS 1 +#else #define VNODE_BUFPOOL_SEGMENTS 3 +#endif #define VND_INFO_FNAME "vnode.json" #define VND_INFO_FNAME_TMP "vnode_tmp.json" @@ -209,7 +213,7 @@ int32_t tsdbBegin(STsdb* pTsdb); // int32_t tsdbCommit(STsdb* pTsdb, SCommitInfo* pInfo); int32_t tsdbCacheCommit(STsdb* pTsdb); int32_t tsdbCompact(STsdb* pTsdb, SCompactInfo* pInfo); -int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync); +int32_t tsdbRetention(STsdb* tsdb, int64_t now, int32_t sync); // int32_t tsdbFinishCommit(STsdb* pTsdb); // int32_t tsdbRollbackCommit(STsdb* pTsdb); int tsdbScanAndConvertSubmitMsg(STsdb* pTsdb, SSubmitReq2* pMsg); @@ -448,13 +452,16 @@ struct SVnode { SVBufPool* recycleTail; SVBufPool* onRecycle; + // commit variables + int64_t commitChannel; + int64_t commitTask; + SMeta* pMeta; SSma* pSma; STsdb* pTsdb; SWal* pWal; STQ* pTq; SSink* pSink; - tsem_t canCommit; int64_t sync; TdThreadMutex lock; bool blocked; @@ -494,18 +501,18 @@ struct SSma { void* pRSmaEnv; }; -#define SMA_CFG(s) (&(s)->pVnode->config) -#define SMA_TSDB_CFG(s) (&(s)->pVnode->config.tsdbCfg) -#define SMA_RETENTION(s) ((SRetention*)&(s)->pVnode->config.tsdbCfg.retentions) -#define SMA_LOCKED(s) ((s)->locked) -#define SMA_META(s) ((s)->pVnode->pMeta) -#define SMA_VID(s) TD_VID((s)->pVnode) -#define SMA_TFS(s) ((s)->pVnode->pTfs) -#define SMA_TSMA_ENV(s) ((s)->pTSmaEnv) -#define SMA_RSMA_ENV(s) ((s)->pRSmaEnv) -#define SMA_RSMA_TSDB0(s) ((s)->pVnode->pTsdb) -#define SMA_RSMA_TSDB1(s) ((s)->pRSmaTsdb[TSDB_RETENTION_L0]) -#define SMA_RSMA_TSDB2(s) ((s)->pRSmaTsdb[TSDB_RETENTION_L1]) +#define SMA_CFG(s) (&(s)->pVnode->config) +#define SMA_TSDB_CFG(s) (&(s)->pVnode->config.tsdbCfg) +#define SMA_RETENTION(s) ((SRetention*)&(s)->pVnode->config.tsdbCfg.retentions) +#define SMA_LOCKED(s) ((s)->locked) +#define SMA_META(s) ((s)->pVnode->pMeta) +#define SMA_VID(s) TD_VID((s)->pVnode) +#define SMA_TFS(s) ((s)->pVnode->pTfs) +#define SMA_TSMA_ENV(s) ((s)->pTSmaEnv) +#define SMA_RSMA_ENV(s) ((s)->pRSmaEnv) +#define SMA_RSMA_TSDB0(s) ((s)->pVnode->pTsdb) +#define SMA_RSMA_TSDB1(s) ((s)->pRSmaTsdb[TSDB_RETENTION_L0]) +#define SMA_RSMA_TSDB2(s) ((s)->pRSmaTsdb[TSDB_RETENTION_L1]) #define SMA_RSMA_GET_TSDB(pVnode, level) ((level == 0) ? 
pVnode->pTsdb : pVnode->pSma->pRSmaTsdb[level - 1]) // sma diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 5dc29509a0..abe4c3f2fc 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -61,7 +61,7 @@ struct SRSmaQTaskInfoItem { int32_t len; int8_t type; int64_t suid; - void *qTaskInfo; + void * qTaskInfo; }; static void tdRSmaQTaskInfoFree(qTaskInfo_t *taskHandle, int32_t vgId, int32_t level) { @@ -185,7 +185,7 @@ int32_t tdUpdateTbUidList(SSma *pSma, STbUidStore *pStore, bool isAdd) { void *pIter = NULL; while ((pIter = taosHashIterate(pStore->uidHash, pIter))) { tb_uid_t *pTbSuid = (tb_uid_t *)taosHashGetKey(pIter, NULL); - SArray *pTbUids = *(SArray **)pIter; + SArray * pTbUids = *(SArray **)pIter; if (tdUpdateTbUidListImpl(pSma, pTbSuid, pTbUids, isAdd) != TSDB_CODE_SUCCESS) { taosHashCancelIterate(pStore->uidHash, pIter); @@ -213,7 +213,7 @@ int32_t tdFetchTbUidList(SSma *pSma, STbUidStore **ppStore, tb_uid_t suid, tb_ui } SRSmaStat *pStat = (SRSmaStat *)SMA_ENV_STAT(pEnv); - SHashObj *infoHash = NULL; + SHashObj * infoHash = NULL; if (!pStat || !(infoHash = RSMA_INFO_HASH(pStat))) { terrno = TSDB_CODE_RSMA_INVALID_STAT; return TSDB_CODE_FAILED; @@ -264,11 +264,11 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat int8_t idx) { if ((param->qmsgLen > 0) && param->qmsg[idx]) { SRSmaInfoItem *pItem = &(pRSmaInfo->items[idx]); - SRetention *pRetention = SMA_RETENTION(pSma); - STsdbCfg *pTsdbCfg = SMA_TSDB_CFG(pSma); - SVnode *pVnode = pSma->pVnode; + SRetention * pRetention = SMA_RETENTION(pSma); + STsdbCfg * pTsdbCfg = SMA_TSDB_CFG(pSma); + SVnode * pVnode = pSma->pVnode; char taskInfDir[TSDB_FILENAME_LEN] = {0}; - void *pStreamState = NULL; + void * pStreamState = NULL; // set the backend of stream state tdRSmaQTaskInfoGetFullPath(pVnode, pRSmaInfo->suid, idx + 1, pVnode->pTfs, taskInfDir); @@ -297,6 +297,8 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat sprintf(pStreamTask->exec.qmsg, "%s", RSMA_EXEC_TASK_FLAG); pStreamTask->chkInfo.checkpointId = streamMetaGetLatestCheckpointId(pStreamTask->pMeta); tdRSmaTaskInit(pStreamTask->pMeta, pItem, &pStreamTask->id); + pStreamTask->status.pSM = streamCreateStateMachine(pStreamTask); + pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); if (!pStreamState) { terrno = TSDB_CODE_RSMA_STREAM_STATE_OPEN; @@ -372,7 +374,7 @@ int32_t tdRSmaProcessCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, con } #endif - SSmaEnv *pEnv = SMA_RSMA_ENV(pSma); + SSmaEnv * pEnv = SMA_RSMA_ENV(pSma); SRSmaStat *pStat = (SRSmaStat *)SMA_ENV_STAT(pEnv); SRSmaInfo *pRSmaInfo = NULL; @@ -651,9 +653,7 @@ static int32_t tdRSmaProcessDelReq(SSma *pSma, int64_t suid, int8_t level, SBatc ((SMsgHead *)pBuf)->vgId = TD_VID(pSma->pVnode); - SRpcMsg delMsg = {.msgType = TDMT_VND_BATCH_DEL, - .pCont = pBuf, - .contLen = len + sizeof(SMsgHead)}; + SRpcMsg delMsg = {.msgType = TDMT_VND_BATCH_DEL, .pCont = pBuf, .contLen = len + sizeof(SMsgHead)}; code = tmsgPutToQueue(&pSma->pVnode->msgCb, WRITE_QUEUE, &delMsg); TSDB_CHECK_CODE(code, lino, _exit); } @@ -673,8 +673,8 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma int32_t code = 0; int32_t lino = 0; SSDataBlock *output = NULL; - SArray *pResList = pItem->pResList; - STSchema *pTSchema = pInfo->pTSchema; + SArray * pResList = pItem->pResList; + STSchema * pTSchema = pInfo->pTSchema; int64_t suid = pInfo->suid; while 
(1) { @@ -733,7 +733,7 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma } } - STsdb *sinkTsdb = (pItem->level == TSDB_RETENTION_L1 ? pSma->pRSmaTsdb[0] : pSma->pRSmaTsdb[1]); + STsdb * sinkTsdb = (pItem->level == TSDB_RETENTION_L1 ? pSma->pRSmaTsdb[0] : pSma->pRSmaTsdb[1]); SSubmitReq2 *pReq = NULL; if (buildSubmitReqFromDataBlock(&pReq, output, pTSchema, output->info.id.groupId, SMA_VID(pSma), suid) < 0) { @@ -795,7 +795,7 @@ _exit: static int32_t tdExecuteRSmaImplAsync(SSma *pSma, int64_t version, const void *pMsg, int32_t len, int32_t inputType, SRSmaInfo *pInfo, tb_uid_t suid) { int32_t size = RSMA_EXEC_MSG_HLEN + len; // header + payload - void *qItem = taosAllocateQitem(size, DEF_QITEM, 0); + void * qItem = taosAllocateQitem(size, DEF_QITEM, 0); if (!qItem) { return TSDB_CODE_FAILED; @@ -870,10 +870,10 @@ static int32_t tdRsmaPrintSubmitReq(SSma *pSma, SSubmitReq *pReq) { * @param level * @return int32_t */ -static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int64_t version, int32_t inputType, SRSmaInfo *pInfo, - ERsmaExecType type, int8_t level) { +static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int64_t version, int32_t inputType, + SRSmaInfo *pInfo, ERsmaExecType type, int8_t level) { int32_t idx = level - 1; - void *qTaskInfo = RSMA_INFO_QTASK(pInfo, idx); + void * qTaskInfo = RSMA_INFO_QTASK(pInfo, idx); SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, idx); if (!qTaskInfo) { @@ -887,8 +887,9 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, return TSDB_CODE_FAILED; } - smaDebug("vgId:%d, execute rsma %" PRIi8 " task for qTaskInfo:%p, suid:%" PRIu64 ", nMsg:%d, submitReqVer:%" PRIi64 ", inputType:%d", SMA_VID(pSma), level, - RSMA_INFO_QTASK(pInfo, idx), pInfo->suid, msgSize, version, inputType); + smaDebug("vgId:%d, execute rsma %" PRIi8 " task for qTaskInfo:%p, suid:%" PRIu64 ", nMsg:%d, submitReqVer:%" PRIi64 + ", inputType:%d", + SMA_VID(pSma), level, RSMA_INFO_QTASK(pInfo, idx), pInfo->suid, msgSize, version, inputType); if ((terrno = qSetSMAInput(qTaskInfo, pMsg, msgSize, inputType)) < 0) { smaError("vgId:%d, rsma %" PRIi8 " qSetStreamInput failed since %s", SMA_VID(pSma), level, tstrerror(terrno)); @@ -912,7 +913,7 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid) { int32_t code = 0; int32_t lino = 0; - SSmaEnv *pEnv = SMA_RSMA_ENV(pSma); + SSmaEnv * pEnv = SMA_RSMA_ENV(pSma); SRSmaStat *pStat = NULL; SRSmaInfo *pRSmaInfo = NULL; @@ -1067,8 +1068,8 @@ _err: static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables) { int32_t code = 0; int32_t lino = 0; - SVnode *pVnode = pSma->pVnode; - SArray *suidList = NULL; + SVnode * pVnode = pSma->pVnode; + SArray * suidList = NULL; STbUidStore uidStore = {0}; SMetaReader mr = {0}; tb_uid_t suid = 0; @@ -1196,7 +1197,7 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { int32_t code = 0; int32_t lino = 0; int32_t nTaskInfo = 0; - SSma *pSma = pRSmaStat->pSma; + SSma * pSma = pRSmaStat->pSma; SVnode *pVnode = pSma->pVnode; if (taosHashGetSize(pInfoHash) <= 0) { @@ -1229,7 +1230,7 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { do { int32_t nStreamFlushed = 0; int32_t nSleep = 0; - void *infoHash = NULL; + void * infoHash = NULL; while (true) { while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { SRSmaInfo *pRSmaInfo = *(SRSmaInfo 
**)infoHash; @@ -1271,7 +1272,7 @@ _checkpoint: SStreamMeta *pMeta = NULL; int64_t checkpointId = taosGetTimestampNs(); bool checkpointBuilt = false; - void *infoHash = NULL; + void * infoHash = NULL; while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; if (RSMA_INFO_IS_DEL(pRSmaInfo)) { @@ -1282,11 +1283,12 @@ _checkpoint: SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pRSmaInfo, i); if (pItem && pItem->pStreamTask) { SStreamTask *pTask = pItem->pStreamTask; - atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); - pTask->checkpointingId = checkpointId; - pTask->chkInfo.checkpointId = pTask->checkpointingId; + // atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); + pTask->chkInfo.checkpointingId = checkpointId; + pTask->chkInfo.checkpointId = checkpointId; // 1pTask->checkpointingId; pTask->chkInfo.checkpointVer = pItem->submitReqVer; pTask->info.triggerParam = pItem->fetchResultVer; + pTask->info.taskLevel = TASK_LEVEL_SMA; if (!checkpointBuilt) { // the stream states share one checkpoint @@ -1342,10 +1344,10 @@ _exit: * @param tmrId */ static void tdRSmaFetchTrigger(void *param, void *tmrId) { - SRSmaRef *pRSmaRef = NULL; - SSma *pSma = NULL; - SRSmaStat *pStat = NULL; - SRSmaInfo *pRSmaInfo = NULL; + SRSmaRef * pRSmaRef = NULL; + SSma * pSma = NULL; + SRSmaStat * pStat = NULL; + SRSmaInfo * pRSmaInfo = NULL; SRSmaInfoItem *pItem = NULL; if (!(pRSmaRef = taosHashGet(smaMgmt.refHash, ¶m, POINTER_BYTES))) { @@ -1513,7 +1515,7 @@ _err: } static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SArray *pSubmitArr, ERsmaExecType type) { - void *msg = NULL; + void * msg = NULL; int8_t resume = 0; int32_t nSubmit = 0; int32_t nDelete = 0; @@ -1628,11 +1630,11 @@ _err: int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type) { int32_t code = 0; int32_t lino = 0; - SVnode *pVnode = pSma->pVnode; - SSmaEnv *pEnv = SMA_RSMA_ENV(pSma); + SVnode * pVnode = pSma->pVnode; + SSmaEnv * pEnv = SMA_RSMA_ENV(pSma); SRSmaStat *pRSmaStat = (SRSmaStat *)SMA_ENV_STAT(pEnv); - SHashObj *infoHash = NULL; - SArray *pSubmitArr = NULL; + SHashObj * infoHash = NULL; + SArray * pSubmitArr = NULL; bool isFetchAll = false; if (!pRSmaStat || !(infoHash = RSMA_INFO_HASH(pRSmaStat))) { @@ -1731,4 +1733,4 @@ _exit: smaError("vgId:%d, %s failed at line %d since %s", TD_VID(pVnode), __func__, lino, tstrerror(code)); } return code; -} +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index e66c0357a1..2b35628663 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -750,21 +750,27 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) { if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { SStreamTask* pStateTask = pTask; - SStreamTask task = {0}; + + STaskId taskId = {.streamId = 0, .taskId = 0}; if (pTask->info.fillHistory) { - task.id.streamId = pTask->streamTaskId.streamId; - task.id.taskId = pTask->streamTaskId.taskId; - task.pMeta = pTask->pMeta; - pStateTask = &task; + taskId.streamId = pTask->id.streamId; + taskId.taskId = pTask->id.taskId; + + pTask->id.streamId = pTask->streamTaskId.streamId; + pTask->id.taskId = pTask->streamTaskId.taskId; } - pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pStateTask, false, -1, -1); + pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pTask, false, -1, -1); if (pTask->pState == NULL) { tqError("s-task:%s (vgId:%d) failed to open state for task", pTask->id.idStr, vgId); return -1; } 
else { tqDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState); } + if (pTask->info.fillHistory) { + pTask->id.streamId = taskId.streamId; + pTask->id.taskId = taskId.taskId; + } SReadHandle handle = { .checkpointId = pTask->chkInfo.checkpointId, @@ -785,15 +791,17 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) { qSetTaskId(pTask->exec.pExecutor, pTask->id.taskId, pTask->id.streamId); } else if (pTask->info.taskLevel == TASK_LEVEL__AGG) { SStreamTask* pSateTask = pTask; - SStreamTask task = {0}; + // SStreamTask task = {0}; + + STaskId taskId = {.streamId = 0, .taskId = 0}; if (pTask->info.fillHistory) { - task.id.streamId = pTask->streamTaskId.streamId; - task.id.taskId = pTask->streamTaskId.taskId; - task.pMeta = pTask->pMeta; - pSateTask = &task; + taskId.streamId = pTask->id.streamId; + taskId.taskId = pTask->id.taskId; + pTask->id.streamId = pTask->streamTaskId.streamId; + pTask->id.taskId = pTask->streamTaskId.taskId; } - pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pSateTask, false, -1, -1); + pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pTask, false, -1, -1); if (pTask->pState == NULL) { tqError("s-task:%s (vgId:%d) failed to open state for task", pTask->id.idStr, vgId); return -1; @@ -801,6 +809,11 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) { tqDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState); } + if (pTask->info.fillHistory) { + pTask->id.streamId = taskId.streamId; + pTask->id.taskId = taskId.taskId; + } + int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->upstreamInfo.pList); SReadHandle handle = { .checkpointId = pTask->chkInfo.checkpointId, @@ -1280,14 +1293,13 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) return TSDB_CODE_SUCCESS; } - // downstream not ready, current the stream tasks are not all ready. Ignore this checkpoint req. if (pTask->status.downstreamReady != 1) { pTask->chkInfo.failedId = req.checkpointId; // record the latest failed checkpoint id - pTask->checkpointingId = req.checkpointId; + pTask->chkInfo.checkpointingId = req.checkpointId; - qError("s-task:%s not ready for checkpoint, since downstream not ready, ignore this checkpoint:%" PRId64 - ", set it failure", - pTask->id.idStr, req.checkpointId); + tqError("s-task:%s not ready for checkpoint, since downstream not ready, ignore this checkpoint:%" PRId64 + ", set it failure", + pTask->id.idStr, req.checkpointId); streamMetaReleaseTask(pMeta, pTask); SRpcMsg rsp = {0}; @@ -1316,10 +1328,10 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) // check if the checkpoint msg already sent or not. 
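/*
 * Editor's note, not part of the patch: the fill-history branches above now borrow the
 * related stream task's id before streamStateOpen() and restore the task's own id right
 * afterwards, instead of building a throwaway SStreamTask copy. A minimal, self-contained
 * sketch of that save/swap/restore pattern (hypothetical Task/TaskId types and a stand-in
 * openStateFor(), not the real TDengine structs or streamStateOpen):
 */
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct { int64_t streamId; int64_t taskId; } TaskId;
typedef struct { TaskId id; TaskId streamTaskId; bool fillHistory; } Task;

/* stand-in for streamStateOpen(): the state location is derived from t->id */
static void openStateFor(const Task *t) {
  printf("open state keyed by stream:%" PRId64 " task:%" PRId64 "\n", t->id.streamId, t->id.taskId);
}

static void openTaskState(Task *t) {
  TaskId saved = t->id;                        /* remember the task's own id          */
  if (t->fillHistory) t->id = t->streamTaskId; /* share the related stream task's state */
  openStateFor(t);
  if (t->fillHistory) t->id = saved;           /* restore before the id is used again  */
}

int main(void) {
  Task hist = {.id = {11, 2}, .streamTaskId = {11, 1}, .fillHistory = true};
  openTaskState(&hist); /* opens state under task 1, then the id reads as task 2 again */
  return 0;
}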
if (status == TASK_STATUS__CK) { - ASSERT(pTask->checkpointingId == req.checkpointId); + ASSERT(pTask->chkInfo.checkpointingId == req.checkpointId); tqWarn("s-task:%s recv checkpoint-source msg again checkpointId:%" PRId64 " already received, ignore this msg and continue process checkpoint", - pTask->id.idStr, pTask->checkpointingId); + pTask->id.idStr, pTask->chkInfo.checkpointingId); taosThreadMutexUnlock(&pTask->lock); streamMetaReleaseTask(pMeta, pTask); @@ -1335,10 +1347,6 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) // set the initial value for generating check point // set the mgmt epset info according to the checkout source msg from mnode, todo update mgmt epset if needed - if (pMeta->chkptNotReadyTasks == 0) { - pMeta->chkptNotReadyTasks = pMeta->numOfStreamTasks; - } - total = pMeta->numOfStreamTasks; streamMetaWUnLock(pMeta); @@ -1390,7 +1398,7 @@ int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg) { } int32_t tqProcessTaskDropHTask(STQ* pTq, SRpcMsg* pMsg) { - SVDropHTaskReq* pReq = (SVDropHTaskReq*) pMsg->pCont; + SVDropHTaskReq* pReq = (SVDropHTaskReq*)pMsg->pCont; SStreamMeta* pMeta = pTq->pStreamMeta; SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); @@ -1415,6 +1423,9 @@ int32_t tqProcessTaskDropHTask(STQ* pTq, SRpcMsg* pMsg) { SStreamTaskId id = {.streamId = pTask->hTaskInfo.id.streamId, .taskId = pTask->hTaskInfo.id.taskId}; streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &id); + // clear the scheduler status + streamTaskSetSchedStatusInactive(pTask); + tqDebug("s-task:%s set scheduler status:%d after drop fill-history task", pTask->id.idStr, pTask->status.schedStatus); streamMetaReleaseTask(pMeta, pTask); return TSDB_CODE_SUCCESS; } diff --git a/source/dnode/vnode/src/tq/tqStreamStateSnap.c b/source/dnode/vnode/src/tq/tqStreamStateSnap.c index 7a8147f83b..2ab710176d 100644 --- a/source/dnode/vnode/src/tq/tqStreamStateSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamStateSnap.c @@ -104,8 +104,8 @@ int32_t streamStateSnapRead(SStreamStateReader* pReader, uint8_t** ppData) { pHdr->type = SNAP_DATA_STREAM_STATE_BACKEND; pHdr->size = len; memcpy(pHdr->data, rowData, len); - tqDebug("vgId:%d, vnode stream-state snapshot read data success", TD_VID(pReader->pTq->pVnode)); taosMemoryFree(rowData); + tqDebug("vgId:%d, vnode stream-state snapshot read data success", TD_VID(pReader->pTq->pVnode)); return code; _err: @@ -139,7 +139,7 @@ int32_t streamStateSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamS pWriter->sver = sver; pWriter->ever = ever; - sprintf(tdir, "%s%s%s%s%s", pTq->path, TD_DIRSEP, VNODE_TQ_STREAM, TD_DIRSEP, "received"); + sprintf(tdir, "%s%s%s", pTq->path, TD_DIRSEP, VNODE_TQ_STREAM); taosMkDir(tdir); SStreamSnapWriter* pSnapWriter = NULL; @@ -167,25 +167,19 @@ int32_t streamStateSnapWriterClose(SStreamStateWriter* pWriter, int8_t rollback) return code; } -int32_t streamStateRebuildFromSnap(SStreamStateWriter* pWriter, int64_t chkpId) { - tqDebug("vgId:%d, vnode %s start to rebuild stream-state", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); - - streamMetaWLock(pWriter->pTq->pStreamMeta); - int32_t code = streamMetaReopen(pWriter->pTq->pStreamMeta); - if (code == 0) { - streamMetaInitBackend(pWriter->pTq->pStreamMeta); - code = streamStateLoadTasks(pWriter); - } - - streamMetaWUnLock(pWriter->pTq->pStreamMeta); - tqDebug("vgId:%d, vnode %s succ to rebuild stream-state", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); - taosMemoryFree(pWriter); - return code; 
-} - -int32_t streamStateLoadTasks(SStreamStateWriter* pWriter) { return streamMetaLoadAllTasks(pWriter->pTq->pStreamMeta); } int32_t streamStateSnapWrite(SStreamStateWriter* pWriter, uint8_t* pData, uint32_t nData) { tqDebug("vgId:%d, vnode %s snapshot write data", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); return streamSnapWrite(pWriter->pWriterImpl, pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr)); } +int32_t streamStateRebuildFromSnap(SStreamStateWriter* pWriter, int64_t chkpId) { + tqDebug("vgId:%d, vnode %s start to rebuild stream-state", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); + int32_t code = streamStateLoadTasks(pWriter); + tqDebug("vgId:%d, vnode %s succ to rebuild stream-state", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); + taosMemoryFree(pWriter); + return code; +} + +int32_t streamStateLoadTasks(SStreamStateWriter* pWriter) { + return streamMetaReloadAllTasks(pWriter->pTq->pStreamMeta); +} diff --git a/source/dnode/vnode/src/tq/tqStreamTaskSnap.c b/source/dnode/vnode/src/tq/tqStreamTaskSnap.c index f22ecc3daf..f966e90b9a 100644 --- a/source/dnode/vnode/src/tq/tqStreamTaskSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamTaskSnap.c @@ -238,7 +238,6 @@ int32_t streamTaskSnapWrite(SStreamTaskWriter* pWriter, uint8_t* pData, uint32_t goto _err; } tDecoderClear(&decoder); - // tdbTbInsert(TTB *pTb, const void *pKey, int keyLen, const void *pVal, int valLen, TXN *pTxn) int64_t key[2] = {taskId.streamId, taskId.taskId}; taosWLockLatch(&pTq->pStreamMeta->lock); diff --git a/source/dnode/vnode/src/tsdb/tsdbCache.c b/source/dnode/vnode/src/tsdb/tsdbCache.c index 918d0bd7d0..5076599753 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCache.c +++ b/source/dnode/vnode/src/tsdb/tsdbCache.c @@ -1131,9 +1131,13 @@ int32_t tsdbCacheDel(STsdb *pTsdb, tb_uid_t suid, tb_uid_t uid, TSKEY sKey, TSKE char **values_list = taosMemoryCalloc(num_keys * 2, sizeof(char *)); size_t *values_list_sizes = taosMemoryCalloc(num_keys * 2, sizeof(size_t)); char **errs = taosMemoryCalloc(num_keys * 2, sizeof(char *)); + + (void)tsdbCacheCommit(pTsdb); + taosThreadMutexLock(&pTsdb->lruMutex); + taosThreadMutexLock(&pTsdb->rCache.rMutex); - rocksMayWrite(pTsdb, true, false, false); + // rocksMayWrite(pTsdb, true, false, false); rocksdb_multi_get(pTsdb->rCache.db, pTsdb->rCache.readoptions, num_keys * 2, (const char *const *)keys_list, keys_list_sizes, values_list, values_list_sizes, errs); taosThreadMutexUnlock(&pTsdb->rCache.rMutex); diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index 89bed6b42f..add8da52e0 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -20,8 +20,6 @@ #define BLOCK_COMMIT_FACTOR 3 -extern int vnodeScheduleTask(int (*execute)(void *), void *arg); -extern int vnodeScheduleTaskEx(int tpid, int (*execute)(void *), void *arg); extern void remove_file(const char *fname, bool last_level); #define TSDB_FS_EDIT_MIN TSDB_FEDIT_COMMIT @@ -651,7 +649,6 @@ _exit: static int32_t close_file_system(STFileSystem *fs) { TARRAY2_CLEAR(fs->fSetArr, tsdbTFileSetClear); TARRAY2_CLEAR(fs->fSetArrTmp, tsdbTFileSetClear); - // TODO return 0; } @@ -748,36 +745,31 @@ _exit: return code; } -static void tsdbDoWaitBgTask(STFileSystem *fs, STFSBgTask *task) { - task->numWait++; - taosThreadCondWait(task->done, &fs->tsdb->mutex); - task->numWait--; +int32_t tsdbFSCancelAllBgTask(STFileSystem *fs) { + TARRAY2(int64_t) channelArr = {0}; - if (task->numWait == 0) { - 
taosThreadCondDestroy(task->done); - if (task->destroy) { - task->destroy(task->arg); + // collect all open channels + taosThreadMutexLock(&fs->tsdb->mutex); + STFileSet *fset; + TARRAY2_FOREACH(fs->fSetArr, fset) { + if (VNODE_ASYNC_VALID_CHANNEL_ID(fset->bgTaskChannel)) { + TARRAY2_APPEND(&channelArr, fset->bgTaskChannel); + fset->bgTaskChannel = 0; } - taosMemoryFree(task); } -} + taosThreadMutexUnlock(&fs->tsdb->mutex); -static void tsdbDoDoneBgTask(STFileSystem *fs, STFSBgTask *task) { - if (task->numWait > 0) { - taosThreadCondBroadcast(task->done); - } else { - taosThreadCondDestroy(task->done); - if (task->destroy) { - task->destroy(task->arg); - } - taosMemoryFree(task); - } + // destroy all channels + int64_t channel; + TARRAY2_FOREACH(&channelArr, channel) { vnodeAChannelDestroy(vnodeAsyncHandle[1], channel, true); } + TARRAY2_DESTROY(&channelArr, NULL); + return 0; } int32_t tsdbCloseFS(STFileSystem **fs) { if (fs[0] == NULL) return 0; - tsdbFSDisableBgTask(fs[0]); + tsdbFSCancelAllBgTask(*fs); close_file_system(fs[0]); destroy_fs(fs); return 0; @@ -910,7 +902,20 @@ int32_t tsdbFSEditCommit(STFileSystem *fs) { } if (!skipMerge) { - code = tsdbSchedMerge(fs->tsdb, fset->fid); + code = tsdbTFileSetOpenChannel(fset); + TSDB_CHECK_CODE(code, lino, _exit); + + SMergeArg *arg = taosMemoryMalloc(sizeof(*arg)); + if (arg == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + arg->tsdb = fs->tsdb; + arg->fid = fset->fid; + + code = vnodeAsyncC(vnodeAsyncHandle[1], fset->bgTaskChannel, EVA_PRIORITY_HIGH, tsdbMerge, taosMemoryFree, + arg, NULL); TSDB_CHECK_CODE(code, lino, _exit); } } @@ -939,7 +944,11 @@ int32_t tsdbFSEditCommit(STFileSystem *fs) { } } - if (tsdbTFileSetIsEmpty(fset) && fset->bgTaskRunning == NULL) { + if (tsdbTFileSetIsEmpty(fset)) { + if (VNODE_ASYNC_VALID_CHANNEL_ID(fset->bgTaskChannel)) { + vnodeAChannelDestroy(vnodeAsyncHandle[1], fset->bgTaskChannel, false); + fset->bgTaskChannel = 0; + } TARRAY2_REMOVE(fs->fSetArr, i, tsdbTFileSetClear); } else { i++; @@ -1179,136 +1188,4 @@ _out: pHash = NULL; } return code; -} - -const char *gFSBgTaskName[] = {NULL, "MERGE", "RETENTION", "COMPACT"}; - -static int32_t tsdbFSRunBgTask(void *arg) { - STFSBgTask *task = (STFSBgTask *)arg; - STFileSystem *fs = task->fs; - - task->launchTime = taosGetTimestampMs(); - task->run(task->arg); - task->finishTime = taosGetTimestampMs(); - - tsdbDebug("vgId:%d bg task:%s task id:%" PRId64 " finished, schedule time:%" PRId64 " launch time:%" PRId64 - " finish time:%" PRId64, - TD_VID(fs->tsdb->pVnode), gFSBgTaskName[task->type], task->taskid, task->scheduleTime, task->launchTime, - task->finishTime); - - taosThreadMutexLock(&fs->tsdb->mutex); - - STFileSet *fset = NULL; - tsdbFSGetFSet(fs, task->fid, &fset); - ASSERT(fset != NULL && fset->bgTaskRunning == task); - - // free last - tsdbDoDoneBgTask(fs, task); - fset->bgTaskRunning = NULL; - - // schedule next - if (fset->bgTaskNum > 0) { - if (fs->stop) { - while (fset->bgTaskNum > 0) { - STFSBgTask *nextTask = fset->bgTaskQueue->next; - nextTask->prev->next = nextTask->next; - nextTask->next->prev = nextTask->prev; - fset->bgTaskNum--; - tsdbDoDoneBgTask(fs, nextTask); - } - } else { - // pop task from head - fset->bgTaskRunning = fset->bgTaskQueue->next; - fset->bgTaskRunning->prev->next = fset->bgTaskRunning->next; - fset->bgTaskRunning->next->prev = fset->bgTaskRunning->prev; - fset->bgTaskNum--; - vnodeScheduleTaskEx(1, tsdbFSRunBgTask, fset->bgTaskRunning); - } - } - - 
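/*
 * Editor's note, not part of the patch: the per-fileset background-task queue and
 * bgTaskRunning bookkeeping removed above are replaced by a single int64_t channel id on
 * each file set, opened lazily by tsdbTFileSetOpenChannel() before work is scheduled. A
 * rough, self-contained sketch of that lazy channel-id assignment (hypothetical names,
 * not the real vnodeAChannelInit implementation):
 */
#include <inttypes.h>
#include <stdio.h>

typedef struct { int32_t fid; int64_t bgTaskChannel; } FileSet; /* 0 means no channel yet */

static int64_t nextChannelId = 0;

/* assign a channel the first time background work is scheduled for this file set */
static int64_t ensureChannel(FileSet *fset) {
  if (fset->bgTaskChannel == 0) fset->bgTaskChannel = ++nextChannelId;
  return fset->bgTaskChannel;
}

int main(void) {
  FileSet a = {.fid = 1}, b = {.fid = 2};
  printf("fid 1 -> channel %" PRId64 "\n", ensureChannel(&a));
  printf("fid 1 -> channel %" PRId64 "\n", ensureChannel(&a)); /* reused, not re-created */
  printf("fid 2 -> channel %" PRId64 "\n", ensureChannel(&b));
  return 0;
}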
taosThreadMutexUnlock(&fs->tsdb->mutex); - return 0; -} - -// IMPORTANT: the caller must hold the fs->tsdb->mutex -int32_t tsdbFSScheduleBgTask(STFileSystem *fs, int32_t fid, EFSBgTaskT type, int32_t (*run)(void *), - void (*destroy)(void *), void *arg, int64_t *taskid) { - if (fs->stop) { - if (destroy) { - destroy(arg); - } - return 0; - } - - STFileSet *fset; - tsdbFSGetFSet(fs, fid, &fset); - - ASSERT(fset != NULL); - - for (STFSBgTask *task = fset->bgTaskQueue->next; task != fset->bgTaskQueue; task = task->next) { - if (task->type == type) { - if (destroy) { - destroy(arg); - } - return 0; - } - } - - // do schedule task - STFSBgTask *task = taosMemoryCalloc(1, sizeof(STFSBgTask)); - if (task == NULL) return TSDB_CODE_OUT_OF_MEMORY; - taosThreadCondInit(task->done, NULL); - - task->fs = fs; - task->fid = fid; - task->type = type; - task->run = run; - task->destroy = destroy; - task->arg = arg; - task->scheduleTime = taosGetTimestampMs(); - task->taskid = ++fs->taskid; - - if (fset->bgTaskRunning == NULL && fset->bgTaskNum == 0) { - // launch task directly - fset->bgTaskRunning = task; - vnodeScheduleTaskEx(1, tsdbFSRunBgTask, task); - } else { - // add to the queue tail - fset->bgTaskNum++; - task->next = fset->bgTaskQueue; - task->prev = fset->bgTaskQueue->prev; - task->prev->next = task; - task->next->prev = task; - } - - if (taskid) *taskid = task->taskid; - return 0; -} - -int32_t tsdbFSDisableBgTask(STFileSystem *fs) { - taosThreadMutexLock(&fs->tsdb->mutex); - for (;;) { - fs->stop = true; - bool done = true; - - STFileSet *fset; - TARRAY2_FOREACH(fs->fSetArr, fset) { - if (fset->bgTaskRunning) { - tsdbDoWaitBgTask(fs, fset->bgTaskRunning); - done = false; - break; - } - } - - if (done) break; - } - taosThreadMutexUnlock(&fs->tsdb->mutex); - return 0; -} - -int32_t tsdbFSEnableBgTask(STFileSystem *fs) { - taosThreadMutexLock(&fs->tsdb->mutex); - fs->stop = false; - taosThreadMutexUnlock(&fs->tsdb->mutex); - return 0; -} +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.h b/source/dnode/vnode/src/tsdb/tsdbFS2.h index a3a8e2f575..74453126cf 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.h @@ -55,11 +55,6 @@ int64_t tsdbFSAllocEid(STFileSystem *fs); int32_t tsdbFSEditBegin(STFileSystem *fs, const TFileOpArray *opArray, EFEditT etype); int32_t tsdbFSEditCommit(STFileSystem *fs); int32_t tsdbFSEditAbort(STFileSystem *fs); -// background task -int32_t tsdbFSScheduleBgTask(STFileSystem *fs, int32_t fid, EFSBgTaskT type, int32_t (*run)(void *), - void (*destroy)(void *), void *arg, int64_t *taskid); -int32_t tsdbFSDisableBgTask(STFileSystem *fs); -int32_t tsdbFSEnableBgTask(STFileSystem *fs); // other int32_t tsdbFSGetFSet(STFileSystem *fs, int32_t fid, STFileSet **fset); int32_t tsdbFSCheckCommit(STsdb *tsdb, int32_t fid); diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.c b/source/dnode/vnode/src/tsdb/tsdbFSet2.c index 61bedcb996..025671ff3d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSet2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.c @@ -14,6 +14,7 @@ */ #include "tsdbFSet2.h" +#include "vnd.h" int32_t tsdbSttLvlInit(int32_t level, SSttLvl **lvl) { if (!(lvl[0] = taosMemoryMalloc(sizeof(SSttLvl)))) return TSDB_CODE_OUT_OF_MEMORY; @@ -451,10 +452,7 @@ int32_t tsdbTFileSetInit(int32_t fid, STFileSet **fset) { TARRAY2_INIT(fset[0]->lvlArr); // background task queue - fset[0]->bgTaskNum = 0; - fset[0]->bgTaskQueue->next = fset[0]->bgTaskQueue; - fset[0]->bgTaskQueue->prev = fset[0]->bgTaskQueue; - 
fset[0]->bgTaskRunning = NULL; + fset[0]->bgTaskChannel = 0; // block commit variables taosThreadCondInit(&fset[0]->canCommit, NULL); @@ -650,3 +648,8 @@ bool tsdbTFileSetIsEmpty(const STFileSet *fset) { } return TARRAY2_SIZE(fset->lvlArr) == 0; } + +int32_t tsdbTFileSetOpenChannel(STFileSet *fset) { + if (VNODE_ASYNC_VALID_CHANNEL_ID(fset->bgTaskChannel)) return 0; + return vnodeAChannelInit(vnodeAsyncHandle[1], &fset->bgTaskChannel); +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.h b/source/dnode/vnode/src/tsdb/tsdbFSet2.h index 34f174ade7..32028db352 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSet2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.h @@ -22,14 +22,12 @@ extern "C" { #endif -typedef struct STFileSet STFileSet; -typedef struct STFileOp STFileOp; -typedef struct SSttLvl SSttLvl; +typedef struct STFileOp STFileOp; +typedef struct SSttLvl SSttLvl; typedef TARRAY2(STFileObj *) TFileObjArray; typedef TARRAY2(SSttLvl *) TSttLvlArray; typedef TARRAY2(STFileOp) TFileOpArray; typedef struct STFileSystem STFileSystem; -typedef struct STFSBgTask STFSBgTask; typedef enum { TSDB_FOP_NONE = 0, @@ -72,33 +70,8 @@ bool tsdbTFileSetIsEmpty(const STFileSet *fset); // stt int32_t tsdbSttLvlInit(int32_t level, SSttLvl **lvl); int32_t tsdbSttLvlClear(SSttLvl **lvl); - -typedef enum { - TSDB_BG_TASK_MERGER = 1, - TSDB_BG_TASK_RETENTION, - TSDB_BG_TASK_COMPACT, -} EFSBgTaskT; - -struct STFSBgTask { - STFileSystem *fs; - int32_t fid; - - EFSBgTaskT type; - int32_t (*run)(void *arg); - void (*destroy)(void *arg); - void *arg; - - TdThreadCond done[1]; - int32_t numWait; - - int64_t taskid; - int64_t scheduleTime; - int64_t launchTime; - int64_t finishTime; - - struct STFSBgTask *prev; - struct STFSBgTask *next; -}; +// open channel +int32_t tsdbTFileSetOpenChannel(STFileSet *fset); struct STFileOp { tsdb_fop_t optype; @@ -118,10 +91,8 @@ struct STFileSet { STFileObj *farr[TSDB_FTYPE_MAX]; // file array TSttLvlArray lvlArr[1]; // level array - // background task queue - int32_t bgTaskNum; - STFSBgTask bgTaskQueue[1]; - STFSBgTask *bgTaskRunning; + // background task channel + int64_t bgTaskChannel; // block commit variables TdThreadCond canCommit; diff --git a/source/dnode/vnode/src/tsdb/tsdbMerge.c b/source/dnode/vnode/src/tsdb/tsdbMerge.c index 6d968d0828..b47b951b2b 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMerge.c +++ b/source/dnode/vnode/src/tsdb/tsdbMerge.c @@ -17,11 +17,6 @@ #define TSDB_MAX_LEVEL 2 // means max level is 3 -typedef struct { - STsdb *tsdb; - int32_t fid; -} SMergeArg; - typedef struct { STsdb *tsdb; int32_t fid; @@ -528,7 +523,7 @@ static int32_t tsdbMergeGetFSet(SMerger *merger) { return 0; } -static int32_t tsdbMerge(void *arg) { +int32_t tsdbMerge(void *arg) { int32_t code = 0; int32_t lino = 0; SMergeArg *mergeArg = (SMergeArg *)arg; @@ -597,18 +592,3 @@ _exit: tsdbTFileSetClear(&merger->fset); return code; } - -int32_t tsdbSchedMerge(STsdb *tsdb, int32_t fid) { - SMergeArg *arg = taosMemoryMalloc(sizeof(*arg)); - if (arg == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - arg->tsdb = tsdb; - arg->fid = fid; - - int32_t code = tsdbFSScheduleBgTask(tsdb->pFS, fid, TSDB_BG_TASK_MERGER, tsdbMerge, taosMemoryFree, arg, NULL); - if (code) taosMemoryFree(arg); - - return code; -} diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index 751df706ab..cb899f9ee8 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -48,7 +48,7 @@ static int32_t 
doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScan static int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBlockScanInfo, int64_t key, STsdbReader* pReader); -static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SCostSummary* pCost); +static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SReadCostSummary* pCost); static STsdb* getTsdbByRetentions(SVnode* pVnode, SQueryTableDataCond* pCond, SRetention* retentions, const char* idstr, int8_t* pLevel); static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level); @@ -58,6 +58,7 @@ static TSDBKEY getCurrentKeyInBuf(STableBlockScanInfo* pScanInfo, STsdbRea static bool hasDataInFileBlock(const SBlockData* pBlockData, const SFileBlockDumpInfo* pDumpInfo); static void initBlockDumpInfo(STsdbReader* pReader, SDataBlockIter* pBlockIter); static int32_t getInitialDelIndex(const SArray* pDelSkyline, int32_t order); +static void updateComposedBlockInfo(STsdbReader* pReader, double el, STableBlockScanInfo* pBlockScanInfo); static bool outOfTimeWindow(int64_t ts, STimeWindow* pWindow) { return (ts > pWindow->ekey) || (ts < pWindow->skey); } @@ -168,7 +169,7 @@ static int32_t filesetIteratorNext(SFilesetIter* pIter, STsdbReader* pReader, bo return TSDB_CODE_SUCCESS; } - SCostSummary* pCost = &pReader->cost; + SReadCostSummary* pCost = &pReader->cost; pIter->pLastBlockReader->uid = 0; tMergeTreeClose(&pIter->pLastBlockReader->mergeTree); @@ -291,11 +292,7 @@ static SSDataBlock* createResBlock(SQueryTableDataCond* pCond, int32_t capacity) } static int32_t tsdbInitReaderLock(STsdbReader* pReader) { - int32_t code = -1; - qTrace("tsdb/read: %p, pre-init read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - code = taosThreadMutexInit(&pReader->readerMutex, NULL); - + int32_t code = taosThreadMutexInit(&pReader->readerMutex, NULL); qTrace("tsdb/read: %p, post-init read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); return code; @@ -324,22 +321,14 @@ static int32_t tsdbAcquireReader(STsdbReader* pReader) { } static int32_t tsdbTryAcquireReader(STsdbReader* pReader) { - int32_t code = -1; - qTrace("tsdb/read: %p, pre-trytake read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - code = taosThreadMutexTryLock(&pReader->readerMutex); - + int32_t code = taosThreadMutexTryLock(&pReader->readerMutex); qTrace("tsdb/read: %p, post-trytake read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); return code; } static int32_t tsdbReleaseReader(STsdbReader* pReader) { - int32_t code = -1; - qTrace("tsdb/read: %p, pre-untake read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - code = taosThreadMutexUnlock(&pReader->readerMutex); - + int32_t code = taosThreadMutexUnlock(&pReader->readerMutex); qTrace("tsdb/read: %p, post-untake read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); return code; @@ -432,6 +421,7 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, void } tsdbInitReaderLock(pReader); + tsem_init(&pReader->resumeAfterSuspend, 0, 0); *ppReader = pReader; return code; @@ -1015,8 +1005,8 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader) { // check if current block are all handled if (pDumpInfo->rowIndex >= 0 && pDumpInfo->rowIndex < pRecord->numRow) { int64_t ts = pBlockData->aTSKEY[pDumpInfo->rowIndex]; - if (outOfTimeWindow(ts, - &pReader->info.window)) { // the remain data has out of query 
time window, ignore current block + if (outOfTimeWindow(ts, &pReader->info.window)) { + // the remain data has out of query time window, ignore current block setBlockAllDumped(pDumpInfo, ts, pReader->info.order); } } else { @@ -1123,16 +1113,12 @@ static bool getNeighborBlockOfSameTable(SDataBlockIter* pBlockIter, SFileDataBlo } int32_t step = asc ? 1 : -1; - // *nextIndex = pBlockInfo->tbBlockIdx + step; - // *pBlockIndex = *(SBlockIndex*)taosArrayGet(pTableBlockScanInfo->pBlockList, *nextIndex); STableDataBlockIdx* pTableDataBlockIdx = taosArrayGet(pTableBlockScanInfo->pBlockIdxList, pBlockInfo->tbBlockIdx + step); SFileDataBlockInfo* p = taosArrayGet(pBlockIter->blockList, pTableDataBlockIdx->globalIndex); memcpy(pRecord, &p->record, sizeof(SBrinRecord)); *nextIndex = pBlockInfo->tbBlockIdx + step; - - // tMapDataGetItemByIdx(&pTableBlockScanInfo->mapData, pIndex->ordinalIndex, pBlock, tGetDataBlk); return true; } @@ -1376,23 +1362,19 @@ static int32_t buildDataBlockFromBuf(STsdbReader* pReader, STableBlockScanInfo* return TSDB_CODE_SUCCESS; } - SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; - int64_t st = taosGetTimestampUs(); + SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; int32_t code = buildDataBlockFromBufImpl(pBlockScanInfo, endKey, pReader->resBlockInfo.capacity, pReader); - blockDataUpdateTsWindow(pBlock, pReader->suppInfo.slotId[0]); - pBlock->info.id.uid = pBlockScanInfo->uid; + double el = (taosGetTimestampUs() - st) / 1000.0; + updateComposedBlockInfo(pReader, el, pBlockScanInfo); - setComposedBlockFlag(pReader, true); - - double elapsedTime = (taosGetTimestampUs() - st) / 1000.0; tsdbDebug("%p build data block from cache completed, elapsed time:%.2f ms, numOfRows:%" PRId64 ", brange:%" PRId64 " - %" PRId64 ", uid:%" PRIu64 ", %s", - pReader, elapsedTime, pBlock->info.rows, pBlock->info.window.skey, pBlock->info.window.ekey, + pReader, el, pBlock->info.rows, pBlock->info.window.skey, pBlock->info.window.ekey, pBlockScanInfo->uid, pReader->idStr); - pReader->cost.buildmemBlock += elapsedTime; + pReader->cost.buildmemBlock += el; return code; } @@ -2293,13 +2275,12 @@ static int32_t loadNeighborIfOverlap(SFileDataBlockInfo* pBlockInfo, STableBlock return code; } -static void updateComposedBlockInfo(STsdbReader* pReader, double el, STableBlockScanInfo* pBlockScanInfo) { +void updateComposedBlockInfo(STsdbReader* pReader, double el, STableBlockScanInfo* pBlockScanInfo) { SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; pResBlock->info.id.uid = (pBlockScanInfo != NULL) ? pBlockScanInfo->uid : 0; pResBlock->info.dataLoad = 1; blockDataUpdateTsWindow(pResBlock, pReader->suppInfo.slotId[0]); - setComposedBlockFlag(pReader, true); pReader->cost.composedBlocks += 1; @@ -2356,7 +2337,6 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader) { pBlockScanInfo = *pReader->status.pTableIter; if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pBlockScanInfo->uid, sizeof(pBlockScanInfo->uid))) { - // setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->info.order); return code; } } @@ -2436,7 +2416,7 @@ int32_t getInitialDelIndex(const SArray* pDelSkyline, int32_t order) { return ASCENDING_TRAVERSE(order) ? 
0 : taosArrayGetSize(pDelSkyline) - 1; } -int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SCostSummary* pCost) { +int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SReadCostSummary* pCost) { int32_t code = 0; int32_t newDelDataInFile = taosArrayGetSize(pBlockScanInfo->pFileDelData); if (newDelDataInFile == 0 && @@ -2935,6 +2915,8 @@ static int32_t buildBlockFromBufferSequentially(STsdbReader* pReader) { SReaderStatus* pStatus = &pReader->status; STableUidList* pUidList = &pStatus->uidList; + tsdbDebug("seq load data blocks from cache, %s", pReader->idStr); + while (1) { if (pReader->code != TSDB_CODE_SUCCESS) { tsdbWarn("tsdb reader is stopped ASAP, code:%s, %s", strerror(pReader->code), pReader->idStr); @@ -3043,6 +3025,8 @@ static ERetrieveType doReadDataFromLastFiles(STsdbReader* pReader) { SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; SDataBlockIter* pBlockIter = &pReader->status.blockIter; + tsdbDebug("seq load data blocks from stt files %s", pReader->idStr); + while (1) { terrno = 0; @@ -3774,7 +3758,6 @@ int32_t buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t e int32_t code = TSDB_CODE_SUCCESS; do { - // SRow* pTSRow = NULL; TSDBROW row = {.type = -1}; bool freeTSRow = false; tsdbGetNextRowInMem(pBlockScanInfo, pReader, &row, endKey, &freeTSRow); @@ -3783,6 +3766,7 @@ int32_t buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t e } if (row.type == TSDBROW_ROW_FMT) { + int64_t ts = row.pTSRow->ts;; code = doAppendRowFromTSRow(pBlock, pReader, row.pTSRow, pBlockScanInfo); if (freeTSRow) { @@ -3792,13 +3776,17 @@ int32_t buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t e if (code) { return code; } + + pBlockScanInfo->lastProcKey = ts; } else { code = doAppendRowFromFileBlock(pBlock, pReader, row.pBlockData, row.iRow); if (code) { break; } + pBlockScanInfo->lastProcKey = row.pBlockData->aTSKEY[row.iRow]; } + // no data in buffer, return immediately if (!(pBlockScanInfo->iter.hasVal || pBlockScanInfo->iiter.hasVal)) { break; @@ -4107,7 +4095,7 @@ void tsdbReaderClose2(STsdbReader* pReader) { tsdbDataFileReaderClose(&pReader->pFileReader); } - SCostSummary* pCost = &pReader->cost; + SReadCostSummary* pCost = &pReader->cost; SFilesetIter* pFilesetIter = &pReader->status.fileIter; if (pFilesetIter->pLastBlockReader != NULL) { SLastBlockReader* pLReader = pFilesetIter->pLastBlockReader; @@ -4122,6 +4110,7 @@ void tsdbReaderClose2(STsdbReader* pReader) { tsdbUntakeReadSnap2(pReader, pReader->pReadSnap, true); pReader->pReadSnap = NULL; + tsem_destroy(&pReader->resumeAfterSuspend); tsdbReleaseReader(pReader); tsdbUninitReaderLock(pReader); @@ -4154,6 +4143,8 @@ int32_t tsdbReaderSuspend2(STsdbReader* pReader) { SReaderStatus* pStatus = &pReader->status; STableBlockScanInfo* pBlockScanInfo = NULL; + pReader->status.suspendInvoked = true; // record the suspend status + if (pStatus->loadFromFile) { SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); if (pBlockInfo != NULL) { @@ -4167,84 +4158,34 @@ int32_t tsdbReaderSuspend2(STsdbReader* pReader) { tsdbDataFileReaderClose(&pReader->pFileReader); - SCostSummary* pCost = &pReader->cost; + SReadCostSummary* pCost = &pReader->cost; pReader->status.pLDataIterArray = destroySttBlockReader(pReader->status.pLDataIterArray, &pCost->sttCost); pReader->status.pLDataIterArray = taosArrayInit(4, POINTER_BYTES); - // resetDataBlockScanInfo excluding lastKey - STableBlockScanInfo** p = NULL; 
- int32_t iter = 0; - - while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) { - STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; - - pInfo->iterInit = false; - pInfo->iter.hasVal = false; - pInfo->iiter.hasVal = false; - - if (pInfo->iter.iter != NULL) { - pInfo->iter.iter = tsdbTbDataIterDestroy(pInfo->iter.iter); - } - - if (pInfo->iiter.iter != NULL) { - pInfo->iiter.iter = tsdbTbDataIterDestroy(pInfo->iiter.iter); - } - - pInfo->delSkyline = taosArrayDestroy(pInfo->delSkyline); - pInfo->pFileDelData = taosArrayDestroy(pInfo->pFileDelData); - } - } else { - // resetDataBlockScanInfo excluding lastKey - STableBlockScanInfo** p = NULL; - int32_t iter = 0; - - while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) { - STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; - - pInfo->iterInit = false; - pInfo->iter.hasVal = false; - pInfo->iiter.hasVal = false; - - if (pInfo->iter.iter != NULL) { - pInfo->iter.iter = tsdbTbDataIterDestroy(pInfo->iter.iter); - } - - if (pInfo->iiter.iter != NULL) { - pInfo->iiter.iter = tsdbTbDataIterDestroy(pInfo->iiter.iter); - } - - pInfo->delSkyline = taosArrayDestroy(pInfo->delSkyline); - } - - pBlockScanInfo = pStatus->pTableIter == NULL ? NULL : *pStatus->pTableIter; - if (pBlockScanInfo) { - // save lastKey to restore memory iterator - STimeWindow w = pReader->resBlockInfo.pResBlock->info.window; - pBlockScanInfo->lastProcKey = ASCENDING_TRAVERSE(pReader->info.order) ? w.ekey : w.skey; - - // reset current current table's data block scan info, - pBlockScanInfo->iterInit = false; - - pBlockScanInfo->iter.hasVal = false; - pBlockScanInfo->iiter.hasVal = false; - if (pBlockScanInfo->iter.iter != NULL) { - pBlockScanInfo->iter.iter = tsdbTbDataIterDestroy(pBlockScanInfo->iter.iter); - } - - if (pBlockScanInfo->iiter.iter != NULL) { - pBlockScanInfo->iiter.iter = tsdbTbDataIterDestroy(pBlockScanInfo->iiter.iter); - } - - pBlockScanInfo->pBlockList = taosArrayDestroy(pBlockScanInfo->pBlockList); - pBlockScanInfo->pBlockIdxList = taosArrayDestroy(pBlockScanInfo->pBlockIdxList); - // TODO: keep skyline for reuse - pBlockScanInfo->delSkyline = taosArrayDestroy(pBlockScanInfo->delSkyline); - } } + // resetDataBlockScanInfo excluding lastKey + STableBlockScanInfo** p = NULL; + + int32_t step = ASCENDING_TRAVERSE(pReader->info.order)? 1:-1; + + int32_t iter = 0; + while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) { + STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; + clearBlockScanInfo(pInfo); + pInfo->sttKeyInfo.nextProcKey = pInfo->lastProcKey + step; + } + + pStatus->uidList.currentIndex = 0; + initReaderStatus(pStatus); + tsdbUntakeReadSnap2(pReader, pReader->pReadSnap, false); pReader->pReadSnap = NULL; pReader->flag = READER_STATUS_SUSPEND; +#if SUSPEND_RESUME_TEST + tsem_post(&pReader->resumeAfterSuspend); +#endif + tsdbDebug("reader: %p suspended uid %" PRIu64 " in this query %s", pReader, pBlockScanInfo ? pBlockScanInfo->uid : 0, pReader->idStr); return code; @@ -4399,6 +4340,16 @@ int32_t tsdbNextDataBlock2(STsdbReader* pReader, bool* hasNext) { SReaderStatus* pStatus = &pReader->status; + // NOTE: the following codes is used to perform test for suspend/resume for tsdbReader when it blocks the commit + // the data should be ingested in round-robin and all the child tables should be createted before ingesting data + // the version range of query will be used to identify the correctness of suspend/resume functions. 
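/*
 * Editor's note, not part of the patch: instead of snapshotting per-table iterators, the
 * suspend path above clears each table's scan info and relies on lastProcKey, the key of
 * the last row already delivered; on resume the next key to process is one step past it
 * in the traversal direction, as in the replacement hunk. Minimal sketch with a
 * hypothetical ScanPos type:
 */
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct { int64_t lastProcKey; int64_t nextProcKey; } ScanPos;

static void resetAfterSuspend(ScanPos *pos, bool ascendingOrder) {
  int64_t step = ascendingOrder ? 1 : -1;
  pos->nextProcKey = pos->lastProcKey + step; /* resume right after the last delivered row */
}

int main(void) {
  ScanPos pos = {.lastProcKey = 1700000000000}; /* ms timestamp of the last row handed out */
  resetAfterSuspend(&pos, true);
  printf("resume from key %" PRId64 "\n", pos.nextProcKey);
  return 0;
}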
+ // this function will blocked before loading the SECOND block from vnode-buffer, and restart itself from sst-files +#if SUSPEND_RESUME_TEST + if (!pReader->status.suspendInvoked && !pReader->status.loadFromFile) { + tsem_wait(&pReader->resumeAfterSuspend); + } +#endif + code = tsdbAcquireReader(pReader); qTrace("tsdb/read: %p, take read mutex, code: %d", pReader, code); diff --git a/source/dnode/vnode/src/tsdb/tsdbReadUtil.c b/source/dnode/vnode/src/tsdb/tsdbReadUtil.c index 305399e0af..24c526a906 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReadUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbReadUtil.c @@ -210,6 +210,7 @@ void clearBlockScanInfo(STableBlockScanInfo* p) { p->iterInit = false; p->iter.hasVal = false; p->iiter.hasVal = false; + p->sttKeyInfo.status = STT_FILE_READER_UNINIT; if (p->iter.iter != NULL) { p->iter.iter = tsdbTbDataIterDestroy(p->iter.iter); diff --git a/source/dnode/vnode/src/tsdb/tsdbReadUtil.h b/source/dnode/vnode/src/tsdb/tsdbReadUtil.h index 60e6e6960a..709e311ff0 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReadUtil.h +++ b/source/dnode/vnode/src/tsdb/tsdbReadUtil.h @@ -96,7 +96,7 @@ typedef struct SResultBlockInfo { int64_t capacity; } SResultBlockInfo; -typedef struct SCostSummary { +typedef struct SReadCostSummary { int64_t numOfBlocks; double blockLoadTime; double buildmemBlock; @@ -110,7 +110,7 @@ typedef struct SCostSummary { double createScanInfoList; double createSkylineIterTime; double initLastBlockReader; -} SCostSummary; +} SReadCostSummary; typedef struct STableUidList { uint64_t* tableUidList; // access table uid list in uid ascending order list @@ -122,12 +122,6 @@ typedef struct { int32_t numOfSttFiles; } SBlockNumber; -typedef struct SBlockIndex { - int32_t ordinalIndex; - int64_t inFileOffset; - STimeWindow window; // todo replace it with overlap flag. -} SBlockIndex; - typedef struct SBlockOrderWrapper { int64_t uid; int64_t offset; @@ -192,6 +186,7 @@ typedef struct SFileBlockDumpInfo { } SFileBlockDumpInfo; typedef struct SReaderStatus { + bool suspendInvoked; bool loadFromFile; // check file stage bool composedDataBlock; // the returned data block is a composed block or not SSHashObj* pTableMap; // SHash @@ -220,7 +215,8 @@ struct STsdbReader { int32_t type; // query type: 1. retrieve all data blocks, 2. 
retrieve direct prev|next rows SBlockLoadSuppInfo suppInfo; STsdbReadSnap* pReadSnap; - SCostSummary cost; + tsem_t resumeAfterSuspend; + SReadCostSummary cost; SHashObj** pIgnoreTables; SSHashObj* pSchemaMap; // keep the retrieved schema info, to avoid the overhead by repeatly load schema SDataFileReader* pFileReader; // the file reader diff --git a/source/dnode/vnode/src/tsdb/tsdbRetention.c b/source/dnode/vnode/src/tsdb/tsdbRetention.c index f6888ba9cb..d8f1ad7c6c 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRetention.c +++ b/source/dnode/vnode/src/tsdb/tsdbRetention.c @@ -249,7 +249,7 @@ _exit: if (code) { TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); } else { - tsdbInfo("vid:%d, cid:%" PRId64 ", %s done", TD_VID(rtner->tsdb->pVnode), rtner->cid, __func__); + tsdbDebug("vid:%d, cid:%" PRId64 ", %s done", TD_VID(rtner->tsdb->pVnode), rtner->cid, __func__); } return code; } @@ -279,7 +279,7 @@ _exit: if (code) { TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); } else { - tsdbInfo("vid:%d, cid:%" PRId64 ", %s done", TD_VID(rtner->tsdb->pVnode), rtner->cid, __func__); + tsdbDebug("vid:%d, cid:%" PRId64 ", %s done", TD_VID(rtner->tsdb->pVnode), rtner->cid, __func__); } tsdbFSDestroyCopySnapshot(&rtner->fsetArr); return code; @@ -391,32 +391,6 @@ _exit: static void tsdbFreeRtnArg(void *arg) { taosMemoryFree(arg); } -static int32_t tsdbDoRetentionSync(void *arg) { - int32_t code = 0; - int32_t lino = 0; - SRTNer rtner[1] = {0}; - - code = tsdbDoRetentionBegin(arg, rtner); - TSDB_CHECK_CODE(code, lino, _exit); - - STFileSet *fset; - TARRAY2_FOREACH(rtner->fsetArr, fset) { - code = tsdbDoRetentionOnFileSet(rtner, fset); - TSDB_CHECK_CODE(code, lino, _exit); - } - - code = tsdbDoRetentionEnd(rtner); - TSDB_CHECK_CODE(code, lino, _exit); - -_exit: - if (code) { - TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); - } - tsem_post(&((SRtnArg *)arg)->tsdb->pVnode->canCommit); - tsdbFreeRtnArg(arg); - return code; -} - static int32_t tsdbDoRetentionAsync(void *arg) { int32_t code = 0; int32_t lino = 0; @@ -454,49 +428,41 @@ _exit: int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync) { int32_t code = 0; - if (sync) { // sync retention + taosThreadMutexLock(&tsdb->mutex); + + STFileSet *fset; + TARRAY2_FOREACH(tsdb->pFS->fSetArr, fset) { + code = tsdbTFileSetOpenChannel(fset); + if (code) { + taosThreadMutexUnlock(&tsdb->mutex); + return code; + } + SRtnArg *arg = taosMemoryMalloc(sizeof(*arg)); if (arg == NULL) { + taosThreadMutexUnlock(&tsdb->mutex); return TSDB_CODE_OUT_OF_MEMORY; } arg->tsdb = tsdb; arg->now = now; - arg->fid = INT32_MAX; + arg->fid = fset->fid; - tsem_wait(&tsdb->pVnode->canCommit); - code = vnodeScheduleTask(tsdbDoRetentionSync, arg); + if (sync) { + code = vnodeAsyncC(vnodeAsyncHandle[0], tsdb->pVnode->commitChannel, EVA_PRIORITY_LOW, tsdbDoRetentionAsync, + tsdbFreeRtnArg, arg, NULL); + } else { + code = vnodeAsyncC(vnodeAsyncHandle[1], fset->bgTaskChannel, EVA_PRIORITY_LOW, tsdbDoRetentionAsync, + tsdbFreeRtnArg, arg, NULL); + } if (code) { - tsem_post(&tsdb->pVnode->canCommit); - taosMemoryFree(arg); + tsdbFreeRtnArg(arg); + taosThreadMutexUnlock(&tsdb->mutex); return code; } - } else { // async retention - taosThreadMutexLock(&tsdb->mutex); - - STFileSet *fset; - TARRAY2_FOREACH(tsdb->pFS->fSetArr, fset) { - SRtnArg *arg = taosMemoryMalloc(sizeof(*arg)); - if (arg == NULL) { - taosThreadMutexUnlock(&tsdb->mutex); - return TSDB_CODE_OUT_OF_MEMORY; - } - - arg->tsdb = tsdb; - arg->now = now; - arg->fid = fset->fid; - - code = 
tsdbFSScheduleBgTask(tsdb->pFS, fset->fid, TSDB_BG_TASK_RETENTION, tsdbDoRetentionAsync, tsdbFreeRtnArg, - arg, NULL); - if (code) { - tsdbFreeRtnArg(arg); - taosThreadMutexUnlock(&tsdb->mutex); - return code; - } - } - - taosThreadMutexUnlock(&tsdb->mutex); } + taosThreadMutexUnlock(&tsdb->mutex); + return code; } diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index a9da0fbcec..104c9b2f35 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -1032,9 +1032,6 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, void* pRang int32_t code = 0; int32_t lino = 0; - // disable background tasks - tsdbFSDisableBgTask(pTsdb->pFS); - // start to write writer[0] = taosMemoryCalloc(1, sizeof(*writer[0])); if (writer[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; @@ -1107,7 +1104,6 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** writer, int8_t rollback) { taosThreadMutexUnlock(&writer[0]->tsdb->mutex); } - tsdbFSEnableBgTask(tsdb->pFS); tsdbIterMergerClose(&writer[0]->ctx->tombIterMerger); tsdbIterMergerClose(&writer[0]->ctx->dataIterMerger); @@ -1595,3 +1591,6 @@ _out: return code; } + +extern int32_t tsdbFSCancelAllBgTask(STFileSystem* fs); +int32_t tsdbCancelAllBgTask(STsdb* tsdb) { return tsdbFSCancelAllBgTask(tsdb->pFS); } \ No newline at end of file diff --git a/source/dnode/vnode/src/vnd/vnodeAsync.c b/source/dnode/vnode/src/vnd/vnodeAsync.c new file mode 100644 index 0000000000..c95d2324aa --- /dev/null +++ b/source/dnode/vnode/src/vnd/vnodeAsync.c @@ -0,0 +1,719 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "vnd.h" +#include "vnodeHash.h" + +typedef struct SVATask SVATask; +typedef struct SVAChannel SVAChannel; + +#define VNODE_ASYNC_DEFAULT_WORKERS 4 +#define VNODE_ASYNC_MAX_WORKERS 256 + +// priority + +#define EVA_PRIORITY_MAX (EVA_PRIORITY_LOW + 1) + +// worker +typedef enum { + EVA_WORKER_STATE_UINIT = 0, + EVA_WORKER_STATE_ACTIVE, + EVA_WORKER_STATE_IDLE, + EVA_WORKER_STATE_STOP, +} EVWorkerState; + +typedef struct { + SVAsync *async; + int32_t workerId; + EVWorkerState state; + TdThread thread; + SVATask *runningTask; +} SVWorker; + +// task +typedef enum { + EVA_TASK_STATE_WAITTING = 0, + EVA_TASK_STATE_RUNNING, +} EVATaskState; + +struct SVATask { + int64_t taskId; + EVAPriority priority; + int32_t priorScore; + SVAChannel *channel; + int32_t (*execute)(void *); + void (*complete)(void *); + void *arg; + EVATaskState state; + + // wait + int32_t numWait; + TdThreadCond waitCond; + + // queue + struct SVATask *prev; + struct SVATask *next; +}; + +#define VATASK_PIORITY(task_) ((task_)->priority - ((task_)->priorScore / 4)) + +// async channel +typedef enum { + EVA_CHANNEL_STATE_OPEN = 0, + EVA_CHANNEL_STATE_CLOSE, +} EVAChannelState; + +struct SVAChannel { + int64_t channelId; + EVAChannelState state; + SVATask queue[EVA_PRIORITY_MAX]; + SVATask *scheduled; + + SVAChannel *prev; + SVAChannel *next; +}; + +// async handle +struct SVAsync { + const char *label; + + TdThreadMutex mutex; + TdThreadCond hasTask; + bool stop; + + // worker + int32_t numWorkers; + int32_t numLaunchWorkers; + int32_t numIdleWorkers; + SVWorker workers[VNODE_ASYNC_MAX_WORKERS]; + + // channel + int64_t nextChannelId; + int32_t numChannels; + SVAChannel chList; + SVHashTable *channelTable; + + // task + int64_t nextTaskId; + int32_t numTasks; + SVATask queue[EVA_PRIORITY_MAX]; + SVHashTable *taskTable; +}; + +static int32_t vnodeAsyncTaskDone(SVAsync *async, SVATask *task) { + int32_t ret; + + if (task->channel != NULL && task->channel->scheduled == task) { + task->channel->scheduled = NULL; + if (task->channel->state == EVA_CHANNEL_STATE_CLOSE) { + taosMemoryFree(task->channel); + } else { + for (int32_t i = 0; i < EVA_PRIORITY_MAX; i++) { + SVATask *nextTask = task->channel->queue[i].next; + if (nextTask != &task->channel->queue[i]) { + if (task->channel->scheduled == NULL) { + task->channel->scheduled = nextTask; + nextTask->next->prev = nextTask->prev; + nextTask->prev->next = nextTask->next; + } else { + nextTask->priorScore++; + int32_t newPriority = VATASK_PIORITY(nextTask); + if (newPriority != i) { + // remove from current priority queue + nextTask->prev->next = nextTask->next; + nextTask->next->prev = nextTask->prev; + // add to new priority queue + nextTask->next = &task->channel->queue[newPriority]; + nextTask->prev = task->channel->queue[newPriority].prev; + nextTask->next->prev = nextTask; + nextTask->prev->next = nextTask; + } + } + } + } + + if (task->channel->scheduled != NULL) { + int32_t priority = VATASK_PIORITY(task->channel->scheduled); + task->channel->scheduled->next = &async->queue[priority]; + task->channel->scheduled->prev = async->queue[priority].prev; + task->channel->scheduled->next->prev = task->channel->scheduled; + task->channel->scheduled->prev->next = task->channel->scheduled; + } + } + } + + ret = vHashDrop(async->taskTable, task); + if (ret != 0) { + ASSERT(0); + } + async->numTasks--; + + // call complete callback + if (task->complete) { + task->complete(task->arg); + } + + if (task->numWait == 0) { + taosThreadCondDestroy(&task->waitCond); + 
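/*
 * Editor's note, not part of the patch: VATASK_PIORITY() above implements a simple aging
 * rule - each time a queued task is passed over its priorScore is bumped, and every four
 * skips raise its effective priority by one level, so low-priority work cannot starve. A
 * tiny standalone illustration (the real code indexes a queue array with this value; the
 * clamp below exists only to keep the sketch in range):
 */
#include <stdio.h>

static int effectivePriority(int priority, int priorScore) {
  int p = priority - priorScore / 4;
  return p < 0 ? 0 : p;
}

int main(void) {
  for (int skips = 0; skips <= 8; skips++) {
    printf("skips=%d -> effective priority %d\n", skips, effectivePriority(2, skips));
  }
  return 0;
}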
taosMemoryFree(task); + } else if (task->numWait == 1) { + taosThreadCondSignal(&task->waitCond); + } else { + taosThreadCondBroadcast(&task->waitCond); + } + return 0; +} + +static int32_t vnodeAsyncCancelAllTasks(SVAsync *async) { + for (int32_t i = 0; i < EVA_PRIORITY_MAX; i++) { + while (async->queue[i].next != &async->queue[i]) { + SVATask *task = async->queue[i].next; + task->prev->next = task->next; + task->next->prev = task->prev; + vnodeAsyncTaskDone(async, task); + } + } + return 0; +} + +static void *vnodeAsyncLoop(void *arg) { + SVWorker *worker = (SVWorker *)arg; + SVAsync *async = worker->async; + + setThreadName(async->label); + + for (;;) { + taosThreadMutexLock(&async->mutex); + + // finish last running task + if (worker->runningTask != NULL) { + vnodeAsyncTaskDone(async, worker->runningTask); + worker->runningTask = NULL; + } + + for (;;) { + if (async->stop || worker->workerId >= async->numWorkers) { + if (async->stop) { // cancel all tasks + vnodeAsyncCancelAllTasks(async); + } + worker->state = EVA_WORKER_STATE_STOP; + async->numLaunchWorkers--; + taosThreadMutexUnlock(&async->mutex); + return NULL; + } + + for (int32_t i = 0; i < EVA_PRIORITY_MAX; i++) { + SVATask *task = async->queue[i].next; + if (task != &async->queue[i]) { + if (worker->runningTask == NULL) { + worker->runningTask = task; + task->prev->next = task->next; + task->next->prev = task->prev; + } else { // promote priority + task->priorScore++; + int32_t priority = VATASK_PIORITY(task); + if (priority != i) { + // remove from current priority queue + task->prev->next = task->next; + task->next->prev = task->prev; + // add to new priority queue + task->next = &async->queue[priority]; + task->prev = async->queue[priority].prev; + task->next->prev = task; + task->prev->next = task; + } + } + } + } + + if (worker->runningTask == NULL) { + worker->state = EVA_WORKER_STATE_IDLE; + async->numIdleWorkers++; + taosThreadCondWait(&async->hasTask, &async->mutex); + async->numIdleWorkers--; + worker->state = EVA_WORKER_STATE_ACTIVE; + } else { + worker->runningTask->state = EVA_TASK_STATE_RUNNING; + break; + } + } + + taosThreadMutexUnlock(&async->mutex); + + // do run the task + worker->runningTask->execute(worker->runningTask->arg); + } + + return NULL; +} + +static uint32_t vnodeAsyncTaskHash(const void *obj) { + SVATask *task = (SVATask *)obj; + return MurmurHash3_32((const char *)(&task->taskId), sizeof(task->taskId)); +} + +static int32_t vnodeAsyncTaskCompare(const void *obj1, const void *obj2) { + SVATask *task1 = (SVATask *)obj1; + SVATask *task2 = (SVATask *)obj2; + if (task1->taskId < task2->taskId) { + return -1; + } else if (task1->taskId > task2->taskId) { + return 1; + } + return 0; +} + +static uint32_t vnodeAsyncChannelHash(const void *obj) { + SVAChannel *channel = (SVAChannel *)obj; + return MurmurHash3_32((const char *)(&channel->channelId), sizeof(channel->channelId)); +} + +static int32_t vnodeAsyncChannelCompare(const void *obj1, const void *obj2) { + SVAChannel *channel1 = (SVAChannel *)obj1; + SVAChannel *channel2 = (SVAChannel *)obj2; + if (channel1->channelId < channel2->channelId) { + return -1; + } else if (channel1->channelId > channel2->channelId) { + return 1; + } + return 0; +} + +int32_t vnodeAsyncInit(SVAsync **async, char *label) { + int32_t ret; + + if (async == NULL) { + return TSDB_CODE_INVALID_PARA; + } + + if (label == NULL) { + label = "anonymous"; + } + + (*async) = (SVAsync *)taosMemoryCalloc(1, sizeof(SVAsync) + strlen(label) + 1); + if ((*async) == NULL) { + return 
TSDB_CODE_OUT_OF_MEMORY; + } + + strcpy((char *)((*async) + 1), label); + (*async)->label = (const char *)((*async) + 1); + + taosThreadMutexInit(&(*async)->mutex, NULL); + taosThreadCondInit(&(*async)->hasTask, NULL); + (*async)->stop = false; + + // worker + (*async)->numWorkers = VNODE_ASYNC_DEFAULT_WORKERS; + (*async)->numLaunchWorkers = 0; + (*async)->numIdleWorkers = 0; + for (int32_t i = 0; i < VNODE_ASYNC_MAX_WORKERS; i++) { + (*async)->workers[i].async = (*async); + (*async)->workers[i].workerId = i; + (*async)->workers[i].state = EVA_WORKER_STATE_UINIT; + (*async)->workers[i].runningTask = NULL; + } + + // channel + (*async)->nextChannelId = 0; + (*async)->numChannels = 0; + (*async)->chList.prev = &(*async)->chList; + (*async)->chList.next = &(*async)->chList; + ret = vHashInit(&(*async)->channelTable, vnodeAsyncChannelHash, vnodeAsyncChannelCompare); + if (ret != 0) { + taosThreadMutexDestroy(&(*async)->mutex); + taosThreadCondDestroy(&(*async)->hasTask); + taosMemoryFree(*async); + return ret; + } + + // task + (*async)->nextTaskId = 0; + (*async)->numTasks = 0; + for (int32_t i = 0; i < EVA_PRIORITY_MAX; i++) { + (*async)->queue[i].next = &(*async)->queue[i]; + (*async)->queue[i].prev = &(*async)->queue[i]; + } + ret = vHashInit(&(*async)->taskTable, vnodeAsyncTaskHash, vnodeAsyncTaskCompare); + if (ret != 0) { + vHashDestroy(&(*async)->channelTable); + taosThreadMutexDestroy(&(*async)->mutex); + taosThreadCondDestroy(&(*async)->hasTask); + taosMemoryFree(*async); + return ret; + } + + return 0; +} + +int32_t vnodeAsyncDestroy(SVAsync **async) { + if ((*async) == NULL) { + return TSDB_CODE_INVALID_PARA; + } + + // set stop and broadcast + taosThreadMutexLock(&(*async)->mutex); + (*async)->stop = true; + taosThreadCondBroadcast(&(*async)->hasTask); + taosThreadMutexUnlock(&(*async)->mutex); + + // join all workers + for (int32_t i = 0; i < VNODE_ASYNC_MAX_WORKERS; i++) { + taosThreadMutexLock(&(*async)->mutex); + EVWorkerState state = (*async)->workers[i].state; + taosThreadMutexUnlock(&(*async)->mutex); + + if (state == EVA_WORKER_STATE_UINIT) { + continue; + } + + taosThreadJoin((*async)->workers[i].thread, NULL); + ASSERT((*async)->workers[i].state == EVA_WORKER_STATE_STOP); + (*async)->workers[i].state = EVA_WORKER_STATE_UINIT; + } + + // close all channels + for (SVAChannel *channel = (*async)->chList.next; channel != &(*async)->chList; channel = (*async)->chList.next) { + channel->next->prev = channel->prev; + channel->prev->next = channel->next; + + int32_t ret = vHashDrop((*async)->channelTable, channel); + if (ret) { + ASSERT(0); + } + (*async)->numChannels--; + taosMemoryFree(channel); + } + + ASSERT((*async)->numLaunchWorkers == 0); + ASSERT((*async)->numIdleWorkers == 0); + ASSERT((*async)->numChannels == 0); + ASSERT((*async)->numTasks == 0); + + taosThreadMutexDestroy(&(*async)->mutex); + taosThreadCondDestroy(&(*async)->hasTask); + + vHashDestroy(&(*async)->channelTable); + vHashDestroy(&(*async)->taskTable); + taosMemoryFree(*async); + *async = NULL; + + return 0; +} + +static int32_t vnodeAsyncLaunchWorker(SVAsync *async) { + for (int32_t i = 0; i < async->numWorkers; i++) { + ASSERT(async->workers[i].state != EVA_WORKER_STATE_IDLE); + if (async->workers[i].state == EVA_WORKER_STATE_ACTIVE) { + continue; + } else if (async->workers[i].state == EVA_WORKER_STATE_STOP) { + taosThreadJoin(async->workers[i].thread, NULL); + async->workers[i].state = EVA_WORKER_STATE_UINIT; + } + + taosThreadCreate(&async->workers[i].thread, NULL, vnodeAsyncLoop, 
&async->workers[i]); + async->workers[i].state = EVA_WORKER_STATE_ACTIVE; + async->numLaunchWorkers++; + break; + } + return 0; +} + +int32_t vnodeAsync(SVAsync *async, EVAPriority priority, int32_t (*execute)(void *), void (*complete)(void *), + void *arg, int64_t *taskId) { + return vnodeAsyncC(async, 0, priority, execute, complete, arg, taskId); +} + +int32_t vnodeAsyncC(SVAsync *async, int64_t channelId, EVAPriority priority, int32_t (*execute)(void *), + void (*complete)(void *), void *arg, int64_t *taskId) { + if (async == NULL || execute == NULL || channelId < 0) { + return TSDB_CODE_INVALID_PARA; + } + + int64_t id; + + // create task object + SVATask *task = (SVATask *)taosMemoryCalloc(1, sizeof(SVATask)); + if (task == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + task->priority = priority; + task->priorScore = 0; + task->execute = execute; + task->complete = complete; + task->arg = arg; + task->state = EVA_TASK_STATE_WAITTING; + task->numWait = 0; + taosThreadCondInit(&task->waitCond, NULL); + + // schedule task + taosThreadMutexLock(&async->mutex); + + if (channelId == 0) { + task->channel = NULL; + } else { + SVAChannel channel = {.channelId = channelId}; + vHashGet(async->channelTable, &channel, (void **)&task->channel); + if (task->channel == NULL) { + taosThreadMutexUnlock(&async->mutex); + taosThreadCondDestroy(&task->waitCond); + taosMemoryFree(task); + return TSDB_CODE_INVALID_PARA; + } + } + + task->taskId = id = ++async->nextTaskId; + + // add task to hash table + int32_t ret = vHashPut(async->taskTable, task); + if (ret != 0) { + taosThreadMutexUnlock(&async->mutex); + taosThreadCondDestroy(&task->waitCond); + taosMemoryFree(task); + return ret; + } + + async->numTasks++; + + // add task to queue + if (task->channel == NULL || task->channel->scheduled == NULL) { + // add task to async->queue + if (task->channel) { + task->channel->scheduled = task; + } + + task->next = &async->queue[priority]; + task->prev = async->queue[priority].prev; + task->next->prev = task; + task->prev->next = task; + + // signal worker or launch new worker + if (async->numIdleWorkers > 0) { + taosThreadCondSignal(&(async->hasTask)); + } else if (async->numLaunchWorkers < async->numWorkers) { + vnodeAsyncLaunchWorker(async); + } + } else if (task->channel->scheduled->state == EVA_TASK_STATE_RUNNING || + priority >= VATASK_PIORITY(task->channel->scheduled)) { + // add task to task->channel->queue + task->next = &task->channel->queue[priority]; + task->prev = task->channel->queue[priority].prev; + task->next->prev = task; + task->prev->next = task; + } else { + // remove task->channel->scheduled from queue + task->channel->scheduled->prev->next = task->channel->scheduled->next; + task->channel->scheduled->next->prev = task->channel->scheduled->prev; + + // promote priority and add task->channel->scheduled to task->channel->queue + task->channel->scheduled->priorScore++; + int32_t newPriority = VATASK_PIORITY(task->channel->scheduled); + task->channel->scheduled->next = &task->channel->queue[newPriority]; + task->channel->scheduled->prev = task->channel->queue[newPriority].prev; + task->channel->scheduled->next->prev = task->channel->scheduled; + task->channel->scheduled->prev->next = task->channel->scheduled; + + // add task to queue + task->channel->scheduled = task; + task->next = &async->queue[priority]; + task->prev = async->queue[priority].prev; + task->next->prev = task; + task->prev->next = task; + } + + taosThreadMutexUnlock(&async->mutex); + + if (taskId != NULL) { + *taskId = id; + 
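
The scheduling policy here is "wake an idle worker if one is parked, otherwise launch a new one, but never more than numWorkers". Below is a small standalone pthread sketch of that same decision with a hypothetical `Pool` type (not the patch's `SVAsync`), mainly to make the two counters, idle versus launched, explicit.

```c
/* Standalone sketch of the "signal an idle worker, else launch up to a cap"
 * dispatch used above. Pool and the worker names are hypothetical. */
#include <pthread.h>
#include <stdio.h>

#define MAX_WORKERS 4

typedef struct {
  pthread_mutex_t mutex;
  pthread_cond_t  hasTask;
  int             numIdle;      /* workers parked in cond_wait          */
  int             numLaunched;  /* workers created so far               */
  int             pending;      /* queued "tasks" (just a counter here) */
  int             stop;
  pthread_t       threads[MAX_WORKERS];
} Pool;

static void *workerLoop(void *arg) {
  Pool *p = arg;
  pthread_mutex_lock(&p->mutex);
  for (;;) {
    while (p->pending == 0 && !p->stop) {
      p->numIdle++;
      pthread_cond_wait(&p->hasTask, &p->mutex);
      p->numIdle--;
    }
    /* on stop, remaining queued work is abandoned, like the cancel-all
     * pass the real pool performs on shutdown */
    if (p->stop) break;
    p->pending--;
    pthread_mutex_unlock(&p->mutex);
    puts("running one task");          /* task body runs unlocked */
    pthread_mutex_lock(&p->mutex);
  }
  pthread_mutex_unlock(&p->mutex);
  return NULL;
}

static void submit(Pool *p) {          /* mirrors the tail of the scheduler above */
  pthread_mutex_lock(&p->mutex);
  p->pending++;
  if (p->numIdle > 0) {
    pthread_cond_signal(&p->hasTask);           /* reuse a parked worker */
  } else if (p->numLaunched < MAX_WORKERS) {    /* lazily grow the pool  */
    pthread_create(&p->threads[p->numLaunched], NULL, workerLoop, p);
    p->numLaunched++;
  }                                             /* else: leave it queued */
  pthread_mutex_unlock(&p->mutex);
}

int main(void) {
  Pool p = {.mutex = PTHREAD_MUTEX_INITIALIZER, .hasTask = PTHREAD_COND_INITIALIZER};
  submit(&p);
  submit(&p);

  pthread_mutex_lock(&p.mutex);
  p.stop = 1;
  int n = p.numLaunched;
  pthread_cond_broadcast(&p.hasTask);
  pthread_mutex_unlock(&p.mutex);
  for (int i = 0; i < n; i++) pthread_join(p.threads[i], NULL);
  return 0;
}
```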
} + + return 0; +} + +int32_t vnodeAWait(SVAsync *async, int64_t taskId) { + if (async == NULL || taskId <= 0) { + return TSDB_CODE_INVALID_PARA; + } + + SVATask *task = NULL; + SVATask task2 = {.taskId = taskId}; + + taosThreadMutexLock(&async->mutex); + + vHashGet(async->taskTable, &task2, (void **)&task); + if (task) { + task->numWait++; + taosThreadCondWait(&task->waitCond, &async->mutex); + task->numWait--; + + if (task->numWait == 0) { + taosThreadCondDestroy(&task->waitCond); + taosMemoryFree(task); + } + } + + taosThreadMutexUnlock(&async->mutex); + + return 0; +} + +int32_t vnodeACancel(SVAsync *async, int64_t taskId) { + if (async == NULL) { + return TSDB_CODE_INVALID_PARA; + } + + int32_t ret = 0; + SVATask *task = NULL; + SVATask task2 = {.taskId = taskId}; + + taosThreadMutexLock(&async->mutex); + + vHashGet(async->taskTable, &task2, (void **)&task); + if (task) { + if (task->state == EVA_TASK_STATE_WAITTING) { + // remove from queue + task->next->prev = task->prev; + task->prev->next = task->next; + vnodeAsyncTaskDone(async, task); + } else { + ret = 0; // task is running, should return code TSDB_CODE_BUSY ?? + } + } + + taosThreadMutexUnlock(&async->mutex); + + return ret; +} + +int32_t vnodeAsyncSetWorkers(SVAsync *async, int32_t numWorkers) { + if (async == NULL || numWorkers <= 0 || numWorkers > VNODE_ASYNC_MAX_WORKERS) { + return TSDB_CODE_INVALID_PARA; + } + + taosThreadMutexLock(&async->mutex); + async->numWorkers = numWorkers; + if (async->numIdleWorkers > 0) { + taosThreadCondBroadcast(&async->hasTask); + } + taosThreadMutexUnlock(&async->mutex); + + return 0; +} + +int32_t vnodeAChannelInit(SVAsync *async, int64_t *channelId) { + if (async == NULL || channelId == NULL) { + return TSDB_CODE_INVALID_PARA; + } + + // create channel object + SVAChannel *channel = (SVAChannel *)taosMemoryMalloc(sizeof(SVAChannel)); + if (channel == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + channel->state = EVA_CHANNEL_STATE_OPEN; + for (int32_t i = 0; i < EVA_PRIORITY_MAX; i++) { + channel->queue[i].next = &channel->queue[i]; + channel->queue[i].prev = &channel->queue[i]; + } + channel->scheduled = NULL; + + // register channel + taosThreadMutexLock(&async->mutex); + + channel->channelId = *channelId = ++async->nextChannelId; + + // add to hash table + int32_t ret = vHashPut(async->channelTable, channel); + if (ret != 0) { + taosThreadMutexUnlock(&async->mutex); + taosMemoryFree(channel); + return ret; + } + + // add to list + channel->next = &async->chList; + channel->prev = async->chList.prev; + channel->next->prev = channel; + channel->prev->next = channel; + + async->numChannels++; + + taosThreadMutexUnlock(&async->mutex); + + return 0; +} + +int32_t vnodeAChannelDestroy(SVAsync *async, int64_t channelId, bool waitRunning) { + if (async == NULL || channelId <= 0) { + return TSDB_CODE_INVALID_PARA; + } + + SVAChannel *channel = NULL; + SVAChannel channel2 = {.channelId = channelId}; + + taosThreadMutexLock(&async->mutex); + + vHashGet(async->channelTable, &channel2, (void **)&channel); + if (channel) { + // unregister channel + channel->next->prev = channel->prev; + channel->prev->next = channel->next; + vHashDrop(async->channelTable, channel); + async->numChannels--; + + // cancel all waiting tasks + for (int32_t i = 0; i < EVA_PRIORITY_MAX; i++) { + while (channel->queue[i].next != &channel->queue[i]) { + SVATask *task = channel->queue[i].next; + task->prev->next = task->next; + task->next->prev = task->prev; + vnodeAsyncTaskDone(async, task); + } + } + + // cancel or wait the 
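
A channel is this patch's unit of ordering: `vnodeAsyncC` keeps at most one task per channel in the global run queues, so two tasks submitted on the same channel never execute concurrently; later submissions wait in the channel's own queues. The sketch below shows that usage with the functions defined above (`demoExecute`/`demoComplete` are the placeholder callbacks from the earlier sketch); it is illustrative only and elides error handling.

```c
/* Illustrative only; relies on the SVAsync channel API defined in this file. */
static void demoChannelUsage(SVAsync *async, void *sharedState) {
  int64_t channelId = 0;
  int64_t t1 = 0, t2 = 0;

  vnodeAChannelInit(async, &channelId);

  /* Both tasks touch sharedState, but because they share a channel the
   * second stays parked in the channel queue until the first completes. */
  vnodeAsyncC(async, channelId, EVA_PRIORITY_HIGH, demoExecute, demoComplete, sharedState, &t1);
  vnodeAsyncC(async, channelId, EVA_PRIORITY_HIGH, demoExecute, demoComplete, sharedState, &t2);

  vnodeAWait(async, t2);                         /* implies t1 finished first */

  /* waitRunning = true blocks on an in-flight task; false marks the channel
   * CLOSE and lets the worker reclaim it once that task completes. */
  vnodeAChannelDestroy(async, channelId, true);
}
```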
scheduled task + if (channel->scheduled == NULL || channel->scheduled->state == EVA_TASK_STATE_WAITTING) { + if (channel->scheduled) { + channel->scheduled->prev->next = channel->scheduled->next; + channel->scheduled->next->prev = channel->scheduled->prev; + vnodeAsyncTaskDone(async, channel->scheduled); + } + taosMemoryFree(channel); + } else { + if (waitRunning) { + // wait task + SVATask *task = channel->scheduled; + task->numWait++; + taosThreadCondWait(&task->waitCond, &async->mutex); + task->numWait--; + if (task->numWait == 0) { + taosThreadCondDestroy(&task->waitCond); + taosMemoryFree(task); + } + + taosMemoryFree(channel); + } else { + channel->state = EVA_CHANNEL_STATE_CLOSE; + } + } + } else { + taosThreadMutexUnlock(&async->mutex); + return TSDB_CODE_INVALID_PARA; + } + + taosThreadMutexUnlock(&async->mutex); + + return 0; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index a178e1f772..c8cd167393 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -203,10 +203,8 @@ int vnodeSaveInfo(const char *dir, const SVnodeInfo *pInfo) { // free info binary taosMemoryFree(data); - vInfo("vgId:%d, vnode info is saved, fname:%s replica:%d selfIndex:%d changeVersion:%d", - pInfo->config.vgId, fname, - pInfo->config.syncCfg.replicaNum, pInfo->config.syncCfg.myIndex, - pInfo->config.syncCfg.changeVersion); + vInfo("vgId:%d, vnode info is saved, fname:%s replica:%d selfIndex:%d changeVersion:%d", pInfo->config.vgId, fname, + pInfo->config.syncCfg.replicaNum, pInfo->config.syncCfg.myIndex, pInfo->config.syncCfg.changeVersion); return 0; @@ -289,9 +287,10 @@ static int32_t vnodePrepareCommit(SVnode *pVnode, SCommitInfo *pInfo) { char dir[TSDB_FILENAME_LEN] = {0}; int64_t lastCommitted = pInfo->info.state.committed; - tsem_wait(&pVnode->canCommit); + // wait last commit task + vnodeAWait(vnodeAsyncHandle[0], pVnode->commitTask); - if(syncNodeGetConfig(pVnode->sync, &pVnode->config.syncCfg) != 0) goto _exit; + if (syncNodeGetConfig(pVnode->sync, &pVnode->config.syncCfg) != 0) goto _exit; pVnode->state.commitTerm = pVnode->state.applyTerm; @@ -379,12 +378,11 @@ static int32_t vnodeCommitTask(void *arg) { vnodeReturnBufPool(pVnode); _exit: - // end commit - tsem_post(&pVnode->canCommit); - taosMemoryFree(pInfo); return code; } +static void vnodeCompleteCommit(void *arg) { taosMemoryFree(arg); } + int vnodeAsyncCommit(SVnode *pVnode) { int32_t code = 0; @@ -401,14 +399,14 @@ int vnodeAsyncCommit(SVnode *pVnode) { } // schedule the task - code = vnodeScheduleTask(vnodeCommitTask, pInfo); + code = vnodeAsyncC(vnodeAsyncHandle[0], pVnode->commitChannel, EVA_PRIORITY_HIGH, vnodeCommitTask, + vnodeCompleteCommit, pInfo, &pVnode->commitTask); _exit: if (code) { if (NULL != pInfo) { taosMemoryFree(pInfo); } - tsem_post(&pVnode->canCommit); vError("vgId:%d, %s failed since %s, commit id:%" PRId64, TD_VID(pVnode), __func__, tstrerror(code), pVnode->state.commitID); } else { @@ -420,8 +418,7 @@ _exit: int vnodeSyncCommit(SVnode *pVnode) { vnodeAsyncCommit(pVnode); - tsem_wait(&pVnode->canCommit); - tsem_post(&pVnode->canCommit); + vnodeAWait(vnodeAsyncHandle[0], pVnode->commitTask); return 0; } @@ -501,7 +498,7 @@ _exit: } bool vnodeShouldRollback(SVnode *pVnode) { - char tFName[TSDB_FILENAME_LEN] = {0}; + char tFName[TSDB_FILENAME_LEN] = {0}; int32_t offset = 0; vnodeGetPrimaryDir(pVnode->path, pVnode->diskPrimary, pVnode->pTfs, tFName, TSDB_FILENAME_LEN); @@ -512,7 +509,7 @@ 
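
Both `vnodeAWait` and the `waitRunning` path of `vnodeAChannelDestroy` rely on the same ownership rule: the task owns its condition variable, every waiter bumps `numWait` before sleeping, and whoever observes the count at zero after completion destroys the cond and frees the task. The standalone pthread sketch below captures that rule with hypothetical names and an explicit `done` flag to stay robust against spurious wakeups; it is not a copy of the patch's code, and it assumes no new waiter can register once completion ran (the patch guarantees this by removing the task from its hash table first).

```c
/* Standalone sketch of the "last one out frees" pattern used by vnodeAWait
 * and vnodeAChannelDestroy above. Names are hypothetical. */
#include <pthread.h>
#include <stdlib.h>

typedef struct {
  pthread_cond_t cond;
  int            done;
  int            numWait;
} Waitable;

static pthread_mutex_t gLock = PTHREAD_MUTEX_INITIALIZER; /* plays the role of async->mutex */

/* called by the worker when the task finishes; caller holds gLock */
static void markDone(Waitable *w) {
  w->done = 1;
  if (w->numWait == 0) {               /* nobody waiting: completer owns the free */
    pthread_cond_destroy(&w->cond);
    free(w);
  } else if (w->numWait == 1) {
    pthread_cond_signal(&w->cond);
  } else {
    pthread_cond_broadcast(&w->cond);
  }
}

/* called by any thread that wants to block until the task finishes */
static void waitDone(Waitable *w) {
  pthread_mutex_lock(&gLock);
  w->numWait++;
  while (!w->done) pthread_cond_wait(&w->cond, &gLock);
  w->numWait--;
  if (w->numWait == 0) {               /* last waiter out: it owns the free */
    pthread_cond_destroy(&w->cond);
    free(w);
  }
  pthread_mutex_unlock(&gLock);
}
```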
bool vnodeShouldRollback(SVnode *pVnode) { } void vnodeRollback(SVnode *pVnode) { - char tFName[TSDB_FILENAME_LEN] = {0}; + char tFName[TSDB_FILENAME_LEN] = {0}; int32_t offset = 0; vnodeGetPrimaryDir(pVnode->path, pVnode->diskPrimary, pVnode->pTfs, tFName, TSDB_FILENAME_LEN); diff --git a/source/dnode/vnode/src/vnd/vnodeHash.c b/source/dnode/vnode/src/vnd/vnodeHash.c new file mode 100644 index 0000000000..33602f6581 --- /dev/null +++ b/source/dnode/vnode/src/vnd/vnodeHash.c @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "vnodeHash.h" + +#define VNODE_HASH_DEFAULT_NUM_BUCKETS 1024 + +typedef struct SVHashEntry SVHashEntry; + +struct SVHashEntry { + SVHashEntry* next; + void* obj; +}; + +struct SVHashTable { + uint32_t (*hash)(const void*); + int32_t (*compare)(const void*, const void*); + int32_t numEntries; + uint32_t numBuckets; + SVHashEntry** buckets; +}; + +static int32_t vHashRehash(SVHashTable* ht, uint32_t newNumBuckets) { + SVHashEntry** newBuckets = (SVHashEntry**)taosMemoryCalloc(newNumBuckets, sizeof(SVHashEntry*)); + if (newBuckets == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + for (int32_t i = 0; i < ht->numBuckets; i++) { + SVHashEntry* entry = ht->buckets[i]; + while (entry != NULL) { + SVHashEntry* next = entry->next; + uint32_t bucketIndex = ht->hash(entry->obj) % newNumBuckets; + entry->next = newBuckets[bucketIndex]; + newBuckets[bucketIndex] = entry; + entry = next; + } + } + + taosMemoryFree(ht->buckets); + ht->buckets = newBuckets; + ht->numBuckets = newNumBuckets; + + return 0; +} + +int32_t vHashInit(SVHashTable** ht, uint32_t (*hash)(const void*), int32_t (*compare)(const void*, const void*)) { + if (ht == NULL || hash == NULL || compare == NULL) { + return TSDB_CODE_INVALID_PARA; + } + + (*ht) = (SVHashTable*)taosMemoryMalloc(sizeof(SVHashTable)); + if (*ht == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + (*ht)->hash = hash; + (*ht)->compare = compare; + (*ht)->numEntries = 0; + (*ht)->numBuckets = VNODE_HASH_DEFAULT_NUM_BUCKETS; + (*ht)->buckets = (SVHashEntry**)taosMemoryCalloc((*ht)->numBuckets, sizeof(SVHashEntry*)); + if ((*ht)->buckets == NULL) { + taosMemoryFree(*ht); + return TSDB_CODE_OUT_OF_MEMORY; + } + + return 0; +} + +int32_t vHashDestroy(SVHashTable** ht) { + if (ht == NULL) { + return TSDB_CODE_INVALID_PARA; + } + + if (*ht) { + ASSERT((*ht)->numEntries == 0); + taosMemoryFree((*ht)->buckets); + taosMemoryFree(*ht); + (*ht) = NULL; + } + return 0; +} + +int32_t vHashPut(SVHashTable* ht, void* obj) { + if (ht == NULL || obj == NULL) { + return TSDB_CODE_INVALID_PARA; + } + + uint32_t bucketIndex = ht->hash(obj) % ht->numBuckets; + for (SVHashEntry* entry = ht->buckets[bucketIndex]; entry != NULL; entry = entry->next) { + if (ht->compare(entry->obj, obj) == 0) { + return TSDB_CODE_DUP_KEY; + } + } + + if (ht->numEntries >= ht->numBuckets) { + vHashRehash(ht, ht->numBuckets * 2); + bucketIndex = ht->hash(obj) % ht->numBuckets; + } + + SVHashEntry* entry = 
(SVHashEntry*)taosMemoryMalloc(sizeof(SVHashEntry)); + if (entry == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + entry->obj = obj; + entry->next = ht->buckets[bucketIndex]; + ht->buckets[bucketIndex] = entry; + ht->numEntries++; + + return 0; +} + +int32_t vHashGet(SVHashTable* ht, const void* obj, void** retObj) { + if (ht == NULL || obj == NULL || retObj == NULL) { + return TSDB_CODE_INVALID_PARA; + } + + uint32_t bucketIndex = ht->hash(obj) % ht->numBuckets; + for (SVHashEntry* entry = ht->buckets[bucketIndex]; entry != NULL; entry = entry->next) { + if (ht->compare(entry->obj, obj) == 0) { + *retObj = entry->obj; + return 0; + } + } + + *retObj = NULL; + return TSDB_CODE_NOT_FOUND; +} + +int32_t vHashDrop(SVHashTable* ht, const void* obj) { + if (ht == NULL || obj == NULL) { + return TSDB_CODE_INVALID_PARA; + } + + uint32_t bucketIndex = ht->hash(obj) % ht->numBuckets; + for (SVHashEntry** entry = &ht->buckets[bucketIndex]; *entry != NULL; entry = &(*entry)->next) { + if (ht->compare((*entry)->obj, obj) == 0) { + SVHashEntry* tmp = *entry; + *entry = (*entry)->next; + taosMemoryFree(tmp); + ht->numEntries--; + if (ht->numBuckets > VNODE_HASH_DEFAULT_NUM_BUCKETS && ht->numEntries < ht->numBuckets / 4) { + vHashRehash(ht, ht->numBuckets / 2); + } + return 0; + } + } + + return TSDB_CODE_NOT_FOUND; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/vnd/vnodeHash.h b/source/dnode/vnode/src/vnd/vnodeHash.h new file mode 100644 index 0000000000..86f6f9ac87 --- /dev/null +++ b/source/dnode/vnode/src/vnd/vnodeHash.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
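
vnodeHash.c is a plain separate-chaining table keyed by caller-supplied hash and compare callbacks; it stores the caller's pointer, doubles the bucket count once entries reach it, and shrinks again below a quarter load. Below is a small usage sketch against the `vHash*` signatures declared in vnodeHash.h; the `Item` type and its trivial callbacks are hypothetical, and TDengine headers are assumed for the error codes, so it is illustrative rather than standalone.

```c
/* Usage sketch for the SVHashTable API above; Item, itemHash and itemCompare
 * are hypothetical. The table stores pointers and never copies objects. */
typedef struct {
  int64_t id;     /* key */
  int32_t value;
} Item;

static uint32_t itemHash(const void *obj) {
  const Item *it = obj;
  return (uint32_t)it->id * 2654435761u;      /* any stable hash of the key */
}

static int32_t itemCompare(const void *a, const void *b) {
  const Item *x = a, *y = b;
  return (x->id < y->id) ? -1 : (x->id > y->id) ? 1 : 0;
}

static void demoHashUsage(void) {
  SVHashTable *ht = NULL;
  vHashInit(&ht, itemHash, itemCompare);

  Item item = {.id = 7, .value = 42};
  vHashPut(ht, &item);                  /* TSDB_CODE_DUP_KEY if id 7 is already present */

  Item  key = {.id = 7};                /* lookups only need the key fields filled in */
  Item *found = NULL;
  vHashGet(ht, &key, (void **)&found);  /* found == &item on success */

  vHashDrop(ht, &key);                  /* unlink only; the caller still owns &item */
  vHashDestroy(&ht);                    /* asserts the table is empty */
}
```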
+ */ + +#ifndef _VNODE_HAS_H_ +#define _VNODE_HAS_H_ + +#include "vnd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct SVHashTable SVHashTable; + +int32_t vHashInit(SVHashTable** ht, uint32_t (*hash)(const void*), int32_t (*compare)(const void*, const void*)); +int32_t vHashDestroy(SVHashTable** ht); +int32_t vHashPut(SVHashTable* ht, void* obj); +int32_t vHashGet(SVHashTable* ht, const void* obj, void** retObj); +int32_t vHashDrop(SVHashTable* ht, const void* obj); + +#ifdef __cplusplus +} +#endif + +#endif /*_VNODE_HAS_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/vnd/vnodeModule.c b/source/dnode/vnode/src/vnd/vnodeModule.c index df08fb8a2b..4e3cee42c6 100644 --- a/source/dnode/vnode/src/vnd/vnodeModule.c +++ b/source/dnode/vnode/src/vnd/vnodeModule.c @@ -16,65 +16,25 @@ #include "cos.h" #include "vnd.h" -typedef struct SVnodeTask SVnodeTask; -struct SVnodeTask { - SVnodeTask* next; - SVnodeTask* prev; - int (*execute)(void*); - void* arg; -}; +static volatile int32_t VINIT = 0; -typedef struct { - int nthreads; - TdThread* threads; - TdThreadMutex mutex; - TdThreadCond hasTask; - SVnodeTask queue; -} SVnodeThreadPool; - -struct SVnodeGlobal { - int8_t init; - int8_t stop; - SVnodeThreadPool tp[2]; -}; - -struct SVnodeGlobal vnodeGlobal; - -static void* loop(void* arg); +SVAsync* vnodeAsyncHandle[2]; int vnodeInit(int nthreads) { - int8_t init; - int ret; + int32_t init; - init = atomic_val_compare_exchange_8(&(vnodeGlobal.init), 0, 1); + init = atomic_val_compare_exchange_32(&VINIT, 0, 1); if (init) { return 0; } - vnodeGlobal.stop = 0; - for (int32_t i = 0; i < ARRAY_SIZE(vnodeGlobal.tp); i++) { - taosThreadMutexInit(&vnodeGlobal.tp[i].mutex, NULL); - taosThreadCondInit(&vnodeGlobal.tp[i].hasTask, NULL); + // vnode-commit + vnodeAsyncInit(&vnodeAsyncHandle[0], "vnode-commit"); + vnodeAsyncSetWorkers(vnodeAsyncHandle[0], nthreads); - taosThreadMutexLock(&vnodeGlobal.tp[i].mutex); - - vnodeGlobal.tp[i].queue.next = &vnodeGlobal.tp[i].queue; - vnodeGlobal.tp[i].queue.prev = &vnodeGlobal.tp[i].queue; - - taosThreadMutexUnlock(&(vnodeGlobal.tp[i].mutex)); - - vnodeGlobal.tp[i].nthreads = nthreads; - vnodeGlobal.tp[i].threads = taosMemoryCalloc(nthreads, sizeof(TdThread)); - if (vnodeGlobal.tp[i].threads == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - vError("failed to init vnode module since:%s", tstrerror(terrno)); - return -1; - } - - for (int j = 0; j < nthreads; j++) { - taosThreadCreate(&(vnodeGlobal.tp[i].threads[j]), NULL, loop, &vnodeGlobal.tp[i]); - } - } + // vnode-merge + vnodeAsyncInit(&vnodeAsyncHandle[1], "vnode-merge"); + vnodeAsyncSetWorkers(vnodeAsyncHandle[1], nthreads); if (walInit() < 0) { return -1; @@ -90,99 +50,15 @@ int vnodeInit(int nthreads) { } void vnodeCleanup() { - int8_t init; - - init = atomic_val_compare_exchange_8(&(vnodeGlobal.init), 1, 0); + int32_t init = atomic_val_compare_exchange_32(&VINIT, 1, 0); if (init == 0) return; // set stop - vnodeGlobal.stop = 1; - for (int32_t i = 0; i < ARRAY_SIZE(vnodeGlobal.tp); i++) { - taosThreadMutexLock(&(vnodeGlobal.tp[i].mutex)); - taosThreadCondBroadcast(&(vnodeGlobal.tp[i].hasTask)); - taosThreadMutexUnlock(&(vnodeGlobal.tp[i].mutex)); - - // wait for threads - for (int j = 0; j < vnodeGlobal.tp[i].nthreads; j++) { - taosThreadJoin(vnodeGlobal.tp[i].threads[j], NULL); - } - - // clear source - taosMemoryFreeClear(vnodeGlobal.tp[i].threads); - taosThreadCondDestroy(&(vnodeGlobal.tp[i].hasTask)); - taosThreadMutexDestroy(&(vnodeGlobal.tp[i].mutex)); - } + 
vnodeAsyncDestroy(&vnodeAsyncHandle[0]); + vnodeAsyncDestroy(&vnodeAsyncHandle[1]); walCleanUp(); tqCleanUp(); smaCleanUp(); s3CleanUp(); } - -int vnodeScheduleTaskEx(int tpid, int (*execute)(void*), void* arg) { - SVnodeTask* pTask; - - ASSERT(!vnodeGlobal.stop); - - pTask = taosMemoryMalloc(sizeof(*pTask)); - if (pTask == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - pTask->execute = execute; - pTask->arg = arg; - - taosThreadMutexLock(&(vnodeGlobal.tp[tpid].mutex)); - pTask->next = &vnodeGlobal.tp[tpid].queue; - pTask->prev = vnodeGlobal.tp[tpid].queue.prev; - vnodeGlobal.tp[tpid].queue.prev->next = pTask; - vnodeGlobal.tp[tpid].queue.prev = pTask; - taosThreadCondSignal(&(vnodeGlobal.tp[tpid].hasTask)); - taosThreadMutexUnlock(&(vnodeGlobal.tp[tpid].mutex)); - - return 0; -} - -int vnodeScheduleTask(int (*execute)(void*), void* arg) { return vnodeScheduleTaskEx(0, execute, arg); } - -/* ------------------------ STATIC METHODS ------------------------ */ -static void* loop(void* arg) { - SVnodeThreadPool* tp = (SVnodeThreadPool*)arg; - SVnodeTask* pTask; - int ret; - - if (tp == &vnodeGlobal.tp[0]) { - setThreadName("vnode-commit"); - } else if (tp == &vnodeGlobal.tp[1]) { - setThreadName("vnode-merge"); - } - - for (;;) { - taosThreadMutexLock(&(tp->mutex)); - for (;;) { - pTask = tp->queue.next; - if (pTask == &tp->queue) { - // no task - if (vnodeGlobal.stop) { - taosThreadMutexUnlock(&(tp->mutex)); - return NULL; - } else { - taosThreadCondWait(&(tp->hasTask), &(tp->mutex)); - } - } else { - // has task - pTask->prev->next = pTask->next; - pTask->next->prev = pTask->prev; - break; - } - } - - taosThreadMutexUnlock(&(tp->mutex)); - - pTask->execute(pTask->arg); - taosMemoryFree(pTask); - } - - return NULL; -} diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index ff79e83d72..946ce9d278 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -129,8 +129,8 @@ int32_t vnodeAlterReplica(const char *path, SAlterVnodeReplicaReq *pReq, int32_t } pCfg->changeVersion = pReq->changeVersion; - vInfo("vgId:%d, save config while alter, replicas:%d totalReplicas:%d selfIndex:%d changeVersion:%d", - pReq->vgId, pCfg->replicaNum, pCfg->totalReplicaNum, pCfg->myIndex, pCfg->changeVersion); + vInfo("vgId:%d, save config while alter, replicas:%d totalReplicas:%d selfIndex:%d changeVersion:%d", pReq->vgId, + pCfg->replicaNum, pCfg->totalReplicaNum, pCfg->myIndex, pCfg->changeVersion); info.config.syncCfg = *pCfg; ret = vnodeSaveInfo(dir, &info); @@ -396,10 +396,14 @@ SVnode *vnodeOpen(const char *path, int32_t diskPrimary, STfs *pTfs, SMsgCb msgC pVnode->blocked = false; tsem_init(&pVnode->syncSem, 0, 0); - tsem_init(&(pVnode->canCommit), 0, 1); taosThreadMutexInit(&pVnode->mutex, NULL); taosThreadCondInit(&pVnode->poolNotEmpty, NULL); + if (vnodeAChannelInit(vnodeAsyncHandle[0], &pVnode->commitChannel) != 0) { + vError("vgId:%d, failed to init commit channel", TD_VID(pVnode)); + goto _err; + } + int8_t rollback = vnodeShouldRollback(pVnode); // open buffer pool @@ -487,7 +491,6 @@ _err: if (pVnode->pMeta) metaClose(&pVnode->pMeta); if (pVnode->freeList) vnodeCloseBufPool(pVnode); - tsem_destroy(&(pVnode->canCommit)); taosMemoryFree(pVnode); return NULL; } @@ -501,7 +504,8 @@ void vnodePostClose(SVnode *pVnode) { vnodeSyncPostClose(pVnode); } void vnodeClose(SVnode *pVnode) { if (pVnode) { - tsem_wait(&pVnode->canCommit); + vnodeAWait(vnodeAsyncHandle[0], pVnode->commitTask); + 
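
vnodeInit/vnodeCleanup keep the old compare-and-swap guard (now the 32-bit `VINIT`) so repeated calls stay harmless: only the caller that flips 0→1 builds the two SVAsync handles, and only the caller that flips 1→0 tears them down. A standalone sketch of that idempotent init/cleanup shape using C11 atomics (hypothetical names, not the taos atomic wrappers):

```c
/* Standalone sketch of the CAS-guarded one-time init/cleanup used by
 * vnodeInit/vnodeCleanup above; moduleInit/moduleCleanup are placeholders. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int gInit = 0;

static int moduleInit(void) {
  int expected = 0;
  if (!atomic_compare_exchange_strong(&gInit, &expected, 1)) {
    return 0;                      /* somebody else already initialized: no-op */
  }
  puts("creating worker pools");   /* e.g. the two vnodeAsyncInit calls */
  return 0;
}

static void moduleCleanup(void) {
  int expected = 1;
  if (!atomic_compare_exchange_strong(&gInit, &expected, 0)) {
    return;                        /* never initialized (or already cleaned up) */
  }
  puts("destroying worker pools"); /* e.g. the two vnodeAsyncDestroy calls */
}

int main(void) {
  moduleInit();
  moduleInit();      /* second call is a no-op */
  moduleCleanup();
  moduleCleanup();   /* second call is a no-op */
  return 0;
}
```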
vnodeAChannelDestroy(vnodeAsyncHandle[0], pVnode->commitChannel, true); vnodeSyncClose(pVnode); vnodeQueryClose(pVnode); tqClose(pVnode->pTq); @@ -510,10 +514,8 @@ void vnodeClose(SVnode *pVnode) { smaClose(pVnode->pSma); if (pVnode->pMeta) metaClose(&pVnode->pMeta); vnodeCloseBufPool(pVnode); - tsem_post(&pVnode->canCommit); // destroy handle - tsem_destroy(&(pVnode->canCommit)); tsem_destroy(&pVnode->syncSem); taosThreadCondDestroy(&pVnode->poolNotEmpty); taosThreadMutexDestroy(&pVnode->mutex); diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index 91244e321f..f2ef11e9ed 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -13,8 +13,8 @@ * along with this program. If not, see . */ -#include "vnd.h" #include "tsdb.h" +#include "vnd.h" // SVSnapReader ======================================================== struct SVSnapReader { @@ -32,11 +32,11 @@ struct SVSnapReader { TSnapRangeArray *pRanges; STsdbSnapReader *pTsdbReader; // tq - int8_t tqHandleDone; - STqSnapReader *pTqSnapReader; - int8_t tqOffsetDone; - STqOffsetReader *pTqOffsetReader; - int8_t tqCheckInfoDone; + int8_t tqHandleDone; + STqSnapReader *pTqSnapReader; + int8_t tqOffsetDone; + STqOffsetReader *pTqOffsetReader; + int8_t tqCheckInfoDone; STqCheckInfoReader *pTqCheckInfoReader; // stream int8_t streamTaskDone; @@ -458,8 +458,8 @@ struct SVSnapWriter { TSnapRangeArray *pRanges; STsdbSnapWriter *pTsdbSnapWriter; // tq - STqSnapWriter *pTqSnapWriter; - STqOffsetWriter *pTqOffsetWriter; + STqSnapWriter *pTqSnapWriter; + STqOffsetWriter *pTqOffsetWriter; STqCheckInfoWriter *pTqCheckInfoWriter; // stream SStreamTaskWriter *pStreamTaskWriter; @@ -519,6 +519,8 @@ _out: return code; } +extern int32_t tsdbCancelAllBgTask(STsdb *tsdb); + int32_t vnodeSnapWriterOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapWriter **ppWriter) { int32_t code = 0; SVSnapWriter *pWriter = NULL; @@ -526,8 +528,8 @@ int32_t vnodeSnapWriterOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapWriter int64_t ever = pParam->end; // commit memory data - vnodeAsyncCommit(pVnode); - tsem_wait(&pVnode->canCommit); + vnodeSyncCommit(pVnode); + tsdbCancelAllBgTask(pVnode->pTsdb); // alloc pWriter = (SVSnapWriter *)taosMemoryCalloc(1, sizeof(*pWriter)); @@ -657,7 +659,6 @@ _exit: vInfo("vgId:%d, vnode snapshot writer closed, rollback:%d", TD_VID(pVnode), rollback); taosMemoryFree(pWriter); } - tsem_post(&pVnode->canCommit); return code; } diff --git a/source/libs/executor/src/streameventwindowoperator.c b/source/libs/executor/src/streameventwindowoperator.c index 9b987ff1a4..9f1610e08d 100644 --- a/source/libs/executor/src/streameventwindowoperator.c +++ b/source/libs/executor/src/streameventwindowoperator.c @@ -610,6 +610,13 @@ void streamEventReloadState(SOperatorInfo* pOperator) { compactEventWindow(pOperator, &curInfo, pInfo->pSeUpdated, pInfo->pSeDeleted, false); qDebug("===stream=== reload state. 
save result %" PRId64 ", %" PRIu64, curInfo.winInfo.sessionWin.win.skey, curInfo.winInfo.sessionWin.groupId); + if (IS_VALID_SESSION_WIN(curInfo.winInfo)) { + saveSessionOutputBuf(pAggSup, &curInfo.winInfo); + } + + if (!curInfo.pWinFlag->endFlag) { + continue; + } if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE) { saveResult(curInfo.winInfo, pInfo->pSeUpdated); @@ -621,10 +628,6 @@ void streamEventReloadState(SOperatorInfo* pOperator) { getSessionHashKey(&curInfo.winInfo.sessionWin, &key); tSimpleHashPut(pAggSup->pResultRows, &key, sizeof(SSessionKey), &curInfo.winInfo, sizeof(SResultWindowInfo)); } - - if (IS_VALID_SESSION_WIN(curInfo.winInfo)) { - saveSessionOutputBuf(pAggSup, &curInfo.winInfo); - } } taosMemoryFree(pBuf); diff --git a/source/libs/executor/src/streamtimewindowoperator.c b/source/libs/executor/src/streamtimewindowoperator.c index 86d83e05ad..893848f010 100644 --- a/source/libs/executor/src/streamtimewindowoperator.c +++ b/source/libs/executor/src/streamtimewindowoperator.c @@ -276,15 +276,15 @@ static int32_t getAllIntervalWindow(SSHashObj* pHashMap, SSHashObj* resWins) { void* pIte = NULL; int32_t iter = 0; while ((pIte = tSimpleHashIterate(pHashMap, pIte, &iter)) != NULL) { - SWinKey* pKey = tSimpleHashGetKey(pIte, NULL); - uint64_t groupId = pKey->groupId; - TSKEY ts = pKey->ts; + SWinKey* pKey = tSimpleHashGetKey(pIte, NULL); + uint64_t groupId = pKey->groupId; + TSKEY ts = pKey->ts; SRowBuffPos* pPos = *(SRowBuffPos**)pIte; if (!pPos->beUpdated) { continue; } pPos->beUpdated = false; - int32_t code = saveWinResultInfo(ts, groupId, pPos, resWins); + int32_t code = saveWinResultInfo(ts, groupId, pPos, resWins); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -1091,10 +1091,10 @@ void doStreamIntervalDecodeOpState(void* buf, int32_t len, SOperatorInfo* pOpera int32_t mapSize = 0; buf = taosDecodeFixedI32(buf, &mapSize); for (int32_t i = 0; i < mapSize; i++) { - SWinKey key = {0}; + SWinKey key = {0}; buf = decodeSWinKey(buf, &key); SRowBuffPos* pPos = NULL; - int32_t resSize = pInfo->aggSup.resultRowSize; + int32_t resSize = pInfo->aggSup.resultRowSize; pInfo->stateStore.streamStateAddIfNotExist(pInfo->pState, &key, (void**)&pPos, &resSize); tSimpleHashPut(pInfo->aggSup.pResultRowHashTable, &key, sizeof(SWinKey), &pPos, POINTER_BYTES); } @@ -1165,7 +1165,7 @@ static SSDataBlock* buildIntervalResult(SOperatorInfo* pOperator) { return NULL; } -int32_t copyUpdateResult(SSHashObj** ppWinUpdated, SArray* pUpdated, __compar_fn_t compar) { +int32_t copyUpdateResult(SSHashObj** ppWinUpdated, SArray* pUpdated, __compar_fn_t compar) { void* pIte = NULL; int32_t iter = 0; while ((pIte = tSimpleHashIterate(*ppWinUpdated, pIte, &iter)) != NULL) { @@ -1402,10 +1402,12 @@ void streamIntervalReloadState(SOperatorInfo* pOperator) { void* pBuf = NULL; int32_t code = pInfo->stateStore.streamStateGetInfo(pInfo->pState, STREAM_INTERVAL_OP_STATE_NAME, strlen(STREAM_INTERVAL_OP_STATE_NAME), &pBuf, &size); - TSKEY ts = *(TSKEY*)pBuf; - taosMemoryFree(pBuf); - pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, ts); - pInfo->stateStore.streamStateReloadInfo(pInfo->pState, ts); + if (code == 0) { + TSKEY ts = *(TSKEY*)pBuf; + taosMemoryFree(pBuf); + pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, ts); + pInfo->stateStore.streamStateReloadInfo(pInfo->pState, ts); + } } SOperatorInfo* downstream = pOperator->pDownstream[0]; if (downstream->fpSet.reloadStreamStateFn) { @@ -1723,8 +1725,8 @@ void setSessionOutputBuf(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endT 
pCurWin->sessionWin.win.skey = startTs; pCurWin->sessionWin.win.ekey = endTs; int32_t size = pAggSup->resultRowSize; - int32_t code = pAggSup->stateStore.streamStateSessionAddIfNotExist(pAggSup->pState, &pCurWin->sessionWin, - pAggSup->gap, (void**)&pCurWin->pStatePos, &size); + int32_t code = pAggSup->stateStore.streamStateSessionAddIfNotExist(pAggSup->pState, &pCurWin->sessionWin, + pAggSup->gap, (void**)&pCurWin->pStatePos, &size); if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &pCurWin->sessionWin.win)) { code = TSDB_CODE_FAILED; clearOutputBuf(pAggSup->pState, pCurWin->pStatePos, &pAggSup->pSessionAPI->stateStore); @@ -1822,9 +1824,9 @@ void removeSessionResults(SStreamAggSupporter* pAggSup, SSHashObj* pHashMap, SAr } } -int32_t updateSessionWindowInfo(SStreamAggSupporter* pAggSup, SResultWindowInfo* pWinInfo, TSKEY* pStartTs, TSKEY* pEndTs, uint64_t groupId, - int32_t rows, int32_t start, int64_t gap, SSHashObj* pResultRows, SSHashObj* pStUpdated, - SSHashObj* pStDeleted) { +int32_t updateSessionWindowInfo(SStreamAggSupporter* pAggSup, SResultWindowInfo* pWinInfo, TSKEY* pStartTs, + TSKEY* pEndTs, uint64_t groupId, int32_t rows, int32_t start, int64_t gap, + SSHashObj* pResultRows, SSHashObj* pStUpdated, SSHashObj* pStDeleted) { for (int32_t i = start; i < rows; ++i) { if (!isInWindow(pWinInfo, pStartTs[i], gap) && (!pEndTs || !isInWindow(pWinInfo, pEndTs[i], gap))) { return i - start; @@ -1856,8 +1858,8 @@ static int32_t initSessionOutputBuf(SResultWindowInfo* pWinInfo, SResultRow** pR } int32_t doOneWindowAggImpl(SColumnInfoData* pTimeWindowData, SResultWindowInfo* pCurWin, SResultRow** pResult, - int32_t startIndex, int32_t winRows, int32_t rows, int32_t numOutput, - SOperatorInfo* pOperator, int64_t winDelta) { + int32_t startIndex, int32_t winRows, int32_t rows, int32_t numOutput, + SOperatorInfo* pOperator, int64_t winDelta) { SExprSupp* pSup = &pOperator->exprSupp; SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; int32_t code = initSessionOutputBuf(pCurWin, pResult, pSup->pCtx, numOutput, pSup->rowEntryInfoOffset); @@ -1981,9 +1983,10 @@ static void compactSessionSemiWindow(SOperatorInfo* pOperator, SResultWindowInfo } int32_t saveSessionOutputBuf(SStreamAggSupporter* pAggSup, SResultWindowInfo* pWinInfo) { - qDebug("===stream===try save session result skey:%" PRId64 ", ekey:%" PRId64 ".pos%d", - pWinInfo->sessionWin.win.skey, pWinInfo->sessionWin.win.ekey, pWinInfo->pStatePos->needFree); - return pAggSup->stateStore.streamStateSessionPut(pAggSup->pState, &pWinInfo->sessionWin, pWinInfo->pStatePos, pAggSup->resultRowSize); + qDebug("===stream===try save session result skey:%" PRId64 ", ekey:%" PRId64 ".pos%d", pWinInfo->sessionWin.win.skey, + pWinInfo->sessionWin.win.ekey, pWinInfo->pStatePos->needFree); + return pAggSup->stateStore.streamStateSessionPut(pAggSup->pState, &pWinInfo->sessionWin, pWinInfo->pStatePos, + pAggSup->resultRowSize); } static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBlock, SSHashObj* pStUpdated, @@ -2045,7 +2048,8 @@ static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE && pStUpdated) { code = saveResult(winInfo, pStUpdated); if (code != TSDB_CODE_SUCCESS) { - qError("%s do stream session aggregate impl, set result error, code %s", GET_TASKID(pTaskInfo), tstrerror(code)); + qError("%s do stream session aggregate impl, set result error, code %s", GET_TASKID(pTaskInfo), + tstrerror(code)); T_LONG_JMP(pTaskInfo->env, 
TSDB_CODE_OUT_OF_MEMORY); } } @@ -2084,8 +2088,8 @@ void doDeleteTimeWindows(SStreamAggSupporter* pAggSup, SSDataBlock* pBlock, SArr inline int32_t sessionKeyCompareAsc(const void* pKey1, const void* pKey2) { SResultWindowInfo* pWinInfo1 = (SResultWindowInfo*)pKey1; SResultWindowInfo* pWinInfo2 = (SResultWindowInfo*)pKey2; - SSessionKey* pWin1 = &pWinInfo1->sessionWin; - SSessionKey* pWin2 = &pWinInfo2->sessionWin; + SSessionKey* pWin1 = &pWinInfo1->sessionWin; + SSessionKey* pWin2 = &pWinInfo2->sessionWin; if (pWin1->groupId > pWin2->groupId) { return 1; @@ -2290,9 +2294,9 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa for (int32_t i = pGroupResInfo->index; i < numOfRows; i += 1) { SResultWindowInfo* pWinInfo = taosArrayGet(pGroupResInfo->pRows, i); - SRowBuffPos* pPos = pWinInfo->pStatePos; - SResultRow* pRow = NULL; - SSessionKey* pKey = (SSessionKey*) pPos->pKey; + SRowBuffPos* pPos = pWinInfo->pStatePos; + SResultRow* pRow = NULL; + SSessionKey* pKey = (SSessionKey*)pPos->pKey; if (pBlock->info.id.groupId == 0) { pBlock->info.id.groupId = pKey->groupId; @@ -2312,7 +2316,7 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa } } - int32_t code = pAPI->stateStore.streamStateGetByPos(pState, pPos, (void**)&pRow); + int32_t code = pAPI->stateStore.streamStateGetByPos(pState, pPos, (void**)&pRow); if (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) { ASSERT(pBlock->info.rows > 0); break; @@ -2325,7 +2329,7 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa pGroupResInfo->index += 1; continue; } - + doUpdateNumOfRows(pCtx, pRow, numOfExprs, rowEntryOffset); // no results, continue to check the next one if (pRow->numOfRows == 0) { @@ -2409,7 +2413,7 @@ void getMaxTsWins(const SArray* pAllWins, SArray* pMaxWins) { return; } SResultWindowInfo* pWinInfo = taosArrayGet(pAllWins, size - 1); - SSessionKey* pSeKey = pWinInfo->pStatePos->pKey; + SSessionKey* pSeKey = pWinInfo->pStatePos->pKey; taosArrayPush(pMaxWins, pSeKey); if (pSeKey->groupId == 0) { return; @@ -2716,7 +2720,8 @@ void resetWinRange(STimeWindow* winRange) { void getSessionWindowInfoByKey(SStreamAggSupporter* pAggSup, SSessionKey* pKey, SResultWindowInfo* pWinInfo) { int32_t rowSize = pAggSup->resultRowSize; - int32_t code = pAggSup->stateStore.streamStateSessionGet(pAggSup->pState, pKey, (void**)&pWinInfo->pStatePos, &rowSize); + int32_t code = + pAggSup->stateStore.streamStateSessionGet(pAggSup->pState, pKey, (void**)&pWinInfo->pStatePos, &rowSize); if (code == TSDB_CODE_SUCCESS) { pWinInfo->sessionWin = *pKey; pWinInfo->isOutput = true; @@ -2730,16 +2735,16 @@ void getSessionWindowInfoByKey(SStreamAggSupporter* pAggSup, SSessionKey* pKey, void streamSessionSemiReloadState(SOperatorInfo* pOperator) { SStreamSessionAggOperatorInfo* pInfo = pOperator->info; - SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; resetWinRange(&pAggSup->winRange); SResultWindowInfo winInfo = {0}; - int32_t size = 0; - void* pBuf = NULL; - int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_SESSION_OP_STATE_NAME, + int32_t size = 0; + void* pBuf = NULL; + int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_SESSION_OP_STATE_NAME, strlen(STREAM_SESSION_OP_STATE_NAME), &pBuf, &size); - int32_t num = (size - sizeof(TSKEY)) / sizeof(SSessionKey); - SSessionKey* pSeKeyBuf = (SSessionKey*) pBuf; + int32_t num = (size - sizeof(TSKEY)) / 
sizeof(SSessionKey); + SSessionKey* pSeKeyBuf = (SSessionKey*)pBuf; ASSERT(size == num * sizeof(SSessionKey) + sizeof(TSKEY)); for (int32_t i = 0; i < num; i++) { SResultWindowInfo winInfo = {0}; @@ -2763,12 +2768,12 @@ void streamSessionReloadState(SOperatorInfo* pOperator) { SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; resetWinRange(&pAggSup->winRange); - int32_t size = 0; - void* pBuf = NULL; - int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_SESSION_OP_STATE_NAME, - strlen(STREAM_SESSION_OP_STATE_NAME), &pBuf, &size); - int32_t num = (size - sizeof(TSKEY)) / sizeof(SSessionKey); - SSessionKey* pSeKeyBuf = (SSessionKey*)pBuf; + int32_t size = 0; + void* pBuf = NULL; + int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_SESSION_OP_STATE_NAME, + strlen(STREAM_SESSION_OP_STATE_NAME), &pBuf, &size); + int32_t num = (size - sizeof(TSKEY)) / sizeof(SSessionKey); + SSessionKey* pSeKeyBuf = (SSessionKey*)pBuf; ASSERT(size == num * sizeof(SSessionKey) + sizeof(TSKEY)); TSKEY ts = *(TSKEY*)((char*)pBuf + size - sizeof(TSKEY)); @@ -2887,7 +2892,7 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh pInfo->recvGetAll = false; pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION; - // for stream + // for stream void* buff = NULL; int32_t len = 0; int32_t res = @@ -2924,7 +2929,8 @@ static void clearStreamSessionOperator(SStreamSessionAggOperatorInfo* pInfo) { pInfo->streamAggSup.stateStore.streamStateSessionClear(pInfo->streamAggSup.pState); } -void deleteSessionWinState(SStreamAggSupporter* pAggSup, SSDataBlock* pBlock, SSHashObj* pMapUpdate, SSHashObj* pMapDelete) { +void deleteSessionWinState(SStreamAggSupporter* pAggSup, SSDataBlock* pBlock, SSHashObj* pMapUpdate, + SSHashObj* pMapDelete) { SArray* pWins = taosArrayInit(16, sizeof(SSessionKey)); doDeleteTimeWindows(pAggSup, pBlock, pWins); removeSessionResults(pAggSup, pMapUpdate, pWins); @@ -3023,7 +3029,7 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) { copyUpdateResult(&pInfo->pStUpdated, pInfo->pUpdated, sessionKeyCompareAsc); removeSessionDeleteResults(pInfo->pStDeleted, pInfo->pUpdated); - if(pInfo->isHistoryOp) { + if (pInfo->isHistoryOp) { getMaxTsWins(pInfo->pUpdated, pInfo->historyWins); } @@ -3057,8 +3063,9 @@ SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream pOperator->operatorType = pPhyNode->type; if (pPhyNode->type != QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) { - pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamSessionSemiAgg, NULL, - destroyStreamSessionAggOperatorInfo, optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); + pOperator->fpSet = + createOperatorFpSet(optrDummyOpenFn, doStreamSessionSemiAgg, NULL, destroyStreamSessionAggOperatorInfo, + optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); setOperatorStreamStateFn(pOperator, streamSessionReleaseState, streamSessionSemiReloadState); } setOperatorInfo(pOperator, getStreamOpName(pOperator->operatorType), pPhyNode->type, false, OP_NOT_OPENED, pInfo, @@ -3174,7 +3181,7 @@ void getStateWindowInfoByKey(SStreamAggSupporter* pAggSup, SSessionKey* pKey, SS pAggSup->stateStore.streamStateSessionSeekKeyNext(pAggSup->pState, &pNextWin->winInfo.sessionWin); int32_t nextSize = pAggSup->resultRowSize; int32_t code = pAggSup->stateStore.streamStateSessionGetKVByCur(pCur, &pNextWin->winInfo.sessionWin, - (void**)&pNextWin->winInfo.pStatePos, &nextSize); + (void**)&pNextWin->winInfo.pStatePos, 
&nextSize); if (code != TSDB_CODE_SUCCESS) { SET_SESSION_WIN_INVALID(pNextWin->winInfo); } else { @@ -3187,8 +3194,8 @@ void getStateWindowInfoByKey(SStreamAggSupporter* pAggSup, SSessionKey* pKey, SS pNextWin->winInfo.isOutput = true; } pAggSup->stateStore.streamStateFreeCur(pCur); - qDebug("===stream===get state next win buff. skey:%" PRId64 ", endkey:%" PRId64, pNextWin->winInfo.sessionWin.win.skey, - pNextWin->winInfo.sessionWin.win.ekey); + qDebug("===stream===get state next win buff. skey:%" PRId64 ", endkey:%" PRId64, + pNextWin->winInfo.sessionWin.win.skey, pNextWin->winInfo.sessionWin.win.ekey); } void setStateOutputBuf(SStreamAggSupporter* pAggSup, TSKEY ts, uint64_t groupId, char* pKeyData, @@ -3257,13 +3264,13 @@ void setStateOutputBuf(SStreamAggSupporter* pAggSup, TSKEY ts, uint64_t groupId, pNextWin->winInfo.isOutput = true; } pAggSup->stateStore.streamStateFreeCur(pCur); - qDebug("===stream===set state next win buff. skey:%" PRId64 ", endkey:%" PRId64, pNextWin->winInfo.sessionWin.win.skey, - pNextWin->winInfo.sessionWin.win.ekey); + qDebug("===stream===set state next win buff. skey:%" PRId64 ", endkey:%" PRId64, + pNextWin->winInfo.sessionWin.win.skey, pNextWin->winInfo.sessionWin.win.ekey); } -int32_t updateStateWindowInfo(SStreamAggSupporter* pAggSup, SStateWindowInfo* pWinInfo, SStateWindowInfo* pNextWin, TSKEY* pTs, uint64_t groupId, - SColumnInfoData* pKeyCol, int32_t rows, int32_t start, bool* allEqual, - SSHashObj* pResultRows, SSHashObj* pSeUpdated, SSHashObj* pSeDeleted) { +int32_t updateStateWindowInfo(SStreamAggSupporter* pAggSup, SStateWindowInfo* pWinInfo, SStateWindowInfo* pNextWin, + TSKEY* pTs, uint64_t groupId, SColumnInfoData* pKeyCol, int32_t rows, int32_t start, + bool* allEqual, SSHashObj* pResultRows, SSHashObj* pSeUpdated, SSHashObj* pSeDeleted) { *allEqual = true; for (int32_t i = start; i < rows; ++i) { char* pKeyData = colDataGetData(pKeyCol, i); @@ -3338,7 +3345,7 @@ static void doStreamStateAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBl SStateWindowInfo nextWin = {0}; setStateOutputBuf(pAggSup, tsCols[i], groupId, pKeyData, &curWin, &nextWin); releaseOutputBuf(pAggSup->pState, nextWin.winInfo.pStatePos, &pAPI->stateStore); - + setSessionWinOutputInfo(pSeUpdated, &curWin.winInfo); winRows = updateStateWindowInfo(pAggSup, &curWin, &nextWin, tsCols, groupId, pKeyColInfo, rows, i, &allEqual, pAggSup->pResultRows, pSeUpdated, pStDeleted); @@ -3475,6 +3482,7 @@ void doStreamStateSaveCheckpoint(SOperatorInfo* pOperator) { len = doStreamStateEncodeOpState(&pBuf, len, pOperator, true); pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_STATE_OP_CHECKPOINT_NAME, strlen(STREAM_STATE_OP_CHECKPOINT_NAME), buf, len); + taosMemoryFree(buf); } static SSDataBlock* buildStateResult(SOperatorInfo* pOperator) { @@ -3614,10 +3622,11 @@ void streamStateReleaseState(SOperatorInfo* pOperator) { static void compactStateWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCurWin, SResultWindowInfo* pNextWin, SSHashObj* pStUpdated, SSHashObj* pStDeleted) { - SExprSupp* pSup = &pOperator->exprSupp; - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SExprSupp* pSup = &pOperator->exprSupp; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SStreamStateAggOperatorInfo* pInfo = pOperator->info; - compactTimeWindow(pSup, &pInfo->streamAggSup, &pInfo->twAggSup, pTaskInfo, pCurWin, pNextWin, pStUpdated, pStDeleted, false); + compactTimeWindow(pSup, &pInfo->streamAggSup, &pInfo->twAggSup, pTaskInfo, pCurWin, pNextWin, pStUpdated, 
pStDeleted, + false); } void streamStateReloadState(SOperatorInfo* pOperator) { @@ -3629,7 +3638,7 @@ void streamStateReloadState(SOperatorInfo* pOperator) { int32_t size = 0; void* pBuf = NULL; int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_STATE_OP_STATE_NAME, - strlen(STREAM_STATE_OP_STATE_NAME), &pBuf, &size); + strlen(STREAM_STATE_OP_STATE_NAME), &pBuf, &size); int32_t num = (size - sizeof(TSKEY)) / sizeof(SSessionKey); qDebug("===stream=== reload state. get result count:%d", num); SSessionKey* pSeKeyBuf = (SSessionKey*)pBuf; @@ -4010,8 +4019,9 @@ SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhys setOperatorInfo(pOperator, "StreamIntervalOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL, true, OP_NOT_OPENED, pInfo, pTaskInfo); - pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamIntervalAgg, NULL, - destroyStreamFinalIntervalOperatorInfo, optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); + pOperator->fpSet = + createOperatorFpSet(optrDummyOpenFn, doStreamIntervalAgg, NULL, destroyStreamFinalIntervalOperatorInfo, + optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); setOperatorStreamStateFn(pOperator, streamIntervalReleaseState, streamIntervalReloadState); pInfo->stateStore = pTaskInfo->storageAPI.stateStore; diff --git a/source/libs/nodes/src/nodesUtilFuncs.c b/source/libs/nodes/src/nodesUtilFuncs.c index c9014e91db..e940fbb627 100644 --- a/source/libs/nodes/src/nodesUtilFuncs.c +++ b/source/libs/nodes/src/nodesUtilFuncs.c @@ -916,6 +916,7 @@ void nodesDestroyNode(SNode* pNode) { taosHashCleanup(pStmt->pSubTableHashObj); taosHashCleanup(pStmt->pTableNameHashObj); taosHashCleanup(pStmt->pDbFNameHashObj); + taosHashCleanup(pStmt->pTableCxtHashObj); if (pStmt->freeHashFunc) { pStmt->freeHashFunc(pStmt->pTableBlockHashObj); } diff --git a/source/libs/parser/inc/parInsertUtil.h b/source/libs/parser/inc/parInsertUtil.h index ce8c2d8a3d..b20587dd43 100644 --- a/source/libs/parser/inc/parInsertUtil.h +++ b/source/libs/parser/inc/parInsertUtil.h @@ -50,7 +50,7 @@ void insCheckTableDataOrder(STableDataCxt *pTableCxt, TSKEY tsKey); int32_t insGetTableDataCxt(SHashObj *pHash, void *id, int32_t idLen, STableMeta *pTableMeta, SVCreateTbReq **pCreateTbReq, STableDataCxt **pTableCxt, bool colMode, bool ignoreColVals); int32_t initTableColSubmitData(STableDataCxt *pTableCxt); -int32_t insMergeTableDataCxt(SHashObj *pTableHash, SArray **pVgDataBlocks); +int32_t insMergeTableDataCxt(SHashObj *pTableHash, SArray **pVgDataBlocks, bool isRebuild); int32_t insBuildVgDataBlocks(SHashObj *pVgroupsHashObj, SArray *pVgDataBlocks, SArray **pDataBlocks); void insDestroyTableDataCxtHashMap(SHashObj *pTableCxtHash); void insDestroyVgroupDataCxt(SVgroupDataCxt *pVgCxt); diff --git a/source/libs/parser/src/parInsertSml.c b/source/libs/parser/src/parInsertSml.c index f2194402da..2dbba38212 100644 --- a/source/libs/parser/src/parInsertSml.c +++ b/source/libs/parser/src/parInsertSml.c @@ -425,7 +425,7 @@ SQuery* smlInitHandle() { int32_t smlBuildOutput(SQuery* handle, SHashObj* pVgHash) { SVnodeModifyOpStmt* pStmt = (SVnodeModifyOpStmt*)(handle)->pRoot; // merge according to vgId - int32_t code = insMergeTableDataCxt(pStmt->pTableBlockHashObj, &pStmt->pVgDataBlocks); + int32_t code = insMergeTableDataCxt(pStmt->pTableBlockHashObj, &pStmt->pVgDataBlocks, true); if (code != TSDB_CODE_SUCCESS) { uError("insMergeTableDataCxt failed"); return code; diff --git a/source/libs/parser/src/parInsertSql.c b/source/libs/parser/src/parInsertSql.c 
index 2b8516d37b..31b016458a 100644 --- a/source/libs/parser/src/parInsertSql.c +++ b/source/libs/parser/src/parInsertSql.c @@ -55,6 +55,7 @@ typedef struct SInsertParseContext { bool usingDuplicateTable; bool forceUpdate; bool needTableTagVal; + bool needRequest; // whether or not request server } SInsertParseContext; typedef int32_t (*_row_append_fn_t)(SMsgBuf* pMsgBuf, const void* value, int32_t len, void* param); @@ -652,6 +653,10 @@ static int32_t parseTagValue(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pStm } static int32_t buildCreateTbReq(SVnodeModifyOpStmt* pStmt, STag* pTag, SArray* pTagName) { + if (pStmt->pCreateTblReq) { + tdDestroySVCreateTbReq(pStmt->pCreateTblReq); + taosMemoryFreeClear(pStmt->pCreateTblReq); + } pStmt->pCreateTblReq = taosMemoryCalloc(1, sizeof(SVCreateTbReq)); if (NULL == pStmt->pCreateTblReq) { return TSDB_CODE_OUT_OF_MEMORY; @@ -1797,9 +1802,10 @@ static void clearStbRowsDataContext(SStbRowsDataContext* pStbRowsCxt) { taosMemoryFreeClear(pStbRowsCxt->pCreateCtbReq); } -static int32_t parseOneStbRow(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pStmt, const char** ppSql, - SStbRowsDataContext* pStbRowsCxt, bool* pGotRow, SToken* pToken) { - bool bFirstTable = false; +static int32_t parseOneStbRow(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pStmt, const char** ppSql, + SStbRowsDataContext* pStbRowsCxt, bool* pGotRow, SToken* pToken, + STableDataCxt** ppTableDataCxt) { + bool bFirstTable = false; int32_t code = getStbRowValues(pCxt, pStmt, ppSql, pStbRowsCxt, pGotRow, pToken, &bFirstTable); if (code != TSDB_CODE_SUCCESS || !*pGotRow) { return code; @@ -1809,15 +1815,14 @@ static int32_t parseOneStbRow(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pSt code = processCtbAutoCreationAndCtbMeta(pCxt, pStmt, pStbRowsCxt); } - STableDataCxt* pTableDataCxt = NULL; code = insGetTableDataCxt(pStmt->pTableBlockHashObj, &pStbRowsCxt->pCtbMeta->uid, sizeof(pStbRowsCxt->pCtbMeta->uid), - pStbRowsCxt->pCtbMeta, &pStbRowsCxt->pCreateCtbReq, &pTableDataCxt, false, true); - initTableColSubmitData(pTableDataCxt); + pStbRowsCxt->pCtbMeta, &pStbRowsCxt->pCreateCtbReq, ppTableDataCxt, false, true); + initTableColSubmitData(*ppTableDataCxt); if (code == TSDB_CODE_SUCCESS) { - SRow** pRow = taosArrayReserve(pTableDataCxt->pData->aRowP, 1); - code = tRowBuild(pStbRowsCxt->aColVals, pTableDataCxt->pSchema, pRow); + SRow** pRow = taosArrayReserve((*ppTableDataCxt)->pData->aRowP, 1); + code = tRowBuild(pStbRowsCxt->aColVals, (*ppTableDataCxt)->pSchema, pRow); if (TSDB_CODE_SUCCESS == code) { - insCheckTableDataOrder(pTableDataCxt, TD_ROW_KEY(*pRow)); + insCheckTableDataOrder(*ppTableDataCxt, TD_ROW_KEY(*pRow)); } } @@ -1915,7 +1920,8 @@ static int32_t parseValues(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pStmt, if (!pStmt->stbSyntax) { code = parseOneRow(pCxt, &pStmt->pSql, rowsDataCxt.pTableDataCxt, &gotRow, pToken); } else { - code = parseOneStbRow(pCxt, pStmt, &pStmt->pSql, rowsDataCxt.pStbRowsCxt, &gotRow, pToken); + STableDataCxt* pTableDataCxt = NULL; + code = parseOneStbRow(pCxt, pStmt, &pStmt->pSql, rowsDataCxt.pStbRowsCxt, &gotRow, pToken, &pTableDataCxt); } } @@ -1979,7 +1985,14 @@ static int32_t parseCsvFile(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pStmt if (!pStmt->stbSyntax) { code = parseOneRow(pCxt, (const char**)&pRow, rowsDataCxt.pTableDataCxt, &gotRow, &token); } else { - code = parseOneStbRow(pCxt, pStmt, (const char**)&pRow, rowsDataCxt.pStbRowsCxt, &gotRow, &token); + STableDataCxt* pTableDataCxt = NULL; + code = parseOneStbRow(pCxt, pStmt, 
(const char**)&pRow, rowsDataCxt.pStbRowsCxt, &gotRow, &token, &pTableDataCxt); + if (code == TSDB_CODE_SUCCESS) { + SStbRowsDataContext* pStbRowsCxt = rowsDataCxt.pStbRowsCxt; + void* pData = pTableDataCxt; + taosHashPut(pStmt->pTableCxtHashObj, &pStbRowsCxt->pCtbMeta->uid, sizeof(pStbRowsCxt->pCtbMeta->uid), &pData, + POINTER_BYTES); + } } if (code && firstLine) { firstLine = false; @@ -1992,7 +2005,7 @@ static int32_t parseCsvFile(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pStmt (*pNumOfRows)++; } - if (TSDB_CODE_SUCCESS == code && (*pNumOfRows) > tsMaxInsertBatchRows) { + if (TSDB_CODE_SUCCESS == code && (*pNumOfRows) >= tsMaxInsertBatchRows) { pStmt->fileProcessing = true; break; } @@ -2003,7 +2016,7 @@ static int32_t parseCsvFile(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pStmt parserDebug("0x%" PRIx64 " %d rows have been parsed", pCxt->pComCxt->requestId, *pNumOfRows); - if (TSDB_CODE_SUCCESS == code && 0 == (*pNumOfRows) && + if (TSDB_CODE_SUCCESS == code && 0 == (*pNumOfRows) && 0 == pStmt->totalRowsNum && (!TSDB_QUERY_HAS_TYPE(pStmt->insertType, TSDB_QUERY_TYPE_STMT_INSERT)) && !pStmt->fileProcessing) { code = buildSyntaxErrMsg(&pCxt->msg, "no any data points", NULL); } @@ -2011,6 +2024,12 @@ static int32_t parseCsvFile(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pStmt } static int32_t parseDataFromFileImpl(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pStmt, SRowsDataContext rowsDataCxt) { + // init only for file + if (NULL == pStmt->pTableCxtHashObj) { + pStmt->pTableCxtHashObj = + taosHashInit(128, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK); + } + int32_t numOfRows = 0; int32_t code = parseCsvFile(pCxt, pStmt, rowsDataCxt, &numOfRows); if (TSDB_CODE_SUCCESS == code) { @@ -2022,7 +2041,18 @@ static int32_t parseDataFromFileImpl(SInsertParseContext* pCxt, SVnodeModifyOpSt } else { parserDebug("0x%" PRIx64 " insert from csv. 
File is too large, do it in batches.", pCxt->pComCxt->requestId); } + if (pStmt->insertType != TSDB_QUERY_TYPE_FILE_INSERT) { + return buildSyntaxErrMsg(&pCxt->msg, "keyword VALUES or FILE is exclusive", NULL); + } } + + // just record pTableCxt whose data come from file + if (!pStmt->stbSyntax && numOfRows > 0) { + void* pData = rowsDataCxt.pTableDataCxt; + taosHashPut(pStmt->pTableCxtHashObj, &pStmt->pTableMeta->uid, sizeof(pStmt->pTableMeta->uid), &pData, + POINTER_BYTES); + } + return code; } @@ -2061,6 +2091,9 @@ static int32_t parseDataClause(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pS NEXT_TOKEN(pStmt->pSql, token); switch (token.type) { case TK_VALUES: + if (TSDB_QUERY_HAS_TYPE(pStmt->insertType, TSDB_QUERY_TYPE_FILE_INSERT)) { + return buildSyntaxErrMsg(&pCxt->msg, "keyword VALUES or FILE is exclusive", token.z); + } return parseValuesClause(pCxt, pStmt, rowsDataCxt, &token); case TK_FILE: return parseFileClause(pCxt, pStmt, rowsDataCxt, &token); @@ -2275,8 +2308,25 @@ static int32_t parseInsertBodyBottom(SInsertParseContext* pCxt, SVnodeModifyOpSt return setStmtInfo(pCxt, pStmt); } + // release old array alloced by merge + pStmt->freeArrayFunc(pStmt->pVgDataBlocks); + pStmt->pVgDataBlocks = NULL; + + bool fileOnly = (pStmt->insertType == TSDB_QUERY_TYPE_FILE_INSERT); + if (fileOnly) { + // none data, skip merge & buildvgdata + if (0 == taosHashGetSize(pStmt->pTableCxtHashObj)) { + pCxt->needRequest = false; + return TSDB_CODE_SUCCESS; + } + } + // merge according to vgId - int32_t code = insMergeTableDataCxt(pStmt->pTableBlockHashObj, &pStmt->pVgDataBlocks); + int32_t code = insMergeTableDataCxt(fileOnly ? pStmt->pTableCxtHashObj : pStmt->pTableBlockHashObj, + &pStmt->pVgDataBlocks, pStmt->fileProcessing); + // clear tmp hashobj only + taosHashClear(pStmt->pTableCxtHashObj); + if (TSDB_CODE_SUCCESS == code) { code = insBuildVgDataBlocks(pStmt->pVgroupsHashObj, pStmt->pVgDataBlocks, &pStmt->pDataBlocks); } @@ -2718,6 +2768,7 @@ int32_t parseInsertSql(SParseContext* pCxt, SQuery** pQuery, SCatalogReq* pCatal .msg = {.buf = pCxt->pMsg, .len = pCxt->msgLen}, .missCache = false, .usingDuplicateTable = false, + .needRequest = true, .forceUpdate = (NULL != pCatalogReq ? 
pCatalogReq->forceUpdate : false)}; int32_t code = initInsertQuery(&context, pCatalogReq, pMetaData, pQuery); @@ -2732,5 +2783,10 @@ int32_t parseInsertSql(SParseContext* pCxt, SQuery** pQuery, SCatalogReq* pCatal code = setRefreshMeta(*pQuery); } insDestroyBoundColInfo(&context.tags); + + // if no data to insert, set emptyMode to avoid request server + if (!context.needRequest) { + (*pQuery)->execMode = QUERY_EXEC_MODE_EMPTY_RESULT; + } return code; } diff --git a/source/libs/parser/src/parInsertStmt.c b/source/libs/parser/src/parInsertStmt.c index 5137deca2e..a88aec20b3 100644 --- a/source/libs/parser/src/parInsertStmt.c +++ b/source/libs/parser/src/parInsertStmt.c @@ -58,7 +58,7 @@ int32_t qBuildStmtOutput(SQuery* pQuery, SHashObj* pVgHash, SHashObj* pBlockHash // merge according to vgId if (taosHashGetSize(pBlockHash) > 0) { - code = insMergeTableDataCxt(pBlockHash, &pVgDataBlocks); + code = insMergeTableDataCxt(pBlockHash, &pVgDataBlocks, true); } if (TSDB_CODE_SUCCESS == code) { code = insBuildVgDataBlocks(pVgHash, pVgDataBlocks, &pStmt->pDataBlocks); diff --git a/source/libs/parser/src/parInsertUtil.c b/source/libs/parser/src/parInsertUtil.c index 21b093c76c..a924ed68b0 100644 --- a/source/libs/parser/src/parInsertUtil.c +++ b/source/libs/parser/src/parInsertUtil.c @@ -289,6 +289,14 @@ static int32_t rebuildTableData(SSubmitTbData* pSrc, SSubmitTbData** pDst) { pTmp->uid = pSrc->uid; pTmp->sver = pSrc->sver; pTmp->pCreateTbReq = NULL; + if (pTmp->flags & SUBMIT_REQ_AUTO_CREATE_TABLE) { + if (pSrc->pCreateTbReq) { + cloneSVreateTbReq(pSrc->pCreateTbReq, &pTmp->pCreateTbReq); + } else { + pTmp->flags &= ~SUBMIT_REQ_AUTO_CREATE_TABLE; + } + } + if (pTmp->flags & SUBMIT_REQ_COLUMN_DATA_FORMAT) { pTmp->aCol = taosArrayInit(128, sizeof(SColData)); if (NULL == pTmp->aCol) { @@ -416,15 +424,21 @@ void insDestroyTableDataCxtHashMap(SHashObj* pTableCxtHash) { taosHashCleanup(pTableCxtHash); } -static int32_t fillVgroupDataCxt(STableDataCxt* pTableCxt, SVgroupDataCxt* pVgCxt) { +static int32_t fillVgroupDataCxt(STableDataCxt* pTableCxt, SVgroupDataCxt* pVgCxt, bool isRebuild) { if (NULL == pVgCxt->pData->aSubmitTbData) { pVgCxt->pData->aSubmitTbData = taosArrayInit(128, sizeof(SSubmitTbData)); if (NULL == pVgCxt->pData->aSubmitTbData) { return TSDB_CODE_OUT_OF_MEMORY; } } + + // push data to submit, rebuild empty data for next submit taosArrayPush(pVgCxt->pData->aSubmitTbData, pTableCxt->pData); - rebuildTableData(pTableCxt->pData, &pTableCxt->pData); + if (isRebuild) { + rebuildTableData(pTableCxt->pData, &pTableCxt->pData); + } else { + taosMemoryFreeClear(pTableCxt->pData); + } qDebug("add tableDataCxt uid:%" PRId64 " to vgId:%d", pTableCxt->pMeta->uid, pVgCxt->vgId); @@ -467,7 +481,7 @@ int insColDataComp(const void* lp, const void* rp) { return 0; } -int32_t insMergeTableDataCxt(SHashObj* pTableHash, SArray** pVgDataBlocks) { +int32_t insMergeTableDataCxt(SHashObj* pTableHash, SArray** pVgDataBlocks, bool isRebuild) { SHashObj* pVgroupHash = taosHashInit(128, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false); SArray* pVgroupList = taosArrayInit(8, POINTER_BYTES); if (NULL == pVgroupHash || NULL == pVgroupList) { @@ -502,6 +516,13 @@ int32_t insMergeTableDataCxt(SHashObj* pTableHash, SArray** pVgDataBlocks) { tColDataSortMerge(pTableCxt->pData->aCol); } else { + // skip the table has no data to insert + // eg: import a csv without valid data + // if (0 == taosArrayGetSize(pTableCxt->pData->aRowP)) { + // qWarn("no row in tableDataCxt uid:%" PRId64 " ", pTableCxt->pMeta->uid); + 
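
The parser-side changes share one idea: while a CSV file is consumed in batches, only the tables that actually received rows in the current batch (tracked in `pStmt->pTableCxtHashObj`) are merged and shipped, and a batch that produced no rows short-circuits the server round-trip via `needRequest` → `QUERY_EXEC_MODE_EMPTY_RESULT`. A standalone sketch of that batch-and-skip shape; the names and the 3-row batch size are hypothetical placeholders, not the parser's real structures.

```c
/* Standalone sketch of the batching flow the CSV insert path follows;
 * parseNextRow, sendBatch and MAX_BATCH_ROWS are hypothetical placeholders. */
#include <stdbool.h>
#include <stdio.h>

#define MAX_BATCH_ROWS 3            /* stands in for tsMaxInsertBatchRows */

static bool parseNextRow(int *row) {           /* pretend CSV reader */
  static int remaining = 5;
  if (remaining == 0) return false;
  *row = 5 - remaining--;
  return true;
}

static void sendBatch(int numRows) { printf("submit %d rows\n", numRows); }

static void importFile(void) {
  bool more = true;
  while (more) {
    int numRows = 0, row;
    while ((more = parseNextRow(&row))) {
      numRows++;
      if (numRows >= MAX_BATCH_ROWS) break;    /* same >= boundary as parseCsvFile */
    }
    if (numRows == 0) {
      /* nothing parsed in this batch: skip the request entirely, the
       * counterpart of needRequest = false / EMPTY_RESULT above */
      continue;
    }
    sendBatch(numRows);
  }
}

int main(void) { importFile(); return 0; }
```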
// p = taosHashIterate(pTableHash, p); + // continue; + // } if (!pTableCxt->ordered) { code = tRowSort(pTableCxt->pData->aRowP); } @@ -520,7 +541,7 @@ int32_t insMergeTableDataCxt(SHashObj* pTableHash, SArray** pVgDataBlocks) { pVgCxt = *(SVgroupDataCxt**)pp; } if (TSDB_CODE_SUCCESS == code) { - code = fillVgroupDataCxt(pTableCxt, pVgCxt); + code = fillVgroupDataCxt(pTableCxt, pVgCxt, isRebuild); } } if (TSDB_CODE_SUCCESS == code) { diff --git a/source/libs/stream/inc/streamBackendRocksdb.h b/source/libs/stream/inc/streamBackendRocksdb.h index 3eadea3cdd..bed0f79f02 100644 --- a/source/libs/stream/inc/streamBackendRocksdb.h +++ b/source/libs/stream/inc/streamBackendRocksdb.h @@ -17,6 +17,7 @@ #define _STREAM_BACKEDN_ROCKSDB_H_ #include "rocksdb/c.h" +//#include "streamInt.h" #include "streamState.h" #include "tcoding.h" #include "tcommon.h" @@ -42,15 +43,110 @@ typedef struct { TdThreadMutex cfMutex; SHashObj* cfInst; int64_t defaultCfInit; + } SBackendWrapper; +typedef struct { + void* tableOpt; +} RocksdbCfParam; + +typedef struct { + rocksdb_t* db; + rocksdb_writeoptions_t* writeOpt; + rocksdb_readoptions_t* readOpt; + rocksdb_options_t* dbOpt; + rocksdb_env_t* env; + rocksdb_cache_t* cache; + + rocksdb_column_family_handle_t** pCf; + rocksdb_comparator_t** pCompares; + rocksdb_options_t** pCfOpts; + RocksdbCfParam* pCfParams; + + rocksdb_compactionfilterfactory_t* filterFactory; + TdThreadMutex mutex; + char* idstr; + char* path; + int64_t refId; + + void* pTask; + int64_t streamId; + int64_t taskId; + int64_t chkpId; + SArray* chkpSaved; + SArray* chkpInUse; + int32_t chkpCap; + TdThreadRwlock chkpDirLock; + int64_t dataWritten; + +} STaskDbWrapper; + +typedef struct SDbChkp { + int8_t init; + char* pCurrent; + char* pManifest; + SArray* pSST; + int64_t preCkptId; + int64_t curChkpId; + char* path; + + char* buf; + int32_t len; + + // ping-pong buf + SHashObj* pSstTbl[2]; + int8_t idx; + + SArray* pAdd; + SArray* pDel; + int8_t update; + + TdThreadRwlock rwLock; +} SDbChkp; +typedef struct { + int8_t init; + char* pCurrent; + char* pManifest; + SArray* pSST; + int64_t preCkptId; + int64_t curChkpId; + char* path; + + char* buf; + int32_t len; + + // ping-pong buf + SHashObj* pSstTbl[2]; + int8_t idx; + + SArray* pAdd; + SArray* pDel; + int8_t update; + + SHashObj* pDbChkpTbl; + + TdThreadRwlock rwLock; +} SBkdMgt; + +bool streamBackendDataIsExist(const char* path, int64_t chkpId, int32_t vgId); void* streamBackendInit(const char* path, int64_t chkpId, int32_t vgId); void streamBackendCleanup(void* arg); void streamBackendHandleCleanup(void* arg); int32_t streamBackendLoadCheckpointInfo(void* pMeta); -int32_t streamBackendDoCheckpoint(void* pMeta, uint64_t checkpointId); +int32_t streamBackendDoCheckpoint(void* pMeta, int64_t checkpointId); SListNode* streamBackendAddCompare(void* backend, void* arg); void streamBackendDelCompare(void* backend, void* arg); +int32_t streamStateCvtDataFormat(char* path, char* key, void* cfInst); + +STaskDbWrapper* taskDbOpen(char* path, char* key, int64_t chkpId); +void taskDbDestroy(void* pBackend, bool flush); +void taskDbDestroy2(void* pBackend); +int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId); + +void taskDbUpdateChkpId(void* pTaskDb, int64_t chkpId); + +void* taskDbAddRef(void* pTaskDb); +void taskDbRemoveRef(void* pTaskDb); int streamStateOpenBackend(void* backend, SStreamState* pState); void streamStateCloseBackend(SStreamState* pState, bool remove); @@ -122,7 +218,7 @@ int32_t streamDefaultGet_rocksdb(SStreamState* pState, const void* 
key, void** p int32_t streamDefaultDel_rocksdb(SStreamState* pState, const void* key); int32_t streamDefaultIterGet_rocksdb(SStreamState* pState, const void* start, const void* end, SArray* result); void* streamDefaultIterCreate_rocksdb(SStreamState* pState); -bool streamDefaultIterValid_rocksdb(void* iter); +bool streamDefaultIterValid_rocksdb(void* iter); void streamDefaultIterSeek_rocksdb(void* iter, const char* key); void streamDefaultIterNext_rocksdb(void* iter); char* streamDefaultIterKey_rocksdb(void* iter, int32_t* len); @@ -146,5 +242,20 @@ int32_t streamBackendTriggerChkp(void* pMeta, char* dst); int32_t streamBackendAddInUseChkp(void* arg, int64_t chkpId); int32_t streamBackendDelInUseChkp(void* arg, int64_t chkpId); +int32_t taskDbBuildSnap(void* arg, SArray* pSnap); + // int32_t streamDefaultIter_rocksdb(SStreamState* pState, const void* start, const void* end, SArray* result); + +// STaskDbWrapper* taskDbOpen(char* path, char* key, int64_t chkpId); +// void taskDbDestroy(void* pDb, bool flush); + +int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId); + +SBkdMgt* bkdMgtCreate(char* path); +int32_t bkdMgtAddChkp(SBkdMgt* bm, char* task, char* path); +int32_t bkdMgtGetDelta(SBkdMgt* bm, char* taskId, int64_t chkpId, SArray* list, char* name); +int32_t bkdMgtDumpTo(SBkdMgt* bm, char* taskId, char* dname); +void bkdMgtDestroy(SBkdMgt* bm); + +int32_t taskDbGenChkpUploadData(void* arg, void* bkdMgt, int64_t chkpId, int8_t type, char** path, SArray* list); #endif \ No newline at end of file diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index afb6b9227d..7b8dae8be7 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -68,13 +68,19 @@ typedef struct SStreamContinueExecInfo { SRpcMsg msg; } SStreamContinueExecInfo; +typedef struct { + int64_t streamId; + int64_t taskId; + int64_t chkpId; + char* dbPrefixPath; +} SStreamTaskSnap; struct STokenBucket { - int32_t numCapacity; // total capacity, available token per second - int32_t numOfToken; // total available tokens - int32_t numRate; // number of token per second - double quotaCapacity; // available capacity for maximum input size, KiloBytes per Second - double quotaRemain; // not consumed bytes per second - double quotaRate; // number of token per second + int32_t numCapacity; // total capacity, available token per second + int32_t numOfToken; // total available tokens + int32_t numRate; // number of token per second + double quotaCapacity; // available capacity for maximum input size, KiloBytes per Second + double quotaRemain; // not consumed bytes per second + double quotaRate; // number of token per second int64_t tokenFillTimestamp; // fill timestamp int64_t quotaFillTimestamp; // fill timestamp }; @@ -89,6 +95,7 @@ struct SStreamQueue { extern SStreamGlobalEnv streamEnv; extern int32_t streamBackendId; extern int32_t streamBackendCfWrapperId; +extern int32_t taskDbWrapperId; void streamRetryDispatchData(SStreamTask* pTask, int64_t waitDuration); int32_t streamDispatchStreamBlock(SStreamTask* pTask); @@ -106,6 +113,8 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* pReq); +int32_t streamSaveTaskCheckpointInfo(SStreamTask* p, int64_t checkpointId); +int32_t streamTaskBuildCheckpoint(SStreamTask* pTask); int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId); int32_t streamSendCheckMsg(SStreamTask* pTask, const 
SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet); @@ -117,7 +126,8 @@ int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t STaskId streamTaskExtractKey(const SStreamTask* pTask); void streamTaskInitForLaunchHTask(SHistoryTaskInfo* pInfo); void streamTaskSetRetryInfoForLaunch(SHistoryTaskInfo* pInfo); -int32_t streamTaskBuildScanhistoryRspMsg(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, void** pBuffer, int32_t* pLen); +int32_t streamTaskBuildScanhistoryRspMsg(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, void** pBuffer, + int32_t* pLen); int32_t streamTaskFillHistoryFinished(SStreamTask* pTask); void streamClearChkptReadyMsg(SStreamTask* pTask); @@ -132,6 +142,17 @@ int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, int32_t streamNotifyUpstreamContinue(SStreamTask* pTask); int32_t streamTransferStateToStreamTask(SStreamTask* pTask); +// <<<<<<< HEAD +// void streamClearChkptReadyMsg(SStreamTask* pTask); + +// int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t numRate, float quotaRate, const +// char*); STaskId streamTaskExtractKey(const SStreamTask* pTask); void streamTaskInitForLaunchHTask(SHistoryTaskInfo* +// pInfo); void streamTaskSetRetryInfoForLaunch(SHistoryTaskInfo* pInfo); + +// void streamMetaResetStartInfo(STaskStartInfo* pMeta); + +// ======= +// >>>>>>> 3.0 SStreamQueue* streamQueueOpen(int64_t cap); void streamQueueClose(SStreamQueue* pQueue, int32_t taskId); void streamQueueProcessSuccess(SStreamQueue* queue); @@ -151,10 +172,14 @@ int uploadCheckpoint(char* id, char* path); int downloadCheckpoint(char* id, char* path); int deleteCheckpoint(char* id); int deleteCheckpointFile(char* id, char* name); +int downloadCheckpointByName(char* id, char* fname, char* dstName); int32_t streamTaskOnNormalTaskReady(SStreamTask* pTask); int32_t streamTaskOnScanhistoryTaskReady(SStreamTask* pTask); +typedef int32_t (*__stream_async_exec_fn_t)(void* param); + +int32_t streamMetaAsyncExec(SStreamMeta* pMeta, __stream_async_exec_fn_t fn, void* param, int32_t* code); #ifdef __cplusplus } #endif diff --git a/source/libs/stream/inc/streamsm.h b/source/libs/stream/inc/streamsm.h index be3665fde7..7be655fbed 100644 --- a/source/libs/stream/inc/streamsm.h +++ b/source/libs/stream/inc/streamsm.h @@ -32,8 +32,8 @@ typedef int32_t (*__state_trans_fn)(SStreamTask*); typedef int32_t (*__state_trans_succ_fn)(SStreamTask*); typedef struct SAttachedEventInfo { - ETaskStatus status; // required status that this event can be handled - EStreamTaskEvent event; // the delayed handled event + ETaskStatus status; // required status that this event can be handled + EStreamTaskEvent event; // the delayed handled event } SAttachedEventInfo; typedef struct STaskStateTrans { @@ -64,8 +64,8 @@ typedef struct SStreamEventInfo { const char* name; } SStreamEventInfo; -SStreamTaskSM* streamCreateStateMachine(SStreamTask* pTask); -void* streamDestroyStateMachine(SStreamTaskSM* pSM); +// SStreamTaskSM* streamCreateStateMachine(SStreamTask* pTask); +// void* streamDestroyStateMachine(SStreamTaskSM* pSM); #ifdef __cplusplus } #endif diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 9699386fd4..630650025d 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -20,34 +20,10 @@ #include "tcommon.h" #include "tref.h" -typedef struct { - int8_t init; - char* pCurrent; - char* pManifest; - SArray* 
pSST; - int64_t preCkptId; - int64_t curChkpId; - char* path; - - char* buf; - int32_t len; - - // ping-pong buf - SHashObj* pSstTbl[2]; - int8_t idx; - - SArray* pAdd; - SArray* pDel; - int8_t update; -} SBackendManager; - typedef struct SCompactFilteFactory { void* status; } SCompactFilteFactory; -typedef struct { - void* tableOpt; -} RocksdbCfParam; typedef struct { rocksdb_t* db; rocksdb_column_family_handle_t** pHandle; @@ -61,46 +37,65 @@ typedef struct { rocksdb_comparator_t** pCompares; } RocksdbCfInst; -uint32_t nextPow2(uint32_t x); - int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t nCf); -void destroyRocksdbCfInst(RocksdbCfInst* inst); +void destroyRocksdbCfInst(RocksdbCfInst* inst); +int32_t getCfIdx(const char* cfName); +STaskDbWrapper* taskDbOpenImpl(char* key, char* statePath, char* dbPath); + +void destroyCompactFilteFactory(void* arg); +void destroyCompactFilte(void* arg); +const char* compactFilteFactoryName(void* arg); +const char* compactFilteFactoryNameSess(void* arg); +const char* compactFilteFactoryNameState(void* arg); +const char* compactFilteFactoryNameFunc(void* arg); +const char* compactFilteFactoryNameFill(void* arg); + +const char* compactFilteName(void* arg); +const char* compactFilteNameSess(void* arg); +const char* compactFilteNameState(void* arg); +const char* compactFilteNameFill(void* arg); +const char* compactFilteNameFunc(void* arg); -void destroyCompactFilteFactory(void* arg); -void destroyCompactFilte(void* arg); -const char* compactFilteFactoryName(void* arg); -const char* compactFilteName(void* arg); unsigned char compactFilte(void* arg, int level, const char* key, size_t klen, const char* val, size_t vlen, char** newval, size_t* newvlen, unsigned char* value_changed); rocksdb_compactionfilter_t* compactFilteFactoryCreateFilter(void* arg, rocksdb_compactionfiltercontext_t* ctx); +rocksdb_compactionfilter_t* compactFilteFactoryCreateFilterSess(void* arg, rocksdb_compactionfiltercontext_t* ctx); +rocksdb_compactionfilter_t* compactFilteFactoryCreateFilterState(void* arg, rocksdb_compactionfiltercontext_t* ctx); +rocksdb_compactionfilter_t* compactFilteFactoryCreateFilterFunc(void* arg, rocksdb_compactionfiltercontext_t* ctx); +rocksdb_compactionfilter_t* compactFilteFactoryCreateFilterFill(void* arg, rocksdb_compactionfiltercontext_t* ctx); -const char* cfName[] = {"default", "state", "fill", "sess", "func", "parname", "partag"}; +typedef int (*__db_key_encode_fn_t)(void* key, char* buf); +typedef int (*__db_key_decode_fn_t)(void* key, char* buf); +typedef int (*__db_key_tostr_fn_t)(void* key, char* buf); +typedef const char* (*__db_key_cmpname_fn_t)(void* statue); +typedef int (*__db_key_cmp_fn_t)(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen); +typedef void (*__db_key_cmp_destroy_fn_t)(void* state); +typedef int32_t (*__db_value_encode_fn_t)(void* value, int32_t vlen, int64_t ttl, char** dest); +typedef int32_t (*__db_value_decode_fn_t)(void* value, int32_t vlen, int64_t* ttl, char** dest); -typedef int (*EncodeFunc)(void* key, char* buf); -typedef int (*DecodeFunc)(void* key, char* buf); -typedef int (*ToStringFunc)(void* key, char* buf); -typedef const char* (*CompareName)(void* statue); -typedef int (*BackendCmpFunc)(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen); -typedef void (*DestroyFunc)(void* state); -typedef int32_t (*EncodeValueFunc)(void* value, int32_t vlen, int64_t ttl, char** dest); -typedef int32_t (*DecodeValueFunc)(void* value, int32_t vlen, 
int64_t* ttl, char** dest); +typedef rocksdb_compactionfilter_t* (*__db_factory_create_fn_t)(void* arg, rocksdb_compactionfiltercontext_t* ctx); +typedef const char* (*__db_factory_name_fn_t)(void* arg); +typedef void (*__db_factory_destroy_fn_t)(void* arg); typedef struct { - const char* key; - int32_t len; - int idx; - BackendCmpFunc cmpFunc; - EncodeFunc enFunc; - DecodeFunc deFunc; - ToStringFunc toStrFunc; - CompareName cmpName; - DestroyFunc detroyFunc; - EncodeValueFunc enValueFunc; - DecodeValueFunc deValueFunc; + const char* key; + int32_t len; + int idx; + __db_key_cmp_fn_t cmpKey; + __db_key_encode_fn_t enFunc; + __db_key_decode_fn_t deFunc; + __db_key_tostr_fn_t toStrFunc; + __db_key_cmpname_fn_t cmpName; + __db_key_cmp_destroy_fn_t destroyCmp; + __db_value_encode_fn_t enValueFunc; + __db_value_decode_fn_t deValueFunc; + + __db_factory_create_fn_t createFilter; + __db_factory_destroy_fn_t destroyFilter; + __db_factory_name_fn_t funcName; } SCfInit; -#define GEN_COLUMN_FAMILY_NAME(name, idstr, SUFFIX) sprintf(name, "%s_%s", idstr, (SUFFIX)); const char* compareDefaultName(void* name); const char* compareStateName(void* name); const char* compareWinKeyName(void* name); @@ -139,296 +134,71 @@ int parKeyEncode(void* k, char* buf); int parKeyDecode(void* k, char* buf); int parKeyToString(void* k, char* buf); -int stremaValueEncode(void* k, char* buf); -int streamValueDecode(void* k, char* buf); -int32_t streamValueToString(void* k, char* buf); -int32_t streaValueIsStale(void* k, int64_t ts); -void destroyFunc(void* arg); +int32_t valueEncode(void* value, int32_t vlen, int64_t ttl, char** dest); +int32_t valueDecode(void* value, int32_t vlen, int64_t* ttl, char** dest); +int32_t valueToString(void* k, char* buf); +int32_t valueIsStale(void* k, int64_t ts); -int32_t encodeValueFunc(void* value, int32_t vlen, int64_t ttl, char** dest); -int32_t decodeValueFunc(void* value, int32_t vlen, int64_t* ttl, char** dest); +void destroyCompare(void* arg); -SBackendManager* bkdMgtCreate(char* path) { - SBackendManager* p = taosMemoryCalloc(1, sizeof(SBackendManager)); - p->curChkpId = 0; - p->preCkptId = 0; - p->pSST = taosArrayInit(64, sizeof(void*)); - p->path = taosStrdup(path); - p->len = strlen(path) + 128; - p->buf = taosMemoryCalloc(1, p->len); +static bool streamStateIterSeekAndValid(rocksdb_iterator_t* iter, char* buf, size_t len); +static rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfName, + rocksdb_snapshot_t** snapshot, rocksdb_readoptions_t** readOpt); - p->idx = 0; - p->pSstTbl[0] = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); - p->pSstTbl[1] = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); - - p->pAdd = taosArrayInit(64, sizeof(void*)); - p->pDel = taosArrayInit(64, sizeof(void*)); - p->update = 0; - return p; -} -void bkdMgtDestroy(SBackendManager* bm) { - if (bm == NULL) return; - - taosMemoryFree(bm->buf); - taosMemoryFree(bm->path); - - taosArrayDestroyP(bm->pSST, taosMemoryFree); - taosArrayDestroyP(bm->pAdd, taosMemoryFree); - taosArrayDestroyP(bm->pDel, taosMemoryFree); - - taosHashCleanup(bm->pSstTbl[0]); - taosHashCleanup(bm->pSstTbl[1]); - - taosMemoryFree(bm->pCurrent); - taosMemoryFree(bm->pManifest); - - taosMemoryFree(bm); -} - -int32_t compareHashTableImpl(SHashObj* p1, SHashObj* p2, SArray* diff) { - int32_t code = 0; - size_t len = 0; - void* pIter = taosHashIterate(p2, NULL); - while (pIter) { - char* name = taosHashGetKey(pIter, &len); - 
if (!taosHashGet(p1, name, len)) { - char* p = taosStrdup(name); - taosArrayPush(diff, &p); - } - pIter = taosHashIterate(p2, pIter); - } - return code; -} -int32_t compareHashTable(SHashObj* p1, SHashObj* p2, SArray* add, SArray* del) { - int32_t code = 0; - - code = compareHashTableImpl(p1, p2, add); - code = compareHashTableImpl(p2, p1, del); - - return code; -} -int32_t bkdMgtGetDelta(SBackendManager* bm, int64_t chkpId, SArray* list) { - const char* pCurrent = "CURRENT"; - int32_t currLen = strlen(pCurrent); - - const char* pManifest = "MANIFEST-"; - int32_t maniLen = strlen(pManifest); - - const char* pSST = ".sst"; - int32_t sstLen = strlen(pSST); - - memset(bm->buf, 0, bm->len); - sprintf(bm->buf, "%s%scheckpoint%" PRId64 "", bm->path, TD_DIRSEP, chkpId); - - taosArrayClearP(bm->pAdd, taosMemoryFree); - taosArrayClearP(bm->pDel, taosMemoryFree); - - TdDirPtr pDir = taosOpenDir(bm->buf); - TdDirEntryPtr de = NULL; - int8_t dummy = 0; - while ((de = taosReadDir(pDir)) != NULL) { - char* name = taosGetDirEntryName(de); - if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; - if (strlen(name) == currLen && strcmp(name, pCurrent) == 0) { - taosMemoryFreeClear(bm->pCurrent); - bm->pCurrent = taosStrdup(name); - taosHashPut(bm->pSstTbl[1 - bm->idx], name, strlen(name), &dummy, sizeof(dummy)); - continue; - } - - if (strlen(name) >= maniLen && strncmp(name, pManifest, maniLen) == 0) { - taosMemoryFreeClear(bm->pManifest); - bm->pManifest = taosStrdup(name); - taosHashPut(bm->pSstTbl[1 - bm->idx], name, strlen(name), &dummy, sizeof(dummy)); - continue; - } - if (strlen(name) >= sstLen && strncmp(name + strlen(name) - 4, pSST, sstLen) == 0) { - // char* p = taosStrdup(name); - taosHashPut(bm->pSstTbl[1 - bm->idx], name, strlen(name), &dummy, sizeof(dummy)); - continue; - } - } - if (bm->init == 0) { - bm->preCkptId = -1; - bm->curChkpId = chkpId; - bm->init = 1; - - void* pIter = taosHashIterate(bm->pSstTbl[1 - bm->idx], NULL); - while (pIter) { - size_t len; - char* name = taosHashGetKey(pIter, &len); - if (name != NULL && len != 0) { - taosArrayPush(bm->pAdd, &name); - } - pIter = taosHashIterate(bm->pSstTbl[1 - bm->idx], pIter); - } - if (taosArrayGetSize(bm->pAdd) > 0) bm->update = 1; - } else { - int32_t code = compareHashTable(bm->pSstTbl[bm->idx], bm->pSstTbl[1 - bm->idx], bm->pAdd, bm->pDel); - if (code != 0) { - // dead code - taosArrayClearP(bm->pAdd, taosMemoryFree); - taosArrayClearP(bm->pDel, taosMemoryFree); - taosHashClear(bm->pSstTbl[1 - bm->idx]); - bm->update = 0; - taosCloseDir(&pDir); - return code; - } - - bm->preCkptId = bm->curChkpId; - bm->curChkpId = chkpId; - if (taosArrayGetSize(bm->pAdd) == 0 && taosArrayGetSize(bm->pDel) == 0) { - bm->update = 0; - } - } - taosHashClear(bm->pSstTbl[bm->idx]); - bm->idx = 1 - bm->idx; - - taosCloseDir(&pDir); - - return 0; -} - -int32_t bkdMgtDumpTo(SBackendManager* bm, char* dname) { - int32_t code = 0; - int32_t len = bm->len + 128; - - char* srcBuf = taosMemoryCalloc(1, len); - char* dstBuf = taosMemoryCalloc(1, len); - - char* srcDir = taosMemoryCalloc(1, len); - char* dstDir = taosMemoryCalloc(1, len); - - sprintf(srcDir, "%s%s%s%" PRId64 "", bm->path, TD_DIRSEP, "checkpoint", bm->curChkpId); - sprintf(dstDir, "%s%s%s", bm->path, TD_DIRSEP, dname); - - if (!taosDirExist(srcDir)) { - stError("failed to dump srcDir %s, reason: not exist such dir", srcDir); - code = -1; - goto _ERROR; - } - - code = taosMkDir(dstDir); - if (code != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - stError("failed to mkdir srcDir %s, 
reason: %s", dstDir, terrstr()); - goto _ERROR; - } - - // clear current file - memset(dstBuf, 0, len); - sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, bm->pCurrent); - taosRemoveFile(dstBuf); - - memset(dstBuf, 0, len); - sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, bm->pManifest); - taosRemoveFile(dstBuf); - - // add file to $name dir - for (int i = 0; i < taosArrayGetSize(bm->pAdd); i++) { - memset(dstBuf, 0, len); - memset(srcBuf, 0, len); - - char* filename = taosArrayGetP(bm->pAdd, i); - sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, filename); - sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, filename); - - taosCopyFile(srcBuf, dstBuf); - } - // del file in $name - for (int i = 0; i < taosArrayGetSize(bm->pDel); i++) { - memset(dstBuf, 0, len); - memset(srcBuf, 0, len); - - char* filename = taosArrayGetP(bm->pDel, i); - sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, filename); - taosRemoveFile(dstBuf); - } - - // copy current file to dst dir - memset(srcBuf, 0, len); - memset(dstBuf, 0, len); - sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, bm->pCurrent); - sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, bm->pCurrent); - taosCopyFile(srcBuf, dstBuf); - - // copy manifest file to dst dir - memset(srcBuf, 0, len); - memset(dstBuf, 0, len); - sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, bm->pManifest); - sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, bm->pManifest); - taosCopyFile(srcBuf, dstBuf); - - // clear delta data buf - taosArrayClearP(bm->pAdd, taosMemoryFree); - taosArrayClearP(bm->pDel, taosMemoryFree); - -_ERROR: - taosMemoryFree(srcBuf); - taosMemoryFree(dstBuf); - taosMemoryFree(srcDir); - taosMemoryFree(dstDir); - return code; -} +#define GEN_COLUMN_FAMILY_NAME(name, idstr, SUFFIX) sprintf(name, "%s_%s", idstr, (SUFFIX)); +int32_t copyFiles(const char* src, const char* dst); +uint32_t nextPow2(uint32_t x); SCfInit ginitDict[] = { {"default", 7, 0, defaultKeyComp, defaultKeyEncode, defaultKeyDecode, defaultKeyToString, compareDefaultName, - destroyFunc, encodeValueFunc, decodeValueFunc}, - {"state", 5, 1, stateKeyDBComp, stateKeyEncode, stateKeyDecode, stateKeyToString, compareStateName, destroyFunc, - encodeValueFunc, decodeValueFunc}, - {"fill", 4, 2, winKeyDBComp, winKeyEncode, winKeyDecode, winKeyToString, compareWinKeyName, destroyFunc, - encodeValueFunc, decodeValueFunc}, + destroyCompare, valueEncode, valueDecode, compactFilteFactoryCreateFilter, destroyCompactFilteFactory, + compactFilteFactoryName}, + + {"state", 5, 1, stateKeyDBComp, stateKeyEncode, stateKeyDecode, stateKeyToString, compareStateName, destroyCompare, + valueEncode, valueDecode, compactFilteFactoryCreateFilterState, destroyCompactFilteFactory, + compactFilteFactoryNameState}, + + {"fill", 4, 2, winKeyDBComp, winKeyEncode, winKeyDecode, winKeyToString, compareWinKeyName, destroyCompare, + valueEncode, valueDecode, compactFilteFactoryCreateFilterFill, destroyCompactFilteFactory, + compactFilteFactoryNameFill}, + {"sess", 4, 3, stateSessionKeyDBComp, stateSessionKeyEncode, stateSessionKeyDecode, stateSessionKeyToString, - compareSessionKeyName, destroyFunc, encodeValueFunc, decodeValueFunc}, - {"func", 4, 4, tupleKeyDBComp, tupleKeyEncode, tupleKeyDecode, tupleKeyToString, compareFuncKeyName, destroyFunc, - encodeValueFunc, decodeValueFunc}, - {"parname", 7, 5, parKeyDBComp, parKeyEncode, parKeyDecode, parKeyToString, compareParKeyName, destroyFunc, - encodeValueFunc, decodeValueFunc}, - {"partag", 6, 6, parKeyDBComp, parKeyEncode, parKeyDecode, parKeyToString, comparePartagKeyName, destroyFunc, - encodeValueFunc, 
decodeValueFunc}, + compareSessionKeyName, destroyCompare, valueEncode, valueDecode, compactFilteFactoryCreateFilterSess, + destroyCompactFilteFactory, compactFilteFactoryNameSess}, + + {"func", 4, 4, tupleKeyDBComp, tupleKeyEncode, tupleKeyDecode, tupleKeyToString, compareFuncKeyName, destroyCompare, + valueEncode, valueDecode, compactFilteFactoryCreateFilterFunc, destroyCompactFilteFactory, + compactFilteFactoryNameFunc}, + + {"parname", 7, 5, parKeyDBComp, parKeyEncode, parKeyDecode, parKeyToString, compareParKeyName, destroyCompare, + valueEncode, valueDecode, compactFilteFactoryCreateFilter, destroyCompactFilteFactory, compactFilteFactoryName}, + + {"partag", 6, 6, parKeyDBComp, parKeyEncode, parKeyDecode, parKeyToString, comparePartagKeyName, destroyCompare, + valueEncode, valueDecode, compactFilteFactoryCreateFilter, destroyCompactFilteFactory, compactFilteFactoryName}, }; -bool isValidCheckpoint(const char* dir) { return true; } - -int32_t copyFiles(const char* src, const char* dst) { - int32_t code = 0; - // opt later, just hard link - int32_t sLen = strlen(src); - int32_t dLen = strlen(dst); - char* srcName = taosMemoryCalloc(1, sLen + 64); - char* dstName = taosMemoryCalloc(1, dLen + 64); - - TdDirPtr pDir = taosOpenDir(src); - if (pDir == NULL) { - taosMemoryFree(srcName); - taosMemoryFree(dstName); - return -1; - } - - TdDirEntryPtr de = NULL; - while ((de = taosReadDir(pDir)) != NULL) { - char* name = taosGetDirEntryName(de); - if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; - - sprintf(srcName, "%s%s%s", src, TD_DIRSEP, name); - sprintf(dstName, "%s%s%s", dst, TD_DIRSEP, name); - if (!taosDirEntryIsDir(de)) { - code = taosCopyFile(srcName, dstName); - if (code == -1) { - goto _err; - } +int32_t getCfIdx(const char* cfName) { + int idx = -1; + size_t len = strlen(cfName); + for (int i = 0; i < sizeof(ginitDict) / sizeof(ginitDict[0]); i++) { + if (len == ginitDict[i].len && strncmp(cfName, ginitDict[i].key, strlen(cfName)) == 0) { + idx = i; + break; } - - memset(srcName, 0, sLen + 64); - memset(dstName, 0, dLen + 64); } - -_err: - taosMemoryFreeClear(srcName); - taosMemoryFreeClear(dstName); - taosCloseDir(&pDir); - return code >= 0 ? 
0 : -1; + return idx; } + +bool isValidCheckpoint(const char* dir) { + return true; + STaskDbWrapper* pDb = taskDbOpenImpl(NULL, NULL, (char*)dir); + if (pDb == NULL) { + return false; + } + taskDbDestroy(pDb, false); + return true; +} + int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { // impl later int32_t code = 0; @@ -451,6 +221,7 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { } taosMkDir(state); code = copyFiles(chkp, state); + stInfo("copy snap file from %s to %s", chkp, state); if (code != 0) { stError("failed to restart stream backend from %s, reason: %s", chkp, tstrerror(TAOS_SYSTEM_ERROR(errno))); } else { @@ -468,7 +239,246 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { return 0; } +int32_t remoteChkp_readMetaData(char* path, SArray* list) { + char* metaPath = taosMemoryCalloc(1, strlen(path) + 32); + sprintf(metaPath, "%s%s%s", path, TD_DIRSEP, "META"); + TdFilePtr pFile = taosOpenFile(metaPath, TD_FILE_READ); + + char buf[128] = {0}; + if (taosReadFile(pFile, buf, sizeof(buf)) <= 0) { + taosMemoryFree(metaPath); + taosCloseFile(&pFile); + return -1; + } + int32_t len = strlen(buf); + for (int i = 0; i < len; i++) { + if (buf[i] == '\n') { + char* item = taosMemoryCalloc(1, i + 1); + memcpy(item, buf, i); + taosArrayPush(list, &item); + + item = taosMemoryCalloc(1, len - i); + memcpy(item, buf + i + 1, len - i - 1); + taosArrayPush(list, &item); + } + } + + taosCloseFile(&pFile); + taosMemoryFree(metaPath); + return 0; +} +int32_t remoteChkp_validMetaFile(char* name, char* prename, int64_t chkpId) { + int8_t valid = 0; + for (int i = 0; i < strlen(name); i++) { + if (name[i] == '_') { + memcpy(prename, name, i); + if (taosStr2int64(name + i + 1) != chkpId) { + break; + } else { + valid = 1; + } + } + } + return valid; +} +int32_t remoteChkp_validAndCvtMeta(char* path, SArray* list, int64_t chkpId) { + int32_t complete = 1; + int32_t len = strlen(path) + 32; + char* src = taosMemoryCalloc(1, len); + char* dst = taosMemoryCalloc(1, len); + + int8_t count = 0; + for (int i = 0; i < taosArrayGetSize(list); i++) { + char* p = taosArrayGetP(list, i); + sprintf(src, "%s%s%s", path, TD_DIRSEP, p); + + // check file exist + if (taosStatFile(src, NULL, NULL, NULL) != 0) { + complete = 0; + break; + } + + // check file name + char temp[64] = {0}; + if (remoteChkp_validMetaFile(p, temp, chkpId)) { + count++; + } + + // rename file + sprintf(dst, "%s%s%s", path, TD_DIRSEP, temp); + taosRenameFile(src, dst); + + memset(src, 0, len); + memset(dst, 0, len); + } + if (count != taosArrayGetSize(list)) { + complete = 0; + } + + taosMemoryFree(src); + taosMemoryFree(dst); + + return complete == 1 ?
0 : -1; +} + +int32_t rebuildFromRemoteChkp_rsync(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { + // impl later + int32_t code = 0; + if (taosIsDir(chkpPath)) { + taosRemoveDir(chkpPath); + } + if (taosIsDir(defaultPath)) { + taosRemoveDir(defaultPath); + } + + code = downloadCheckpoint(key, chkpPath); + if (code != 0) { + return code; + } + code = copyFiles(chkpPath, defaultPath); + + return code; +} +int32_t rebuildFromRemoteChkp_s3(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { + int32_t code = downloadCheckpoint(key, chkpPath); + if (code != 0) { + return code; + } + + int32_t len = strlen(defaultPath) + 32; + char* tmp = taosMemoryCalloc(1, len); + sprintf(tmp, "%s%s", defaultPath, "_tmp"); + if (taosIsDir(tmp)) taosRemoveDir(tmp); + if (taosIsDir(defaultPath)) taosRenameFile(defaultPath, tmp); + + SArray* list = taosArrayInit(2, sizeof(void*)); + code = remoteChkp_readMetaData(chkpPath, list); + if (code == 0) { + code = remoteChkp_validAndCvtMeta(chkpPath, list, chkpId); + } + taosArrayDestroyP(list, taosMemoryFree); + + if (code == 0) { + taosMkDir(defaultPath); + code = copyFiles(chkpPath, defaultPath); + } + + if (code != 0) { + if (taosIsDir(defaultPath)) taosRemoveDir(defaultPath); + if (taosIsDir(tmp)) taosRenameFile(tmp, defaultPath); + } else { + taosRemoveDir(tmp); + } + + taosMemoryFree(tmp); + return code; +} +int32_t rebuildFromRemoteChkp(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { + UPLOAD_TYPE type = getUploadType(); + if (type == UPLOAD_S3) { + return rebuildFromRemoteChkp_s3(key, chkpPath, chkpId, defaultPath); + } else if (type == UPLOAD_RSYNC) { + return rebuildFromRemoteChkp_rsync(key, chkpPath, chkpId, defaultPath); + } + return -1; +} + +int32_t rebuildFromLocalChkp(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { + int32_t code = -1; + int32_t len = strlen(defaultPath) + 32; + char* tmp = taosMemoryCalloc(1, len); + sprintf(tmp, "%s%s", defaultPath, "_tmp"); + + if (taosIsDir(tmp)) taosRemoveDir(tmp); + if (taosIsDir(defaultPath)) taosRenameFile(defaultPath, tmp); + + if (taosIsDir(chkpPath) && isValidCheckpoint(chkpPath)) { + if (taosIsDir(tmp)) { + taosRemoveDir(tmp); + } + taosMkDir(defaultPath); + code = copyFiles(chkpPath, defaultPath); + if (code != 0) { + stError("failed to restart stream backend from %s, reason: %s", chkpPath, tstrerror(TAOS_SYSTEM_ERROR(errno))); + } else { + stInfo("start to restart stream backend at checkpoint path: %s", chkpPath); + } + } + if (code != 0) { + if (taosIsDir(defaultPath)) taosRemoveDir(defaultPath); + if (taosIsDir(tmp)) taosRenameFile(tmp, defaultPath); + } else { + taosRemoveDir(tmp); + } + + taosMemoryFree(tmp); + return code; +} + +int32_t rebuildFromlocalDefault(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { + int32_t code = 0; + return code; +} + +int32_t rebuildDirFromChkp2(const char* path, char* key, int64_t chkpId, char** dbPrefixPath, char** dbPath) { + // impl later + int32_t code = 0; + + char* prefixPath = taosMemoryCalloc(1, strlen(path) + 128); + sprintf(prefixPath, "%s%s%s", path, TD_DIRSEP, key); + + if (!taosIsDir(prefixPath)) { + code = taosMkDir(prefixPath); + ASSERT(code == 0); + } + + char* defaultPath = taosMemoryCalloc(1, strlen(path) + 256); + sprintf(defaultPath, "%s%s%s", prefixPath, TD_DIRSEP, "state"); + if (!taosIsDir(defaultPath)) { + taosMulMkDir(defaultPath); + } + + char* chkpPath = taosMemoryCalloc(1, strlen(path) + 256); + if (chkpId != 0) { + sprintf(chkpPath, "%s%s%s%s%s%" PRId64 "", 
prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkpId); + code = rebuildFromLocalChkp(key, chkpPath, chkpId, defaultPath); + if (code != 0) { + code = rebuildFromRemoteChkp(key, chkpPath, chkpId, defaultPath); + } + + if (code != 0) { + stInfo("failed to start stream backend at %s, reason: %s, restart from default dir:%s", chkpPath, + tstrerror(TAOS_SYSTEM_ERROR(errno)), defaultPath); + code = taosMkDir(defaultPath); + } + } else { + sprintf(chkpPath, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", + (int64_t)-1); + + code = rebuildFromLocalChkp(key, chkpPath, -1, defaultPath); + if (code != 0) { + code = taosMkDir(defaultPath); + } + } + taosMemoryFree(chkpPath); + + *dbPath = defaultPath; + *dbPrefixPath = prefixPath; + + return code; +} + +bool streamBackendDataIsExist(const char* path, int64_t chkpId, int32_t vgId) { + bool exist = true; + char* state = taosMemoryCalloc(1, strlen(path) + 32); + sprintf(state, "%s%s%s", path, TD_DIRSEP, "state"); + if (!taosDirExist(state)) { + exist = false; + } + taosMemoryFree(state); + return exist; +} void* streamBackendInit(const char* streamPath, int64_t chkpId, int32_t vgId) { char* backendPath = NULL; int32_t code = rebuildDirFromCheckpoint(streamPath, chkpId, &backendPath); @@ -648,7 +658,7 @@ void streamBackendHandleCleanup(void* arg) { taosThreadRwlockDestroy(&wrapper->rwLock); wrapper->rocksdb = NULL; - taosReleaseRef(streamBackendId, wrapper->backendId); + // taosReleaseRef(streamBackendId, wrapper->backendId); stDebug("end to do-close backendwrapper %p, %s", wrapper, wrapper->idstr); taosMemoryFree(wrapper); @@ -728,8 +738,65 @@ int32_t delObsoleteCheckpoint(void* arg, const char* path) { taosArrayDestroy(chkpDel); return 0; } +/* + * checkpointSave |--cp1--|--cp2--|--cp3--|--cp4--|--cp5--| + * chkpInUse: |--cp2--|--cp4--| + * checkpoints in chkpInUse are still being replicated and cannot be + * deleted until replication is finished + */ +int32_t chkpMayDelObsolete(void* arg, int64_t chkpId, char* path) { + STaskDbWrapper* pBackend = arg; -static int32_t compareCheckpoint(const void* a, const void* b) { + taosThreadRwlockWrlock(&pBackend->chkpDirLock); + + taosArrayPush(pBackend->chkpSaved, &chkpId); + + SArray* chkpDel = taosArrayInit(8, sizeof(int64_t)); + SArray* chkpDup = taosArrayInit(8, sizeof(int64_t)); + + int64_t firsId = 0; + if (taosArrayGetSize(pBackend->chkpInUse) >= 1) { + firsId = *(int64_t*)taosArrayGet(pBackend->chkpInUse, 0); + + for (int i = 0; i < taosArrayGetSize(pBackend->chkpSaved); i++) { + int64_t id = *(int64_t*)taosArrayGet(pBackend->chkpSaved, i); + if (id >= firsId) { + taosArrayPush(chkpDup, &id); + } else { + taosArrayPush(chkpDel, &id); + } + } + } else { + int32_t sz = taosArrayGetSize(pBackend->chkpSaved); + int32_t dsz = sz - pBackend->chkpCap; // del size + + for (int i = 0; i < dsz; i++) { + int64_t id = *(int64_t*)taosArrayGet(pBackend->chkpSaved, i); + taosArrayPush(chkpDel, &id); + } + for (int i = dsz < 0 ?
0 : dsz; i < sz; i++) { + int64_t id = *(int64_t*)taosArrayGet(pBackend->chkpSaved, i); + taosArrayPush(chkpDup, &id); + } + } + taosArrayDestroy(pBackend->chkpSaved); + pBackend->chkpSaved = chkpDup; + + taosThreadRwlockUnlock(&pBackend->chkpDirLock); + + for (int i = 0; i < taosArrayGetSize(chkpDel); i++) { + int64_t id = *(int64_t*)taosArrayGet(chkpDel, i); + char tbuf[256] = {0}; + sprintf(tbuf, "%s%scheckpoint%" PRId64 "", path, TD_DIRSEP, id); + if (taosIsDir(tbuf)) { + taosRemoveDir(tbuf); + } + } + taosArrayDestroy(chkpDel); + return 0; +} + +static int32_t chkpIdComp(const void* a, const void* b) { int64_t x = *(int64_t*)a; int64_t y = *(int64_t*)b; return x < y ? -1 : 1; @@ -774,7 +841,7 @@ int32_t streamBackendLoadCheckpointInfo(void* arg) { continue; } } - taosArraySort(suffix, compareCheckpoint); + taosArraySort(suffix, chkpIdComp); // free previous chkpSaved taosArrayClear(pMeta->chkpSaved); for (int i = 0; i < taosArrayGetSize(suffix); i++) { @@ -789,30 +856,50 @@ int32_t streamBackendLoadCheckpointInfo(void* arg) { } int32_t chkpGetAllDbCfHandle(SStreamMeta* pMeta, rocksdb_column_family_handle_t*** ppHandle, SArray* refs) { - SArray* pHandle = taosArrayInit(16, POINTER_BYTES); - void* pIter = taosHashIterate(pMeta->pTaskBackendUnique, NULL); - while (pIter) { - int64_t id = *(int64_t*)pIter; + return 0; + // SArray* pHandle = taosArrayInit(16, POINTER_BYTES); + // void* pIter = taosHashIterate(pMeta->pTaskDbUnique, NULL); + // while (pIter) { + // int64_t id = *(int64_t*)pIter; - SBackendCfWrapper* wrapper = taosAcquireRef(streamBackendCfWrapperId, id); - if (wrapper == NULL) { - pIter = taosHashIterate(pMeta->pTaskBackendUnique, pIter); - continue; + // SBackendCfWrapper* wrapper = taosAcquireRef(streamBackendCfWrapperId, id); + // if (wrapper == NULL) { + // pIter = taosHashIterate(pMeta->pTaskDbUnique, pIter); + // continue; + // } + + // taosThreadRwlockRdlock(&wrapper->rwLock); + // for (int i = 0; i < sizeof(ginitDict) / sizeof(ginitDict[0]); i++) { + // if (wrapper->pHandle[i]) { + // rocksdb_column_family_handle_t* p = wrapper->pHandle[i]; + // taosArrayPush(pHandle, &p); + // } + // } + // taosThreadRwlockUnlock(&wrapper->rwLock); + + // taosArrayPush(refs, &id); + // } + + // int32_t nCf = taosArrayGetSize(pHandle); + + // rocksdb_column_family_handle_t** ppCf = taosMemoryCalloc(nCf, sizeof(rocksdb_column_family_handle_t*)); + // for (int i = 0; i < nCf; i++) { + // ppCf[i] = taosArrayGetP(pHandle, i); + // } + // taosArrayDestroy(pHandle); + + // *ppHandle = ppCf; + // return nCf; +} + +int32_t chkpGetAllDbCfHandle2(STaskDbWrapper* pBackend, rocksdb_column_family_handle_t*** ppHandle) { + SArray* pHandle = taosArrayInit(8, POINTER_BYTES); + for (int i = 0; i < sizeof(ginitDict) / sizeof(ginitDict[0]); i++) { + if (pBackend->pCf[i]) { + rocksdb_column_family_handle_t* p = pBackend->pCf[i]; + taosArrayPush(pHandle, &p); } - - taosThreadRwlockRdlock(&wrapper->rwLock); - for (int i = 0; i < sizeof(ginitDict) / sizeof(ginitDict[0]); i++) { - if (wrapper->pHandle[i]) { - rocksdb_column_family_handle_t* p = wrapper->pHandle[i]; - taosArrayPush(pHandle, &p); - } - } - taosThreadRwlockUnlock(&wrapper->rwLock); - - taosArrayPush(refs, &id); - pIter = taosHashIterate(pMeta->pTaskBackendUnique, pIter); } - int32_t nCf = taosArrayGetSize(pHandle); if (nCf == 0) { taosArrayDestroy(pHandle); @@ -828,6 +915,7 @@ int32_t chkpGetAllDbCfHandle(SStreamMeta* pMeta, rocksdb_column_family_handle_t* *ppHandle = ppCf; return nCf; } + int32_t chkpDoDbCheckpoint(rocksdb_t* db, char* path) 
{ int32_t code = -1; char* err = NULL; @@ -866,7 +954,8 @@ int32_t chkpPreFlushDb(rocksdb_t* db, rocksdb_column_family_handle_t** cf, int32 rocksdb_flushoptions_destroy(flushOpt); return code; } -int32_t chkpPreCheckDir(char* path, int64_t chkpId, char** chkpDir, char** chkpIdDir) { + +int32_t chkpPreBuildDir(char* path, int64_t chkpId, char** chkpDir, char** chkpIdDir) { int32_t code = 0; char* pChkpDir = taosMemoryCalloc(1, 256); char* pChkpIdDir = taosMemoryCalloc(1, 256); @@ -881,145 +970,111 @@ int32_t chkpPreCheckDir(char* path, int64_t chkpId, char** chkpDir, char** chkpI return code; } - sprintf(pChkpIdDir, "%s%scheckpoint%" PRId64, pChkpDir, TD_DIRSEP, chkpId); + sprintf(pChkpIdDir, "%s%s%s%" PRId64, pChkpDir, TD_DIRSEP, "checkpoint", chkpId); if (taosIsDir(pChkpIdDir)) { stInfo("stream rm exist checkpoint%s", pChkpIdDir); - taosRemoveFile(pChkpIdDir); + taosRemoveDir(pChkpIdDir); } *chkpDir = pChkpDir; *chkpIdDir = pChkpIdDir; return 0; } - -int32_t streamBackendTriggerChkp(void* arg, char* dst) { +int32_t taskDbBuildSnap(void* arg, SArray* pSnap) { SStreamMeta* pMeta = arg; - int64_t backendRid = pMeta->streamBackendRid; - int32_t code = -1; + void* pIter = taosHashIterate(pMeta->pTaskDbUnique, NULL); + int32_t code = 0; - SArray* refs = taosArrayInit(16, sizeof(int64_t)); - rocksdb_column_family_handle_t** ppCf = NULL; + while (pIter) { + STaskDbWrapper* pTaskDb = *(STaskDbWrapper**)pIter; + taskDbAddRef(pTaskDb); - int64_t st = taosGetTimestampMs(); - SBackendWrapper* pHandle = taosAcquireRef(streamBackendId, backendRid); + code = taskDbDoCheckpoint(pTaskDb, pTaskDb->chkpId); + taskDbRemoveRef(pTaskDb); - if (pHandle == NULL || pHandle->db == NULL) { - goto _ERROR; + SStreamTask* pTask = pTaskDb->pTask; + SStreamTaskSnap snap = {.streamId = pTask->id.streamId, + .taskId = pTask->id.taskId, + .chkpId = pTaskDb->chkpId, + .dbPrefixPath = taosStrdup(pTaskDb->path)}; + taosArrayPush(pSnap, &snap); + pIter = taosHashIterate(pMeta->pTaskDbUnique, pIter); } - int32_t nCf = chkpGetAllDbCfHandle(pMeta, &ppCf, refs); - stDebug("stream backend:%p start to do checkpoint at:%s, cf num: %d ", pHandle, dst, nCf); - - code = chkpPreFlushDb(pHandle->db, ppCf, nCf); - if (code == 0) { - code = chkpDoDbCheckpoint(pHandle->db, dst); - if (code != 0) { - stError("stream backend:%p failed to do checkpoint at:%s", pHandle, dst); - } else { - stDebug("stream backend:%p end to do checkpoint at:%s, time cost:%" PRId64 "ms", pHandle, dst, - taosGetTimestampMs() - st); - } - } else { - stError("stream backend:%p failed to flush db at:%s", pHandle, dst); - } - - // release all ref to cfWrapper; - for (int i = 0; i < taosArrayGetSize(refs); i++) { - int64_t id = *(int64_t*)taosArrayGet(refs, i); - taosReleaseRef(streamBackendCfWrapperId, id); - } - -_ERROR: - taosMemoryFree(ppCf); - taosReleaseRef(streamBackendId, backendRid); - taosArrayDestroy(refs); return code; } int32_t streamBackendAddInUseChkp(void* arg, int64_t chkpId) { - if (arg == NULL) return 0; + // if (arg == NULL) return 0; - SStreamMeta* pMeta = arg; - taosWLockLatch(&pMeta->chkpDirLock); - taosArrayPush(pMeta->chkpInUse, &chkpId); - taosWUnLockLatch(&pMeta->chkpDirLock); + // SStreamMeta* pMeta = arg; + // taosWLockLatch(&pMeta->chkpDirLock); + // taosArrayPush(pMeta->chkpInUse, &chkpId); + // taosWUnLockLatch(&pMeta->chkpDirLock); return 0; } int32_t streamBackendDelInUseChkp(void* arg, int64_t chkpId) { - if (arg == NULL) return 0; - - SStreamMeta* pMeta = arg; - taosWLockLatch(&pMeta->chkpDirLock); - if 
(taosArrayGetSize(pMeta->chkpInUse) > 0) { - int64_t id = *(int64_t*)taosArrayGet(pMeta->chkpInUse, 0); - if (id == chkpId) { - taosArrayPopFrontBatch(pMeta->chkpInUse, 1); - } - } - taosWUnLockLatch(&pMeta->chkpDirLock); return 0; + // if (arg == NULL) return 0; + + // SStreamMeta* pMeta = arg; + // taosWLockLatch(&pMeta->chkpDirLock); + // if (taosArrayGetSize(pMeta->chkpInUse) > 0) { + // int64_t id = *(int64_t*)taosArrayGet(pMeta->chkpInUse, 0); + // if (id == chkpId) { + // taosArrayPopFrontBatch(pMeta->chkpInUse, 1); + // } + // } + // taosWUnLockLatch(&pMeta->chkpDirLock); } -int32_t streamBackendDoCheckpoint(void* arg, uint64_t checkpointId) { - SStreamMeta* pMeta = arg; - int64_t backendRid = pMeta->streamBackendRid; - int64_t st = taosGetTimestampMs(); - int32_t code = -1; +/* + 0 +*/ +int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId) { + STaskDbWrapper* pTaskDb = arg; + int64_t st = taosGetTimestampMs(); + int32_t code = -1; + int64_t refId = pTaskDb->refId; - SArray* refs = taosArrayInit(16, sizeof(int64_t)); - - rocksdb_column_family_handle_t** ppCf = NULL; + if (taosAcquireRef(taskDbWrapperId, refId) == NULL) { + return -1; + } char* pChkpDir = NULL; char* pChkpIdDir = NULL; - if (chkpPreCheckDir(pMeta->path, checkpointId, &pChkpDir, &pChkpIdDir) != 0) { - taosArrayDestroy(refs); - return code; + if (chkpPreBuildDir(pTaskDb->path, chkpId, &pChkpDir, &pChkpIdDir) != 0) { + code = -1; + goto _EXIT; } - - SBackendWrapper* pHandle = taosAcquireRef(streamBackendId, backendRid); - if (pHandle == NULL || pHandle->db == NULL) { - stError("failed to acquire state-backend handle"); - goto _ERROR; - } - // Get all cf and acquire cfWrappter - int32_t nCf = chkpGetAllDbCfHandle(pMeta, &ppCf, refs); - stDebug("stream backend:%p start to do checkpoint at:%s, cf num: %d ", pHandle, pChkpIdDir, nCf); + rocksdb_column_family_handle_t** ppCf = NULL; - code = chkpPreFlushDb(pHandle->db, ppCf, nCf); - if (code == 0) { - code = chkpDoDbCheckpoint(pHandle->db, pChkpIdDir); - if (code != 0) { - stError("stream backend:%p failed to do checkpoint at:%s", pHandle, pChkpIdDir); + int32_t nCf = chkpGetAllDbCfHandle2(pTaskDb, &ppCf); + qDebug("stream backend:%p start to do checkpoint at:%s, cf num: %d ", pTaskDb, pChkpIdDir, nCf); + + if ((code = chkpPreFlushDb(pTaskDb->db, ppCf, nCf)) == 0) { + if ((code = chkpDoDbCheckpoint(pTaskDb->db, pChkpIdDir)) != 0) { + stError("stream backend:%p failed to do checkpoint at:%s", pTaskDb, pChkpIdDir); } else { - stDebug("stream backend:%p end to do checkpoint at:%s, time cost:%" PRId64 "ms", pHandle, pChkpIdDir, - taosGetTimestampMs() - st); + qDebug("stream backend:%p end to do checkpoint at:%s, time cost:%" PRId64 "ms", pTaskDb, pChkpIdDir, + taosGetTimestampMs() - st); } } else { - stError("stream backend:%p failed to flush db at:%s", pHandle, pChkpIdDir); - } - // release all ref to cfWrapper; - for (int i = 0; i < taosArrayGetSize(refs); i++) { - int64_t id = *(int64_t*)taosArrayGet(refs, i); - taosReleaseRef(streamBackendCfWrapperId, id); - } - if (code == 0) { - taosWLockLatch(&pMeta->chkpDirLock); - taosArrayPush(pMeta->chkpSaved, &checkpointId); - taosWUnLockLatch(&pMeta->chkpDirLock); - - // delete obsolte checkpoint - delObsoleteCheckpoint(arg, pChkpDir); - pMeta->chkpId = checkpointId; + stError("stream backend:%p failed to flush db at:%s", pTaskDb, pChkpIdDir); } -_ERROR: - taosReleaseRef(streamBackendId, backendRid); - taosArrayDestroy(refs); - taosMemoryFree(ppCf); + code = chkpMayDelObsolete(pTaskDb, chkpId, pChkpDir); + pTaskDb->dataWritten = 0; 
+ + pTaskDb->chkpId = chkpId; + +_EXIT: taosMemoryFree(pChkpDir); taosMemoryFree(pChkpIdDir); + taosReleaseRef(taskDbWrapperId, refId); + taosMemoryFree(ppCf); return code; } +int32_t streamBackendDoCheckpoint(void* arg, int64_t chkpId) { return taskDbDoCheckpoint(arg, chkpId); } SListNode* streamBackendAddCompare(void* backend, void* arg) { SBackendWrapper* pHandle = (SBackendWrapper*)backend; @@ -1040,15 +1095,33 @@ void streamBackendDelCompare(void* backend, void* arg) { taosMemoryFree(node); } } -void streamStateDestroy_rocksdb(SStreamState* pState, bool remove) { streamStateCloseBackend(pState, remove); } -static bool streamStateIterSeekAndValid(rocksdb_iterator_t* iter, char* buf, size_t len); +void streamStateDestroy_rocksdb(SStreamState* pState, bool remove) { streamStateCloseBackend(pState, remove); } +void destroyRocksdbCfInst(RocksdbCfInst* inst) { + int cfLen = sizeof(ginitDict) / sizeof(ginitDict[0]); + if (inst->pHandle) { + for (int i = 0; i < cfLen; i++) { + if (inst->pHandle[i]) rocksdb_column_family_handle_destroy((inst->pHandle)[i]); + } + taosMemoryFree(inst->pHandle); + } + + if (inst->cfOpt) { + for (int i = 0; i < cfLen; i++) { + rocksdb_options_destroy(inst->cfOpt[i]); + rocksdb_block_based_options_destroy(((RocksdbCfParam*)inst->param)[i].tableOpt); + } + taosMemoryFreeClear(inst->cfOpt); + taosMemoryFreeClear(inst->param); + } + if (inst->wOpt) rocksdb_writeoptions_destroy(inst->wOpt); + if (inst->rOpt) rocksdb_readoptions_destroy(inst->rOpt); + + taosMemoryFree(inst); +} // |key|-----value------| // |key|ttl|len|userData| -static rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfName, - rocksdb_snapshot_t** snapshot, rocksdb_readoptions_t** readOpt); - int defaultKeyComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen) { int len = aLen < bLen ? 
aLen : bLen; int ret = memcmp(aBuf, bBuf, len); @@ -1331,23 +1404,7 @@ int parKeyToString(void* k, char* buf) { n = sprintf(buf + n, "[groupId:%" PRIi64 "]", *key); return n; } -int stremaValueEncode(void* k, char* buf) { - int len = 0; - SStreamValue* key = k; - len += taosEncodeFixedI64((void**)&buf, key->unixTimestamp); - len += taosEncodeFixedI32((void**)&buf, key->len); - len += taosEncodeBinary((void**)&buf, key->data, key->len); - return len; -} -int streamValueDecode(void* k, char* buf) { - SStreamValue* key = k; - char* p = buf; - p = taosDecodeFixedI64(p, &key->unixTimestamp); - p = taosDecodeFixedI32(p, &key->len); - p = taosDecodeBinary(p, (void**)&key->data, key->len); - return p - buf; -} -int32_t streamValueToString(void* k, char* buf) { +int32_t valueToString(void* k, char* buf) { SStreamValue* key = k; int n = 0; n += sprintf(buf + n, "[unixTimestamp:%" PRIi64 ",", key->unixTimestamp); @@ -1357,7 +1414,7 @@ int32_t streamValueToString(void* k, char* buf) { } /*1: stale, 0: no stale*/ -int32_t streaValueIsStale(void* k, int64_t ts) { +int32_t valueIsStale(void* k, int64_t ts) { SStreamValue* key = k; if (key->unixTimestamp < ts) { return 1; @@ -1365,12 +1422,12 @@ int32_t streaValueIsStale(void* k, int64_t ts) { return 0; } -void destroyFunc(void* arg) { +void destroyCompare(void* arg) { (void)arg; return; } -int32_t encodeValueFunc(void* value, int32_t vlen, int64_t ttl, char** dest) { +int32_t valueEncode(void* value, int32_t vlen, int64_t ttl, char** dest) { SStreamValue key = {.unixTimestamp = ttl, .len = vlen, .data = (char*)(value)}; int32_t len = 0; if (*dest == NULL) { @@ -1392,7 +1449,7 @@ int32_t encodeValueFunc(void* value, int32_t vlen, int64_t ttl, char** dest) { * ret >= 0 : found valid value * ret < 0 : error or timeout */ -int32_t decodeValueFunc(void* value, int32_t vlen, int64_t* ttl, char** dest) { +int32_t valueDecode(void* value, int32_t vlen, int64_t* ttl, char** dest) { SStreamValue key = {0}; char* p = value; if (streamStateValueIsStale(p)) { @@ -1449,7 +1506,23 @@ void destroyCompactFilteFactory(void* arg) { } const char* compactFilteFactoryName(void* arg) { SCompactFilteFactory* state = arg; - return "stream_compact_filter"; + return "stream_compact_factory_filter_default"; +} +const char* compactFilteFactoryNameSess(void* arg) { + SCompactFilteFactory* state = arg; + return "stream_compact_factory_filter_sess"; +} +const char* compactFilteFactoryNameState(void* arg) { + SCompactFilteFactory* state = arg; + return "stream_compact_factory_filter_state"; +} +const char* compactFilteFactoryNameFill(void* arg) { + SCompactFilteFactory* state = arg; + return "stream_compact_factory_filter_fill"; +} +const char* compactFilteFactoryNameFunc(void* arg) { + SCompactFilteFactory* state = arg; + return "stream_compact_factory_filter_func"; } void destroyCompactFilte(void* arg) { (void)arg; } @@ -1457,38 +1530,499 @@ unsigned char compactFilte(void* arg, int level, const char* key, size_t klen, c char** newval, size_t* newvlen, unsigned char* value_changed) { return streamStateValueIsStale((char*)val) ? 
1 : 0; } -const char* compactFilteName(void* arg) { return "stream_filte"; } +const char* compactFilteName(void* arg) { return "stream_filte_default"; } +const char* compactFilteNameSess(void* arg) { return "stream_filte_sess"; } +const char* compactFilteNameState(void* arg) { return "stream_filte_state"; } +const char* compactFilteNameFill(void* arg) { return "stream_filte_fill"; } +const char* compactFilteNameFunc(void* arg) { return "stream_filte_func"; } + +unsigned char compactFilteSess(void* arg, int level, const char* key, size_t klen, const char* val, size_t vlen, + char** newval, size_t* newvlen, unsigned char* value_changed) { + // not impl yet + return 0; +} + +unsigned char compactFilteState(void* arg, int level, const char* key, size_t klen, const char* val, size_t vlen, + char** newval, size_t* newvlen, unsigned char* value_changed) { + // not impl yet + return 0; +} + +unsigned char compactFilteFill(void* arg, int level, const char* key, size_t klen, const char* val, size_t vlen, + char** newval, size_t* newvlen, unsigned char* value_changed) { + // not impl yet + return 0; +} + +unsigned char compactFilteFunc(void* arg, int level, const char* key, size_t klen, const char* val, size_t vlen, + char** newval, size_t* newvlen, unsigned char* value_changed) { + // not impl yet + return 0; + // return streamStateValueIsStale((char*)val) ? 1 : 0; +} rocksdb_compactionfilter_t* compactFilteFactoryCreateFilter(void* arg, rocksdb_compactionfiltercontext_t* ctx) { SCompactFilteFactory* state = arg; rocksdb_compactionfilter_t* filter = - rocksdb_compactionfilter_create(NULL, destroyCompactFilte, compactFilte, compactFilteName); + rocksdb_compactionfilter_create(state, destroyCompactFilte, compactFilte, compactFilteName); + return filter; +} +rocksdb_compactionfilter_t* compactFilteFactoryCreateFilterSess(void* arg, rocksdb_compactionfiltercontext_t* ctx) { + SCompactFilteFactory* state = arg; + rocksdb_compactionfilter_t* filter = + rocksdb_compactionfilter_create(state, destroyCompactFilte, compactFilteSess, compactFilteNameSess); + return filter; +} +rocksdb_compactionfilter_t* compactFilteFactoryCreateFilterState(void* arg, rocksdb_compactionfiltercontext_t* ctx) { + SCompactFilteFactory* state = arg; + rocksdb_compactionfilter_t* filter = + rocksdb_compactionfilter_create(state, destroyCompactFilte, compactFilteState, compactFilteNameState); + return filter; +} +rocksdb_compactionfilter_t* compactFilteFactoryCreateFilterFill(void* arg, rocksdb_compactionfiltercontext_t* ctx) { + SCompactFilteFactory* state = arg; + rocksdb_compactionfilter_t* filter = + rocksdb_compactionfilter_create(state, destroyCompactFilte, compactFilteFill, compactFilteNameFill); + return filter; +} +rocksdb_compactionfilter_t* compactFilteFactoryCreateFilterFunc(void* arg, rocksdb_compactionfiltercontext_t* ctx) { + SCompactFilteFactory* state = arg; + rocksdb_compactionfilter_t* filter = + rocksdb_compactionfilter_create(state, destroyCompactFilte, compactFilteFunc, compactFilteNameFunc); return filter; } -void destroyRocksdbCfInst(RocksdbCfInst* inst) { - int cfLen = sizeof(ginitDict) / sizeof(ginitDict[0]); - if (inst->pHandle) { - for (int i = 0; i < cfLen; i++) { - if (inst->pHandle[i]) rocksdb_column_family_handle_destroy((inst->pHandle)[i]); - } - taosMemoryFree(inst->pHandle); +int32_t taskDbOpenCfs(STaskDbWrapper* pTask, char* path, char** pCfNames, int32_t nCf) { + int32_t code = -1; + char* err = NULL; + + rocksdb_options_t** cfOpts = taosMemoryCalloc(nCf, sizeof(rocksdb_options_t*)); + 
rocksdb_column_family_handle_t** cfHandle = taosMemoryCalloc(nCf, sizeof(rocksdb_column_family_handle_t*)); + + for (int i = 0; i < nCf; i++) { + int32_t idx = getCfIdx(pCfNames[i]); + cfOpts[i] = pTask->pCfOpts[idx]; } - if (inst->cfOpt) { - for (int i = 0; i < cfLen; i++) { - rocksdb_options_destroy(inst->cfOpt[i]); - rocksdb_block_based_options_destroy(((RocksdbCfParam*)inst->param)[i].tableOpt); - } - taosMemoryFreeClear(inst->cfOpt); - taosMemoryFreeClear(inst->param); - } - if (inst->wOpt) rocksdb_writeoptions_destroy(inst->wOpt); - if (inst->rOpt) rocksdb_readoptions_destroy(inst->rOpt); + rocksdb_t* db = rocksdb_open_column_families(pTask->dbOpt, path, nCf, (const char* const*)pCfNames, + (const rocksdb_options_t* const*)cfOpts, cfHandle, &err); - taosMemoryFree(inst); + if (err != NULL) { + stError("failed to open cf path: %s", err); + taosMemoryFree(err); + goto _EXIT; + } + + for (int i = 0; i < nCf; i++) { + int32_t idx = getCfIdx(pCfNames[i]); + pTask->pCf[idx] = cfHandle[i]; + } + + pTask->db = db; + code = 0; + +_EXIT: + taosMemoryFree(cfOpts); + taosMemoryFree(cfHandle); + return code; +} +void* taskDbAddRef(void* pTaskDb) { + STaskDbWrapper* pBackend = pTaskDb; + return taosAcquireRef(taskDbWrapperId, pBackend->refId); +} +void taskDbRemoveRef(void* pTaskDb) { + if (pTaskDb == NULL) return; + STaskDbWrapper* pBackend = pTaskDb; + taosReleaseRef(taskDbWrapperId, pBackend->refId); } +void taskDbInitOpt(STaskDbWrapper* pTaskDb) { + rocksdb_env_t* env = rocksdb_create_default_env(); + + rocksdb_cache_t* cache = rocksdb_cache_create_lru(256); + rocksdb_options_t* opts = rocksdb_options_create(); + rocksdb_options_set_env(opts, env); + rocksdb_options_set_create_if_missing(opts, 1); + rocksdb_options_set_create_missing_column_families(opts, 1); + // rocksdb_options_set_max_total_wal_size(opts, dbMemLimit); + rocksdb_options_set_recycle_log_file_num(opts, 6); + rocksdb_options_set_max_write_buffer_number(opts, 3); + rocksdb_options_set_info_log_level(opts, 1); + rocksdb_options_set_db_write_buffer_size(opts, 64 << 20); + rocksdb_options_set_write_buffer_size(opts, 32 << 20); + rocksdb_options_set_atomic_flush(opts, 1); + + pTaskDb->dbOpt = opts; + pTaskDb->env = env; + pTaskDb->cache = cache; + pTaskDb->filterFactory = rocksdb_compactionfilterfactory_create( + NULL, destroyCompactFilteFactory, compactFilteFactoryCreateFilter, compactFilteFactoryName); + rocksdb_options_set_compaction_filter_factory(pTaskDb->dbOpt, pTaskDb->filterFactory); + pTaskDb->readOpt = rocksdb_readoptions_create(); + pTaskDb->writeOpt = rocksdb_writeoptions_create(); + + size_t nCf = sizeof(ginitDict) / sizeof(ginitDict[0]); + pTaskDb->pCf = taosMemoryCalloc(nCf, sizeof(rocksdb_column_family_handle_t*)); + pTaskDb->pCfParams = taosMemoryCalloc(nCf, sizeof(RocksdbCfParam)); + pTaskDb->pCfOpts = taosMemoryCalloc(nCf, sizeof(rocksdb_options_t*)); + pTaskDb->pCompares = taosMemoryCalloc(nCf, sizeof(rocksdb_comparator_t*)); + + for (int i = 0; i < nCf; i++) { + rocksdb_options_t* opt = rocksdb_options_create_copy(pTaskDb->dbOpt); + rocksdb_block_based_table_options_t* tableOpt = rocksdb_block_based_options_create(); + rocksdb_block_based_options_set_block_cache(tableOpt, pTaskDb->cache); + rocksdb_block_based_options_set_partition_filters(tableOpt, 1); + + rocksdb_filterpolicy_t* filter = rocksdb_filterpolicy_create_bloom(15); + rocksdb_block_based_options_set_filter_policy(tableOpt, filter); + + rocksdb_options_set_block_based_table_factory((rocksdb_options_t*)opt, tableOpt); + + SCfInit* cfPara = 
&ginitDict[i]; + + rocksdb_comparator_t* compare = + rocksdb_comparator_create(NULL, cfPara->destroyCmp, cfPara->cmpKey, cfPara->cmpName); + rocksdb_options_set_comparator((rocksdb_options_t*)opt, compare); + + rocksdb_compactionfilterfactory_t* filterFactory = + rocksdb_compactionfilterfactory_create(NULL, cfPara->destroyFilter, cfPara->createFilter, cfPara->funcName); + rocksdb_options_set_compaction_filter_factory(opt, filterFactory); + + pTaskDb->pCompares[i] = compare; + pTaskDb->pCfOpts[i] = opt; + pTaskDb->pCfParams[i].tableOpt = tableOpt; + } + return; +} +void taskDbInitChkpOpt(STaskDbWrapper* pTaskDb) { + pTaskDb->chkpId = -1; + pTaskDb->chkpCap = 4; + pTaskDb->chkpSaved = taosArrayInit(4, sizeof(int64_t)); + pTaskDb->chkpInUse = taosArrayInit(4, sizeof(int64_t)); + + taosThreadRwlockInit(&pTaskDb->chkpDirLock, NULL); +} + +void taskDbDestroyChkpOpt(STaskDbWrapper* pTaskDb) { + taosArrayDestroy(pTaskDb->chkpSaved); + taosArrayDestroy(pTaskDb->chkpInUse); + taosThreadRwlockDestroy(&pTaskDb->chkpDirLock); +} + +int32_t taskDbBuildFullPath(char* path, char* key, char** dbFullPath, char** stateFullPath) { + int32_t code = 0; + + char* statePath = taosMemoryCalloc(1, strlen(path) + 128); + sprintf(statePath, "%s%s%s", path, TD_DIRSEP, key); + if (!taosDirExist(statePath)) { + code = taosMulMkDir(statePath); + if (code != 0) { + stError("failed to create dir: %s, reason:%s", statePath, tstrerror(code)); + taosMemoryFree(statePath); + return code; + } + } + + char* dbPath = taosMemoryCalloc(1, strlen(statePath) + 128); + sprintf(dbPath, "%s%s%s", statePath, TD_DIRSEP, "state"); + if (!taosDirExist(dbPath)) { + code = taosMulMkDir(dbPath); + if (code != 0) { + stError("failed to create dir: %s, reason:%s", dbPath, tstrerror(code)); + taosMemoryFree(statePath); + taosMemoryFree(dbPath); + return code; + } + } + + *dbFullPath = dbPath; + *stateFullPath = statePath; + return 0; +} +void taskDbUpdateChkpId(void* pTaskDb, int64_t chkpId) { + STaskDbWrapper* p = pTaskDb; + taosThreadMutexLock(&p->mutex); + p->chkpId = chkpId; + taosThreadMutexUnlock(&p->mutex); +} + +STaskDbWrapper* taskDbOpenImpl(char* key, char* statePath, char* dbPath) { + char* err = NULL; + char** cfNames = NULL; + size_t nCf = 0; + + STaskDbWrapper* pTaskDb = taosMemoryCalloc(1, sizeof(STaskDbWrapper)); + pTaskDb->idstr = key ? taosStrdup(key) : NULL; + pTaskDb->path = statePath ? 
taosStrdup(statePath) : NULL; + + taosThreadMutexInit(&pTaskDb->mutex, NULL); + taskDbInitChkpOpt(pTaskDb); + taskDbInitOpt(pTaskDb); + + cfNames = rocksdb_list_column_families(pTaskDb->dbOpt, dbPath, &nCf, &err); + if (nCf == 0) { + stInfo("newly create db, need to restart"); + // pre create db + pTaskDb->db = rocksdb_open(pTaskDb->pCfOpts[0], dbPath, &err); + rocksdb_close(pTaskDb->db); + + if (cfNames != NULL) { + rocksdb_list_column_families_destroy(cfNames, nCf); + } + taosMemoryFree(err); + err = NULL; + + cfNames = rocksdb_list_column_families(pTaskDb->dbOpt, dbPath, &nCf, &err); + ASSERT(err == NULL); + } + + if (taskDbOpenCfs(pTaskDb, dbPath, cfNames, nCf) != 0) { + goto _EXIT; + } + + if (cfNames != NULL) { + rocksdb_list_column_families_destroy(cfNames, nCf); + cfNames = NULL; + } + + qDebug("succ to init stream backend at %s, backend:%p", dbPath, pTaskDb); + return pTaskDb; +_EXIT: + + taskDbDestroy(pTaskDb, false); + if (err) taosMemoryFree(err); + if (cfNames) rocksdb_list_column_families_destroy(cfNames, nCf); + return NULL; +} +STaskDbWrapper* taskDbOpen(char* path, char* key, int64_t chkpId) { + char* statePath = NULL; + char* dbPath = NULL; + + if (rebuildDirFromChkp2(path, key, chkpId, &statePath, &dbPath) != 0) { + return NULL; + } + + STaskDbWrapper* pTaskDb = taskDbOpenImpl(key, statePath, dbPath); + taosMemoryFree(dbPath); + taosMemoryFree(statePath); + return pTaskDb; +} + +void taskDbDestroy(void* pDb, bool flush) { + STaskDbWrapper* wrapper = pDb; + qDebug("succ to destroy stream backend:%p", wrapper); + + int8_t nCf = sizeof(ginitDict) / sizeof(ginitDict[0]); + + if (wrapper == NULL) return; + + if (flush) { + if (wrapper->db && wrapper->pCf) { + rocksdb_flushoptions_t* flushOpt = rocksdb_flushoptions_create(); + rocksdb_flushoptions_set_wait(flushOpt, 1); + + char* err = NULL; + rocksdb_column_family_handle_t** cfs = taosMemoryCalloc(1, sizeof(rocksdb_column_family_handle_t*) * nCf); + int numOfFlushCf = 0; + for (int i = 0; i < nCf; i++) { + if (wrapper->pCf[i] != NULL) { + cfs[numOfFlushCf++] = wrapper->pCf[i]; + } + } + if (numOfFlushCf != 0) { + rocksdb_flush_cfs(wrapper->db, flushOpt, cfs, numOfFlushCf, &err); + if (err != NULL) { + stError("failed to flush all cfs, reason:%s", err); + taosMemoryFreeClear(err); + } + } + taosMemoryFree(cfs); + rocksdb_flushoptions_destroy(flushOpt); + } + } + for (int i = 0; i < nCf; i++) { + if (wrapper->pCf[i] != NULL) { + rocksdb_column_family_handle_destroy(wrapper->pCf[i]); + } + } + + if (wrapper->db) rocksdb_close(wrapper->db); + + rocksdb_options_destroy(wrapper->dbOpt); + rocksdb_readoptions_destroy(wrapper->readOpt); + rocksdb_writeoptions_destroy(wrapper->writeOpt); + rocksdb_env_destroy(wrapper->env); + rocksdb_cache_destroy(wrapper->cache); + + taosMemoryFree(wrapper->pCf); + for (int i = 0; i < nCf; i++) { + rocksdb_options_t* opt = wrapper->pCfOpts[i]; + rocksdb_comparator_t* compare = wrapper->pCompares[i]; + rocksdb_block_based_table_options_t* tblOpt = wrapper->pCfParams[i].tableOpt; + + rocksdb_options_destroy(opt); + rocksdb_comparator_destroy(compare); + rocksdb_block_based_options_destroy(tblOpt); + } + taosMemoryFree(wrapper->pCompares); + taosMemoryFree(wrapper->pCfOpts); + taosMemoryFree(wrapper->pCfParams); + + taosThreadMutexDestroy(&wrapper->mutex); + + taskDbDestroyChkpOpt(wrapper); + + taosMemoryFree(wrapper->idstr); + taosMemoryFree(wrapper->path); + taosMemoryFree(wrapper); + + return; +} + +void taskDbDestroy2(void* pDb) { taskDbDestroy(pDb, true); } + +int32_t 
taskDbGenChkpUploadData__rsync(STaskDbWrapper* pDb, int64_t chkpId, char** path) { + int64_t st = taosGetTimestampMs(); + int32_t code = -1; + int64_t refId = pDb->refId; + + if (taosAcquireRef(taskDbWrapperId, refId) == NULL) { + return -1; + } + + char* buf = taosMemoryCalloc(1, strlen(pDb->path) + 128); + sprintf(buf, "%s%s%s%s%s%" PRId64 "", pDb->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkpId); + if (taosIsDir(buf)) { + code = 0; + *path = buf; + } else { + taosMemoryFree(buf); + } + + taosReleaseRef(taskDbWrapperId, refId); + return code; +} + +int32_t taskDbGenChkpUploadData__s3(STaskDbWrapper* pDb, void* bkdChkpMgt, int64_t chkpId, char** path, SArray* list) { + int32_t code = 0; + SBkdMgt* p = (SBkdMgt*)bkdChkpMgt; + + char* temp = taosMemoryCalloc(1, strlen(pDb->path) + 32); + sprintf(temp, "%s%s%s%" PRId64 "", pDb->path, TD_DIRSEP, "tmp", chkpId); + + if (taosDirExist(temp)) { + taosRemoveDir(temp); + taosMkDir(temp); + } else { + taosMkDir(temp); + } + code = bkdMgtGetDelta(p, pDb->idstr, chkpId, list, temp); + + *path = temp; + + return code; +} +int32_t taskDbGenChkpUploadData(void* arg, void* mgt, int64_t chkpId, int8_t type, char** path, SArray* list) { + STaskDbWrapper* pDb = arg; + UPLOAD_TYPE utype = type; + + if (utype == UPLOAD_RSYNC) { + return taskDbGenChkpUploadData__rsync(pDb, chkpId, path); + } else if (utype == UPLOAD_S3) { + return taskDbGenChkpUploadData__s3(pDb, mgt, chkpId, path, list); + } + return -1; +} + +int32_t taskDbOpenCfByKey(STaskDbWrapper* pDb, const char* key) { + int32_t code = 0; + char* err = NULL; + int8_t idx = getCfIdx(key); + + if (idx == -1) return -1; + + if (pDb->pCf[idx] != NULL) return code; + + rocksdb_column_family_handle_t* cf = + rocksdb_create_column_family(pDb->db, pDb->pCfOpts[idx], ginitDict[idx].key, &err); + if (err != NULL) { + stError("failed to open cf, key:%s, reason: %s", key, err); + taosMemoryFree(err); + code = -1; + return code; + } + + pDb->pCf[idx] = cf; + return code; +} +int32_t copyDataAt(RocksdbCfInst* pSrc, STaskDbWrapper* pDst, int8_t i) { + int32_t WRITE_BATCH = 1024; + char* err = NULL; + int code = 0; + + rocksdb_readoptions_t* pRdOpt = rocksdb_readoptions_create(); + + rocksdb_writebatch_t* wb = rocksdb_writebatch_create(); + rocksdb_iterator_t* pIter = rocksdb_create_iterator_cf(pSrc->db, pRdOpt, pSrc->pHandle[i]); + rocksdb_iter_seek_to_first(pIter); + while (rocksdb_iter_valid(pIter)) { + if (rocksdb_writebatch_count(wb) >= WRITE_BATCH) { + rocksdb_write(pDst->db, pDst->writeOpt, wb, &err); + if (err != NULL) { + code = -1; + goto _EXIT; + } + rocksdb_writebatch_clear(wb); + } + + size_t klen = 0, vlen = 0; + char* key = (char*)rocksdb_iter_key(pIter, &klen); + char* val = (char*)rocksdb_iter_value(pIter, &vlen); + + rocksdb_writebatch_put_cf(wb, pDst->pCf[i], key, klen, val, vlen); + rocksdb_iter_next(pIter); + } + + if (rocksdb_writebatch_count(wb) > 0) { + rocksdb_write(pDst->db, pDst->writeOpt, wb, &err); + if (err != NULL) { + code = -1; + goto _EXIT; + } + } + +_EXIT: + rocksdb_iter_destroy(pIter); + rocksdb_readoptions_destroy(pRdOpt); + taosMemoryFree(err); + + return code; +} + +int32_t streamStateCvtDataFormat(char* path, char* key, void* pCfInst) { + int nCf = sizeof(ginitDict) / sizeof(ginitDict[0]); + + int32_t code = 0; + + STaskDbWrapper* pTaskDb = taskDbOpen(path, key, 0); + RocksdbCfInst* pSrcBackend = pCfInst; + + for (int i = 0; i < nCf; i++) { + rocksdb_column_family_handle_t* pSrcCf = pSrcBackend->pHandle[i]; + if (pSrcCf == NULL) continue; + + code = 
taskDbOpenCfByKey(pTaskDb, ginitDict[i].key); + if (code != 0) goto _EXIT; + + code = copyDataAt(pSrcBackend, pTaskDb, i); + if (code != 0) goto _EXIT; + } + +_EXIT: + taskDbDestroy(pTaskDb, true); + + return code; +} int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t nCf) { SBackendWrapper* handle = backend; char* err = NULL; @@ -1525,7 +2059,7 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t SCfInit* cfPara = &ginitDict[idx]; rocksdb_comparator_t* compare = - rocksdb_comparator_create(NULL, cfPara->detroyFunc, cfPara->cmpFunc, cfPara->cmpName); + rocksdb_comparator_create(NULL, cfPara->destroyCmp, cfPara->cmpKey, cfPara->cmpName); rocksdb_options_set_comparator((rocksdb_options_t*)cfOpts[i], compare); pCompare[i] = compare; } @@ -1609,7 +2143,7 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t SCfInit* cfPara = &ginitDict[i]; rocksdb_comparator_t* compare = - rocksdb_comparator_create(NULL, cfPara->detroyFunc, cfPara->cmpFunc, cfPara->cmpName); + rocksdb_comparator_create(NULL, cfPara->destroyCmp, cfPara->cmpKey, cfPara->cmpName); rocksdb_options_set_comparator((rocksdb_options_t*)opt, compare); inst->pCompares[i] = compare; @@ -1689,7 +2223,7 @@ int streamStateOpenBackend(void* backend, SStreamState* pState) { for (int i = 0; i < cfLen; i++) { SCfInit* cf = &ginitDict[i]; - rocksdb_comparator_t* compare = rocksdb_comparator_create(NULL, cf->detroyFunc, cf->cmpFunc, cf->cmpName); + rocksdb_comparator_t* compare = rocksdb_comparator_create(NULL, cf->destroyCmp, cf->cmpKey, cf->cmpName); rocksdb_options_set_comparator((rocksdb_options_t*)cfOpt[i], compare); pCompare[i] = compare; } @@ -1755,26 +2289,25 @@ int streamStateGetCfIdx(SStreamState* pState, const char* funcName) { } } if (pState != NULL && idx != -1) { - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; rocksdb_column_family_handle_t* cf = NULL; - taosThreadRwlockWrlock(&wrapper->rwLock); - cf = wrapper->pHandle[idx]; - if (cf == NULL) { - char buf[128] = {0}; - GEN_COLUMN_FAMILY_NAME(buf, wrapper->idstr, ginitDict[idx].key); - char* err = NULL; - cf = rocksdb_create_column_family(wrapper->rocksdb, wrapper->cfOpts[idx], buf, &err); + taosThreadMutexLock(&wrapper->mutex); + + cf = wrapper->pCf[idx]; + if (cf == NULL) { + char* err = NULL; + cf = rocksdb_create_column_family(wrapper->db, wrapper->pCfOpts[idx], ginitDict[idx].key, &err); if (err != NULL) { idx = -1; stError("failed to open cf, %p %s_%s, reason:%s", pState, wrapper->idstr, funcName, err); taosMemoryFree(err); } else { - stDebug("succ to to open cf, %p %s_%s", pState, wrapper->idstr, funcName); - wrapper->pHandle[idx] = cf; + qDebug("succ to open cf, %p %s_%s", pState, wrapper->idstr, funcName); + wrapper->pCf[idx] = cf; } } - taosThreadRwlockUnlock(&wrapper->rwLock); + taosThreadMutexUnlock(&wrapper->mutex); } return idx; @@ -1795,118 +2328,120 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfKe *readOpt = rocksdb_readoptions_create(); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; if (snapshot != NULL) { - *snapshot = (rocksdb_snapshot_t*)rocksdb_create_snapshot(wrapper->rocksdb); + *snapshot = (rocksdb_snapshot_t*)rocksdb_create_snapshot(wrapper->db); rocksdb_readoptions_set_snapshot(*readOpt, *snapshot); rocksdb_readoptions_set_fill_cache(*readOpt, 0); } - return 
rocksdb_create_iterator_cf(wrapper->rocksdb, *readOpt, - ((rocksdb_column_family_handle_t**)wrapper->pHandle)[idx]); + return rocksdb_create_iterator_cf(wrapper->db, *readOpt, ((rocksdb_column_family_handle_t**)wrapper->pCf)[idx]); } -#define STREAM_STATE_PUT_ROCKSDB(pState, funcname, key, value, vLen) \ - do { \ - code = 0; \ - char buf[128] = {0}; \ - char* err = NULL; \ - int i = streamStateGetCfIdx(pState, funcname); \ - if (i < 0) { \ - stWarn("streamState failed to get cf name: %s", funcname); \ - code = -1; \ - break; \ - } \ - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; \ - char toString[128] = {0}; \ - if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ - int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ - rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pHandle)[ginitDict[i].idx]; \ - rocksdb_t* db = wrapper->rocksdb; \ - rocksdb_writeoptions_t* opts = wrapper->writeOpts; \ - char* ttlV = NULL; \ - int32_t ttlVLen = ginitDict[i].enValueFunc((char*)value, vLen, 0, &ttlV); \ - rocksdb_put_cf(db, opts, pHandle, (const char*)buf, klen, (const char*)ttlV, (size_t)ttlVLen, &err); \ - if (err != NULL) { \ - stError("streamState str: %s failed to write to %s, err: %s", toString, funcname, err); \ - taosMemoryFree(err); \ - code = -1; \ - } else { \ - stTrace("streamState str:%s succ to write to %s, rowValLen:%d, ttlValLen:%d", toString, funcname, vLen, \ - ttlVLen); \ - } \ - taosMemoryFree(ttlV); \ +#define STREAM_STATE_PUT_ROCKSDB(pState, funcname, key, value, vLen) \ + do { \ + code = 0; \ + char buf[128] = {0}; \ + char* err = NULL; \ + int i = streamStateGetCfIdx(pState, funcname); \ + if (i < 0) { \ + qWarn("streamState failed to get cf name: %s", funcname); \ + code = -1; \ + break; \ + } \ + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; \ + wrapper->dataWritten += 1; \ + char toString[128] = {0}; \ + if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ + int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ + rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pCf)[ginitDict[i].idx]; \ + rocksdb_writeoptions_t* opts = wrapper->writeOpt; \ + rocksdb_t* db = wrapper->db; \ + char* ttlV = NULL; \ + int32_t ttlVLen = ginitDict[i].enValueFunc((char*)value, vLen, 0, &ttlV); \ + rocksdb_put_cf(db, opts, pHandle, (const char*)buf, klen, (const char*)ttlV, (size_t)ttlVLen, &err); \ + if (err != NULL) { \ + stError("streamState str: %s failed to write to %s, err: %s", toString, funcname, err); \ + taosMemoryFree(err); \ + code = -1; \ + } else { \ + qTrace("streamState str:%s succ to write to %s, rowValLen:%d, ttlValLen:%d, %p", toString, funcname, vLen, \ + ttlVLen, wrapper); \ + } \ + taosMemoryFree(ttlV); \ } while (0); -#define STREAM_STATE_GET_ROCKSDB(pState, funcname, key, pVal, vLen) \ - do { \ - code = 0; \ - char buf[128] = {0}; \ - char* err = NULL; \ - int i = streamStateGetCfIdx(pState, funcname); \ - if (i < 0) { \ - stWarn("streamState failed to get cf name: %s", funcname); \ - code = -1; \ - break; \ - } \ - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; \ - char toString[128] = {0}; \ - if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ - int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ - rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pHandle)[ginitDict[i].idx]; \ - rocksdb_t* db = wrapper->rocksdb; \ - 
rocksdb_readoptions_t* opts = wrapper->readOpts; \ - size_t len = 0; \ - char* val = rocksdb_get_cf(db, opts, pHandle, (const char*)buf, klen, (size_t*)&len, &err); \ - if (val == NULL || len == 0) { \ - if (err == NULL) { \ - stTrace("streamState str: %s failed to read from %s_%s, err: not exist", toString, wrapper->idstr, funcname); \ - } else { \ - stError("streamState str: %s failed to read from %s_%s, err: %s", toString, wrapper->idstr, funcname, err); \ - taosMemoryFreeClear(err); \ - } \ - code = -1; \ - } else { \ - char* p = NULL; \ - int32_t tlen = ginitDict[i].deValueFunc(val, len, NULL, (char**)pVal); \ - if (tlen <= 0) { \ - stError("streamState str: %s failed to read from %s_%s, err: already ttl ", toString, wrapper->idstr, \ - funcname); \ - code = -1; \ - } else { \ - stTrace("streamState str: %s succ to read from %s_%s, valLen:%d", toString, wrapper->idstr, funcname, tlen); \ - } \ - taosMemoryFree(val); \ - if (vLen != NULL) *vLen = tlen; \ - } \ +#define STREAM_STATE_GET_ROCKSDB(pState, funcname, key, pVal, vLen) \ + do { \ + code = 0; \ + char buf[128] = {0}; \ + char* err = NULL; \ + int i = streamStateGetCfIdx(pState, funcname); \ + if (i < 0) { \ + qWarn("streamState failed to get cf name: %s", funcname); \ + code = -1; \ + break; \ + } \ + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; \ + char toString[128] = {0}; \ + if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ + int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ + rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pCf)[ginitDict[i].idx]; \ + rocksdb_t* db = wrapper->db; \ + rocksdb_readoptions_t* opts = wrapper->readOpt; \ + size_t len = 0; \ + char* val = rocksdb_get_cf(db, opts, pHandle, (const char*)buf, klen, (size_t*)&len, &err); \ + if (val == NULL || len == 0) { \ + if (err == NULL) { \ + qTrace("streamState str: %s failed to read from %s_%s, err: not exist", toString, wrapper->idstr, funcname); \ + } else { \ + stError("streamState str: %s failed to read from %s_%s, err: %s", toString, wrapper->idstr, funcname, err); \ + taosMemoryFreeClear(err); \ + } \ + code = -1; \ + } else { \ + char* p = NULL; \ + int32_t tlen = ginitDict[i].deValueFunc(val, len, NULL, (char**)pVal); \ + if (tlen <= 0) { \ + stError("streamState str: %s failed to read from %s_%s, err: already ttl ", toString, wrapper->idstr, \ + funcname); \ + code = -1; \ + } else { \ + qTrace("streamState str: %s succ to read from %s_%s, valLen:%d, %p", toString, wrapper->idstr, funcname, tlen, \ + wrapper); \ + } \ + taosMemoryFree(val); \ + if (vLen != NULL) *vLen = tlen; \ + } \ } while (0); -#define STREAM_STATE_DEL_ROCKSDB(pState, funcname, key) \ - do { \ - code = 0; \ - char buf[128] = {0}; \ - char* err = NULL; \ - int i = streamStateGetCfIdx(pState, funcname); \ - if (i < 0) { \ - stWarn("streamState failed to get cf name: %s_%s", pState->pTdbState->idstr, funcname); \ - code = -1; \ - break; \ - } \ - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; \ - char toString[128] = {0}; \ - if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ - int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ - rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pHandle)[ginitDict[i].idx]; \ - rocksdb_t* db = wrapper->rocksdb; \ - rocksdb_writeoptions_t* opts = wrapper->writeOpts; \ - rocksdb_delete_cf(db, opts, pHandle, (const char*)buf, klen, &err); \ - if (err != NULL) { \ - 
stError("streamState str: %s failed to del from %s_%s, err: %s", toString, wrapper->idstr, funcname, err); \ - taosMemoryFree(err); \ - code = -1; \ - } else { \ - stTrace("streamState str: %s succ to del from %s_%s", toString, wrapper->idstr, funcname); \ - } \ +#define STREAM_STATE_DEL_ROCKSDB(pState, funcname, key) \ + do { \ + code = 0; \ + char buf[128] = {0}; \ + char* err = NULL; \ + int i = streamStateGetCfIdx(pState, funcname); \ + if (i < 0) { \ + qWarn("streamState failed to get cf name: %s_%s", pState->pTdbState->idstr, funcname); \ + code = -1; \ + break; \ + } \ + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; \ + wrapper->dataWritten += 1; \ + char toString[128] = {0}; \ + if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ + int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ + rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pCf)[ginitDict[i].idx]; \ + rocksdb_t* db = wrapper->db; \ + rocksdb_writeoptions_t* opts = wrapper->writeOpt; \ + rocksdb_delete_cf(db, opts, pHandle, (const char*)buf, klen, &err); \ + if (err != NULL) { \ + stError("streamState str: %s failed to del from %s_%s, err: %s", toString, wrapper->idstr, funcname, err); \ + taosMemoryFree(err); \ + code = -1; \ + } else { \ + qTrace("streamState str: %s succ to del from %s_%s", toString, wrapper->idstr, funcname); \ + } \ } while (0); // state cf @@ -1932,7 +2467,8 @@ int32_t streamStateDel_rocksdb(SStreamState* pState, const SWinKey* key) { int32_t streamStateClear_rocksdb(SStreamState* pState) { stDebug("streamStateClear_rocksdb"); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; + wrapper->dataWritten += 1; char sKeyStr[128] = {0}; char eKeyStr[128] = {0}; @@ -1942,10 +2478,9 @@ int32_t streamStateClear_rocksdb(SStreamState* pState) { int sLen = stateKeyEncode(&sKey, sKeyStr); int eLen = stateKeyEncode(&eKey, eKeyStr); - if (wrapper->pHandle[1] != NULL) { + if (wrapper->pCf[1] != NULL) { char* err = NULL; - rocksdb_delete_range_cf(wrapper->rocksdb, wrapper->writeOpts, wrapper->pHandle[1], sKeyStr, sLen, eKeyStr, eLen, - &err); + rocksdb_delete_range_cf(wrapper->db, wrapper->writeOpt, wrapper->pCf[1], sKeyStr, sLen, eKeyStr, eLen, &err); if (err != NULL) { char toStringStart[128] = {0}; char toStringEnd[128] = {0}; @@ -1955,7 +2490,7 @@ int32_t streamStateClear_rocksdb(SStreamState* pState) { stWarn("failed to delete range cf(state) start: %s, end:%s, reason:%s", toStringStart, toStringEnd, err); taosMemoryFree(err); } else { - rocksdb_compact_range_cf(wrapper->rocksdb, wrapper->pHandle[1], sKeyStr, sLen, eKeyStr, eLen); + rocksdb_compact_range_cf(wrapper->db, wrapper->pCf[1], sKeyStr, sLen, eKeyStr, eLen); } } @@ -2029,7 +2564,7 @@ int32_t streamStateGetKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey, cons if (pVLen != NULL) { size_t vlen = 0; const char* valStr = rocksdb_iter_value(pCur->iter, &vlen); - *pVLen = decodeValueFunc((void*)valStr, vlen, NULL, (char**)pVal); + *pVLen = valueDecode((void*)valStr, vlen, NULL, (char**)pVal); } *pKey = pKtmp->key; @@ -2052,9 +2587,9 @@ SStreamStateCur* streamStateSeekKeyNext_rocksdb(SStreamState* pState, const SWin if (pCur == NULL) { return NULL; } - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; pCur->number = pState->number; - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = 
streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); @@ -2104,7 +2639,7 @@ SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState) { if (pCur == NULL) return NULL; pCur->number = pState->number; - pCur->db = ((SBackendCfWrapper*)pState->pTdbState->pBackendCfWrapper)->rocksdb; + pCur->db = ((STaskDbWrapper*)pState->pTdbState->pOwner->pBackend)->db; pCur->iter = streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); @@ -2126,12 +2661,13 @@ SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState) { } SStreamStateCur* streamStateGetCur_rocksdb(SStreamState* pState, const SWinKey* key) { - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + qDebug("streamStateGetCur_rocksdb"); + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) return NULL; - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); pCur->number = pState->number; @@ -2178,6 +2714,9 @@ int32_t streamStateFuncDel_rocksdb(SStreamState* pState, const STupleKey* key) { int32_t streamStateSessionPut_rocksdb(SStreamState* pState, const SSessionKey* key, const void* value, int32_t vLen) { int code = 0; SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; + if (value == NULL || vLen == 0) { + stError("streamStateSessionPut_rocksdb val: %p, len: %d", value, vLen); + } STREAM_STATE_PUT_ROCKSDB(pState, "sess", &sKey, value, vLen); return code; } @@ -2226,11 +2765,11 @@ SStreamStateCur* streamStateSessionSeekToLast_rocksdb(SStreamState* pState) { if (code != 0) { return NULL; } + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = createStreamStateCursor(); + SStreamStateCur* pCur = createStreamStateCursor(); pCur->number = pState->number; - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); @@ -2262,13 +2801,14 @@ int32_t streamStateSessionCurPrev_rocksdb(SStreamStateCur* pCur) { SStreamStateCur* streamStateSessionSeekKeyCurrentPrev_rocksdb(SStreamState* pState, const SSessionKey* key) { stDebug("streamStateSessionSeekKeyCurrentPrev_rocksdb"); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = createStreamStateCursor(); + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; + SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { return NULL; } + pCur->number = pState->number; - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); @@ -2301,13 +2841,13 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentPrev_rocksdb(SStreamState* pSta return pCur; } SStreamStateCur* streamStateSessionSeekKeyCurrentNext_rocksdb(SStreamState* pState, SSessionKey* key) { - stDebug("streamStateSessionSeekKeyCurrentNext_rocksdb"); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = createStreamStateCursor(); + qDebug("streamStateSessionSeekKeyCurrentNext_rocksdb"); + STaskDbWrapper* wrapper = 
pState->pTdbState->pOwner->pBackend; + SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { return NULL; } - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); pCur->number = pState->number; @@ -2339,13 +2879,13 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentNext_rocksdb(SStreamState* pSta } SStreamStateCur* streamStateSessionSeekKeyNext_rocksdb(SStreamState* pState, const SSessionKey* key) { - stDebug("streamStateSessionSeekKeyNext_rocksdb"); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = createStreamStateCursor(); + qDebug("streamStateSessionSeekKeyNext_rocksdb"); + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; + SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { return NULL; } - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); pCur->number = pState->number; @@ -2398,7 +2938,7 @@ int32_t streamStateSessionGetKVByCur_rocksdb(SStreamStateCur* pCur, SSessionKey* SStateSessionKey* pKTmp = &ktmp; const char* vval = rocksdb_iter_value(pCur->iter, (size_t*)&vLen); char* val = NULL; - int32_t len = decodeValueFunc((void*)vval, vLen, NULL, &val); + int32_t len = valueDecode((void*)vval, vLen, NULL, &val); if (len < 0) { taosMemoryFree(val); return -1; @@ -2443,13 +2983,13 @@ int32_t streamStateFillDel_rocksdb(SStreamState* pState, const SWinKey* key) { } SStreamStateCur* streamStateFillGetCur_rocksdb(SStreamState* pState, const SWinKey* key) { - stDebug("streamStateFillGetCur_rocksdb"); - SStreamStateCur* pCur = createStreamStateCursor(); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + qDebug("streamStateFillGetCur_rocksdb"); + SStreamStateCur* pCur = createStreamStateCursor(); + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; if (pCur == NULL) return NULL; - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); pCur->number = pState->number; @@ -2492,7 +3032,7 @@ int32_t streamStateFillGetKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey, winKeyDecode(&winKey, keyStr); const char* valStr = rocksdb_iter_value(pCur->iter, &vlen); - int32_t len = decodeValueFunc((void*)valStr, vlen, NULL, (char**)pVal); + int32_t len = valueDecode((void*)valStr, vlen, NULL, (char**)pVal); if (len < 0) { return -1; } @@ -2503,14 +3043,14 @@ int32_t streamStateFillGetKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey, } SStreamStateCur* streamStateFillSeekKeyNext_rocksdb(SStreamState* pState, const SWinKey* key) { - stDebug("streamStateFillSeekKeyNext_rocksdb"); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = createStreamStateCursor(); + qDebug("streamStateFillSeekKeyNext_rocksdb"); + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; + SStreamStateCur* pCur = createStreamStateCursor(); if (!pCur) { return NULL; } - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); pCur->number = pState->number; @@ -2541,14 +3081,14 @@ SStreamStateCur* 
streamStateFillSeekKeyNext_rocksdb(SStreamState* pState, const return NULL; } SStreamStateCur* streamStateFillSeekKeyPrev_rocksdb(SStreamState* pState, const SWinKey* key) { - stDebug("streamStateFillSeekKeyPrev_rocksdb"); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = createStreamStateCursor(); + qDebug("streamStateFillSeekKeyPrev_rocksdb"); + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; + SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { return NULL; } - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); pCur->number = pState->number; @@ -2580,12 +3120,12 @@ SStreamStateCur* streamStateFillSeekKeyPrev_rocksdb(SStreamState* pState, const } int32_t streamStateSessionGetKeyByRange_rocksdb(SStreamState* pState, const SSessionKey* key, SSessionKey* curKey) { stDebug("streamStateSessionGetKeyByRange_rocksdb"); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = createStreamStateCursor(); + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; + SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { return -1; } - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); pCur->number = pState->number; @@ -2812,7 +3352,7 @@ int32_t streamDefaultIterGet_rocksdb(SStreamState* pState, const void* start, co int code = 0; char* err = NULL; - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; rocksdb_snapshot_t* snapshot = NULL; rocksdb_readoptions_t* readopts = NULL; rocksdb_iterator_t* pIter = streamStateIterCreate(pState, "default", &snapshot, &readopts); @@ -2826,7 +3366,7 @@ int32_t streamDefaultIterGet_rocksdb(SStreamState* pState, const void* start, co int32_t vlen = 0; const char* vval = rocksdb_iter_value(pIter, (size_t*)&vlen); char* val = NULL; - int32_t len = decodeValueFunc((void*)vval, vlen, NULL, NULL); + int32_t len = valueDecode((void*)vval, vlen, NULL, NULL); if (len < 0) { rocksdb_iter_next(pIter); continue; @@ -2845,16 +3385,16 @@ int32_t streamDefaultIterGet_rocksdb(SStreamState* pState, const void* start, co } rocksdb_iter_next(pIter); } - rocksdb_release_snapshot(wrapper->rocksdb, snapshot); + rocksdb_release_snapshot(wrapper->db, snapshot); rocksdb_readoptions_destroy(readopts); rocksdb_iter_destroy(pIter); return code; } void* streamDefaultIterCreate_rocksdb(SStreamState* pState) { - SStreamStateCur* pCur = createStreamStateCursor(); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + SStreamStateCur* pCur = createStreamStateCursor(); + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = streamStateIterCreate(pState, "default", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); pCur->number = pState->number; @@ -2885,7 +3425,7 @@ char* streamDefaultIterVal_rocksdb(void* iter, int32_t* len) { int32_t vlen = 0; const char* val = rocksdb_iter_value(pCur->iter, (size_t*)&vlen); - *len = decodeValueFunc((void*)val, vlen, NULL, &ret); + *len = valueDecode((void*)val, vlen, NULL, &ret); if (*len < 0) { taosMemoryFree(ret); return NULL; @@ -2907,7 +3447,8 @@ void 
streamStateClearBatch(void* pBatch) { rocksdb_writebatch_clear((rocksdb_ void streamStateDestroyBatch(void* pBatch) { rocksdb_writebatch_destroy((rocksdb_writebatch_t*)pBatch); } int32_t streamStatePutBatch(SStreamState* pState, const char* cfKeyName, rocksdb_writebatch_t* pBatch, void* key, void* val, int32_t vlen, int64_t ttl) { - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; + wrapper->dataWritten += 1; int i = streamStateGetCfIdx(pState, cfKeyName); if (i < 0) { @@ -2921,7 +3462,7 @@ int32_t streamStatePutBatch(SStreamState* pState, const char* cfKeyName, rocksdb char* ttlV = NULL; int32_t ttlVLen = ginitDict[i].enValueFunc(val, vlen, ttl, &ttlV); - rocksdb_column_family_handle_t* pCf = wrapper->pHandle[ginitDict[i].idx]; + rocksdb_column_family_handle_t* pCf = wrapper->pCf[ginitDict[i].idx]; rocksdb_writebatch_put_cf((rocksdb_writebatch_t*)pBatch, pCf, buf, (size_t)klen, ttlV, (size_t)ttlVLen); taosMemoryFree(ttlV); @@ -2940,8 +3481,10 @@ int32_t streamStatePutBatchOptimize(SStreamState* pState, int32_t cfIdx, rocksdb char* ttlV = tmpBuf; int32_t ttlVLen = ginitDict[cfIdx].enValueFunc(val, vlen, ttl, &ttlV); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - rocksdb_column_family_handle_t* pCf = wrapper->pHandle[ginitDict[cfIdx].idx]; + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; + wrapper->dataWritten += 1; + + rocksdb_column_family_handle_t* pCf = wrapper->pCf[ginitDict[cfIdx].idx]; rocksdb_writebatch_put_cf((rocksdb_writebatch_t*)pBatch, pCf, buf, (size_t)klen, ttlV, (size_t)ttlVLen); if (tmpBuf == NULL) { @@ -2956,15 +3499,16 @@ int32_t streamStatePutBatchOptimize(SStreamState* pState, int32_t cfIdx, rocksdb return 0; } int32_t streamStatePutBatch_rocksdb(SStreamState* pState, void* pBatch) { - char* err = NULL; - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - rocksdb_write(wrapper->rocksdb, wrapper->writeOpts, (rocksdb_writebatch_t*)pBatch, &err); + char* err = NULL; + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; + wrapper->dataWritten += 1; + rocksdb_write(wrapper->db, wrapper->writeOpt, (rocksdb_writebatch_t*)pBatch, &err); if (err != NULL) { stError("streamState failed to write batch, err:%s", err); taosMemoryFree(err); return -1; } else { - stDebug("write batch to backend:%p", wrapper->pBackend); + stDebug("write batch to backend:%p", wrapper->db); } return 0; } @@ -2978,3 +3522,446 @@ uint32_t nextPow2(uint32_t x) { x = x | (x >> 16); return x + 1; } +int32_t copyFiles(const char* src, const char* dst) { + int32_t code = 0; + // opt later, just hard link + int32_t sLen = strlen(src); + int32_t dLen = strlen(dst); + char* srcName = taosMemoryCalloc(1, sLen + 64); + char* dstName = taosMemoryCalloc(1, dLen + 64); + + TdDirPtr pDir = taosOpenDir(src); + if (pDir == NULL) { + taosMemoryFree(srcName); + taosMemoryFree(dstName); + return -1; + } + + TdDirEntryPtr de = NULL; + while ((de = taosReadDir(pDir)) != NULL) { + char* name = taosGetDirEntryName(de); + if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; + + sprintf(srcName, "%s%s%s", src, TD_DIRSEP, name); + sprintf(dstName, "%s%s%s", dst, TD_DIRSEP, name); + if (!taosDirEntryIsDir(de)) { + code = taosCopyFile(srcName, dstName); + if (code == -1) { + goto _err; + } + } + + memset(srcName, 0, sLen + 64); + memset(dstName, 0, dLen + 64); + } + +_err: + taosMemoryFreeClear(srcName); + taosMemoryFreeClear(dstName); + taosCloseDir(&pDir); + return code 
>= 0 ? 0 : -1; +} + +int32_t isBkdDataMeta(char* name, int32_t len) { + const char* pCurrent = "CURRENT"; + int32_t currLen = strlen(pCurrent); + + const char* pManifest = "MANIFEST-"; + int32_t maniLen = strlen(pManifest); + + if (len >= maniLen && strncmp(name, pManifest, maniLen) == 0) { + return 1; + } else if (len == currLen && strcmp(name, pCurrent) == 0) { + return 1; + } + return 0; +} +int32_t compareHashTableImpl(SHashObj* p1, SHashObj* p2, SArray* diff) { + int32_t code = 0; + size_t len = 0; + void* pIter = taosHashIterate(p2, NULL); + while (pIter) { + char* name = taosHashGetKey(pIter, &len); + if (!isBkdDataMeta(name, len) && !taosHashGet(p1, name, len)) { + char* fname = taosMemoryCalloc(1, len + 1); + strncpy(fname, name, len); + taosArrayPush(diff, &fname); + } + pIter = taosHashIterate(p2, pIter); + } + return code; +} +int32_t compareHashTable(SHashObj* p1, SHashObj* p2, SArray* add, SArray* del) { + int32_t code = 0; + + code = compareHashTableImpl(p1, p2, add); + code = compareHashTableImpl(p2, p1, del); + + return code; +} + +void hashTableToDebug(SHashObj* pTbl, char** buf) { + size_t sz = taosHashGetSize(pTbl); + int32_t total = 0; + char* p = taosMemoryCalloc(1, sz * 16 + 4); + void* pIter = taosHashIterate(pTbl, NULL); + while (pIter) { + size_t len = 0; + char* name = taosHashGetKey(pIter, &len); + char* tname = taosMemoryCalloc(1, len + 1); + memcpy(tname, name, len); + total += sprintf(p + total, "%s,", tname); + + pIter = taosHashIterate(pTbl, pIter); + taosMemoryFree(tname); + } + if (total > 0) { + p[total - 1] = 0; + } + *buf = p; +} +void strArrayDebugInfo(SArray* pArr, char** buf) { + int32_t sz = taosArrayGetSize(pArr); + if (sz <= 0) return; + + char* p = (char*)taosMemoryCalloc(1, 64 + sz * 64); + int32_t total = 0; + + for (int i = 0; i < sz; i++) { + char* name = taosArrayGetP(pArr, i); + total += sprintf(p + total, "%s,", name); + } + p[total - 1] = 0; + + *buf = p; +} +void dbChkpDebugInfo(SDbChkp* pDb) { + // stTrace("chkp get file list: curr"); + char* p[4] = {NULL}; + + hashTableToDebug(pDb->pSstTbl[pDb->idx], &p[0]); + stTrace("chkp previous file: [%s]", p[0]); + + hashTableToDebug(pDb->pSstTbl[1 - pDb->idx], &p[1]); + stTrace("chkp curr file: [%s]", p[1]); + + strArrayDebugInfo(pDb->pAdd, &p[2]); + stTrace("chkp newly addded file: [%s]", p[2]); + + strArrayDebugInfo(pDb->pDel, &p[3]); + stTrace("chkp newly deleted file: [%s]", p[3]); + + for (int i = 0; i < 4; i++) { + taosMemoryFree(p[i]); + } +} +int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { + taosThreadRwlockWrlock(&p->rwLock); + + p->preCkptId = p->curChkpId; + p->curChkpId = chkpId; + const char* pCurrent = "CURRENT"; + int32_t currLen = strlen(pCurrent); + + const char* pManifest = "MANIFEST-"; + int32_t maniLen = strlen(pManifest); + + const char* pSST = ".sst"; + int32_t sstLen = strlen(pSST); + + memset(p->buf, 0, p->len); + sprintf(p->buf, "%s%s%s%scheckpoint%" PRId64 "", p->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, chkpId); + + taosArrayClearP(p->pAdd, taosMemoryFree); + taosArrayClearP(p->pDel, taosMemoryFree); + taosHashClear(p->pSstTbl[1 - p->idx]); + + TdDirPtr pDir = taosOpenDir(p->buf); + TdDirEntryPtr de = NULL; + int8_t dummy = 0; + while ((de = taosReadDir(pDir)) != NULL) { + char* name = taosGetDirEntryName(de); + if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; + if (strlen(name) == currLen && strcmp(name, pCurrent) == 0) { + taosMemoryFreeClear(p->pCurrent); + p->pCurrent = taosStrdup(name); + // taosHashPut(p->pSstTbl[1 - p->idx], 
name, strlen(name), &dummy, sizeof(dummy)); + continue; + } + + if (strlen(name) >= maniLen && strncmp(name, pManifest, maniLen) == 0) { + taosMemoryFreeClear(p->pManifest); + p->pManifest = taosStrdup(name); + // taosHashPut(p->pSstTbl[1 - p->idx], name, strlen(name), &dummy, sizeof(dummy)); + continue; + } + if (strlen(name) >= sstLen && strncmp(name + strlen(name) - 4, pSST, sstLen) == 0) { + taosHashPut(p->pSstTbl[1 - p->idx], name, strlen(name), &dummy, sizeof(dummy)); + continue; + } + } + taosCloseDir(&pDir); + + if (p->init == 0) { + void* pIter = taosHashIterate(p->pSstTbl[1 - p->idx], NULL); + while (pIter) { + size_t len = 0; + char* name = taosHashGetKey(pIter, &len); + if (name != NULL && !isBkdDataMeta(name, len)) { + char* fname = taosMemoryCalloc(1, len + 1); + strncpy(fname, name, len); + taosArrayPush(p->pAdd, &fname); + } + pIter = taosHashIterate(p->pSstTbl[1 - p->idx], pIter); + } + if (taosArrayGetSize(p->pAdd) > 0) p->update = 1; + + p->init = 1; + p->preCkptId = -1; + p->curChkpId = chkpId; + } else { + int32_t code = compareHashTable(p->pSstTbl[p->idx], p->pSstTbl[1 - p->idx], p->pAdd, p->pDel); + if (code != 0) { + // dead code + taosArrayClearP(p->pAdd, taosMemoryFree); + taosArrayClearP(p->pDel, taosMemoryFree); + taosHashClear(p->pSstTbl[1 - p->idx]); + p->update = 0; + return code; + } + + if (taosArrayGetSize(p->pAdd) == 0 && taosArrayGetSize(p->pDel) == 0) { + p->update = 0; + } + + p->preCkptId = p->curChkpId; + p->curChkpId = chkpId; + } + + dbChkpDebugInfo(p); + + p->idx = 1 - p->idx; + + taosThreadRwlockUnlock(&p->rwLock); + + return 0; +} + +SDbChkp* dbChkpCreate(char* path, int64_t initChkpId) { + SDbChkp* p = taosMemoryCalloc(1, sizeof(SDbChkp)); + p->curChkpId = initChkpId; + p->preCkptId = -1; + p->pSST = taosArrayInit(64, sizeof(void*)); + p->path = path; + p->len = strlen(path) + 128; + p->buf = taosMemoryCalloc(1, p->len); + + p->idx = 0; + p->pSstTbl[0] = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); + p->pSstTbl[1] = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); + + p->pAdd = taosArrayInit(64, sizeof(void*)); + p->pDel = taosArrayInit(64, sizeof(void*)); + p->update = 0; + taosThreadRwlockInit(&p->rwLock, NULL); + + SArray* list = NULL; + int32_t code = dbChkpGetDelta(p, initChkpId, list); + + return p; +} + +void dbChkpDestroy(SDbChkp* pChkp) { + taosMemoryFree(pChkp->buf); + taosMemoryFree(pChkp->path); + + taosArrayDestroyP(pChkp->pSST, taosMemoryFree); + taosArrayDestroyP(pChkp->pAdd, taosMemoryFree); + taosArrayDestroyP(pChkp->pDel, taosMemoryFree); + + taosHashCleanup(pChkp->pSstTbl[0]); + taosHashCleanup(pChkp->pSstTbl[1]); + + taosMemoryFree(pChkp->pCurrent); + taosMemoryFree(pChkp->pManifest); + taosMemoryFree(pChkp); +} + +int32_t dbChkpInit(SDbChkp* p) { + if (p == NULL) return 0; + return 0; +} +int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { + taosThreadRwlockRdlock(&p->rwLock); + int32_t code = -1; + int32_t len = p->len + 128; + + char* srcBuf = taosMemoryCalloc(1, len); + char* dstBuf = taosMemoryCalloc(1, len); + + char* srcDir = taosMemoryCalloc(1, len); + char* dstDir = taosMemoryCalloc(1, len); + + sprintf(srcDir, "%s%s%s%s%s%" PRId64 "", p->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", p->curChkpId); + sprintf(dstDir, "%s", dname); + + if (!taosDirExist(srcDir)) { + stError("failed to dump srcDir %s, reason: not exist such dir", srcDir); + goto _ERROR; + } + + // add file to $name dir + for (int i = 0; i < 
taosArrayGetSize(p->pAdd); i++) { + memset(srcBuf, 0, len); + memset(dstBuf, 0, len); + + char* filename = taosArrayGetP(p->pAdd, i); + sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, filename); + sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, filename); + + if (taosCopyFile(srcBuf, dstBuf) < 0) { + stError("failed to copy file from %s to %s", srcBuf, dstBuf); + goto _ERROR; + } + } + // del file in $name + for (int i = 0; i < taosArrayGetSize(p->pDel); i++) { + char* filename = taosArrayGetP(p->pDel, i); + char* p = taosStrdup(filename); + taosArrayPush(list, &p); + } + + // copy current file to dst dir + memset(srcBuf, 0, len); + memset(dstBuf, 0, len); + sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, p->pCurrent); + sprintf(dstBuf, "%s%s%s_%" PRId64 "", dstDir, TD_DIRSEP, p->pCurrent, p->curChkpId); + if (taosCopyFile(srcBuf, dstBuf) < 0) { + stError("failed to copy file from %s to %s", srcBuf, dstBuf); + goto _ERROR; + } + + // copy manifest file to dst dir + memset(srcBuf, 0, len); + memset(dstBuf, 0, len); + sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, p->pManifest); + sprintf(dstBuf, "%s%s%s_%" PRId64 "", dstDir, TD_DIRSEP, p->pManifest, p->curChkpId); + if (taosCopyFile(srcBuf, dstBuf) < 0) { + stError("failed to copy file from %s to %s", srcBuf, dstBuf); + goto _ERROR; + } + + static char* chkpMeta = "META"; + memset(dstBuf, 0, len); + sprintf(dstDir, "%s%s%s", dstDir, TD_DIRSEP, chkpMeta); + + TdFilePtr pFile = taosOpenFile(dstDir, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + if (pFile == NULL) { + stError("chkp failed to create meta file: %s", dstDir); + goto _ERROR; + } + char content[128] = {0}; + snprintf(content, sizeof(content), "%s_%" PRId64 "\n%s_%" PRId64 "", p->pCurrent, p->curChkpId, p->pManifest, + p->curChkpId); + if (taosWriteFile(pFile, content, strlen(content)) <= 0) { + stError("chkp failed to write meta file: %s", dstDir); + taosCloseFile(&pFile); + goto _ERROR; + } + taosCloseFile(&pFile); + + // clear delta data buf + taosArrayClearP(p->pAdd, taosMemoryFree); + taosArrayClearP(p->pDel, taosMemoryFree); + code = 0; + +_ERROR: + taosThreadRwlockUnlock(&p->rwLock); + taosMemoryFree(srcBuf); + taosMemoryFree(dstBuf); + taosMemoryFree(srcDir); + taosMemoryFree(dstDir); + return code; +} +SBkdMgt* bkdMgtCreate(char* path) { + SBkdMgt* p = taosMemoryCalloc(1, sizeof(SBkdMgt)); + p->pDbChkpTbl = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); + p->path = taosStrdup(path); + taosThreadRwlockInit(&p->rwLock, NULL); + return p; +} + +void bkdMgtDestroy(SBkdMgt* bm) { + if (bm == NULL) return; + void* pIter = taosHashIterate(bm->pDbChkpTbl, NULL); + while (pIter) { + SDbChkp* pChkp = *(SDbChkp**)(pIter); + dbChkpDestroy(pChkp); + + pIter = taosHashIterate(bm->pDbChkpTbl, pIter); + } + + taosThreadRwlockDestroy(&bm->rwLock); + taosMemoryFree(bm->path); + taosHashCleanup(bm->pDbChkpTbl); + + taosMemoryFree(bm); +} +int32_t bkdMgtGetDelta(SBkdMgt* bm, char* taskId, int64_t chkpId, SArray* list, char* dname) { + int32_t code = 0; + + taosThreadRwlockWrlock(&bm->rwLock); + SDbChkp** ppChkp = taosHashGet(bm->pDbChkpTbl, taskId, strlen(taskId)); + SDbChkp* pChkp = ppChkp != NULL ? 
*ppChkp : NULL; + + if (pChkp == NULL) { + char* path = taosMemoryCalloc(1, strlen(bm->path) + 64); + sprintf(path, "%s%s%s", bm->path, TD_DIRSEP, taskId); + + SDbChkp* p = dbChkpCreate(path, chkpId); + taosHashPut(bm->pDbChkpTbl, taskId, strlen(taskId), &p, sizeof(void*)); + + pChkp = p; + + code = dbChkpDumpTo(pChkp, dname, list); + taosThreadRwlockUnlock(&bm->rwLock); + return code; + } + + code = dbChkpGetDelta(pChkp, chkpId, NULL); + code = dbChkpDumpTo(pChkp, dname, list); + + taosThreadRwlockUnlock(&bm->rwLock); + return code; +} + +int32_t bkdMgtAddChkp(SBkdMgt* bm, char* task, char* path) { + int32_t code = -1; + + taosThreadRwlockWrlock(&bm->rwLock); + SDbChkp** pp = taosHashGet(bm->pDbChkpTbl, task, strlen(task)); + if (pp == NULL) { + SDbChkp* p = dbChkpCreate(path, 0); + if (p != NULL) { + taosHashPut(bm->pDbChkpTbl, task, strlen(task), &p, sizeof(void*)); + code = 0; + } + } else { + stError("task chkp already exists"); + } + + taosThreadRwlockUnlock(&bm->rwLock); + + return code; +} + +int32_t bkdMgtDumpTo(SBkdMgt* bm, char* taskId, char* dname) { + int32_t code = 0; + taosThreadRwlockRdlock(&bm->rwLock); + + SDbChkp* p = taosHashGet(bm->pDbChkpTbl, taskId, strlen(taskId)); + code = dbChkpDumpTo(p, dname, NULL); + + taosThreadRwlockUnlock(&bm->rwLock); + return code; +} \ No newline at end of file diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 9391cbcd9b..e2561de841 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -15,8 +15,16 @@ #include "cos.h" #include "rsync.h" +#include "streamBackendRocksdb.h" #include "streamInt.h" +typedef struct { + UPLOAD_TYPE type; + char* taskId; + int64_t chkpId; + + SStreamTask* pTask; +} SAsyncUploadArg; int32_t tEncodeStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; @@ -95,12 +103,12 @@ int32_t tDecodeStreamCheckpointReadyMsg(SDecoder* pDecoder, SStreamCheckpointRea static int32_t streamAlignCheckpoint(SStreamTask* pTask) { int32_t num = taosArrayGetSize(pTask->upstreamInfo.pList); - int64_t old = atomic_val_compare_exchange_32(&pTask->checkpointAlignCnt, 0, num); + int64_t old = atomic_val_compare_exchange_32(&pTask->chkInfo.downstreamAlignNum, 0, num); if (old == 0) { stDebug("s-task:%s set initial align upstream num:%d", pTask->id.idStr, num); } - return atomic_sub_fetch_32(&pTask->checkpointAlignCnt, 1); + return atomic_sub_fetch_32(&pTask->chkInfo.downstreamAlignNum, 1); } static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType) { @@ -118,7 +126,7 @@ static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpoint } pBlock->info.type = STREAM_CHECKPOINT; - pBlock->info.version = pTask->checkpointingId; + pBlock->info.version = pTask->chkInfo.checkpointingId; pBlock->info.rows = 1; pBlock->info.childId = pTask->info.selfChildId; @@ -141,13 +149,12 @@ int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSo // 1. set task status to be prepared for check point, no data are allowed to put into inputQ. 
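/*
 * A simplified, self-contained sketch of the delta computation that
 * dbChkpGetDelta/compareHashTable perform above: given the file listing of the
 * previous checkpoint and of the new one, every *.sst name present only in the
 * new listing is treated as added and every name present only in the old
 * listing as deleted, while CURRENT and MANIFEST-* metadata files are always
 * copied and therefore skipped here. Plain sorted C arrays stand in for the
 * hash tables used in the patch; all file names below are illustrative.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int cmpName(const void* a, const void* b) { return strcmp(*(const char* const*)a, *(const char* const*)b); }

static int isMetaFile(const char* name) {
  return strcmp(name, "CURRENT") == 0 || strncmp(name, "MANIFEST-", 9) == 0;
}

/* print every non-metadata entry of `a` that does not appear in sorted array `b` */
static void printOnlyIn(const char* tag, const char** a, int na, const char** b, int nb) {
  for (int i = 0; i < na; i++) {
    if (isMetaFile(a[i])) continue;
    if (bsearch(&a[i], b, nb, sizeof(char*), cmpName) == NULL) printf("%s %s\n", tag, a[i]);
  }
}

int main(void) {
  const char* prev[] = {"000021.sst", "000022.sst", "CURRENT", "MANIFEST-000017"};
  const char* curr[] = {"000022.sst", "000025.sst", "CURRENT", "MANIFEST-000024"};
  int np = 4, nc = 4;
  qsort(prev, np, sizeof(char*), cmpName);
  qsort(curr, nc, sizeof(char*), cmpName);
  printOnlyIn("add:", curr, nc, prev, np); /* 000025.sst */
  printOnlyIn("del:", prev, np, curr, nc); /* 000021.sst */
  return 0;
}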
streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_GEN_CHECKPOINT); - pTask->checkpointingId = pReq->checkpointId; - pTask->checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask); + pTask->chkInfo.checkpointingId = pReq->checkpointId; + pTask->chkInfo.checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask); pTask->chkInfo.startTs = taosGetTimestampMs(); pTask->execInfo.checkpoint += 1; // 2. Put the checkpoint block into inputQ, to make sure all blocks with less version have been handled by this task - // already. int32_t code = appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER); return code; } @@ -171,13 +178,12 @@ static int32_t continueDispatchCheckpointBlock(SStreamDataBlock* pBlock, SStream int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBlock) { SSDataBlock* pDataBlock = taosArrayGet(pBlock->blocks, 0); int64_t checkpointId = pDataBlock->info.version; - - const char* id = pTask->id.idStr; - int32_t code = TSDB_CODE_SUCCESS; + const char* id = pTask->id.idStr; + int32_t code = TSDB_CODE_SUCCESS; // set task status if (streamTaskGetStatus(pTask, NULL) != TASK_STATUS__CK) { - pTask->checkpointingId = checkpointId; + pTask->chkInfo.checkpointingId = checkpointId; code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_GEN_CHECKPOINT); if (code != TSDB_CODE_SUCCESS) { stError("s-task:%s handle checkpoint-trigger block failed, code:%s", id, tstrerror(code)); @@ -185,26 +191,15 @@ int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBloc } } - { // todo: remove this when the pipeline checkpoint generating is used. - SStreamMeta* pMeta = pTask->pMeta; - streamMetaWLock(pMeta); - - if (pMeta->chkptNotReadyTasks == 0) { - pMeta->chkptNotReadyTasks = pMeta->numOfStreamTasks; - } - - streamMetaWUnLock(pMeta); - } - // todo fix race condition: set the status and append checkpoint block int32_t taskLevel = pTask->info.taskLevel; if (taskLevel == TASK_LEVEL__SOURCE) { - if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH || - pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { + int8_t type = pTask->outputInfo.type; + if (type == TASK_OUTPUT__FIXED_DISPATCH || type == TASK_OUTPUT__SHUFFLE_DISPATCH) { stDebug("s-task:%s set childIdx:%d, and add checkpoint-trigger block into outputQ", id, pTask->info.selfChildId); continueDispatchCheckpointBlock(pBlock, pTask); } else { // only one task exists, no need to dispatch downstream info - atomic_add_fetch_32(&pTask->checkpointNotReadyTasks, 1); + atomic_add_fetch_32(&pTask->chkInfo.checkpointNotReadyTasks, 1); streamProcessCheckpointReadyMsg(pTask); streamFreeQitem((SStreamQueueItem*)pBlock); } @@ -233,15 +228,13 @@ int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBloc id, num); streamFreeQitem((SStreamQueueItem*)pBlock); streamTaskBuildCheckpoint(pTask); - } else { - stDebug( - "s-task:%s process checkpoint block, all %d upstreams sent checkpoint msgs, dispatch checkpoint msg " - "downstream", - id, num); + } else { // source & agg tasks need to forward the checkpoint msg downwards + stDebug("s-task:%s process checkpoint block, all %d upstreams sent checkpoint msgs, continue forwards msg", id, + num); // set the needed checked downstream tasks, only when all downstream tasks do checkpoint complete, this task // can start local checkpoint procedure - pTask->checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask); + pTask->chkInfo.checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask); // Put the checkpoint block into 
inputQ, to make sure all blocks with less version have been handled by this task // already. And then, dispatch check point msg to all downstream tasks @@ -260,7 +253,7 @@ int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask) { ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE || pTask->info.taskLevel == TASK_LEVEL__AGG); // only when all downstream tasks are send checkpoint rsp, we can start the checkpoint procedure for the agg task - int32_t notReady = atomic_sub_fetch_32(&pTask->checkpointNotReadyTasks, 1); + int32_t notReady = atomic_sub_fetch_32(&pTask->chkInfo.checkpointNotReadyTasks, 1); ASSERT(notReady >= 0); if (notReady == 0) { @@ -276,11 +269,11 @@ int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask) { } void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg) { - pTask->checkpointingId = 0; // clear the checkpoint id + pTask->chkInfo.checkpointingId = 0; // clear the checkpoint id pTask->chkInfo.failedId = 0; pTask->chkInfo.startTs = 0; // clear the recorded start time - pTask->checkpointNotReadyTasks = 0; - pTask->checkpointAlignCnt = 0; + pTask->chkInfo.checkpointNotReadyTasks = 0; + // pTask->chkInfo.checkpointAlignCnt = 0; pTask->chkInfo.dispatchCheckpointTrigger = false; streamTaskOpenAllUpstreamInput(pTask); // open inputQ for all upstream tasks if (clearChkpReadyMsg) { @@ -288,100 +281,217 @@ void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg) { } } -int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId) { - int32_t vgId = pMeta->vgId; - int32_t code = 0; +int32_t streamSaveTaskCheckpointInfo(SStreamTask* p, int64_t checkpointId) { + SStreamMeta* pMeta = p->pMeta; + int32_t vgId = pMeta->vgId; + const char* id = p->id.idStr; + int32_t code = 0; - streamMetaWLock(pMeta); + if (p->info.fillHistory == 1) { + return code; + } - for (int32_t i = 0; i < taosArrayGetSize(pMeta->pTaskList); ++i) { - STaskId* pId = taosArrayGet(pMeta->pTaskList, i); - SStreamTask** ppTask = taosHashGet(pMeta->pTasksMap, pId, sizeof(*pId)); - if (ppTask == NULL) { - continue; - } + if (p->info.taskLevel > TASK_LEVEL__SINK) { + return code; + } - SStreamTask* p = *ppTask; - if (p->info.fillHistory == 1) { - continue; - } + taosThreadMutexLock(&p->lock); - ASSERT(p->chkInfo.checkpointId <= p->checkpointingId && p->checkpointingId == checkpointId && - p->chkInfo.checkpointVer <= p->chkInfo.processedVer); + ASSERT(p->chkInfo.checkpointId <= p->chkInfo.checkpointingId && p->chkInfo.checkpointingId == checkpointId && + p->chkInfo.checkpointVer <= p->chkInfo.processedVer); + p->chkInfo.checkpointId = p->chkInfo.checkpointingId; + p->chkInfo.checkpointVer = p->chkInfo.processedVer; - p->chkInfo.checkpointId = p->checkpointingId; - p->chkInfo.checkpointVer = p->chkInfo.processedVer; + streamTaskClearCheckInfo(p, false); + char* str = NULL; + streamTaskGetStatus(p, &str); - streamTaskClearCheckInfo(p, false); + code = streamTaskHandleEvent(p->status.pSM, TASK_EVENT_CHECKPOINT_DONE); + taosThreadMutexUnlock(&p->lock); - char* str = NULL; - streamTaskGetStatus(p, &str); + if (code != TSDB_CODE_SUCCESS) { + stDebug("s-task:%s vgId:%d handle event:checkpoint-done failed", id, vgId); + return -1; + } - code = streamTaskHandleEvent(p->status.pSM, TASK_EVENT_CHECKPOINT_DONE); + stDebug("vgId:%d s-task:%s level:%d open upstream inputQ, save status after checkpoint, checkpointId:%" PRId64 + ", Ver(saved):%" PRId64 " currentVer:%" PRId64 ", status: normal, prev:%s", + vgId, id, p->info.taskLevel, checkpointId, p->chkInfo.checkpointVer, 
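/*
 * The readiness bookkeeping above (checkpointNotReadyTasks, decremented in
 * streamProcessCheckpointReadyMsg) behaves like a countdown latch: the counter
 * starts at the number of downstream tasks, every checkpoint-ready message
 * decrements it atomically, and only the decrement that reaches zero starts
 * the local checkpoint. A minimal stand-alone sketch of that pattern with C11
 * atomics and pthreads; the demo_* names are illustrative assumptions only.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define DEMO_NUM_DOWNSTREAM 4

static atomic_int demoNotReady = DEMO_NUM_DOWNSTREAM;

static void demoBuildCheckpoint(void) { printf("all downstream tasks ready, build checkpoint\n"); }

/* called once per checkpoint-ready message from a downstream task */
static void* demoOnReadyMsg(void* arg) {
  (void)arg;
  int remain = atomic_fetch_sub(&demoNotReady, 1) - 1; /* counter value after this decrement */
  if (remain == 0) {
    demoBuildCheckpoint(); /* exactly one caller observes zero */
  } else {
    printf("still waiting for %d downstream task(s)\n", remain);
  }
  return NULL;
}

int main(void) {
  pthread_t tids[DEMO_NUM_DOWNSTREAM];
  for (int i = 0; i < DEMO_NUM_DOWNSTREAM; i++) pthread_create(&tids[i], NULL, demoOnReadyMsg, NULL);
  for (int i = 0; i < DEMO_NUM_DOWNSTREAM; i++) pthread_join(tids[i], NULL);
  return 0;
}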
p->chkInfo.nextProcessVer, str); + + // save the task if not sink task + if (p->info.taskLevel != TASK_LEVEL__SINK) { + streamMetaWLock(pMeta); + + code = streamMetaSaveTask(pMeta, p); if (code != TSDB_CODE_SUCCESS) { - stDebug("s-task:%s vgId:%d save task status failed, since handle event failed", p->id.idStr, vgId); streamMetaWUnLock(pMeta); - return -1; - } else { // save the task - streamMetaSaveTask(pMeta, p); + stError("s-task:%s vgId:%d failed to save task info after do checkpoint, checkpointId:%" PRId64 ", since %s", id, + vgId, checkpointId, terrstr()); + return code; } - stDebug( - "vgId:%d s-task:%s level:%d open upstream inputQ, commit task status after checkpoint completed, " - "checkpointId:%" PRId64 ", Ver(saved):%" PRId64 " currentVer:%" PRId64 ", status to be normal, prev:%s", - pMeta->vgId, p->id.idStr, p->info.taskLevel, checkpointId, p->chkInfo.checkpointVer, p->chkInfo.nextProcessVer, - str); - } + code = streamMetaCommit(pMeta); + if (code != TSDB_CODE_SUCCESS) { + stError("s-task:%s vgId:%d failed to commit stream meta after do checkpoint, checkpointId:%" PRId64 ", since %s", + id, vgId, checkpointId, terrstr()); + } - code = streamMetaCommit(pMeta); - if (code < 0) { - stError("vgId:%d failed to commit stream meta after do checkpoint, checkpointId:%" PRId64 ", since %s", pMeta->vgId, - checkpointId, terrstr()); - } else { - stInfo("vgId:%d commit stream meta after do checkpoint, checkpointId:%" PRId64 " DONE", pMeta->vgId, checkpointId); + streamMetaWUnLock(pMeta); } - - streamMetaWUnLock(pMeta); return code; } -int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { - int32_t code = 0; +void streamTaskSetFailedId(SStreamTask* pTask) { + pTask->chkInfo.failedId = pTask->chkInfo.checkpointingId; + pTask->chkInfo.checkpointId = pTask->chkInfo.checkpointingId; +} - // check for all tasks, and do generate the vnode-wide checkpoint data. 
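The hunks above fold the checkpoint bookkeeping (`checkpointingId`, `checkpointNotReadyTasks`) into `pTask->chkInfo` but keep the same gating idea: the checkpoint trigger arms a per-task counter with the downstream fan-out, every checkpoint-ready message decrements it atomically, and only the message that brings it to zero starts the local checkpoint and the save/commit path in `streamSaveTaskCheckpointInfo`. A minimal standalone sketch of that countdown, using hypothetical names (`DemoChkInfo`, `demo_on_ready_msg`) rather than TDengine APIs:

```c
// Sketch only: the "not-ready downstream" countdown that gates a local checkpoint.
#include <stdatomic.h>
#include <stdio.h>

typedef struct {
    atomic_int notReadyDownstream;   // downstream tasks that have not replied yet
} DemoChkInfo;

// The checkpoint-trigger block arms the counter with the downstream fan-out.
static void demo_on_trigger(DemoChkInfo *ck, int numOfDownstream) {
    atomic_store(&ck->notReadyDownstream, numOfDownstream);
}

// Each checkpoint-ready message decrements the counter; the message that brings
// it to zero is the one that starts the local checkpoint.
static void demo_on_ready_msg(DemoChkInfo *ck) {
    int remain = atomic_fetch_sub(&ck->notReadyDownstream, 1) - 1;
    if (remain == 0) {
        printf("all downstream tasks ready, start the local checkpoint\n");
    } else {
        printf("still waiting for %d downstream task(s)\n", remain);
    }
}

int main(void) {
    DemoChkInfo ck = {0};
    demo_on_trigger(&ck, 2);
    demo_on_ready_msg(&ck);   // still waiting for 1 downstream task(s)
    demo_on_ready_msg(&ck);   // all downstream tasks ready
    return 0;
}
```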
- SStreamMeta* pMeta = pTask->pMeta; - int32_t remain = atomic_sub_fetch_32(&pMeta->chkptNotReadyTasks, 1); - ASSERT(remain >= 0); - - double el = (taosGetTimestampMs() - pTask->chkInfo.startTs) / 1000.0; - if (remain == 0) { // all tasks are ready - stDebug("s-task:%s all downstreams are ready, ready for do checkpoint", pTask->id.idStr); - streamBackendDoCheckpoint(pMeta, pTask->checkpointingId); - streamSaveAllTaskStatus(pMeta, pTask->checkpointingId); - stInfo( - "vgId:%d vnode wide checkpoint completed, save all tasks status, last:%s, level:%d elapsed time:%.2f Sec " - "checkpointId:%" PRId64, - pMeta->vgId, pTask->id.idStr, pTask->info.taskLevel, el, pTask->checkpointingId); +int32_t getChkpMeta(char* id, char* path, SArray* list) { + char* file = taosMemoryCalloc(1, strlen(path) + 32); + sprintf(file, "%s%s%s", path, TD_DIRSEP, "META_TMP"); + int32_t code = downloadCheckpointByName(id, "META", file); + if (code != 0) { + stDebug("chkp failed to download meta file:%s", file); + taosMemoryFree(file); + return code; + } + TdFilePtr pFile = taosOpenFile(file, TD_FILE_READ); + char buf[128] = {0}; + if (taosReadFile(pFile, buf, sizeof(buf)) <= 0) { + stError("chkp failed to read meta file:%s", file); + code = -1; } else { - stInfo( - "vgId:%d vnode wide tasks not reach checkpoint ready status, ready s-task:%s, level:%d elapsed time:%.2f Sec " - "not ready:%d/%d", - pMeta->vgId, pTask->id.idStr, pTask->info.taskLevel, el, remain, pMeta->numOfStreamTasks); + int32_t len = strlen(buf); + for (int i = 0; i < len; i++) { + if (buf[i] == '\n') { + char* item = taosMemoryCalloc(1, i + 1); + memcpy(item, buf, i); + taosArrayPush(list, &item); + + item = taosMemoryCalloc(1, len - i); + memcpy(item, buf + i + 1, len - i - 1); + taosArrayPush(list, &item); + } + } + } + taosCloseFile(&pFile); + taosRemoveFile(file); + taosMemoryFree(file); + return code; +} +int32_t doUploadChkp(void* param) { + SAsyncUploadArg* arg = param; + char* path = NULL; + int32_t code = 0; + SArray* toDelFiles = taosArrayInit(4, sizeof(void*)); + + if ((code = taskDbGenChkpUploadData(arg->pTask->pBackend, arg->pTask->pMeta->bkdChkptMgt, arg->chkpId, + (int8_t)(arg->type), &path, toDelFiles)) != 0) { + stError("s-task:%s failed to gen upload checkpoint:%" PRId64 "", arg->pTask->id.idStr, arg->chkpId); + } + if (arg->type == UPLOAD_S3) { + if (code == 0 && (code = getChkpMeta(arg->taskId, path, toDelFiles)) != 0) { + stError("s-task:%s failed to get checkpoint:%" PRId64 " meta", arg->pTask->id.idStr, arg->chkpId); + } + } + + if (code == 0 && (code = uploadCheckpoint(arg->taskId, path)) != 0) { + stError("s-task:%s failed to upload checkpoint:%" PRId64, arg->pTask->id.idStr, arg->chkpId); + } + + if (code == 0) { + for (int i = 0; i < taosArrayGetSize(toDelFiles); i++) { + char* p = taosArrayGetP(toDelFiles, i); + code = deleteCheckpointFile(arg->taskId, p); + stDebug("s-task:%s try to del file: %s", arg->pTask->id.idStr, p); + if (code != 0) { + break; + } + } + } + + taosArrayDestroyP(toDelFiles, taosMemoryFree); + + taosRemoveDir(path); + taosMemoryFree(path); + + taosMemoryFree(arg->taskId); + taosMemoryFree(arg); + return code; +} +int32_t streamTaskUploadChkp(SStreamTask* pTask, int64_t chkpId, char* taskId) { + // async upload + UPLOAD_TYPE type = getUploadType(); + if (type == UPLOAD_DISABLE) { + return 0; + } + SAsyncUploadArg* arg = taosMemoryCalloc(1, sizeof(SAsyncUploadArg)); + arg->type = type; + arg->taskId = taosStrdup(taskId); + arg->chkpId = chkpId; + arg->pTask = pTask; + + return 
streamMetaAsyncExec(pTask->pMeta, doUploadChkp, arg, NULL); +} +int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { + int32_t code = TSDB_CODE_SUCCESS; + int64_t startTs = pTask->chkInfo.startTs; + int64_t ckId = pTask->chkInfo.checkpointingId; + + // sink task do not need to save the status, and generated the checkpoint + if (pTask->info.taskLevel != TASK_LEVEL__SINK) { + stDebug("s-task:%s level:%d start gen checkpoint", pTask->id.idStr, pTask->info.taskLevel); + code = streamBackendDoCheckpoint(pTask->pBackend, ckId); + if (code != TSDB_CODE_SUCCESS) { + stError("s-task:%s gen checkpoint:%" PRId64 " failed, code:%s", pTask->id.idStr, ckId, tstrerror(terrno)); + } } // send check point response to upstream task - if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { - code = streamTaskSendCheckpointSourceRsp(pTask); - } else { - code = streamTaskSendCheckpointReadyMsg(pTask); + if (code == TSDB_CODE_SUCCESS) { + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + code = streamTaskSendCheckpointSourceRsp(pTask); + } else { + code = streamTaskSendCheckpointReadyMsg(pTask); + } + + if (code != TSDB_CODE_SUCCESS) { + // todo: let's retry send rsp to upstream/mnode + stError("s-task:%s failed to send checkpoint rsp to upstream, checkpointId:%" PRId64 ", code:%s", pTask->id.idStr, + ckId, tstrerror(code)); + } } - if (code != TSDB_CODE_SUCCESS) { - // todo: let's retry send rsp to upstream/mnode - stError("s-task:%s failed to send checkpoint rsp to upstream, checkpointId:%" PRId64 ", code:%s", pTask->id.idStr, - pTask->checkpointingId, tstrerror(code)); + // clear the checkpoint info, and commit the newest checkpoint info if all works are done successfully + if (code == TSDB_CODE_SUCCESS) { + code = streamSaveTaskCheckpointInfo(pTask, ckId); + if (code != TSDB_CODE_SUCCESS) { + stError("s-task:%s commit taskInfo failed, checkpoint:%" PRId64 " failed, code:%s", pTask->id.idStr, ckId, + tstrerror(terrno)); + } else { + code = streamTaskUploadChkp(pTask, ckId, (char*)pTask->id.idStr); + if (code != 0) { + stError("s-task:%s failed to upload checkpoint:%" PRId64 " failed", pTask->id.idStr, ckId); + } + } } + if (code != TSDB_CODE_SUCCESS) { // clear the checkpoint info if failed + taosThreadMutexLock(&pTask->lock); + streamTaskClearCheckInfo(pTask, false); + code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_CHECKPOINT_DONE); + taosThreadMutexUnlock(&pTask->lock); + + streamTaskSetFailedId(pTask); + stDebug("s-task:%s clear checkpoint flag since gen checkpoint failed, checkpointId:%" PRId64, pTask->id.idStr, + ckId); + } + + double el = (taosGetTimestampMs() - startTs) / 1000.0; + stInfo("s-task:%s vgId:%d level:%d, checkpointId:%" PRId64 " ver:%" PRId64 " elapsed time:%.2f Sec, %s ", + pTask->id.idStr, pTask->pMeta->vgId, pTask->info.taskLevel, ckId, pTask->chkInfo.checkpointVer, el, + (code == TSDB_CODE_SUCCESS) ? 
"succ" : "failed"); + return code; } @@ -390,6 +500,7 @@ static int uploadCheckpointToS3(char* id, char* path) { if (pDir == NULL) return -1; TdDirEntryPtr de = NULL; + s3Init(); while ((de = taosReadDir(pDir)) != NULL) { char* name = taosGetDirEntryName(de); if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0 || taosDirEntryIsDir(de)) continue; @@ -409,12 +520,24 @@ static int uploadCheckpointToS3(char* id, char* path) { return -1; } stDebug("[s3] upload checkpoint:%s", filename); + // break; } taosCloseDir(&pDir); return 0; } +static int downloadCheckpointByNameS3(char* id, char* fname, char* dstName) { + int code = 0; + char* buf = taosMemoryCalloc(1, strlen(id) + strlen(dstName) + 4); + sprintf(buf, "%s/%s", id, fname); + if (s3GetObjectToFile(buf, dstName) != 0) { + code = -1; + } + taosMemoryFree(buf); + return code; +} + UPLOAD_TYPE getUploadType() { if (strlen(tsSnodeAddress) != 0) { return UPLOAD_RSYNC; @@ -438,6 +561,20 @@ int uploadCheckpoint(char* id, char* path) { return 0; } +// fileName: CURRENT +int downloadCheckpointByName(char* id, char* fname, char* dstName) { + if (id == NULL || fname == NULL || strlen(id) == 0 || strlen(fname) == 0 || strlen(fname) >= PATH_MAX) { + stError("uploadCheckpointByName parameters invalid"); + return -1; + } + if (strlen(tsSnodeAddress) != 0) { + return 0; + } else if (tsS3StreamEnabled) { + return downloadCheckpointByNameS3(id, fname, dstName); + } + return 0; +} + int downloadCheckpoint(char* id, char* path) { if (id == NULL || path == NULL || strlen(id) == 0 || strlen(path) == 0 || strlen(path) >= PATH_MAX) { stError("downloadCheckpoint parameters invalid"); diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index b0ba78ca6e..6247d4ed53 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -1167,10 +1167,10 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i pTask->info.taskLevel == TASK_LEVEL__SOURCE) { stError("s-task:%s failed to dispatch checkpoint-trigger msg, checkpointId:%" PRId64 ", set the current checkpoint failed, and send rsp to mnode", - id, pTask->checkpointingId); + id, pTask->chkInfo.checkpointingId); { // send checkpoint failure msg to mnode directly - pTask->chkInfo.failedId = pTask->checkpointingId; // record the latest failed checkpoint id - pTask->checkpointingId = pTask->checkpointingId; + pTask->chkInfo.failedId = pTask->chkInfo.checkpointingId; // record the latest failed checkpoint id + pTask->chkInfo.checkpointingId = pTask->chkInfo.checkpointingId; streamTaskSendCheckpointSourceRsp(pTask); } } else { diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 8b14846414..8ab8f3852e 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -252,14 +252,15 @@ static void streamScanHistoryDataImpl(SStreamTask* pTask, SArray* pRes, int32_t* SScanhistoryDataInfo streamScanHistoryData(SStreamTask* pTask, int64_t st) { ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); - void* exec = pTask->exec.pExecutor; - bool finished = false; + void* exec = pTask->exec.pExecutor; + bool finished = false; + const char* id = pTask->id.idStr; qSetStreamOpOpen(exec); while (1) { if (streamTaskShouldPause(pTask)) { - stDebug("s-task:%s paused from the scan-history task", pTask->id.idStr); + stDebug("s-task:%s paused from the scan-history task", id); // quit from step1, not continue to handle the step2 return 
(SScanhistoryDataInfo){TASK_SCANHISTORY_QUIT, 0}; } @@ -267,8 +268,7 @@ SScanhistoryDataInfo streamScanHistoryData(SStreamTask* pTask, int64_t st) { SArray* pRes = taosArrayInit(0, sizeof(SSDataBlock)); if (pRes == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - stError("s-task:%s scan-history prepare result block failed, code:%s, retry later", pTask->id.idStr, - tstrerror(terrno)); + stError("s-task:%s scan-history prepare result block failed, code:%s, retry later", id, tstrerror(terrno)); continue; } @@ -281,12 +281,12 @@ SScanhistoryDataInfo streamScanHistoryData(SStreamTask* pTask, int64_t st) { } // dispatch the generated results - int32_t code = handleResultBlocks(pTask, pRes, size); + /*int32_t code = */handleResultBlocks(pTask, pRes, size); int64_t el = taosGetTimestampMs() - st; // downstream task input queue is full, try in 5sec - if (pTask->inputq.status == TASK_INPUT_STATUS__BLOCKED) { + if (pTask->inputq.status == TASK_INPUT_STATUS__BLOCKED && (pTask->info.fillHistory == 1)) { return (SScanhistoryDataInfo){TASK_SCANHISTORY_REXEC, 5000}; } @@ -294,9 +294,9 @@ SScanhistoryDataInfo streamScanHistoryData(SStreamTask* pTask, int64_t st) { return (SScanhistoryDataInfo){TASK_SCANHISTORY_CONT, 0}; } - if (el >= STREAM_SCAN_HISTORY_TIMESLICE) { - stDebug("s-task:%s fill-history:%d time slice exhausted, elapsed time:%.2fs, retry in 100ms", - pTask->id.idStr, pTask->info.fillHistory, el / 1000.0); + if (el >= STREAM_SCAN_HISTORY_TIMESLICE && (pTask->info.fillHistory == 1)) { + stDebug("s-task:%s fill-history:%d time slice exhausted, elapsed time:%.2fs, retry in 100ms", id, + pTask->info.fillHistory, el / 1000.0); return (SScanhistoryDataInfo){TASK_SCANHISTORY_REXEC, 100}; } } @@ -543,7 +543,7 @@ int32_t streamProcessTranstateBlock(SStreamTask* pTask, SStreamDataBlock* pBlock * todo: the batch of blocks should be tuned dynamic, according to the total elapsed time of each batch of blocks, the * appropriate batch of blocks should be handled in 5 to 10 sec. */ -int32_t streamExecForAll(SStreamTask* pTask) { +int32_t doStreamExecTask(SStreamTask* pTask) { const char* id = pTask->id.idStr; // merge multiple input data if possible in the input queue. 
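In the `streamScanHistoryData` hunk above, the scan loop now reports a status plus a retry delay instead of spinning: a paused task quits, a blocked downstream queue defers a fill-history task for 5000 ms, and an exhausted time slice defers it for 100 ms. A small self-contained sketch of that time-sliced pattern, assuming made-up task fields and status names (only the 100 ms / 5000 ms values come from the hunk):

```c
// Sketch only: one slice of history scanning that tells the caller how long to
// wait before rescheduling it, instead of blocking inside the loop.
#include <stdbool.h>
#include <stdint.h>

typedef enum { DEMO_SCAN_QUIT, DEMO_SCAN_REEXEC, DEMO_SCAN_CONT } DemoScanStatus;

typedef struct {
    DemoScanStatus status;
    int32_t        retryInMs;   // only meaningful for DEMO_SCAN_REEXEC
} DemoScanResult;

typedef struct {
    bool paused;
    bool outputBlocked;
    bool fillHistory;
} DemoTask;

// 'start' and 'now' are millisecond timestamps; 'timeSliceMs' bounds one slice.
static DemoScanResult demo_scan_slice(const DemoTask *t, int64_t start, int64_t now,
                                      int64_t timeSliceMs) {
    if (t->paused) {                                   // stop scanning entirely
        return (DemoScanResult){DEMO_SCAN_QUIT, 0};
    }
    if (t->outputBlocked && t->fillHistory) {          // downstream queue full: back off
        return (DemoScanResult){DEMO_SCAN_REEXEC, 5000};
    }
    if (now - start >= timeSliceMs && t->fillHistory) {// slice exhausted: yield briefly
        return (DemoScanResult){DEMO_SCAN_REEXEC, 100};
    }
    return (DemoScanResult){DEMO_SCAN_CONT, 0};        // keep scanning in this slice
}
```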
@@ -654,7 +654,7 @@ int32_t streamExecTask(SStreamTask* pTask) { int8_t schedStatus = streamTaskSetSchedStatusActive(pTask); if (schedStatus == TASK_SCHED_STATUS__WAITING) { while (1) { - int32_t code = streamExecForAll(pTask); + int32_t code = doStreamExecTask(pTask); if (code < 0) { // todo this status should be removed atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__FAILED); return -1; diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 00b292a69a..bd23e41a84 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -18,6 +18,7 @@ #include "streamInt.h" #include "tmisce.h" #include "tref.h" +#include "tsched.h" #include "tstream.h" #include "ttimer.h" #include "wal.h" @@ -27,6 +28,7 @@ static TdThreadOnce streamMetaModuleInit = PTHREAD_ONCE_INIT; int32_t streamBackendId = 0; int32_t streamBackendCfWrapperId = 0; int32_t streamMetaId = 0; +int32_t taskDbWrapperId = 0; static void metaHbToMnode(void* param, void* tmrId); static void streamMetaClear(SStreamMeta* pMeta); @@ -55,6 +57,7 @@ int32_t metaRefMgtAdd(int64_t vgId, int64_t* rid); static void streamMetaEnvInit() { streamBackendId = taosOpenRef(64, streamBackendCleanup); streamBackendCfWrapperId = taosOpenRef(64, streamBackendHandleCleanup); + taskDbWrapperId = taosOpenRef(64, taskDbDestroy2); streamMetaId = taosOpenRef(64, streamMetaCloseImpl); @@ -62,6 +65,7 @@ static void streamMetaEnvInit() { } void streamMetaInit() { taosThreadOnce(&streamMetaModuleInit, streamMetaEnvInit); } + void streamMetaCleanup() { taosCloseRef(streamBackendId); taosCloseRef(streamBackendCfWrapperId); @@ -106,6 +110,174 @@ int32_t metaRefMgtAdd(int64_t vgId, int64_t* rid) { return 0; } +typedef struct { + int64_t chkpId; + char* path; + char* taskId; + + SArray* pChkpSave; + SArray* pChkpInUse; + int8_t chkpCap; + void* backend; + +} StreamMetaTaskState; + +int32_t streamMetaOpenTdb(SStreamMeta* pMeta) { + if (tdbOpen(pMeta->path, 16 * 1024, 1, &pMeta->db, 0) < 0) { + return -1; + // goto _err; + } + + if (tdbTbOpen("task.db", STREAM_TASK_KEY_LEN, -1, NULL, pMeta->db, &pMeta->pTaskDb, 0) < 0) { + return -1; + } + + if (tdbTbOpen("checkpoint.db", sizeof(int32_t), -1, NULL, pMeta->db, &pMeta->pCheckpointDb, 0) < 0) { + return -1; + } + return 0; +} + +// +// impl later +// +enum STREAM_STATE_VER { + STREAM_STATA_NO_COMPATIBLE, + STREAM_STATA_COMPATIBLE, + STREAM_STATA_NEED_CONVERT, +}; + +int32_t streamMetaCheckBackendCompatible(SStreamMeta* pMeta) { + int8_t ret = STREAM_STATA_COMPATIBLE; + TBC* pCur = NULL; + + if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) { + // no task info, no stream + return ret; + } + void* pKey = NULL; + int32_t kLen = 0; + void* pVal = NULL; + int32_t vLen = 0; + + tdbTbcMoveToFirst(pCur); + while (tdbTbcNext(pCur, &pKey, &kLen, &pVal, &vLen) == 0) { + if (pVal == NULL || vLen == 0) { + break; + } + SDecoder decoder; + SCheckpointInfo info; + tDecoderInit(&decoder, (uint8_t*)pVal, vLen); + if (tDecodeStreamTaskChkInfo(&decoder, &info) < 0) { + continue; + } + if (info.msgVer <= SSTREAM_TASK_INCOMPATIBLE_VER) { + ret = STREAM_STATA_NO_COMPATIBLE; + } else if (info.msgVer == SSTREAM_TASK_NEED_CONVERT_VER) { + ret = STREAM_STATA_NEED_CONVERT; + } else if (info.msgVer == SSTREAM_TASK_VER) { + ret = STREAM_STATA_COMPATIBLE; + } + tDecoderClear(&decoder); + break; + } + tdbFree(pKey); + tdbFree(pVal); + tdbTbcClose(pCur); + return ret; +} + +int32_t streamMetaCvtDbFormat(SStreamMeta* pMeta) { + int32_t code = 0; + int64_t chkpId = 
streamMetaGetLatestCheckpointId(pMeta); + + bool exist = streamBackendDataIsExist(pMeta->path, chkpId, pMeta->vgId); + if (exist == false) { + return code; + } + SBackendWrapper* pBackend = streamBackendInit(pMeta->path, chkpId, pMeta->vgId); + + void* pIter = taosHashIterate(pBackend->cfInst, NULL); + while (pIter) { + void* key = taosHashGetKey(pIter, NULL); + code = streamStateCvtDataFormat(pMeta->path, key, *(void**)pIter); + if (code != 0) { + qError("failed to cvt data"); + goto _EXIT; + } + + pIter = taosHashIterate(pBackend->cfInst, pIter); + } + +_EXIT: + streamBackendCleanup((void*)pBackend); + + if (code == 0) { + char* state = taosMemoryCalloc(1, strlen(pMeta->path) + 32); + sprintf(state, "%s%s%s", pMeta->path, TD_DIRSEP, "state"); + taosRemoveDir(state); + taosMemoryFree(state); + } + + return code; +} +int32_t streamMetaMayCvtDbFormat(SStreamMeta* pMeta) { + int8_t compatible = streamMetaCheckBackendCompatible(pMeta); + if (compatible == STREAM_STATA_COMPATIBLE) { + return 0; + } else if (compatible == STREAM_STATA_NEED_CONVERT) { + qInfo("stream state need covert backend format"); + + return streamMetaCvtDbFormat(pMeta); + } else if (compatible == STREAM_STATA_NO_COMPATIBLE) { + qError( + "stream read incompatible data, rm %s/vnode/vnode*/tq/stream if taosd cannot start, and rebuild stream " + "manually", + tsDataDir); + + return -1; + } + return 0; +} + +int32_t streamTaskSetDb(SStreamMeta* pMeta, void* arg, char* key) { + SStreamTask* pTask = arg; + + int64_t chkpId = pTask->chkInfo.checkpointId; + + taosThreadMutexLock(&pMeta->backendMutex); + void** ppBackend = taosHashGet(pMeta->pTaskDbUnique, key, strlen(key)); + if (ppBackend != NULL && *ppBackend != NULL) { + taskDbAddRef(*ppBackend); + + STaskDbWrapper* pBackend = *ppBackend; + + pTask->backendRefId = pBackend->refId; + pTask->pBackend = pBackend; + taosThreadMutexUnlock(&pMeta->backendMutex); + + stDebug("s-task:0x%x set backend %p", pTask->id.taskId, pBackend); + return 0; + } + + STaskDbWrapper* pBackend = taskDbOpen(pMeta->path, key, chkpId); + if (pBackend == NULL) { + taosThreadMutexUnlock(&pMeta->backendMutex); + return -1; + } + + int64_t tref = taosAddRef(taskDbWrapperId, pBackend); + pTask->backendRefId = tref; + pTask->pBackend = pBackend; + pBackend->refId = tref; + pBackend->pTask = pTask; + + taosHashPut(pMeta->pTaskDbUnique, key, strlen(key), &pBackend, sizeof(void*)); + taosThreadMutexUnlock(&pMeta->backendMutex); + + stDebug("s-task:0x%x set backend %p", pTask->id.taskId, pBackend); + return 0; +} SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId, int64_t stage) { int32_t code = -1; SStreamMeta* pMeta = taosMemoryCalloc(1, sizeof(SStreamMeta)); @@ -121,15 +293,11 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF sprintf(tpath, "%s%s%s", path, TD_DIRSEP, "stream"); pMeta->path = tpath; - if (tdbOpen(pMeta->path, 16 * 1024, 1, &pMeta->db, 0) < 0) { + if (streamMetaOpenTdb(pMeta) < 0) { goto _err; } - if (tdbTbOpen("task.db", STREAM_TASK_KEY_LEN, -1, NULL, pMeta->db, &pMeta->pTaskDb, 0) < 0) { - goto _err; - } - - if (tdbTbOpen("checkpoint.db", sizeof(int32_t), -1, NULL, pMeta->db, &pMeta->pCheckpointDb, 0) < 0) { + if (streamMetaMayCvtDbFormat(pMeta) < 0) { goto _err; } if (streamMetaBegin(pMeta) < 0) { @@ -176,47 +344,41 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF pMeta->stage = stage; pMeta->role = (vgId == SNODE_HANDLE) ? 
NODE_ROLE_LEADER : NODE_ROLE_UNINIT; - // send heartbeat every 5sec. - pMeta->rid = taosAddRef(streamMetaId, pMeta); - int64_t* pRid = taosMemoryMalloc(sizeof(int64_t)); - *pRid = pMeta->rid; + pMeta->pTaskDbUnique = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); - metaRefMgtAdd(pMeta->vgId, pRid); - - pMeta->pHbInfo->hbTmr = taosTmrStart(metaHbToMnode, META_HB_CHECK_INTERVAL, pRid, streamEnv.timer); - pMeta->pHbInfo->tickCounter = 0; - pMeta->pHbInfo->stopFlag = 0; - - pMeta->pTaskBackendUnique = - taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); - pMeta->chkpSaved = taosArrayInit(4, sizeof(int64_t)); - pMeta->chkpInUse = taosArrayInit(4, sizeof(int64_t)); - pMeta->chkpCap = 2; - taosInitRWLatch(&pMeta->chkpDirLock); - - pMeta->chkpId = streamMetaGetLatestCheckpointId(pMeta); - pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId, pMeta->vgId); - while (pMeta->streamBackend == NULL) { - taosMsleep(100); - pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId, vgId); - if (pMeta->streamBackend == NULL) { - stInfo("vgId:%d failed to init stream backend, retry in 100ms", pMeta->vgId); - } - } - pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend); - - code = streamBackendLoadCheckpointInfo(pMeta); - - taosInitRWLatch(&pMeta->lock); - taosThreadMutexInit(&pMeta->backendMutex, NULL); + // pMeta->chkpId = streamGetLatestCheckpointId(pMeta); + // pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); + // while (pMeta->streamBackend == NULL) { + // qError("vgId:%d failed to init stream backend", pMeta->vgId); + // taosMsleep(2 * 1000); + // qInfo("vgId:%d retry to init stream backend", pMeta->vgId); + // pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); + // if (pMeta->streamBackend == NULL) { + // } + // } + // pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend); pMeta->numOfPausedTasks = 0; pMeta->numOfStreamTasks = 0; stInfo("vgId:%d open stream meta successfully, latest checkpoint:%" PRId64 ", stage:%" PRId64, vgId, pMeta->chkpId, stage); + + pMeta->rid = taosAddRef(streamMetaId, pMeta); + + int64_t* pRid = taosMemoryMalloc(sizeof(int64_t)); + memcpy(pRid, &pMeta->rid, sizeof(pMeta->rid)); + metaRefMgtAdd(pMeta->vgId, pRid); + + pMeta->pHbInfo->hbTmr = taosTmrStart(metaHbToMnode, META_HB_CHECK_INTERVAL, pRid, streamEnv.timer); + pMeta->pHbInfo->tickCounter = 0; + pMeta->pHbInfo->stopFlag = 0; + pMeta->qHandle = taosInitScheduler(32, 1, "stream-chkp", NULL); + + pMeta->bkdChkptMgt = bkdMgtCreate(tpath); + return pMeta; - _err: +_err: taosMemoryFree(pMeta->path); if (pMeta->pTasksMap) taosHashCleanup(pMeta->pTasksMap); if (pMeta->pTaskList) taosArrayDestroy(pMeta->pTaskList); @@ -311,14 +473,13 @@ void streamMetaClear(SStreamMeta* pMeta) { taosRemoveRef(streamBackendId, pMeta->streamBackendRid); taosHashClear(pMeta->pTasksMap); - taosHashClear(pMeta->pTaskBackendUnique); + taosHashClear(pMeta->pTaskDbUnique); taosArrayClear(pMeta->pTaskList); taosArrayClear(pMeta->chkpSaved); taosArrayClear(pMeta->chkpInUse); pMeta->numOfStreamTasks = 0; pMeta->numOfPausedTasks = 0; - pMeta->chkptNotReadyTasks = 0; // the willrestart/starting flag can NOT be cleared taosHashClear(pMeta->startInfo.pReadyTaskSet); @@ -360,7 +521,9 @@ void streamMetaCloseImpl(void* arg) { taosArrayDestroy(pMeta->chkpInUse); taosHashCleanup(pMeta->pTasksMap); - taosHashCleanup(pMeta->pTaskBackendUnique); + 
taosHashCleanup(pMeta->pTaskDbUnique); + taosHashCleanup(pMeta->pUpdateTaskSet); + // taosHashCleanup(pMeta->pTaskBackendUnique); taosHashCleanup(pMeta->updateInfo.pTasks); taosHashCleanup(pMeta->startInfo.pReadyTaskSet); taosHashCleanup(pMeta->startInfo.pFailedTaskSet); @@ -369,6 +532,11 @@ void streamMetaCloseImpl(void* arg) { taosMemoryFree(pMeta->path); taosThreadMutexDestroy(&pMeta->backendMutex); + taosCleanUpScheduler(pMeta->qHandle); + taosMemoryFree(pMeta->qHandle); + + bkdMgtDestroy(pMeta->bkdChkptMgt); + pMeta->role = NODE_ROLE_UNINIT; taosMemoryFree(pMeta); stDebug("end to close stream meta"); @@ -661,6 +829,11 @@ static void doClear(void* pKey, void* pVal, TBC* pCur, SArray* pRecycleList) { taosArrayDestroy(pRecycleList); } +int32_t streamMetaReloadAllTasks(SStreamMeta* pMeta) { + if (pMeta == NULL) return 0; + + return streamMetaLoadAllTasks(pMeta); +} int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) { TBC* pCur = NULL; int32_t vgId = pMeta->vgId; @@ -728,8 +901,6 @@ int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) { } else { // todo this should replace the existed object put by replay creating stream task msg from mnode stError("s-task:0x%x already added into table meta by replaying WAL, need check", pTask->id.taskId); - tdbFree(pKey); - tdbFree(pVal); taosMemoryFree(pTask); continue; } @@ -983,9 +1154,9 @@ void metaHbToMnode(void* param, void* tmrId) { entry.sinkDataSize = SIZE_IN_MiB((*pTask)->execInfo.sink.dataSize); } - if ((*pTask)->checkpointingId != 0) { - entry.checkpointFailed = ((*pTask)->chkInfo.failedId >= (*pTask)->checkpointingId); - entry.activeCheckpointId = (*pTask)->checkpointingId; + if ((*pTask)->chkInfo.checkpointingId != 0) { + entry.checkpointFailed = ((*pTask)->chkInfo.failedId >= (*pTask)->chkInfo.checkpointingId); + entry.activeCheckpointId = (*pTask)->chkInfo.checkpointingId; } if ((*pTask)->exec.pWalReader != NULL) { @@ -1028,7 +1199,9 @@ void metaHbToMnode(void* param, void* tmrId) { } tEncoderClear(&encoder); - SRpcMsg msg = {.info.noResp = 1,}; + SRpcMsg msg = { + .info.noResp = 1, + }; initRpcMsg(&msg, TDMT_MND_STREAM_HEARTBEAT, buf, tlen); pMeta->pHbInfo->hbCount += 1; @@ -1040,7 +1213,7 @@ void metaHbToMnode(void* param, void* tmrId) { stDebug("vgId:%d no tasks and no mnd epset, not send stream hb to mnode", pMeta->vgId); } - _end: +_end: clearHbMsg(&hbMsg, pIdList); taosTmrReset(metaHbToMnode, META_HB_CHECK_INTERVAL, param, streamEnv.timer, &pMeta->pHbInfo->hbTmr); taosReleaseRef(streamMetaId, rid); @@ -1070,8 +1243,8 @@ bool streamMetaTaskInTimer(SStreamMeta* pMeta) { void streamMetaNotifyClose(SStreamMeta* pMeta) { int32_t vgId = pMeta->vgId; - stDebug("vgId:%d notify all stream tasks that the vnode is closing. isLeader:%d startHb:%" PRId64 ", totalHb:%d", vgId, - (pMeta->role == NODE_ROLE_LEADER), pMeta->pHbInfo->hbStart, pMeta->pHbInfo->hbCount); + stDebug("vgId:%d notify all stream tasks that the vnode is closing. 
isLeader:%d startHb:%" PRId64 ", totalHb:%d", + vgId, (pMeta->role == NODE_ROLE_LEADER), pMeta->pHbInfo->hbStart, pMeta->pHbInfo->hbCount); streamMetaWLock(pMeta); @@ -1142,4 +1315,19 @@ void streamMetaWUnLock(SStreamMeta* pMeta) { stTrace("vgId:%d meta-wunlock", pMeta->vgId); taosWUnLockLatch(&pMeta->lock); } +static void execHelper(struct SSchedMsg* pSchedMsg) { + __async_exec_fn_t execFn = (__async_exec_fn_t)pSchedMsg->ahandle; + int32_t code = execFn(pSchedMsg->thandle); + if (code != 0 && pSchedMsg->msg != NULL) { + *(int32_t*)pSchedMsg->msg = code; + } +} +int32_t streamMetaAsyncExec(SStreamMeta* pMeta, __stream_async_exec_fn_t fn, void* param, int32_t* code) { + SSchedMsg schedMsg = {0}; + schedMsg.fp = execHelper; + schedMsg.ahandle = fn; + schedMsg.thandle = param; + schedMsg.msg = code; + return taosScheduleTask(pMeta->qHandle, &schedMsg); +} diff --git a/source/libs/stream/src/streamSnapshot.c b/source/libs/stream/src/streamSnapshot.c index 5893bc14f1..e29f2ba7de 100644 --- a/source/libs/stream/src/streamSnapshot.c +++ b/source/libs/stream/src/streamSnapshot.c @@ -32,6 +32,7 @@ typedef struct SBackendFileItem { char* name; int8_t type; int64_t size; + int8_t ref; } SBackendFileItem; typedef struct SBackendFile { char* pCurrent; @@ -40,7 +41,28 @@ typedef struct SBackendFile { SArray* pSst; char* pCheckpointMeta; char* path; + } SBanckendFile; + +typedef struct SBackendSnapFiles2 { + char* pCurrent; + char* pMainfest; + char* pOptions; + SArray* pSst; + char* pCheckpointMeta; + char* path; + + int64_t checkpointId; + int64_t seraial; + int64_t offset; + TdFilePtr fd; + int8_t filetype; + SArray* pFileList; + int32_t currFileIdx; + SStreamTaskSnap snapInfo; + int8_t inited; + +} SBackendSnapFile2; struct SStreamSnapHandle { void* handle; SBanckendFile* pBackendFile; @@ -51,16 +73,23 @@ struct SStreamSnapHandle { int8_t filetype; SArray* pFileList; int32_t currFileIdx; - int8_t delFlag; // 0 : not del, 1: del + char* metaPath; + + SArray* pDbSnapSet; + int32_t currIdx; + int8_t delFlag; // 0 : not del, 1: del }; struct SStreamSnapBlockHdr { int8_t type; int8_t flag; int64_t index; - char name[128]; - int64_t totalSize; - int64_t size; - uint8_t data[]; + // int64_t streamId; + // int64_t taskId; + SStreamTaskSnap snapInfo; + char name[128]; + int64_t totalSize; + int64_t size; + uint8_t data[]; }; struct SStreamSnapReader { void* pMeta; @@ -82,7 +111,7 @@ const char* ROCKSDB_CURRENT = "CURRENT"; const char* ROCKSDB_CHECKPOINT_META = "CHECKPOINT"; static int64_t kBlockSize = 64 * 1024; -int32_t streamSnapHandleInit(SStreamSnapHandle* handle, char* path, int64_t chkpId, void* pMeta); +int32_t streamSnapHandleInit(SStreamSnapHandle* handle, char* path, void* pMeta); void streamSnapHandleDestroy(SStreamSnapHandle* handle); // static void streamBuildFname(char* path, char* file, char* fullname) @@ -106,195 +135,205 @@ int32_t streamGetFileSize(char* path, char* name, int64_t* sz) { TdFilePtr streamOpenFile(char* path, char* name, int32_t opt) { char fullname[256] = {0}; + STREAM_ROCKSDB_BUILD_FULLNAME(path, name, fullname); return taosOpenFile(fullname, opt); } -int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, int64_t chkpId, void* pMeta) { - // impl later - int len = strlen(path); - char* tdir = taosMemoryCalloc(1, len + 256); - memcpy(tdir, path, len); +int32_t streamTaskDbGetSnapInfo(void* arg, char* path, SArray* pSnap) { return taskDbBuildSnap(arg, pSnap); } - int32_t code = 0; +void snapFileDebugInfo(SBackendSnapFile2* pSnapFile) { + if (qDebugFlag & 
DEBUG_DEBUG) { + char* buf = taosMemoryCalloc(1, 512); + sprintf(buf + strlen(buf), "["); - int8_t validChkp = 0; - if (chkpId != 0) { - sprintf(tdir, "%s%s%s%s%s%scheckpoint%" PRId64 "", path, TD_DIRSEP, "stream", TD_DIRSEP, "checkpoints", TD_DIRSEP, - chkpId); - if (taosIsDir(tdir)) { - validChkp = 1; - stInfo("%s start to read snap %s", STREAM_STATE_TRANSFER, tdir); - streamBackendAddInUseChkp(pMeta, chkpId); - } else { - stWarn("%s failed to read from %s, reason: dir not exist,retry to default state dir", STREAM_STATE_TRANSFER, - tdir); + if (pSnapFile->pCurrent) sprintf(buf, "current: %s,", pSnapFile->pCurrent); + if (pSnapFile->pMainfest) sprintf(buf + strlen(buf), "MANIFEST: %s,", pSnapFile->pMainfest); + if (pSnapFile->pOptions) sprintf(buf + strlen(buf), "options: %s,", pSnapFile->pOptions); + if (pSnapFile->pSst) { + for (int i = 0; i < taosArrayGetSize(pSnapFile->pSst); i++) { + char* name = taosArrayGetP(pSnapFile->pSst, i); + sprintf(buf + strlen(buf), "%s,", name); + } } + sprintf(buf + strlen(buf) - 1, "]"); + + qInfo("%s %" PRId64 "-%" PRId64 " get file list: %s", STREAM_STATE_TRANSFER, pSnapFile->snapInfo.streamId, + pSnapFile->snapInfo.taskId, buf); + taosMemoryFree(buf); } +} - // no checkpoint specified or not exists invalid checkpoint, do checkpoint at default path and translate it - if (validChkp == 0) { - sprintf(tdir, "%s%s%s%s%s", path, TD_DIRSEP, "stream", TD_DIRSEP, "state"); - char* chkpdir = taosMemoryCalloc(1, len + 256); - sprintf(chkpdir, "%s%s%s", tdir, TD_DIRSEP, "tmp"); - taosMemoryFree(tdir); +int32_t snapFileGenMeta(SBackendSnapFile2* pSnapFile) { + SBackendFileItem item = {0}; + item.ref = 1; + // current + item.name = pSnapFile->pCurrent; + item.type = ROCKSDB_CURRENT_TYPE; + streamGetFileSize(pSnapFile->path, item.name, &item.size); + taosArrayPush(pSnapFile->pFileList, &item); - tdir = chkpdir; - stInfo("%s start to trigger checkpoint on %s", STREAM_STATE_TRANSFER, tdir); + // mainfest + item.name = pSnapFile->pMainfest; + item.type = ROCKSDB_MAINFEST_TYPE; + streamGetFileSize(pSnapFile->path, item.name, &item.size); + taosArrayPush(pSnapFile->pFileList, &item); - code = streamBackendTriggerChkp(pMeta, tdir); - if (code != 0) { - stError("%s failed to trigger chekckpoint at %s", STREAM_STATE_TRANSFER, tdir); - taosMemoryFree(tdir); - return code; - } - pHandle->delFlag = 1; - chkpId = 0; + // options + item.name = pSnapFile->pOptions; + item.type = ROCKSDB_OPTIONS_TYPE; + streamGetFileSize(pSnapFile->path, item.name, &item.size); + taosArrayPush(pSnapFile->pFileList, &item); + // sst + for (int i = 0; i < taosArrayGetSize(pSnapFile->pSst); i++) { + char* sst = taosArrayGetP(pSnapFile->pSst, i); + item.name = sst; + item.type = ROCKSDB_SST_TYPE; + streamGetFileSize(pSnapFile->path, item.name, &item.size); + taosArrayPush(pSnapFile->pFileList, &item); } - - stInfo("%s start to read dir: %s", STREAM_STATE_TRANSFER, tdir); - - TdDirPtr pDir = taosOpenDir(tdir); + // meta + item.name = pSnapFile->pCheckpointMeta; + item.type = ROCKSDB_CHECKPOINT_META_TYPE; + if (streamGetFileSize(pSnapFile->path, item.name, &item.size) == 0) { + taosArrayPush(pSnapFile->pFileList, &item); + } + return 0; +} +int32_t snapFileReadMeta(SBackendSnapFile2* pSnapFile) { + TdDirPtr pDir = taosOpenDir(pSnapFile->path); if (NULL == pDir) { - stError("%s failed to open %s", STREAM_STATE_TRANSFER, tdir); - goto _err; + qError("%s failed to open %s", STREAM_STATE_TRANSFER, pSnapFile->path); + return -1; } - SBanckendFile* pFile = taosMemoryCalloc(1, sizeof(SBanckendFile)); - 
pHandle->pBackendFile = pFile; - pHandle->checkpointId = chkpId; - pHandle->seraial = 0; - - pFile->path = tdir; - pFile->pSst = taosArrayInit(16, sizeof(void*)); - TdDirEntryPtr pDirEntry; while ((pDirEntry = taosReadDir(pDir)) != NULL) { char* name = taosGetDirEntryName(pDirEntry); if (strlen(name) >= strlen(ROCKSDB_CURRENT) && 0 == strncmp(name, ROCKSDB_CURRENT, strlen(ROCKSDB_CURRENT))) { - pFile->pCurrent = taosStrdup(name); + pSnapFile->pCurrent = taosStrdup(name); continue; } if (strlen(name) >= strlen(ROCKSDB_MAINFEST) && 0 == strncmp(name, ROCKSDB_MAINFEST, strlen(ROCKSDB_MAINFEST))) { - pFile->pMainfest = taosStrdup(name); + pSnapFile->pMainfest = taosStrdup(name); continue; } if (strlen(name) >= strlen(ROCKSDB_OPTIONS) && 0 == strncmp(name, ROCKSDB_OPTIONS, strlen(ROCKSDB_OPTIONS))) { - pFile->pOptions = taosStrdup(name); + pSnapFile->pOptions = taosStrdup(name); continue; } if (strlen(name) >= strlen(ROCKSDB_CHECKPOINT_META) && 0 == strncmp(name, ROCKSDB_CHECKPOINT_META, strlen(ROCKSDB_CHECKPOINT_META))) { - pFile->pCheckpointMeta = taosStrdup(name); + pSnapFile->pCheckpointMeta = taosStrdup(name); continue; } if (strlen(name) >= strlen(ROCKSDB_SST) && 0 == strncmp(name + strlen(name) - strlen(ROCKSDB_SST), ROCKSDB_SST, strlen(ROCKSDB_SST))) { char* sst = taosStrdup(name); - taosArrayPush(pFile->pSst, &sst); + taosArrayPush(pSnapFile->pSst, &sst); } } - if (qDebugFlag & DEBUG_TRACE) { - char* buf = taosMemoryCalloc(1, 128 + taosArrayGetSize(pFile->pSst) * 64); - sprintf(buf, "[current: %s,", pFile->pCurrent); - sprintf(buf + strlen(buf), "MANIFEST: %s,", pFile->pMainfest); - sprintf(buf + strlen(buf), "options: %s,", pFile->pOptions); - - for (int i = 0; i < taosArrayGetSize(pFile->pSst); i++) { - char* name = taosArrayGetP(pFile->pSst, i); - sprintf(buf + strlen(buf), "%s,", name); - } - sprintf(buf + strlen(buf) - 1, "]"); - - stInfo("%s get file list: %s", STREAM_STATE_TRANSFER, buf); - taosMemoryFree(buf); - } - taosCloseDir(&pDir); - - if (pFile->pCurrent == NULL) { - stError("%s failed to open %s, reason: no valid file", STREAM_STATE_TRANSFER, tdir); - code = -1; - tdir = NULL; - goto _err; - } - SArray* list = taosArrayInit(64, sizeof(SBackendFileItem)); - - SBackendFileItem item; - // current - item.name = pFile->pCurrent; - item.type = ROCKSDB_CURRENT_TYPE; - streamGetFileSize(pFile->path, item.name, &item.size); - taosArrayPush(list, &item); - - // mainfest - item.name = pFile->pMainfest; - item.type = ROCKSDB_MAINFEST_TYPE; - streamGetFileSize(pFile->path, item.name, &item.size); - taosArrayPush(list, &item); - - // options - item.name = pFile->pOptions; - item.type = ROCKSDB_OPTIONS_TYPE; - streamGetFileSize(pFile->path, item.name, &item.size); - taosArrayPush(list, &item); - // sst - for (int i = 0; i < taosArrayGetSize(pFile->pSst); i++) { - char* sst = taosArrayGetP(pFile->pSst, i); - item.name = sst; - item.type = ROCKSDB_SST_TYPE; - streamGetFileSize(pFile->path, item.name, &item.size); - taosArrayPush(list, &item); - } - // meta - item.name = pFile->pCheckpointMeta; - item.type = ROCKSDB_CHECKPOINT_META_TYPE; - if (streamGetFileSize(pFile->path, item.name, &item.size) == 0) { - taosArrayPush(list, &item); - } - - pHandle->pBackendFile = pFile; - - pHandle->currFileIdx = 0; - pHandle->pFileList = list; - pHandle->seraial = 0; - pHandle->offset = 0; - pHandle->handle = pMeta; return 0; +} +int32_t streamBackendSnapInitFile(char* metaPath, SStreamTaskSnap* pSnap, SBackendSnapFile2* pSnapFile) { + int32_t code = -1; + + char* path = taosMemoryCalloc(1, 
strlen(pSnap->dbPrefixPath) + 256); + // char idstr[64] = {0}; + sprintf(path, "%s%s%s%s%s%" PRId64 "", pSnap->dbPrefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", + pSnap->chkpId); + if (!taosIsDir(path)) { + goto _ERROR; + } + + pSnapFile->pSst = taosArrayInit(16, sizeof(void*)); + pSnapFile->pFileList = taosArrayInit(64, sizeof(SBackendFileItem)); + pSnapFile->path = path; + pSnapFile->snapInfo = *pSnap; + if ((code = snapFileReadMeta(pSnapFile)) != 0) { + goto _ERROR; + } + if ((code = snapFileGenMeta(pSnapFile)) != 0) { + goto _ERROR; + } + + snapFileDebugInfo(pSnapFile); + path = NULL; + code = 0; + +_ERROR: + taosMemoryFree(path); + return code; +} +void snapFileDestroy(SBackendSnapFile2* pSnap) { + taosMemoryFree(pSnap->pCheckpointMeta); + taosMemoryFree(pSnap->pCurrent); + taosMemoryFree(pSnap->pMainfest); + taosMemoryFree(pSnap->pOptions); + taosMemoryFree(pSnap->path); + for (int i = 0; i < taosArrayGetSize(pSnap->pSst); i++) { + char* sst = taosArrayGetP(pSnap->pSst, i); + taosMemoryFree(sst); + } + // unite read/write snap file + for (int i = 0; i < taosArrayGetSize(pSnap->pFileList); i++) { + SBackendFileItem* pItem = taosArrayGet(pSnap->pFileList, i); + if (pItem->ref == 0) { + taosMemoryFree(pItem->name); + } + } + taosArrayDestroy(pSnap->pFileList); + taosArrayDestroy(pSnap->pSst); + taosCloseFile(&pSnap->fd); + + return; +} +int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, void* pMeta) { + // impl later + SArray* pSnapSet = taosArrayInit(4, sizeof(SStreamTaskSnap)); + int32_t code = streamTaskDbGetSnapInfo(pMeta, path, pSnapSet); + if (code != 0) { + return -1; + } + + SArray* pDbSnapSet = taosArrayInit(8, sizeof(SBackendSnapFile2)); + + for (int i = 0; i < taosArrayGetSize(pSnapSet); i++) { + SStreamTaskSnap* pSnap = taosArrayGet(pSnapSet, i); + + SBackendSnapFile2 snapFile = {0}; + code = streamBackendSnapInitFile(path, pSnap, &snapFile); + ASSERT(code == 0); + taosArrayPush(pDbSnapSet, &snapFile); + } + for (int i = 0; i < taosArrayGetSize(pSnapSet); i++) { + SStreamTaskSnap* pSnap = taosArrayGet(pSnapSet, i); + taosMemoryFree(pSnap->dbPrefixPath); + } + taosArrayDestroy(pSnapSet); + + pHandle->pDbSnapSet = pDbSnapSet; + pHandle->currIdx = 0; + return 0; + _err: streamSnapHandleDestroy(pHandle); - taosMemoryFreeClear(tdir); code = -1; return code; } void streamSnapHandleDestroy(SStreamSnapHandle* handle) { - SBanckendFile* pFile = handle->pBackendFile; - - if (handle->checkpointId == 0) { - // del tmp dir - if (pFile && taosIsDir(pFile->path)) { - if (handle->delFlag) taosRemoveDir(pFile->path); + if (handle->pDbSnapSet) { + for (int i = 0; i < taosArrayGetSize(handle->pDbSnapSet); i++) { + SBackendSnapFile2* pSnapFile = taosArrayGet(handle->pDbSnapSet, i); + snapFileDebugInfo(pSnapFile); + snapFileDestroy(pSnapFile); } - } else { - streamBackendDelInUseChkp(handle->handle, handle->checkpointId); + taosArrayDestroy(handle->pDbSnapSet); } - if (pFile) { - taosMemoryFree(pFile->pCheckpointMeta); - taosMemoryFree(pFile->pCurrent); - taosMemoryFree(pFile->pMainfest); - taosMemoryFree(pFile->pOptions); - taosMemoryFree(pFile->path); - for (int i = 0; i < taosArrayGetSize(pFile->pSst); i++) { - char* sst = taosArrayGetP(pFile->pSst, i); - taosMemoryFree(sst); - } - taosArrayDestroy(pFile->pSst); - taosMemoryFree(pFile); - } - taosArrayDestroy(handle->pFileList); - taosCloseFile(&handle->fd); + taosMemoryFree(handle->metaPath); return; } @@ -305,7 +344,7 @@ int32_t streamSnapReaderOpen(void* pMeta, int64_t sver, int64_t chkpId, char* pa return 
TSDB_CODE_OUT_OF_MEMORY; } - if (streamSnapHandleInit(&pReader->handle, (char*)path, chkpId, pMeta) < 0) { + if (streamSnapHandleInit(&pReader->handle, (char*)path, pMeta) < 0) { taosMemoryFree(pReader); return -1; } @@ -321,34 +360,50 @@ int32_t streamSnapReaderClose(SStreamSnapReader* pReader) { taosMemoryFree(pReader); return 0; } + int32_t streamSnapRead(SStreamSnapReader* pReader, uint8_t** ppData, int64_t* size) { // impl later int32_t code = 0; SStreamSnapHandle* pHandle = &pReader->handle; - SBanckendFile* pFile = pHandle->pBackendFile; + int32_t idx = pHandle->currIdx; - SBackendFileItem* item = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx); + SBackendSnapFile2* pSnapFile = taosArrayGet(pHandle->pDbSnapSet, idx); + if (pSnapFile == NULL) { + return 0; + } + SBackendFileItem* item = NULL; + +_NEXT: + + if (pSnapFile->fd == NULL) { + if (pSnapFile->currFileIdx >= taosArrayGetSize(pSnapFile->pFileList)) { + if (pHandle->currIdx + 1 < taosArrayGetSize(pHandle->pDbSnapSet)) { + pHandle->currIdx += 1; + + pSnapFile = taosArrayGet(pHandle->pDbSnapSet, pHandle->currIdx); + goto _NEXT; + } else { + *ppData = NULL; + *size = 0; + return 0; + } - if (pHandle->fd == NULL) { - if (pHandle->currFileIdx >= taosArrayGetSize(pHandle->pFileList)) { - // finish - *ppData = NULL; - *size = 0; - return 0; } else { - pHandle->fd = streamOpenFile(pFile->path, item->name, TD_FILE_READ); + item = taosArrayGet(pSnapFile->pFileList, pSnapFile->currFileIdx); + pSnapFile->fd = streamOpenFile(pSnapFile->path, item->name, TD_FILE_READ); stDebug("%s open file %s, current offset:%" PRId64 ", size:% " PRId64 ", file no.%d", STREAM_STATE_TRANSFER, - item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); + item->name, (int64_t)pSnapFile->offset, item->size, pSnapFile->currFileIdx); } } + item = taosArrayGet(pSnapFile->pFileList, pSnapFile->currFileIdx); + + qDebug("%s start to read file %s, current offset:%" PRId64 ", size:%" PRId64 + ", file no.%d, total set:%d, current set idx: %d", + STREAM_STATE_TRANSFER, item->name, (int64_t)pSnapFile->offset, item->size, pSnapFile->currFileIdx, + (int)taosArrayGetSize(pHandle->pDbSnapSet), pHandle->currIdx); - stDebug("%s start to read file %s, current offset:%" PRId64 ", size:%" PRId64 ", file no.%d", STREAM_STATE_TRANSFER, - item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); uint8_t* buf = taosMemoryCalloc(1, sizeof(SStreamSnapBlockHdr) + kBlockSize); - if (buf == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - int64_t nread = taosPReadFile(pHandle->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pHandle->offset); + int64_t nread = taosPReadFile(pSnapFile->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pSnapFile->offset); if (nread == -1) { taosMemoryFree(buf); code = TAOS_SYSTEM_ERROR(terrno); @@ -358,44 +413,51 @@ int32_t streamSnapRead(SStreamSnapReader* pReader, uint8_t** ppData, int64_t* si } else if (nread > 0 && nread <= kBlockSize) { // left bytes less than kBlockSize stDebug("%s read file %s, current offset:%" PRId64 ",size:% " PRId64 ", file no.%d", STREAM_STATE_TRANSFER, - item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); - pHandle->offset += nread; - if (pHandle->offset >= item->size || nread < kBlockSize) { - taosCloseFile(&pHandle->fd); - pHandle->offset = 0; - pHandle->currFileIdx += 1; + item->name, (int64_t)pSnapFile->offset, item->size, pSnapFile->currFileIdx); + pSnapFile->offset += nread; + if (pSnapFile->offset >= item->size || nread < kBlockSize) { + 
taosCloseFile(&pSnapFile->fd); + pSnapFile->offset = 0; + pSnapFile->currFileIdx += 1; } } else { stDebug("%s no data read, close file no.%d, move to next file, open and read", STREAM_STATE_TRANSFER, - pHandle->currFileIdx); - taosCloseFile(&pHandle->fd); - pHandle->offset = 0; - pHandle->currFileIdx += 1; + pSnapFile->currFileIdx); + taosCloseFile(&pSnapFile->fd); + pSnapFile->offset = 0; + pSnapFile->currFileIdx += 1; - if (pHandle->currFileIdx >= taosArrayGetSize(pHandle->pFileList)) { + if (pSnapFile->currFileIdx >= taosArrayGetSize(pSnapFile->pFileList)) { // finish - *ppData = NULL; - *size = 0; - taosMemoryFree(buf); - return 0; + if (pHandle->currIdx + 1 < taosArrayGetSize(pHandle->pDbSnapSet)) { + // skip to next snap set + pHandle->currIdx += 1; + pSnapFile = taosArrayGet(pHandle->pDbSnapSet, pHandle->currIdx); + goto _NEXT; + } else { + *ppData = NULL; + *size = 0; + return 0; + } } - item = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx); - pHandle->fd = streamOpenFile(pFile->path, item->name, TD_FILE_READ); + item = taosArrayGet(pSnapFile->pFileList, pSnapFile->currFileIdx); + pSnapFile->fd = streamOpenFile(pSnapFile->path, item->name, TD_FILE_READ); - nread = taosPReadFile(pHandle->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pHandle->offset); - pHandle->offset += nread; + nread = taosPReadFile(pSnapFile->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pSnapFile->offset); + pSnapFile->offset += nread; stDebug("%s open file and read file %s, current offset:%" PRId64 ", size:% " PRId64 ", file no.%d", - STREAM_STATE_TRANSFER, item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); + STREAM_STATE_TRANSFER, item->name, (int64_t)pSnapFile->offset, item->size, pSnapFile->currFileIdx); } SStreamSnapBlockHdr* pHdr = (SStreamSnapBlockHdr*)buf; pHdr->size = nread; pHdr->type = item->type; pHdr->totalSize = item->size; + pHdr->snapInfo = pSnapFile->snapInfo; memcpy(pHdr->name, item->name, strlen(item->name)); - pHandle->seraial += nread; + pSnapFile->seraial += nread; *ppData = buf; *size = sizeof(SStreamSnapBlockHdr) + nread; @@ -408,101 +470,133 @@ int32_t streamSnapWriterOpen(void* pMeta, int64_t sver, int64_t ever, char* path if (pWriter == NULL) { return TSDB_CODE_OUT_OF_MEMORY; } + SStreamSnapHandle* pHandle = &pWriter->handle; + pHandle->currIdx = 0; + pHandle->metaPath = taosStrdup(path); + pHandle->pDbSnapSet = taosArrayInit(8, sizeof(SBackendSnapFile2)); - SBanckendFile* pFile = taosMemoryCalloc(1, sizeof(SBanckendFile)); - pFile->path = taosStrdup(path); - SArray* list = taosArrayInit(64, sizeof(SBackendFileItem)); - - SBackendFileItem item; - item.name = taosStrdup((char*)ROCKSDB_CURRENT); - item.type = ROCKSDB_CURRENT_TYPE; - taosArrayPush(list, &item); - - pHandle->pBackendFile = pFile; - - pHandle->pFileList = list; - pHandle->currFileIdx = 0; - pHandle->offset = 0; - pHandle->delFlag = 0; + SBackendSnapFile2 snapFile = {0}; + taosArrayPush(pHandle->pDbSnapSet, &snapFile); *ppWriter = pWriter; return 0; } +int32_t snapInfoEqual(SStreamTaskSnap* a, SStreamTaskSnap* b) { + if (a->streamId != b->streamId || a->taskId != b->taskId || a->chkpId != b->chkpId) { + return 0; + } + return 1; +} + +int32_t streamSnapWriteImpl(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t nData, SBackendSnapFile2* pSnapFile) { + int code = -1; + SStreamSnapBlockHdr* pHdr = (SStreamSnapBlockHdr*)pData; + SStreamSnapHandle* pHandle = &pWriter->handle; + SStreamTaskSnap snapInfo = pHdr->snapInfo; + + SStreamTaskSnap* pSnapInfo = &pSnapFile->snapInfo; + + 
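The snapshot reader/writer changes above keep the same transfer framing: each payload of up to 64 KB is prefixed by a header naming the file it belongs to (now also carrying the per-task `snapInfo`, so several task backends can travel over one stream), and the receiver switches to a new file whenever the name in the header changes. A simplified sketch of the sender-side framing, with hypothetical names and without the multi-task bookkeeping:

```c
// Sketch only: cut a file into fixed-size blocks, each prefixed by a small header.
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum { DEMO_BLOCK_SIZE = 64 * 1024 };

typedef struct {
    char    name[128];     // which file this block belongs to
    int64_t totalSize;     // full size of that file
    int64_t size;          // payload bytes carried in this block
    uint8_t data[];        // payload follows the header
} DemoSnapBlock;

// Read the next block of 'fp' (already positioned) into a freshly allocated frame.
// Returns NULL when the file is exhausted; the caller frees the result and then
// moves on to the next file in its list.
static DemoSnapBlock *demo_next_block(FILE *fp, const char *name, int64_t totalSize) {
    DemoSnapBlock *blk = malloc(sizeof(*blk) + DEMO_BLOCK_SIZE);
    if (blk == NULL) return NULL;

    size_t n = fread(blk->data, 1, DEMO_BLOCK_SIZE, fp);
    if (n == 0) {                       // nothing left: signal "move to next file"
        free(blk);
        return NULL;
    }
    snprintf(blk->name, sizeof(blk->name), "%s", name);
    blk->totalSize = totalSize;
    blk->size = (int64_t)n;
    return blk;
}
```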
SBackendFileItem* pItem = taosArrayGet(pSnapFile->pFileList, pSnapFile->currFileIdx); + + if (pSnapFile->fd == 0) { + pSnapFile->fd = streamOpenFile(pSnapFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); + if (pSnapFile->fd == NULL) { + code = TAOS_SYSTEM_ERROR(terrno); + stError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pHandle->metaPath, TD_DIRSEP, + pHdr->name, tstrerror(code)); + } + } + if (strlen(pHdr->name) == strlen(pItem->name) && strcmp(pHdr->name, pItem->name) == 0) { + int64_t bytes = taosPWriteFile(pSnapFile->fd, pHdr->data, pHdr->size, pSnapFile->offset); + if (bytes != pHdr->size) { + code = TAOS_SYSTEM_ERROR(terrno); + stError("%s failed to write snap, file name:%s, reason:%s", STREAM_STATE_TRANSFER, pHdr->name, tstrerror(code)); + return code; + } else { + qInfo("succ to write data %s", pItem->name); + } + pSnapFile->offset += bytes; + } else { + taosCloseFile(&pSnapFile->fd); + pSnapFile->offset = 0; + pSnapFile->currFileIdx += 1; + + SBackendFileItem item = {0}; + item.name = taosStrdup(pHdr->name); + item.type = pHdr->type; + + taosArrayPush(pSnapFile->pFileList, &item); + + SBackendFileItem* pItem = taosArrayGet(pSnapFile->pFileList, pSnapFile->currFileIdx); + pSnapFile->fd = streamOpenFile(pSnapFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); + if (pSnapFile->fd == NULL) { + code = TAOS_SYSTEM_ERROR(terrno); + stError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pSnapFile->path, TD_DIRSEP, + pHdr->name, tstrerror(code)); + } + + taosPWriteFile(pSnapFile->fd, pHdr->data, pHdr->size, pSnapFile->offset); + qInfo("succ to write data %s", pItem->name); + pSnapFile->offset += pHdr->size; + } + code = 0; +_EXIT: + return code; +} + int32_t streamSnapWrite(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { int32_t code = 0; SStreamSnapBlockHdr* pHdr = (SStreamSnapBlockHdr*)pData; SStreamSnapHandle* pHandle = &pWriter->handle; - SBanckendFile* pFile = pHandle->pBackendFile; - SBackendFileItem* pItem = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx); + SStreamTaskSnap snapInfo = pHdr->snapInfo; - if (pHandle->fd == NULL) { - pHandle->fd = streamOpenFile(pFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); - if (pHandle->fd == NULL) { - code = TAOS_SYSTEM_ERROR(terrno); - stError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pFile->path, TD_DIRSEP, - pHdr->name, tstrerror(code)); - } - } + SBackendSnapFile2* pDbSnapFile = taosArrayGet(pHandle->pDbSnapSet, pHandle->currIdx); + if (pDbSnapFile->inited == 0) { + char idstr[64] = {0}; + sprintf(idstr, "0x%" PRIx64 "-0x%x", snapInfo.streamId, (int32_t)(snapInfo.taskId)); - if (strlen(pHdr->name) == strlen(pItem->name) && strcmp(pHdr->name, pItem->name) == 0) { - int64_t bytes = taosPWriteFile(pHandle->fd, pHdr->data, pHdr->size, pHandle->offset); - if (bytes != pHdr->size) { - code = TAOS_SYSTEM_ERROR(terrno); - stError("%s failed to write snap, file name:%s, reason:%s", STREAM_STATE_TRANSFER, pHdr->name, tstrerror(code)); - return code; + char* path = taosMemoryCalloc(1, strlen(pHandle->metaPath) + 256); + sprintf(path, "%s%s%s%s%s%s%s%" PRId64 "", pHandle->metaPath, TD_DIRSEP, idstr, TD_DIRSEP, "checkpoints", TD_DIRSEP, + "checkpoint", snapInfo.chkpId); + if (!taosIsDir(path)) { + code = taosMulMkDir(path); + qInfo("%s mkdir %s", STREAM_STATE_TRANSFER, path); + ASSERT(code == 0); } - pHandle->offset += bytes; + + pDbSnapFile->path = path; + 
pDbSnapFile->snapInfo = snapInfo; + pDbSnapFile->pFileList = taosArrayInit(64, sizeof(SBackendFileItem)); + pDbSnapFile->currFileIdx = 0; + pDbSnapFile->offset = 0; + + SBackendFileItem item = {0}; + item.name = taosStrdup((char*)ROCKSDB_CURRENT); + item.type = ROCKSDB_CURRENT_TYPE; + + taosArrayPush(pDbSnapFile->pFileList, &item); + + pDbSnapFile->inited = 1; + return streamSnapWriteImpl(pWriter, pData, nData, pDbSnapFile); } else { - taosCloseFile(&pHandle->fd); - pHandle->offset = 0; - pHandle->currFileIdx += 1; + if (snapInfoEqual(&snapInfo, &pDbSnapFile->snapInfo)) { + return streamSnapWriteImpl(pWriter, pData, nData, pDbSnapFile); + } else { + SBackendSnapFile2 snapFile = {0}; + taosArrayPush(pHandle->pDbSnapSet, &snapFile); + pHandle->currIdx += 1; - SBackendFileItem item; - item.name = taosStrdup(pHdr->name); - item.type = pHdr->type; - taosArrayPush(pHandle->pFileList, &item); - - SBackendFileItem* pItem = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx); - pHandle->fd = streamOpenFile(pFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); - if (pHandle->fd == NULL) { - code = TAOS_SYSTEM_ERROR(terrno); - stError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pFile->path, TD_DIRSEP, - pHdr->name, tstrerror(code)); + return streamSnapWrite(pWriter, pData, nData); } - - taosPWriteFile(pHandle->fd, pHdr->data, pHdr->size, pHandle->offset); - pHandle->offset += pHdr->size; } - - // impl later - return 0; + return code; } int32_t streamSnapWriterClose(SStreamSnapWriter* pWriter, int8_t rollback) { - SStreamSnapHandle* handle = &pWriter->handle; - if (qDebugFlag & DEBUG_TRACE) { - char* buf = (char*)taosMemoryMalloc(128 + taosArrayGetSize(handle->pFileList) * 64); - int n = sprintf(buf, "["); - for (int i = 0; i < taosArrayGetSize(handle->pFileList); i++) { - SBackendFileItem* item = taosArrayGet(handle->pFileList, i); - if (i != taosArrayGetSize(handle->pFileList) - 1) { - n += sprintf(buf + n, "%s %" PRId64 ",", item->name, item->size); - } else { - n += sprintf(buf + n, "%s %" PRId64 "]", item->name, item->size); - } - } - stDebug("%s snap get file list, %s", STREAM_STATE_TRANSFER, buf); - taosMemoryFree(buf); - } - - for (int i = 0; i < taosArrayGetSize(handle->pFileList); i++) { - SBackendFileItem* item = taosArrayGet(handle->pFileList, i); - taosMemoryFree(item->name); - } - - streamSnapHandleDestroy(handle); + if (pWriter == NULL) return 0; + streamSnapHandleDestroy(&pWriter->handle); taosMemoryFree(pWriter); return 0; diff --git a/source/libs/stream/src/streamStart.c b/source/libs/stream/src/streamStart.c index 71f1e9b45b..a4c448f678 100644 --- a/source/libs/stream/src/streamStart.c +++ b/source/libs/stream/src/streamStart.c @@ -471,6 +471,14 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs streamMetaUpdateTaskDownstreamStatus(pTask, pTask->execInfo.init, taosGetTimestampMs(), false); + // automatically set the related fill-history task to be failed. 
+ if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { + STaskId* pId = &pTask->hTaskInfo.id; + + SStreamTask* pHTask = streamMetaAcquireTask(pTask->pMeta, pId->streamId, pId->taskId); + streamMetaUpdateTaskDownstreamStatus(pHTask, pHTask->execInfo.init, taosGetTimestampMs(), false); + streamMetaReleaseTask(pTask->pMeta, pHTask); + } } else { // TASK_DOWNSTREAM_NOT_READY, let's retry in 100ms STaskRecheckInfo* pInfo = createRecheckInfo(pTask, pRsp); @@ -1072,8 +1080,9 @@ int32_t streamMetaUpdateTaskDownstreamStatus(SStreamTask* pTask, int64_t startTs taosHashPut(pDst, &id, sizeof(id), &initTs, sizeof(STaskInitTs)); int32_t numOfTotal = streamMetaGetNumOfTasks(pMeta); + int32_t numOfRecv = taosHashGetSize(pStartInfo->pReadyTaskSet) + taosHashGetSize(pStartInfo->pFailedTaskSet); - if (taosHashGetSize(pStartInfo->pReadyTaskSet) + taosHashGetSize(pStartInfo->pFailedTaskSet) == numOfTotal) { + if (numOfRecv == numOfTotal) { pStartInfo->readyTs = taosGetTimestampMs(); pStartInfo->elapsedTime = (pStartInfo->startTs != 0) ? pStartInfo->readyTs - pStartInfo->startTs : 0; @@ -1087,6 +1096,8 @@ int32_t streamMetaUpdateTaskDownstreamStatus(SStreamTask* pTask, int64_t startTs displayStatusInfo(pMeta, pStartInfo->pFailedTaskSet, false); streamMetaResetStartInfo(pStartInfo); + } else { + stDebug("vgId:%d recv check down results:%d, total:%d", pMeta->vgId, numOfRecv, numOfTotal); } streamMetaWUnLock(pMeta); diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index 0f32fd6879..276ed08785 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -106,51 +106,21 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz } SStreamTask* pStreamTask = pTask; - char statePath[1024]; - if (!specPath) { - sprintf(statePath, "%s%s%d", path, TD_DIRSEP, pStreamTask->id.taskId); - } else { - memset(statePath, 0, 1024); - tstrncpy(statePath, path, 1024); - } - pState->taskId = pStreamTask->id.taskId; pState->streamId = pStreamTask->id.streamId; - sprintf(pState->pTdbState->idstr, "0x%" PRIx64 "-%d", pState->streamId, pState->taskId); + sprintf(pState->pTdbState->idstr, "0x%" PRIx64 "-0x%x", pState->streamId, pState->taskId); + + streamTaskSetDb(pStreamTask->pMeta, pTask, pState->pTdbState->idstr); #ifdef USE_ROCKSDB SStreamMeta* pMeta = pStreamTask->pMeta; - pState->streamBackendRid = pMeta->streamBackendRid; - // streamMetaWLock(pMeta); - taosThreadMutexLock(&pMeta->backendMutex); - void* uniqueId = - taosHashGet(pMeta->pTaskBackendUnique, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1); - if (uniqueId == NULL) { - int code = streamStateOpenBackend(pMeta->streamBackend, pState); - if (code == -1) { - taosThreadMutexUnlock(&pMeta->backendMutex); - taosMemoryFree(pState); - return NULL; - } - taosHashPut(pMeta->pTaskBackendUnique, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1, - &pState->pTdbState->backendCfWrapperId, sizeof(pState->pTdbState->backendCfWrapperId)); - } else { - int64_t id = *(int64_t*)uniqueId; - pState->pTdbState->backendCfWrapperId = id; - pState->pTdbState->pBackendCfWrapper = taosAcquireRef(streamBackendCfWrapperId, id); - // already exist stream task for - stInfo("already exist stream-state for %s", pState->pTdbState->idstr); - // taosAcquireRef(streamBackendId, pState->streamBackendRid); - } - taosThreadMutexUnlock(&pMeta->backendMutex); - pState->pTdbState->pOwner = pTask; pState->pFileState = NULL; _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT); 
pState->parNameMap = tSimpleHashInit(1024, hashFn); stInfo("succ to open state %p on backend %p 0x%" PRIx64 "-%d", pState, pMeta->streamBackend, pState->streamId, - pState->taskId); + pState->taskId); return pState; #else @@ -237,6 +207,12 @@ _err: #endif } +int32_t streamStateDelTaskDb(SStreamState* pState) { + SStreamTask* pTask = pState->pTdbState->pOwner; + taskDbRemoveRef(pTask->pBackend); + taosMemoryFree(pTask); + return 0; +} void streamStateClose(SStreamState* pState, bool remove) { SStreamTask* pTask = pState->pTdbState->pOwner; #ifdef USE_ROCKSDB @@ -692,8 +668,7 @@ void streamStateResetCur(SStreamStateCur* pCur) { } void streamStateFreeCur(SStreamStateCur* pCur) { - if (!pCur || pCur->buffIndex >= 0) { - taosMemoryFree(pCur); + if (!pCur) { return; } qDebug("streamStateFreeCur"); @@ -722,7 +697,7 @@ int32_t streamStateSessionPut(SStreamState* pState, const SSessionKey* key, void streamStateReleaseBuf(pState, pos, true); putFreeBuff(pState->pFileState, pos); stDebug("===stream===save skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64 ".code:%d", key->win.skey, - key->win.ekey, key->groupId, code); + key->win.ekey, key->groupId, code); } else { code = putSessionWinResultBuff(pState->pFileState, value); } @@ -768,7 +743,7 @@ int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVa int32_t streamStateSessionDel(SStreamState* pState, const SSessionKey* key) { #ifdef USE_ROCKSDB - stDebug("===stream===delete skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey, key->win.ekey, + qDebug("===stream===delete skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey, key->win.ekey, key->groupId); return deleteRowBuff(pState->pFileState, key, sizeof(SSessionKey)); #else @@ -1122,7 +1097,7 @@ int32_t streamStateGetParName(SStreamState* pState, int64_t groupId, void** pVal void streamStateDestroy(SStreamState* pState, bool remove) { #ifdef USE_ROCKSDB streamFileStateDestroy(pState->pFileState); - streamStateDestroy_rocksdb(pState, remove); + // streamStateDestroy_rocksdb(pState, remove); tSimpleHashCleanup(pState->parNameMap); // do nothong #endif @@ -1232,4 +1207,4 @@ char* streamStateIntervalDump(SStreamState* pState) { streamStateFreeCur(pCur); return dumpBuf; } -#endif +#endif \ No newline at end of file diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 2f0bfc8dbc..db0217f000 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -250,9 +250,8 @@ int32_t tDecodeStreamTaskChkInfo(SDecoder* pDecoder, SCheckpointInfo* pChkpInfo) SEpSet epSet; if (tStartDecode(pDecoder) < 0) return -1; - if (tDecodeI64(pDecoder, &ver) < 0) return -1; - - if (ver != SSTREAM_TASK_VER) return -1; + if (tDecodeI64(pDecoder, &pChkpInfo->msgVer) < 0) return -1; + // if (ver != SSTREAM_TASK_VER) return -1; if (tDecodeI64(pDecoder, &skip64) < 0) return -1; if (tDecodeI32(pDecoder, &skip32) < 0) return -1; @@ -379,6 +378,8 @@ void tFreeStreamTask(SStreamTask* pTask) { if (pTask->pState) { stDebug("s-task:0x%x start to free task state", taskId); streamStateClose(pTask->pState, status == TASK_STATUS__DROPPING); + taskDbRemoveRef(pTask->pBackend); + } if (pTask->id.idStr != NULL) { @@ -467,6 +468,14 @@ int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, i } taosThreadMutexInit(&pTask->lock, &attr); + // if (pTask->info.fillHistory == 1) { + // // + // } else { + + // } + // if (streamTaskSetDb(pMeta, pTask) != 0) { + // return -1; + // } 
streamTaskOpenAllUpstreamInput(pTask); pTask->outputInfo.pDownstreamUpdateList = taosArrayInit(4, sizeof(SDownstreamTaskEpset)); diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index ef60c8a94e..e51c61c49d 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -957,8 +957,8 @@ static void cliSendCb(uv_write_t* req, int status) { SCliMsg* pMsg = !transQueueEmpty(&pConn->cliMsgs) ? transQueueGet(&pConn->cliMsgs, 0) : NULL; if (pMsg != NULL) { int64_t cost = taosGetTimestampUs() - pMsg->st; - if (cost > 1000 * 20) { - tWarn("%s conn %p send cost:%dus, send exception", CONN_GET_INST_LABEL(pConn), pConn, (int)cost); + if (cost > 1000 * 50) { + tTrace("%s conn %p send cost:%dus ", CONN_GET_INST_LABEL(pConn), pConn, (int)cost); } } diff --git a/source/libs/transport/src/transSvr.c b/source/libs/transport/src/transSvr.c index bf73c253bc..017969b4e5 100644 --- a/source/libs/transport/src/transSvr.c +++ b/source/libs/transport/src/transSvr.c @@ -159,7 +159,7 @@ static void uvStartSendResp(SSvrMsg* msg); static void uvNotifyLinkBrokenToApp(SSvrConn* conn); -static FORCE_INLINE void destroySmsg(SSvrMsg* smsg); +static FORCE_INLINE void destroySmsg(SSvrMsg* smsg); static FORCE_INLINE SSvrConn* createConn(void* hThrd); static FORCE_INLINE void destroyConn(SSvrConn* conn, bool clear /*clear handle or not*/); static FORCE_INLINE void destroyConnRegArg(SSvrConn* conn); @@ -1499,6 +1499,7 @@ int transSendResponse(const STransMsg* msg) { } SExHandle* exh = msg->info.handle; if (exh == NULL) { + rpcFreeCont(msg->pCont); return 0; } int64_t refId = msg->info.refId; diff --git a/source/util/src/tcompression.c b/source/util/src/tcompression.c index 09f50d0f6d..3cc00ddc7f 100644 --- a/source/util/src/tcompression.c +++ b/source/util/src/tcompression.c @@ -538,66 +538,71 @@ int32_t tsDecompressTimestampImp(const char *const input, const int32_t nelement memcpy(output, input + 1, nelements * longBytes); return nelements * longBytes; } else if (input[0] == 1) { // Decompress - int64_t *ostream = (int64_t *)output; + if (tsSIMDEnable && tsAVX512Enable) { + tsDecompressTimestampAvx512(input, nelements, output, false); + } else if (tsSIMDEnable && tsAVX2Enable) { + tsDecompressTimestampAvx2(input, nelements, output, false); + } else { + int64_t *ostream = (int64_t *)output; - int32_t ipos = 1, opos = 0; - int8_t nbytes = 0; - int64_t prev_value = 0; - int64_t prev_delta = 0; - int64_t delta_of_delta = 0; + int32_t ipos = 1, opos = 0; + int8_t nbytes = 0; + int64_t prev_value = 0; + int64_t prev_delta = 0; + int64_t delta_of_delta = 0; - while (1) { - uint8_t flags = input[ipos++]; - // Decode dd1 - uint64_t dd1 = 0; - nbytes = flags & INT8MASK(4); - if (nbytes == 0) { - delta_of_delta = 0; - } else { - if (is_bigendian()) { - memcpy(((char *)(&dd1)) + longBytes - nbytes, input + ipos, nbytes); + while (1) { + uint8_t flags = input[ipos++]; + // Decode dd1 + uint64_t dd1 = 0; + nbytes = flags & INT8MASK(4); + if (nbytes == 0) { + delta_of_delta = 0; } else { - memcpy(&dd1, input + ipos, nbytes); + if (is_bigendian()) { + memcpy(((char *)(&dd1)) + longBytes - nbytes, input + ipos, nbytes); + } else { + memcpy(&dd1, input + ipos, nbytes); + } + delta_of_delta = ZIGZAG_DECODE(int64_t, dd1); } - delta_of_delta = ZIGZAG_DECODE(int64_t, dd1); - } - ipos += nbytes; - if (opos == 0) { - prev_value = delta_of_delta; - prev_delta = 0; - ostream[opos++] = delta_of_delta; - } else { + + ipos += nbytes; + if (opos == 0) { + prev_value = delta_of_delta; 
+ prev_delta = 0; + ostream[opos++] = delta_of_delta; + } else { + prev_delta = delta_of_delta + prev_delta; + prev_value = prev_value + prev_delta; + ostream[opos++] = prev_value; + } + if (opos == nelements) return nelements * longBytes; + + // Decode dd2 + uint64_t dd2 = 0; + nbytes = (flags >> 4) & INT8MASK(4); + if (nbytes == 0) { + delta_of_delta = 0; + } else { + if (is_bigendian()) { + memcpy(((char *)(&dd2)) + longBytes - nbytes, input + ipos, nbytes); + } else { + memcpy(&dd2, input + ipos, nbytes); + } + // zigzag_decoding + delta_of_delta = ZIGZAG_DECODE(int64_t, dd2); + } + ipos += nbytes; prev_delta = delta_of_delta + prev_delta; prev_value = prev_value + prev_delta; ostream[opos++] = prev_value; + if (opos == nelements) return nelements * longBytes; } - if (opos == nelements) return nelements * longBytes; - - // Decode dd2 - uint64_t dd2 = 0; - nbytes = (flags >> 4) & INT8MASK(4); - if (nbytes == 0) { - delta_of_delta = 0; - } else { - if (is_bigendian()) { - memcpy(((char *)(&dd2)) + longBytes - nbytes, input + ipos, nbytes); - } else { - memcpy(&dd2, input + ipos, nbytes); - } - // zigzag_decoding - delta_of_delta = ZIGZAG_DECODE(int64_t, dd2); - } - ipos += nbytes; - prev_delta = delta_of_delta + prev_delta; - prev_value = prev_value + prev_delta; - ostream[opos++] = prev_value; - if (opos == nelements) return nelements * longBytes; } - - } else { - ASSERT(0); - return -1; } + + return nelements * longBytes; } /* --------------------------------------------Double Compression ---------------------------------------------- */ diff --git a/source/util/src/tdecompress.c b/source/util/src/tdecompress.c index 68841941db..f212bf5231 100644 --- a/source/util/src/tdecompress.c +++ b/source/util/src/tdecompress.c @@ -53,11 +53,8 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements, int64_t prevValue = 0; #if __AVX2__ - while (1) { - if (_pos == nelements) break; - - uint64_t w = 0; - memcpy(&w, ip, LONG_BYTES); + while (_pos < nelements) { + uint64_t w = *(uint64_t*) ip; char selector = (char)(w & INT64MASK(4)); // selector = 4 char bit = bit_per_integer[(int32_t)selector]; // bit = 3 @@ -114,7 +111,7 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements, __m256i signmask = _mm256_and_si256(_mm256_set1_epi64x(1), zigzagVal); signmask = _mm256_sub_epi64(_mm256_setzero_si256(), signmask); - // get the four zigzag values here + // get four zigzag values here __m256i delta = _mm256_xor_si256(_mm256_srli_epi64(zigzagVal, 1), signmask); // calculate the cumulative sum (prefix sum) for each number @@ -250,73 +247,264 @@ int32_t tsDecompressFloatImplAvx2(const char *const input, const int32_t nelemen return 0; } -int32_t tsDecompressTimestampAvx2(const char* const input, const int32_t nelements, char *const output, bool bigEndian) { +int32_t tsDecompressTimestampAvx2(const char *const input, const int32_t nelements, char *const output, + bool bigEndian) { +#if 0 int64_t *ostream = (int64_t *)output; int32_t ipos = 1, opos = 0; - int8_t nbytes = 0; - - int64_t prevValue = 0; - int64_t prevDelta = 0; - - int64_t deltaOfDelta = 0; - int32_t longBytes = LONG_BYTES; + __m128i prevVal = _mm_setzero_si128(); + __m128i prevDelta = _mm_setzero_si128(); #if __AVX2__ + int32_t batch = nelements >> 1; + int32_t remainder = nelements & 0x01; + __mmask16 mask2[16] = {0, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff}; - int32_t batch = nelements >> 2; - int32_t remainder = nelements & 0x1; - - while (1) { + int32_t i = 0; + if 
(batch > 1) { + // first loop uint8_t flags = input[ipos++]; - // Decode dd1 - uint64_t dd1 = 0; - nbytes = flags & INT8MASK(4); + int8_t nbytes1 = flags & INT8MASK(4); // range of nbytes starts from 0 to 7 + int8_t nbytes2 = (flags >> 4) & INT8MASK(4); + __m128i data1; + if (nbytes1 == 0) { + data1 = _mm_setzero_si128(); + } else { + memcpy(&data1, (const void*) (input + ipos), nbytes1); + } + + __m128i data2; + if (nbytes2 == 0) { + data2 = _mm_setzero_si128(); + } else { + memcpy(&data2, (const void*) (input + ipos + nbytes1), nbytes2); + } + + data2 = _mm_broadcastq_epi64(data2); + __m128i zzVal = _mm_blend_epi32(data2, data1, 0x03); + + // ZIGZAG_DECODE(T, v) (((v) >> 1) ^ -((T)((v)&1))) + __m128i signmask = _mm_and_si128(_mm_set1_epi64x(1), zzVal); + signmask = _mm_sub_epi64(_mm_setzero_si128(), signmask); + + // get two zigzag values here + __m128i deltaOfDelta = _mm_xor_si128(_mm_srli_epi64(zzVal, 1), signmask); + + __m128i deltaCurrent = _mm_add_epi64(deltaOfDelta, prevDelta); + deltaCurrent = _mm_add_epi64(_mm_slli_si128(deltaCurrent, 8), deltaCurrent); + + __m128i val = _mm_add_epi64(deltaCurrent, prevVal); + _mm_storeu_si128((__m128i *)&ostream[opos], val); + + // keep the previous value + prevVal = _mm_shuffle_epi32 (val, 0xEE); + + // keep the previous delta of delta, for the first item + prevDelta = _mm_shuffle_epi32(deltaOfDelta, 0xEE); + + opos += 2; + ipos += nbytes1 + nbytes2; + i += 1; + } + + // the remain + for(; i < batch; ++i) { + uint8_t flags = input[ipos++]; + + int8_t nbytes1 = flags & INT8MASK(4); // range of nbytes starts from 0 to 7 + int8_t nbytes2 = (flags >> 4) & INT8MASK(4); + +// __m128i data1 = _mm_maskz_loadu_epi8(mask2[nbytes1], (const void*)(input + ipos)); +// __m128i data2 = _mm_maskz_loadu_epi8(mask2[nbytes2], (const void*)(input + ipos + nbytes1)); + __m128i data1; + if (nbytes1 == 0) { + data1 = _mm_setzero_si128(); + } else { + int64_t dd = 0; + memcpy(&dd, (const void*) (input + ipos), nbytes1); + data1 = _mm_loadu_si64(&dd); + } + + __m128i data2; + if (nbytes2 == 0) { + data2 = _mm_setzero_si128(); + } else { + int64_t dd = 0; + memcpy(&dd, (const void*) (input + ipos + nbytes1), nbytes2); + data2 = _mm_loadu_si64(&dd); + } + + data2 = _mm_broadcastq_epi64(data2); + + __m128i zzVal = _mm_blend_epi32(data2, data1, 0x03); + + // ZIGZAG_DECODE(T, v) (((v) >> 1) ^ -((T)((v)&1))) + __m128i signmask = _mm_and_si128(_mm_set1_epi64x(1), zzVal); + signmask = _mm_sub_epi64(_mm_setzero_si128(), signmask); + + // get two zigzag values here + __m128i deltaOfDelta = _mm_xor_si128(_mm_srli_epi64(zzVal, 1), signmask); + + __m128i deltaCurrent = _mm_add_epi64(deltaOfDelta, prevDelta); + deltaCurrent = _mm_add_epi64(_mm_slli_si128(deltaCurrent, 8), deltaCurrent); + + __m128i val = _mm_add_epi64(deltaCurrent, prevVal); + _mm_storeu_si128((__m128i *)&ostream[opos], val); + + // keep the previous value + prevVal = _mm_shuffle_epi32 (val, 0xEE); + + // keep the previous delta of delta + __m128i delta = _mm_add_epi64(_mm_slli_si128(deltaOfDelta, 8), deltaOfDelta); + prevDelta = _mm_shuffle_epi32(_mm_add_epi64(delta, prevDelta), 0xEE); + + opos += 2; + ipos += nbytes1 + nbytes2; + } + + if (remainder > 0) { + uint64_t dd = 0; + uint8_t flags = input[ipos++]; + + int32_t nbytes = flags & INT8MASK(4); + int64_t deltaOfDelta = 0; if (nbytes == 0) { deltaOfDelta = 0; } else { - if (bigEndian) { - memcpy(((char *)(&dd1)) + longBytes - nbytes, input + ipos, nbytes); - } else { - memcpy(&dd1, input + ipos, nbytes); - } - deltaOfDelta = ZIGZAG_DECODE(int64_t, dd1); + // if 
(is_bigendian()) { + // memcpy(((char *)(&dd1)) + longBytes - nbytes, input + ipos, nbytes); + // } else { + memcpy(&dd, input + ipos, nbytes); + // } + deltaOfDelta = ZIGZAG_DECODE(int64_t, dd); } ipos += nbytes; - prevDelta += deltaOfDelta; - prevValue += prevDelta; - ostream[opos++] = prevValue; - - if (opos == nelements) { - return nelements * longBytes; - } - - // Decode dd2 - uint64_t dd2 = 0; - nbytes = (flags >> 4) & INT8MASK(4); - if (nbytes == 0) { - deltaOfDelta = 0; + if (opos == 0) { + ostream[opos++] = deltaOfDelta; } else { - if (bigEndian) { - memcpy(((char *)(&dd2)) + longBytes - nbytes, input + ipos, nbytes); - } else { - memcpy(&dd2, input + ipos, nbytes); - } - // zigzag_decoding - deltaOfDelta = ZIGZAG_DECODE(int64_t, dd2); - } - - ipos += nbytes; - prevDelta += deltaOfDelta; - prevValue += prevDelta; - ostream[opos++] = prevValue; - - if (opos == nelements) { - return nelements * longBytes; + int64_t prevDeltaX = deltaOfDelta + prevDelta[1]; + ostream[opos++] = prevVal[1] + prevDeltaX; } } +#endif +#endif + return 0; +} + +int32_t tsDecompressTimestampAvx512(const char *const input, const int32_t nelements, char *const output, + bool UNUSED_PARAM(bigEndian)) { + int64_t *ostream = (int64_t *)output; + int32_t ipos = 1, opos = 0; + +#if __AVX512VL__ + + __m128i prevVal = _mm_setzero_si128(); + __m128i prevDelta = _mm_setzero_si128(); + + int32_t numOfBatch = nelements >> 1; + int32_t remainder = nelements & 0x01; + __mmask16 mask2[16] = {0, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff}; + + int32_t i = 0; + if (numOfBatch > 1) { + // first loop + uint8_t flags = input[ipos++]; + + int8_t nbytes1 = flags & INT8MASK(4); // range of nbytes starts from 0 to 7 + int8_t nbytes2 = (flags >> 4) & INT8MASK(4); + + __m128i data1 = _mm_maskz_loadu_epi8(mask2[nbytes1], (const void*)(input + ipos)); + __m128i data2 = _mm_maskz_loadu_epi8(mask2[nbytes2], (const void*)(input + ipos + nbytes1)); + data2 = _mm_broadcastq_epi64(data2); + + __m128i zzVal = _mm_blend_epi32(data2, data1, 0x03); + + // ZIGZAG_DECODE(T, v) (((v) >> 1) ^ -((T)((v)&1))) + __m128i signmask = _mm_and_si128(_mm_set1_epi64x(1), zzVal); + signmask = _mm_sub_epi64(_mm_setzero_si128(), signmask); + + // get two zigzag values here + __m128i deltaOfDelta = _mm_xor_si128(_mm_srli_epi64(zzVal, 1), signmask); + + __m128i deltaCurrent = _mm_add_epi64(deltaOfDelta, prevDelta); + deltaCurrent = _mm_add_epi64(_mm_slli_si128(deltaCurrent, 8), deltaCurrent); + + __m128i val = _mm_add_epi64(deltaCurrent, prevVal); + _mm_storeu_si128((__m128i *)&ostream[opos], val); + + // keep the previous value + prevVal = _mm_shuffle_epi32 (val, 0xEE); + + // keep the previous delta of delta, for the first item + prevDelta = _mm_shuffle_epi32(deltaOfDelta, 0xEE); + + opos += 2; + ipos += nbytes1 + nbytes2; + i += 1; + } + + // the remain + for(; i < numOfBatch; ++i) { + uint8_t flags = input[ipos++]; + + int8_t nbytes1 = flags & INT8MASK(4); // range of nbytes starts from 0 to 7 + int8_t nbytes2 = (flags >> 4) & INT8MASK(4); + + __m128i data1 = _mm_maskz_loadu_epi8(mask2[nbytes1], (const void*)(input + ipos)); + __m128i data2 = _mm_maskz_loadu_epi8(mask2[nbytes2], (const void*)(input + ipos + nbytes1)); + data2 = _mm_broadcastq_epi64(data2); + + __m128i zzVal = _mm_blend_epi32(data2, data1, 0x03); + + // ZIGZAG_DECODE(T, v) (((v) >> 1) ^ -((T)((v)&1))) + __m128i signmask = _mm_and_si128(_mm_set1_epi64x(1), zzVal); + signmask = _mm_sub_epi64(_mm_setzero_si128(), signmask); + + // get two zigzag values here + __m128i 
deltaOfDelta = _mm_xor_si128(_mm_srli_epi64(zzVal, 1), signmask); + + __m128i deltaCurrent = _mm_add_epi64(deltaOfDelta, prevDelta); + deltaCurrent = _mm_add_epi64(_mm_slli_si128(deltaCurrent, 8), deltaCurrent); + + __m128i val = _mm_add_epi64(deltaCurrent, prevVal); + _mm_storeu_si128((__m128i *)&ostream[opos], val); + + // keep the previous value + prevVal = _mm_shuffle_epi32 (val, 0xEE); + + // keep the previous delta of delta + __m128i delta = _mm_add_epi64(_mm_slli_si128(deltaOfDelta, 8), deltaOfDelta); + prevDelta = _mm_shuffle_epi32(_mm_add_epi64(delta, prevDelta), 0xEE); + + opos += 2; + ipos += nbytes1 + nbytes2; + } + + if (remainder > 0) { + uint64_t dd = 0; + uint8_t flags = input[ipos++]; + + int32_t nbytes = flags & INT8MASK(4); + int64_t deltaOfDelta = 0; + if (nbytes == 0) { + deltaOfDelta = 0; + } else { + memcpy(&dd, input + ipos, nbytes); + deltaOfDelta = ZIGZAG_DECODE(int64_t, dd); + } + + ipos += nbytes; + if (opos == 0) { + ostream[opos++] = deltaOfDelta; + } else { + int64_t prevDeltaX = deltaOfDelta + prevDelta[1]; + ostream[opos++] = prevVal[1] + prevDeltaX; + } + } + #endif return 0; } \ No newline at end of file diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 8847b7d894..cc6647e463 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -445,7 +445,6 @@ TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_PAR_DEC_IVLD_KLEN, "Invalid klen to decod TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_GEN_IVLD_KEY, "Invalid key to gen active code") TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_GEN_APP_LIMIT, "Limited app num to gen active code") TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_GEN_ENC_IVLD_KLEN, "Invalid klen to encode active code") -TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_PAR_IVLD_DIST, "Invalid dist to parse active code") // sync TAOS_DEFINE_ERROR(TSDB_CODE_SYN_TIMEOUT, "Sync timeout") diff --git a/source/util/test/decompressTest.cpp b/source/util/test/decompressTest.cpp new file mode 100644 index 0000000000..caf8df3ba8 --- /dev/null +++ b/source/util/test/decompressTest.cpp @@ -0,0 +1,94 @@ +#include +#include +#include +#include + +namespace {} // namespace + +TEST(utilTest, decompress_test) { + int64_t tsList[10] = {1700000000, 1700000100, 1700000200, 1700000300, 1700000400, + 1700000500, 1700000600, 1700000700, 1700000800, 1700000900}; + + char* pOutput[10 * sizeof(int64_t)] = {0}; + int32_t len = tsCompressTimestamp(tsList, sizeof(tsList), sizeof(tsList) / sizeof(tsList[0]), pOutput, 10, ONE_STAGE_COMP, NULL, 0); + + char* decompOutput[10 * 8] = {0}; + tsDecompressTimestamp(pOutput, len, 10, decompOutput, sizeof(int64_t)*10, ONE_STAGE_COMP, NULL, 0); + + for(int32_t i = 0; i < 10; ++i) { + std::cout<< ((int64_t*)decompOutput)[i] << std::endl; + } + + memset(decompOutput, 0, 10*8); + tsDecompressTimestampAvx512(reinterpret_cast(pOutput), 10, + reinterpret_cast(decompOutput), false); + + for(int32_t i = 0; i < 10; ++i) { + std::cout<<((int64_t*)decompOutput)[i] << std::endl; + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + int64_t tsList1[7] = {1700000000, 1700000000, 1700000000, 1700000000, 1700000000, 1700000000, 1700000900}; + int32_t len1 = tsCompressTimestamp(tsList1, sizeof(tsList1), sizeof(tsList1) / sizeof(tsList1[0]), pOutput, 7, ONE_STAGE_COMP, NULL, 0); + + memset(decompOutput, 0, 10*8); + tsDecompressTimestampAvx512(reinterpret_cast(pOutput), 7, + reinterpret_cast(decompOutput), false); + + for(int32_t i = 0; i < 7; ++i) { + std::cout<<((int64_t*)decompOutput)[i] << std::endl; + } + + 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + int64_t tsList2[1] = {1700000000}; + int32_t len2 = tsCompressTimestamp(tsList2, sizeof(tsList2), sizeof(tsList2) / sizeof(tsList2[0]), pOutput, 1, ONE_STAGE_COMP, NULL, 0); + + memset(decompOutput, 0, 10*8); + tsDecompressTimestampAvx512(reinterpret_cast(pOutput), 1, + reinterpret_cast(decompOutput), false); + + for(int32_t i = 0; i < 1; ++i) { + std::cout<<((int64_t*)decompOutput)[i] << std::endl; + } +} + +TEST(utilTest, decompress_perf_test) { + int32_t num = 10000; + + int64_t* pList = static_cast(taosMemoryCalloc(num, sizeof(int64_t))); + int64_t iniVal = 1700000000; + + uint32_t v = 100; + + for(int32_t i = 0; i < num; ++i) { + iniVal += taosRandR(&v)%10; + pList[i] = iniVal; + } + + char* px = static_cast(taosMemoryMalloc(num * sizeof(int64_t))); + int32_t len = tsCompressTimestamp(pList, num * sizeof(int64_t), num, px, num, ONE_STAGE_COMP, NULL, 0); + + char* pOutput = static_cast(taosMemoryMalloc(num * sizeof(int64_t))); + + int64_t st = taosGetTimestampUs(); + for(int32_t k = 0; k < 10000; ++k) { + tsDecompressTimestamp(px, len, num, pOutput, sizeof(int64_t) * num, ONE_STAGE_COMP, NULL, 0); + } + + int64_t el1 = taosGetTimestampUs() - st; + std::cout << "soft decompress elapsed time:" << el1 << " us" << std::endl; + + memset(pOutput, 0, num * sizeof(int64_t)); + st = taosGetTimestampUs(); + for(int32_t k = 0; k < 10000; ++k) { + tsDecompressTimestampAvx512(px, num, pOutput, false); + } + + int64_t el2 = taosGetTimestampUs() - st; + std::cout << "SIMD decompress elapsed time:" << el2 << " us" << std::endl; + + taosMemoryFree(pList); + taosMemoryFree(pOutput); + taosMemoryFree(px); +} + diff --git a/tests/develop-test/win-test-file b/tests/develop-test/win-test-file index b640ef6bfe..1da890fc36 100644 --- a/tests/develop-test/win-test-file +++ b/tests/develop-test/win-test-file @@ -1,4 +1,7 @@ python3 ./test.py -f 2-query/table_count_scan.py +python3 ./test.py -f 2-query/pseudo_column.py +python3 ./test.py -f 2-query/ts-range.py +python3 ./test.py -f 2-query/tag_scan.py python3 ./test.py -f 2-query/show_create_db.py python3 ./test.py -f 5-taos-tools/taosbenchmark/auto_create_table_json.py python3 ./test.py -f 5-taos-tools/taosbenchmark/custom_col_tag.py diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 8cb23c081f..7d3efbf181 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -22,7 +22,7 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/pause_resume_test.py #,,n,system-test,python3 ./test.py -f 8-stream/vnode_restart.py -N 4 #,,n,system-test,python3 ./test.py -f 8-stream/snode_restart.py -N 4 -,,n,system-test,python3 ./test.py -f 8-stream/snode_restart_with_checkpoint.py -N 4 +#,,n,system-test,python3 ./test.py -f 8-stream/snode_restart_with_checkpoint.py -N 4 ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/tbname_vgroup.py ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/stbJoin.py @@ -198,6 +198,7 @@ #,,y,system-test,./pytest.sh python3 test.py -f 7-tmq/tmqVnodeTransform-db.py -N 6 -n 3 ,,y,system-test,./pytest.sh python3 test.py -f 7-tmq/tmqVnodeSplit-stb-select.py -N 2 -n 1 ,,y,system-test,./pytest.sh python3 test.py -f 7-tmq/tmqVnodeSplit-stb-select-duplicatedata.py -N 3 -n 3 +,,y,system-test,./pytest.sh python3 test.py -f 7-tmq/tmqVnodeSplit-stb-select-duplicatedata-false.py -N 3 -n 3 ,,y,system-test,./pytest.sh python3 test.py -f 7-tmq/tmqVnodeSplit-stb-select.py -N 
3 -n 3 ,,y,system-test,./pytest.sh python3 test.py -f 7-tmq/tmqVnodeSplit-stb.py -N 3 -n 3 ,,y,system-test,./pytest.sh python3 test.py -f 7-tmq/tmqVnodeSplit-column.py -N 3 -n 3 @@ -299,6 +300,7 @@ e ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/precisionUS.py ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/precisionNS.py ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/test_ts4219.py +,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/ts-4272.py ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/test_ts4295.py ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/test_td27388.py ,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/show.py @@ -878,7 +880,7 @@ e ,,y,script,./test.sh -f tsim/dnode/balance2.sim ,,y,script,./test.sh -f tsim/vnode/replica3_repeat.sim ,,y,script,./test.sh -f tsim/parser/col_arithmetic_operation.sim -,,y,script,./test.sh -f tsim/trans/create_db.sim +#,,y,script,./test.sh -f tsim/trans/create_db.sim ,,y,script,./test.sh -f tsim/dnode/balance3.sim ,,y,script,./test.sh -f tsim/vnode/replica3_many.sim ,,y,script,./test.sh -f tsim/stable/metrics_idx.sim @@ -1335,7 +1337,7 @@ e #docs-examples test ,,n,docs-examples-test,bash python.sh -,,n,docs-examples-test,bash node.sh +#,,n,docs-examples-test,bash node.sh ,,n,docs-examples-test,bash csharp.sh ,,n,docs-examples-test,bash jdbc.sh ,,n,docs-examples-test,bash go.sh diff --git a/tests/script/win-test-file b/tests/script/win-test-file index b2d50ade8a..d9ff09f468 100644 --- a/tests/script/win-test-file +++ b/tests/script/win-test-file @@ -19,19 +19,18 @@ ./test.sh -f tsim/dnode/balance3.sim ./test.sh -f tsim/vnode/replica3_many.sim ./test.sh -f tsim/stable/metrics_idx.sim -./test.sh -f tsim/db/alter_replica_13.sim ./test.sh -f tsim/sync/3Replica1VgElect.sim ./test.sh -f tsim/sync/3Replica5VgElect.sim ./test.sh -f tsim/valgrind/checkError6.sim ./test.sh -f tsim/user/basic.sim ./test.sh -f tsim/user/password.sim +./test.sh -f tsim/user/whitelist.sim ./test.sh -f tsim/user/privilege_db.sim ./test.sh -f tsim/user/privilege_sysinfo.sim ./test.sh -f tsim/user/privilege_topic.sim ./test.sh -f tsim/user/privilege_table.sim ./test.sh -f tsim/user/privilege_create_db.sim ./test.sh -f tsim/db/alter_option.sim -./test.sh -f tsim/db/alter_replica_31.sim ./test.sh -f tsim/db/basic1.sim ./test.sh -f tsim/db/basic2.sim ./test.sh -f tsim/db/basic3.sim @@ -97,6 +96,7 @@ ./test.sh -f tsim/insert/delete0.sim ./test.sh -f tsim/insert/update1_sort_merge.sim ./test.sh -f tsim/insert/update2.sim +./test.sh -f tsim/insert/insert_stb.sim ./test.sh -f tsim/parser/alter__for_community_version.sim ./test.sh -f tsim/parser/alter_column.sim ./test.sh -f tsim/parser/alter_stable.sim @@ -186,6 +186,7 @@ ./test.sh -f tsim/query/session.sim ./test.sh -f tsim/query/join_interval.sim ./test.sh -f tsim/query/join_pk.sim +./test.sh -f tsim/query/count_spread.sim ./test.sh -f tsim/query/unionall_as_table.sim ./test.sh -f tsim/query/multi_order_by.sim ./test.sh -f tsim/query/sys_tbname.sim @@ -197,10 +198,14 @@ ./test.sh -f tsim/query/emptyTsRange_scl.sim ./test.sh -f tsim/query/partitionby.sim ./test.sh -f tsim/query/tableCount.sim +./test.sh -f tsim/query/show_db_table_kind.sim +./test.sh -f tsim/query/bi_star_table.sim +./test.sh -f tsim/query/bi_tag_scan.sim ./test.sh -f tsim/query/tag_scan.sim ./test.sh -f tsim/query/nullColSma.sim ./test.sh -f tsim/query/bug3398.sim ./test.sh -f tsim/query/explain_tsorder.sim +./test.sh -f tsim/query/apercentile.sim ./test.sh -f tsim/qnode/basic1.sim ./test.sh -f 
tsim/snode/basic1.sim ./test.sh -f tsim/mnode/basic1.sim @@ -237,52 +242,6 @@ ./test.sh -f tsim/table/table.sim ./test.sh -f tsim/table/tinyint.sim ./test.sh -f tsim/table/vgroup.sim -./test.sh -f tsim/stream/basic0.sim -g -./test.sh -f tsim/stream/basic1.sim -./test.sh -f tsim/stream/basic2.sim -./test.sh -f tsim/stream/basic3.sim -./test.sh -f tsim/stream/basic4.sim -./test.sh -f tsim/stream/checkpointInterval0.sim -./test.sh -f tsim/stream/checkStreamSTable1.sim -./test.sh -f tsim/stream/checkStreamSTable.sim -./test.sh -f tsim/stream/deleteInterval.sim -./test.sh -f tsim/stream/deleteSession.sim -./test.sh -f tsim/stream/deleteState.sim -./test.sh -f tsim/stream/distributeInterval0.sim -./test.sh -f tsim/stream/distributeIntervalRetrive0.sim -./test.sh -f tsim/stream/distributeSession0.sim -./test.sh -f tsim/stream/drop_stream.sim -./test.sh -f tsim/stream/fillHistoryBasic1.sim -./test.sh -f tsim/stream/fillHistoryBasic2.sim -./test.sh -f tsim/stream/fillHistoryBasic3.sim -./test.sh -f tsim/stream/fillIntervalDelete0.sim -./test.sh -f tsim/stream/fillIntervalDelete1.sim -./test.sh -f tsim/stream/fillIntervalLinear.sim -./test.sh -f tsim/stream/fillIntervalPartitionBy.sim -./test.sh -f tsim/stream/fillIntervalPrevNext1.sim -./test.sh -f tsim/stream/fillIntervalPrevNext.sim -./test.sh -f tsim/stream/fillIntervalRange.sim -./test.sh -f tsim/stream/fillIntervalValue.sim -./test.sh -f tsim/stream/ignoreCheckUpdate.sim -./test.sh -f tsim/stream/ignoreExpiredData.sim -./test.sh -f tsim/stream/partitionby1.sim -./test.sh -f tsim/stream/partitionbyColumnInterval.sim -./test.sh -f tsim/stream/partitionbyColumnSession.sim -./test.sh -f tsim/stream/partitionbyColumnState.sim -./test.sh -f tsim/stream/partitionby.sim -./test.sh -f tsim/stream/pauseAndResume.sim -./test.sh -f tsim/stream/schedSnode.sim -./test.sh -f tsim/stream/session0.sim -./test.sh -f tsim/stream/session1.sim -./test.sh -f tsim/stream/sliding.sim -./test.sh -f tsim/stream/state0.sim -./test.sh -f tsim/stream/state1.sim -./test.sh -f tsim/stream/triggerInterval0.sim -./test.sh -f tsim/stream/triggerSession0.sim -./test.sh -f tsim/stream/udTableAndTag0.sim -./test.sh -f tsim/stream/udTableAndTag1.sim -./test.sh -f tsim/stream/udTableAndTag2.sim -./test.sh -f tsim/stream/windowClose.sim ./test.sh -f tsim/trans/lossdata1.sim ./test.sh -f tsim/tmq/basic1.sim ./test.sh -f tsim/tmq/basic2.sim @@ -431,3 +390,8 @@ ./test.sh -f tsim/tag/drop_tag.sim ./test.sh -f tsim/tag/tbNameIn.sim ./test.sh -f tmp/monitor.sim +./test.sh -f tsim/tagindex/add_index.sim +./test.sh -f tsim/tagindex/sma_and_tag_index.sim +./test.sh -f tsim/view/view.sim +./test.sh -f tsim/query/cache_last.sim +./test.sh -f tsim/query/const.sim diff --git a/tests/system-test/0-others/compatibility.py b/tests/system-test/0-others/compatibility.py index 83bfb2bed7..d54c676c0d 100644 --- a/tests/system-test/0-others/compatibility.py +++ b/tests/system-test/0-others/compatibility.py @@ -90,7 +90,10 @@ class TDTestCase: packagePath = "/usr/local/src/" dataPath = cPath + "/../data/" - packageName = "TDengine-server-"+ BASEVERSION + "-Linux-x64.tar.gz" + if platform.system() == "Linux" and platform.machine() == "aarch64": + packageName = "TDengine-server-"+ BASEVERSION + "-Linux-arm64.tar.gz" + else: + packageName = "TDengine-server-"+ BASEVERSION + "-Linux-x64.tar.gz" packageTPath = packageName.split("-Linux-")[0] my_file = Path(f"{packagePath}/{packageName}") if not my_file.exists(): diff --git a/tests/system-test/0-others/information_schema.py 
b/tests/system-test/0-others/information_schema.py index 544a966960..2bfe33d0af 100644 --- a/tests/system-test/0-others/information_schema.py +++ b/tests/system-test/0-others/information_schema.py @@ -247,10 +247,7 @@ class TDTestCase: tdSql.error('alter all dnodes "activeCode" "' + self.str510 + '"') tdSql.query(f'select * from information_schema.ins_dnodes') tdSql.checkEqual(tdSql.queryResult[0][8],"") - tdSql.error('alter dnode 1 "activeCode" ""') - tdSql.error('alter dnode 1 "activeCode"') - tdSql.execute('alter all dnodes "activeCode" ""') - tdSql.execute('alter all dnodes "activeCode"') + tdSql.execute('alter dnode 1 "activeCode" ""') tdSql.query(f'select active_code,c_active_code from information_schema.ins_dnodes') tdSql.checkEqual(tdSql.queryResult[0][0],"") tdSql.checkEqual(tdSql.queryResult[0][1],'') @@ -262,10 +259,6 @@ class TDTestCase: tdSql.error('alter all dnodes "cActiveCode" "' + self.str257 + '"') tdSql.error('alter all dnodes "cActiveCode" "' + self.str254 + '"') tdSql.error('alter dnode 1 "cActiveCode" "' + self.str510 + '"') - tdSql.error('alter dnode 1 "cActiveCode" ""') - tdSql.error('alter dnode 1 "cActiveCode"') - tdSql.execute('alter all dnodes "cActiveCode" ""') - tdSql.execute('alter all dnodes "cActiveCode"') tdSql.query(f'select active_code,c_active_code from information_schema.ins_dnodes') tdSql.checkEqual(tdSql.queryResult[0][0],"") tdSql.checkEqual(tdSql.queryResult[0][1],"") diff --git a/tests/system-test/0-others/test_hot_refresh_configurations.py b/tests/system-test/0-others/test_hot_refresh_configurations.py index 7aed7274a4..cbde8c060e 100644 --- a/tests/system-test/0-others/test_hot_refresh_configurations.py +++ b/tests/system-test/0-others/test_hot_refresh_configurations.py @@ -2,7 +2,7 @@ import subprocess import random import time import os - +import platform from util.log import * from util.sql import * from util.cases import * @@ -190,6 +190,8 @@ class TDTestCase: for v in values: dnode = random.choice(p_list) tdSql.execute(f'alter {dnode} "{name} {v}";') + if platform.system() == "Linux" and platform.machine() == "aarch64": + continue value = self.get_param_value_with_gdb(alias, "taosd") if value: tdLog.debug(f"value: {value}") diff --git a/tests/system-test/0-others/user_privilege.py b/tests/system-test/0-others/user_privilege.py index d1b93f6942..a731e85ddb 100644 --- a/tests/system-test/0-others/user_privilege.py +++ b/tests/system-test/0-others/user_privilege.py @@ -27,6 +27,7 @@ class TDTestCase: tdSql.init(conn.cursor()) self.setsql = TDSetSql() self.stbname = 'stb' + self.user_name = 'test' self.binary_length = 20 # the length of binary for column_dict self.nchar_length = 20 # the length of nchar for column_dict self.dbnames = ['db1', 'db2'] @@ -54,12 +55,12 @@ class TDTestCase: ] self.tbnum = 4 + self.stbnum_grant = 200 def create_user(self): - user_name = 'test' - tdSql.execute(f'create user {user_name} pass "test"') - tdSql.execute(f'grant read on {self.dbnames[0]}.{self.stbname} with t2 = "Beijing" to {user_name}') - tdSql.execute(f'grant write on {self.dbnames[1]}.{self.stbname} with t1 = 2 to {user_name}') + tdSql.execute(f'create user {self.user_name} pass "test"') + tdSql.execute(f'grant read on {self.dbnames[0]}.{self.stbname} with t2 = "Beijing" to {self.user_name}') + tdSql.execute(f'grant write on {self.dbnames[1]}.{self.stbname} with t1 = 2 to {self.user_name}') def prepare_data(self): for db in self.dbnames: @@ -70,6 +71,8 @@ class TDTestCase: tdSql.execute(f'create table {self.stbname}_{i} using {self.stbname} 
tags({self.tag_list[i]})') for j in self.values_list: tdSql.execute(f'insert into {self.stbname}_{i} values({j})') + for i in range(self.stbnum_grant): + tdSql.execute(f'create table {self.stbname}_grant_{i} (ts timestamp, c0 int) tags(t0 int)') def user_read_privilege_check(self, dbname): testconn = taos.connect(user='test', password='test') @@ -128,12 +131,20 @@ class TDTestCase: tdLog.exit(f"{caller.filename}({caller.lineno}) failed: sql:{sql}, expect error not occured") pass + def user_privilege_grant_check(self): + for db in self.dbnames: + tdSql.execute(f"use {db}") + for i in range(self.stbnum_grant): + tdSql.execute(f'grant read on {db}.{self.stbname}_grant_{i} to {self.user_name}') + tdSql.execute(f'grant write on {db}.{self.stbname}_grant_{i} to {self.user_name}') + def run(self): self.prepare_data() self.create_user() self.user_read_privilege_check(self.dbnames[0]) self.user_write_privilege_check(self.dbnames[1]) self.user_privilege_error_check() + self.user_privilege_grant_check() def stop(self): tdSql.close() diff --git a/tests/system-test/1-insert/ts-4272.py b/tests/system-test/1-insert/ts-4272.py new file mode 100644 index 0000000000..bb81305eb3 --- /dev/null +++ b/tests/system-test/1-insert/ts-4272.py @@ -0,0 +1,205 @@ + +import csv +from datetime import datetime + +import taos +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * + +class TDTestCase: + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + self.testcasePath = os.path.split(__file__)[0] + self.testcasefilename = os.path.split(__file__)[-1] + self.ts = 1700638570000 # 2023-11-22T07:36:10.000Z + self.db = 'db1' + self.tb1 = 'd001' + self.tb2 = 'd002' + self.stable0 = "meters" + self.stable1 = "stb_1" + self.stable2 = "stb_null" + self.tag1 = f'using {self.stable0}(groupId) tags(1)' + self.tag2 = f'using {self.stable0}(groupId) tags(2)' + self.file1 = f"{self.testcasePath}/b.csv" + self.file2 = f"{self.testcasePath}/c.csv" + + #os.system("rm -rf %s/b.csv" %self.testcasePath) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), logSql) + + def check_count(self, rows, records): + tdSql.execute(f"use {self.db};") + tdSql.query(f"select tbname,count(*) from {self.stable0} group by tbname order by tbname;") + tdSql.checkRows(rows) + for i in range(rows): + tdSql.checkData(i, 1, records[i]) + + def reset_tb(self): + # create database and tables + # os.system("taos -s 'drop database if exists d1;'") + # os.system("taos -s 'create database d1;use d1;create stable meters (ts timestamp, current float, voltage int, phase float) tags (location binary(64), groupId int);'") + # os.system(f"taos -s 'use d1;create table d2001 using meters(groupId) tags(5);'") + # res = os.system(f"taos -s 'use d1;create table d2002 using meters(groupId) tags(6);'") + # if (0 != res): + # tdLog.exit(f"create tb error") + + tdSql.execute(f"drop database if exists {self.db};") + tdSql.execute(f"create database {self.db};") + tdSql.execute(f"use {self.db};") + tdSql.execute(f"create stable {self.stable0} (ts timestamp, current float, voltage int, phase float) tags (location binary(64), groupId int);") + tdSql.execute(f"create table {self.tb1} {self.tag1};") + tdSql.execute(f"create table {self.tb2} {self.tag2};") + tdSql.execute(f"create stable {self.stable1} (ts timestamp , q_int int , q_bigint bigint , q_smallint smallint , q_tinyint tinyint , q_float float , q_double double , q_bool bool , q_binary binary(100) , q_nchar 
nchar(100) , q_ts timestamp , q_int_null int , q_bigint_null bigint , q_smallint_null smallint , q_tinyint_null tinyint, q_float_null float , q_double_null double , q_bool_null bool , q_binary_null binary(20) , q_nchar_null nchar(20) , q_ts_null timestamp) tags(loc nchar(100) , t_int int , t_bigint bigint , t_smallint smallint , t_tinyint tinyint, t_bool bool , t_binary binary(100) , t_nchar nchar(100) ,t_float float , t_double double , t_ts timestamp);") + tdSql.execute(f"create stable {self.stable2} (ts timestamp , q_int int , q_bigint bigint , q_smallint smallint , q_tinyint tinyint , q_float float , q_double double , q_bool bool , q_binary binary(100) , q_nchar nchar(100) , q_ts timestamp , q_int_null int , q_bigint_null bigint , q_smallint_null smallint , q_tinyint_null tinyint, q_float_null float , q_double_null double , q_bool_null bool , q_binary_null binary(20) , q_nchar_null nchar(20) , q_ts_null timestamp) tags(loc nchar(100) , t_int int , t_bigint bigint , t_smallint smallint , t_tinyint tinyint, t_bool bool , t_binary binary(100) , t_nchar nchar(100) ,t_float float , t_double double , t_ts timestamp);") + + def test(self, sql): + sql = f"use {self.db};" + sql + res = os.system(f'taos -s "{sql}"') + # if (0 != res): + # tdLog.exit(f"taos sql error") + + + def check(self): + # same table, auto create + create + sql = f"insert into {self.tb1} {self.tag1} file '{self.file1}' {self.tb1} {self.tag1} file '{self.file2}';" + self.test(sql) + + # same table, create + insert + sql = f"insert into {self.tb1} {self.tag1} file '{self.file1}' {self.tb1} file '{self.file2}';" + self.test(sql) + + # same table, insert + create + sql = f"insert into {self.tb1} file '{self.file1}' {self.tb1} {self.tag1} file '{self.file2}';" + self.test(sql) + + # same table, insert + insert + sql = f"insert into {self.tb1} file '{self.file1}' {self.tb1} file '{self.file2}';" + self.test(sql) + + # diff table auto create + create + sql = f"insert into {self.tb1} {self.tag1} file '{self.file1}' {self.tb2} {self.tag2} file '{self.file2}';" + self.test(sql) + + # diff table, create + insert + sql = f"insert into {self.tb1} {self.tag1} file '{self.file1}' {self.tb2} file '{self.file2}';" + self.test(sql) + + # diff table, insert + create + sql = f"insert into {self.tb1} file '{self.file1}' {self.tb2} {self.tag2} file '{self.file2}';" + self.test(sql) + + # diff table, insert + insert + sql = f"insert into {self.tb1} file '{self.file1}' {self.tb2} file '{self.file2}';" + self.test(sql) + + # bigNum = 1010000 + # self.check_count(5, [2100, 2100, bigNum, bigNum, bigNum]) + + result = os.popen("taos -s 'select count(*) from %s.%s'" %(self.db, self.tb1)) + res = result.read() + if (f"OK" in res): + tdLog.info(f"check count success") + + def make_csv(self, filepath, once, qtime, startts): + f = open(filepath, 'w') + with f: + writer = csv.writer(f) + for j in range(qtime): + ts = startts + j*once + rows = [] + for i in range(once): + rows.append([ts + i, 0.3 + (i%10)/100.0, 210 + i%10, 10.0 + (i%20)/20.0]) + writer.writerows(rows) + f.close() + print(datetime.now(), filepath, " ready!") + + def test_mix(self): + #forbid use both value and file in one insert + result = os.popen(f"insert into {self.tb1} file '{self.file2}' {self.tb2} values('2021-07-13 14:06:34.630', 10.2, 219, 0.32);") + res = result.read() + if (f"error" in res): + tdLog.info(f"forbid success") + + def test_bigcsv(self): + # prepare csv + print("start csv data prepare") + once = 10000 + qtime1 = 101 + qtime2 = 100 + rowNum1 = qtime1 * once + rowNum2 = 
qtime2 * once + self.make_csv(self.file1, once, qtime1, self.ts - 86400000) + self.make_csv(self.file2, once, qtime2, self.ts) + print("end csv data prepare") + + # auto create + insert + sql = f"insert into {self.tb1} {self.tag1} file '{self.file1}';" + self.test(sql) + + # only insert + sql = f"insert into {self.tb2} file '{self.file2}';" + self.test(sql) + print("end insert to table") + + #tdSql.execute(f"use d1;") + tdSql.query(f"select tbname,count(*) from {self.stable0} group by tbname order by tbname;") + tdSql.checkRows(2) + tdSql.checkData(0, 1, rowNum1) + tdSql.checkData(1, 1, rowNum2) + print("check insert file to table success") + + def make_stable_csv(self, filepath, once, qtime, startts, table_name): + f = open(filepath, 'w') + with f: + writer = csv.writer(f) + for j in range(qtime): + offset = j*once + ts = startts + offset + rows = [] + for i in range(once): + rows.append([table_name, ts + i, offset + i, 'NULL']) + writer.writerows(rows) + f.close() + print(datetime.now(), filepath, " ready!") + + def test_stable_csv(self): + # prepare csv + print("start stable_csv data prepare") + once = 10000 + qtime1 = 101 + qtime2 = 100 + # rowNum1 = qtime1 * once + # rowNum2 = qtime2 * once + child_1 = f"{self.stable1}_1" + child_2 = f"{self.stable2}_1" + self.make_stable_csv(self.file1, once, qtime1, self.ts - 86400000, child_1) + self.make_stable_csv(self.file2, once, qtime2, self.ts, child_2) + print("end stable_csv data prepare") + + # insert create child table of stable + sql = f"insert into {self.db}.{self.stable1}(tbname,ts,q_int,q_binary) file '{self.file1}' {self.db}.{self.stable2}(tbname,ts,q_int,q_binary) file '{self.file2}';" + self.test(sql) + print("end insert to stable") + + #tdSql.execute(f"insert into {self.db}.{child_1}(ts, q_int) values(now, 1);") + tdSql.query(f"select tbname,count(*) from {self.stable1} group by tbname order by tbname;") + tdSql.checkRows(0) + print("check stable success") + + def run(self): + tdSql.prepare() + self.reset_tb() + self.test_stable_csv() + self.test_bigcsv() + self.test_mix() + self.check() + tdSql.close() + + def stop(self): + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/6-cluster/5dnode3mnodeRoll.py b/tests/system-test/6-cluster/5dnode3mnodeRoll.py index 9d62eb3b4b..11a153c48f 100644 --- a/tests/system-test/6-cluster/5dnode3mnodeRoll.py +++ b/tests/system-test/6-cluster/5dnode3mnodeRoll.py @@ -4,7 +4,7 @@ import taos import sys import time import os - +import platform from util.log import * from util.sql import * from util.cases import * @@ -96,7 +96,10 @@ class TDTestCase: packagePath = "/usr/local/src/" dataPath = cPath + "/../data/" - packageName = "TDengine-server-"+ BASEVERSION + "-Linux-x64.tar.gz" + if platform.system() == "Linux" and platform.machine() == "aarch64": + packageName = "TDengine-server-"+ BASEVERSION + "-Linux-arm64.tar.gz" + else: + packageName = "TDengine-server-"+ BASEVERSION + "-Linux-x64.tar.gz" packageTPath = packageName.split("-Linux-")[0] my_file = Path(f"{packagePath}/{packageName}") if not my_file.exists(): diff --git a/tests/system-test/7-tmq/tmqCommon.py b/tests/system-test/7-tmq/tmqCommon.py index 059744caf0..66c88cf600 100644 --- a/tests/system-test/7-tmq/tmqCommon.py +++ b/tests/system-test/7-tmq/tmqCommon.py @@ -75,7 +75,7 @@ class TMQCom: if tdSql.getRows() == expectRows: break else: - time.sleep(5) + time.sleep(0.5) for i in range(expectRows): tdLog.info ("consume id: 
%d, consume msgs: %d, consume rows: %d"%(tdSql.getData(i , 1), tdSql.getData(i , 2), tdSql.getData(i , 3))) @@ -156,7 +156,7 @@ class TMQCom: tdLog.info("row: %d"%(actRows)) if (actRows >= rows): loopFlag = 0 - time.sleep(0.02) + time.sleep(0.5) return def getStartCommitNotifyFromTmqsim(self,cdbName='cdb',rows=1): @@ -167,7 +167,7 @@ class TMQCom: tdLog.info("row: %d"%(actRows)) if (actRows >= rows): loopFlag = 0 - time.sleep(0.02) + time.sleep(0.5) return def create_database(self,tsql, dbName,dropFlag=1,vgroups=4,replica=1): diff --git a/tests/system-test/7-tmq/tmqDnodeRestart.py b/tests/system-test/7-tmq/tmqDnodeRestart.py index 74aba31726..0ac8482163 100644 --- a/tests/system-test/7-tmq/tmqDnodeRestart.py +++ b/tests/system-test/7-tmq/tmqDnodeRestart.py @@ -4,6 +4,7 @@ import sys import time import socket import os +import platform import threading from enum import Enum @@ -184,6 +185,9 @@ class TDTestCase: paraDict['vgroups'] = self.vgroups paraDict['ctbNum'] = self.ctbNum paraDict['rowsPerTbl'] = self.rowsPerTbl + # ARM64:time cost is so long for stopping taosd, so add the pollDdelay to 120s + if platform.system() == "Linux" and platform.machine() == "aarch64": + paraDict['pollDelay'] = 300 tmqCom.initConsumerTable() # tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) diff --git a/tests/system-test/7-tmq/tmqVnodeSplit-column.py b/tests/system-test/7-tmq/tmqVnodeSplit-column.py index 54a43465e7..87a73e981e 100644 --- a/tests/system-test/7-tmq/tmqVnodeSplit-column.py +++ b/tests/system-test/7-tmq/tmqVnodeSplit-column.py @@ -188,7 +188,7 @@ class TDTestCase: expectRows = 1 resultList = tmqCom.selectConsumeResult(expectRows) - if expectrowcnt / 2 >= resultList[0]: + if expectrowcnt / 2 > resultList[0]: tdLog.info("expect consume rows: %d, act consume rows: %d"%(expectrowcnt / 2, resultList[0])) tdLog.exit("%d tmq consume rows error!"%consumerId) diff --git a/tests/system-test/7-tmq/tmqVnodeSplit-stb-select-duplicatedata-false.py b/tests/system-test/7-tmq/tmqVnodeSplit-stb-select-duplicatedata-false.py new file mode 100644 index 0000000000..8276ae638b --- /dev/null +++ b/tests/system-test/7-tmq/tmqVnodeSplit-stb-select-duplicatedata-false.py @@ -0,0 +1,218 @@ + +import taos +import sys +import time +import socket +import os +import threading +import math + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * +from util.cluster import * +sys.path.append("./7-tmq") +from tmqCommon import * + +sys.path.append("./6-cluster") +from clusterCommonCreate import * +from clusterCommonCheck import clusterComCheck + + +class TDTestCase: + def __init__(self): + self.vgroups = 1 + self.ctbNum = 10 + self.rowsPerTbl = 10000 + + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), False) + + def getDataPath(self): + selfPath = tdCom.getBuildPath() + + return selfPath + '/../sim/dnode%d/data/vnode/vnode%d/wal/*'; + + def prepareTestEnv(self): + tdLog.printNoPrefix("======== prepare test env include database, stable, ctables, and insert data: ") + paraDict = {'dbName': 'dbt', + 'dropFlag': 1, + 'event': '', + 'vgroups': 1, + 'stbName': 'stb', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1},{'type': 
'TIMESTAMP', 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], + 'ctbPrefix': 'ctb', + 'ctbStartIdx': 0, + 'ctbNum': 10, + 'rowsPerTbl': 10000, + 'batchNum': 10, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'pollDelay': 60, + 'showMsg': 1, + 'showRow': 1, + 'snapshot': 0} + + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + paraDict['rowsPerTbl'] = self.rowsPerTbl + + tdCom.drop_all_db() + tmqCom.initConsumerTable() + tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], wal_retention_period=36000,vgroups=paraDict["vgroups"],replica=self.replicaVar) + tdLog.info("create stb") + tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) + return + + def restartAndRemoveWal(self, deleteWal): + tdDnodes = cluster.dnodes + tdSql.query("select * from information_schema.ins_vnodes") + for result in tdSql.queryResult: + if result[2] == 'dbt': + tdLog.debug("dnode is %d"%(result[0])) + dnodeId = result[0] + vnodeId = result[1] + + tdDnodes[dnodeId - 1].stoptaosd() + time.sleep(1) + dataPath = self.getDataPath() + dataPath = dataPath%(dnodeId,vnodeId) + tdLog.debug("dataPath:%s"%dataPath) + if deleteWal: + if os.system('rm -rf ' + dataPath) != 0: + tdLog.exit("rm error") + + tdDnodes[dnodeId - 1].starttaosd() + time.sleep(1) + break + tdLog.debug("restart dnode ok") + + def splitVgroups(self): + tdSql.query("select * from information_schema.ins_vnodes") + vnodeId = 0 + for result in tdSql.queryResult: + if result[2] == 'dbt': + vnodeId = result[1] + tdLog.debug("vnode is %d"%(vnodeId)) + break + splitSql = "split vgroup %d" %(vnodeId) + tdLog.debug("splitSql:%s"%(splitSql)) + tdSql.query(splitSql) + tdLog.debug("splitSql ok") + + def tmqCase1(self, deleteWal=False): + tdLog.printNoPrefix("======== test case 1: ") + paraDict = {'dbName': 'dbt', + 'dropFlag': 1, + 'event': '', + 'vgroups': 1, + 'stbName': 'stb', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1},{'type': 'TIMESTAMP', 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], + 'ctbPrefix': 'ctb1', + 'ctbStartIdx': 0, + 'ctbNum': 10, + 'rowsPerTbl': 10000, + 'batchNum': 10, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'pollDelay': 120, + 'showMsg': 1, + 'showRow': 1, + 'snapshot': 0} + + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + paraDict['rowsPerTbl'] = self.rowsPerTbl + + topicNameList = ['topic1'] + # expectRowsList = [] + tmqCom.initConsumerTable() + + tdLog.info("create topics from stb with filter") + queryString = "select * from %s.%s"%(paraDict['dbName'], paraDict['stbName']) + # sqlString = "create topic %s as stable %s" %(topicNameList[0], paraDict['stbName']) + sqlString = "create topic %s as %s" %(topicNameList[0], queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.execute(sqlString) + # tdSql.query(queryString) + # expectRowsList.append(tdSql.getRows()) + + # init consume info, and start tmq_sim, then check consume result + tdLog.info("insert consume info to consume processor") + consumerId = 0 + expectrowcnt = paraDict["rowsPerTbl"] * 
paraDict["ctbNum"] * 2 + topicList = topicNameList[0] + ifcheckdata = 1 + ifManualCommit = 1 + keyList = 'group.id:cgrp1, enable.auto.commit:true, auto.commit.interval.ms:200, auto.offset.reset:earliest' + tmqCom.insertConsumerInfo(consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifManualCommit) + + tdLog.info("start consume processor") + tmqCom.startTmqSimProcess(pollDelay=paraDict['pollDelay'],dbName=paraDict["dbName"],showMsg=paraDict['showMsg'], showRow=paraDict['showRow'],snapshot=paraDict['snapshot']) + tdLog.info("wait the consume result") + + tdLog.info("create ctb1") + tmqCom.create_ctable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix=paraDict['ctbPrefix'], + ctbNum=paraDict["ctbNum"],ctbStartIdx=paraDict['ctbStartIdx']) + + tdLog.info("create ctb2") + paraDict['ctbPrefix'] = "ctb2" + tmqCom.create_ctable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix=paraDict['ctbPrefix'], + ctbNum=paraDict["ctbNum"],ctbStartIdx=paraDict['ctbStartIdx']) + + tdLog.info("insert ctb1 data") + pInsertThread = tmqCom.asyncInsertDataByInterlace(paraDict) + + tmqCom.getStartConsumeNotifyFromTmqsim() + tmqCom.getStartCommitNotifyFromTmqsim() + + #restart dnode & remove wal + self.restartAndRemoveWal(deleteWal) + + # split vgroup + self.splitVgroups() + + + tdLog.info("insert ctb2 data") + pInsertThread1 = tmqCom.asyncInsertDataByInterlace(paraDict) + pInsertThread.join() + pInsertThread1.join() + + expectRows = 1 + resultList = tmqCom.selectConsumeResult(expectRows) + + if expectrowcnt / 2 >= resultList[0]: + tdLog.info("expect consume rows: %d, act consume rows: %d"%(expectrowcnt / 2, resultList[0])) + tdLog.exit("%d tmq consume rows error!"%consumerId) + + # tmqCom.checkFileContent(consumerId, queryString) + + time.sleep(2) + for i in range(len(topicNameList)): + tdSql.query("drop topic %s"%topicNameList[i]) + + if deleteWal == True: + clusterComCheck.check_vgroups_status(vgroup_numbers=2,db_replica=self.replicaVar,db_name="dbt",count_number=240) + + tdLog.printNoPrefix("======== test case 1 end ...... 
") + + def run(self): + self.prepareTestEnv() + self.tmqCase1(False) + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +event = threading.Event() + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/7-tmq/tmqVnodeSplit-stb-select-duplicatedata.py b/tests/system-test/7-tmq/tmqVnodeSplit-stb-select-duplicatedata.py index 4f3f46725a..0d247b2848 100644 --- a/tests/system-test/7-tmq/tmqVnodeSplit-stb-select-duplicatedata.py +++ b/tests/system-test/7-tmq/tmqVnodeSplit-stb-select-duplicatedata.py @@ -207,8 +207,6 @@ class TDTestCase: def run(self): self.prepareTestEnv() self.tmqCase1(True) - self.prepareTestEnv() - self.tmqCase1(False) def stop(self): tdSql.close() diff --git a/tests/system-test/7-tmq/tmq_per.py b/tests/system-test/7-tmq/tmq_per.py new file mode 100644 index 0000000000..f3701dacab --- /dev/null +++ b/tests/system-test/7-tmq/tmq_per.py @@ -0,0 +1,196 @@ + +import taos +import sys +import time +import socket +import os +import threading +import math + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * +from util.cluster import * +sys.path.append("./7-tmq") +from tmqCommon import * + +from util.cluster import * +sys.path.append("./6-cluster") +from clusterCommonCreate import * +from clusterCommonCheck import clusterComCheck + +class TDTestCase: + def __init__(self): + self.vgroups = 1 + self.ctbNum = 10000 + self.rowsPerTbl = 10000 + + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), False) + + def getDataPath(self): + selfPath = tdCom.getBuildPath() + + return selfPath + '/../sim/dnode%d/data/vnode/vnode%d/wal/*'; + + def prepareTestEnv(self): + tdLog.printNoPrefix("======== prepare test env include database, stable, ctables, and insert data: ") + paraDict = {'dbName': 'dbt', + 'dropFlag': 1, + 'event': '', + 'vgroups': 1, + 'stbName': 'stb', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1},{'type': 'TIMESTAMP', 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], + 'ctbPrefix': 'ctb', + 'ctbStartIdx': 0, + 'ctbNum': 10, + 'rowsPerTbl': 10000, + 'batchNum': 10, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'pollDelay': 60, + 'showMsg': 1, + 'showRow': 1, + 'snapshot': 0} + + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + paraDict['rowsPerTbl'] = self.rowsPerTbl + + tdCom.drop_all_db() + tmqCom.initConsumerTable() + tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], wal_retention_period=36000,vgroups=paraDict["vgroups"],replica=self.replicaVar) + tdLog.info("create stb") + tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) + return + + def restartAndRemoveWal(self, deleteWal): + tdDnodes = cluster.dnodes + tdSql.query("select * from information_schema.ins_vnodes") + for result in tdSql.queryResult: + if result[2] == 'dbt': + tdLog.debug("dnode is %d"%(result[0])) + dnodeId = result[0] + vnodeId = result[1] + + tdDnodes[dnodeId - 1].stoptaosd() + time.sleep(1) + dataPath = self.getDataPath() + dataPath = 
dataPath%(dnodeId,vnodeId) + tdLog.debug("dataPath:%s"%dataPath) + if deleteWal: + if os.system('rm -rf ' + dataPath) != 0: + tdLog.exit("rm error") + + tdDnodes[dnodeId - 1].starttaosd() + time.sleep(1) + break + tdLog.debug("restart dnode ok") + + def splitVgroups(self): + tdSql.query("select * from information_schema.ins_vnodes") + vnodeId = 0 + for result in tdSql.queryResult: + if result[2] == 'dbt': + vnodeId = result[1] + tdLog.debug("vnode is %d"%(vnodeId)) + break + splitSql = "split vgroup %d" %(vnodeId) + tdLog.debug("splitSql:%s"%(splitSql)) + tdSql.query(splitSql) + tdLog.debug("splitSql ok") + + def tmqCase1(self, deleteWal=False): + tdLog.printNoPrefix("======== test case 1: ") + paraDict = {'dbName': 'dbt', + 'dropFlag': 1, + 'event': '', + 'vgroups': 1, + 'stbName': 'stb', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1},{'type': 'TIMESTAMP', 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], + 'ctbPrefix': 'ctb1', + 'ctbStartIdx': 0, + 'ctbNum': 10000, + 'rowsPerTbl': 10000, + 'batchNum': 10000, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'pollDelay': 5, + 'showMsg': 1, + 'showRow': 1, + 'snapshot': 0} + + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + print + paraDict['rowsPerTbl'] = self.rowsPerTbl + + topicNameList = ['topic1'] + # expectRowsList = [] + tmqCom.initConsumerTable() + + tdLog.info("create topics from stb ") + queryString = "stable %s.%s"%(paraDict['dbName'], paraDict['stbName']) + # sqlString = "create topic %s as stable %s" %(topicNameList[0], paraDict['stbName']) + sqlString = "create topic %s as %s" %(topicNameList[0], queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.execute(sqlString) + # tdSql.query(queryString) + # expectRowsList.append(tdSql.getRows()) + + # init consume info, and start tmq_sim, then check consume result + tdLog.info("insert consume info to consume processor") + consumerId = 0 + expectrowcnt = paraDict["rowsPerTbl"] * paraDict["ctbNum"] + topicList = topicNameList[0] + ifcheckdata = 1 + ifManualCommit = 1 + keyList = 'group.id:cgrp1, enable.auto.commit:true, auto.commit.interval.ms:200, auto.offset.reset:earliest' + tmqCom.insertConsumerInfo(consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifManualCommit) + + tdLog.info("create ctb1") + tmqCom.create_ctable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix=paraDict['ctbPrefix'], + ctbNum=paraDict["ctbNum"],ctbStartIdx=paraDict['ctbStartIdx']) + + tdLog.info("insert ctb1 data") + pInsertThread = tmqCom.asyncInsertDataByInterlace(paraDict) + pInsertThread.join() + + + tdLog.info("start consume processor") + tmqCom.startTmqSimProcess(pollDelay=paraDict['pollDelay'],dbName=paraDict["dbName"],showMsg=paraDict['showMsg'], showRow=paraDict['showRow'],snapshot=paraDict['snapshot']) + tdLog.info("wait the consume result") + + tmqCom.getStartConsumeNotifyFromTmqsim() + tmqCom.getStartCommitNotifyFromTmqsim() + + expectRows = 1 + tdLog.info("expectRows:%d"%expectRows) + resultList = tmqCom.selectConsumeResult(expectRows) + # for i in range(len(topicNameList)): + # tdSql.query("drop topic %s"%topicNameList[i]) + + if deleteWal == True: + 
clusterComCheck.check_vgroups_status(vgroup_numbers=1,db_replica=self.replicaVar,db_name="dbt",count_number=240) + tdLog.printNoPrefix("======== test case 1 end ...... ") + + def run(self): + self.prepareTestEnv() + self.tmqCase1(True) + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +event = threading.Event() + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/8-stream/snode_restart_with_checkpoint.py b/tests/system-test/8-stream/snode_restart_with_checkpoint.py index 9567bbe439..d7bfd7b407 100644 --- a/tests/system-test/8-stream/snode_restart_with_checkpoint.py +++ b/tests/system-test/8-stream/snode_restart_with_checkpoint.py @@ -31,7 +31,7 @@ class TDTestCase: tdSql.query("use test") tdSql.query("create snode on dnode 4") tdSql.query("create stream if not exists s1 trigger at_once ignore expired 0 ignore update 0 fill_history 1 into st1 as select _wstart,sum(voltage),groupid from meters partition by groupid interval(2s)") - tdLog.debug("========create stream useing snode and insert data ok========") + tdLog.debug("========create stream using snode and insert data ok========") time.sleep(60) tdDnodes = cluster.dnodes diff --git a/tests/system-test/win-test-file b/tests/system-test/win-test-file index 3daf65b406..aefdb1e824 100644 --- a/tests/system-test/win-test-file +++ b/tests/system-test/win-test-file @@ -1,3 +1,12 @@ +python3 ./test.py -f 2-query/tbname_vgroup.py +python3 ./test.py -f 2-query/stbJoin.py +python3 ./test.py -f 2-query/stbJoin.py -Q 2 +python3 ./test.py -f 2-query/stbJoin.py -Q 3 +python3 ./test.py -f 2-query/stbJoin.py -Q 4 +python3 ./test.py -f 2-query/hint.py +python3 ./test.py -f 2-query/hint.py -Q 2 +python3 ./test.py -f 2-query/hint.py -Q 3 +python3 ./test.py -f 2-query/hint.py -Q 4 python3 ./test.py -f 2-query/nestedQuery.py python3 ./test.py -f 2-query/nestedQuery_str.py python3 ./test.py -f 2-query/nestedQuery_math.py @@ -18,7 +27,30 @@ python3 ./test.py -f 2-query/nestedQuery_math.py -Q 4 python3 ./test.py -f 2-query/nestedQuery_time.py -Q 4 python3 ./test.py -f 2-query/nestedQuery_26.py -Q 4 python3 ./test.py -f 2-query/interval_limit_opt.py -Q 4 +python3 ./test.py -f 2-query/interval_unit.py +python3 ./test.py -f 2-query/interval_unit.py -Q 2 +python3 ./test.py -f 2-query/interval_unit.py -Q 3 +python3 ./test.py -f 2-query/interval_unit.py -Q 4 python3 ./test.py -f 2-query/partition_by_col.py -Q 4 +python3 ./test.py -f 2-query/partition_by_col.py -Q 3 +python3 ./test.py -f 2-query/partition_by_col.py -Q 2 +python3 ./test.py -f 2-query/partition_by_col.py +python3 ./test.py -f 2-query/partition_by_col_agg.py +python3 ./test.py -f 2-query/partition_by_col_agg.py -Q 2 +python3 ./test.py -f 2-query/partition_by_col_agg.py -Q 3 +python3 ./test.py -f 2-query/partition_by_col_agg.py -Q 4 +python3 ./test.py -f 2-query/interval_limit_opt_2.py -Q 4 +python3 ./test.py -f 2-query/interval_limit_opt_2.py -Q 3 +python3 ./test.py -f 2-query/interval_limit_opt_2.py -Q 2 +python3 ./test.py -f 2-query/interval_limit_opt_2.py +python3 ./test.py -f 2-query/func_to_char_timestamp.py +python3 ./test.py -f 2-query/func_to_char_timestamp.py -Q 2 +python3 ./test.py -f 2-query/func_to_char_timestamp.py -Q 3 +python3 ./test.py -f 2-query/func_to_char_timestamp.py -Q 4 +python3 ./test.py -f 2-query/last_cache_scan.py +python3 ./test.py -f 2-query/last_cache_scan.py -Q 2 +python3 ./test.py -f 2-query/last_cache_scan.py -Q 3 +python3 ./test.py -f 2-query/last_cache_scan.py -Q 4 
python3 ./test.py -f 7-tmq/tmqShow.py python3 ./test.py -f 7-tmq/tmqDropStb.py python3 ./test.py -f 7-tmq/subscribeStb0.py @@ -29,11 +61,13 @@ python3 ./test.py -f 7-tmq/subscribeDb0.py -N 3 -n 3 python3 ./test.py -f 7-tmq/ins_topics_test.py python3 ./test.py -f 7-tmq/tmqMaxTopic.py python3 ./test.py -f 7-tmq/tmqParamsTest.py +python3 ./test.py -f 7-tmq/tmqParamsTest.py -R python3 ./test.py -f 7-tmq/tmqClientConsLog.py python3 ./test.py -f 7-tmq/tmqMaxGroupIds.py python3 ./test.py -f 7-tmq/tmqConsumeDiscontinuousData.py python3 ./test.py -f 7-tmq/tmqOffset.py python3 ./test.py -f 7-tmq/tmqDropConsumer.py +python3 ./test.py -f 1-insert/insert_stb.py python3 ./test.py -f 1-insert/delete_stable.py python3 ./test.py -f 2-query/out_of_order.py -Q 3 python3 ./test.py -f 2-query/out_of_order.py @@ -61,10 +95,14 @@ python3 ./test.py -f 2-query/slimit.py -R python3 ./test.py -f 2-query/slimit.py -Q 2 python3 ./test.py -f 2-query/slimit.py -Q 3 python3 ./test.py -f 2-query/slimit.py -Q 4 -python3 ./test.py -f 3-enterprise/restore/restoreDnode.py -N 5 -M 3 -python3 ./test.py -f 3-enterprise/restore/restoreVnode.py -N 5 -M 3 -python3 ./test.py -f 3-enterprise/restore/restoreMnode.py -N 5 -M 3 -python3 ./test.py -f 3-enterprise/restore/restoreQnode.py -N 5 -M 3 +python3 ./test.py -f 2-query/ts-4233.py +python3 ./test.py -f 2-query/ts-4233.py -Q 2 +python3 ./test.py -f 2-query/ts-4233.py -Q 3 +python3 ./test.py -f 2-query/ts-4233.py -Q 4 +python3 ./test.py -f 3-enterprise/restore/restoreDnode.py -N 5 -M 3 -i False +python3 ./test.py -f 3-enterprise/restore/restoreVnode.py -N 5 -M 3 -i False +python3 ./test.py -f 3-enterprise/restore/restoreMnode.py -N 5 -M 3 -i False +python3 ./test.py -f 3-enterprise/restore/restoreQnode.py -N 5 -M 3 -i False python3 ./test.py -f 7-tmq/create_wrong_topic.py python3 ./test.py -f 7-tmq/dropDbR3ConflictTransaction.py -N 3 python3 ./test.py -f 7-tmq/basic5.py @@ -112,16 +150,30 @@ python3 ./test.py -f 7-tmq/stbTagFilter-1ctb.py python3 ./test.py -f 7-tmq/dataFromTsdbNWal.py python3 ./test.py -f 7-tmq/dataFromTsdbNWal-multiCtb.py python3 ./test.py -f 7-tmq/tmq_taosx.py +python3 ./test.py -f 7-tmq/tmq_replay.py +python3 ./test.py -f 7-tmq/tmqSeekAndCommit.py +python3 ./test.py -f 7-tmq/tmq_offset.py +python3 ./test.py -f 7-tmq/tmqDataPrecisionUnit.py python3 ./test.py -f 7-tmq/raw_block_interface_test.py python3 ./test.py -f 7-tmq/stbTagFilter-multiCtb.py python3 ./test.py -f 7-tmq/tmqSubscribeStb-r3.py -N 5 -python3 ./test.py -f 7-tmq/tmq3mnodeSwitch.py -N 6 -M 3 -python3 ./test.py -f 7-tmq/tmq3mnodeSwitch.py -N 6 -M 3 -n 3 +python3 ./test.py -f 7-tmq/tmq3mnodeSwitch.py -N 6 -M 3 -i True +python3 ./test.py -f 7-tmq/tmq3mnodeSwitch.py -N 6 -M 3 -n 3 -i True +python3 test.py -f 7-tmq/tmqVnodeTransform-stb.py -N 2 -n 1 +python3 test.py -f 7-tmq/tmqVnodeTransform-stb.py -N 6 -n 3 +python3 test.py -f 7-tmq/tmqVnodeSplit-stb-select.py -N 2 -n 1 +python3 test.py -f 7-tmq/tmqVnodeSplit-stb-select-duplicatedata.py -N 3 -n 3 +python3 test.py -f 7-tmq/tmqVnodeSplit-stb-select.py -N 3 -n 3 +python3 test.py -f 7-tmq/tmqVnodeSplit-stb.py -N 3 -n 3 +python3 test.py -f 7-tmq/tmqVnodeSplit-column.py -N 3 -n 3 +python3 test.py -f 7-tmq/tmqVnodeSplit-db.py -N 3 -n 3 +python3 test.py -f 7-tmq/tmqVnodeReplicate.py -M 3 -N 3 -n 3 python3 ./test.py -f 99-TDcase/TD-19201.py python3 ./test.py -f 99-TDcase/TD-21561.py python3 ./test.py -f 99-TDcase/TS-3404.py python3 ./test.py -f 99-TDcase/TS-3581.py python3 ./test.py -f 99-TDcase/TS-3311.py +python3 ./test.py -f 99-TDcase/TS-3821.py python3 
./test.py -f 0-others/balance_vgroups_r1.py -N 6 python3 ./test.py -f 0-others/taosShell.py python3 ./test.py -f 0-others/taosShellError.py @@ -139,10 +191,19 @@ python3 ./test.py -f 0-others/user_privilege_show.py python3 ./test.py -f 0-others/user_privilege_all.py python3 ./test.py -f 0-others/fsync.py python3 ./test.py -f 0-others/multilevel.py +python3 ./test.py -f 0-others/ttl.py +python3 ./test.py -f 0-others/ttlChangeOnWrite.py +python3 ./test.py -f 0-others/compress_tsz1.py +python3 ./test.py -f 0-others/compress_tsz2.py +python3 ./test.py -f 0-others/view/non_marterial_view/test_view.py python3 ./test.py -f 0-others/compatibility.py python3 ./test.py -f 0-others/tag_index_basic.py python3 ./test.py -N 3 -f 0-others/walRetention.py +python3 ./test.py -f 0-others/splitVGroupRep1.py -N 3 +python3 ./test.py -f 0-others/splitVGroupRep3.py -N 3 python3 ./test.py -f 0-others/timeRangeWise.py -N 3 +python3 ./test.py -f 0-others/delete_check.py +python3 ./test.py -f 0-others/test_hot_refresh_configurations.py python3 ./test.py -f 1-insert/alter_database.py python3 ./test.py -f 1-insert/alter_replica.py -N 3 python3 ./test.py -f 1-insert/influxdb_line_taosc_insert.py @@ -190,6 +251,7 @@ python3 ./test.py -f 1-insert/rowlength64k_4.py -Q 3 python3 ./test.py -f 1-insert/rowlength64k_4.py -Q 4 python3 ./test.py -f 1-insert/precisionUS.py python3 ./test.py -f 1-insert/precisionNS.py +python3 ./test.py -f 1-insert/test_ts4219.py python3 ./test.py -f 0-others/show.py python3 ./test.py -f 0-others/show_tag_index.py python3 ./test.py -f 0-others/information_schema.py @@ -325,6 +387,8 @@ python3 ./test.py -f 2-query/smaTest.py python3 ./test.py -f 2-query/smaTest.py -R python3 ./test.py -f 0-others/sma_index.py python3 ./test.py -f 2-query/sml_TS-3724.py +python3 ./test.py -f 2-query/sml-TD19291.py +python3 ./test.py -f 2-query/varbinary.py python3 ./test.py -f 2-query/sml.py python3 ./test.py -f 2-query/sml.py -R python3 ./test.py -f 2-query/spread.py @@ -426,7 +490,6 @@ python3 ./test.py -f 6-cluster/5dnode3mnodeRestartDnodeInsertData.py -N 6 -M 3 python3 ./test.py -f 6-cluster/5dnode3mnodeRestartDnodeInsertData.py -N 6 -M 3 -n 3 python3 ./test.py -f 6-cluster/5dnode3mnodeRestartDnodeInsertDataAsync.py -N 6 -M 3 python3 ./test.py -f 6-cluster/manually-test/6dnode3mnodeInsertLessDataAlterRep3to1to3.py -N 6 -M 3 -python3 ./test.py -f 6-cluster/5dnode3mnodeRoll.py -N 3 -C 1 python3 ./test.py -f 6-cluster/5dnode3mnodeAdd1Ddnoe.py -N 7 -M 3 -C 6 python3 ./test.py -f 6-cluster/5dnode3mnodeAdd1Ddnoe.py -N 7 -M 3 -C 6 -n 3 python3 ./test.py -f 6-cluster/5dnode3mnodeRecreateMnode.py -N 6 -M 3 @@ -731,7 +794,9 @@ python3 ./test.py -f 2-query/out_of_order.py -R python3 ./test.py -f 2-query/blockSMA.py -Q 4 python3 ./test.py -f 2-query/projectionDesc.py -Q 4 python3 ./test.py -f 2-query/odbc.py +python3 ./test.py -f 2-query/fill_with_group.py python3 ./test.py -f 99-TDcase/TD-21561.py -Q 4 python3 ./test.py -f 99-TDcase/TD-20582.py python3 ./test.py -f 5-taos-tools/taosbenchmark/insertMix.py -N 3 python3 ./test.py -f 5-taos-tools/taosbenchmark/stt.py -N 3 +python3 ./test.py -f eco-system/meta/database/keep_time_offset.py