diff --git a/Jenkinsfile2 b/Jenkinsfile2
index 8ddee6dbbd..d3fc05a1d2 100644
--- a/Jenkinsfile2
+++ b/Jenkinsfile2
@@ -355,7 +355,7 @@ pipeline {
       }
       parallel {
         stage('check docs') {
-          agent{label " slave1_47 || slave1_48 || slave1_49 || slave1_52 || worker03 || slave215 || slave217 || slave219 || Mac_catalina "}
+          agent{label " slave1_47 || slave1_48 || slave1_49 || slave1_50 || slave1_52 || slave1_59 || slave1_63 || worker03 || slave215 || slave217 || slave219 || Mac_catalina "}
           steps {
             check_docs()
           }
@@ -393,7 +393,7 @@
           agent{label " Mac_catalina "}
           steps {
             catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
-              timeout(time: 20, unit: 'MINUTES'){
+              timeout(time: 30, unit: 'MINUTES'){
                 pre_test()
                 pre_test_build_mac()
               }
@@ -401,7 +401,7 @@
           }
         }
         stage('linux test') {
-          agent{label " slave1_47 || slave1_48 || slave1_49 || slave1_52 || worker03 || slave215 || slave217 || slave219 "}
+          agent{label " slave1_47 || slave1_48 || slave1_49 || slave1_50 || slave1_52 || slave1_59 || slave1_63 || worker03 || slave215 || slave217 || slave219 "}
           options { skipDefaultCheckout() }
           when {
             changeRequest()
diff --git a/cmake/bdb_CMakeLists.txt.in b/cmake/bdb_CMakeLists.txt.in
deleted file mode 100644
index dd21020105..0000000000
--- a/cmake/bdb_CMakeLists.txt.in
+++ /dev/null
@@ -1,13 +0,0 @@
-
-# bdb
-ExternalProject_Add(bdb
-    GIT_REPOSITORY https://github.com/berkeleydb/libdb.git
-    GIT_TAG v5.3.28
-    SOURCE_DIR "${TD_CONTRIB_DIR}/bdb"
-    BINARY_DIR "${TD_CONTRIB_DIR}/bdb"
-    #BUILD_IN_SOURCE TRUE
-    CONFIGURE_COMMAND COMMAND ./dist/configure --enable-debug
-    BUILD_COMMAND "$(MAKE)"
-    INSTALL_COMMAND ""
-    TEST_COMMAND ""
-)
diff --git a/cmake/cmake.define b/cmake/cmake.define
index 44b36d0efa..7db6baafab 100644
--- a/cmake/cmake.define
+++ b/cmake/cmake.define
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.0)
-set(CMAKE_VERBOSE_MAKEFILE FALSE)
+set(CMAKE_VERBOSE_MAKEFILE TRUE)
 set(TD_BUILD_TAOSA_INTERNAL FALSE)
 
 #set output directory
@@ -97,7 +97,15 @@ ENDIF()
 SET(JEMALLOC_ENABLED OFF)
 IF (TD_WINDOWS)
     MESSAGE("${Yellow} set compiler flag for Windows! ${ColourReset}")
-    SET(COMMON_FLAGS "/w /D_WIN32 /DWIN32 /Zi /MTd")
+    IF (${CMAKE_BUILD_TYPE} MATCHES "Release")
+        MESSAGE("${Green} will build Release version! ${ColourReset}")
+        SET(COMMON_FLAGS "/W3 /D_WIN32 /DWIN32 /Zi- /O2 /GL /MD")
+
+    ELSE ()
+        MESSAGE("${Green} will build Debug version! ${ColourReset}")
+        SET(COMMON_FLAGS "/w /D_WIN32 /DWIN32 /Zi /MTd")
+    ENDIF()
+
     SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /MANIFEST:NO")
     # IF (MSVC AND (MSVC_VERSION GREATER_EQUAL 1900))
     #     SET(COMMON_FLAGS "${COMMON_FLAGS} /Wv:18")
@@ -151,6 +159,7 @@ ELSE ()
     CHECK_C_COMPILER_FLAG("-mavx2" COMPILER_SUPPORT_AVX2)
     CHECK_C_COMPILER_FLAG("-mavx512f" COMPILER_SUPPORT_AVX512F)
     CHECK_C_COMPILER_FLAG("-mavx512vbmi" COMPILER_SUPPORT_AVX512BMI)
+    CHECK_C_COMPILER_FLAG("-mavx512vl" COMPILER_SUPPORT_AVX512VL)
 
     IF (COMPILER_SUPPORT_SSE42)
       SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.2")
@@ -158,11 +167,11 @@ ELSE ()
     ENDIF()
 
     IF ("${SIMD_SUPPORT}" MATCHES "true")
-      IF (COMPILER_SUPPORT_FMA)
+      IF (COMPILER_SUPPORT_FMA)
        SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma")
        SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma")
-      ENDIF()
-      IF (COMPILER_SUPPORT_AVX)
+      ENDIF()
+      IF (COMPILER_SUPPORT_AVX)
        SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx")
        SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
       ENDIF()
@@ -175,7 +184,13 @@ ELSE ()
      IF (COMPILER_SUPPORT_AVX512F AND COMPILER_SUPPORT_AVX512BMI)
        SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512vbmi")
        SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512vbmi")
-       MESSAGE(STATUS "avx512 supported by gcc")
+       MESSAGE(STATUS "avx512f/avx512bmi supported by compiler")
+     ENDIF()
+
+     IF (COMPILER_SUPPORT_AVX512VL)
+       SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512vl")
+       SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512vl")
+       MESSAGE(STATUS "avx512vl supported by compiler")
      ENDIF()
    ENDIF()
 
diff --git a/cmake/cmake.options b/cmake/cmake.options
index d34c34dd89..e12de3e4e6 100644
--- a/cmake/cmake.options
+++ b/cmake/cmake.options
@@ -151,6 +151,7 @@ IF(${BUILD_S3})
 
 IF(${BUILD_WITH_S3})
 
+add_definitions(-DUSE_S3)
 option(BUILD_WITH_COS "If build with cos" OFF)
 
 ELSE ()
diff --git a/cmake/leveldb_CMakeLists.txt.in b/cmake/leveldb_CMakeLists.txt.in
deleted file mode 100644
index 6878711bc1..0000000000
--- a/cmake/leveldb_CMakeLists.txt.in
+++ /dev/null
@@ -1,13 +0,0 @@
-
-# leveldb
-ExternalProject_Add(leveldb
-    GIT_REPOSITORY https://github.com/taosdata-contrib/leveldb.git
-    GIT_TAG master
-    SOURCE_DIR "${TD_CONTRIB_DIR}/leveldb"
-    BINARY_DIR ""
-    #BUILD_IN_SOURCE TRUE
-    CONFIGURE_COMMAND ""
-    BUILD_COMMAND ""
-    INSTALL_COMMAND ""
-    TEST_COMMAND ""
-)
\ No newline at end of file
diff --git a/cmake/lucene_CMakeLists.txt.in b/cmake/lucene_CMakeLists.txt.in
deleted file mode 100644
index 9fd7471705..0000000000
--- a/cmake/lucene_CMakeLists.txt.in
+++ /dev/null
@@ -1,12 +0,0 @@
-
-# lucene
-ExternalProject_Add(lucene
-    GIT_REPOSITORY https://github.com/yihaoDeng/LucenePlusPlus.git
-    SOURCE_DIR "${TD_CONTRIB_DIR}/lucene"
-    BINARY_DIR ""
-    #BUILD_IN_SOURCE TRUE
-    CONFIGURE_COMMAND ""
-    BUILD_COMMAND ""
-    INSTALL_COMMAND ""
-    TEST_COMMAND ""
-)
diff --git a/cmake/nuraft_CMakeLists.txt.in b/cmake/nuraft_CMakeLists.txt.in
deleted file mode 100644
index 593c6fed26..0000000000
--- a/cmake/nuraft_CMakeLists.txt.in
+++ /dev/null
@@ -1,12 +0,0 @@
-
-# NuRaft
-ExternalProject_Add(NuRaft
-    GIT_REPOSITORY https://github.com/eBay/NuRaft.git
-    GIT_TAG v1.3.0
-    SOURCE_DIR "${TD_CONTRIB_DIR}/nuraft"
-    BINARY_DIR "${TD_CONTRIB_DIR}/nuraft"
-    CONFIGURE_COMMAND "./prepare.sh"
-    BUILD_COMMAND ""
-    INSTALL_COMMAND ""
-    TEST_COMMAND ""
-    )
\ No newline at end of file
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index c5715bd53f..bb18516ba4 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -109,11 +109,6 @@ cat("${TD_SUPPORT_DIR}/zlib_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
 
 # cJson
 cat("${TD_SUPPORT_DIR}/cjson_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
 
-# leveldb
-if(${BUILD_WITH_LEVELDB})
-  cat("${TD_SUPPORT_DIR}/leveldb_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
-endif(${BUILD_WITH_LEVELDB})
-
 if (${BUILD_CONTRIB})
   if(${BUILD_WITH_ROCKSDB})
     cat("${TD_SUPPORT_DIR}/rocksdb_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
@@ -132,28 +127,11 @@ else()
   endif()
 endif()
 
-# canonical-raft
-if(${BUILD_WITH_CRAFT})
-  cat("${TD_SUPPORT_DIR}/craft_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
-  SET(BUILD_WITH_UV ON CACHE BOOL "craft need libuv" FORCE)
-endif(${BUILD_WITH_CRAFT})
-
-# traft
-if(${BUILD_WITH_TRAFT})
-  cat("${TD_SUPPORT_DIR}/traft_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
-  SET(BUILD_WITH_UV ON CACHE BOOL "traft need libuv" FORCE)
-endif(${BUILD_WITH_TRAFT})
-
 #libuv
 if(${BUILD_WITH_UV})
   cat("${TD_SUPPORT_DIR}/libuv_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
 endif(${BUILD_WITH_UV})
 
-# bdb
-if(${BUILD_WITH_BDB})
-  cat("${TD_SUPPORT_DIR}/bdb_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
-endif(${BUILD_WITH_BDB})
-
 # sqlite
 if(${BUILD_WITH_SQLITE})
   cat("${TD_SUPPORT_DIR}/sqlite_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
@@ -178,17 +156,6 @@ elseif(${BUILD_WITH_COS})
 
 endif()
 
-# lucene
-if(${BUILD_WITH_LUCENE})
-  cat("${TD_SUPPORT_DIR}/lucene_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
-  add_definitions(-DUSE_LUCENE)
-endif(${BUILD_WITH_LUCENE})
-
-# NuRaft
-if(${BUILD_WITH_NURAFT})
-  cat("${TD_SUPPORT_DIR}/nuraft_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
-endif(${BUILD_WITH_NURAFT})
-
 # crashdump
 if(${BUILD_CRASHDUMP})
   cat("${TD_SUPPORT_DIR}/crashdump_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
@@ -317,7 +284,8 @@ if (${BUILD_WITH_ROCKSDB})
       SET(CMAKE_BUILD_TYPE Release)
     endif()
   endif(${TD_LINUX})
-  MESSAGE(STATUS "CXXXX STATUS CONFIG: " ${CMAKE_CXX_FLAGS})
+  MESSAGE(STATUS "ROCKSDB CXXXX STATUS CONFIG: " ${CMAKE_CXX_FLAGS})
+  MESSAGE(STATUS "ROCKSDB C STATUS CONFIG: " ${CMAKE_C_FLAGS})
 
   if(${TD_DARWIN})
     SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=maybe-uninitialized")
@@ -329,8 +297,12 @@ if (${BUILD_WITH_ROCKSDB})
 
   if (${TD_WINDOWS})
     SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4819")
+    SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4244 /wd4819")
    option(WITH_JNI "" OFF)
-   option(WITH_MD_LIBRARY "build with MD" OFF)
+   if(CMAKE_C_FLAGS MATCHES "/MT" OR CMAKE_C_FLAGS MATCHES "/MTd")
+     message("Rocksdb build runtime lib use /MT or /MTd")
+     option(WITH_MD_LIBRARY "build with MD" OFF)
+   endif()
    set(SYSTEM_LIBS ${SYSTEM_LIBS} shlwapi.lib rpcrt4.lib)
 
   endif(${TD_WINDOWS})
@@ -361,9 +333,11 @@ if (${BUILD_WITH_ROCKSDB})
     )
   else()
     if (NOT ${TD_LINUX})
-      MESSAGE(STATUS "CXXXX STATUS CONFIG: " ${CMAKE_CXX_FLAGS})
+      MESSAGE(STATUS "ROCKSDB CXX STATUS CONFIG: " ${CMAKE_CXX_FLAGS})
+      MESSAGE(STATUS "ROCKSDB C STATUS CONFIG: " ${CMAKE_C_FLAGS})
      if(${TD_DARWIN})
        SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=maybe-uninitialized")
+       SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-error=maybe-uninitialized")
      endif(${TD_DARWIN})
 
      if (${TD_DARWIN_ARM64})
@@ -372,8 +346,12 @@ if (${BUILD_WITH_ROCKSDB})
 
      if (${TD_WINDOWS})
        SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4819")
+       SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4244 /wd4819")
       option(WITH_JNI "" OFF)
-      option(WITH_MD_LIBRARY "build with MD" OFF)
+      if(CMAKE_C_FLAGS MATCHES "/MT" OR CMAKE_C_FLAGS MATCHES "/MTd")
+        message("Rocksdb build runtime lib use /MT or /MTd")
+        option(WITH_MD_LIBRARY "build with MD" OFF)
+      endif()
       set(SYSTEM_LIBS ${SYSTEM_LIBS} shlwapi.lib rpcrt4.lib)
 
      endif(${TD_WINDOWS})
@@ -437,26 +415,11 @@ elseif(${BUILD_WITH_COS})
 
 endif()
 
-# lucene
-# To support build on ubuntu: sudo apt-get install libboost-all-dev
-if(${BUILD_WITH_LUCENE})
-  option(ENABLE_TEST "Enable the tests" OFF)
-  add_subdirectory(lucene EXCLUDE_FROM_ALL)
-  target_include_directories(
-    lucene++
-    PUBLIC $
-  )
-
-endif(${BUILD_WITH_LUCENE})
-
-# NuRaft
-if(${BUILD_WITH_NURAFT})
-  add_subdirectory(nuraft EXCLUDE_FROM_ALL)
-endif(${BUILD_WITH_NURAFT})
-
 # pthread
 if(${BUILD_PTHREAD})
-  set(CMAKE_BUILD_TYPE debug)
+  if ("${CMAKE_BUILD_TYPE}" STREQUAL "")
+    SET(CMAKE_BUILD_TYPE Release)
+  endif()
   add_definitions(-DPTW32_STATIC_LIB)
   add_subdirectory(pthread EXCLUDE_FROM_ALL)
   set_target_properties(libpthreadVC3 PROPERTIES OUTPUT_NAME pthread)
@@ -524,30 +487,6 @@ if(${BUILD_WCWIDTH})
   SET_TARGET_PROPERTIES(wcwidth PROPERTIES OUTPUT_NAME wcwidth)
 endif(${BUILD_WCWIDTH})
 
-# CRAFT
-if(${BUILD_WITH_CRAFT})
-  add_library(craft STATIC IMPORTED GLOBAL)
-  set_target_properties(craft PROPERTIES
-    IMPORTED_LOCATION "${CMAKE_CURRENT_SOURCE_DIR}/craft/.libs/libraft.a"
-    INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/craft/include"
-  )
-  # target_link_libraries(craft
-  #   INTERFACE pthread
-  # )
-endif(${BUILD_WITH_CRAFT})
-
-# TRAFT
-if(${BUILD_WITH_TRAFT})
-  add_library(traft STATIC IMPORTED GLOBAL)
-  set_target_properties(traft PROPERTIES
-    IMPORTED_LOCATION "${CMAKE_CURRENT_SOURCE_DIR}/traft/.libs/libraft.a"
-    INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/traft/include"
-  )
-  # target_link_libraries(craft
-  #   INTERFACE pthread
-  # )
-endif(${BUILD_WITH_TRAFT})
-
 # LIBUV
 if(${BUILD_WITH_UV})
     if (TD_WINDOWS)
@@ -559,18 +498,6 @@ if(${BUILD_WITH_UV})
     add_subdirectory(libuv EXCLUDE_FROM_ALL)
 endif(${BUILD_WITH_UV})
 
-# BDB
-if(${BUILD_WITH_BDB})
-  add_library(bdb STATIC IMPORTED GLOBAL)
-  set_target_properties(bdb PROPERTIES
-    IMPORTED_LOCATION "${CMAKE_CURRENT_SOURCE_DIR}/bdb/libdb.a"
-    INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/bdb"
-  )
-  target_link_libraries(bdb
-    INTERFACE pthread
-  )
-endif(${BUILD_WITH_BDB})
-
 # SQLite
 # see https://stackoverflow.com/questions/8774593/cmake-link-to-external-library#comment58570736_10550334
 if(${BUILD_WITH_SQLITE})
@@ -640,13 +567,18 @@ if(${BUILD_GEOS})
   if(${TD_LINUX})
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS_REL}")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_REL}")
-    IF ("${CMAKE_BUILD_TYPE}" STREQUAL "")
+    if ("${CMAKE_BUILD_TYPE}" STREQUAL "")
      SET(CMAKE_BUILD_TYPE Release)
     endif()
   endif(${TD_LINUX})
   option(BUILD_SHARED_LIBS "Build GEOS with shared libraries" OFF)
   add_subdirectory(geos EXCLUDE_FROM_ALL)
-  unset(CMAKE_CXX_STANDARD CACHE) # undo libgeos's setting of global CMAKE_CXX_STANDARD
+  if (${TD_WINDOWS})
+    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+    SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+  else ()
+    unset(CMAKE_CXX_STANDARD CACHE) # undo libgeos's setting of global CMAKE_CXX_STANDARD
+  endif(${TD_WINDOWS})
   target_include_directories(
     geos_c
     PUBLIC $
diff --git a/docs/en/05-get-started/03-package.md b/docs/en/05-get-started/03-package.md
index 3e3c04682f..d34df2c970 100644
--- a/docs/en/05-get-started/03-package.md
+++ b/docs/en/05-get-started/03-package.md
@@ -243,7 +243,7 @@ You can use the TDengine CLI to monitor your TDengine deployment and execute ad
 taos
 ```
 
-The TDengine CLI displays a welcome message and version information to indicate that its connection to the TDengine service was successful. If an error message is displayed, see the [FAQ](/train-faq/faq) for troubleshooting information. At the following prompt, you can execute SQL commands.
+The TDengine CLI displays a welcome message and version information to indicate that its connection to the TDengine service was successful. If an error message is displayed, see the [FAQ](../../train-faq/faq) for troubleshooting information. At the following prompt, you can execute SQL commands.
 
 ```cmd
 taos>
diff --git a/docs/en/05-get-started/_pkg_install.mdx b/docs/en/05-get-started/_pkg_install.mdx
index 32d7c1f376..2372d2ff26 100644
--- a/docs/en/05-get-started/_pkg_install.mdx
+++ b/docs/en/05-get-started/_pkg_install.mdx
@@ -10,7 +10,7 @@ Between official releases, beta versions may be released that contain new featur
 
 
 
-For information about installing TDengine, see [Install and Uninstall](/operation/pkg-install).
+For information about installing TDengine, see [Install and Uninstall](../../operation/pkg-install).
 
 For information about TDengine releases, see [All Downloads](https://tdengine.com/all-downloads)
 
diff --git a/docs/en/05-get-started/index.md b/docs/en/05-get-started/index.md
index cc3b4826dd..5c4ab59f75 100644
--- a/docs/en/05-get-started/index.md
+++ b/docs/en/05-get-started/index.md
@@ -12,7 +12,7 @@ import StackOverflowSVG from './stackoverflow.svg'
 
 You can install and run TDengine on Linux/Windows/macOS machines as well as Docker containers. You can also deploy TDengine as a managed service with TDengine Cloud.
 
-The full package of TDengine includes the TDengine Server (`taosd`), TDengine Client (`taosc`), taosAdapter for connecting with third-party systems and providing a RESTful interface, a command-line interface, and some tools. In addition to connectors for multiple languages, TDengine also provides a [RESTful interface](/reference/rest-api) through [taosAdapter](/reference/taosadapter).
+The full package of TDengine includes the TDengine Server (`taosd`), TDengine Client (`taosc`), taosAdapter for connecting with third-party systems and providing a RESTful interface, a command-line interface, and some tools. In addition to connectors for multiple languages, TDengine also provides a [RESTful interface](../reference/rest-api) through [taosAdapter](../reference/taosadapter).
 
 ```mdx-code-block
 import DocCardList from '@theme/DocCardList';
diff --git a/docs/en/07-develop/01-connect/_connect_java.mdx b/docs/en/07-develop/01-connect/_connect_java.mdx
index 538e27fcc3..fda86f2221 100644
--- a/docs/en/07-develop/01-connect/_connect_java.mdx
+++ b/docs/en/07-develop/01-connect/_connect_java.mdx
@@ -12,4 +12,4 @@ When using REST connection, the feature of bulk pulling can be enabled if the si
 {{#include docs/examples/java/src/main/java/com/taos/example/WSConnectExample.java:main}}
 ```
 
-More configuration about connection, please refer to [Java Connector](/reference/connector/java)
+More configuration about connection, please refer to [Java Connector](../../reference/connector/java)
diff --git a/docs/en/07-develop/01-connect/index.md b/docs/en/07-develop/01-connect/index.md
index 3ca44783c4..3f09f9fb6a 100644
--- a/docs/en/07-develop/01-connect/index.md
+++ b/docs/en/07-develop/01-connect/index.md
@@ -22,7 +22,7 @@ import VerifyLinux from "../../14-reference/03-connector/_verify_linux.mdx";
 import VerifyWindows from "../../14-reference/03-connector/_verify_windows.mdx";
 import VerifyMacOS from "../../14-reference/03-connector/_verify_macos.mdx";
 
-Any application running on any platform can access TDengine through the REST API provided by TDengine. For information, see [REST API](/reference/rest-api/). Applications can also use the connectors for various programming languages, including C/C++, Java, Python, Go, Node.js, C#, and Rust, to access TDengine. These connectors support connecting to TDengine clusters using both native interfaces (taosc). Some connectors also support connecting over a REST interface. Community developers have also contributed several unofficial connectors, such as the ADO.NET connector, the Lua connector, and the PHP connector.
+Any application running on any platform can access TDengine through the REST API provided by TDengine. For information, see [REST API](../../reference/rest-api/). Applications can also use the connectors for various programming languages, including C/C++, Java, Python, Go, Node.js, C#, and Rust, to access TDengine. These connectors support connecting to TDengine clusters using both native interfaces (taosc). Some connectors also support connecting over a REST interface. Community developers have also contributed several unofficial connectors, such as the ADO.NET connector, the Lua connector, and the PHP connector.
 
 ## Establish Connection
 
@@ -36,7 +36,7 @@ For REST and native connections, connectors provide similar APIs for performing
 Key differences:
 
 3. The REST connection is more accessible with cross-platform support, however it results in a 30% performance downgrade.
-1. The TDengine client driver (taosc) has the highest performance with all the features of TDengine like [Parameter Binding](/reference/connector/cpp#parameter-binding-api), [Subscription](/reference/connector/cpp#subscription-and-consumption-api), etc.
+1. The TDengine client driver (taosc) has the highest performance with all the features of TDengine like [Parameter Binding](../../reference/connector/cpp#parameter-binding-api), [Subscription](../../reference/connector/cpp#subscription-and-consumption-api), etc.
 
 ## Install Client Driver taosc
 
diff --git a/docs/en/07-develop/02-model/index.mdx b/docs/en/07-develop/02-model/index.mdx
index 4524a66a41..a18e1932ab 100644
--- a/docs/en/07-develop/02-model/index.mdx
+++ b/docs/en/07-develop/02-model/index.mdx
@@ -3,9 +3,9 @@ title: Data Model
 description: This document describes the data model of TDengine.
 ---
 
-The data model employed by TDengine is similar to that of a relational database. You have to create databases and tables. You must design the data model based on your own business and application requirements. You should design the [STable](/concept/#super-table-stable) (an abbreviation for super table) schema to fit your data. This chapter will explain the big picture without getting into syntactical details.
+The data model employed by TDengine is similar to that of a relational database. You have to create databases and tables. You must design the data model based on your own business and application requirements. You should design the [STable](../../concept/#super-table-stable) (an abbreviation for super table) schema to fit your data. This chapter will explain the big picture without getting into syntactical details.
 
-Note: before you read this chapter, please make sure you have already read through [Key Concepts](/concept/), since TDengine introduces new concepts like "one table for one [data collection point](/concept/#data-collection-point)" and "[super table](/concept/#super-table-stable)".
+Note: before you read this chapter, please make sure you have already read through [Key Concepts](../../concept/), since TDengine introduces new concepts like "one table for one [data collection point](../../concept/#data-collection-point)" and "[super table](../../concept/#super-table-stable)".
 
 ## Create Database
 
@@ -22,7 +22,7 @@ In the above SQL statement:
 - a new data file will be created every 10 days
 - the size of the write cache pool on each VNode is 16 MB
 - the number of vgroups is 100
-- WAL is enabled but fsync is disabled For more details please refer to [Database](/taos-sql/database).
+- WAL is enabled but fsync is disabled For more details please refer to [Database](../../taos-sql/database).
 
 After creating a database, the current database in use can be switched using SQL command `USE`. For example the SQL statement below switches the current database to `power`.
 
@@ -41,13 +41,13 @@ Without the current database specified, table name must be preceded with the cor
 
 ## Create STable
 
-In a time-series application, there may be multiple kinds of data collection points. For example, in the electrical power system there are meters, transformers, bus bars, switches, etc. For easy and efficient aggregation of multiple tables, one STable needs to be created for each kind of data collection point. For example, for the meters in [table 1](/concept/#model_table1), the SQL statement below can be used to create the super table.
+In a time-series application, there may be multiple kinds of data collection points. For example, in the electrical power system there are meters, transformers, bus bars, switches, etc. For easy and efficient aggregation of multiple tables, one STable needs to be created for each kind of data collection point. For example, for the meters in [table 1](../../concept/#model_table1), the SQL statement below can be used to create the super table.
 
 ```sql
 CREATE STABLE meters (ts timestamp, current float, voltage int, phase float) TAGS (location binary(64), groupId int);
 ```
 
-Similar to creating a regular table, when creating a STable, the name and schema need to be provided. In the STable schema, the first column must always be a timestamp (like ts in the example), and the other columns (like current, voltage and phase in the example) are the data collected. The remaining columns can [contain data of type](/taos-sql/data-type/) integer, float, double, string etc. In addition, the schema for tags, like location and groupId in the example, must be provided. The tag type can be integer, float, string, etc. Tags are essentially the static properties of a data collection point. For example, properties like the location, device type, device group ID, manager ID are tags. Tags in the schema can be added, removed or updated. Please refer to [STable](/taos-sql/stable) for more details.
+Similar to creating a regular table, when creating a STable, the name and schema need to be provided. In the STable schema, the first column must always be a timestamp (like ts in the example), and the other columns (like current, voltage and phase in the example) are the data collected. The remaining columns can [contain data of type](../../taos-sql/data-type/) integer, float, double, string etc. In addition, the schema for tags, like location and groupId in the example, must be provided. The tag type can be integer, float, string, etc. Tags are essentially the static properties of a data collection point. For example, properties like the location, device type, device group ID, manager ID are tags. Tags in the schema can be added, removed or updated. Please refer to [STable](../../taos-sql/stable) for more details.
 
 For each kind of data collection point, a corresponding STable must be created. There may be many STables in an application. For electrical power system, we need to create a STable respectively for meters, transformers, busbars, switches. There may be multiple kinds of data collection points on a single device, for example there may be one data collection point for electrical data like current and voltage and another data collection point for environmental data like temperature, humidity and wind direction. Multiple STables are required for these kinds of devices.
 
@@ -61,7 +61,7 @@ A specific table needs to be created for each data collection point. Similar to
 CREATE TABLE d1001 USING meters TAGS ("California.SanFrancisco", 2);
 ```
 
-In the above SQL statement, "d1001" is the table name, "meters" is the STable name, followed by the value of tag "Location" and the value of tag "groupId", which are "California.SanFrancisco" and "2" respectively in the example. The tag values can be updated after the table is created. Please refer to [Tables](/taos-sql/table) for details.
+In the above SQL statement, "d1001" is the table name, "meters" is the STable name, followed by the value of tag "Location" and the value of tag "groupId", which are "California.SanFrancisco" and "2" respectively in the example. The tag values can be updated after the table is created. Please refer to [Tables](../../taos-sql/table) for details.
 
 It's suggested to use the globally unique ID of a data collection point as the table name. For example the device serial number could be used as a unique ID. If a unique ID doesn't exist, multiple IDs that are not globally unique can be combined to form a globally unique ID. It's not recommended to use a globally unique ID as tag value.
 
@@ -75,7 +75,7 @@ INSERT INTO d1001 USING meters TAGS ("California.SanFrancisco", 2) VALUES (now,
 
 In the above SQL statement, a row with value `(now, 10.2, 219, 0.32)` will be inserted into table "d1001". If table "d1001" doesn't exist, it will be created automatically using STable "meters" as template with tag value `"California.SanFrancisco", 2`.
 
-For more details please refer to [Create Table Automatically](/taos-sql/insert#automatically-create-table-when-inserting).
+For more details please refer to [Create Table Automatically](../../taos-sql/insert#automatically-create-table-when-inserting).
 
 ## Single Column vs Multiple Column
 
diff --git a/docs/en/07-develop/03-insert-data/01-sql-writing.mdx b/docs/en/07-develop/03-insert-data/01-sql-writing.mdx
index 4d1b67e451..1e719283cc 100644
--- a/docs/en/07-develop/03-insert-data/01-sql-writing.mdx
+++ b/docs/en/07-develop/03-insert-data/01-sql-writing.mdx
@@ -33,7 +33,7 @@ The below SQL statement is used to insert one row into table "d1001".
 
 INSERT INTO d1001 VALUES (ts1, 10.3, 219, 0.31);
 ```
-`ts1` is Unix timestamp, the timestamps which is larger than the difference between current time and KEEP in config is only allowed. For further detail, refer to [TDengine SQL insert timestamp section](/taos-sql/insert).
+`ts1` is Unix timestamp, the timestamps which is larger than the difference between current time and KEEP in config is only allowed. For further detail, refer to [TDengine SQL insert timestamp section](../../../taos-sql/insert).
 
 ### Insert Multiple Rows
 
@@ -43,7 +43,7 @@ Multiple rows can be inserted in a single SQL statement. The example below inser
 
 INSERT INTO d1001 VALUES (ts2, 10.2, 220, 0.23) (ts2, 10.3, 218, 0.25);
 ```
-`ts1` and `ts2` is Unix timestamp, the timestamps which is larger than the difference between current time and KEEP in config is only allowed. For further detail, refer to [TDengine SQL insert timestamp section](/taos-sql/insert).
+`ts1` and `ts2` is Unix timestamp, the timestamps which is larger than the difference between current time and KEEP in config is only allowed. For further detail, refer to [TDengine SQL insert timestamp section](../../../taos-sql/insert).
 
 ### Insert into Multiple Tables
 
@@ -53,9 +53,9 @@ Data can be inserted into multiple tables in the same SQL statement. The example
 
 INSERT INTO d1001 VALUES (ts1, 10.3, 219, 0.31) (ts2, 12.6, 218, 0.33) d1002 VALUES (ts3, 12.3, 221, 0.31);
 ```
-`ts1`, `ts2` and `ts3` is Unix timestamp, the timestamps which is larger than the difference between current time and KEEP in config is only allowed. For further detail, refer to [TDengine SQL insert timestamp section](/taos-sql/insert).
+`ts1`, `ts2` and `ts3` is Unix timestamp, the timestamps which is larger than the difference between current time and KEEP in config is only allowed. For further detail, refer to [TDengine SQL insert timestamp section](../../../taos-sql/insert).
 
-For more details about `INSERT` please refer to [INSERT](/taos-sql/insert).
+For more details about `INSERT` please refer to [INSERT](../../../taos-sql/insert).
 
 :::info
 
diff --git a/docs/en/07-develop/03-insert-data/20-kafka-writting.mdx b/docs/en/07-develop/03-insert-data/20-kafka-writting.mdx
index 89ca10b669..e8de010d5b 100644
--- a/docs/en/07-develop/03-insert-data/20-kafka-writting.mdx
+++ b/docs/en/07-develop/03-insert-data/20-kafka-writting.mdx
@@ -35,7 +35,7 @@ bin/kafka-topics.sh --bootstrap-server=localhost:9092 --describe
 
 ## Insert into TDengine
 
-We can write data into TDengine via SQL or Schemaless. For more information, please refer to [Insert Using SQL](/develop/insert-data/sql-writing/) or [High Performance Writing](/develop/insert-data/high-volume/) or [Schemaless Writing](/reference/schemaless/).
+We can write data into TDengine via SQL or Schemaless. For more information, please refer to [Insert Using SQL](../sql-writing/) or [High Performance Writing](../high-volume/) or [Schemaless Writing](../../../reference/schemaless/).
 
 ## Examples
 
diff --git a/docs/en/07-develop/03-insert-data/30-influxdb-line.mdx b/docs/en/07-develop/03-insert-data/30-influxdb-line.mdx
index bd430d5973..b90b92510a 100644
--- a/docs/en/07-develop/03-insert-data/30-influxdb-line.mdx
+++ b/docs/en/07-develop/03-insert-data/30-influxdb-line.mdx
@@ -46,7 +46,7 @@ meters,location=California.LosAngeles,groupid=2 current=13.4,voltage=223,phase=0
 
 :::
 
-For more details please refer to [InfluxDB Line Protocol](https://docs.influxdata.com/influxdb/v2.0/reference/syntax/line-protocol/) and [TDengine Schemaless](/reference/schemaless/#Schemaless-Line-Protocol)
+For more details please refer to [InfluxDB Line Protocol](https://docs.influxdata.com/influxdb/v2.0/reference/syntax/line-protocol/) and [TDengine Schemaless](../../../reference/schemaless/#Schemaless-Line-Protocol)
 
 ## Examples
 
diff --git a/docs/en/07-develop/04-query-data/index.mdx b/docs/en/07-develop/04-query-data/index.mdx
index 5a05d22d85..413baf6b2c 100644
--- a/docs/en/07-develop/04-query-data/index.mdx
+++ b/docs/en/07-develop/04-query-data/index.mdx
@@ -128,7 +128,7 @@ For more information, see [Aggregate by Window](../../taos-sql/distinguished).
 
 ### Query
 
-In the section describing [Insert](/develop/insert-data/sql-writing), a database named `power` is created and some data are inserted into STable `meters`. Below sample code demonstrates how to query the data in this STable.
+In the section describing [Insert](../insert-data/sql-writing), a database named `power` is created and some data are inserted into STable `meters`. Below sample code demonstrates how to query the data in this STable.
 
 
 
diff --git a/docs/en/07-develop/index.md b/docs/en/07-develop/index.md
index 8f80b82b97..4ed5e8c19f 100644
--- a/docs/en/07-develop/index.md
+++ b/docs/en/07-develop/index.md
@@ -6,7 +6,7 @@ description: This document describes how to use the various components of TDengi
 
 Before creating an application to process time-series data with TDengine, consider the following:
 1. Choose the method to connect to TDengine. TDengine offers a REST API that can be used with any programming language. It also has connectors for a variety of languages.
-2. Design the data model based on your own use cases. Consider the main [concepts](/concept/) of TDengine, including "one table per data collection point" and the supertable. Learn about static labels, collected metrics, and subtables. Depending on the characteristics of your data and your requirements, you decide to create one or more databases and design a supertable schema that fit your data.
+2. Design the data model based on your own use cases. Consider the main [concepts](../concept/) of TDengine, including "one table per data collection point" and the supertable. Learn about static labels, collected metrics, and subtables. Depending on the characteristics of your data and your requirements, you decide to create one or more databases and design a supertable schema that fit your data.
 3. Decide how you will insert data. TDengine supports writing using standard SQL, but also supports schemaless writing, so that data can be written directly without creating tables manually.
 4. Based on business requirements, find out what SQL query statements need to be written. You may be able to repurpose any existing SQL.
 5. If you want to run real-time analysis based on time series data, including various dashboards, use the TDengine stream processing component instead of deploying complex systems such as Spark or Flink.
@@ -14,7 +14,7 @@ Before creating an application to process time-series data with TDengine, consid
 7. In many use cases (such as fleet management), the application needs to obtain the latest status of each data collection point. It is recommended that you use the cache function of TDengine instead of deploying Redis separately.
 8. If you find that the SQL functions of TDengine cannot meet your requirements, then you can use user-defined functions to solve the problem.
 
-This section is organized in the order described above. For ease of understanding, TDengine provides sample code for each supported programming language for each function. If you want to learn more about the use of SQL, please read the [SQL manual](/taos-sql/). For a more in-depth understanding of the use of each connector, please read the [Connector Reference Guide](/reference/connector/). If you also want to integrate TDengine with third-party systems, such as Grafana, please refer to the [third-party tools](/third-party/).
+This section is organized in the order described above. For ease of understanding, TDengine provides sample code for each supported programming language for each function. If you want to learn more about the use of SQL, please read the [SQL manual](../taos-sql/). For a more in-depth understanding of the use of each connector, please read the [Connector Reference Guide](../reference/connector/). If you also want to integrate TDengine with third-party systems, such as Grafana, please refer to the [third-party tools](../third-party/).
 
 If you encounter any problems during the development process, please click ["Submit an issue"](https://github.com/taosdata/TDengine/issues/new/choose) at the bottom of each page and submit it on GitHub right away.
 
diff --git a/docs/en/10-deployment/01-deploy.md b/docs/en/10-deployment/01-deploy.md
index 4a6a3aae41..6e7e0d1b7c 100644
--- a/docs/en/10-deployment/01-deploy.md
+++ b/docs/en/10-deployment/01-deploy.md
@@ -72,7 +72,7 @@ For all the dnodes in a TDengine cluster, the below parameters must be configure
 
 ## Start Cluster
 
-The first dnode can be started following the instructions in [Get Started](/get-started/). Then TDengine CLI `taos` can be launched to execute command `show dnodes`, the output is as following for example:
+The first dnode can be started following the instructions in [Get Started](../../get-started/). Then TDengine CLI `taos` can be launched to execute command `show dnodes`, the output is as following for example:
 
 ```
 taos> show dnodes;
@@ -90,7 +90,7 @@ From the above output, it is shown that the end point of the started dnode is "h
 
 There are a few steps necessary to add other dnodes in the cluster.
 
-Second, we can start `taosd` as instructed in [Get Started](/get-started/).
+Second, we can start `taosd` as instructed in [Get Started](../../get-started/).
 
 Then, on the first dnode i.e. h1.tdengine.com in our example, use TDengine CLI `taos` to execute the following command:
 
diff --git a/docs/en/12-taos-sql/26-udf.md b/docs/en/12-taos-sql/26-udf.md
index b533b98b3d..f86b535927 100644
--- a/docs/en/12-taos-sql/26-udf.md
+++ b/docs/en/12-taos-sql/26-udf.md
@@ -53,7 +53,7 @@ CREATE AGGREGATE FUNCTION function_name AS library_path OUTPUTTYPE output_type [
 CREATE AGGREGATE FUNCTION l2norm AS "/home/taos/udf_example/libl2norm.so" OUTPUTTYPE DOUBLE bufsize 64;
 ```
 
-For more information about user-defined functions, see [User-Defined Functions](/develop/udf).
+For more information about user-defined functions, see [User-Defined Functions](../../develop/udf).
 
 ## Manage UDF
 
diff --git a/docs/en/13-operation/10-monitor.md b/docs/en/13-operation/10-monitor.md
index f1be4c5fd3..009db425a4 100644
--- a/docs/en/13-operation/10-monitor.md
+++ b/docs/en/13-operation/10-monitor.md
@@ -41,7 +41,7 @@ Launch `TDinsight.sh` with the command above and restart Grafana, then open Dash
 
 ## log database
 
-The data of tdinsight dashboard is stored in `log` database (default. You can change it in taoskeeper's config file. For more infrmation, please reference to [taoskeeper document](/reference/taosKeeper)). The taoskeeper will create log database on taoskeeper startup.
+The data of tdinsight dashboard is stored in `log` database (default. You can change it in taoskeeper's config file. For more infrmation, please reference to [taoskeeper document](../../reference/taosKeeper)). The taoskeeper will create log database on taoskeeper startup.
 
 ### cluster\_info table
 
diff --git a/docs/en/14-reference/02-rest-api/02-rest-api.mdx b/docs/en/14-reference/02-rest-api/02-rest-api.mdx
index 4da987213c..8f8e966f8d 100644
--- a/docs/en/14-reference/02-rest-api/02-rest-api.mdx
+++ b/docs/en/14-reference/02-rest-api/02-rest-api.mdx
@@ -514,4 +514,4 @@ Response body:
 
 ## Reference
 
-[taosAdapter](/reference/taosadapter/)
+[taosAdapter](../taosadapter/)
diff --git a/docs/en/14-reference/03-connector/03-cpp.mdx b/docs/en/14-reference/03-connector/03-cpp.mdx
index f6ebf0fe47..27adb58c12 100644
--- a/docs/en/14-reference/03-connector/03-cpp.mdx
+++ b/docs/en/14-reference/03-connector/03-cpp.mdx
@@ -24,7 +24,7 @@ The dynamic libraries for the TDengine client driver are located in.
 
 ## Supported platforms
 
-Please refer to [list of supported platforms](/reference/connector#supported-platforms)
+Please refer to [list of supported platforms](../#supported-platforms)
 
 ## Supported versions
 
@@ -32,7 +32,7 @@ The version number of the TDengine client driver and the version number of the T
 
 ## Installation Steps
 
-Please refer to the [Installation Steps](/reference/connector#installation-steps) for TDengine client driver installation
+Please refer to the [Installation Steps](../#installation-steps) for TDengine client driver installation
 
 ## Establishing a connection
 
@@ -394,7 +394,7 @@ The specific functions related to the interface are as follows (see also the [pr
 
 ### Schemaless Writing API
 
-In addition to writing data using the SQL method or the parameter binding API, writing can also be done using schemaless writing, which eliminates the need to create a super table/data sub-table structure in advance and writes the data directly. The TDengine system automatically creates and maintains the required table structure based on the written data content. The use of schemaless writing is described in the chapter [Schemaless Writing](/reference/schemaless/), and the C/C++ API used with it is described here.
+In addition to writing data using the SQL method or the parameter binding API, writing can also be done using schemaless writing, which eliminates the need to create a super table/data sub-table structure in advance and writes the data directly. The TDengine system automatically creates and maintains the required table structure based on the written data content. The use of schemaless writing is described in the chapter [Schemaless Writing](../../schemaless/), and the C/C++ API used with it is described here.
 
 - `TAOS_RES* taos_schemaless_insert(TAOS* taos, const char* lines[], int numLines, int protocol, int precision)`
 
diff --git a/docs/en/14-reference/03-connector/04-java.mdx b/docs/en/14-reference/03-connector/04-java.mdx
index f770ce0d5d..039395cc30 100644
--- a/docs/en/14-reference/03-connector/04-java.mdx
+++ b/docs/en/14-reference/03-connector/04-java.mdx
@@ -148,7 +148,7 @@ TDengine currently supports timestamp, number, character, Boolean type, and the
 **Note**: Only TAG supports JSON types
 Due to historical reasons, the BINARY type data in TDengine is not truly binary data and is no longer recommended for use. Please use VARBINARY type instead.
 
-GEOMETRY type is binary data in little endian byte order, which complies with the WKB specification. For detailed information, please refer to [Data Type] (/tao-sql/data-type/#Data Types)
+GEOMETRY type is binary data in little endian byte order, which complies with the WKB specification. For detailed information, please refer to [Data Type](../../../taos-sql/data-type/)
 For WKB specifications, please refer to [Well Known Binary (WKB)]( https://libgeos.org/specifications/wkb/ )
 For Java connector, the jts library can be used to easily create GEOMETRY type objects, serialize them, and write them to TDengine. Here is an example [Geometry example](https://github.com/taosdata/TDengine/blob/3.0/examples/JDBC/JDBCDemo/src/main/java/com/taosdata/example/GeometryDemo.java)
 
@@ -160,7 +160,7 @@ For Java connector, the jts library can be used to easily create GEOMETRY type o
 
 Before using Java Connector to connect to the database, the following conditions are required.
 
 - Java 1.8 or above runtime environment and Maven 3.6 or above installed
-- TDengine client driver installed (required for native connections, not required for REST connections), please refer to [Installing Client Driver](/reference/connector#Install-Client-Driver)
+- TDengine client driver installed (required for native connections, not required for REST connections), please refer to [Installing Client Driver](../#Install-Client-Driver)
 
 ### Install the connectors
 
@@ -368,7 +368,7 @@ The configuration parameters in properties are as follows.
 - TSDBDriver.PROPERTY_KEY_MESSAGE_WAIT_TIMEOUT: message transmission timeout in milliseconds, the default value is 60000 ms. It only takes effect when using JDBC REST connection and batchfetch is true.
 - TSDBDriver.PROPERTY_KEY_USE_SSL: connecting Securely Using SSL. true: using SSL connection, false: not using SSL connection. It only takes effect when using JDBC REST connection.
 - TSDBDriver.HTTP_POOL_SIZE: size of REST concurrent requests. The default value is 20.
-  For JDBC native connections, you can specify other parameters, such as log level, SQL length, etc., by specifying URL and Properties. For more detailed configuration, please refer to [Client Configuration](/reference/config/#Client-Only).
+  For JDBC native connections, you can specify other parameters, such as log level, SQL length, etc., by specifying URL and Properties. For more detailed configuration, please refer to [Client Configuration](../../config/#Client-Only).
 
 ### Priority of configuration parameters
 
diff --git a/docs/en/14-reference/03-connector/05-go.mdx b/docs/en/14-reference/03-connector/05-go.mdx
index a0be7a4a02..33f7a93439 100644
--- a/docs/en/14-reference/03-connector/05-go.mdx
+++ b/docs/en/14-reference/03-connector/05-go.mdx
@@ -74,7 +74,7 @@ If it is a TDengine error, you can get the error code and error information in t
 ### Pre-installation preparation
 
 * Install Go development environment (Go 1.14 and above, GCC 4.8.5 and above)
-* If you use the native connector, please install the TDengine client driver. Please refer to [Install Client Driver](/reference/connector/#install-client-driver) for specific steps
+* If you use the native connector, please install the TDengine client driver. Please refer to [Install Client Driver](../#install-client-driver) for specific steps
 
 Configure the environment variables and check the command.
 
diff --git a/docs/en/14-reference/03-connector/06-rust.mdx b/docs/en/14-reference/03-connector/06-rust.mdx
index 5a44b161cb..0981df6724 100644
--- a/docs/en/14-reference/03-connector/06-rust.mdx
+++ b/docs/en/14-reference/03-connector/06-rust.mdx
@@ -80,7 +80,7 @@ Note: Only TAG supports JSON types
 ### Pre-installation preparation
 
 * Install the Rust development toolchain
-* If using the native connection, please install the TDengine client driver. Please refer to [install client driver](/reference/connector#install-client-driver)
+* If using the native connection, please install the TDengine client driver. Please refer to [install client driver](../#install-client-driver)
 
 ### Install the connectors
 
diff --git a/docs/en/14-reference/03-connector/07-python.mdx b/docs/en/14-reference/03-connector/07-python.mdx
index b699019b44..ccc270d3be 100644
--- a/docs/en/14-reference/03-connector/07-python.mdx
+++ b/docs/en/14-reference/03-connector/07-python.mdx
@@ -7,7 +7,7 @@ description: This document describes taospy, the TDengine Python connector.
 import Tabs from "@theme/Tabs";
 import TabItem from "@theme/TabItem";
 
-`taospy` is the official Python connector for TDengine. taospy provides a rich API that makes it easy for Python applications to use TDengine. `taospy` wraps both the [native interface](/reference/connector/cpp) and [REST interface](/reference/rest-api) of TDengine, which correspond to the `taos` and `taosrest` modules of the `taospy` package, respectively.
+`taospy` is the official Python connector for TDengine. taospy provides a rich API that makes it easy for Python applications to use TDengine. `taospy` wraps both the [native interface](../cpp) and [REST interface](../../rest-api) of TDengine, which correspond to the `taos` and `taosrest` modules of the `taospy` package, respectively.
 
 In addition to wrapping the native and REST interfaces, `taospy` also provides a set of programming interfaces that conforms to the [Python Data Access Specification (PEP 249)](https://peps.python.org/pep-0249/). It is easy to integrate `taospy` with many third-party tools, such as [SQLAlchemy](https://www.sqlalchemy.org/) and [pandas](https://pandas.pydata.org/).
 `taos-ws-py` is an optional package to enable using WebSocket to connect TDengine.
@@ -17,7 +17,7 @@ The direct connection to the server using the native interface provided by the c
 The source code for the Python connector is hosted on [GitHub](https://github.com/taosdata/taos-connector-python).
 
 ## Supported platforms
-- The [supported platforms](/reference/connector/#supported-platforms) for the native connection are the same as the ones supported by the TDengine client.
+- The [supported platforms](../#supported-platforms) for the native connection are the same as the ones supported by the TDengine client.
 - REST connections are supported on all platforms that can run Python.
 
 ### Supported features
@@ -95,7 +95,7 @@ TDengine currently supports timestamp, number, character, Boolean type, and the
 1. Install Python. The recent taospy package requires Python 3.6.2+. The earlier versions of taospy require Python 3.7+. The taos-ws-py package requires Python 3.7+. If Python is not available on your system, refer to the [Python BeginnersGuide](https://wiki.python.org/moin/BeginnersGuide/Download) to install it.
 2. Install [pip](https://pypi.org/project/pip/). In most cases, the Python installer comes with the pip utility. If not, please refer to [pip documentation](https://pip.pypa.io/en/stable/installation/) to install it.
 
-If you use a native connection, you will also need to [Install Client Driver](/reference/connector#Install-Client-Driver). The client install package includes the TDengine client dynamic link library (`libtaos.so` or `taos.dll`) and the TDengine CLI.
+If you use a native connection, you will also need to [Install Client Driver](../#Install-Client-Driver). The client install package includes the TDengine client dynamic link library (`libtaos.so` or `taos.dll`) and the TDengine CLI.
 
 ### Install via pip
 
@@ -444,7 +444,7 @@ The best practice for TaosCursor is to create a cursor at the beginning of a que
 
 ##### Use of the RestClient class
 
-The `RestClient` class is a direct wrapper for the [REST API](/reference/rest-api). It contains only a `sql()` method for executing arbitrary SQL statements and returning the result.
+The `RestClient` class is a direct wrapper for the [REST API](../../rest-api). It contains only a `sql()` method for executing arbitrary SQL statements and returning the result.
 
 ```python title="Use of RestClient"
 {{#include docs/examples/python/rest_client_example.py}}
@@ -501,7 +501,7 @@ The queried results can only be fetched once. For example, only one of `fetch_al
 
 
 
-The `RestClient` class is a direct wrapper for the [REST API](/reference/rest-api). It contains only a `sql()` method for executing arbitrary SQL statements and returning the result.
+The `RestClient` class is a direct wrapper for the [REST API](../../rest-api). It contains only a `sql()` method for executing arbitrary SQL statements and returning the result.
 
 ```python
 {{#include docs/examples/python/rest_client_example.py}}
@@ -561,7 +561,7 @@ The `TaosConnection` class and the `TaosResult` class already implement all the
 
 ##### Use of the RestClient class
 
-The `RestClient` class is a direct wrapper for the [REST API](/reference/rest-api). It contains only a `sql()` method for executing arbitrary SQL statements and returning the result.
+The `RestClient` class is a direct wrapper for the [REST API](../../rest-api). It contains only a `sql()` method for executing arbitrary SQL statements and returning the result.
 
 ```python title="Use of RestClient"
 {{#include docs/examples/python/rest_client_with_req_id_example.py}}
diff --git a/docs/en/14-reference/03-connector/08-node.mdx b/docs/en/14-reference/03-connector/08-node.mdx
index a02bcddfd6..bed06477f1 100644
--- a/docs/en/14-reference/03-connector/08-node.mdx
+++ b/docs/en/14-reference/03-connector/08-node.mdx
@@ -28,7 +28,7 @@ The REST connector supports all platforms that can run Node.js.
 
 ## Version support
 
-Please refer to [version support list](/reference/connector#version-support)
+Please refer to [version support list](../#version-support)
 
 ## Supported features
 
@@ -58,7 +58,7 @@ Please refer to [version support list](/reference/connector#version-support)
 ### Pre-installation preparation
 
 - Install the Node.js development environment
-- If you are using the REST connector, skip this step. However, if you use the native connector, please install the TDengine client driver. Please refer to [Install Client Driver](/reference/connector#Install-Client-Driver) for more details. We use [node-gyp](https://github.com/nodejs/node-gyp) to interact with TDengine instances and also need to install some dependencies mentioned below depending on the specific OS.
+- If you are using the REST connector, skip this step. However, if you use the native connector, please install the TDengine client driver. Please refer to [Install Client Driver](../#Install-Client-Driver) for more details. We use [node-gyp](https://github.com/nodejs/node-gyp) to interact with TDengine instances and also need to install some dependencies mentioned below depending on the specific OS.
 
 
 
diff --git a/docs/en/14-reference/03-connector/09-csharp.mdx b/docs/en/14-reference/03-connector/09-csharp.mdx
index 203d44fe02..282be3af6b 100644
--- a/docs/en/14-reference/03-connector/09-csharp.mdx
+++ b/docs/en/14-reference/03-connector/09-csharp.mdx
@@ -36,7 +36,7 @@ Please note TDengine does not support 32bit Windows any more.
 
 ## Version support
 
-Please refer to [version support list](/reference/connector#version-support)
+Please refer to [version support list](../#version-support)
 
 ## Supported features
 
@@ -69,7 +69,7 @@ Please refer to [version support list](/reference/connector#version-support)
 
 * Install the [.NET SDK](https://dotnet.microsoft.com/download)
 * [Nuget Client](https://docs.microsoft.com/en-us/nuget/install-nuget-client-tools) (optional installation)
-* Install TDengine client driver, please refer to [Install client driver](/reference/connector/#install-client-driver) for details
+* Install TDengine client driver, please refer to [Install client driver](../#install-client-driver) for details
 
 ### Install `TDengine.Connector`
 
diff --git a/docs/en/14-reference/03-connector/80-php.mdx b/docs/en/14-reference/03-connector/80-php.mdx
index b3c2065b6e..bff9e8e5d5 100644
--- a/docs/en/14-reference/03-connector/80-php.mdx
+++ b/docs/en/14-reference/03-connector/80-php.mdx
@@ -40,7 +40,7 @@ Because the version of TDengine client driver is tightly associated with that of
 
 ### Install TDengine Client Driver
 
-Regarding how to install TDengine client driver please refer to [Install Client Driver](/reference/connector#installation-steps)
+Regarding how to install TDengine client driver please refer to [Install Client Driver](../#installation-steps)
 
 ### Install php-tdengine
 
diff --git a/docs/en/14-reference/03-connector/_preparation.mdx b/docs/en/14-reference/03-connector/_preparation.mdx
index 25b78ec134..99887ac36b 100644
--- a/docs/en/14-reference/03-connector/_preparation.mdx
+++ b/docs/en/14-reference/03-connector/_preparation.mdx
@@ -2,7 +2,7 @@
 
 :::info
 
-Since the TDengine client driver is written in C, using the native connection requires loading the client driver shared library file, which is usually included in the TDengine installer. You can install either standard TDengine server installation package or [TDengine client installation package](/get-started/). For Windows development, you need to install the corresponding Windows client, please refer to [Install TDengine](../../get-started/package).
+Since the TDengine client driver is written in C, using the native connection requires loading the client driver shared library file, which is usually included in the TDengine installer. You can install either standard TDengine server installation package or [TDengine client installation package](../../get-started/). For Windows development, you need to install the corresponding Windows client, please refer to [Install TDengine](../../get-started/package).
 
 - libtaos.so: After successful installation of TDengine on a Linux system, the dependent Linux version of the client driver `libtaos.so` file will be automatically linked to `/usr/lib/libtaos.so`, which is included in the Linux scannable path and does not need to be specified separately.
 - taos.dll: After installing the client on Windows, the dependent Windows version of the client driver taos.dll file will be automatically copied to the system default search path C:/Windows/System32, again without the need to specify it separately.
diff --git a/docs/en/14-reference/04-taosadapter.md b/docs/en/14-reference/04-taosadapter.md
index c75598b0df..a9330d21c7 100644
--- a/docs/en/14-reference/04-taosadapter.md
+++ b/docs/en/14-reference/04-taosadapter.md
@@ -186,7 +186,7 @@ See [example/config/taosadapter.toml](https://github.com/taosdata/taosadapter/bl
 
 ### TDengine RESTful interface
 
-You can use any client that supports the http protocol to write data to or query data from TDengine by accessing the REST interface address `http://:6041/rest/sql`. See the [official documentation](/reference/rest-api/) for details.
+You can use any client that supports the http protocol to write data to or query data from TDengine by accessing the REST interface address `http://:6041/rest/sql`. See the [official documentation](../rest-api/) for details.
 
 ### InfluxDB
 
@@ -202,7 +202,7 @@ Support InfluxDB query parameters as follows.
 - `precision` The time precision used by TDengine
 - `u` TDengine user name
 - `p` TDengine password
-- `ttl` The time to live of automatically created sub-table. This value cannot be updated. TDengine will use the ttl value of the first data of sub-table to create sub-table. For more information, please refer [Create Table](/taos-sql/table/#create-table)
+- `ttl` The time to live of automatically created sub-table. This value cannot be updated. TDengine will use the ttl value of the first data of sub-table to create sub-table. For more information, please refer [Create Table](../../taos-sql/table/#create-table)
 
 Note: InfluxDB token authorization is not supported at present. Only Basic authorization and query parameter validation are supported.
 Example: curl --request POST http://127.0.0.1:6041/influxdb/v1/write?db=test --user "root:taosdata" --data-binary "measurement,host=host1 field1=2i,field2=2.0 1577836800000000000"
diff --git a/docs/en/14-reference/06-taosdump.md b/docs/en/14-reference/06-taosdump.md
index c07465a97c..5ec26df1fa 100644
--- a/docs/en/14-reference/06-taosdump.md
+++ b/docs/en/14-reference/06-taosdump.md
@@ -31,7 +31,7 @@ There are two ways to install taosdump:
 2. backup multiple specified databases: use `-D db1,db2,... ` parameters;
 3. back up some super or normal tables in the specified database: use `dbname stbname1 stbname2 tbname1 tbname2 ... ` parameters. Note that the first parameter of this input sequence is the database name, and only one database is supported. The second and subsequent parameters are the names of super or normal tables in that database, separated by spaces.
 4. back up the system log database: TDengine clusters usually contain a system database named `log`. The data in this database is the data that TDengine runs itself, and the taosdump will not back up the log database by default. If users need to back up the log database, users can use the `-a` or `-allow-sys` command-line parameter.
-5. Loose mode backup: taosdump version 1.4.1 onwards provides `-n` and `-L` parameters for backing up data without using escape characters and "loose" mode, which can reduce the number of backups if table names, column names, tag names do not use escape characters. This can also reduce the backup data time and backup data footprint. If you are unsure about using `-n` and `-L` conditions, please use the default parameters for "strict" mode backup. See the [official documentation](/taos-sql/escape) for a description of escaped characters.
+5. Loose mode backup: taosdump version 1.4.1 onwards provides `-n` and `-L` parameters for backing up data without using escape characters and "loose" mode, which can reduce the number of backups if table names, column names, tag names do not use escape characters. This can also reduce the backup data time and backup data footprint. If you are unsure about using `-n` and `-L` conditions, please use the default parameters for "strict" mode backup. See the [official documentation](../../taos-sql/escape) for a description of escaped characters.
 
 :::tip
 - taosdump versions after 1.4.1 provide the `-I` argument for parsing Avro file schema and data. If users specify `-s` then only taosdump will parse schema.
diff --git a/docs/en/14-reference/08-taos-shell.md b/docs/en/14-reference/08-taos-shell.md
index 7e0433a8b2..8512f5b59d 100644
--- a/docs/en/14-reference/08-taos-shell.md
+++ b/docs/en/14-reference/08-taos-shell.md
@@ -8,7 +8,7 @@ The TDengine command-line interface (hereafter referred to as `TDengine CLI`) is
 
 ## Installation
 
-If executed on the TDengine server-side, there is no need for additional installation steps to install TDengine CLI as it is already included and installed automatically. To run TDengine CLI in an environment where no TDengine server is running, the TDengine client installation package needs to be installed first. For details, please refer to [Connector](/reference/connector/).
+If executed on the TDengine server-side, there is no need for additional installation steps to install TDengine CLI as it is already included and installed automatically. To run TDengine CLI in an environment where no TDengine server is running, the TDengine client installation package needs to be installed first. For details, please refer to [Connector](../connector/).
 
 ## Execution
 
@@ -18,7 +18,7 @@ To access the TDengine CLI, you can execute `taos` command-line utility from a t
 taos
 ```
 
-TDengine CLI will display a welcome message and version information if it successfully connected to the TDengine service. If it fails, TDengine CLI will print an error message. See [FAQ](/train-faq/faq) to solve the problem of terminal connection failure to the server. The TDengine CLI prompts as follows:
+TDengine CLI will display a welcome message and version information if it successfully connected to the TDengine service. If it fails, TDengine CLI will print an error message. See [FAQ](../../train-faq/faq) to solve the problem of terminal connection failure to the server. The TDengine CLI prompts as follows:
 
 ```cmd
 taos>
diff --git a/docs/en/14-reference/12-config/index.md b/docs/en/14-reference/12-config/index.md
index ac2364a65d..65c48f9190 100755
--- a/docs/en/14-reference/12-config/index.md
+++ b/docs/en/14-reference/12-config/index.md
@@ -87,7 +87,7 @@ Ensure that your firewall rules do not block TCP port 6042 on any host in the c
 | Protocol | Default Port | Description | How to configure |
 | :------- | :----------- | :-------------------------------------------------------------------------------------------------------- | :--------------------------------------------------------------------------------------------- |
 | TCP | 6030 | Communication between client and server. In a multi-node cluster, communication between nodes. | serverPort |
-| TCP | 6041 | REST connection between client and server | Prior to 2.4.0.0: serverPort+11; After 2.4.0.0 refer to [taosAdapter](/reference/taosadapter/) |
+| TCP | 6041 | REST connection between client and server | Prior to 2.4.0.0: serverPort+11; After 2.4.0.0 refer to [taosAdapter](../taosadapter/) |
 | TCP | 6043 | Service Port of taosKeeper | The parameter of taosKeeper |
 | TCP | 6044 | Data access port for StatsD | Configurable through taosAdapter parameters. |
 | UDP | 6045 | Data access for statsd | Configurable through taosAdapter parameters. |
diff --git a/docs/en/14-reference/13-schemaless/13-schemaless.md b/docs/en/14-reference/13-schemaless/13-schemaless.md
index eb336f4633..9b001ee79c 100644
--- a/docs/en/14-reference/13-schemaless/13-schemaless.md
+++ b/docs/en/14-reference/13-schemaless/13-schemaless.md
@@ -116,7 +116,7 @@ You can configure smlChildTableName in taos.cfg to specify table names, for exam
 10. Taos.cfg adds the configuration of smlTsDefaultName (with a string value), which only works on the client side. After configuration, the time column name of the schemaless automatic table creation can be set through this configuration. If not configured, defaults to _ts.
 11. Super table name or child table name are case sensitive.
 :::tip
-All processing logic of schemaless will still follow TDengine's underlying restrictions on data structures, such as the total length of each row of data cannot exceed 48 KB(64 KB since version 3.0.5.0) and the total length of a tag value cannot exceed 16 KB. See [TDengine SQL Boundary Limits](/taos-sql/limit) for specific constraints in this area.
+All processing logic of schemaless will still follow TDengine's underlying restrictions on data structures, such as the total length of each row of data cannot exceed 48 KB(64 KB since version 3.0.5.0) and the total length of a tag value cannot exceed 16 KB. See [TDengine SQL Boundary Limits](../../taos-sql/limit) for specific constraints in this area.
 :::
 
 ## Time resolution recognition
diff --git a/docs/en/20-third-party/02-prometheus.md b/docs/en/20-third-party/02-prometheus.md
index bfdd3d015e..e6bd855939 100644
--- a/docs/en/20-third-party/02-prometheus.md
+++ b/docs/en/20-third-party/02-prometheus.md
@@ -16,7 +16,7 @@ Prometheus data can be stored in TDengine via the `remote_write` interface with
 To write Prometheus data to TDengine requires the following preparations.
 
 - The TDengine cluster is deployed and functioning properly
-- taosAdapter is installed and running properly. Please refer to the [taosAdapter manual](/reference/taosadapter) for details.
+- taosAdapter is installed and running properly. Please refer to the [taosAdapter manual](../../reference/taosadapter) for details.
 - Prometheus has been installed. Please refer to the [official documentation](https://prometheus.io/docs/prometheus/latest/installation/) for installing Prometheus
 
 ## Configuration steps
 
diff --git a/docs/en/20-third-party/03-telegraf.md b/docs/en/20-third-party/03-telegraf.md
index 7e99b84eab..900262f51f 100644
--- a/docs/en/20-third-party/03-telegraf.md
+++ b/docs/en/20-third-party/03-telegraf.md
@@ -14,7 +14,7 @@ Telegraf's data can be written to TDengine by simply adding the output configura
 To write Telegraf data to TDengine requires the following preparations.
 
 - The TDengine cluster is deployed and functioning properly
-- taosAdapter is installed and running properly. Please refer to the [taosAdapter manual](/reference/taosadapter) for details.
+- taosAdapter is installed and running properly. Please refer to the [taosAdapter manual](../../reference/taosadapter) for details. - Telegraf has been installed. Please refer to the [official documentation](https://docs.influxdata.com/telegraf/v1.22/install/) for Telegraf installation. - Telegraf collects the running status measurements of current system. You can enable [input plugins](https://docs.influxdata.com/telegraf/v1.22/plugins/) to insert [other formats](https://docs.influxdata.com/telegraf/v1.24/data_formats/input/) data to Telegraf then forward to TDengine. @@ -73,6 +73,6 @@ Query OK, 3 row(s) in set (0.013269s) - TDengine take influxdb format data and create unique ID for table names by the rule. The user can configure `smlChildTableName` parameter to generate specified table names if he/she needs. And he/she also need to insert data with specified data format. -For example, Add `smlChildTableName=tname` in the taos.cfg file. Insert data `st,tname=cpu1,t1=4 c1=3 1626006833639000000` then the table name will be cpu1. If there are multiple lines has same tname but different tag_set, the first line's tag_set will be used to automatically creating table and ignore other lines. Please refer to [TDengine Schemaless](/reference/schemaless/#Schemaless-Line-Protocol) +For example, Add `smlChildTableName=tname` in the taos.cfg file. Insert data `st,tname=cpu1,t1=4 c1=3 1626006833639000000` then the table name will be cpu1. If there are multiple lines has same tname but different tag_set, the first line's tag_set will be used to automatically creating table and ignore other lines. Please refer to [TDengine Schemaless](../../reference/schemaless/#Schemaless-Line-Protocol) ::: diff --git a/docs/en/20-third-party/05-collectd.md b/docs/en/20-third-party/05-collectd.md index d8c8e7f81d..5672852cd0 100644 --- a/docs/en/20-third-party/05-collectd.md +++ b/docs/en/20-third-party/05-collectd.md @@ -15,7 +15,7 @@ You can write the data collected by collectd to TDengine by simply modifying the Writing collectd data to the TDengine requires several preparations. - The TDengine cluster is deployed and running properly -- taosAdapter is installed and running, please refer to [taosAdapter's manual](/reference/taosadapter) for details +- taosAdapter is installed and running, please refer to [taosAdapter's manual](../../reference/taosadapter) for details - collectd has been installed. Please refer to the [official documentation](https://collectd.org/download.shtml) to install collectd ## Configuration steps diff --git a/docs/en/20-third-party/07-icinga2.md b/docs/en/20-third-party/07-icinga2.md index 540aae8689..43cd9b83ec 100644 --- a/docs/en/20-third-party/07-icinga2.md +++ b/docs/en/20-third-party/07-icinga2.md @@ -14,7 +14,7 @@ You can write the data collected by icinga2 to TDengine by simply modifying the To write icinga2 data to TDengine requires the following preparations. - The TDengine cluster is deployed and working properly -- taosAdapter is installed and running properly. Please refer to the [taosAdapter manual](/reference/taosadapter) for details. +- taosAdapter is installed and running properly. Please refer to the [taosAdapter manual](../../reference/taosadapter) for details. - icinga2 has been installed. 
Please refer to the [official documentation](https://icinga.com/docs/icinga-2/latest/doc/02-installation/) for icinga2 installation ## Configuration steps diff --git a/docs/en/20-third-party/08-tcollector.md b/docs/en/20-third-party/08-tcollector.md index f1c0ecd44d..83d01dda24 100644 --- a/docs/en/20-third-party/08-tcollector.md +++ b/docs/en/20-third-party/08-tcollector.md @@ -14,7 +14,7 @@ You can write the data collected by TCollector to TDengine by simply changing th To write data to the TDengine via TCollector requires the following preparations. - The TDengine cluster has been deployed and is working properly -- taosAdapter is installed and running properly. Please refer to the [taosAdapter manual](/reference/taosadapter) for details. +- taosAdapter is installed and running properly. Please refer to the [taosAdapter manual](../../reference/taosadapter) for details. - TCollector has been installed. Please refer to [official documentation](http://opentsdb.net/docs/build/html/user_guide/utilities/tcollector.html#installation-of-tcollector) for TCollector installation ## Configuration steps diff --git a/docs/en/20-third-party/09-emq-broker.md b/docs/en/20-third-party/09-emq-broker.md index 9dab6e52c9..7ca6cd4aa7 100644 --- a/docs/en/20-third-party/09-emq-broker.md +++ b/docs/en/20-third-party/09-emq-broker.md @@ -82,7 +82,7 @@ Edit the resource configuration to add the key/value pairing for Authorization. Basic cm9vdDp0YW9zZGF0YQ== ``` -Please refer to the [ TDengine REST API documentation ](/reference/rest-api/) for the authorization in details. +Please refer to the [ TDengine REST API documentation ](../../reference/rest-api/) for the authorization in details. Enter the rule engine replacement template in the message body: diff --git a/docs/en/20-third-party/11-kafka.md b/docs/en/20-third-party/11-kafka.md index b865c00bc3..cac4f5f604 100644 --- a/docs/en/20-third-party/11-kafka.md +++ b/docs/en/20-third-party/11-kafka.md @@ -94,7 +94,7 @@ The output as bellow: The role of the TDengine Sink Connector is to synchronize the data of the specified topic to TDengine. Users do not need to create databases and super tables in advance. The name of the target database can be specified manually (see the configuration parameter connection.database), or it can be generated according to specific rules (see the configuration parameter connection.database.prefix). -TDengine Sink Connector internally uses TDengine [modeless write interface](/reference/connector/cpp#modeless write-api) to write data to TDengine, currently supports data in three formats: [InfluxDB line protocol format](/develop /insert-data/influxdb-line), [OpenTSDB Telnet protocol format](/develop/insert-data/opentsdb-telnet), and [OpenTSDB JSON protocol format](/develop/insert-data/opentsdb-json). +TDengine Sink Connector internally uses TDengine [modeless write interface](../../reference/connector/cpp#modeless write-api) to write data to TDengine, currently supports data in three formats: [InfluxDB line protocol format](../../develop/insert-data/influxdb-line), [OpenTSDB Telnet protocol format](../../develop/insert-data/opentsdb-telnet), and [OpenTSDB JSON protocol format](../../develop/insert-data/opentsdb-json). The following example synchronizes the data of the topic meters to the target database power. The data format is the InfluxDB Line protocol format. @@ -213,7 +213,7 @@ If you see the above data, the synchronization is successful. 
If not, check the The role of the TDengine Source Connector is to push all the data of a specific TDengine database after a particular time to Kafka. The implementation principle of TDengine Source Connector is to first pull historical data in batches and then synchronize incremental data with the strategy of the regular query. At the same time, the changes in the table will be monitored, and the newly added table can be automatically synchronized. If Kafka Connect is restarted, synchronization will resume where it left off. -TDengine Source Connector will convert the data in TDengine data table into [InfluxDB Line protocol format](/develop/insert-data/influxdb-line/) or [OpenTSDB JSON protocol format](/develop/insert-data/opentsdb-json ) and then write to Kafka. +TDengine Source Connector will convert the data in TDengine data table into [InfluxDB Line protocol format](../../develop/insert-data/influxdb-line/) or [OpenTSDB JSON protocol format](../../develop/insert-data/opentsdb-json ) and then write to Kafka. The following sample program synchronizes the data in the database test to the topic tdengine-test-meters. diff --git a/docs/en/20-third-party/_deploytaosadapter.mdx b/docs/en/20-third-party/_deploytaosadapter.mdx index 840ca7640a..f8ee53af62 100644 --- a/docs/en/20-third-party/_deploytaosadapter.mdx +++ b/docs/en/20-third-party/_deploytaosadapter.mdx @@ -14,4 +14,4 @@ Check the running status of taosAdapter. systemctl status taosadapter ``` -taosAdapter Please refer to the `taosadapter --help` command output and [reference documentation](/reference/taosadapter) for detailed configuration parameters and usage of taosAdapter. +taosAdapter Please refer to the `taosadapter --help` command output and [reference documentation](../../reference/taosadapter) for detailed configuration parameters and usage of taosAdapter. 
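The Telegraf and Kafka Connect hunks above both funnel data through TDengine's schemaless write path using the InfluxDB line protocol. A minimal C sketch of that interface follows, reusing the sample line `st,tname=cpu1,t1=4 c1=3 1626006833639000000` quoted in the Telegraf hunk; the connection parameters and the `test` target database are illustrative, and the protocol/precision constant names should be checked against the installed `taos.h`.

```c
// Schemaless (InfluxDB line protocol) insert sketch, i.e. the write path the
// Telegraf and Kafka Connect sections above rely on. The target database is
// assumed to already exist.
#include <stdio.h>
#include <taos.h>

int main(void) {
  TAOS *conn = taos_connect("localhost", "root", "taosdata", "test", 6030);
  if (conn == NULL) {
    fprintf(stderr, "failed to connect: %s\n", taos_errstr(NULL));
    return 1;
  }

  // The same sample line used in the Telegraf hunk above.
  char *lines[] = {"st,tname=cpu1,t1=4 c1=3 1626006833639000000"};

  TAOS_RES *res = taos_schemaless_insert(conn, lines, 1,
                                         TSDB_SML_LINE_PROTOCOL,
                                         TSDB_SML_TIMESTAMP_NANO_SECONDS);
  if (taos_errno(res) != 0) {
    fprintf(stderr, "schemaless insert failed: %s\n", taos_errstr(res));
  }
  taos_free_result(res);
  taos_close(conn);
  return 0;
}
```

With `smlChildTableName=tname` set in `taos.cfg` as described in the Telegraf hunk, a write like this lands in a child table named `cpu1`.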
diff --git a/docs/en/25-application/01-telegraf.md b/docs/en/25-application/01-telegraf.md index 1e3325b2b2..a6db826fa3 100644 --- a/docs/en/25-application/01-telegraf.md +++ b/docs/en/25-application/01-telegraf.md @@ -41,7 +41,7 @@ Download and install the [latest version of TDengine](https://docs.tdengine.com/ ### Install Grafana Plugin and Configure Data Source -Please refer to [Install Grafana Plugin and Configure Data Source](/third-party/grafana/#install-grafana-plugin-and-configure-data-source) +Please refer to [Install Grafana Plugin and Configure Data Source](../../third-party/grafana/#install-grafana-plugin-and-configure-data-source) ### Modify /etc/telegraf/telegraf.conf diff --git a/docs/en/25-application/02-collectd.md b/docs/en/25-application/02-collectd.md index ee1e944928..2c198dcdf5 100644 --- a/docs/en/25-application/02-collectd.md +++ b/docs/en/25-application/02-collectd.md @@ -44,7 +44,7 @@ Download and install the [latest version of TDengine](https://docs.tdengine.com/ ### Install Grafana Plugin and Configure Data Source -Please refer to [Install Grafana Plugin and Configure Data Source](/third-party/grafana/#install-grafana-plugin-and-configure-data-source) +Please refer to [Install Grafana Plugin and Configure Data Source](../../third-party/grafana/#install-grafana-plugin-and-configure-data-source) ### Configure collectd diff --git a/docs/en/25-application/_03-immigrate.md b/docs/en/25-application/_03-immigrate.md index 457a40614e..cdb3d5591c 100644 --- a/docs/en/25-application/_03-immigrate.md +++ b/docs/en/25-application/_03-immigrate.md @@ -70,7 +70,7 @@ You can use collectd and push the data to taosAdapter utilizing the write_tsdb p - **Tuning the Dashboard system** -After writing the data to TDengine, you can configure Grafana to visualize the data written to TDengine. To obtain and use the Grafana plugin provided by TDengine, please refer to [Links to other tools](/third-party/grafana). +After writing the data to TDengine, you can configure Grafana to visualize the data written to TDengine. To obtain and use the Grafana plugin provided by TDengine, please refer to [Links to other tools](../../third-party/grafana). TDengine provides two sets of Dashboard templates by default, and users only need to import the templates from the Grafana directory into Grafana to activate their use. @@ -396,7 +396,7 @@ Hard disk writing performance has little effect on TDengine. The TDengine writin ### Computational resource estimates -Due to the characteristics of IoT data, when the frequency of data generation is consistent, the writing process of TDengine maintains a relatively fixed amount of resource consumption (computing and storage). According to the [TDengine Operation and Maintenance Guide](/operation/) description, the system consumes less than 1 CPU core at 22,000 writes per second. +Due to the characteristics of IoT data, when the frequency of data generation is consistent, the writing process of TDengine maintains a relatively fixed amount of resource consumption (computing and storage). According to the [TDengine Operation and Maintenance Guide](../../operation/) description, the system consumes less than 1 CPU core at 22,000 writes per second. In estimating the CPU resources consumed by the query, assuming that the application requires the database to provide 10,000 QPS, the CPU time consumed by each query is about 1 ms. The query provided by each core per second is 1,000 QPS, which satisfies 10,000 QPS. The query request requires at least 10 cores. 
For the system as a whole system to have less than 50% CPU load, the entire cluster needs twice as many cores i.e. 20 cores. diff --git a/include/common/cos.h b/include/common/cos.h index 21b645f604..c6b159c1da 100644 --- a/include/common/cos.h +++ b/include/common/cos.h @@ -43,6 +43,7 @@ int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, int32_t s3GetObjectsByPrefix(const char *prefix, const char *path); void s3EvictCache(const char *path, long object_size); long s3Size(const char *object_name); +int32_t s3GetObjectToFile(const char *object_name, char *fileName); #ifdef __cplusplus } diff --git a/include/common/tglobal.h b/include/common/tglobal.h index 58517a5db0..33cfada338 100644 --- a/include/common/tglobal.h +++ b/include/common/tglobal.h @@ -75,12 +75,15 @@ extern int32_t tsElectInterval; extern int32_t tsHeartbeatInterval; extern int32_t tsHeartbeatTimeout; +// vnode +extern int64_t tsVndCommitMaxIntervalMs; + // snode extern int32_t tsRsyncPort; -extern char tsCheckpointBackupDir[]; +extern char tsCheckpointBackupDir[]; // vnode checkpoint -extern char tsSnodeAddress[]; //127.0.0.1:873 +extern char tsSnodeAddress[]; // 127.0.0.1:873 // mnode extern int64_t tsMndSdbWriteDelta; @@ -104,8 +107,8 @@ extern int32_t tsMonitorMaxLogs; extern bool tsMonitorComp; // audit -extern bool tsEnableAudit; -extern bool tsEnableAuditCreateTable; +extern bool tsEnableAudit; +extern bool tsEnableAuditCreateTable; // telem extern bool tsEnableTelem; @@ -113,9 +116,9 @@ extern int32_t tsTelemInterval; extern char tsTelemServer[]; extern uint16_t tsTelemPort; extern bool tsEnableCrashReport; -extern char *tsTelemUri; -extern char *tsClientCrashReportUri; -extern char *tsSvrCrashReportUri; +extern char * tsTelemUri; +extern char * tsClientCrashReportUri; +extern char * tsSvrCrashReportUri; // query buffer management extern int32_t tsQueryBufferSize; // maximum allowed usage buffer size in MB for each data node during query processing diff --git a/include/common/tgrant.h b/include/common/tgrant.h index a5f3ab2e3f..f06fca8014 100644 --- a/include/common/tgrant.h +++ b/include/common/tgrant.h @@ -31,8 +31,6 @@ extern "C" { #endif #define GRANT_HEART_BEAT_MIN 2 -#define GRANT_ACTIVE_CODE "activeCode" -#define GRANT_C_ACTIVE_CODE "cActiveCode" typedef enum { TSDB_GRANT_ALL, @@ -52,11 +50,6 @@ typedef enum { TSDB_GRANT_TABLE, } EGrantType; -typedef struct { - int64_t grantedTime; - int64_t connGrantedTime; -} SGrantedInfo; - int32_t grantCheck(EGrantType grant); int32_t grantAlterActiveCode(int32_t did, const char* old, const char* newer, char* out, int8_t type); diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 86d34502c6..73d1ab2473 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -3112,7 +3112,7 @@ typedef struct { int32_t tSerializeSMDropStreamReq(void* buf, int32_t bufLen, const SMDropStreamReq* pReq); int32_t tDeserializeSMDropStreamReq(void* buf, int32_t bufLen, SMDropStreamReq* pReq); -void tFreeSMDropStreamReq(SMDropStreamReq* pReq); +void tFreeMDropStreamReq(SMDropStreamReq* pReq); typedef struct { char name[TSDB_STREAM_FNAME_LEN]; @@ -3271,7 +3271,7 @@ typedef struct { SMsgHead head; int64_t streamId; int32_t taskId; -} SVPauseStreamTaskReq, SVResetStreamTaskReq; +} SVPauseStreamTaskReq, SVResetStreamTaskReq, SVDropHTaskReq; typedef struct { int8_t reserved; diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 4eb8328caa..afa0fa2a6e 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -67,7 +67,7 @@ 
enum { enum { // WARN: new msg should be appended to segment tail #endif - TD_NEW_MSG_SEG(TDMT_DND_MSG) + TD_NEW_MSG_SEG(TDMT_DND_MSG) // 0<<8 TD_DEF_MSG_TYPE(TDMT_DND_CREATE_MNODE, "dnode-create-mnode", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_DND_DROP_MNODE, "dnode-drop-mnode", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_DND_CREATE_QNODE, "dnode-create-qnode", NULL, NULL) @@ -87,7 +87,7 @@ enum { // WARN: new msg should be appended to segment tail TD_DEF_MSG_TYPE(TDMT_DND_ALTER_VNODE_TYPE, "dnode-alter-vnode-type", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_DND_CHECK_VNODE_LEARNER_CATCHUP, "dnode-check-vnode-learner-catchup", NULL, NULL) - TD_NEW_MSG_SEG(TDMT_MND_MSG) + TD_NEW_MSG_SEG(TDMT_MND_MSG) // 1<<8 TD_DEF_MSG_TYPE(TDMT_MND_CONNECT, "connect", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_CREATE_ACCT, "create-acct", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_ALTER_ACCT, "alter-acct", NULL, NULL) @@ -186,6 +186,7 @@ enum { // WARN: new msg should be appended to segment tail TD_DEF_MSG_TYPE(TDMT_MND_RESUME_STREAM, "resume-stream", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHECKPOINT_TIMER, "stream-checkpoint-tmr", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_STREAM_BEGIN_CHECKPOINT, "stream-begin-checkpoint", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE, "stream-checkpoint-remain", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_STREAM_NODECHANGE_CHECK, "stream-nodechange-check", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_TRIM_DB_TIMER, "trim-db-tmr", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_GRANT_NOTIFY, "grant-notify", NULL, NULL) @@ -194,7 +195,7 @@ enum { // WARN: new msg should be appended to segment tail TD_DEF_MSG_TYPE(TDMT_MND_VIEW_META, "view-meta", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_MAX_MSG, "mnd-max", NULL, NULL) - TD_NEW_MSG_SEG(TDMT_VND_MSG) + TD_NEW_MSG_SEG(TDMT_VND_MSG) // 2<<8 TD_DEF_MSG_TYPE(TDMT_VND_SUBMIT, "submit", SSubmitReq, SSubmitRsp) TD_DEF_MSG_TYPE(TDMT_VND_CREATE_TABLE, "create-table", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_ALTER_TABLE, "alter-table", NULL, NULL) @@ -243,7 +244,7 @@ enum { // WARN: new msg should be appended to segment tail TD_DEF_MSG_TYPE(TDMT_VND_DISABLE_WRITE, "vnode-disable-write", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_MAX_MSG, "vnd-max", NULL, NULL) - TD_NEW_MSG_SEG(TDMT_SCH_MSG) + TD_NEW_MSG_SEG(TDMT_SCH_MSG) // 3<<8 TD_DEF_MSG_TYPE(TDMT_SCH_QUERY, "query", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SCH_MERGE_QUERY, "merge-query", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SCH_QUERY_CONTINUE, "query-continue", NULL, NULL) @@ -258,25 +259,26 @@ enum { // WARN: new msg should be appended to segment tail TD_DEF_MSG_TYPE(TDMT_SCH_MAX_MSG, "sch-max", NULL, NULL) - TD_NEW_MSG_SEG(TDMT_STREAM_MSG) - TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DEPLOY, "stream-task-deploy", SStreamTaskDeployReq, SStreamTaskDeployRsp) + TD_NEW_MSG_SEG(TDMT_STREAM_MSG) //4 << 8 + TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DEPLOY, "stream-task-deploy", SStreamTaskDeployReq, SStreamTaskDeployRsp) //1025 1026 TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DROP, "stream-task-drop", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_RUN, "stream-task-run", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DISPATCH, "stream-task-dispatch", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_UNUSED1, "stream-unused1", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_STREAM_RETRIEVE, "stream-retrieve", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_RETRIEVE, "stream-retrieve", NULL, NULL) //1035 1036 TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECKPOINT_READY, "stream-checkpoint-ready", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_REPORT_CHECKPOINT, "stream-report-checkpoint", NULL, NULL) 
TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_RESTORE_CHECKPOINT, "stream-restore-checkpoint", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_PAUSE, "stream-task-pause", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_RESUME, "stream-task-resume", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_STOP, "stream-task-stop", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_HTASK_DROP, "stream-htask-drop", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_MAX_MSG, "stream-max", NULL, NULL) - TD_NEW_MSG_SEG(TDMT_MON_MSG) + TD_NEW_MSG_SEG(TDMT_MON_MSG) //5 << 8 TD_DEF_MSG_TYPE(TDMT_MON_MAX_MSG, "monitor-max", NULL, NULL) - TD_NEW_MSG_SEG(TDMT_SYNC_MSG) + TD_NEW_MSG_SEG(TDMT_SYNC_MSG) //6 << 8 TD_DEF_MSG_TYPE(TDMT_SYNC_TIMEOUT, "sync-timer", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_TIMEOUT_ELECTION, "sync-elect", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_PING_REPLY, "sync-ping-reply", NULL, NULL) // no longer used @@ -307,7 +309,7 @@ enum { // WARN: new msg should be appended to segment tail TD_DEF_MSG_TYPE(TDMT_SYNC_MAX_MSG, "sync-max", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_FORCE_FOLLOWER, "sync-force-become-follower", NULL, NULL) - TD_NEW_MSG_SEG(TDMT_VND_STREAM_MSG) + TD_NEW_MSG_SEG(TDMT_VND_STREAM_MSG) //7 << 8 TD_DEF_MSG_TYPE(TDMT_VND_STREAM_SCAN_HISTORY, "vnode-stream-scan-history", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_SCAN_HISTORY_FINISH, "vnode-stream-scan-history-finish", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_CHECK_POINT_SOURCE, "vnode-stream-checkpoint-source", NULL, NULL) @@ -316,7 +318,7 @@ enum { // WARN: new msg should be appended to segment tail TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TASK_CHECK, "vnode-stream-task-check", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_MAX_MSG, "vnd-stream-max", NULL, NULL) - TD_NEW_MSG_SEG(TDMT_VND_TMQ_MSG) + TD_NEW_MSG_SEG(TDMT_VND_TMQ_MSG) //8 << 8 TD_DEF_MSG_TYPE(TDMT_VND_TMQ_SUBSCRIBE, "vnode-tmq-subscribe", SMqRebVgReq, SMqRebVgRsp) TD_DEF_MSG_TYPE(TDMT_VND_TMQ_DELETE_SUB, "vnode-tmq-delete-sub", SMqVDeleteReq, SMqVDeleteRsp) TD_DEF_MSG_TYPE(TDMT_VND_TMQ_COMMIT_OFFSET, "vnode-tmq-commit-offset", STqOffset, STqOffset) diff --git a/include/dnode/snode/snode.h b/include/dnode/snode/snode.h index e8c64b07c4..c3dfd3a611 100644 --- a/include/dnode/snode/snode.h +++ b/include/dnode/snode/snode.h @@ -45,6 +45,7 @@ typedef struct { */ SSnode *sndOpen(const char *path, const SSnodeOpt *pOption); +int32_t sndInit(SSnode * pSnode); /** * @brief Stop Snode in Dnode. * diff --git a/include/dnode/vnode/tqCommon.h b/include/dnode/vnode/tqCommon.h new file mode 100644 index 0000000000..75dafcdbff --- /dev/null +++ b/include/dnode/vnode/tqCommon.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#ifndef TDENGINE_TQ_COMMON_H +#define TDENGINE_TQ_COMMON_H + +// message process +int32_t tqStreamTaskStartAsync(SStreamMeta* pMeta, SMsgCb* cb, bool restart); +int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pMsg, bool restored); +int32_t tqStreamTaskProcessDispatchReq(SStreamMeta* pMeta, SRpcMsg* pMsg); +int32_t tqStreamTaskProcessDispatchRsp(SStreamMeta* pMeta, SRpcMsg* pMsg); +int32_t tqStreamTaskProcessRetrieveReq(SStreamMeta* pMeta, SRpcMsg* pMsg); +int32_t tqStreamTaskProcessScanHistoryFinishReq(SStreamMeta* pMeta, SRpcMsg* pMsg); +int32_t tqStreamTaskProcessScanHistoryFinishRsp(SStreamMeta* pMeta, SRpcMsg* pMsg); +int32_t tqStreamTaskProcessCheckReq(SStreamMeta* pMeta, SRpcMsg* pMsg); +int32_t tqStreamTaskProcessCheckRsp(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLeader); +int32_t tqStreamTaskProcessCheckpointReadyMsg(SStreamMeta* pMeta, SRpcMsg* pMsg); +int32_t tqStreamTaskProcessDeployReq(SStreamMeta* pMeta, int64_t sversion, char* msg, int32_t msgLen, bool isLeader, bool restored); +int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen); +int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLeader); +int32_t startStreamTasks(SStreamMeta* pMeta); +int32_t resetStreamTaskStatus(SStreamMeta* pMeta); + +#endif // TDENGINE_TQ_COMMON_H diff --git a/include/libs/executor/storageapi.h b/include/libs/executor/storageapi.h index 2c9b8a4e5e..baa5ef8100 100644 --- a/include/libs/executor/storageapi.h +++ b/include/libs/executor/storageapi.h @@ -150,19 +150,6 @@ typedef struct { int32_t colNum; } SMetaStbStats; -// void tqReaderSetColIdList(STqReader *pReader, SArray *pColIdList); -// int32_t tqReaderSetTbUidList(STqReader *pReader, const SArray *tbUidList); -// int32_t tqReaderAddTbUidList(STqReader *pReader, const SArray *pTableUidList); -// int32_t tqReaderRemoveTbUidList(STqReader *pReader, const SArray *tbUidList); -// bool tqReaderIsQueriedTable(STqReader* pReader, uint64_t uid); -// bool tqCurrentBlockConsumed(const STqReader* pReader); -// int32_t tqReaderSeek(STqReader *pReader, int64_t ver, const char *id); -// bool tqNextBlockInWal(STqReader* pReader, const char* idstr); -// bool tqNextBlockImpl(STqReader *pReader, const char* idstr); -// int32_t getTableInfoFromSnapshot(SSnapContext *ctx, void **pBuf, int32_t *contLen, int16_t *type, int64_t -// *uid); SMetaTableInfo getMetaTableInfoFromSnapshot(SSnapContext *ctx); int32_t setForSnapShot(SSnapContext -// *ctx, int64_t uid); int32_t destroySnapContext(SSnapContext *ctx); - // clang-format off /*-------------------------------------------------new api format---------------------------------------------------*/ typedef enum { @@ -211,27 +198,6 @@ typedef struct SStoreCacheReader { // clang-format on /*------------------------------------------------------------------------------------------------------------------*/ -/* -void tqReaderSetColIdList(STqReader *pReader, SArray *pColIdList); -int32_t tqReaderSetTbUidList(STqReader *pReader, const SArray *tbUidList); -int32_t tqReaderAddTbUidList(STqReader *pReader, const SArray *pTableUidList); -int32_t tqReaderRemoveTbUidList(STqReader *pReader, const SArray *tbUidList); -bool tqReaderIsQueriedTable(STqReader* pReader, uint64_t uid); -bool tqCurrentBlockConsumed(const STqReader* pReader); - -int32_t tqReaderSeek(STqReader *pReader, int64_t ver, const char *id); -bool tqNextBlockInWal(STqReader* pReader, const char* idstr); -bool tqNextBlockImpl(STqReader *pReader, const char* idstr); - - int32_t 
tqRetrieveDataBlock(STqReader *pReader, SSDataBlock **pRes, const char* idstr); -STqReader *tqReaderOpen(void *pVnode); -void tqReaderClose(STqReader *); - -int32_t tqReaderSetSubmitMsg(STqReader *pReader, void *msgStr, int32_t msgLen, int64_t ver); -bool tqNextDataBlockFilterOut(STqReader *pReader, SHashObj *filterOutUids); -SWalReader* tqGetWalReader(STqReader* pReader); -int32_t tqRetrieveTaosxBlock(STqReader *pReader, SArray *blocks, SArray *schemas, SSubmitTbData **pSubmitTbDataRet); -*/ // todo rename typedef struct SStoreTqReader { struct STqReader* (*tqReaderOpen)(); @@ -295,28 +261,18 @@ typedef struct SStoreMeta { void* (*storeGetIndexInfo)(); void* (*getInvertIndex)(void* pVnode); - int32_t (*getChildTableList)( - void* pVnode, int64_t suid, - SArray* list); // support filter and non-filter cases. [vnodeGetCtbIdList & vnodeGetCtbIdListByFilter] - int32_t (*storeGetTableList)(void* pVnode, int8_t type, SArray* pList); // vnodeGetStbIdList & vnodeGetAllTableList + // support filter and non-filter cases. [vnodeGetCtbIdList & vnodeGetCtbIdListByFilter] + int32_t (*getChildTableList)( void* pVnode, int64_t suid, SArray* list); + int32_t (*storeGetTableList)(void* pVnode, int8_t type, SArray* pList); void* storeGetVersionRange; void* storeGetLastTimestamp; int32_t (*getTableSchema)(void* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid); // tsdbGetTableSchema + int32_t (*getNumOfChildTables)( void* pVnode, int64_t uid, int64_t* numOfTables, int32_t* numOfCols); + void (*getBasicInfo)(void* pVnode, const char** dbname, int32_t* vgId, int64_t* numOfTables, int64_t* numOfNormalTables); - // db name, vgId, numOfTables, numOfSTables - int32_t (*getNumOfChildTables)( - void* pVnode, int64_t uid, int64_t* numOfTables, - int32_t* numOfCols); // int32_t metaGetStbStats(SMeta *pMeta, int64_t uid, SMetaStbStats *pInfo); - void (*getBasicInfo)(void* pVnode, const char** dbname, int32_t* vgId, int64_t* numOfTables, - int64_t* numOfNormalTables); // vnodeGetInfo(void *pVnode, const char **dbname, int32_t *vgId) & - // metaGetTbNum(SMeta *pMeta) & metaGetNtbNum(SMeta *pMeta); int64_t (*getNumOfRowsInMem)(void* pVnode); - /** -int32_t vnodeGetCtbIdList(void *pVnode, int64_t suid, SArray *list); -int32_t vnodeGetCtbIdListByFilter(void *pVnode, int64_t suid, SArray *list, bool (*filter)(void *arg), void *arg); -int32_t vnodeGetStbIdList(void *pVnode, int64_t suid, SArray *list); - */ + SMCtbCursor* (*openCtbCursor)(void *pVnode, tb_uid_t uid, int lock); int32_t (*resumeCtbCursor)(SMCtbCursor* pCtbCur, int8_t first); void (*pauseCtbCursor)(SMCtbCursor* pCtbCur); diff --git a/include/libs/nodes/querynodes.h b/include/libs/nodes/querynodes.h index 19dc8c9e4d..5c5172b9cd 100644 --- a/include/libs/nodes/querynodes.h +++ b/include/libs/nodes/querynodes.h @@ -449,6 +449,7 @@ typedef struct SVnodeModifyOpStmt { SHashObj* pSubTableHashObj; // SHashObj SHashObj* pTableNameHashObj; // set of table names for refreshing meta, sync mode SHashObj* pDbFNameHashObj; // set of db names for refreshing meta, sync mode + SHashObj* pTableCxtHashObj; // temp SHashObj for single request SArray* pVgDataBlocks; // SArray SVCreateTbReq* pCreateTblReq; TdFilePtr fp; diff --git a/include/libs/stream/streamState.h b/include/libs/stream/streamState.h index d0a2b311ee..ba90c0dc7a 100644 --- a/include/libs/stream/streamState.h +++ b/include/libs/stream/streamState.h @@ -35,6 +35,7 @@ int32_t streamStateBegin(SStreamState* pState); int32_t streamStateCommit(SStreamState* pState); void streamStateDestroy(SStreamState* pState, bool 
remove); int32_t streamStateDeleteCheckPoint(SStreamState* pState, TSKEY mark); +int32_t streamStateDelTaskDb(SStreamState* pState); int32_t streamStateFuncPut(SStreamState* pState, const SWinKey* key, const void* value, int32_t vLen); int32_t streamStateFuncGet(SStreamState* pState, const SWinKey* key, void** ppVal, int32_t* pVLen); @@ -133,4 +134,4 @@ char* streamStateIntervalDump(SStreamState* pState); } #endif -#endif /* ifndef _STREAM_STATE_H_ */ +#endif /* ifndef _STREAM_STATE_H_ */ \ No newline at end of file diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 6e191e412d..9f88672231 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -34,28 +34,33 @@ extern "C" { #define SIZE_IN_MiB(_v) ((_v) / ONE_MiB_F) #define SIZE_IN_KiB(_v) ((_v) / ONE_KiB_F) +#define TASK_DOWNSTREAM_READY 0x0 +#define TASK_DOWNSTREAM_NOT_READY 0x1 +#define TASK_DOWNSTREAM_NOT_LEADER 0x2 +#define TASK_UPSTREAM_NEW_STAGE 0x3 -#define TASK_DOWNSTREAM_READY 0x0 -#define TASK_DOWNSTREAM_NOT_READY 0x1 -#define TASK_DOWNSTREAM_NOT_LEADER 0x2 -#define TASK_UPSTREAM_NEW_STAGE 0x3 +#define NODE_ROLE_UNINIT 0x1 +#define NODE_ROLE_LEADER 0x2 +#define NODE_ROLE_FOLLOWER 0x3 -#define NODE_ROLE_UNINIT 0x1 -#define NODE_ROLE_LEADER 0x2 -#define NODE_ROLE_FOLLOWER 0x3 - -#define HAS_RELATED_FILLHISTORY_TASK(_t) ((_t)->hTaskInfo.id.taskId != 0) +#define HAS_RELATED_FILLHISTORY_TASK(_t) ((_t)->hTaskInfo.id.taskId != 0) #define CLEAR_RELATED_FILLHISTORY_TASK(_t) \ do { \ (_t)->hTaskInfo.id.taskId = 0; \ (_t)->hTaskInfo.id.streamId = 0; \ } while (0) +#define STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID (-1) +#define STREAM_EXEC_START_ALL_TASKS_ID (-2) +#define STREAM_EXEC_RESTART_ALL_TASKS_ID (-3) + typedef struct SStreamTask SStreamTask; typedef struct SStreamQueue SStreamQueue; typedef struct SStreamTaskSM SStreamTaskSM; -#define SSTREAM_TASK_VER 2 +#define SSTREAM_TASK_VER 2 +#define SSTREAM_TASK_INCOMPATIBLE_VER 1 +#define SSTREAM_TASK_NEED_CONVERT_VER 2 enum { STREAM_STATUS__NORMAL = 0, @@ -107,6 +112,7 @@ typedef enum { TASK_LEVEL__SOURCE = 1, TASK_LEVEL__AGG, TASK_LEVEL__SINK, + TASK_LEVEL_SMA, } ETASK_LEVEL; enum { @@ -159,7 +165,7 @@ typedef struct { typedef struct { int8_t type; int64_t ver; - SArray* submits; // SArray + SArray* submits; // SArray } SStreamMergedSubmit; typedef struct { @@ -251,7 +257,7 @@ typedef struct { } SScanhistoryDataInfo; typedef struct { - int32_t idleDuration; // idle time before use time slice the continue execute scan-history + int32_t idleDuration; // idle time before use time slice the continue execute scan-history int32_t numOfTicks; tmr_h pTimer; int32_t execCount; @@ -301,10 +307,16 @@ typedef struct SStreamTaskId { typedef struct SCheckpointInfo { int64_t startTs; int64_t checkpointId; - int64_t checkpointVer; // latest checkpointId version - int64_t processedVer; // already processed ver, that has generated results version. 
- int64_t nextProcessVer; // current offset in WAL, not serialize it - int64_t failedId; // record the latest failed checkpoint id + + int64_t checkpointVer; // latest checkpointId version + int64_t processedVer; + int64_t nextProcessVer; // current offset in WAL, not serialize it + int64_t failedId; // record the latest failed checkpoint id + int64_t checkpointingId; + int32_t downstreamAlignNum; + int32_t checkpointNotReadyTasks; + bool dispatchCheckpointTrigger; + int64_t msgVer; } SCheckpointInfo; typedef struct SStreamStatus { @@ -331,22 +343,22 @@ typedef struct SSTaskBasicInfo { int32_t selfChildId; int32_t totalLevel; int8_t taskLevel; - int8_t fillHistory; // is fill history task or not - int64_t triggerParam; // in msec + int8_t fillHistory; // is fill history task or not + int64_t triggerParam; // in msec } SSTaskBasicInfo; typedef struct SStreamDispatchReq SStreamDispatchReq; -typedef struct STokenBucket STokenBucket; -typedef struct SMetaHbInfo SMetaHbInfo; +typedef struct STokenBucket STokenBucket; +typedef struct SMetaHbInfo SMetaHbInfo; typedef struct SDispatchMsgInfo { - SStreamDispatchReq* pData; // current dispatch data - int8_t dispatchMsgType; - int16_t msgType; // dispatch msg type - int32_t retryCount; // retry send data count - int64_t startTs; // dispatch start time, record total elapsed time for dispatch - SArray* pRetryList; // current dispatch successfully completed node of downstream - void* pTimer; // used to dispatch data after a given time duration + SStreamDispatchReq* pData; // current dispatch data + int8_t dispatchMsgType; + int16_t msgType; // dispatch msg type + int32_t retryCount; // retry send data count + int64_t startTs; // dispatch start time, record total elapsed time for dispatch + SArray* pRetryList; // current dispatch successfully completed node of downstream + void* pTimer; // used to dispatch data after a given time duration } SDispatchMsgInfo; typedef struct STaskQueue { @@ -355,8 +367,8 @@ typedef struct STaskQueue { } STaskQueue; typedef struct STaskSchedInfo { - int8_t status; - void* pTimer; + int8_t status; + void* pTimer; } STaskSchedInfo; typedef struct SSinkRecorder { @@ -391,6 +403,7 @@ typedef struct SHistoryTaskInfo { int32_t retryTimes; int32_t waitInterval; int64_t haltVer; // offset in wal when halt the stream task + bool operatorOpen; // false by default } SHistoryTaskInfo; typedef struct STaskOutputInfo { @@ -443,12 +456,11 @@ struct SStreamTask { int64_t checkReqId; SArray* checkReqIds; // shuffle int32_t refCnt; - int64_t checkpointingId; - int32_t checkpointAlignCnt; - int32_t checkpointNotReadyTasks; int32_t transferStateAlignCnt; struct SStreamMeta* pMeta; SSHashObj* pNameMap; + void* pBackend; + int64_t backendRefId; char reserve[256]; }; @@ -456,9 +468,9 @@ typedef struct STaskStartInfo { int64_t startTs; int64_t readyTs; int32_t tasksWillRestart; - int32_t taskStarting; // restart flag, sentinel to guard the restart procedure. - SHashObj* pReadyTaskSet; // tasks that are all ready for running stream processing - SHashObj* pFailedTaskSet; // tasks that are done the check downstream process, may be successful or failed + int32_t taskStarting; // restart flag, sentinel to guard the restart procedure. 
+ SHashObj* pReadyTaskSet; // tasks that are all ready for running stream processing + SHashObj* pFailedTaskSet; // tasks that are done the check downstream process, may be successful or failed int64_t elapsedTime; } STaskStartInfo; @@ -486,20 +498,25 @@ typedef struct SStreamMeta { int32_t walScanCounter; void* streamBackend; int64_t streamBackendRid; - SHashObj* pTaskBackendUnique; + SHashObj* pTaskDbUnique; TdThreadMutex backendMutex; SMetaHbInfo* pHbInfo; STaskUpdateInfo updateInfo; + SHashObj* pUpdateTaskSet; int32_t numOfStreamTasks; // this value should be increased when a new task is added into the meta int32_t numOfPausedTasks; - int32_t chkptNotReadyTasks; int64_t rid; int64_t chkpId; + int32_t chkpCap; SArray* chkpSaved; SArray* chkpInUse; - int32_t chkpCap; SRWLatch chkpDirLock; + + void* qHandle; + int32_t pauseTaskNum; + + void* bkdChkptMgt; } SStreamMeta; int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo); @@ -531,7 +548,7 @@ struct SStreamDispatchReq { int64_t stage; // nodeId from upstream task int64_t streamId; int32_t taskId; - int32_t msgId; // msg id to identify if the incoming msg from the same sender + int32_t msgId; // msg id to identify if the incoming msg from the same sender int32_t srcVgId; int32_t upstreamTaskId; int32_t upstreamChildId; @@ -590,7 +607,7 @@ typedef struct { int32_t downstreamNodeId; int32_t downstreamTaskId; int32_t childId; - int32_t oldStage; + int64_t oldStage; int8_t status; } SStreamTaskCheckRsp; @@ -655,24 +672,28 @@ int32_t tDecodeStreamCheckpointReadyMsg(SDecoder* pDecoder, SStreamCheckpointRea typedef struct STaskStatusEntry { STaskId id; int32_t status; - int32_t stage; + int32_t statusLastDuration; // to record the last duration of current status + int64_t stage; int32_t nodeId; int64_t verStart; // start version in WAL, only valid for source task int64_t verEnd; // end version in WAL, only valid for source task int64_t processedVer; // only valid for source task + int32_t relatedHTask; // has related fill-history task int64_t activeCheckpointId; // current active checkpoint id bool checkpointFailed; // denote if the checkpoint is failed or not + bool inputQChanging; // inputQ is changing or not + int64_t inputQUnchangeCounter; double inputQUsed; // in MiB double inputRate; - double sinkQuota; // existed quota size for sink task - double sinkDataSize; // sink to dest data size + double sinkQuota; // existed quota size for sink task + double sinkDataSize; // sink to dst data size } STaskStatusEntry; typedef struct SStreamHbMsg { int32_t vgId; int32_t numOfTasks; - SArray* pTaskStatus; // SArray - SArray* pUpdateNodes; // SArray, needs update the epsets in stream tasks for those nodes. + SArray* pTaskStatus; // SArray + SArray* pUpdateNodes; // SArray, needs update the epsets in stream tasks for those nodes. 
} SStreamHbMsg; int32_t tEncodeStreamHbMsg(SEncoder* pEncoder, const SStreamHbMsg* pRsp); @@ -696,7 +717,7 @@ typedef struct SNodeUpdateInfo { } SNodeUpdateInfo; typedef struct SStreamTaskNodeUpdateMsg { - int32_t transId; // to identify the msg + int32_t transId; // to identify the msg int64_t streamId; int32_t taskId; SArray* pNodeList; // SArray @@ -753,12 +774,13 @@ const char* streamTaskGetStatusStr(ETaskStatus status); void streamTaskResetStatus(SStreamTask* pTask); void streamTaskSetStatusReady(SStreamTask* pTask); -void initRpcMsg(SRpcMsg* pMsg, int32_t msgType, void* pCont, int32_t contLen); +void initRpcMsg(SRpcMsg* pMsg, int32_t msgType, void* pCont, int32_t contLen); // recover and fill history -void streamTaskCheckDownstream(SStreamTask* pTask); +void streamTaskCheckDownstream(SStreamTask* pTask); -int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage); +int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage, + int64_t* oldStage); int32_t streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList); void streamTaskResetUpstreamStageInfo(SStreamTask* pTask); bool streamTaskAllUpstreamClosed(SStreamTask* pTask); @@ -784,18 +806,17 @@ bool streamHistoryTaskSetVerRangeStep2(SStreamTask* pTask, int64_t latestVer) int32_t streamQueueGetNumOfItems(const SStreamQueue* pQueue); // common -int32_t streamRestoreParam(SStreamTask* pTask); -void streamTaskPause(SStreamTask* pTask, SStreamMeta* pMeta); -void streamTaskResume(SStreamTask* pTask); -void streamTaskEnablePause(SStreamTask* pTask); -int32_t streamTaskSetUpstreamInfo(SStreamTask* pTask, const SStreamTask* pUpstreamTask); -void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet); -void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet); -void streamTaskSetFixedDownstreamInfo(SStreamTask* pTask, const SStreamTask* pDownstreamTask); -int32_t streamTaskReleaseState(SStreamTask* pTask); -int32_t streamTaskReloadState(SStreamTask* pTask); -void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId); -void streamTaskOpenAllUpstreamInput(SStreamTask* pTask); +int32_t streamRestoreParam(SStreamTask* pTask); +void streamTaskPause(SStreamTask* pTask, SStreamMeta* pMeta); +void streamTaskResume(SStreamTask* pTask); +int32_t streamTaskSetUpstreamInfo(SStreamTask* pTask, const SStreamTask* pUpstreamTask); +void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet); +void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet); +void streamTaskSetFixedDownstreamInfo(SStreamTask* pTask, const SStreamTask* pDownstreamTask); +int32_t streamTaskReleaseState(SStreamTask* pTask); +int32_t streamTaskReloadState(SStreamTask* pTask); +void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId); +void streamTaskOpenAllUpstreamInput(SStreamTask* pTask); void streamTaskStatusInit(STaskStatusEntry* pEntry, const SStreamTask* pTask); void streamTaskStatusCopy(STaskStatusEntry* pDst, const STaskStatusEntry* pSrc); @@ -804,7 +825,7 @@ void streamTaskStatusCopy(STaskStatusEntry* pDst, const STaskStatusEntry* pSrc); int32_t streamSetParamForStreamScannerStep1(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow); int32_t streamSetParamForStreamScannerStep2(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow); SScanhistoryDataInfo streamScanHistoryData(SStreamTask* pTask, int64_t 
st); -int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask); +int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask); // agg level int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, SRpcHandleInfo* pInfo); @@ -826,10 +847,11 @@ int32_t streamMetaReopen(SStreamMeta* pMeta); void streamMetaInitBackend(SStreamMeta* pMeta); int32_t streamMetaCommit(SStreamMeta* pMeta); int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta); +int32_t streamMetaReloadAllTasks(SStreamMeta* pMeta); int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta); void streamMetaNotifyClose(SStreamMeta* pMeta); +int32_t streamTaskSetDb(SStreamMeta* pMeta, void* pTask, char* key); void streamMetaStartHb(SStreamMeta* pMeta); -void streamMetaInitForSnode(SStreamMeta* pMeta); bool streamMetaTaskInTimer(SStreamMeta* pMeta); int32_t streamMetaUpdateTaskDownstreamStatus(SStreamTask* pTask, int64_t startTs, int64_t endTs, bool succ); void streamMetaRLock(SStreamMeta* pMeta); @@ -842,7 +864,7 @@ void streamMetaResetStartInfo(STaskStartInfo* pMeta); int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask); int32_t streamTaskBuildCheckpoint(SStreamTask* pTask); -void streamTaskClearCheckInfo(SStreamTask* pTask); +void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg); int32_t streamAlignTransferState(SStreamTask* pTask); int32_t streamBuildAndSendDropTaskMsg(SMsgCb* pMsgCb, int32_t vgId, SStreamTaskId* pTaskId); int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SStreamTask* pTask, @@ -850,8 +872,10 @@ int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHa int32_t buildCheckpointSourceRsp(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SRpcMsg* pMsg, int8_t isSucceed); +SStreamTaskSM* streamCreateStateMachine(SStreamTask* pTask); +void* streamDestroyStateMachine(SStreamTaskSM* pSM); #ifdef __cplusplus } #endif -#endif /* ifndef _STREAM_H_ */ +#endif /* ifndef _STREAM_H_ */ \ No newline at end of file diff --git a/include/os/osFile.h b/include/os/osFile.h index 63483dc906..503535a454 100644 --- a/include/os/osFile.h +++ b/include/os/osFile.h @@ -54,15 +54,18 @@ extern "C" { typedef struct TdFile *TdFilePtr; -#define TD_FILE_CREATE 0x0001 -#define TD_FILE_WRITE 0x0002 -#define TD_FILE_READ 0x0004 -#define TD_FILE_TRUNC 0x0008 -#define TD_FILE_APPEND 0x0010 -#define TD_FILE_TEXT 0x0020 -#define TD_FILE_AUTO_DEL 0x0040 -#define TD_FILE_EXCL 0x0080 -#define TD_FILE_STREAM 0x0100 // Only support taosFprintfFile, taosGetLineFile, taosEOFFile +#define TD_FILE_CREATE 0x0001 +#define TD_FILE_WRITE 0x0002 +#define TD_FILE_READ 0x0004 +#define TD_FILE_TRUNC 0x0008 +#define TD_FILE_APPEND 0x0010 +#define TD_FILE_TEXT 0x0020 +#define TD_FILE_AUTO_DEL 0x0040 +#define TD_FILE_EXCL 0x0080 +#define TD_FILE_STREAM 0x0100 // Only support taosFprintfFile, taosGetLineFile, taosEOFFile +#define TD_FILE_WRITE_THROUGH 0x0200 +#define TD_FILE_CLOEXEC 0x0400 + TdFilePtr taosOpenFile(const char *path, int32_t tdFileOptions); TdFilePtr taosCreateFile(const char *path, int32_t tdFileOptions); diff --git a/include/util/taoserror.h b/include/util/taoserror.h index ce8db162b6..6ab06d06a3 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -124,6 +124,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_INVALID_CFG_VALUE TAOS_DEF_ERROR_CODE(0, 0x0133) #define TSDB_CODE_IP_NOT_IN_WHITE_LIST 
TAOS_DEF_ERROR_CODE(0, 0x0134) +#define TSDB_CODE_FAILED_TO_CONNECT_S3 TAOS_DEF_ERROR_CODE(0, 0x0135) //client #define TSDB_CODE_TSC_INVALID_OPERATION TAOS_DEF_ERROR_CODE(0, 0x0200) @@ -557,7 +558,6 @@ int32_t* taosGetErrno(); #define TSDB_CODE_GRANT_GEN_IVLD_KEY TAOS_DEF_ERROR_CODE(0, 0x0812) #define TSDB_CODE_GRANT_GEN_APP_LIMIT TAOS_DEF_ERROR_CODE(0, 0x0813) #define TSDB_CODE_GRANT_GEN_ENC_IVLD_KLEN TAOS_DEF_ERROR_CODE(0, 0x0814) -#define TSDB_CODE_GRANT_PAR_IVLD_DIST TAOS_DEF_ERROR_CODE(0, 0x0815) // sync // #define TSDB_CODE_SYN_INVALID_CONFIG TAOS_DEF_ERROR_CODE(0, 0x0900) // 2.x diff --git a/include/util/tcompression.h b/include/util/tcompression.h index ab0c22fc9b..75ddbb12e7 100644 --- a/include/util/tcompression.h +++ b/include/util/tcompression.h @@ -139,6 +139,8 @@ int32_t getWordLength(char type); int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements, char *const output, const char type); int32_t tsDecompressFloatImplAvx512(const char *const input, const int32_t nelements, char *const output); int32_t tsDecompressFloatImplAvx2(const char *const input, const int32_t nelements, char *const output); +int32_t tsDecompressTimestampAvx512(const char* const input, const int32_t nelements, char *const output, bool bigEndian); +int32_t tsDecompressTimestampAvx2(const char* const input, const int32_t nelements, char *const output, bool bigEndian); /************************************************************************* * STREAM COMPRESSION diff --git a/include/util/tlog.h b/include/util/tlog.h index a6d146a79e..6d393bfefb 100644 --- a/include/util/tlog.h +++ b/include/util/tlog.h @@ -66,6 +66,7 @@ extern int32_t udfDebugFlag; extern int32_t smaDebugFlag; extern int32_t idxDebugFlag; extern int32_t tdbDebugFlag; +extern int32_t sndDebugFlag; int32_t taosInitLog(const char *logName, int32_t maxFiles); void taosCloseLog(); diff --git a/packaging/tools/set_core.sh b/packaging/tools/set_core.sh index db95aeb343..084c4465f9 100755 --- a/packaging/tools/set_core.sh +++ b/packaging/tools/set_core.sh @@ -38,3 +38,4 @@ source /etc/profile ${csudo}mkdir -p ${corePath} ||: ${csudo}sysctl -w kernel.core_pattern=${corePath}/core-%e-%p ||: ${csudo}echo "${corePath}/core-%e-%p" | ${csudo}tee /proc/sys/kernel/core_pattern ||: +${csudo}echo "kernel.core_pattern = ${corePath}/core_%e-%p" >> /etc/sysctl.conf ||: diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index da24bc0a3b..212c42125e 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -555,6 +555,9 @@ static void *tscCrashReportThreadFp(void *param) { if (pFile) { taosReleaseCrashLogFile(pFile, false); pFile = NULL; + + taosMsleep(sleepTime); + loopTimes = 0; continue; } } else { diff --git a/source/client/src/clientMain.c b/source/client/src/clientMain.c index 2091bcf64f..3a91ae82d5 100644 --- a/source/client/src/clientMain.c +++ b/source/client/src/clientMain.c @@ -876,6 +876,7 @@ int taos_get_current_db(TAOS *taos, char *database, int len, int *required) { code = 0; } taosThreadMutexUnlock(&pTscObj->mutex); + releaseTscObj(*(int64_t *)taos); return code; } diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 6aebba7c02..d05cdc0156 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1216,7 +1216,7 @@ int32_t tmq_subscribe(tmq_t* tmq, const tmq_list_t* topic_list) { } int32_t retryCnt = 0; - while (TSDB_CODE_MND_CONSUMER_NOT_READY == syncAskEp(tmq)) { + while (syncAskEp(tmq) != 0) { if (retryCnt++ > 
MAX_RETRY_COUNT) { tscError("consumer:0x%" PRIx64 ", mnd not ready for subscribe, retry more than 2 minutes", tmq->consumerId); code = TSDB_CODE_MND_CONSUMER_NOT_READY; @@ -1454,6 +1454,9 @@ static bool doUpdateLocalEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) if (epoch < tmq->epoch || (epoch == tmq->epoch && topicNumGet == 0)) { tscInfo("consumer:0x%" PRIx64 " no update ep epoch from %d to epoch %d, incoming topics:%d", tmq->consumerId, tmq->epoch, epoch, topicNumGet); + if(atomic_load_8(&tmq->status) == TMQ_CONSUMER_STATUS__RECOVER){ + atomic_store_8(&tmq->status, TMQ_CONSUMER_STATUS__READY); + } return false; } @@ -1965,9 +1968,14 @@ TAOS_RES* tmq_consumer_poll(tmq_t* tmq, int64_t timeout) { return NULL; } - while (atomic_load_8(&tmq->status) == TMQ_CONSUMER_STATUS__RECOVER) { + while (1) { + if(atomic_load_8(&tmq->status) != TMQ_CONSUMER_STATUS__RECOVER){ + break; + } + tscInfo("consumer:0x%" PRIx64 " tmq status is recover", tmq->consumerId); + int32_t retryCnt = 0; - while (TSDB_CODE_MND_CONSUMER_NOT_READY == syncAskEp(tmq)) { + while (syncAskEp(tmq) != 0) { if (retryCnt++ > 40) { return NULL; } diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index addf0aa629..e78783cf3c 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -126,9 +126,9 @@ void queryCallback(void* param, void* res, int32_t code) { taos_fetch_raw_block_a(res, fetchCallback, param); } -void createNewTable(TAOS* pConn, int32_t index) { +void createNewTable(TAOS* pConn, int32_t index, int32_t numOfRows, int64_t startTs, const char* pVarchar) { char str[1024] = {0}; - sprintf(str, "create table tu%d using st2 tags(%d)", index, index); + sprintf(str, "create table if not exists tu%d using st2 tags(%d)", index, index); TAOS_RES* pRes = taos_query(pConn, str); if (taos_errno(pRes) != 0) { @@ -136,22 +136,43 @@ void createNewTable(TAOS* pConn, int32_t index) { } taos_free_result(pRes); - for (int32_t i = 0; i < 10000; i += 20) { - char sql[1024] = {0}; - sprintf(sql, - "insert into tu%d values(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)" - "(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)" - "(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)" - "(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)", - index, i, i, i + 1, i + 1, i + 2, i + 2, i + 3, i + 3, i + 4, i + 4, i + 5, i + 5, i + 6, i + 6, i + 7, - i + 7, i + 8, i + 8, i + 9, i + 9, i + 10, i + 10, i + 11, i + 11, i + 12, i + 12, i + 13, i + 13, i + 14, - i + 14, i + 15, i + 15, i + 16, i + 16, i + 17, i + 17, i + 18, i + 18, i + 19, i + 19); - TAOS_RES* p = taos_query(pConn, sql); - if (taos_errno(p) != 0) { - printf("failed to insert data, reason:%s\n", taos_errstr(p)); - } + if (startTs == 0) { + for (int32_t i = 0; i < numOfRows; i += 20) { + char sql[1024] = {0}; + sprintf(sql, + "insert into tu%d values(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)" + "(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)" + "(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)" + "(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)", + index, i, i, i + 1, i + 1, i + 2, i + 2, i + 3, i + 3, i + 4, i + 4, i + 5, i + 5, i + 6, i + 6, i + 7, + i + 7, i + 8, i + 8, i + 9, i + 9, i + 10, i + 10, i + 11, i + 11, i + 12, i + 12, i + 13, i + 13, i + 14, + i + 14, i + 15, i + 15, i + 16, i + 16, i + 17, i + 17, i + 18, i + 18, i + 19, i + 19); + TAOS_RES* p = taos_query(pConn, sql); 
+ if (taos_errno(p) != 0) { + printf("failed to insert data, reason:%s\n", taos_errstr(p)); + } - taos_free_result(p); + taos_free_result(p); + } + } else { + for (int32_t i = 0; i < numOfRows; i += 20) { + char sql[1024*50] = {0}; + sprintf(sql, + "insert into tu%d values(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, " + "%d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, " + "'%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')(%ld, %d, '%s')", + index, startTs, i, pVarchar, startTs + 1, i + 1, pVarchar, startTs + 2, i + 2, pVarchar, startTs + 3, i + 3, pVarchar, startTs + 4, i + 4, + pVarchar, startTs + 5, i + 5, pVarchar, startTs + 6, i + 6, pVarchar, startTs + 7, i + 7, pVarchar, startTs + 8, i + 8, pVarchar, startTs + 9, i + 9, + pVarchar, startTs + 10, i + 10, pVarchar, startTs + 11, i + 11, pVarchar, startTs + 12, i + 12, pVarchar, startTs + 13, i + 13, pVarchar, startTs + 14, + i + 14, pVarchar, startTs + 15, i + 15, pVarchar, startTs + 16, i + 16, pVarchar, startTs + 17, i + 17, pVarchar, startTs + 18, i + 18, + pVarchar, startTs + 19, i + 19, pVarchar); + TAOS_RES* p = taos_query(pConn, sql); + if (taos_errno(p) != 0) { + printf("failed to insert data, reason:%s\n", taos_errstr(p)); + } + + taos_free_result(p); + } } } @@ -808,14 +829,7 @@ TEST(clientCase, projection_query_tables) { TAOS_RES* pRes = taos_query(pConn, "use abc1"); taos_free_result(pRes); - pRes = taos_query(pConn, "create stable st1 (ts timestamp, k int) tags(a int)"); - if (taos_errno(pRes) != 0) { - printf("failed to create table tu, reason:%s\n", taos_errstr(pRes)); - } - - taos_free_result(pRes); - - pRes = taos_query(pConn, "create stable st2 (ts timestamp, k int) tags(a int)"); + pRes = taos_query(pConn, "create stable st2 (ts timestamp, k int, f varchar(4096)) tags(a int)"); if (taos_errno(pRes) != 0) { printf("failed to create table tu, reason:%s\n", taos_errstr(pRes)); } @@ -828,28 +842,32 @@ TEST(clientCase, projection_query_tables) { taos_free_result(pRes); int64_t start = 1685959190000; + const char* pstr = + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefgh" + "ijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnop" + "qrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwx" + "yzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdef" + "ghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz!@#$%^&&*&^^%$#@!qQWERTYUIOPASDFGHJKL:" + "QWERTYUIOP{}"; - int32_t code = -1; - for(int32_t i = 0; i < 1000000; ++i) { - char t[512] = {0}; + for(int32_t i = 0; i < 10000; ++i) { + char str[1024] = {0}; + sprintf(str, "create table if not exists tu%d using st2 tags(%d)", i, i); - sprintf(t, "insert into t1 values(now, %d)", i); - while(1) { - void* p = taos_query(pConn, t); - code = taos_errno(p); - taos_free_result(p); - if (code != 0) { - printf("insert data error, retry\n"); - } else { - break; - } + TAOS_RES* px = taos_query(pConn, str); + if (taos_errno(px) != 0) { + printf("failed to create table tu, reason:%s\n", taos_errstr(pRes)); + } + taos_free_result(px); + } + + for(int32_t j = 0; j < 5000; ++j) { + start += 20; + for (int32_t i = 0; i < 10000; ++i) { + createNewTable(pConn, i, 20, start, pstr); } } - for (int32_t i = 
0; i < 1; ++i) { - printf("create table :%d\n", i); - createNewTable(pConn, i); - } // // pRes = taos_query(pConn, "select * from tu"); // if (taos_errno(pRes) != 0) { diff --git a/source/common/src/cos.c b/source/common/src/cos.c index 1dca5eda9b..7c8676e9f5 100644 --- a/source/common/src/cos.c +++ b/source/common/src/cos.c @@ -86,7 +86,7 @@ typedef struct { char err_msg[128]; S3Status status; uint64_t content_length; - char *buf; + char * buf; int64_t buf_pos; } TS3SizeCBD; @@ -270,7 +270,7 @@ typedef struct list_parts_callback_data { typedef struct MultipartPartData { put_object_callback_data put_object_data; int seq; - UploadManager *manager; + UploadManager * manager; } MultipartPartData; static int putObjectDataCallback(int bufferSize, char *buffer, void *callbackData) { @@ -317,7 +317,7 @@ S3Status MultipartResponseProperiesCallback(const S3ResponseProperties *properti MultipartPartData *data = (MultipartPartData *)callbackData; int seq = data->seq; - const char *etag = properties->eTag; + const char * etag = properties->eTag; data->manager->etags[seq - 1] = strdup(etag); data->manager->next_etags_pos = seq; return S3StatusOK; @@ -450,10 +450,10 @@ int32_t s3PutObjectFromFile2(const char *file, const char *object) { int32_t code = 0; const char *key = object; // const char *uploadId = 0; - const char *filename = 0; + const char * filename = 0; uint64_t contentLength = 0; - const char *cacheControl = 0, *contentType = 0, *md5 = 0; - const char *contentDispositionFilename = 0, *contentEncoding = 0; + const char * cacheControl = 0, *contentType = 0, *md5 = 0; + const char * contentDispositionFilename = 0, *contentEncoding = 0; int64_t expires = -1; S3CannedAcl cannedAcl = S3CannedAclPrivate; int metaPropertiesCount = 0; @@ -467,6 +467,7 @@ int32_t s3PutObjectFromFile2(const char *file, const char *object) { // data.infileFD = NULL; // data.noStatus = noStatus; + // uError("ERROR: %s stat file %s: ", __func__, file); if (taosStatFile(file, &contentLength, NULL, NULL) < 0) { uError("ERROR: %s Failed to stat file %s: ", __func__, file); code = TAOS_SYSTEM_ERROR(errno); @@ -647,7 +648,7 @@ typedef struct list_bucket_callback_data { char nextMarker[1024]; int keyCount; int allDetails; - SArray *objectArray; + SArray * objectArray; } list_bucket_callback_data; static S3Status listBucketCallback(int isTruncated, const char *nextMarker, int contentsCount, @@ -692,11 +693,11 @@ static void s3FreeObjectKey(void *pItem) { static SArray *getListByPrefix(const char *prefix) { S3BucketContext bucketContext = {0, tsS3BucketName, protocolG, uriStyleG, tsS3AccessKeyId, tsS3AccessKeySecret, - 0, awsRegionG}; + 0, awsRegionG}; S3ListBucketHandler listBucketHandler = {{&responsePropertiesCallbackNull, &responseCompleteCallback}, &listBucketCallback}; - const char *marker = 0, *delimiter = 0; + const char * marker = 0, *delimiter = 0; int maxkeys = 0, allDetails = 0; list_bucket_callback_data data; data.objectArray = taosArrayInit(32, sizeof(void *)); @@ -737,7 +738,7 @@ static SArray *getListByPrefix(const char *prefix) { void s3DeleteObjects(const char *object_name[], int nobject) { S3BucketContext bucketContext = {0, tsS3BucketName, protocolG, uriStyleG, tsS3AccessKeyId, tsS3AccessKeySecret, - 0, awsRegionG}; + 0, awsRegionG}; S3ResponseHandler responseHandler = {0, &responseCompleteCallback}; for (int i = 0; i < nobject; ++i) { @@ -788,7 +789,7 @@ int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, const char *ifMatch = 0, *ifNotMatch = 0; S3BucketContext bucketContext = {0, 
tsS3BucketName, protocolG, uriStyleG, tsS3AccessKeyId, tsS3AccessKeySecret, - 0, awsRegionG}; + 0, awsRegionG}; S3GetConditions getConditions = {ifModifiedSince, ifNotModifiedSince, ifMatch, ifNotMatch}; S3GetObjectHandler getObjectHandler = {{&responsePropertiesCallback, &responseCompleteCallback}, &getObjectDataCallback}; @@ -826,7 +827,7 @@ int32_t s3GetObjectToFile(const char *object_name, char *fileName) { const char *ifMatch = 0, *ifNotMatch = 0; S3BucketContext bucketContext = {0, tsS3BucketName, protocolG, uriStyleG, tsS3AccessKeyId, tsS3AccessKeySecret, - 0, awsRegionG}; + 0, awsRegionG}; S3GetConditions getConditions = {ifModifiedSince, ifNotModifiedSince, ifMatch, ifNotMatch}; S3GetObjectHandler getObjectHandler = {{&responsePropertiesCallbackNull, &responseCompleteCallback}, &getObjectCallback}; @@ -857,7 +858,7 @@ int32_t s3GetObjectsByPrefix(const char *prefix, const char *path) { if (objectArray == NULL) return -1; for (size_t i = 0; i < taosArrayGetSize(objectArray); i++) { - char *object = taosArrayGetP(objectArray, i); + char * object = taosArrayGetP(objectArray, i); const char *tmp = strchr(object, '/'); tmp = (tmp == NULL) ? object : tmp + 1; char fileName[PATH_MAX] = {0}; @@ -891,6 +892,8 @@ long s3Size(const char *object_name) { if ((cbd.status != S3StatusOK) && (cbd.status != S3StatusErrorPreconditionFailed)) { s3PrintError(__FILE__, __LINE__, __func__, cbd.status, cbd.err_msg); + + return -1; } size = cbd.content_length; @@ -946,12 +949,12 @@ static void s3InitRequestOptions(cos_request_options_t *options, int is_cname) { int32_t s3PutObjectFromFile(const char *file_str, const char *object_str) { int32_t code = 0; - cos_pool_t *p = NULL; + cos_pool_t * p = NULL; int is_cname = 0; - cos_status_t *s = NULL; + cos_status_t * s = NULL; cos_request_options_t *options = NULL; cos_string_t bucket, object, file; - cos_table_t *resp_headers; + cos_table_t * resp_headers; // int traffic_limit = 0; cos_pool_create(&p, NULL); @@ -982,14 +985,14 @@ int32_t s3PutObjectFromFile(const char *file_str, const char *object_str) { int32_t s3PutObjectFromFile2(const char *file_str, const char *object_str) { int32_t code = 0; - cos_pool_t *p = NULL; + cos_pool_t * p = NULL; int is_cname = 0; - cos_status_t *s = NULL; - cos_request_options_t *options = NULL; + cos_status_t * s = NULL; + cos_request_options_t * options = NULL; cos_string_t bucket, object, file; - cos_table_t *resp_headers; + cos_table_t * resp_headers; int traffic_limit = 0; - cos_table_t *headers = NULL; + cos_table_t * headers = NULL; cos_resumable_clt_params_t *clt_params = NULL; cos_pool_create(&p, NULL); @@ -1022,11 +1025,11 @@ int32_t s3PutObjectFromFile2(const char *file_str, const char *object_str) { } void s3DeleteObjectsByPrefix(const char *prefix_str) { - cos_pool_t *p = NULL; + cos_pool_t * p = NULL; cos_request_options_t *options = NULL; int is_cname = 0; cos_string_t bucket; - cos_status_t *s = NULL; + cos_status_t * s = NULL; cos_string_t prefix; cos_pool_create(&p, NULL); @@ -1041,10 +1044,10 @@ void s3DeleteObjectsByPrefix(const char *prefix_str) { } void s3DeleteObjects(const char *object_name[], int nobject) { - cos_pool_t *p = NULL; + cos_pool_t * p = NULL; int is_cname = 0; cos_string_t bucket; - cos_table_t *resp_headers = NULL; + cos_table_t * resp_headers = NULL; cos_request_options_t *options = NULL; cos_list_t object_list; cos_list_t deleted_object_list; @@ -1078,14 +1081,14 @@ void s3DeleteObjects(const char *object_name[], int nobject) { bool s3Exists(const char *object_name) { bool ret = false; 
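/*
 * Note on the s3Size() change a few hunks above: a failed size lookup now
 * returns -1 instead of falling through, so callers can tell a lookup error
 * apart from a legitimately empty object. A minimal caller-side sketch, kept
 * inside this comment; the snippet is illustrative only, s3Size() is the
 * function patched above.
 *
 *   long size = s3Size(object_name);
 *   if (size < 0) {
 *     // lookup failed: do not treat it as an object of size 0
 *     return -1;
 *   }
 */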
- cos_pool_t *p = NULL; + cos_pool_t * p = NULL; int is_cname = 0; - cos_status_t *s = NULL; - cos_request_options_t *options = NULL; + cos_status_t * s = NULL; + cos_request_options_t * options = NULL; cos_string_t bucket; cos_string_t object; - cos_table_t *resp_headers; - cos_table_t *headers = NULL; + cos_table_t * resp_headers; + cos_table_t * headers = NULL; cos_object_exist_status_e object_exist; cos_pool_create(&p, NULL); @@ -1112,15 +1115,15 @@ bool s3Exists(const char *object_name) { bool s3Get(const char *object_name, const char *path) { bool ret = false; - cos_pool_t *p = NULL; + cos_pool_t * p = NULL; int is_cname = 0; - cos_status_t *s = NULL; + cos_status_t * s = NULL; cos_request_options_t *options = NULL; cos_string_t bucket; cos_string_t object; cos_string_t file; - cos_table_t *resp_headers = NULL; - cos_table_t *headers = NULL; + cos_table_t * resp_headers = NULL; + cos_table_t * headers = NULL; int traffic_limit = 0; //创建内存池 @@ -1156,15 +1159,15 @@ bool s3Get(const char *object_name, const char *path) { int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t block_size, bool check, uint8_t **ppBlock) { (void)check; int32_t code = 0; - cos_pool_t *p = NULL; + cos_pool_t * p = NULL; int is_cname = 0; - cos_status_t *s = NULL; + cos_status_t * s = NULL; cos_request_options_t *options = NULL; cos_string_t bucket; cos_string_t object; - cos_table_t *resp_headers; - cos_table_t *headers = NULL; - cos_buf_t *content = NULL; + cos_table_t * resp_headers; + cos_table_t * headers = NULL; + cos_buf_t * content = NULL; // cos_string_t file; // int traffic_limit = 0; char range_buf[64]; @@ -1258,7 +1261,7 @@ void s3EvictCache(const char *path, long object_size) { terrno = TAOS_SYSTEM_ERROR(errno); vError("failed to open %s since %s", dir_name, terrstr()); } - SArray *evict_files = taosArrayInit(16, sizeof(SEvictFile)); + SArray * evict_files = taosArrayInit(16, sizeof(SEvictFile)); tdbDirEntryPtr pDirEntry; while ((pDirEntry = taosReadDir(pDir)) != NULL) { char *name = taosGetDirEntryName(pDirEntry); @@ -1300,13 +1303,13 @@ void s3EvictCache(const char *path, long object_size) { long s3Size(const char *object_name) { long size = 0; - cos_pool_t *p = NULL; + cos_pool_t * p = NULL; int is_cname = 0; - cos_status_t *s = NULL; + cos_status_t * s = NULL; cos_request_options_t *options = NULL; cos_string_t bucket; cos_string_t object; - cos_table_t *resp_headers = NULL; + cos_table_t * resp_headers = NULL; //创建内存池 cos_pool_create(&p, NULL); @@ -1352,5 +1355,6 @@ int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, void s3EvictCache(const char *path, long object_size) {} long s3Size(const char *object_name) { return 0; } int32_t s3GetObjectsByPrefix(const char *prefix, const char *path) { return 0; } +int32_t s3GetObjectToFile(const char *object_name, char *fileName) { return 0; } #endif diff --git a/source/common/src/systable.c b/source/common/src/systable.c index a1f8d74571..1623d9f062 100644 --- a/source/common/src/systable.c +++ b/source/common/src/systable.c @@ -140,7 +140,7 @@ static const SSysDbTableSchema userStbsSchema[] = { {.name = "columns", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = false}, {.name = "tags", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = false}, {.name = "last_update", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = false}, - {.name = "table_comment", .bytes = TSDB_TB_COMMENT_LEN + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "table_comment", .bytes = 
TSDB_TB_COMMENT_LEN - 1 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "watermark", .bytes = 64 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "max_delay", .bytes = 64 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "rollup", .bytes = 128 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, @@ -167,7 +167,7 @@ static const SSysDbTableSchema streamTaskSchema[] = { {.name = "node_id", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = false}, {.name = "level", .bytes = 10 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "status", .bytes = 15 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, - {.name = "stage", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = false}, + {.name = "stage", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, {.name = "in_queue", .bytes = 20, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, // {.name = "out_queue", .bytes = 20, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "info", .bytes = 25, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, diff --git a/source/common/src/tdataformat.c b/source/common/src/tdataformat.c index d220da0d84..f97f9c0c11 100644 --- a/source/common/src/tdataformat.c +++ b/source/common/src/tdataformat.c @@ -3590,9 +3590,9 @@ void (*tColDataCalcSMA[])(SColData *pColData, int64_t *sum, int64_t *max, int64_ tColDataCalcSMAUInt, // TSDB_DATA_TYPE_UINT tColDataCalcSMAUBigInt, // TSDB_DATA_TYPE_UBIGINT tColDataCalcSMAVarType, // TSDB_DATA_TYPE_JSON - NULL, // TSDB_DATA_TYPE_VARBINARY - NULL, // TSDB_DATA_TYPE_DECIMAL - NULL, // TSDB_DATA_TYPE_BLOB + tColDataCalcSMAVarType, // TSDB_DATA_TYPE_VARBINARY + tColDataCalcSMAVarType, // TSDB_DATA_TYPE_DECIMAL + tColDataCalcSMAVarType, // TSDB_DATA_TYPE_BLOB NULL, // TSDB_DATA_TYPE_MEDIUMBLOB tColDataCalcSMAVarType // TSDB_DATA_TYPE_GEOMETRY }; diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index f5df2fef21..ced68c134e 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -107,7 +107,7 @@ bool tsEnableTelem = true; int32_t tsTelemInterval = 43200; char tsTelemServer[TSDB_FQDN_LEN] = "telemetry.tdengine.com"; uint16_t tsTelemPort = 80; -char *tsTelemUri = "/report"; +char * tsTelemUri = "/report"; #ifdef TD_ENTERPRISE bool tsEnableCrashReport = false; @@ -354,11 +354,24 @@ static int32_t taosLoadCfg(SConfig *pCfg, const char **envCmd, const char *input char cfgFile[PATH_MAX + 100] = {0}; taosExpandDir(inputCfgDir, cfgDir, PATH_MAX); + char lastC = cfgDir[strlen(cfgDir) - 1]; + char *tdDirsep = TD_DIRSEP; + if (lastC == '\\' || lastC == '/') { + tdDirsep = ""; + } if (taosIsDir(cfgDir)) { #ifdef CUS_PROMPT - snprintf(cfgFile, sizeof(cfgFile), "%s" TD_DIRSEP "%s.cfg", cfgDir, CUS_PROMPT); + snprintf(cfgFile, sizeof(cfgFile), + "%s" + "%s" + "%s.cfg", + cfgDir, tdDirsep, CUS_PROMPT); #else - snprintf(cfgFile, sizeof(cfgFile), "%s" TD_DIRSEP "taos.cfg", cfgDir); + snprintf(cfgFile, sizeof(cfgFile), + "%s" + "%s" + "taos.cfg", + cfgDir, tdDirsep); #endif } else { tstrncpy(cfgFile, cfgDir, sizeof(cfgDir)); @@ -431,6 +444,7 @@ static int32_t taosAddServerLogCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "tdbDebugFlag", tdbDebugFlag, 0, 255, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "metaDebugFlag", metaDebugFlag, 0, 255, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "stDebugFlag", stDebugFlag, 0, 255, 
CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "sndDebugFlag", sndDebugFlag, 0, 255, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; return 0; } @@ -507,6 +521,7 @@ static int32_t taosAddClientCfg(SConfig *pCfg) { tsNumOfTaskQueueThreads = tsNumOfCores / 2; tsNumOfTaskQueueThreads = TMAX(tsNumOfTaskQueueThreads, 4); + if (tsNumOfTaskQueueThreads >= 50) { tsNumOfTaskQueueThreads = 50; } @@ -722,6 +737,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddBool(pCfg, "disableStream", tsDisableStream, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; if (cfgAddInt64(pCfg, "streamBufferSize", tsStreamBufferSize, 0, INT64_MAX, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "checkpointInterval", tsStreamCheckpointInterval, 60, 1200, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; @@ -951,6 +967,7 @@ static void taosSetServerLogCfg(SConfig *pCfg) { tdbDebugFlag = cfgGetItem(pCfg, "tdbDebugFlag")->i32; metaDebugFlag = cfgGetItem(pCfg, "metaDebugFlag")->i32; stDebugFlag = cfgGetItem(pCfg, "stDebugFlag")->i32; + sndDebugFlag = cfgGetItem(pCfg, "sndDebugFlag")->i32; } static int32_t taosSetSlowLogScope(char *pScope) { @@ -1339,7 +1356,7 @@ void taosCleanupCfg() { typedef struct { const char *optionName; - void *optionVar; + void * optionVar; } OptionNameAndVar; static int32_t taosCfgSetOption(OptionNameAndVar *pOptions, int32_t optionSize, SConfigItem *pItem, bool isDebugflag) { @@ -1352,7 +1369,7 @@ static int32_t taosCfgSetOption(OptionNameAndVar *pOptions, int32_t optionSize, switch (pItem->dtype) { case CFG_DTYPE_BOOL: { int32_t flag = pItem->i32; - bool *pVar = pOptions[d].optionVar; + bool * pVar = pOptions[d].optionVar; uInfo("%s set from %d to %d", optName, *pVar, flag); *pVar = flag; terrno = TSDB_CODE_SUCCESS; @@ -1424,7 +1441,7 @@ static int32_t taosCfgDynamicOptionsForServer(SConfig *pCfg, char *name) { {"smaDebugFlag", &smaDebugFlag}, {"idxDebugFlag", &idxDebugFlag}, {"tdbDebugFlag", &tdbDebugFlag}, {"tmrDebugFlag", &tmrDebugFlag}, {"uDebugFlag", &uDebugFlag}, {"smaDebugFlag", &smaDebugFlag}, {"rpcDebugFlag", &rpcDebugFlag}, {"qDebugFlag", &qDebugFlag}, {"metaDebugFlag", &metaDebugFlag}, - {"jniDebugFlag", &jniDebugFlag}, {"stDebugFlag", &stDebugFlag}, + {"jniDebugFlag", &jniDebugFlag}, {"stDebugFlag", &stDebugFlag}, {"sndDebugFlag", &sndDebugFlag}, }; static OptionNameAndVar options[] = { @@ -1732,7 +1749,8 @@ void taosSetAllDebugFlag(int32_t flag, bool rewrite) { taosSetDebugFlag(&tdbDebugFlag, "tdbDebugFlag", flag, rewrite); taosSetDebugFlag(&metaDebugFlag, "metaDebugFlag", flag, rewrite); taosSetDebugFlag(&stDebugFlag, "stDebugFlag", flag, rewrite); + taosSetDebugFlag(&sndDebugFlag, "sndDebugFlag", flag, rewrite); uInfo("all debug flag are set to %d", flag); } -int8_t taosGranted() { return atomic_load_8(&tsGrant); } +int8_t taosGranted() { return atomic_load_8(&tsGrant); } \ No newline at end of file diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index dc3ba7934f..01b1df9d5f 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -7152,7 +7152,7 @@ int32_t tDeserializeSMDropStreamReq(void *buf, int32_t bufLen, SMDropStreamReq * return 0; } -void tFreeSMDropStreamReq(SMDropStreamReq *pReq) { +void tFreeMDropStreamReq(SMDropStreamReq *pReq) { FREESQL(); } diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c index 18da1d638c..d6bdaf51bc 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c +++ 
b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c @@ -127,6 +127,9 @@ static void *dmCrashReportThreadFp(void *param) { if (pFile) { taosReleaseCrashLogFile(pFile, false); pFile = NULL; + + taosMsleep(sleepTime); + loopTimes = 0; continue; } } else { diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmFile.c b/source/dnode/mgmt/mgmt_mnode/src/mmFile.c index 64e18ef06d..27baa5ede5 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmFile.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmFile.c @@ -169,7 +169,7 @@ int32_t mmWriteFile(const char *path, const SMnodeOpt *pOption) { if (buffer == NULL) goto _OVER; terrno = 0; - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (pFile == NULL) goto _OVER; int32_t len = strlen(buffer); diff --git a/source/dnode/mgmt/mgmt_snode/src/smHandle.c b/source/dnode/mgmt/mgmt_snode/src/smHandle.c index b29c5c1eb4..6de29f8513 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smHandle.c +++ b/source/dnode/mgmt/mgmt_snode/src/smHandle.c @@ -73,6 +73,7 @@ SArray *smGetMsgHandles() { SArray *pArray = taosArrayInit(4, sizeof(SMgmtHandle)); if (pArray == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_UPDATE, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DEPLOY, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DROP, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RUN, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; @@ -83,11 +84,13 @@ SArray *smGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_STOP, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_HTASK_DROP, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT_READY, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + code = 0; _OVER: diff --git a/source/dnode/mgmt/mgmt_snode/src/smInt.c b/source/dnode/mgmt/mgmt_snode/src/smInt.c index 47c2993014..56744e4654 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smInt.c +++ b/source/dnode/mgmt/mgmt_snode/src/smInt.c @@ -76,9 +76,14 @@ int32_t smOpen(SMgmtInputOpt *pInput, SMgmtOutputOpt *pOutput) { return 0; } +static int32_t smStartSnodes(SSnodeMgmt *pMgmt) { + return sndInit(pMgmt->pSnode); +} + SMgmtFunc smGetMgmtFunc() { SMgmtFunc mgmtFunc = {0}; mgmtFunc.openFp = smOpen; + mgmtFunc.startFp = (NodeStartFp)smStartSnodes; mgmtFunc.closeFp = (NodeCloseFp)smClose; mgmtFunc.createFp = (NodeCreateFp)smProcessCreateReq; mgmtFunc.dropFp = (NodeDropFp)smProcessDropReq; diff --git a/source/dnode/mgmt/mgmt_snode/src/smWorker.c 
b/source/dnode/mgmt/mgmt_snode/src/smWorker.c index e8402eb7c0..9220d3395d 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smWorker.c +++ b/source/dnode/mgmt/mgmt_snode/src/smWorker.c @@ -193,10 +193,6 @@ int32_t smPutNodeMsgToStreamQueue(SSnodeMgmt *pMgmt, SRpcMsg *pMsg) { SSingleWorker *pWorker = &pMgmt->streamWorker; dTrace("msg:%p, put into worker %s", pMsg, pWorker->name); - if (pMsg->msgType == TDMT_STREAM_TASK_DISPATCH) { - sndEnqueueStreamDispatch(pMgmt->pSnode, pMsg); - } else { - taosWriteQitem(pWorker->queue, pMsg); - } + taosWriteQitem(pWorker->queue, pMsg); return 0; } diff --git a/source/dnode/mgmt/mgmt_vnode/inc/vmInt.h b/source/dnode/mgmt/mgmt_vnode/inc/vmInt.h index 34f2b5c446..d9c368b582 100644 --- a/source/dnode/mgmt/mgmt_vnode/inc/vmInt.h +++ b/source/dnode/mgmt/mgmt_vnode/inc/vmInt.h @@ -84,8 +84,10 @@ typedef struct { } SVnodeThread; // vmInt.c +int32_t vmGetPrimaryDisk(SVnodeMgmt *pMgmt, int32_t vgId); int32_t vmAllocPrimaryDisk(SVnodeMgmt *pMgmt, int32_t vgId); SVnodeObj *vmAcquireVnode(SVnodeMgmt *pMgmt, int32_t vgId); +SVnodeObj *vmAcquireVnodeImpl(SVnodeMgmt *pMgmt, int32_t vgId, bool strict); void vmReleaseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode); int32_t vmOpenVnode(SVnodeMgmt *pMgmt, SWrapperCfg *pCfg, SVnode *pImpl); void vmCloseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode, bool commitAndRemoveWal); diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmFile.c b/source/dnode/mgmt/mgmt_vnode/src/vmFile.c index ed32e75d18..53139330a3 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmFile.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmFile.c @@ -200,7 +200,7 @@ int32_t vmWriteVnodeListToFile(SVnodeMgmt *pMgmt) { if (buffer == NULL) goto _OVER; terrno = 0; - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (pFile == NULL) goto _OVER; int32_t len = strlen(buffer); diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 64bf875a8e..a535ab17d7 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -281,8 +281,8 @@ int32_t vmProcessCreateVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { vmGenerateWrapperCfg(pMgmt, &req, &wrapperCfg); - SVnodeObj *pVnode = vmAcquireVnode(pMgmt, req.vgId); - if (pVnode != NULL && !pVnode->failed) { + SVnodeObj *pVnode = vmAcquireVnodeImpl(pMgmt, req.vgId, false); + if (pVnode != NULL && (req.replica == 1 || !pVnode->failed)) { dError("vgId:%d, already exist", req.vgId); tFreeSCreateVnodeReq(&req); vmReleaseVnode(pMgmt, pVnode); @@ -291,10 +291,11 @@ int32_t vmProcessCreateVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return 0; } - ASSERT(pVnode == NULL || pVnode->failed); - - wrapperCfg.diskPrimary = pVnode ? 
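/*
 * Around this hunk (vmProcessCreateVnodeReq): instead of reusing the disk of
 * the acquired vnode object, the new code asks the vnode hash directly via
 * vmGetPrimaryDisk(), added in vmInt.c later in this patch, and only falls
 * back to vmAllocPrimaryDisk() when no disk is recorded (diskId < 0). This
 * keeps the previous primary disk for a vgroup that is being re-created while
 * its old object still sits in the hash in the failed state.
 */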
pVnode->diskPrimary : vmAllocPrimaryDisk(pMgmt, vnodeCfg.vgId); - int32_t diskPrimary = wrapperCfg.diskPrimary; + int32_t diskPrimary = vmGetPrimaryDisk(pMgmt, vnodeCfg.vgId); + if (diskPrimary < 0) { + diskPrimary = vmAllocPrimaryDisk(pMgmt, vnodeCfg.vgId); + } + wrapperCfg.diskPrimary = diskPrimary; snprintf(path, TSDB_FILENAME_LEN, "vnode%svnode%d", TD_DIRSEP, vnodeCfg.vgId); @@ -371,7 +372,7 @@ int32_t vmProcessAlterVnodeTypeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { TMSG_INFO(pMsg->msgType)); SVnodeObj *pVnode = vmAcquireVnode(pMgmt, req.vgId); - if (pVnode == NULL || pVnode->failed) { + if (pVnode == NULL) { dError("vgId:%d, failed to alter vnode type since %s", req.vgId, terrstr()); terrno = TSDB_CODE_VND_NOT_EXIST; if (pVnode) vmReleaseVnode(pMgmt, pVnode); @@ -489,7 +490,7 @@ int32_t vmProcessCheckLearnCatchupReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { req.vgId, TMSG_INFO(pMsg->msgType)); SVnodeObj *pVnode = vmAcquireVnode(pMgmt, req.vgId); - if (pVnode == NULL || pVnode->failed) { + if (pVnode == NULL) { dError("vgId:%d, failed to alter vnode type since %s", req.vgId, terrstr()); terrno = TSDB_CODE_VND_NOT_EXIST; if (pVnode) vmReleaseVnode(pMgmt, pVnode); @@ -532,7 +533,7 @@ int32_t vmProcessDisableVnodeWriteReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { dInfo("vgId:%d, vnode write disable:%d", req.vgId, req.disable); SVnodeObj *pVnode = vmAcquireVnode(pMgmt, req.vgId); - if (pVnode == NULL || pVnode->failed) { + if (pVnode == NULL) { dError("vgId:%d, failed to disable write since %s", req.vgId, terrstr()); terrno = TSDB_CODE_VND_NOT_EXIST; if (pVnode) vmReleaseVnode(pMgmt, pVnode); @@ -565,7 +566,7 @@ int32_t vmProcessAlterHashRangeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { dInfo("vgId:%d, start to alter vnode hashrange:[%u, %u], dstVgId:%d", req.srcVgId, req.hashBegin, req.hashEnd, req.dstVgId); pVnode = vmAcquireVnode(pMgmt, srcVgId); - if (pVnode == NULL || pVnode->failed) { + if (pVnode == NULL) { dError("vgId:%d, failed to alter hashrange since %s", srcVgId, terrstr()); terrno = TSDB_CODE_VND_NOT_EXIST; if (pVnode) vmReleaseVnode(pMgmt, pVnode); @@ -680,7 +681,7 @@ int32_t vmProcessAlterVnodeReplicaReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { } SVnodeObj *pVnode = vmAcquireVnode(pMgmt, vgId); - if (pVnode == NULL || pVnode->failed) { + if (pVnode == NULL) { dError("vgId:%d, failed to alter replica since %s", vgId, terrstr()); terrno = TSDB_CODE_VND_NOT_EXIST; if (pVnode) vmReleaseVnode(pMgmt, pVnode); @@ -748,7 +749,7 @@ int32_t vmProcessDropVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return -1; } - SVnodeObj *pVnode = vmAcquireVnode(pMgmt, vgId); + SVnodeObj *pVnode = vmAcquireVnodeImpl(pMgmt, vgId, false); if (pVnode == NULL) { dInfo("vgId:%d, failed to drop since %s", vgId, terrstr()); terrno = TSDB_CODE_VND_NOT_EXIST; @@ -832,6 +833,7 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_STOP, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_HTASK_DROP, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT_READY, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_UPDATE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git 
a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c index 21b791eb4d..be88e8b3fd 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c @@ -19,6 +19,19 @@ #include "vnd.h" #include "libs/function/tudf.h" +int32_t vmGetPrimaryDisk(SVnodeMgmt *pMgmt, int32_t vgId) { + int32_t diskId = -1; + SVnodeObj *pVnode = NULL; + + taosThreadRwlockRdlock(&pMgmt->lock); + taosHashGetDup(pMgmt->hash, &vgId, sizeof(int32_t), (void *)&pVnode); + if (pVnode != NULL) { + diskId = pVnode->diskPrimary; + } + taosThreadRwlockUnlock(&pMgmt->lock); + return diskId; +} + int32_t vmAllocPrimaryDisk(SVnodeMgmt *pMgmt, int32_t vgId) { STfs *pTfs = pMgmt->pTfs; int32_t diskId = 0; @@ -74,12 +87,12 @@ int32_t vmAllocPrimaryDisk(SVnodeMgmt *pMgmt, int32_t vgId) { return diskId; } -SVnodeObj *vmAcquireVnode(SVnodeMgmt *pMgmt, int32_t vgId) { +SVnodeObj *vmAcquireVnodeImpl(SVnodeMgmt *pMgmt, int32_t vgId, bool strict) { SVnodeObj *pVnode = NULL; taosThreadRwlockRdlock(&pMgmt->lock); taosHashGetDup(pMgmt->hash, &vgId, sizeof(int32_t), (void *)&pVnode); - if (pVnode == NULL || pVnode->dropped) { + if (pVnode == NULL || strict && (pVnode->dropped || pVnode->failed)) { terrno = TSDB_CODE_VND_INVALID_VGROUP_ID; pVnode = NULL; } else { @@ -91,6 +104,8 @@ SVnodeObj *vmAcquireVnode(SVnodeMgmt *pMgmt, int32_t vgId) { return pVnode; } +SVnodeObj *vmAcquireVnode(SVnodeMgmt *pMgmt, int32_t vgId) { return vmAcquireVnodeImpl(pMgmt, vgId, true); } + void vmReleaseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode) { if (pVnode == NULL) return; @@ -100,6 +115,15 @@ void vmReleaseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode) { taosThreadRwlockUnlock(&pMgmt->lock); } +static void vmFreeVnodeObj(SVnodeObj **ppVnode) { + if (!ppVnode || !(*ppVnode)) return; + + SVnodeObj *pVnode = *ppVnode; + taosMemoryFree(pVnode->path); + taosMemoryFree(pVnode); + ppVnode[0] = NULL; +} + int32_t vmOpenVnode(SVnodeMgmt *pMgmt, SWrapperCfg *pCfg, SVnode *pImpl) { SVnodeObj *pVnode = taosMemoryCalloc(1, sizeof(SVnodeObj)); if (pVnode == NULL) { @@ -134,6 +158,12 @@ int32_t vmOpenVnode(SVnodeMgmt *pMgmt, SWrapperCfg *pCfg, SVnode *pImpl) { } taosThreadRwlockWrlock(&pMgmt->lock); + SVnodeObj *pOld = NULL; + taosHashGetDup(pMgmt->hash, &pVnode->vgId, sizeof(int32_t), (void *)&pOld); + if (pOld) { + ASSERT(pOld->failed); + vmFreeVnodeObj(&pOld); + } int32_t code = taosHashPut(pMgmt->hash, &pVnode->vgId, sizeof(int32_t), &pVnode, sizeof(SVnodeObj *)); taosThreadRwlockUnlock(&pMgmt->lock); @@ -223,8 +253,7 @@ _closed: vnodeDestroy(pVnode->vgId, path, pMgmt->pTfs); } - taosMemoryFree(pVnode->path); - taosMemoryFree(pVnode); + vmFreeVnodeObj(&pVnode); } static int32_t vmRestoreVgroupId(SWrapperCfg *pCfg, STfs *pTfs) { @@ -621,7 +650,7 @@ static void *vmRestoreVnodeInThread(void *param) { for (int32_t v = 0; v < pThread->vnodeNum; ++v) { SVnodeObj *pVnode = pThread->ppVnodes[v]; if (pVnode->failed) { - dError("vgId:%d, skip restoring vnode in failure mode.", pVnode->vgId); + dError("vgId:%d, cannot restore a vnode in failed mode.", pVnode->vgId); continue; } diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c index 4b18ec4fb0..9a792a2774 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c @@ -187,7 +187,7 @@ static int32_t vmPutMsgToQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg, EQueueType qtyp pHead->vgId = ntohl(pHead->vgId); SVnodeObj *pVnode = vmAcquireVnode(pMgmt, pHead->vgId); - if (pVnode == NULL 
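/*
 * This hunk and the similar ones in vmHandle.c drop the explicit
 * pVnode->failed checks: vmAcquireVnode() is now a strict wrapper around
 * vmAcquireVnodeImpl() (see vmInt.c in this patch) and already refuses
 * dropped or failed vnodes, so a non-NULL return here is guaranteed to be
 * usable.
 */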
|| pVnode->failed) { + if (pVnode == NULL) { dGDebug("vgId:%d, msg:%p failed to put into vnode queue since %s, type:%s qtype:%d contLen:%d", pHead->vgId, pMsg, terrstr(), TMSG_INFO(pMsg->msgType), qtype, pHead->contLen); terrno = (terrno != 0) ? terrno : -1; @@ -206,11 +206,7 @@ static int32_t vmPutMsgToQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg, EQueueType qtyp break; case STREAM_QUEUE: dGTrace("vgId:%d, msg:%p put into vnode-stream queue", pVnode->vgId, pMsg); - if (pMsg->msgType == TDMT_STREAM_TASK_DISPATCH) { - vnodeEnqueueStreamMsg(pVnode->pImpl, pMsg); - } else { - taosWriteQitem(pVnode->pStreamQ, pMsg); - } + taosWriteQitem(pVnode->pStreamQ, pMsg); break; case FETCH_QUEUE: dGTrace("vgId:%d, msg:%p put into vnode-fetch queue", pVnode->vgId, pMsg); @@ -316,7 +312,7 @@ int32_t vmPutRpcMsgToQueue(SVnodeMgmt *pMgmt, EQueueType qtype, SRpcMsg *pRpc) { int32_t vmGetQueueSize(SVnodeMgmt *pMgmt, int32_t vgId, EQueueType qtype) { int32_t size = -1; SVnodeObj *pVnode = vmAcquireVnode(pMgmt, vgId); - if (pVnode != NULL && !pVnode->failed) { + if (pVnode != NULL) { switch (qtype) { case WRITE_QUEUE: size = taosQueueItemSize(pVnode->pWriteW.queue); diff --git a/source/dnode/mgmt/node_util/src/dmEps.c b/source/dnode/mgmt/node_util/src/dmEps.c index 3e948678a4..bee77528bd 100644 --- a/source/dnode/mgmt/node_util/src/dmEps.c +++ b/source/dnode/mgmt/node_util/src/dmEps.c @@ -232,7 +232,7 @@ int32_t dmWriteEps(SDnodeData *pData) { if (buffer == NULL) goto _OVER; terrno = 0; - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (pFile == NULL) goto _OVER; int32_t len = strlen(buffer); diff --git a/source/dnode/mgmt/node_util/src/dmFile.c b/source/dnode/mgmt/node_util/src/dmFile.c index c81efddcc1..5cbeeebc14 100644 --- a/source/dnode/mgmt/node_util/src/dmFile.c +++ b/source/dnode/mgmt/node_util/src/dmFile.c @@ -120,7 +120,7 @@ int32_t dmWriteFile(const char *path, const char *name, bool deployed) { if (buffer == NULL) goto _OVER; terrno = 0; - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (pFile == NULL) goto _OVER; int32_t len = strlen(buffer); @@ -149,7 +149,7 @@ TdFilePtr dmCheckRunning(const char *dataDir) { char filepath[PATH_MAX] = {0}; snprintf(filepath, sizeof(filepath), "%s%s.running", dataDir, TD_DIRSEP); - TdFilePtr pFile = taosOpenFile(filepath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + TdFilePtr pFile = taosOpenFile(filepath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_CLOEXEC); if (pFile == NULL) { terrno = TAOS_SYSTEM_ERROR(errno); dError("failed to open file:%s since %s", filepath, terrstr()); diff --git a/source/dnode/mnode/impl/inc/mndCluster.h b/source/dnode/mnode/impl/inc/mndCluster.h index 2b59d9dbf5..e33ffdb372 100644 --- a/source/dnode/mnode/impl/inc/mndCluster.h +++ b/source/dnode/mnode/impl/inc/mndCluster.h @@ -27,8 +27,6 @@ void mndCleanupCluster(SMnode *pMnode); int32_t mndGetClusterName(SMnode *pMnode, char *clusterName, int32_t len); int64_t mndGetClusterId(SMnode *pMnode); int64_t mndGetClusterCreateTime(SMnode *pMnode); -int32_t mndGetClusterGrantedInfo(SMnode *pMnode, SGrantedInfo *pInfo); -int32_t mndSetClusterGrantedInfo(SMnode *pMnode, SGrantedInfo *pInfo); int64_t mndGetClusterUpTime(SMnode *pMnode); #ifdef __cplusplus diff --git a/source/dnode/mnode/impl/inc/mndDef.h 
b/source/dnode/mnode/impl/inc/mndDef.h index fc9086eebf..08c0aec46a 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -192,8 +192,6 @@ typedef struct { int64_t createdTime; int64_t updateTime; int32_t upTime; - int64_t grantedTime; - int64_t connGrantedTime; } SClusterObj; typedef struct { @@ -651,8 +649,7 @@ typedef struct SStreamConf { } SStreamConf; typedef struct { - char name[TSDB_STREAM_FNAME_LEN]; - // ctl + char name[TSDB_STREAM_FNAME_LEN]; SRWLatch lock; // create info @@ -701,6 +698,11 @@ typedef struct { } SStreamObj; +typedef struct SStreamSeq { + char name[24]; + uint64_t seq; + SRWLatch lock; +} SStreamSeq; int32_t tEncodeSStreamObj(SEncoder* pEncoder, const SStreamObj* pObj); int32_t tDecodeSStreamObj(SDecoder* pDecoder, SStreamObj* pObj, int32_t sver); void tFreeStreamObj(SStreamObj* pObj); @@ -732,14 +734,13 @@ typedef struct { int8_t type; int32_t numOfCols; SSchema* pSchema; - SRWLatch lock; + SRWLatch lock; } SViewObj; int32_t tEncodeSViewObj(SEncoder* pEncoder, const SViewObj* pObj); int32_t tDecodeSViewObj(SDecoder* pDecoder, SViewObj* pObj, int32_t sver); void tFreeSViewObj(SViewObj* pObj); - #ifdef __cplusplus } #endif diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index 19fd2a3fd4..e3ac4fd6fc 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ b/source/dnode/mnode/impl/inc/mndStream.h @@ -22,17 +22,48 @@ extern "C" { #endif -int32_t mndInitStream(SMnode *pMnode); -void mndCleanupStream(SMnode *pMnode); +typedef struct SStreamTransInfo { + int64_t startTime; + int32_t transId; + const char *name; +} SStreamTransInfo; +// time to generated the checkpoint, if now() - checkpointTs >= tsCheckpointInterval, this checkpoint will be discard +// to avoid too many checkpoints for a taskk in the waiting list +typedef struct SCheckpointCandEntry { + char * pName; + int64_t streamId; + int64_t checkpointTs; + int64_t checkpointId; +} SCheckpointCandEntry; + +typedef struct SStreamTransMgmt { + SHashObj *pDBTrans; + SHashObj *pWaitingList; // stream id list, of which timed checkpoint failed to be issued due to the trans conflict. 
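/*
 * As introduced by this patch: stream transactions register themselves in
 * pDBTrans (see mndStreamRegisterTrans/streamTransConflictOtherTrans below),
 * and a timed checkpoint that is blocked by that conflict check is parked in
 * pWaitingList via mndAddtoCheckpointWaitingList(). A 5-second timer in
 * mndMain.c later re-issues the parked checkpoints through the new
 * TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE message, and entries older than the
 * checkpoint interval are discarded (see SCheckpointCandEntry above).
 */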
+} SStreamTransMgmt; + +typedef struct SStreamExecInfo { + SArray * pNodeList; + int64_t ts; // snapshot ts + SStreamTransMgmt transMgmt; + int64_t activeCheckpoint; // active check point id + SHashObj * pTaskMap; + SArray * pTaskList; + TdThreadMutex lock; +} SStreamExecInfo; + +extern SStreamExecInfo execInfo; + +int32_t mndInitStream(SMnode *pMnode); +void mndCleanupStream(SMnode *pMnode); SStreamObj *mndAcquireStream(SMnode *pMnode, char *streamName); void mndReleaseStream(SMnode *pMnode, SStreamObj *pStream); +int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb); +int32_t mndPersistStream(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); -SSdbRaw *mndStreamActionEncode(SStreamObj *pStream); -SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw); - -int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb); -int32_t mndPersistStream(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); +int32_t mndStreamRegisterTrans(STrans *pTrans, const char *pName, const char *pSrcDb, const char *pDstDb); +int32_t mndAddtoCheckpointWaitingList(SStreamObj *pStream, int64_t checkpointId); +bool streamTransConflictOtherTrans(SMnode *pMnode, const char *pSrcDb, const char *pDstDb, bool lock); // for sma // TODO refactor diff --git a/source/dnode/mnode/impl/src/mndCluster.c b/source/dnode/mnode/impl/src/mndCluster.c index 26c678b513..4c799e1e1e 100644 --- a/source/dnode/mnode/impl/src/mndCluster.c +++ b/source/dnode/mnode/impl/src/mndCluster.c @@ -19,7 +19,7 @@ #include "mndTrans.h" #define CLUSTER_VER_NUMBE 1 -#define CLUSTER_RESERVE_SIZE 44 +#define CLUSTER_RESERVE_SIZE 60 int64_t tsExpireTime = 0; static SSdbRaw *mndClusterActionEncode(SClusterObj *pCluster); @@ -112,19 +112,6 @@ int64_t mndGetClusterCreateTime(SMnode *pMnode) { return createTime; } -int32_t mndGetClusterGrantedInfo(SMnode *pMnode, SGrantedInfo *pInfo) { - void *pIter = NULL; - SClusterObj *pCluster = mndAcquireCluster(pMnode, &pIter); - if (pCluster != NULL) { - pInfo->grantedTime = pCluster->grantedTime; - pInfo->connGrantedTime = pCluster->connGrantedTime; - mndReleaseCluster(pMnode, pCluster, pIter); - return 0; - } - - return -1; -} - static int32_t mndGetClusterUpTimeImp(SClusterObj *pCluster) { #if 0 int32_t upTime = taosGetTimestampSec() - pCluster->updateTime / 1000; @@ -159,8 +146,6 @@ static SSdbRaw *mndClusterActionEncode(SClusterObj *pCluster) { SDB_SET_INT64(pRaw, dataPos, pCluster->updateTime, _OVER) SDB_SET_BINARY(pRaw, dataPos, pCluster->name, TSDB_CLUSTER_ID_LEN, _OVER) SDB_SET_INT32(pRaw, dataPos, pCluster->upTime, _OVER) - SDB_SET_INT64(pRaw, dataPos, pCluster->grantedTime, _OVER) - SDB_SET_INT64(pRaw, dataPos, pCluster->connGrantedTime, _OVER) SDB_SET_RESERVE(pRaw, dataPos, CLUSTER_RESERVE_SIZE, _OVER) terrno = 0; @@ -201,8 +186,6 @@ static SSdbRow *mndClusterActionDecode(SSdbRaw *pRaw) { SDB_GET_INT64(pRaw, dataPos, &pCluster->updateTime, _OVER) SDB_GET_BINARY(pRaw, dataPos, pCluster->name, TSDB_CLUSTER_ID_LEN, _OVER) SDB_GET_INT32(pRaw, dataPos, &pCluster->upTime, _OVER) - SDB_GET_INT64(pRaw, dataPos, &pCluster->grantedTime, _OVER); - SDB_GET_INT64(pRaw, dataPos, &pCluster->connGrantedTime, _OVER); SDB_GET_RESERVE(pRaw, dataPos, CLUSTER_RESERVE_SIZE, _OVER) terrno = 0; @@ -235,8 +218,6 @@ static int32_t mndClusterActionUpdate(SSdb *pSdb, SClusterObj *pOld, SClusterObj mTrace("cluster:%" PRId64 ", perform update action, old row:%p new row:%p, uptime from %d to %d", pOld->id, pOld, pNew, pOld->upTime, pNew->upTime); pOld->upTime = pNew->upTime; - pOld->grantedTime = pNew->grantedTime; - 
pOld->connGrantedTime = pNew->connGrantedTime; pOld->updateTime = taosGetTimestampMs(); return 0; } @@ -378,44 +359,3 @@ static int32_t mndProcessUptimeTimer(SRpcMsg *pReq) { mndTransDrop(pTrans); return 0; } - -int32_t mndSetClusterGrantedInfo(SMnode *pMnode, SGrantedInfo *pInfo) { - SClusterObj clusterObj = {0}; - void *pIter = NULL; - SClusterObj *pCluster = mndAcquireCluster(pMnode, &pIter); - if (pCluster != NULL) { - if (pCluster->grantedTime >= pInfo->grantedTime && pCluster->connGrantedTime >= pInfo->connGrantedTime) { - mndReleaseCluster(pMnode, pCluster, pIter); - return 0; - } - memcpy(&clusterObj, pCluster, sizeof(SClusterObj)); - if (pCluster->grantedTime < pInfo->grantedTime) clusterObj.grantedTime = pInfo->grantedTime; - if (pCluster->connGrantedTime < pInfo->connGrantedTime) clusterObj.connGrantedTime = pInfo->connGrantedTime; - mndReleaseCluster(pMnode, pCluster, pIter); - } - - if (clusterObj.id <= 0) { - mError("can't get cluster info while update granted info"); - return -1; - } - - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_NOTHING, NULL, "granted-info"); - if (pTrans == NULL) return -1; - - SSdbRaw *pCommitRaw = mndClusterActionEncode(&clusterObj); - if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { - mError("trans:%d, failed to append commit log since %s", pTrans->id, terrstr()); - mndTransDrop(pTrans); - return -1; - } - (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); - - if (mndTransPrepare(pMnode, pTrans) != 0) { - mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); - mndTransDrop(pTrans); - return -1; - } - - mndTransDrop(pTrans); - return 0; -} \ No newline at end of file diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index b0bffcc83e..e224aceec2 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -790,9 +790,7 @@ static int32_t mndConfigDnode(SMnode *pMnode, SRpcMsg *pReq, SMCfgDnodeReq *pCfg if (cfgAll) { // alter all dnodes: if (!failRecord) failRecord = taosArrayInit(1, sizeof(int32_t)); if (failRecord) taosArrayPush(failRecord, &pDnode->id); - if (0 == cfgAllErr || cfgAllErr == TSDB_CODE_GRANT_PAR_IVLD_ACTIVE) { - cfgAllErr = terrno; // output 1st or more specific error - } + if (0 == cfgAllErr) cfgAllErr = terrno; // output 1st terrno. } } else { terrno = 0; // no action for dup active code @@ -808,9 +806,7 @@ static int32_t mndConfigDnode(SMnode *pMnode, SRpcMsg *pReq, SMCfgDnodeReq *pCfg if (cfgAll) { if (!failRecord) failRecord = taosArrayInit(1, sizeof(int32_t)); if (failRecord) taosArrayPush(failRecord, &pDnode->id); - if (0 == cfgAllErr || cfgAllErr == TSDB_CODE_GRANT_PAR_IVLD_ACTIVE) { - cfgAllErr = terrno; // output 1st or more specific error - } + if (0 == cfgAllErr) cfgAllErr = terrno; } } else { terrno = 0; @@ -1287,12 +1283,7 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { strcpy(dcfgReq.config, "supportvnodes"); snprintf(dcfgReq.value, TSDB_DNODE_VALUE_LEN, "%d", flag); - } else if (strncasecmp(cfgReq.config, GRANT_ACTIVE_CODE, 10) == 0 || - strncasecmp(cfgReq.config, GRANT_C_ACTIVE_CODE, 11) == 0) { - if (cfgReq.dnodeId != -1) { - terrno = TSDB_CODE_INVALID_CFG; - goto _err_out; - } + } else if (strncasecmp(cfgReq.config, "activeCode", 10) == 0 || strncasecmp(cfgReq.config, "cActiveCode", 11) == 0) { int8_t opt = strncasecmp(cfgReq.config, "a", 1) == 0 ? DND_ACTIVE_CODE : DND_CONN_ACTIVE_CODE; int8_t index = opt == DND_ACTIVE_CODE ? 
10 : 11; if (' ' != cfgReq.config[index] && 0 != cfgReq.config[index]) { @@ -1310,11 +1301,12 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { goto _err_out; } - strcpy(dcfgReq.config, opt == DND_ACTIVE_CODE ? GRANT_ACTIVE_CODE : GRANT_C_ACTIVE_CODE); + strcpy(dcfgReq.config, opt == DND_ACTIVE_CODE ? "activeCode" : "cActiveCode"); snprintf(dcfgReq.value, TSDB_DNODE_VALUE_LEN, "%s", cfgReq.value); - if ((terrno = mndConfigDnode(pMnode, pReq, &cfgReq, opt)) != 0) { + if (mndConfigDnode(pMnode, pReq, &cfgReq, opt) != 0) { mError("dnode:%d, failed to config activeCode since %s", cfgReq.dnodeId, terrstr()); + terrno = TSDB_CODE_INVALID_CFG; goto _err_out; } tFreeSMCfgDnodeReq(&cfgReq); diff --git a/source/dnode/mnode/impl/src/mndDump.c b/source/dnode/mnode/impl/src/mndDump.c index 481495cbe5..5efebbc16e 100644 --- a/source/dnode/mnode/impl/src/mndDump.c +++ b/source/dnode/mnode/impl/src/mndDump.c @@ -605,7 +605,7 @@ void mndDumpSdb() { char *pCont = tjsonToString(json); int32_t contLen = strlen(pCont); char file[] = "sdb.json"; - TdFilePtr pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + TdFilePtr pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC| TD_FILE_WRITE_THROUGH); if (pFile == NULL) { terrno = TAOS_SYSTEM_ERROR(errno); mError("failed to write %s since %s", file, terrstr()); diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 8d00dfefb6..fdfec610d9 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -146,6 +146,15 @@ static void mndStreamCheckpointTick(SMnode *pMnode, int64_t sec) { } } +static void mndStreamCheckpointRemain(SMnode* pMnode) { + int32_t contLen = 0; + void *pReq = mndBuildCheckpointTickMsg(&contLen, 0); + if (pReq != NULL) { + SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE, .pCont = pReq, .contLen = contLen}; + tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); + } +} + static void mndStreamCheckNode(SMnode* pMnode) { int32_t contLen = 0; void *pReq = mndBuildTimerMsg(&contLen); @@ -286,6 +295,10 @@ static void *mndThreadFp(void *param) { mndStreamCheckpointTick(pMnode, sec); } + if (sec % 5 == 0) { + mndStreamCheckpointRemain(pMnode); + } + if (sec % tsStreamNodeCheckInterval == 0) { mndStreamCheckNode(pMnode); } diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index 88a06cb513..3ef4c9a4d2 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -28,7 +28,7 @@ extern bool tsDeployOnSnode; static int32_t doAddSinkTask(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId, SVgObj* pVgroup, - SEpSet* pEpset, bool isFillhistory); + SEpSet* pEpset, bool isFillhistory); int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64_t uid, int8_t triggerType, int64_t watermark, int64_t deleteMark) { diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index eb2f9dcbe3..1ddd2f34e6 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -1014,6 +1014,20 @@ static int32_t mndFindSuperTableColumnIndex(const SStbObj *pStb, const char *col return -1; } +static bool mndValidateSchema(SSchema *pSchemas, int32_t nSchema, SArray *pFields, int32_t maxLen) { + int32_t rowLen = 0; + for (int32_t i = 0; i < nSchema; ++i) { + rowLen += (pSchemas + i)->bytes; + } + + int32_t nField = taosArrayGetSize(pFields); + 
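/*
 * mndValidateSchema() adds the byte width of every existing column/tag to the
 * width of the fields being added and accepts the alter only while the total
 * stays within maxLen; the call sites added later in this file's changes pass
 * TSDB_MAX_TAGS_LEN when adding tags and TSDB_MAX_BYTES_PER_ROW when adding
 * columns, and mndAlterStbTagBytes() performs the equivalent check when a tag
 * is widened in place.
 */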
for (int32_t i = 0; i < nField; ++i) { + rowLen += ((SField *)TARRAY_GET_ELEM(pFields, i))->bytes; + } + + return rowLen <= maxLen; +} + static int32_t mndBuildStbFromAlter(SStbObj *pStb, SStbObj *pDst, SMCreateStbReq *createReq) { taosRLockLatch(&pStb->lock); memcpy(pDst, pStb, sizeof(SStbObj)); @@ -1269,6 +1283,11 @@ static int32_t mndAddSuperTableTag(const SStbObj *pOld, SStbObj *pNew, SArray *p return -1; } + if (!mndValidateSchema(pOld->pTags, pOld->numOfTags, pFields, TSDB_MAX_TAGS_LEN)) { + terrno = TSDB_CODE_PAR_INVALID_TAGS_LENGTH; + return -1; + } + pNew->numOfTags = pNew->numOfTags + ntags; if (mndAllocStbSchemas(pOld, pNew) != 0) { return -1; @@ -1558,6 +1577,16 @@ static int32_t mndAlterStbTagBytes(SMnode *pMnode, const SStbObj *pOld, SStbObj return -1; } + uint32_t nLen = 0; + for (int32_t i = 0; i < pOld->numOfTags; ++i) { + nLen += (pOld->pTags[i].colId == colId) ? pField->bytes : pOld->pTags[i].bytes; + } + + if (nLen > TSDB_MAX_TAGS_LEN) { + terrno = TSDB_CODE_PAR_INVALID_TAGS_LENGTH; + return -1; + } + if (mndAllocStbSchemas(pOld, pNew) != 0) { return -1; } @@ -1592,6 +1621,11 @@ static int32_t mndAddSuperTableColumn(const SStbObj *pOld, SStbObj *pNew, SArray return -1; } + if (!mndValidateSchema(pOld->pColumns, pOld->numOfColumns, pFields, TSDB_MAX_BYTES_PER_ROW)) { + terrno = TSDB_CODE_PAR_INVALID_ROW_LENGTH; + return -1; + } + pNew->numOfColumns = pNew->numOfColumns + ncols; if (mndAllocStbSchemas(pOld, pNew) != 0) { return -1; diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 02d401d924..21969cc404 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -21,20 +21,25 @@ #include "mndPrivilege.h" #include "mndScheduler.h" #include "mndShow.h" +#include "mndSnode.h" #include "mndStb.h" -#include "mndTopic.h" #include "mndTrans.h" -#include "mndUser.h" #include "mndVgroup.h" #include "osMemory.h" #include "parser.h" #include "tmisce.h" #include "tname.h" -#define MND_STREAM_VER_NUMBER 4 -#define MND_STREAM_RESERVE_SIZE 64 -#define MND_STREAM_MAX_NUM 60 -#define MND_STREAM_CHECKPOINT_NAME "stream-checkpoint" +#define MND_STREAM_VER_NUMBER 4 +#define MND_STREAM_RESERVE_SIZE 64 +#define MND_STREAM_MAX_NUM 60 + +#define MND_STREAM_CHECKPOINT_NAME "stream-checkpoint" +#define MND_STREAM_PAUSE_NAME "stream-pause" +#define MND_STREAM_RESUME_NAME "stream-resume" +#define MND_STREAM_DROP_NAME "stream-drop" +#define MND_STREAM_TASK_RESET_NAME "stream-task-reset" +#define MND_STREAM_TASK_UPDATE_NAME "stream-task-update" typedef struct SNodeEntry { int32_t nodeId; @@ -43,22 +48,13 @@ typedef struct SNodeEntry { int64_t hbTimestamp; // second } SNodeEntry; -typedef struct SStreamExecInfo { - SArray *pNodeList; - int64_t ts; // snapshot ts - int64_t activeCheckpoint; // active check point id - SHashObj * pTaskMap; - SArray * pTaskList; - TdThreadMutex lock; -} SStreamExecInfo; - typedef struct SVgroupChangeInfo { SHashObj *pDBMap; SArray * pUpdateNodeList; // SArray } SVgroupChangeInfo; -static int32_t mndNodeCheckSentinel = 0; -static SStreamExecInfo execInfo; +static int32_t mndNodeCheckSentinel = 0; +SStreamExecInfo execInfo; static int32_t mndStreamActionInsert(SSdb *pSdb, SStreamObj *pStream); static int32_t mndStreamActionDelete(SSdb *pSdb, SStreamObj *pStream); @@ -67,6 +63,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq); static int32_t mndProcessDropStreamReq(SRpcMsg *pReq); static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq); static int32_t 
mndProcessStreamDoCheckpoint(SRpcMsg *pReq); +static int32_t mndProcessStreamCheckpointInCandid(SRpcMsg *pReq); static int32_t mndProcessStreamHb(SRpcMsg *pReq); static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows); static void mndCancelGetNextStream(SMnode *pMnode, void *pIter); @@ -81,18 +78,30 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg); static SArray *extractNodeListFromStream(SMnode *pMnode); static SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady); +static SStreamObj *mndGetStreamObj(SMnode *pMnode, int64_t streamId); + static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList); -static STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, const char *name); +static STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, const char *name, const char *pMsg); static int32_t mndPersistTransLog(SStreamObj *pStream, STrans *pTrans); static void initTransAction(STransAction *pAction, void *pCont, int32_t contLen, int32_t msgType, const SEpSet *pEpset, int32_t retryCode); static int32_t createStreamUpdateTrans(SStreamObj *pStream, SVgroupChangeInfo *pInfo, STrans *pTrans); static void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode); -static void keepStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode); +static void saveStreamTasksInfo(SStreamObj *pStream, SStreamExecInfo *pExecNode); static int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot); -static int32_t doKillActiveCheckpointTrans(SMnode *pMnode); +static int32_t killActiveCheckpointTrans(SMnode *pMnode, const char *pDbName, size_t len); static int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList); +static void freeCheckpointCandEntry(void *); + +static SSdbRaw *mndStreamActionEncode(SStreamObj *pStream); +static SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw); + +SSdbRaw * mndStreamSeqActionEncode(SStreamObj *pStream); +SSdbRow * mndStreamSeqActionDecode(SSdbRaw *pRaw); +static int32_t mndStreamSeqActionInsert(SSdb *pSdb, SStreamSeq *pStream); +static int32_t mndStreamSeqActionDelete(SSdb *pSdb, SStreamSeq *pStream); +static int32_t mndStreamSeqActionUpdate(SSdb *pSdb, SStreamSeq *pOldStream, SStreamSeq *pNewStream); int32_t mndInitStream(SMnode *pMnode) { SSdbTable table = { @@ -104,6 +113,15 @@ int32_t mndInitStream(SMnode *pMnode) { .updateFp = (SdbUpdateFp)mndStreamActionUpdate, .deleteFp = (SdbDeleteFp)mndStreamActionDelete, }; + SSdbTable tableSeq = { + .sdbType = SDB_STREAM_SEQ, + .keyType = SDB_KEY_BINARY, + .encodeFp = (SdbEncodeFp)mndStreamSeqActionEncode, + .decodeFp = (SdbDecodeFp)mndStreamSeqActionDecode, + .insertFp = (SdbInsertFp)mndStreamSeqActionInsert, + .updateFp = (SdbUpdateFp)mndStreamSeqActionUpdate, + .deleteFp = (SdbDeleteFp)mndStreamSeqActionDelete, + }; mndSetMsgHandle(pMnode, TDMT_MND_CREATE_STREAM, mndProcessCreateStreamReq); mndSetMsgHandle(pMnode, TDMT_MND_DROP_STREAM, mndProcessDropStreamReq); @@ -120,6 +138,7 @@ int32_t mndInitStream(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_VND_STREAM_CHECK_POINT_SOURCE_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHECKPOINT_TIMER, mndProcessStreamCheckpointTmr); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_BEGIN_CHECKPOINT, mndProcessStreamDoCheckpoint); + mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE, mndProcessStreamCheckpointInCandid); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_HEARTBEAT, mndProcessStreamHb); mndSetMsgHandle(pMnode, 
TDMT_STREAM_TASK_REPORT_CHECKPOINT, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_NODECHANGE_CHECK, mndProcessNodeCheckReq); @@ -133,15 +152,28 @@ int32_t mndInitStream(SMnode *pMnode) { mndAddShowFreeIterHandle(pMnode, TSDB_MGMT_TABLE_STREAM_TASKS, mndCancelGetNextStreamTask); taosThreadMutexInit(&execInfo.lock, NULL); - execInfo.pTaskMap = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR), true, HASH_NO_LOCK); - execInfo.pTaskList = taosArrayInit(4, sizeof(STaskId)); + _hash_fn_t fn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR); - return sdbSetTable(pMnode->pSdb, table); + execInfo.pTaskList = taosArrayInit(4, sizeof(STaskId)); + execInfo.pTaskMap = taosHashInit(64, fn, true, HASH_NO_LOCK); + execInfo.transMgmt.pDBTrans = taosHashInit(32, fn, true, HASH_NO_LOCK); + execInfo.transMgmt.pWaitingList = taosHashInit(32, fn, true, HASH_NO_LOCK); + taosHashSetFreeFp(execInfo.transMgmt.pWaitingList, freeCheckpointCandEntry); + + if (sdbSetTable(pMnode->pSdb, table) != 0) { + return -1; + } + if (sdbSetTable(pMnode->pSdb, tableSeq) != 0) { + return -1; + } + return 0; } void mndCleanupStream(SMnode *pMnode) { taosArrayDestroy(execInfo.pTaskList); taosHashCleanup(execInfo.pTaskMap); + taosHashCleanup(execInfo.transMgmt.pDBTrans); + taosHashCleanup(execInfo.transMgmt.pWaitingList); taosThreadMutexDestroy(&execInfo.lock); mDebug("mnd stream exec info cleanup"); } @@ -188,7 +220,8 @@ STREAM_ENCODE_OVER: return NULL; } - mTrace("stream:%s, encode to raw:%p, row:%p", pStream->name, pRaw, pStream); + mTrace("stream:%s, encode to raw:%p, row:%p, checkpoint:%" PRId64 "", pStream->name, pRaw, pStream, + pStream->checkpointId); return pRaw; } @@ -241,7 +274,8 @@ STREAM_DECODE_OVER: return NULL; } - mTrace("stream:%s, decode from raw:%p, row:%p", pStream->name, pRaw, pStream); + mTrace("stream:%s, decode from raw:%p, row:%p, checkpoint:%" PRId64 "", pStream->name, pRaw, pStream, + pStream->checkpointId); return pRow; } @@ -267,6 +301,8 @@ static int32_t mndStreamActionUpdate(SSdb *pSdb, SStreamObj *pOldStream, SStream pOldStream->status = pNewStream->status; pOldStream->updateTime = pNewStream->updateTime; + pOldStream->checkpointId = pNewStream->checkpointId; + pOldStream->checkpointFreq = pNewStream->checkpointFreq; taosWUnLockLatch(&pOldStream->lock); return 0; @@ -301,6 +337,12 @@ static void mndShowStreamStatus(char *dst, SStreamObj *pStream) { } } +SSdbRaw *mndStreamSeqActionEncode(SStreamObj *pStream) { return NULL; } +SSdbRow *mndStreamSeqActionDecode(SSdbRaw *pRaw) { return NULL; } +int32_t mndStreamSeqActionInsert(SSdb *pSdb, SStreamSeq *pStream) { return 0; } +int32_t mndStreamSeqActionDelete(SSdb *pSdb, SStreamSeq *pStream) { return 0; } +int32_t mndStreamSeqActionUpdate(SSdb *pSdb, SStreamSeq *pOldStream, SStreamSeq *pNewStream) { return 0; } + static void mndShowStreamTrigger(char *dst, SStreamObj *pStream) { int8_t trigger = pStream->conf.trigger; if (trigger == STREAM_TRIGGER_AT_ONCE) { @@ -335,7 +377,7 @@ static int32_t mndStreamGetPlanString(const char *ast, int8_t triggerType, int64 .pAstRoot = pAst, .topicQuery = false, .streamQuery = true, - .triggerType = triggerType == STREAM_TRIGGER_MAX_DELAY ? STREAM_TRIGGER_WINDOW_CLOSE : triggerType, + .triggerType = (triggerType == STREAM_TRIGGER_MAX_DELAY) ? 
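Editor's note: execInfo.transMgmt.pWaitingList stores checkpoint candidate entries whose pName field is a heap-allocated copy of the stream name, so the patch installs freeCheckpointCandEntry via taosHashSetFreeFp and the string is released whenever an entry is evicted. The sketch below shows the same ownership pattern with a tiny array standing in for the hash table; the types and names are illustrative and this is not the taosHash API.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
  long  streamId;
  char *pName;   /* heap-owned, must be released by the container's free callback */
} DemoEntry;

typedef void (*demo_free_fn)(void *);

static void demoFreeEntry(void *param) {
  DemoEntry *pEntry = param;
  free(pEntry->pName);
  pEntry->pName = NULL;
}

/* Remove one slot from a tiny array-backed "table", invoking the free callback first. */
static void demoRemove(DemoEntry *entries, int *count, int idx, demo_free_fn freeFp) {
  if (freeFp != NULL) freeFp(&entries[idx]);
  entries[idx] = entries[--(*count)];
}

int main(void) {
  DemoEntry  entries[4];
  int        count = 0;
  const char *name = "stream_a";

  entries[count].streamId = 1;
  entries[count].pName = malloc(strlen(name) + 1);
  strcpy(entries[count].pName, name);
  count++;

  demoRemove(entries, &count, 0, demoFreeEntry);  /* pName is freed here, no leak */
  printf("remaining entries: %d\n", count);
  return 0;
}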
STREAM_TRIGGER_WINDOW_CLOSE : triggerType, .watermark = watermark, }; code = qCreateQueryPlan(&cxt, &pPlan, NULL); @@ -679,7 +721,7 @@ _OVER: return -1; } -static int32_t mndPersistTaskDropReq(SMnode* pMnode, STrans *pTrans, SStreamTask *pTask) { +static int32_t mndPersistTaskDropReq(SMnode *pMnode, STrans *pTrans, SStreamTask *pTask) { SVDropStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVDropStreamTaskReq)); if (pReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -691,9 +733,15 @@ static int32_t mndPersistTaskDropReq(SMnode* pMnode, STrans *pTrans, SStreamTask pReq->streamId = pTask->id.streamId; STransAction action = {0}; - SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId); - SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj); - mndReleaseVgroup(pMnode, pVgObj); + SEpSet epset = {0}; + if (pTask->info.nodeId == SNODE_HANDLE) { + SSnodeObj *pObj = mndAcquireSnode(pMnode, pTask->info.nodeId); + addEpIntoEpSet(&epset, pObj->pDnode->fqdn, pObj->pDnode->port); + } else { + SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId); + epset = mndGetVgroupEpset(pMnode, pVgObj); + mndReleaseVgroup(pMnode, pVgObj); + } // The epset of nodeId of this task may have been expired now, let's use the newest epset from mnode. initTransAction(&action, pReq, sizeof(SVDropStreamTaskReq), TDMT_STREAM_TASK_DROP, &epset, 0); @@ -720,18 +768,50 @@ int32_t mndDropStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream) return 0; } -static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { - SMnode * pMnode = pReq->info.node; - int32_t code = -1; - SStreamObj * pStream = NULL; - SDbObj * pDb = NULL; - SCMCreateStreamReq createStreamReq = {0}; - SStreamObj streamObj = {0}; +static int32_t checkForNumOfStreams(SMnode *pMnode, SStreamObj *pStreamObj) { // check for number of existed tasks + int32_t numOfStream = 0; + SStreamObj *pStream = NULL; + void * pIter = NULL; + while ((pIter = sdbFetch(pMnode->pSdb, SDB_STREAM, pIter, (void **)&pStream)) != NULL) { + if (pStream->sourceDbUid == pStreamObj->sourceDbUid) { + ++numOfStream; + } + + sdbRelease(pMnode->pSdb, pStream); + + if (numOfStream > MND_STREAM_MAX_NUM) { + mError("too many streams, no more than %d for each database", MND_STREAM_MAX_NUM); + sdbCancelFetch(pMnode->pSdb, pIter); + terrno = TSDB_CODE_MND_TOO_MANY_STREAMS; + return terrno; + } + + if (pStream->targetStbUid == pStreamObj->targetStbUid) { + mError("Cannot write the same stable as other stream:%s", pStream->name); + sdbCancelFetch(pMnode->pSdb, pIter); + terrno = TSDB_CODE_MND_TOO_MANY_STREAMS; + return terrno; + } + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { + SMnode *pMnode = pReq->info.node; + SStreamObj *pStream = NULL; + SStreamObj streamObj = {0}; + char * sql = NULL; + int32_t sqlLen = 0; + terrno = TSDB_CODE_SUCCESS; + + SCMCreateStreamReq createStreamReq = {0}; if (tDeserializeSCMCreateStreamReq(pReq->pCont, pReq->contLen, &createStreamReq) != 0) { terrno = TSDB_CODE_INVALID_MSG; goto _OVER; } + #ifdef WINDOWS terrno = TSDB_CODE_MND_INVALID_PLATFORM; goto _OVER; @@ -747,7 +827,6 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { if (pStream != NULL) { if (createStreamReq.igExists) { mInfo("stream:%s, already exist, ignore exist is set", createStreamReq.name); - code = 0; goto _OVER; } else { terrno = TSDB_CODE_MND_STREAM_ALREADY_EXIST; @@ -757,9 +836,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { goto _OVER; } - char* sql = NULL; - int32_t sqlLen = 0; - if(createStreamReq.sql != 
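Editor's note: checkForNumOfStreams factors the previously inlined scan out of mndProcessCreateStreamReq. It walks every existing stream, counts the ones sharing the new stream's source database, and rejects the request once the count exceeds MND_STREAM_MAX_NUM, or when another stream already writes into the same target stable. A standalone sketch of that counting rule, using plain arrays instead of sdbFetch iteration; names and values here are illustrative.

#include <stdint.h>
#include <stdio.h>

typedef struct {
  int64_t sourceDbUid;
  int64_t targetStbUid;
} DemoStream;

enum { DEMO_MAX_STREAMS_PER_DB = 60 };  /* mirrors MND_STREAM_MAX_NUM in the patch */

/* Returns 0 when the new stream may be created, -1 when a limit or target clash is hit. */
static int demoCheckNumOfStreams(const DemoStream *existing, int n, const DemoStream *pNew) {
  int numOfStream = 0;
  for (int i = 0; i < n; ++i) {
    if (existing[i].sourceDbUid == pNew->sourceDbUid && ++numOfStream > DEMO_MAX_STREAMS_PER_DB) {
      fprintf(stderr, "too many streams, no more than %d for each database\n", DEMO_MAX_STREAMS_PER_DB);
      return -1;
    }
    if (existing[i].targetStbUid == pNew->targetStbUid) {
      fprintf(stderr, "cannot write the same stable as existing stream %d\n", i);
      return -1;
    }
  }
  return 0;
}

int main(void) {
  DemoStream existing[]  = {{.sourceDbUid = 1, .targetStbUid = 100}};
  DemoStream newStream   = {.sourceDbUid = 1, .targetStbUid = 100};
  printf("create allowed: %s\n", demoCheckNumOfStreams(existing, 1, &newStream) == 0 ? "yes" : "no");
  return 0;
}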
NULL){ + if (createStreamReq.sql != NULL) { sqlLen = strlen(createStreamReq.sql); sql = taosMemoryMalloc(sqlLen + 1); memset(sql, 0, sqlLen + 1); @@ -772,42 +849,8 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { goto _OVER; } - { - int32_t numOfStream = 0; - - SStreamObj *pStream = NULL; - void * pIter = NULL; - - while (1) { - pIter = sdbFetch(pMnode->pSdb, SDB_STREAM, pIter, (void **)&pStream); - if (pIter == NULL) { - if (numOfStream > MND_STREAM_MAX_NUM) { - mError("too many streams, no more than %d for each database", MND_STREAM_MAX_NUM); - terrno = TSDB_CODE_MND_TOO_MANY_STREAMS; - goto _OVER; - } - break; - } - - if (pStream->sourceDbUid == streamObj.sourceDbUid) { - ++numOfStream; - } - - sdbRelease(pMnode->pSdb, pStream); - if (numOfStream > MND_STREAM_MAX_NUM) { - mError("too many streams, no more than %d for each database", MND_STREAM_MAX_NUM); - terrno = TSDB_CODE_MND_TOO_MANY_STREAMS; - sdbCancelFetch(pMnode->pSdb, pIter); - goto _OVER; - } - - if (pStream->targetStbUid == streamObj.targetStbUid) { - mError("Cannot write the same stable as other stream:%s", pStream->name); - terrno = TSDB_CODE_MND_INVALID_TARGET_TABLE; - sdbCancelFetch(pMnode->pSdb, pIter); - goto _OVER; - } - } + if (checkForNumOfStreams(pMnode, &streamObj) < 0) { + goto _OVER; } STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_DB_INSIDE, pReq, "create-stream"); @@ -866,11 +909,9 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { taosThreadMutexLock(&execInfo.lock); mDebug("stream tasks register into node list"); - keepStreamTasksInBuf(&streamObj, &execInfo); + saveStreamTasksInfo(&streamObj, &execInfo); taosThreadMutexUnlock(&execInfo.lock); - code = TSDB_CODE_ACTION_IN_PROGRESS; - SName dbname = {0}; tNameFromString(&dbname, createStreamReq.sourceDB, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); @@ -879,27 +920,26 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { // reuse this function for stream if (sql != NULL && sqlLen > 0) { - auditRecord(pReq, pMnode->clusterId, "createStream", dbname.dbname, name.dbname, sql, - sqlLen); - } - else{ + auditRecord(pReq, pMnode->clusterId, "createStream", dbname.dbname, name.dbname, sql, sqlLen); + } else { char detail[1000] = {0}; sprintf(detail, "dbname:%s, stream name:%s", dbname.dbname, name.dbname); auditRecord(pReq, pMnode->clusterId, "createStream", dbname.dbname, name.dbname, detail, strlen(detail)); } + _OVER: - if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { + if (terrno != TSDB_CODE_SUCCESS && terrno != TSDB_CODE_ACTION_IN_PROGRESS) { mError("stream:%s, failed to create since %s", createStreamReq.name, terrstr()); } mndReleaseStream(pMnode, pStream); - tFreeSCMCreateStreamReq(&createStreamReq); tFreeStreamObj(&streamObj); - if(sql != NULL){ + if (sql != NULL) { taosMemoryFreeClear(sql); } - return code; + + return terrno; } int64_t mndStreamGenChkpId(SMnode *pMnode) { @@ -912,8 +952,11 @@ int64_t mndStreamGenChkpId(SMnode *pMnode) { if (pIter == NULL) break; maxChkpId = TMAX(maxChkpId, pStream->checkpointId); + mDebug("stream %p checkpoint %" PRId64 "", pStream, pStream->checkpointId); sdbRelease(pSdb, pStream); } + + mDebug("generated checkpoint %" PRId64 "", maxChkpId + 1); return maxChkpId + 1; } @@ -933,6 +976,22 @@ static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq) { return 0; } +static int32_t mndProcessStreamRemainChkptTmr(SRpcMsg *pReq) { + SMnode *pMnode = pReq->info.node; + SSdb * pSdb = pMnode->pSdb; + if (sdbGetSize(pSdb, SDB_STREAM) <= 0) { + return 0; + } + + 
SMStreamDoCheckpointMsg *pMsg = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg)); + pMsg->checkpointId = 0; + + int32_t size = sizeof(SMStreamDoCheckpointMsg); + SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE, .pCont = pMsg, .contLen = size}; + tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); + return 0; +} + static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, int32_t nodeId, int64_t checkpointId, int64_t streamId, int32_t taskId) { SStreamCheckpointSourceReq req = {0}; @@ -975,107 +1034,104 @@ static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, in return 0; } -// static int32_t mndProcessStreamCheckpointTrans(SMnode *pMnode, SStreamObj *pStream, int64_t checkpointId) { -// int64_t timestampMs = taosGetTimestampMs(); -// if (timestampMs - pStream->checkpointFreq < tsStreamCheckpointInterval * 1000) { -// return -1; -// } -// STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, NULL, "stream-checkpoint"); -// if (pTrans == NULL) return -1; -// mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); -// if (mndTrancCheckConflict(pMnode, pTrans) != 0) { -// mError("failed to checkpoint of stream name%s, checkpointId: %" PRId64 ", reason:%s", pStream->name, -// checkpointId, -// tstrerror(TSDB_CODE_MND_TRANS_CONFLICT)); -// mndTransDrop(pTrans); -// return -1; -// } -// mDebug("start to trigger checkpoint for stream:%s, checkpoint: %" PRId64 "", pStream->name, checkpointId); -// atomic_store_64(&pStream->currentTick, 1); -// taosWLockLatch(&pStream->lock); -// // 1. redo action: broadcast checkpoint source msg for all source vg -// int32_t totLevel = taosArrayGetSize(pStream->tasks); -// for (int32_t i = 0; i < totLevel; i++) { -// SArray *pLevel = taosArrayGetP(pStream->tasks, i); -// SStreamTask *pTask = taosArrayGetP(pLevel, 0); -// if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { -// int32_t sz = taosArrayGetSize(pLevel); -// for (int32_t j = 0; j < sz; j++) { -// SStreamTask *pTask = taosArrayGetP(pLevel, j); -// /*A(pTask->info.nodeId > 0);*/ -// SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId); -// if (pVgObj == NULL) { -// taosWUnLockLatch(&pStream->lock); -// mndTransDrop(pTrans); -// return -1; -// } +static int32_t mndProcessStreamCheckpointTrans(SMnode *pMnode, SStreamObj *pStream, int64_t checkpointId) { + int32_t code = -1; + int64_t timestampMs = taosGetTimestampMs(); + if (timestampMs - pStream->checkpointFreq < tsStreamCheckpointInterval * 1000) { + return -1; + } -// void *buf; -// int32_t tlen; -// if (mndBuildStreamCheckpointSourceReq2(&buf, &tlen, pTask->info.nodeId, checkpointId, pTask->id.streamId, -// pTask->id.taskId) < 0) { -// mndReleaseVgroup(pMnode, pVgObj); -// taosWUnLockLatch(&pStream->lock); -// mndTransDrop(pTrans); -// return -1; -// } + bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb, true); + if (conflict) { + mndAddtoCheckpointWaitingList(pStream, checkpointId); + mWarn("checkpoint conflict with other trans in %s, ignore the checkpoint for stream:%s %" PRIx64, pStream->sourceDb, + pStream->name, pStream->uid); + return -1; + } -// STransAction action = {0}; -// action.epSet = mndGetVgroupEpset(pMnode, pVgObj); -// action.pCont = buf; -// action.contLen = tlen; -// action.msgType = TDMT_VND_STREAM_CHECK_POINT_SOURCE; + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, NULL, MND_STREAM_CHECKPOINT_NAME); + if (pTrans == NULL) { + return -1; + } -// 
mndReleaseVgroup(pMnode, pVgObj); + mndStreamRegisterTrans(pTrans, MND_STREAM_CHECKPOINT_NAME, pStream->sourceDb, pStream->targetDb); -// if (mndTransAppendRedoAction(pTrans, &action) != 0) { -// taosMemoryFree(buf); -// taosWUnLockLatch(&pStream->lock); -// mndReleaseStream(pMnode, pStream); -// mndTransDrop(pTrans); -// return -1; -// } -// } -// } -// } -// // 2. reset tick -// pStream->checkpointFreq = checkpointId; -// pStream->checkpointId = checkpointId; -// pStream->checkpointFreq = taosGetTimestampMs(); -// atomic_store_64(&pStream->currentTick, 0); -// // 3. commit log: stream checkpoint info -// pStream->version = pStream->version + 1; -// taosWUnLockLatch(&pStream->lock); + mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); + if (mndTrancCheckConflict(pMnode, pTrans) != 0) { + mError("failed to checkpoint of stream name%s, checkpointId: %" PRId64 ", reason:%s", pStream->name, checkpointId, + tstrerror(TSDB_CODE_MND_TRANS_CONFLICT)); + goto _ERR; + } -// // // code condtion + mDebug("start to trigger checkpoint for stream:%s, checkpoint: %" PRId64 "", pStream->name, checkpointId); -// SSdbRaw *pCommitRaw = mndStreamActionEncode(pStream); -// if (pCommitRaw == NULL) { -// mError("failed to prepare trans rebalance since %s", terrstr()); -// goto _ERR; -// } -// if (mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { -// sdbFreeRaw(pCommitRaw); -// mError("failed to prepare trans rebalance since %s", terrstr()); -// goto _ERR; -// } -// if (sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY) != 0) { -// sdbFreeRaw(pCommitRaw); -// mError("failed to prepare trans rebalance since %s", terrstr()); -// goto _ERR; -// } + taosWLockLatch(&pStream->lock); + pStream->currentTick = 1; -// if (mndTransPrepare(pMnode, pTrans) != 0) { -// mError("failed to prepare trans rebalance since %s", terrstr()); -// goto _ERR; -// } -// mndTransDrop(pTrans); -// return 0; -// _ERR: -// mndTransDrop(pTrans); -// return -1; -// } + // 1. redo action: broadcast checkpoint source msg for all source vg + int32_t totLevel = taosArrayGetSize(pStream->tasks); + for (int32_t i = 0; i < totLevel; i++) { + SArray * pLevel = taosArrayGetP(pStream->tasks, i); + SStreamTask *p = taosArrayGetP(pLevel, 0); + + if (p->info.taskLevel == TASK_LEVEL__SOURCE) { + int32_t sz = taosArrayGetSize(pLevel); + for (int32_t j = 0; j < sz; j++) { + SStreamTask *pTask = taosArrayGetP(pLevel, j); + + SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId); + if (pVgObj == NULL) { + taosWUnLockLatch(&pStream->lock); + goto _ERR; + } + + void * buf; + int32_t tlen; + if (mndBuildStreamCheckpointSourceReq2(&buf, &tlen, pTask->info.nodeId, checkpointId, pTask->id.streamId, + pTask->id.taskId) < 0) { + mndReleaseVgroup(pMnode, pVgObj); + taosWUnLockLatch(&pStream->lock); + goto _ERR; + } + + STransAction act = {0}; + SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj); + mndReleaseVgroup(pMnode, pVgObj); + + initTransAction(&act, buf, tlen, TDMT_VND_STREAM_CHECK_POINT_SOURCE, &epset, TSDB_CODE_SYN_PROPOSE_NOT_READY); + if (mndTransAppendRedoAction(pTrans, &act) != 0) { + taosMemoryFree(buf); + taosWUnLockLatch(&pStream->lock); + goto _ERR; + } + } + } + } + + // 2. reset tick + pStream->checkpointId = checkpointId; + pStream->checkpointFreq = taosGetTimestampMs(); + pStream->currentTick = 0; + + // 3. 
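Editor's note: the redo-action loop above only targets source-level tasks: for each level whose first task is TASK_LEVEL__SOURCE it builds one checkpoint-source request per task, addressed to that task's vgroup, and appends it to the transaction. Below is a simplified sketch of that two-level walk; the enums, structs and the fixed two-tasks-per-level layout are illustrative only, not the real SStreamCheckpointSourceReq plumbing.

#include <stdio.h>

enum { DEMO_LEVEL_SOURCE = 0, DEMO_LEVEL_AGG = 1, DEMO_LEVEL_SINK = 2 };

typedef struct {
  int taskId;
  int nodeId;   /* vgroup the request must be sent to */
  int level;
} DemoTask;

/* One "redo action" per source task; in the patch this is a serialized
 * checkpoint-source request appended to the transaction. */
static int demoBuildCheckpointActions(DemoTask tasks[][2], int nLevels, long checkpointId) {
  int nActions = 0;
  for (int i = 0; i < nLevels; ++i) {
    if (tasks[i][0].level != DEMO_LEVEL_SOURCE) continue;   /* skip agg/sink levels */
    for (int j = 0; j < 2; ++j) {
      printf("checkpoint %ld -> task 0x%x on vgroup %d\n", checkpointId,
             (unsigned)tasks[i][j].taskId, tasks[i][j].nodeId);
      nActions++;
    }
  }
  return nActions;
}

int main(void) {
  DemoTask tasks[2][2] = {
      {{0x11, 2, DEMO_LEVEL_SOURCE}, {0x12, 3, DEMO_LEVEL_SOURCE}},
      {{0x21, 4, DEMO_LEVEL_SINK},   {0x22, 5, DEMO_LEVEL_SINK}},
  };
  printf("redo actions appended: %d\n", demoBuildCheckpointActions(tasks, 2, 100));
  return 0;
}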
commit log: stream checkpoint info + pStream->version = pStream->version + 1; + taosWUnLockLatch(&pStream->lock); + + if ((code = mndPersistTransLog(pStream, pTrans)) != TSDB_CODE_SUCCESS) { + return code; + } + + if ((code = mndTransPrepare(pMnode, pTrans)) != TSDB_CODE_SUCCESS) { + mError("failed to prepare trans rebalance since %s", terrstr()); + goto _ERR; + } + + code = 0; +_ERR: + mndTransDrop(pTrans); + return code; +} static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream, SMnode *pMnode, int64_t chkptId) { taosWLockLatch(&pStream->lock); @@ -1151,23 +1207,7 @@ static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream return 0; } -static const char *mndGetStreamDB(SMnode *pMnode) { - SSdb * pSdb = pMnode->pSdb; - SStreamObj *pStream = NULL; - void * pIter = NULL; - - pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); - if (pIter == NULL) { - return NULL; - } - - const char *p = taosStrdup(pStream->sourceDb); - mndReleaseStream(pMnode, pStream); - sdbCancelFetch(pSdb, pIter); - return p; -} - -static int32_t initStreamNodeList(SMnode* pMnode) { +static int32_t initStreamNodeList(SMnode *pMnode) { if (execInfo.pNodeList == NULL || (taosArrayGetSize(execInfo.pNodeList) == 0)) { execInfo.pNodeList = taosArrayDestroy(execInfo.pNodeList); execInfo.pNodeList = extractNodeListFromStream(pMnode); @@ -1176,11 +1216,11 @@ static int32_t initStreamNodeList(SMnode* pMnode) { return taosArrayGetSize(execInfo.pNodeList); } -static bool taskNodeIsUpdated(SMnode* pMnode) { +static bool taskNodeIsUpdated(SMnode *pMnode) { // check if the node update happens or not taosThreadMutexLock(&execInfo.lock); - int32_t numOfNodes = initStreamNodeList(pMnode); + int32_t numOfNodes = initStreamNodeList(pMnode); if (numOfNodes == 0) { mDebug("stream task node change checking done, no vgroups exist, do nothing"); execInfo.ts = taosGetTimestampSec(); @@ -1220,6 +1260,38 @@ static bool taskNodeIsUpdated(SMnode* pMnode) { return nodeUpdated; } +static int32_t mndCheckNodeStatus(SMnode *pMnode) { + bool ready = true; + int64_t ts = taosGetTimestampSec(); + if (taskNodeIsUpdated(pMnode)) { + return -1; + } + + taosThreadMutexLock(&execInfo.lock); + if (taosArrayGetSize(execInfo.pNodeList) == 0) { + mDebug("stream task node change checking done, no vgroups exist, do nothing"); + execInfo.ts = ts; + } + + for (int32_t i = 0; i < taosArrayGetSize(execInfo.pTaskList); ++i) { + STaskId * p = taosArrayGet(execInfo.pTaskList, i); + STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, p, sizeof(*p)); + if (pEntry == NULL) { + continue; + } + + if (pEntry->status != TASK_STATUS__READY) { + mDebug("s-task:0x%" PRIx64 "-0x%x (nodeId:%d) status:%s not ready, checkpoint msg not issued", + pEntry->id.streamId, (int32_t)pEntry->id.taskId, 0, streamTaskGetStatusStr(pEntry->status)); + ready = false; + break; + } + } + + taosThreadMutexUnlock(&execInfo.lock); + return ready ? 
0 : -1; +} + static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { SMnode * pMnode = pReq->info.node; SSdb * pSdb = pMnode->pSdb; @@ -1227,89 +1299,65 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { SStreamObj *pStream = NULL; int32_t code = 0; - // check if the node update happens or not - bool updated = taskNodeIsUpdated(pMnode); - if (updated) { - mWarn("checkpoint ignore, stream task nodes update detected"); - return -1; - } - - { // check if all tasks are in TASK_STATUS__READY status - bool ready = true; - taosThreadMutexLock(&execInfo.lock); - - // no streams exists, abort - int32_t numOfTasks = taosArrayGetSize(execInfo.pTaskList); - if (numOfTasks <= 0) { - taosThreadMutexUnlock(&execInfo.lock); - return 0; - } - - for (int32_t i = 0; i < taosArrayGetSize(execInfo.pTaskList); ++i) { - STaskId * p = taosArrayGet(execInfo.pTaskList, i); - STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, p, sizeof(*p)); - if (pEntry == NULL) { - continue; - } - - if (pEntry->status != TASK_STATUS__READY) { - mDebug("s-task:0x%" PRIx64 "-0x%x (nodeId:%d) status:%s not ready, checkpoint msg not issued", - pEntry->id.streamId, (int32_t)pEntry->id.taskId, 0, streamTaskGetStatusStr(pEntry->status)); - ready = false; - break; - } - } - taosThreadMutexUnlock(&execInfo.lock); - if (!ready) { - return 0; - } + if ((code = mndCheckNodeStatus(pMnode)) != 0) { + return code; } SMStreamDoCheckpointMsg *pMsg = (SMStreamDoCheckpointMsg *)pReq->pCont; - int64_t checkpointId = pMsg->checkpointId; - - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB, NULL, MND_STREAM_CHECKPOINT_NAME); - if (pTrans == NULL) { - mError("failed to trigger checkpoint, reason: %s", tstrerror(TSDB_CODE_OUT_OF_MEMORY)); - return -1; - } - - mDebug("start to trigger checkpoint, checkpointId: %" PRId64, checkpointId); - - const char *pDb = mndGetStreamDB(pMnode); - mndTransSetDbName(pTrans, pDb, "checkpoint"); - taosMemoryFree((void *)pDb); - - if (mndTransCheckConflict(pMnode, pTrans) != 0) { - mError("failed to trigger checkpoint, checkpointId: %" PRId64 ", reason:%s", checkpointId, - tstrerror(TSDB_CODE_MND_TRANS_CONFLICT)); - mndTransDrop(pTrans); - return -1; - } - - while (1) { - pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); - if (pIter == NULL) break; - - code = mndAddStreamCheckpointToTrans(pTrans, pStream, pMnode, checkpointId); + while ((pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream)) != NULL) { + code = mndProcessStreamCheckpointTrans(pMnode, pStream, pMsg->checkpointId); sdbRelease(pSdb, pStream); if (code == -1) { break; } } - if (code == 0) { - if (mndTransPrepare(pMnode, pTrans) != 0) { - mError("failed to prepare trans rebalance since %s", terrstr()); + return code; +} + +static int32_t mndProcessStreamCheckpointInCandid(SRpcMsg *pReq) { + SMnode *pMnode = pReq->info.node; + void * pIter = NULL; + int32_t code = 0; + + taosThreadMutexLock(&execInfo.lock); + int32_t num = taosHashGetSize(execInfo.transMgmt.pWaitingList); + taosThreadMutexUnlock(&execInfo.lock); + if (num == 0) { + return code; + } + + if ((code = mndCheckNodeStatus(pMnode)) != 0) { + return code; + } + + SArray *pList = taosArrayInit(4, sizeof(int64_t)); + while ((pIter = taosHashIterate(execInfo.transMgmt.pWaitingList, pIter)) != NULL) { + SCheckpointCandEntry *pEntry = pIter; + + SStreamObj *ps = mndAcquireStream(pMnode, pEntry->pName); + if (ps == NULL) { + continue; + } + mDebug("start to launch checkpoint for stream:%s %" PRIx64 " in candidate list", pEntry->pName, 
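Editor's note: mndCheckNodeStatus gates every checkpoint. It first bails out if a node update was detected, then scans the cached task map and refuses to issue the checkpoint unless every tracked task reports TASK_STATUS__READY; the rewritten mndProcessStreamDoCheckpoint simply calls it and, on success, starts one checkpoint transaction per stream. A standalone sketch of the all-ready scan, with illustrative status values:

#include <stdio.h>

enum { DEMO_STATUS_READY = 0, DEMO_STATUS_SCAN_HISTORY = 1, DEMO_STATUS_STOP = 2 };

typedef struct {
  long taskId;
  int  status;
} DemoTaskEntry;

/* 0 when every task is ready (checkpoint may start), -1 otherwise. */
static int demoCheckNodeStatus(const DemoTaskEntry *entries, int n) {
  for (int i = 0; i < n; ++i) {
    if (entries[i].status != DEMO_STATUS_READY) {
      printf("task 0x%lx not ready, checkpoint msg not issued\n", (unsigned long)entries[i].taskId);
      return -1;
    }
  }
  return 0;
}

int main(void) {
  DemoTaskEntry entries[] = {{0x1, DEMO_STATUS_READY}, {0x2, DEMO_STATUS_SCAN_HISTORY}};
  printf("checkpoint allowed: %s\n", demoCheckNodeStatus(entries, 2) == 0 ? "yes" : "no");
  return 0;
}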
pEntry->streamId); + + code = mndProcessStreamCheckpointTrans(pMnode, ps, pEntry->checkpointId); + mndReleaseStream(pMnode, ps); + + if (code == TSDB_CODE_SUCCESS) { + taosArrayPush(pList, &pEntry->streamId); } } - mndTransDrop(pTrans); + for (int32_t i = 0; i < taosArrayGetSize(pList); ++i) { + int64_t *pId = taosArrayGet(pList, i); - // only one trans here - taosThreadMutexLock(&execInfo.lock); - execInfo.activeCheckpoint = checkpointId; - taosThreadMutexUnlock(&execInfo.lock); + taosHashRemove(execInfo.transMgmt.pWaitingList, pId, sizeof(*pId)); + } + + int32_t remain = taosHashGetSize(execInfo.transMgmt.pWaitingList); + mDebug("%d in candidate list generated checkpoint, remaining:%d", (int32_t)taosArrayGetSize(pList), remain); + taosArrayDestroy(pList); return code; } @@ -1329,46 +1377,56 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { if (dropReq.igNotExists) { mInfo("stream:%s not exist, ignore not exist is set", dropReq.name); sdbRelease(pMnode->pSdb, pStream); - tFreeSMDropStreamReq(&dropReq); + tFreeMDropStreamReq(&dropReq); return 0; } else { terrno = TSDB_CODE_MND_STREAM_NOT_EXIST; mError("stream:%s not exist failed to drop", dropReq.name); - tFreeSMDropStreamReq(&dropReq); + tFreeMDropStreamReq(&dropReq); return -1; } } if (mndCheckDbPrivilegeByName(pMnode, pReq->info.conn.user, MND_OPER_WRITE_DB, pStream->targetDb) != 0) { sdbRelease(pMnode->pSdb, pStream); - tFreeSMDropStreamReq(&dropReq); + tFreeMDropStreamReq(&dropReq); return -1; } - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, "drop-stream"); + // check if it is conflict with other trans in both sourceDb and targetDb. + bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb, true); + if (conflict) { + sdbRelease(pMnode->pSdb, pStream); + tFreeMDropStreamReq(&dropReq); + return -1; + } + + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, MND_STREAM_DROP_NAME); if (pTrans == NULL) { mError("stream:%s, failed to drop since %s", dropReq.name, terrstr()); sdbRelease(pMnode->pSdb, pStream); - tFreeSMDropStreamReq(&dropReq); + tFreeMDropStreamReq(&dropReq); return -1; } - mInfo("trans:%d, used to drop stream:%s", pTrans->id, dropReq.name); + mInfo("trans:%d used to drop stream:%s", pTrans->id, dropReq.name); mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); if (mndTransCheckConflict(pMnode, pTrans) != 0) { sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - tFreeSMDropStreamReq(&dropReq); + tFreeMDropStreamReq(&dropReq); return -1; } + int32_t code = mndStreamRegisterTrans(pTrans, MND_STREAM_DROP_NAME, pStream->sourceDb, pStream->targetDb); + // drop all tasks if (mndDropStreamTasks(pMnode, pTrans, pStream) < 0) { mError("stream:%s, failed to drop task since %s", dropReq.name, terrstr()); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - tFreeSMDropStreamReq(&dropReq); + tFreeMDropStreamReq(&dropReq); return -1; } @@ -1376,7 +1434,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { if (mndPersistDropStreamLog(pMnode, pTrans, pStream) < 0) { sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - tFreeSMDropStreamReq(&dropReq); + tFreeMDropStreamReq(&dropReq); return -1; } @@ -1384,7 +1442,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { mError("trans:%d, failed to prepare drop stream trans since %s", pTrans->id, terrstr()); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - tFreeSMDropStreamReq(&dropReq); + tFreeMDropStreamReq(&dropReq); 
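Editor's note: mndProcessStreamCheckpointInCandid drains the waiting list in two phases. It first retries the deferred checkpoint for every candidate and records the stream ids that succeeded, and only afterwards removes those ids from the hash table, so the table is never mutated while it is being iterated. A simplified sketch of that collect-then-remove pattern, with an array standing in for the waiting-list hash and an arbitrary stand-in for the launch step:

#include <stdio.h>

typedef struct {
  long streamId;
  long checkpointId;
  int  pending;       /* 1 while the entry is still in the waiting list */
} DemoCandidate;

/* Stand-in for launching the deferred checkpoint; 0 means it was started. */
static int demoLaunchCheckpoint(const DemoCandidate *pEntry) {
  return (pEntry->streamId % 2 == 0) ? 0 : -1;   /* arbitrary demo outcome */
}

int main(void) {
  DemoCandidate waiting[] = {{2, 100, 1}, {3, 100, 1}, {4, 100, 1}};
  int  n = 3;
  long done[3];
  int  numDone = 0;

  /* phase 1: retry every candidate, remember the ones that succeeded */
  for (int i = 0; i < n; ++i) {
    if (demoLaunchCheckpoint(&waiting[i]) == 0) done[numDone++] = waiting[i].streamId;
  }

  /* phase 2: remove the finished candidates only after the iteration */
  int remain = n;
  for (int i = 0; i < numDone; ++i) {
    for (int j = 0; j < n; ++j) {
      if (waiting[j].pending && waiting[j].streamId == done[i]) { waiting[j].pending = 0; remain--; }
    }
  }

  printf("%d candidates launched a checkpoint, remaining:%d\n", numDone, remain);
  return 0;
}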
return -1; } @@ -1392,13 +1450,12 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { SName name = {0}; tNameFromString(&name, dropReq.name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); - // reuse this function for stream auditRecord(pReq, pMnode->clusterId, "dropStream", "", name.dbname, dropReq.sql, dropReq.sqlLen); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - tFreeSMDropStreamReq(&dropReq); + tFreeMDropStreamReq(&dropReq); return TSDB_CODE_ACTION_IN_PROGRESS; } @@ -1538,7 +1595,7 @@ static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pB char dstStr[20] = {0}; STR_TO_VARSTR(dstStr, sinkQuota) pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetVal(pColInfo, numOfRows, (const char*) dstStr, false); + colDataSetVal(pColInfo, numOfRows, (const char *)dstStr, false); char scanHistoryIdle[20 + VARSTR_HEADER_SIZE] = {0}; strcpy(scanHistoryIdle, "100a"); @@ -1546,7 +1603,7 @@ static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pB memset(dstStr, 0, tListLen(dstStr)); STR_TO_VARSTR(dstStr, scanHistoryIdle) pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetVal(pColInfo, numOfRows, (const char*) dstStr, false); + colDataSetVal(pColInfo, numOfRows, (const char *)dstStr, false); numOfRows++; sdbRelease(pSdb, pStream); @@ -1561,6 +1618,123 @@ static void mndCancelGetNextStream(SMnode *pMnode, void *pIter) { sdbCancelFetch(pSdb, pIter); } +static void setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SSDataBlock *pBlock, int32_t numOfRows) { + SColumnInfoData *pColInfo; + int32_t cols = 0; + + // stream name + char streamName[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; + STR_WITH_MAXSIZE_TO_VARSTR(streamName, mndGetDbStr(pStream->name), sizeof(streamName)); + + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataSetVal(pColInfo, numOfRows, (const char *)streamName, false); + + // task id + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + + char idstr[128] = {0}; + int32_t len = tintToHex(pTask->id.taskId, &idstr[4]); + idstr[2] = '0'; + idstr[3] = 'x'; + varDataSetLen(idstr, len + 2); + colDataSetVal(pColInfo, numOfRows, idstr, false); + + // node type + char nodeType[20 + VARSTR_HEADER_SIZE] = {0}; + varDataSetLen(nodeType, 5); + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + if (pTask->info.nodeId > 0) { + memcpy(varDataVal(nodeType), "vnode", 5); + } else { + memcpy(varDataVal(nodeType), "snode", 5); + } + colDataSetVal(pColInfo, numOfRows, nodeType, false); + + // node id + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + int64_t nodeId = TMAX(pTask->info.nodeId, 0); + colDataSetVal(pColInfo, numOfRows, (const char *)&nodeId, false); + + // level + char level[20 + VARSTR_HEADER_SIZE] = {0}; + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + memcpy(varDataVal(level), "source", 6); + varDataSetLen(level, 6); + } else if (pTask->info.taskLevel == TASK_LEVEL__AGG) { + memcpy(varDataVal(level), "agg", 3); + varDataSetLen(level, 3); + } else if (pTask->info.taskLevel == TASK_LEVEL__SINK) { + memcpy(varDataVal(level), "sink", 4); + varDataSetLen(level, 4); + } + + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataSetVal(pColInfo, numOfRows, (const char *)level, false); + + // status + char status[20 + VARSTR_HEADER_SIZE] = {0}; + STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; + + STaskStatusEntry *pe = taosHashGet(execInfo.pTaskMap, &id, sizeof(id)); + if (pe == NULL) { + return; + } + + const char *pStatus = 
streamTaskGetStatusStr(pe->status); + STR_TO_VARSTR(status, pStatus); + + // status + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataSetVal(pColInfo, numOfRows, (const char *)status, false); + + // stage + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataSetVal(pColInfo, numOfRows, (const char *)&pe->stage, false); + + // input queue + char vbuf[30] = {0}; + char buf[25] = {0}; + const char *queueInfoStr = "%4.2fMiB (%5.2f%)"; + sprintf(buf, queueInfoStr, pe->inputQUsed, pe->inputRate); + STR_TO_VARSTR(vbuf, buf); + + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); + + // output queue + // sprintf(buf, queueInfoStr, pe->outputQUsed, pe->outputRate); + // STR_TO_VARSTR(vbuf, buf); + + // pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + // colDataSetVal(pColInfo, numOfRows, (const char*)vbuf, false); + + if (pTask->info.taskLevel == TASK_LEVEL__SINK) { + const char *sinkStr = "%.2fMiB"; + sprintf(buf, sinkStr, pe->sinkDataSize); + } else if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + // offset info + const char *offsetStr = "%" PRId64 " [%" PRId64 ", %" PRId64 "]"; + sprintf(buf, offsetStr, pe->processedVer, pe->verStart, pe->verEnd); + } + + STR_TO_VARSTR(vbuf, buf); + + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); +} + +static int32_t getNumOfTasks(SArray *pTaskList) { + int32_t numOfLevels = taosArrayGetSize(pTaskList); + + int32_t count = 0; + for (int32_t i = 0; i < numOfLevels; i++) { + SArray *pLevel = taosArrayGetP(pTaskList, i); + count += taosArrayGetSize(pLevel); + } + + return count; +} + static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rowsCapacity) { SMnode * pMnode = pReq->info.node; SSdb * pSdb = pMnode->pSdb; @@ -1576,137 +1750,25 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock // lock taosRLockLatch(&pStream->lock); - // count task num - int32_t sz = taosArrayGetSize(pStream->tasks); - - int32_t count = 0; - for (int32_t i = 0; i < sz; i++) { - SArray *pLevel = taosArrayGetP(pStream->tasks, i); - count += taosArrayGetSize(pLevel); - } - + int32_t count = getNumOfTasks(pStream->tasks); if (numOfRows + count > rowsCapacity) { blockDataEnsureCapacity(pBlock, numOfRows + count); } // add row for each task - for (int32_t i = 0; i < sz; i++) { + for (int32_t i = 0; i < taosArrayGetSize(pStream->tasks); i++) { SArray *pLevel = taosArrayGetP(pStream->tasks, i); - int32_t levelCnt = taosArrayGetSize(pLevel); - for (int32_t j = 0; j < levelCnt; j++) { + int32_t numOfLevels = taosArrayGetSize(pLevel); + for (int32_t j = 0; j < numOfLevels; j++) { SStreamTask *pTask = taosArrayGetP(pLevel, j); - - SColumnInfoData *pColInfo; - int32_t cols = 0; - - // stream name - char streamName[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; - STR_WITH_MAXSIZE_TO_VARSTR(streamName, mndGetDbStr(pStream->name), sizeof(streamName)); - - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetVal(pColInfo, numOfRows, (const char *)streamName, false); - - // task id - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - - char idstr[128] = {0}; - int32_t len = tintToHex(pTask->id.taskId, &idstr[4]); - idstr[2] = '0'; - idstr[3] = 'x'; - varDataSetLen(idstr, len + 2); - colDataSetVal(pColInfo, numOfRows, idstr, false); - - // node type - char nodeType[20 + VARSTR_HEADER_SIZE] = {0}; - varDataSetLen(nodeType, 5); - pColInfo = 
taosArrayGet(pBlock->pDataBlock, cols++); - if (pTask->info.nodeId > 0) { - memcpy(varDataVal(nodeType), "vnode", 5); - } else { - memcpy(varDataVal(nodeType), "snode", 5); - } - colDataSetVal(pColInfo, numOfRows, nodeType, false); - - // node id - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - int64_t nodeId = TMAX(pTask->info.nodeId, 0); - colDataSetVal(pColInfo, numOfRows, (const char *)&nodeId, false); - - // level - char level[20 + VARSTR_HEADER_SIZE] = {0}; - if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { - memcpy(varDataVal(level), "source", 6); - varDataSetLen(level, 6); - } else if (pTask->info.taskLevel == TASK_LEVEL__AGG) { - memcpy(varDataVal(level), "agg", 3); - varDataSetLen(level, 3); - } else if (pTask->info.taskLevel == TASK_LEVEL__SINK) { - memcpy(varDataVal(level), "sink", 4); - varDataSetLen(level, 4); - } - - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetVal(pColInfo, numOfRows, (const char *)level, false); - - // status - char status[20 + VARSTR_HEADER_SIZE] = {0}; - - STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; - STaskStatusEntry *pe = taosHashGet(execInfo.pTaskMap, &id, sizeof(id)); - if (pe == NULL) { - continue; - } - - const char *pStatus = streamTaskGetStatusStr(pe->status); - STR_TO_VARSTR(status, pStatus); - - // status - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetVal(pColInfo, numOfRows, (const char *)status, false); - - // stage - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetVal(pColInfo, numOfRows, (const char *)&pe->stage, false); - - // input queue - char vbuf[30] = {0}; - char buf[25] = {0}; - const char *queueInfoStr = "%4.2fMiB (%5.2f%)"; - sprintf(buf, queueInfoStr, pe->inputQUsed, pe->inputRate); - STR_TO_VARSTR(vbuf, buf); - - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); - - // output queue - // sprintf(buf, queueInfoStr, pe->outputQUsed, pe->outputRate); - // STR_TO_VARSTR(vbuf, buf); - - // pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - // colDataSetVal(pColInfo, numOfRows, (const char*)vbuf, false); - - if (pTask->info.taskLevel == TASK_LEVEL__SINK) { - const char *sinkStr = "%.2fMiB"; - sprintf(buf, sinkStr, pe->sinkDataSize); - } else if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { - // offset info - const char *offsetStr = "%" PRId64 " [%" PRId64 ", %" PRId64 "]"; - sprintf(buf, offsetStr, pe->processedVer, pe->verStart, pe->verEnd); - } - - STR_TO_VARSTR(vbuf, buf); - - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); - + setTaskAttrInResBlock(pStream, pTask, pBlock, numOfRows); numOfRows++; } } // unlock taosRUnLockLatch(&pStream->lock); - sdbRelease(pSdb, pStream); } @@ -1719,7 +1781,7 @@ static void mndCancelGetNextStreamTask(SMnode *pMnode, void *pIter) { sdbCancelFetch(pSdb, pIter); } -static int32_t mndPauseStreamTask(SMnode* pMnode, STrans *pTrans, SStreamTask *pTask) { +static int32_t mndPauseStreamTask(SMnode *pMnode, STrans *pTrans, SStreamTask *pTask) { SVPauseStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVPauseStreamTaskReq)); if (pReq == NULL) { mError("failed to malloc in pause stream, size:%" PRIzu ", code:%s", sizeof(SVPauseStreamTaskReq), @@ -1745,7 +1807,7 @@ static int32_t mndPauseStreamTask(SMnode* pMnode, STrans *pTrans, SStreamTask *p return 0; } -int32_t mndPauseAllStreamTasks(SMnode* pMnode, STrans *pTrans, SStreamObj *pStream) { +int32_t 
mndPauseAllStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream) { SArray *tasks = pStream->tasks; int32_t size = taosArrayGetSize(tasks); @@ -1814,6 +1876,13 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { return -1; } + // check if it is conflict with other trans in both sourceDb and targetDb. + bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb, true); + if (conflict) { + sdbRelease(pMnode->pSdb, pStream); + return -1; + } + bool updated = taskNodeIsUpdated(pMnode); if (updated) { mError("tasks are not ready for pause, node update detected"); @@ -1822,7 +1891,7 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, "pause-stream"); if (pTrans == NULL) { - mError("stream:%s, failed to pause stream since %s", pauseReq.name, terrstr()); + mError("stream:%s failed to pause stream since %s", pauseReq.name, terrstr()); sdbRelease(pMnode->pSdb, pStream); return -1; } @@ -1836,7 +1905,9 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { return -1; } - // pause all tasks + int32_t code = mndStreamRegisterTrans(pTrans, MND_STREAM_PAUSE_NAME, pStream->sourceDb, pStream->targetDb); + + // if nodeUpdate happened, not send pause trans if (mndPauseAllStreamTasks(pMnode, pTrans, pStream) < 0) { mError("stream:%s, failed to pause task since %s", pauseReq.name, terrstr()); sdbRelease(pMnode->pSdb, pStream); @@ -1864,7 +1935,7 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { return TSDB_CODE_ACTION_IN_PROGRESS; } -static int32_t mndResumeStreamTask(STrans *pTrans, SMnode* pMnode, SStreamTask *pTask, int8_t igUntreated) { +static int32_t mndResumeStreamTask(STrans *pTrans, SMnode *pMnode, SStreamTask *pTask, int8_t igUntreated) { SVResumeStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVResumeStreamTaskReq)); if (pReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -1888,7 +1959,7 @@ static int32_t mndResumeStreamTask(STrans *pTrans, SMnode* pMnode, SStreamTask * return 0; } -int32_t mndResumeAllStreamTasks(STrans *pTrans, SMnode* pMnode, SStreamObj *pStream, int8_t igUntreated) { +int32_t mndResumeAllStreamTasks(STrans *pTrans, SMnode *pMnode, SStreamObj *pStream, int8_t igUntreated) { int32_t size = taosArrayGetSize(pStream->tasks); for (int32_t i = 0; i < size; i++) { SArray *pTasks = taosArrayGetP(pStream->tasks, i); @@ -1940,13 +2011,21 @@ static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) { return -1; } - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, "pause-stream"); - if (pTrans == NULL) { - mError("stream:%s, failed to pause stream since %s", pauseReq.name, terrstr()); + // check if it is conflict with other trans in both sourceDb and targetDb. 
+ bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb, true); + if (conflict) { sdbRelease(pMnode->pSdb, pStream); return -1; } - mInfo("trans:%d, used to pause stream:%s", pTrans->id, pauseReq.name); + + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, MND_STREAM_RESUME_NAME); + if (pTrans == NULL) { + mError("stream:%s, failed to resume stream since %s", pauseReq.name, terrstr()); + sdbRelease(pMnode->pSdb, pStream); + return -1; + } + + mInfo("trans:%d used to resume stream:%s", pTrans->id, pauseReq.name); mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); if (mndTransCheckConflict(pMnode, pTrans) != 0) { @@ -1955,6 +2034,8 @@ static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) { return -1; } + int32_t code = mndStreamRegisterTrans(pTrans, MND_STREAM_RESUME_NAME, pStream->sourceDb, pStream->targetDb); + // resume all tasks if (mndResumeAllStreamTasks(pTrans, pMnode, pStream, pauseReq.igUntreated) < 0) { mError("stream:%s, failed to drop task since %s", pauseReq.name, terrstr()); @@ -2115,8 +2196,8 @@ static bool isNodeEpsetChanged(const SEpSet *pPrevEpset, const SEpSet *pCurrent) // 1. increase the replica does not affect the stream process. // 2. decreasing the replica may affect the stream task execution in the way that there is one or more running stream // tasks on the will be removed replica. -// 3. vgroup redistribution is an combination operation of first increase replica and then decrease replica. So we will -// handle it as mentioned in 1 & 2 items. +// 3. vgroup redistribution is an combination operation of first increase replica and then decrease replica. So we +// will handle it as mentioned in 1 & 2 items. static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList) { SVgroupChangeInfo info = { .pUpdateNodeList = taosArrayInit(4, sizeof(SNodeUpdateInfo)), @@ -2144,7 +2225,8 @@ static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pP epsetAssign(&updateInfo.prevEp, &pPrevEntry->epset); epsetAssign(&updateInfo.newEp, &pCurrent->epset); taosArrayPush(info.pUpdateNodeList, &updateInfo); - + } + if (pCurrent->nodeId != SNODE_HANDLE) { SVgObj *pVgroup = mndAcquireVgroup(pMnode, pCurrent->nodeId); taosHashPut(info.pDBMap, pVgroup->dbName, strlen(pVgroup->dbName), NULL, 0); mndReleaseVgroup(pMnode, pVgroup); @@ -2200,6 +2282,24 @@ static SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady) { sdbRelease(pSdb, pVgroup); } + SSnodeObj *pObj = NULL; + while (1) { + pIter = sdbFetch(pSdb, SDB_SNODE, pIter, (void **)&pObj); + if (pIter == NULL) { + break; + } + + SNodeEntry entry = {0}; + addEpIntoEpSet(&entry.epset, pObj->pDnode->fqdn, pObj->pDnode->port); + entry.nodeId = SNODE_HANDLE; + + char buf[256] = {0}; + EPSET_TO_STR(&entry.epset, buf); + mDebug("take snode snapshot, nodeId:%d %s", entry.nodeId, buf); + taosArrayPush(pVgroupListSnapshot, &entry); + sdbRelease(pSdb, pObj); + } + return pVgroupListSnapshot; } @@ -2219,7 +2319,7 @@ static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChange // here create only one trans if (pTrans == NULL) { - pTrans = doCreateTrans(pMnode, pStream, "stream-task-update"); + pTrans = doCreateTrans(pMnode, pStream, NULL, MND_STREAM_TASK_UPDATE_NAME, "update task epsets"); if (pTrans == NULL) { sdbRelease(pSdb, pStream); sdbCancelFetch(pSdb, pIter); @@ -2329,7 +2429,7 @@ static void doExtractTasksFromStream(SMnode *pMnode) { break; } - 
keepStreamTasksInBuf(pStream, &execInfo); + saveStreamTasksInfo(pStream, &execInfo); sdbRelease(pSdb, pStream); } } @@ -2339,7 +2439,6 @@ static int32_t doRemoveTasks(SStreamExecInfo *pExecNode, STaskId *pRemovedId) { if (p == NULL) { return TSDB_CODE_SUCCESS; } - taosHashRemove(pExecNode->pTaskMap, pRemovedId, sizeof(*pRemovedId)); for (int32_t k = 0; k < taosArrayGetSize(pExecNode->pTaskList); ++k) { @@ -2377,6 +2476,8 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { STaskId * pId = taosArrayGet(execInfo.pTaskList, i); STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, pId, sizeof(*pId)); + if (pEntry->nodeId == SNODE_HANDLE) continue; + bool existed = taskNodeExists(pNodeSnapshot, pEntry->nodeId); if (!existed) { taosArrayPush(pRemovedTasks, pId); @@ -2392,9 +2493,9 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { (int32_t)taosArrayGetSize(execInfo.pTaskList)); int32_t size = taosArrayGetSize(pNodeSnapshot); - SArray* pValidNodeEntryList = taosArrayInit(4, sizeof(SNodeEntry)); - for(int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeList); ++i) { - SNodeEntry* p = taosArrayGet(execInfo.pNodeList, i); + SArray *pValidNodeEntryList = taosArrayInit(4, sizeof(SNodeEntry)); + for (int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeList); ++i) { + SNodeEntry *p = taosArrayGet(execInfo.pNodeList, i); for (int32_t j = 0; j < size; ++j) { SNodeEntry *pEntry = taosArrayGet(pNodeSnapshot, j); @@ -2413,6 +2514,18 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { return 0; } +// kill all trans in the dst DB +static void killAllCheckpointTrans(SMnode *pMnode, SVgroupChangeInfo *pChangeInfo) { + void *pIter = NULL; + while ((pIter = taosHashIterate(pChangeInfo->pDBMap, pIter)) != NULL) { + char *pDb = (char *)pIter; + + size_t len = 0; + void * pKey = taosHashGetKey(pDb, &len); + killActiveCheckpointTrans(pMnode, pKey, len); + } +} + // this function runs by only one thread, so it is not multi-thread safe static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { int32_t code = 0; @@ -2454,7 +2567,8 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeList, pNodeSnapshot); if (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0) { // kill current active checkpoint transaction, since the transaction is vnode wide. 
- doKillActiveCheckpointTrans(pMnode); + killAllCheckpointTrans(pMnode, &changeInfo); + code = mndProcessVgroupChange(pMnode, &changeInfo); // keep the new vnode snapshot @@ -2500,7 +2614,7 @@ static int32_t mndProcessNodeCheck(SRpcMsg *pReq) { return 0; } -void keepStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) { +void saveStreamTasksInfo(SStreamObj *pStream, SStreamExecInfo *pExecNode) { int32_t level = taosArrayGetSize(pStream->tasks); for (int32_t i = 0; i < level; i++) { @@ -2543,8 +2657,9 @@ void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) { STaskId *pId = taosArrayGet(pExecNode->pTaskList, k); if (pId->taskId == id.taskId && pId->streamId == id.streamId) { taosArrayRemove(pExecNode->pTaskList, k); - mInfo("s-task:0x%x removed from buffer, remain:%d", (int32_t)id.taskId, - (int32_t)taosArrayGetSize(pExecNode->pTaskList)); + + int32_t num = taosArrayGetSize(pExecNode->pTaskList); + mInfo("s-task:0x%x removed from buffer, remain:%d", (int32_t)id.taskId, num); break; } } @@ -2555,15 +2670,15 @@ void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) { ASSERT(taosHashGetSize(pExecNode->pTaskMap) == taosArrayGetSize(pExecNode->pTaskList)); } -STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, const char *name) { - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, NULL, name); +STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, const char *name, const char *pMsg) { + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, name); if (pTrans == NULL) { mError("failed to build trans:%s, reason: %s", name, tstrerror(TSDB_CODE_OUT_OF_MEMORY)); terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } - mDebug("start to build stream:0x%" PRIx64 " task DAG update", pStream->uid); + mDebug("s-task:0x%" PRIx64 " start to build trans %s", pStream->uid, pMsg); mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); if (mndTransCheckConflict(pMnode, pTrans) != 0) { @@ -2578,7 +2693,7 @@ STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, const char *name) { } int32_t createStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) { - STrans *pTrans = doCreateTrans(pMnode, pStream, "stream-task-reset"); + STrans *pTrans = doCreateTrans(pMnode, pStream, NULL, MND_STREAM_TASK_RESET_NAME, " reset from failed checkpoint"); if (pTrans == NULL) { return terrno; } @@ -2599,6 +2714,7 @@ int32_t createStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) { terrno = TSDB_CODE_OUT_OF_MEMORY; mError("failed to malloc in reset stream, size:%" PRIzu ", code:%s", sizeof(SVResetStreamTaskReq), tstrerror(TSDB_CODE_OUT_OF_MEMORY)); + taosWUnLockLatch(&pStream->lock); return terrno; } @@ -2642,43 +2758,36 @@ int32_t createStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) { return TSDB_CODE_ACTION_IN_PROGRESS; } -int32_t doKillActiveCheckpointTrans(SMnode *pMnode) { - int32_t transId = 0; - SSdb * pSdb = pMnode->pSdb; - STrans *pTrans = NULL; - void * pIter = NULL; - - while (1) { - pIter = sdbFetch(pSdb, SDB_TRANS, pIter, (void **)&pTrans); - if (pIter == NULL) { - break; - } - - if (strncmp(pTrans->opername, MND_STREAM_CHECKPOINT_NAME, tListLen(pTrans->opername) - 1) == 0) { - transId = pTrans->id; - sdbRelease(pSdb, pTrans); - sdbCancelFetch(pSdb, pIter); - break; - } - - sdbRelease(pSdb, pTrans); - } - - if (transId == 0) { - mDebug("failed to find the checkpoint trans, reset not executed"); +int32_t killActiveCheckpointTrans(SMnode 
*pMnode, const char *pDBName, size_t len) { + // data in the hash table will be removed automatically, no need to remove it here. + SStreamTransInfo *pTransInfo = taosHashGet(execInfo.transMgmt.pDBTrans, pDBName, len); + if (pTransInfo == NULL) { return TSDB_CODE_SUCCESS; } - pTrans = mndAcquireTrans(pMnode, transId); - mInfo("kill checkpoint trans:%d", transId); + // not checkpoint trans, ignore + if (strcmp(pTransInfo->name, MND_STREAM_CHECKPOINT_NAME) != 0) { + mDebug("not checkpoint trans, not kill it, name:%s, transId:%d", pTransInfo->name, pTransInfo->transId); + return TSDB_CODE_SUCCESS; + } + + STrans *pTrans = mndAcquireTrans(pMnode, pTransInfo->transId); + if (pTrans != NULL) { + mInfo("kill checkpoint transId:%d in Db:%s", pTransInfo->transId, pDBName); + mndKillTrans(pMnode, pTrans); + mndReleaseTrans(pMnode, pTrans); + } - mndKillTrans(pMnode, pTrans); - mndReleaseTrans(pMnode, pTrans); return TSDB_CODE_SUCCESS; } -int32_t mndResetFromCheckpoint(SMnode *pMnode) { - doKillActiveCheckpointTrans(pMnode); +static int32_t mndResetStatusFromCheckpoint(SMnode *pMnode, int32_t transId) { + STrans *pTrans = mndAcquireTrans(pMnode, transId); + if (pTrans != NULL) { + mInfo("kill checkpoint transId:%d to reset task status", transId); + mndKillTrans(pMnode, pTrans); + mndReleaseTrans(pMnode, pTrans); + } // set all tasks status to be normal, refactor later to be stream level, instead of vnode level. SSdb * pSdb = pMnode->pSdb; @@ -2690,7 +2799,13 @@ int32_t mndResetFromCheckpoint(SMnode *pMnode) { break; } - // todo this transaction should exist be only one + bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb, false); + if (conflict) { + mError("stream:%s other trans exists in DB:%s & %s failed to start reset-status trans", pStream->name, + pStream->sourceDb, pStream->targetDb); + continue; + } + mDebug("stream:%s (0x%" PRIx64 ") reset checkpoint procedure, create reset trans", pStream->name, pStream->uid); int32_t code = createStreamResetStatusTrans(pMnode, pStream); if (code != TSDB_CODE_SUCCESS) { @@ -2698,21 +2813,105 @@ int32_t mndResetFromCheckpoint(SMnode *pMnode) { return code; } } - return 0; } +static SStreamTask *mndGetStreamTask(STaskId *pId, SStreamObj *pStream) { + for (int32_t i = 0; i < taosArrayGetSize(pStream->tasks); i++) { + SArray *pLevel = taosArrayGetP(pStream->tasks, i); + + int32_t numOfLevels = taosArrayGetSize(pLevel); + for (int32_t j = 0; j < numOfLevels; j++) { + SStreamTask *pTask = taosArrayGetP(pLevel, j); + if (pTask->id.taskId == pId->taskId) { + return pTask; + } + } + } + + return NULL; +} + +static bool needDropRelatedFillhistoryTask(STaskStatusEntry *pTaskEntry, SStreamExecInfo *pExecNode) { + if (pTaskEntry->status == TASK_STATUS__STREAM_SCAN_HISTORY && pTaskEntry->statusLastDuration >= 10) { + if (!pTaskEntry->inputQChanging && pTaskEntry->inputQUnchangeCounter > 10) { + int32_t numOfReady = 0; + int32_t numOfTotal = 0; + for (int32_t k = 0; k < taosArrayGetSize(pExecNode->pTaskList); ++k) { + STaskId *pId = taosArrayGet(pExecNode->pTaskList, k); + if (pTaskEntry->id.streamId == pId->streamId) { + numOfTotal++; + + if (pTaskEntry->id.taskId != pId->taskId) { + STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, pId, sizeof(*pId)); + if (pEntry->status == TASK_STATUS__READY) { + numOfReady++; + } + } + } + } + + if (numOfReady > 0) { + mDebug("stream:0x%" PRIx64 + " %d tasks are ready, %d tasks in stream-scan-history for more than 50s, drop related fill-history task", + pTaskEntry->id.streamId, 
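Editor's note: killActiveCheckpointTrans no longer scans every transaction in the sdb. It looks the database up in the per-DB registry filled by mndStreamRegisterTrans and kills the recorded transaction only when its name is MND_STREAM_CHECKPOINT_NAME, leaving drop/pause/resume transactions alone. A minimal sketch of that lookup-and-filter step, with a plain array in place of the taosHash registry:

#include <stdio.h>
#include <string.h>

#define DEMO_CHECKPOINT_NAME "stream-checkpoint"   /* same literal as MND_STREAM_CHECKPOINT_NAME */

typedef struct {
  char dbName[32];
  char transName[32];
  int  transId;
} DemoTransInfo;

/* Kill the transaction registered for dbName, but only if it is a checkpoint trans. */
static int demoKillActiveCheckpointTrans(const DemoTransInfo *registry, int n, const char *dbName) {
  for (int i = 0; i < n; ++i) {
    if (strcmp(registry[i].dbName, dbName) != 0) continue;
    if (strcmp(registry[i].transName, DEMO_CHECKPOINT_NAME) != 0) {
      printf("not a checkpoint trans, not killed: %s (transId:%d)\n", registry[i].transName, registry[i].transId);
      return 0;
    }
    printf("kill checkpoint transId:%d in db:%s\n", registry[i].transId, dbName);
    return 0;
  }
  return 0;  /* nothing registered for this db, nothing to do */
}

int main(void) {
  DemoTransInfo registry[] = {
      {"db1", "stream-checkpoint", 11},
      {"db2", "stream-drop", 12},
  };
  demoKillActiveCheckpointTrans(registry, 2, "db1");   /* killed */
  demoKillActiveCheckpointTrans(registry, 2, "db2");   /* ignored */
  return 0;
}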
numOfReady, numOfTotal - numOfReady); + return true; + } else { + return false; + } + } + } + + return false; +} + +// currently only handle the sink task +// 1. sink task, drop related fill-history task msg is missing +// 2. other tasks are in ready state for at least 3 * hb_interval +static int32_t mndDropRelatedFillhistoryTask(SMnode *pMnode, STaskStatusEntry *pTaskEntry, SStreamObj *pStream) { + SStreamTask *pTask = mndGetStreamTask(&pTaskEntry->id, pStream); + if (pTask == NULL) { + mError("failed to get the stream task:0x%x, may have been dropped", (int32_t)pTaskEntry->id.taskId); + return -1; + } + + SVDropHTaskReq *pReq = rpcMallocCont(sizeof(SVDropHTaskReq)); + if (pReq == NULL) { + mError("failed to malloc in drop related fill-history task, size:%" PRIzu ", code:%s", sizeof(SVDropHTaskReq), + tstrerror(TSDB_CODE_OUT_OF_MEMORY)); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + pReq->head.vgId = htonl(pTask->info.nodeId); + pReq->taskId = pTask->id.taskId; + pReq->streamId = pTask->id.streamId; + + SRpcMsg msg = {.info.noResp = 1}; + + initRpcMsg(&msg, TDMT_STREAM_HTASK_DROP, pReq, sizeof(SVDropHTaskReq)); + + mDebug("build and send drop related fill-history task for task:0x%x", pTask->id.taskId); + + SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId); + SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj); + mndReleaseVgroup(pMnode, pVgObj); + + tmsgSendReq(&epset, &msg); + return TSDB_CODE_SUCCESS; +} + int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList) { int32_t num = taosArrayGetSize(pNodeList); mInfo("set node expired for %d nodes", num); for (int k = 0; k < num; ++k) { - int32_t* pVgId = taosArrayGet(pNodeList, k); + int32_t *pVgId = taosArrayGet(pNodeList, k); mInfo("set node expired for nodeId:%d, total:%d", *pVgId, num); int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeList); for (int i = 0; i < numOfNodes; ++i) { - SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, i); + SNodeEntry *pNodeEntry = taosArrayGet(execInfo.pNodeList, i); if (pNodeEntry->nodeId == *pVgId) { mInfo("vgId:%d expired for some stream tasks, needs update nodeEp", *pVgId); @@ -2725,13 +2924,13 @@ int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList) { return TSDB_CODE_SUCCESS; } -static void updateStageInfo(STaskStatusEntry* pTaskEntry, int32_t stage) { +static void updateStageInfo(STaskStatusEntry *pTaskEntry, int64_t stage) { int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeList); - for(int32_t j = 0; j < numOfNodes; ++j) { - SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, j); + for (int32_t j = 0; j < numOfNodes; ++j) { + SNodeEntry *pNodeEntry = taosArrayGet(execInfo.pNodeList, j); if (pNodeEntry->nodeId == pTaskEntry->nodeId) { - mInfo("vgId:%d stage updated from %d to %d, nodeUpdate trigger by s-task:0x%" PRIx64, pTaskEntry->nodeId, - pTaskEntry->stage, stage, pTaskEntry->id.taskId); + mInfo("vgId:%d stage updated from %" PRId64 " to %" PRId64 ", nodeUpdate trigger by s-task:0x%" PRIx64, + pTaskEntry->nodeId, pTaskEntry->stage, stage, pTaskEntry->id.taskId); pNodeEntry->stageUpdated = true; pTaskEntry->stage = stage; @@ -2775,8 +2974,10 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { setNodeEpsetExpiredFlag(req.pUpdateNodes); } + bool snodeChanged = false; for (int32_t i = 0; i < req.numOfTasks; ++i) { STaskStatusEntry *p = taosArrayGet(req.pTaskStatus, i); + STaskStatusEntry *pTaskEntry = taosHashGet(execInfo.pTaskMap, &p->id, sizeof(p->id)); if (pTaskEntry == NULL) { mError("s-task:0x%" PRIx64 " not found in mnode task list", p->id.taskId); @@ 
-2785,7 +2986,20 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { if (pTaskEntry->stage != p->stage && pTaskEntry->stage != -1) { updateStageInfo(pTaskEntry, p->stage); + if (pTaskEntry->nodeId == SNODE_HANDLE) snodeChanged = true; } else { + // task is idle for more than 50 sec. + if (fabs(pTaskEntry->inputQUsed - p->inputQUsed) <= DBL_EPSILON) { + if (!pTaskEntry->inputQChanging) { + pTaskEntry->inputQUnchangeCounter++; + } else { + pTaskEntry->inputQChanging = false; + } + } else { + pTaskEntry->inputQChanging = true; + pTaskEntry->inputQUnchangeCounter = 0; + } + streamTaskStatusCopy(pTaskEntry, p); if (p->activeCheckpointId != 0) { if (activeCheckpointId != 0) { @@ -2800,9 +3014,28 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { } } - pTaskEntry->status = p->status; + if (p->status == pTaskEntry->status) { + pTaskEntry->statusLastDuration++; + } else { + pTaskEntry->status = p->status; + pTaskEntry->statusLastDuration = 0; + } + if (p->status != TASK_STATUS__READY) { mDebug("received s-task:0x%" PRIx64 " not in ready status:%s", p->id.taskId, streamTaskGetStatusStr(p->status)); + + if (p->status == TASK_STATUS__STREAM_SCAN_HISTORY) { + bool drop = needDropRelatedFillhistoryTask(pTaskEntry, &execInfo); + if (drop) { + SStreamObj *pStreamObj = mndGetStreamObj(pMnode, pTaskEntry->id.streamId); + if (pStreamObj == NULL) { + mError("failed to acquire the streamObj:0x%" PRIx64 " it may have been dropped", pTaskEntry->id.streamId); + } else { + mndDropRelatedFillhistoryTask(pMnode, pTaskEntry, pStreamObj); + mndReleaseStream(pMnode, pStreamObj); + } + } + } } } @@ -2813,11 +3046,11 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { SArray *p = mndTakeVgroupSnapshot(pMnode, &allReady); taosArrayDestroy(p); - if (allReady) { + if (allReady || snodeChanged) { // if the execInfo.activeCheckpoint == 0, the checkpoint is restoring from wal mInfo("checkpointId:%" PRId64 " failed, issue task-reset trans to reset all tasks status", execInfo.activeCheckpoint); - mndResetFromCheckpoint(pMnode); + mndResetStatusFromCheckpoint(pMnode, activeCheckpointId); } else { mInfo("not all vgroups are ready, wait for next HB from stream tasks"); } @@ -2829,3 +3062,22 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { taosArrayDestroy(req.pUpdateNodes); return TSDB_CODE_SUCCESS; } + +void freeCheckpointCandEntry(void *param) { + SCheckpointCandEntry *pEntry = param; + taosMemoryFreeClear(pEntry->pName); +} +SStreamObj *mndGetStreamObj(SMnode *pMnode, int64_t streamId) { + void * pIter = NULL; + SSdb * pSdb = pMnode->pSdb; + SStreamObj *pStream = NULL; + + while ((pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream)) != NULL) { + if (pStream->uid == streamId) { + sdbCancelFetch(pSdb, pIter); + return pStream; + } + } + + return NULL; +} diff --git a/source/dnode/mnode/impl/src/mndStreamTrans.c b/source/dnode/mnode/impl/src/mndStreamTrans.c new file mode 100644 index 0000000000..fa36d69d6e --- /dev/null +++ b/source/dnode/mnode/impl/src/mndStreamTrans.c @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "mndStream.h" +#include "mndTrans.h" + +typedef struct SKeyInfo { + void* pKey; + int32_t keyLen; +} SKeyInfo; + +static int32_t clearFinishedTrans(SMnode* pMnode); + +int32_t mndStreamRegisterTrans(STrans* pTrans, const char* pName, const char* pSrcDb, const char* pDstDb) { + SStreamTransInfo info = {.transId = pTrans->id, .startTime = taosGetTimestampMs(), .name = pName}; + taosHashPut(execInfo.transMgmt.pDBTrans, pSrcDb, strlen(pSrcDb), &info, sizeof(SStreamTransInfo)); + + if (strcmp(pSrcDb, pDstDb) != 0) { + taosHashPut(execInfo.transMgmt.pDBTrans, pDstDb, strlen(pDstDb), &info, sizeof(SStreamTransInfo)); + } + + return 0; +} + +int32_t clearFinishedTrans(SMnode* pMnode) { + size_t keyLen = 0; + SArray* pList = taosArrayInit(4, sizeof(SKeyInfo)); + void* pIter = NULL; + + while ((pIter = taosHashIterate(execInfo.transMgmt.pDBTrans, pIter)) != NULL) { + SStreamTransInfo* pEntry = (SStreamTransInfo*)pIter; + + // let's clear the finished trans + STrans* pTrans = mndAcquireTrans(pMnode, pEntry->transId); + if (pTrans == NULL) { + void* pKey = taosHashGetKey(pEntry, &keyLen); + // key is the src/dst db name + SKeyInfo info = {.pKey = pKey, .keyLen = keyLen}; + + mDebug("transId:%d %s startTs:%" PRId64 " cleared due to finished", pEntry->transId, pEntry->name, + pEntry->startTime); + taosArrayPush(pList, &info); + } else { + mndReleaseTrans(pMnode, pTrans); + } + } + + size_t num = taosArrayGetSize(pList); + for (int32_t i = 0; i < num; ++i) { + SKeyInfo* pKey = taosArrayGet(pList, i); + taosHashRemove(execInfo.transMgmt.pDBTrans, pKey->pKey, pKey->keyLen); + } + + mDebug("clear %d finished stream-trans, remained:%d", (int32_t)num, taosHashGetSize(execInfo.transMgmt.pDBTrans)); + + terrno = TSDB_CODE_SUCCESS; + taosArrayDestroy(pList); + return 0; +} + +bool streamTransConflictOtherTrans(SMnode* pMnode, const char* pSrcDb, const char* pDstDb, bool lock) { + if (lock) { + taosThreadMutexLock(&execInfo.lock); + } + + int32_t num = taosHashGetSize(execInfo.transMgmt.pDBTrans); + if (num <= 0) { + if (lock) { + taosThreadMutexUnlock(&execInfo.lock); + } + return false; + } + + clearFinishedTrans(pMnode); + + SStreamTransInfo *pEntry = taosHashGet(execInfo.transMgmt.pDBTrans, pSrcDb, strlen(pSrcDb)); + if (pEntry != NULL) { + if (lock) { + taosThreadMutexUnlock(&execInfo.lock); + } + mWarn("conflict with other transId:%d in Db:%s, trans:%s", pEntry->transId, pSrcDb, pEntry->name); + return true; + } + + pEntry = taosHashGet(execInfo.transMgmt.pDBTrans, pDstDb, strlen(pDstDb)); + if (pEntry != NULL) { + if (lock) { + taosThreadMutexUnlock(&execInfo.lock); + } + mWarn("conflict with other transId:%d in Db:%s, trans:%s", pEntry->transId, pDstDb, pEntry->name); + return true; + } + + if (lock) { + taosThreadMutexUnlock(&execInfo.lock); + } + + return false; +} + +int32_t mndAddtoCheckpointWaitingList(SStreamObj* pStream, int64_t checkpointId) { + SCheckpointCandEntry* pEntry = taosHashGet(execInfo.transMgmt.pWaitingList, &pStream->uid, sizeof(pStream->uid)); + if (pEntry == NULL) { + SCheckpointCandEntry entry = {.streamId = pStream->uid, + .checkpointTs = taosGetTimestampMs(), + .checkpointId = checkpointId, + .pName = taosStrdup(pStream->name)}; + + taosHashPut(execInfo.transMgmt.pWaitingList, &pStream->uid, sizeof(pStream->uid), &entry, sizeof(entry)); + int32_t size = taosHashGetSize(execInfo.transMgmt.pWaitingList); + + mDebug("stream:%" PRIx64 " 
add into waiting list due to conflict, ts:%" PRId64 " , checkpointId: %" PRId64 + ", total in waitingList:%d", + pStream->uid, entry.checkpointTs, checkpointId, size); + } else { + mDebug("stream:%" PRIx64 " ts:%" PRId64 ", checkpointId:%" PRId64 " already in waiting list, no need to add into", + pStream->uid, pEntry->checkpointTs, checkpointId); + } + + return TSDB_CODE_SUCCESS; +} diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index c9756ef814..7749decf91 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -834,7 +834,7 @@ int32_t mndTransCheckConflict(SMnode *pMnode, STrans *pTrans) { if (mndCheckTransConflict(pMnode, pTrans)) { terrno = TSDB_CODE_MND_TRANS_CONFLICT; mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); - return -1; + return terrno; } return 0; diff --git a/source/dnode/mnode/impl/src/mndUser.c b/source/dnode/mnode/impl/src/mndUser.c index 66abfd6bc1..0e3b544508 100644 --- a/source/dnode/mnode/impl/src/mndUser.c +++ b/source/dnode/mnode/impl/src/mndUser.c @@ -708,9 +708,8 @@ SSdbRaw *mndUserActionEncode(SUserObj *pUser) { int32_t numOfAlterViews = taosHashGetSize(pUser->alterViews); int32_t numOfTopics = taosHashGetSize(pUser->topics); int32_t numOfUseDbs = taosHashGetSize(pUser->useDbs); - int32_t size = sizeof(SUserObj) + USER_RESERVE_SIZE + - (numOfReadDbs + numOfWriteDbs + numOfUseDbs) * TSDB_DB_FNAME_LEN + numOfTopics * TSDB_TOPIC_FNAME_LEN + - ipWhiteReserve; + int32_t size = sizeof(SUserObj) + USER_RESERVE_SIZE + (numOfReadDbs + numOfWriteDbs) * TSDB_DB_FNAME_LEN + + numOfTopics * TSDB_TOPIC_FNAME_LEN + ipWhiteReserve; char *stb = taosHashIterate(pUser->readTbs, NULL); while (stb != NULL) { @@ -720,7 +719,7 @@ SSdbRaw *mndUserActionEncode(SUserObj *pUser) { size += keyLen; size_t valueLen = 0; - valueLen = strlen(stb); + valueLen = strlen(stb) + 1; size += sizeof(int32_t); size += valueLen; stb = taosHashIterate(pUser->readTbs, stb); @@ -734,7 +733,7 @@ SSdbRaw *mndUserActionEncode(SUserObj *pUser) { size += keyLen; size_t valueLen = 0; - valueLen = strlen(stb); + valueLen = strlen(stb) + 1; size += sizeof(int32_t); size += valueLen; stb = taosHashIterate(pUser->writeTbs, stb); @@ -748,7 +747,7 @@ SSdbRaw *mndUserActionEncode(SUserObj *pUser) { size += keyLen; size_t valueLen = 0; - valueLen = strlen(stb); + valueLen = strlen(stb) + 1; size += sizeof(int32_t); size += valueLen; stb = taosHashIterate(pUser->alterTbs, stb); @@ -762,7 +761,7 @@ SSdbRaw *mndUserActionEncode(SUserObj *pUser) { size += keyLen; size_t valueLen = 0; - valueLen = strlen(stb); + valueLen = strlen(stb) + 1; size += sizeof(int32_t); size += valueLen; stb = taosHashIterate(pUser->readViews, stb); @@ -776,7 +775,7 @@ SSdbRaw *mndUserActionEncode(SUserObj *pUser) { size += keyLen; size_t valueLen = 0; - valueLen = strlen(stb); + valueLen = strlen(stb) + 1; size += sizeof(int32_t); size += valueLen; stb = taosHashIterate(pUser->writeViews, stb); @@ -790,11 +789,21 @@ SSdbRaw *mndUserActionEncode(SUserObj *pUser) { size += keyLen; size_t valueLen = 0; - valueLen = strlen(stb); + valueLen = strlen(stb) + 1; size += sizeof(int32_t); size += valueLen; stb = taosHashIterate(pUser->alterViews, stb); - } + } + + int32_t *useDb = taosHashIterate(pUser->useDbs, NULL); + while (useDb != NULL) { + size_t keyLen = 0; + void *key = taosHashGetKey(useDb, &keyLen); + size += sizeof(int32_t); + size += keyLen; + size += sizeof(int32_t); + useDb = taosHashIterate(pUser->useDbs, useDb); + } SSdbRaw *pRaw 
= sdbAllocRaw(SDB_USER, USER_VER_NUMBER, size); if (pRaw == NULL) goto _OVER; @@ -925,7 +934,7 @@ SSdbRaw *mndUserActionEncode(SUserObj *pUser) { stb = taosHashIterate(pUser->alterViews, stb); } - int32_t *useDb = taosHashIterate(pUser->useDbs, NULL); + useDb = taosHashIterate(pUser->useDbs, NULL); while (useDb != NULL) { size_t keyLen = 0; void *key = taosHashGetKey(useDb, &keyLen); diff --git a/source/dnode/mnode/impl/test/trans/trans1.cpp b/source/dnode/mnode/impl/test/trans/trans1.cpp index 92a442aa5e..aff1156449 100644 --- a/source/dnode/mnode/impl/test/trans/trans1.cpp +++ b/source/dnode/mnode/impl/test/trans/trans1.cpp @@ -38,7 +38,7 @@ class MndTestTrans1 : public ::testing::Test { test.ServerStop(); - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); int32_t writeLen = taosWriteFile(pFile, buffer, readLen); if (writeLen < 0 || writeLen == readLen) { ASSERT(1); diff --git a/source/dnode/mnode/sdb/inc/sdb.h b/source/dnode/mnode/sdb/inc/sdb.h index ddde645fae..0a20dcfd09 100644 --- a/source/dnode/mnode/sdb/inc/sdb.h +++ b/source/dnode/mnode/sdb/inc/sdb.h @@ -149,7 +149,8 @@ typedef enum { SDB_FUNC = 20, SDB_IDX = 21, SDB_VIEW = 22, - SDB_MAX = 23 + SDB_STREAM_SEQ = 23, + SDB_MAX = 24 } ESdbType; typedef struct SSdbRaw { @@ -169,11 +170,11 @@ typedef struct SSdbRow { } SSdbRow; typedef struct SSdb { - SMnode *pMnode; - SWal *pWal; + SMnode * pMnode; + SWal * pWal; int64_t sync; - char *currDir; - char *tmpDir; + char * currDir; + char * tmpDir; int64_t commitIndex; int64_t commitTerm; int64_t commitConfig; @@ -183,7 +184,7 @@ typedef struct SSdb { int64_t tableVer[SDB_MAX]; int64_t maxId[SDB_MAX]; EKeyType keyTypes[SDB_MAX]; - SHashObj *hashObjs[SDB_MAX]; + SHashObj * hashObjs[SDB_MAX]; TdThreadRwlock locks[SDB_MAX]; SdbInsertFp insertFps[SDB_MAX]; SdbUpdateFp updateFps[SDB_MAX]; @@ -198,25 +199,25 @@ typedef struct SSdb { typedef struct SSdbIter { TdFilePtr file; int64_t total; - char *name; + char * name; } SSdbIter; typedef struct { - ESdbType sdbType; - EKeyType keyType; - SdbDeployFp deployFp; - SdbEncodeFp encodeFp; - SdbDecodeFp decodeFp; - SdbInsertFp insertFp; - SdbUpdateFp updateFp; - SdbDeleteFp deleteFp; + ESdbType sdbType; + EKeyType keyType; + SdbDeployFp deployFp; + SdbEncodeFp encodeFp; + SdbDecodeFp decodeFp; + SdbInsertFp insertFp; + SdbUpdateFp updateFp; + SdbDeleteFp deleteFp; SdbValidateFp validateFp; } SSdbTable; typedef struct SSdbOpt { const char *path; - SMnode *pMnode; - SWal *pWal; + SMnode * pMnode; + SWal * pWal; int64_t sync; } SSdbOpt; @@ -393,7 +394,7 @@ int32_t sdbGetRawSoftVer(SSdbRaw *pRaw, int8_t *sver); int32_t sdbGetRawTotalSize(SSdbRaw *pRaw); SSdbRow *sdbAllocRow(int32_t objSize); -void *sdbGetRowObj(SSdbRow *pRow); +void * sdbGetRowObj(SSdbRow *pRow); void sdbFreeRow(SSdb *pSdb, SSdbRow *pRow, bool callFunc); int32_t sdbStartRead(SSdb *pSdb, SSdbIter **ppIter, int64_t *index, int64_t *term, int64_t *config); diff --git a/source/dnode/snode/CMakeLists.txt b/source/dnode/snode/CMakeLists.txt index ebfe80ecab..4cd8e26e78 100644 --- a/source/dnode/snode/CMakeLists.txt +++ b/source/dnode/snode/CMakeLists.txt @@ -3,6 +3,7 @@ add_library(snode STATIC ${SNODE_SRC}) target_include_directories( snode PUBLIC "${TD_SOURCE_DIR}/include/dnode/snode" + PUBLIC "${TD_SOURCE_DIR}/include/dnode/vnode" private "${CMAKE_CURRENT_SOURCE_DIR}/inc" ) target_link_libraries( @@ -16,4 +17,5 @@ target_link_libraries( PRIVATE stream PRIVATE wal 
PRIVATE index + PRIVATE tqCommon ) diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 6f5b370826..380be1dd38 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -13,46 +13,32 @@ * along with this program. If not, see . */ -#include "rsync.h" #include "executor.h" +#include "rsync.h" #include "sndInt.h" -#include "tstream.h" +#include "tqCommon.h" #include "tuuid.h" -void sndEnqueueStreamDispatch(SSnode *pSnode, SRpcMsg *pMsg) { - char *msgStr = pMsg->pCont; - char *msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); - int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); - int32_t code = 0; +#define sndError(...) \ + do { \ + if (sndDebugFlag & DEBUG_ERROR) { \ + taosPrintLog("SND ERROR ", DEBUG_ERROR, sndDebugFlag, __VA_ARGS__); \ + } \ + } while (0) - SStreamDispatchReq req; - SDecoder decoder; - tDecoderInit(&decoder, msgBody, msgLen); - if (tDecodeStreamDispatchReq(&decoder, &req) < 0) { - code = TSDB_CODE_MSG_DECODE_ERROR; - tDecoderClear(&decoder); - goto FAIL; - } +#define sndInfo(...) \ + do { \ + if (sndDebugFlag & DEBUG_INFO) { \ + taosPrintLog("SND INFO ", DEBUG_INFO, sndDebugFlag, __VA_ARGS__); \ + } \ + } while (0) - tDecoderClear(&decoder); - - SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, req.taskId); - if (pTask) { - SRpcMsg rsp = { .info = pMsg->info, .code = 0 }; - streamProcessDispatchMsg(pTask, &req, &rsp); - streamMetaReleaseTask(pSnode->pMeta, pTask); - rpcFreeCont(pMsg->pCont); - taosFreeQitem(pMsg); - return; - } - -FAIL: - if (pMsg->info.handle == NULL) return; - SRpcMsg rsp = { .code = code, .info = pMsg->info}; - tmsgSendRsp(&rsp); - rpcFreeCont(pMsg->pCont); - taosFreeQitem(pMsg); -} +#define sndDebug(...) \ + do { \ + if (sndDebugFlag & DEBUG_DEBUG) { \ + taosPrintLog("SND ", DEBUG_DEBUG, sndDebugFlag, __VA_ARGS__); \ + } \ + } while (0) int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer) { ASSERT(pTask->info.taskLevel == TASK_LEVEL__AGG && taosArrayGetSize(pTask->upstreamInfo.pList) != 0); @@ -60,47 +46,71 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer if (code != TSDB_CODE_SUCCESS) { return code; } + pTask->pBackend = NULL; streamTaskOpenAllUpstreamInput(pTask); - pTask->pState = streamStateOpen(pSnode->path, pTask, false, -1, -1); - if (pTask->pState == NULL) { - qError("s-task:%s failed to open state for task", pTask->id.idStr); - return -1; - } else { - qDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState); + SStreamTask *pSateTask = pTask; + SStreamTask task = {0}; + if (pTask->info.fillHistory) { + task.id.streamId = pTask->streamTaskId.streamId; + task.id.taskId = pTask->streamTaskId.taskId; + task.pMeta = pTask->pMeta; + pSateTask = &task; } - int32_t numOfChildEp = taosArrayGetSize(pTask->upstreamInfo.pList); - SReadHandle handle = { .vnode = NULL, .numOfVgroups = numOfChildEp, .pStateBackend = pTask->pState, .fillHistory = pTask->info.fillHistory }; + pTask->pState = streamStateOpen(pSnode->path, pSateTask, false, -1, -1); + if (pTask->pState == NULL) { + sndError("s-task:%s failed to open state for task", pTask->id.idStr); + return -1; + } else { + sndDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState); + } + + int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->upstreamInfo.pList); + SReadHandle handle = { + .checkpointId = pTask->chkInfo.checkpointId, + .vnode = NULL, + .numOfVgroups = numOfVgroups, + .pStateBackend = pTask->pState, + .fillHistory = pTask->info.fillHistory, + 
.winRange = pTask->dataRange.window, + }; initStreamStateAPI(&handle.api); - pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, 0, pTask->id.taskId); + pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, SNODE_HANDLE, pTask->id.taskId); ASSERT(pTask->exec.pExecutor); + qSetTaskId(pTask->exec.pExecutor, pTask->id.taskId, pTask->id.streamId); streamTaskResetUpstreamStageInfo(pTask); streamSetupScheduleTrigger(pTask); - SCheckpointInfo* pChkInfo = &pTask->chkInfo; + SCheckpointInfo *pChkInfo = &pTask->chkInfo; // checkpoint ver is the kept version, handled data should be the next version. if (pTask->chkInfo.checkpointId != 0) { pTask->chkInfo.nextProcessVer = pTask->chkInfo.checkpointVer + 1; - qInfo("s-task:%s restore from the checkpointId:%" PRId64 " ver:%" PRId64 " nextProcessVer:%" PRId64, pTask->id.idStr, + sndInfo("s-task:%s restore from the checkpointId:%" PRId64 " ver:%" PRId64 " nextProcessVer:%" PRId64, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer); - } else { - if (pTask->chkInfo.nextProcessVer == -1) { - pTask->chkInfo.nextProcessVer = 0; - } } - char* p = NULL; + char *p = NULL; streamTaskGetStatus(pTask, &p); - qInfo("snode:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 - " nextProcessVer:%" PRId64 " child id:%d, level:%d, status:%s fill-history:%d, trigger:%" PRId64 " ms", - SNODE_HANDLE, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, - pTask->info.selfChildId, pTask->info.taskLevel, p, pTask->info.fillHistory, pTask->info.triggerParam); - + if (pTask->info.fillHistory) { + sndInfo("vgId:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 + " nextProcessVer:%" PRId64 + " child id:%d, level:%d, status:%s fill-history:%d, related stream task:0x%x trigger:%" PRId64 " ms", + SNODE_HANDLE, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, + pTask->info.selfChildId, pTask->info.taskLevel, p, pTask->info.fillHistory, + (int32_t)pTask->streamTaskId.taskId, pTask->info.triggerParam); + } else { + sndInfo("vgId:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 + " nextProcessVer:%" PRId64 + " child id:%d, level:%d, status:%s fill-history:%d, related fill-task:0x%x trigger:%" PRId64 " ms", + SNODE_HANDLE, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, + pTask->info.selfChildId, pTask->info.taskLevel, p, pTask->info.fillHistory, + (int32_t)pTask->hTaskInfo.id.taskId, pTask->info.triggerParam); + } return 0; } @@ -117,17 +127,19 @@ SSnode *sndOpen(const char *path, const SSnodeOpt *pOption) { } pSnode->msgCb = pOption->msgCb; - pSnode->pMeta = streamMetaOpen(path, pSnode, (FTaskExpand *)sndExpandTask, SNODE_HANDLE, -1); + pSnode->pMeta = streamMetaOpen(path, pSnode, (FTaskExpand *)sndExpandTask, SNODE_HANDLE, taosGetTimestampMs()); if (pSnode->pMeta == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; goto FAIL; } + if (streamMetaLoadAllTasks(pSnode->pMeta) < 0) { + goto FAIL; + } + stopRsync(); startRsync(); - // todo fix it: send msg to mnode to rollback to an existed checkpoint - streamMetaInitForSnode(pSnode->pMeta); return pSnode; FAIL: @@ -136,7 +148,14 @@ FAIL: return NULL; } +int32_t sndInit(SSnode * pSnode) { + resetStreamTaskStatus(pSnode->pMeta); + startStreamTasks(pSnode->pMeta); + return 0; +} + void sndClose(SSnode *pSnode) { + stopRsync(); 
streamMetaNotifyClose(pSnode->pMeta); streamMetaCommit(pSnode->pMeta); streamMetaClose(pSnode->pMeta); @@ -144,314 +163,49 @@ void sndClose(SSnode *pSnode) { taosMemoryFree(pSnode); } -int32_t sndGetLoad(SSnode *pSnode, SSnodeLoad *pLoad) { return 0; } - -int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) { - int32_t code; - - // 1.deserialize msg and build task - SStreamTask *pTask = taosMemoryCalloc(1, sizeof(SStreamTask)); - if (pTask == NULL) { - return -1; +int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) { + switch (pMsg->msgType) { + case TDMT_STREAM_TASK_RUN: + return tqStreamTaskProcessRunReq(pSnode->pMeta, pMsg, true); + case TDMT_STREAM_TASK_DISPATCH: + return tqStreamTaskProcessDispatchReq(pSnode->pMeta, pMsg); + case TDMT_STREAM_TASK_DISPATCH_RSP: + return tqStreamTaskProcessDispatchRsp(pSnode->pMeta, pMsg); + case TDMT_STREAM_RETRIEVE: + return tqStreamTaskProcessRetrieveReq(pSnode->pMeta, pMsg); + case TDMT_STREAM_RETRIEVE_RSP: // 1036 + break; + case TDMT_VND_STREAM_SCAN_HISTORY_FINISH: + return tqStreamTaskProcessScanHistoryFinishReq(pSnode->pMeta, pMsg); + case TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP: + return tqStreamTaskProcessScanHistoryFinishRsp(pSnode->pMeta, pMsg); + case TDMT_VND_STREAM_TASK_CHECK: + return tqStreamTaskProcessCheckReq(pSnode->pMeta, pMsg); + case TDMT_VND_STREAM_TASK_CHECK_RSP: + return tqStreamTaskProcessCheckRsp(pSnode->pMeta, pMsg, true); + case TDMT_STREAM_TASK_CHECKPOINT_READY: + return tqStreamTaskProcessCheckpointReadyMsg(pSnode->pMeta, pMsg); + default: + sndError("invalid snode msg:%d", pMsg->msgType); + ASSERT(0); } - - SDecoder decoder; - tDecoderInit(&decoder, (uint8_t *)msg, msgLen); - code = tDecodeStreamTask(&decoder, pTask); - if (code < 0) { - tDecoderClear(&decoder); - taosMemoryFree(pTask); - return -1; - } - - tDecoderClear(&decoder); - - ASSERT(pTask->info.taskLevel == TASK_LEVEL__AGG); - - // 2.save task - streamMetaWLock(pSnode->pMeta); - - bool added = false; - code = streamMetaRegisterTask(pSnode->pMeta, -1, pTask, &added); - if (code < 0) { - streamMetaWUnLock(pSnode->pMeta); - return -1; - } - - int32_t numOfTasks = streamMetaGetNumOfTasks(pSnode->pMeta); - streamMetaWUnLock(pSnode->pMeta); - - char* p = NULL; - streamTaskGetStatus(pTask, &p); - - qDebug("snode:%d s-task:%s is deployed on snode and add into meta, status:%s, numOfTasks:%d", SNODE_HANDLE, - pTask->id.idStr, p, numOfTasks); - - EStreamTaskEvent event = (HAS_RELATED_FILLHISTORY_TASK(pTask)) ? 
TASK_EVENT_INIT_STREAM_SCANHIST : TASK_EVENT_INIT; - streamTaskHandleEvent(pTask->status.pSM, event); - streamTaskCheckDownstream(pTask); - return 0; -} - -int32_t sndProcessTaskDropReq(SSnode *pSnode, char *msg, int32_t msgLen) { - SVDropStreamTaskReq *pReq = (SVDropStreamTaskReq *)msg; - qDebug("snode:%d receive msg to drop stream task:0x%x", pSnode->pMeta->vgId, pReq->taskId); - streamMetaUnregisterTask(pSnode->pMeta, pReq->streamId, pReq->taskId); - - // commit the update - streamMetaWLock(pSnode->pMeta); - int32_t numOfTasks = streamMetaGetNumOfTasks(pSnode->pMeta); - qDebug("vgId:%d task:0x%x dropped, remain tasks:%d", pSnode->pMeta->vgId, pReq->taskId, numOfTasks); - - if (streamMetaCommit(pSnode->pMeta) < 0) { - // persist to disk - } - streamMetaWUnLock(pSnode->pMeta); - return 0; -} - -int32_t sndProcessTaskRunReq(SSnode *pSnode, SRpcMsg *pMsg) { - SStreamTaskRunReq *pReq = pMsg->pCont; - - SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, pReq->streamId, pReq->taskId); - if (pTask) { - streamExecTask(pTask); - streamMetaReleaseTask(pSnode->pMeta, pTask); - return 0; - } else { - return -1; - } -} - -int32_t sndProcessTaskDispatchReq(SSnode *pSnode, SRpcMsg *pMsg, bool exec) { - char *msgStr = pMsg->pCont; - char *msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); - int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); - SStreamDispatchReq req; - SDecoder decoder; - tDecoderInit(&decoder, (uint8_t *)msgBody, msgLen); - tDecodeStreamDispatchReq(&decoder, &req); - - SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, req.taskId); - if (pTask) { - SRpcMsg rsp = {.info = pMsg->info, .code = 0}; - streamProcessDispatchMsg(pTask, &req, &rsp); - streamMetaReleaseTask(pSnode->pMeta, pTask); - return 0; - } else { - return -1; - } -} - -int32_t sndProcessTaskRetrieveReq(SSnode *pSnode, SRpcMsg *pMsg) { - char *msgStr = pMsg->pCont; - char *msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); - int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); - SStreamRetrieveReq req; - SDecoder decoder; - tDecoderInit(&decoder, msgBody, msgLen); - tDecodeStreamRetrieveReq(&decoder, &req); - tDecoderClear(&decoder); - SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, req.dstTaskId); - - if (pTask) { - SRpcMsg rsp = {.info = pMsg->info, .code = 0}; - streamProcessRetrieveReq(pTask, &req, &rsp); - streamMetaReleaseTask(pSnode->pMeta, pTask); - tDeleteStreamRetrieveReq(&req); - return 0; - } else { - return -1; - } -} - -int32_t sndProcessTaskDispatchRsp(SSnode *pSnode, SRpcMsg *pMsg) { - SStreamDispatchRsp *pRsp = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); - - pRsp->upstreamTaskId = htonl(pRsp->upstreamTaskId); - pRsp->streamId = htobe64(pRsp->streamId); - pRsp->msgId = htonl(pRsp->msgId); - - SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, pRsp->streamId, pRsp->upstreamTaskId); - if (pTask) { - streamProcessDispatchRsp(pTask, pRsp, pMsg->code); - streamMetaReleaseTask(pSnode->pMeta, pTask); - return 0; - } else { - return -1; - } -} - -int32_t sndProcessTaskRetrieveRsp(SSnode *pSnode, SRpcMsg *pMsg) { - // return 0; } int32_t sndProcessWriteMsg(SSnode *pSnode, SRpcMsg *pMsg, SRpcMsg *pRsp) { switch (pMsg->msgType) { case TDMT_STREAM_TASK_DEPLOY: { - void *pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + void * pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t len = pMsg->contLen - sizeof(SMsgHead); - return sndProcessTaskDeployReq(pSnode, pReq, len); + return tqStreamTaskProcessDeployReq(pSnode->pMeta, -1, pReq, len, true, true); } 
case TDMT_STREAM_TASK_DROP: - return sndProcessTaskDropReq(pSnode, pMsg->pCont, pMsg->contLen); + return tqStreamTaskProcessDropReq(pSnode->pMeta, pMsg->pCont, pMsg->contLen); + case TDMT_VND_STREAM_TASK_UPDATE: + return tqStreamTaskProcessUpdateReq(pSnode->pMeta, &pSnode->msgCb, pMsg, true); default: ASSERT(0); } return 0; -} - -int32_t sndProcessStreamTaskScanHistoryFinishReq(SSnode *pSnode, SRpcMsg *pMsg) { - char *msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); - int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); - - // deserialize - SStreamScanHistoryFinishReq req; - - SDecoder decoder; - tDecoderInit(&decoder, msg, msgLen); - tDecodeStreamScanHistoryFinishReq(&decoder, &req); - tDecoderClear(&decoder); - - // find task - SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, req.downstreamTaskId); - if (pTask == NULL) { - return -1; - } - // do process request - if (streamProcessScanHistoryFinishReq(pTask, &req, &pMsg->info) < 0) { - streamMetaReleaseTask(pSnode->pMeta, pTask); - return -1; - } - - streamMetaReleaseTask(pSnode->pMeta, pTask); - return 0; -} - -int32_t sndProcessTaskRecoverFinishRsp(SSnode *pSnode, SRpcMsg *pMsg) { - // - return 0; -} - -int32_t sndProcessStreamTaskCheckReq(SSnode *pSnode, SRpcMsg *pMsg) { - char *msgStr = pMsg->pCont; - char *msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); - int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); - - SStreamTaskCheckReq req; - SDecoder decoder; - - tDecoderInit(&decoder, (uint8_t *)msgBody, msgLen); - tDecodeStreamTaskCheckReq(&decoder, &req); - tDecoderClear(&decoder); - - int32_t taskId = req.downstreamTaskId; - - SStreamTaskCheckRsp rsp = { - .reqId = req.reqId, - .streamId = req.streamId, - .childId = req.childId, - .downstreamNodeId = req.downstreamNodeId, - .downstreamTaskId = req.downstreamTaskId, - .upstreamNodeId = req.upstreamNodeId, - .upstreamTaskId = req.upstreamTaskId, - }; - - SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, taskId); - - if (pTask != NULL) { - rsp.status = streamTaskCheckStatus(pTask, req.upstreamTaskId, req.upstreamNodeId, req.stage); - streamMetaReleaseTask(pSnode->pMeta, pTask); - char* p = NULL; - streamTaskGetStatus(pTask, &p); - qDebug("s-task:%s status:%s, recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), ready:%d", - pTask->id.idStr, p, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); - } else { - rsp.status = TASK_DOWNSTREAM_NOT_READY; - qDebug("recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64 ") from task:0x%x (vgId:%d), rsp status %d", - taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); - } - - SEncoder encoder; - int32_t code; - int32_t len; - - tEncodeSize(tEncodeStreamTaskCheckRsp, &rsp, len, code); - if (code < 0) { - qError("vgId:%d failed to encode task check rsp, task:0x%x", pSnode->pMeta->vgId, taskId); - return -1; - } - - void *buf = rpcMallocCont(sizeof(SMsgHead) + len); - ((SMsgHead *)buf)->vgId = htonl(req.upstreamNodeId); - - void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); - tEncoderInit(&encoder, (uint8_t *)abuf, len); - tEncodeStreamTaskCheckRsp(&encoder, &rsp); - tEncoderClear(&encoder); - - SRpcMsg rspMsg = {.code = 0, .pCont = buf, .contLen = sizeof(SMsgHead) + len, .info = pMsg->info}; - - tmsgSendRsp(&rspMsg); - return 0; -} - -int32_t sndProcessStreamTaskCheckRsp(SSnode* pSnode, SRpcMsg* pMsg) { - char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); - int32_t len = pMsg->contLen - sizeof(SMsgHead); - - int32_t code; - SStreamTaskCheckRsp 
rsp; - - SDecoder decoder; - tDecoderInit(&decoder, (uint8_t*)pReq, len); - code = tDecodeStreamTaskCheckRsp(&decoder, &rsp); - - if (code < 0) { - tDecoderClear(&decoder); - return -1; - } - - tDecoderClear(&decoder); - qDebug("tq task:0x%x (vgId:%d) recv check rsp(reqId:0x%" PRIx64 ") from 0x%x (vgId:%d) status %d", - rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.status); - - SStreamTask* pTask = streamMetaAcquireTask(pSnode->pMeta, rsp.streamId, rsp.upstreamTaskId); - if (pTask == NULL) { - qError("tq failed to locate the stream task:0x%x (vgId:%d), it may have been destroyed", rsp.upstreamTaskId, - pSnode->pMeta->vgId); - return -1; - } - - code = streamProcessCheckRsp(pTask, &rsp); - streamMetaReleaseTask(pSnode->pMeta, pTask); - return code; -} - -int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) { - switch (pMsg->msgType) { - case TDMT_STREAM_TASK_RUN: - return sndProcessTaskRunReq(pSnode, pMsg); - case TDMT_STREAM_TASK_DISPATCH: - return sndProcessTaskDispatchReq(pSnode, pMsg, true); - case TDMT_STREAM_TASK_DISPATCH_RSP: - return sndProcessTaskDispatchRsp(pSnode, pMsg); - case TDMT_STREAM_RETRIEVE: - return sndProcessTaskRetrieveReq(pSnode, pMsg); - case TDMT_STREAM_RETRIEVE_RSP: - return sndProcessTaskRetrieveRsp(pSnode, pMsg); - case TDMT_VND_STREAM_SCAN_HISTORY_FINISH: - return sndProcessStreamTaskScanHistoryFinishReq(pSnode, pMsg); - case TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP: - return sndProcessTaskRecoverFinishRsp(pSnode, pMsg); - case TDMT_VND_STREAM_TASK_CHECK: - return sndProcessStreamTaskCheckReq(pSnode, pMsg); - case TDMT_VND_STREAM_TASK_CHECK_RSP: - return sndProcessStreamTaskCheckRsp(pSnode, pMsg); - default: - ASSERT(0); - } - return 0; -} +} \ No newline at end of file diff --git a/source/dnode/vnode/CMakeLists.txt b/source/dnode/vnode/CMakeLists.txt index dc43da7fe7..114051f02b 100644 --- a/source/dnode/vnode/CMakeLists.txt +++ b/source/dnode/vnode/CMakeLists.txt @@ -1,4 +1,5 @@ # vnode +add_subdirectory(src/tqCommon) add_library(vnode STATIC "") set( VNODE_SOURCE_FILES @@ -13,6 +14,8 @@ set( "src/vnd/vnodeSnapshot.c" "src/vnd/vnodeRetention.c" "src/vnd/vnodeInitApi.c" + "src/vnd/vnodeAsync.c" + "src/vnd/vnodeHash.c" # meta "src/meta/metaOpen.c" @@ -117,6 +120,7 @@ if (${BUILD_CONTRIB}) PUBLIC "inc" PUBLIC "src/inc" PUBLIC "${TD_SOURCE_DIR}/include/libs/scalar" + PUBLIC "${TD_SOURCE_DIR}/include/dnode/vnode" PUBLIC "${TD_SOURCE_DIR}/contrib/rocksdb/include" ) else() @@ -125,6 +129,7 @@ else() PUBLIC "inc" PUBLIC "src/inc" PUBLIC "${TD_SOURCE_DIR}/include/libs/scalar" + PUBLIC "${TD_SOURCE_DIR}/include/dnode/vnode" ) if (${TD_LINUX}) target_include_directories( @@ -158,6 +163,7 @@ target_link_libraries( PUBLIC transport PUBLIC stream PUBLIC index + PUBLIC tqCommon ) IF (TD_GRANT) diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index db01f7d995..cdf3fb6a2a 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -254,8 +254,6 @@ bool tqNextDataBlockFilterOut(STqReader *pReader, SHashObj *filterOutUids); int32_t tqRetrieveDataBlock(STqReader *pReader, SSDataBlock **pRes, const char *idstr); int32_t tqRetrieveTaosxBlock(STqReader *pReader, SArray *blocks, SArray *schemas, SSubmitTbData **pSubmitTbDataRet); -int32_t vnodeEnqueueStreamMsg(SVnode *pVnode, SRpcMsg *pMsg); - // sma int32_t smaGetTSmaDays(SVnodeCfg *pCfg, void *pCont, uint32_t contLen, int32_t *days); diff --git a/source/dnode/vnode/src/inc/meta.h b/source/dnode/vnode/src/inc/meta.h index 
c74ccf6c11..7dbaa66d44 100644 --- a/source/dnode/vnode/src/inc/meta.h +++ b/source/dnode/vnode/src/inc/meta.h @@ -79,6 +79,7 @@ struct SMeta { char* path; SVnode* pVnode; + bool changed; TDB* pEnv; TXN* txn; TTB* pTbDb; diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index fdd449bf36..cf57623a43 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -43,9 +43,6 @@ extern "C" { typedef struct STqOffsetStore STqOffsetStore; -#define STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID (-1) -#define STREAM_EXEC_START_ALL_TASKS_ID (-2) -#define STREAM_EXEC_RESTART_ALL_TASKS_ID (-3) #define IS_OFFSET_RESET_TYPE(_t) ((_t) < 0) // tqExec @@ -156,7 +153,6 @@ char* tqOffsetBuildFName(const char* path, int32_t fVer); int32_t tqOffsetRestoreFromFile(STqOffsetStore* pStore, const char* fname); // tqStream -int32_t tqResetStreamTaskStatus(STQ* pTq); int32_t tqStopStreamTasks(STQ* pTq); // tq util diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index ca9d22a987..88362239f5 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -309,7 +309,12 @@ int32_t tsdbTakeReadSnap2(STsdbReader *pReader, _query_reseek_func_t reseek, STs void tsdbUntakeReadSnap2(STsdbReader *pReader, STsdbReadSnap *pSnap, bool proactive); // tsdbMerge.c ============================================================================================== -int32_t tsdbSchedMerge(STsdb *tsdb, int32_t fid); +typedef struct { + STsdb *tsdb; + int32_t fid; +} SMergeArg; + +int32_t tsdbMerge(void *arg); // tsdbDiskData ============================================================================================== int32_t tDiskDataBuilderCreate(SDiskDataBuilder **ppBuilder); diff --git a/source/dnode/vnode/src/inc/vnd.h b/source/dnode/vnode/src/inc/vnd.h index 55b62dfe48..4036200d73 100644 --- a/source/dnode/vnode/src/inc/vnd.h +++ b/source/dnode/vnode/src/inc/vnd.h @@ -48,9 +48,32 @@ int32_t vnodeCheckCfg(const SVnodeCfg*); int32_t vnodeEncodeConfig(const void* pObj, SJson* pJson); int32_t vnodeDecodeConfig(const SJson* pJson, void* pObj); +// vnodeAsync.c +typedef struct SVAsync SVAsync; + +typedef enum { + EVA_PRIORITY_HIGH = 0, + EVA_PRIORITY_NORMAL, + EVA_PRIORITY_LOW, +} EVAPriority; + +#define VNODE_ASYNC_VALID_CHANNEL_ID(channelId) ((channelId) > 0) +#define VNODE_ASYNC_VALID_TASK_ID(taskId) ((taskId) > 0) + +int32_t vnodeAsyncInit(SVAsync** async, char* label); +int32_t vnodeAsyncDestroy(SVAsync** async); +int32_t vnodeAChannelInit(SVAsync* async, int64_t* channelId); +int32_t vnodeAChannelDestroy(SVAsync* async, int64_t channelId, bool waitRunning); +int32_t vnodeAsync(SVAsync* async, EVAPriority priority, int32_t (*execute)(void*), void (*complete)(void*), void* arg, + int64_t* taskId); +int32_t vnodeAsyncC(SVAsync* async, int64_t channelId, EVAPriority priority, int32_t (*execute)(void*), + void (*complete)(void*), void* arg, int64_t* taskId); +int32_t vnodeAWait(SVAsync* async, int64_t taskId); +int32_t vnodeACancel(SVAsync* async, int64_t taskId); +int32_t vnodeAsyncSetWorkers(SVAsync* async, int32_t numWorkers); + // vnodeModule.c -int vnodeScheduleTask(int (*execute)(void*), void* arg); -int vnodeScheduleTaskEx(int tpid, int (*execute)(void*), void* arg); +extern SVAsync* vnodeAsyncHandle[2]; // vnodeBufPool.c typedef struct SVBufPoolNode SVBufPoolNode; @@ -110,7 +133,7 @@ int32_t vnodeAsyncCommit(SVnode* pVnode); bool vnodeShouldRollback(SVnode* pVnode); // vnodeSync.c -int32_t vnodeSyncOpen(SVnode *pVnode, char *path, 
int32_t vnodeVersion); +int32_t vnodeSyncOpen(SVnode* pVnode, char* path, int32_t vnodeVersion); int32_t vnodeSyncStart(SVnode* pVnode); void vnodeSyncPreClose(SVnode* pVnode); void vnodeSyncPostClose(SVnode* pVnode); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index df1720d4a7..8a4cbb5fd0 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -93,7 +93,11 @@ typedef struct SQueryNode SQueryNode; #define VNODE_RSMA2_DIR "rsma2" #define VNODE_TQ_STREAM "stream" +#if SUSPEND_RESUME_TEST // only for test purpose +#define VNODE_BUFPOOL_SEGMENTS 1 +#else #define VNODE_BUFPOOL_SEGMENTS 3 +#endif #define VND_INFO_FNAME "vnode.json" #define VND_INFO_FNAME_TMP "vnode_tmp.json" @@ -209,7 +213,7 @@ int32_t tsdbBegin(STsdb* pTsdb); // int32_t tsdbCommit(STsdb* pTsdb, SCommitInfo* pInfo); int32_t tsdbCacheCommit(STsdb* pTsdb); int32_t tsdbCompact(STsdb* pTsdb, SCompactInfo* pInfo); -int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync); +int32_t tsdbRetention(STsdb* tsdb, int64_t now, int32_t sync); // int32_t tsdbFinishCommit(STsdb* pTsdb); // int32_t tsdbRollbackCommit(STsdb* pTsdb); int tsdbScanAndConvertSubmitMsg(STsdb* pTsdb, SSubmitReq2* pMsg); @@ -232,8 +236,8 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) int32_t tqProcessTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessTaskDropHTask(STQ* pTq, SRpcMsg* pMsg); -int32_t tqStartStreamTaskAsync(STQ* pTq, bool restart); int32_t tqRestartStreamTasks(STQ* pTq); int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver); int32_t tqScanWal(STQ* pTq); @@ -262,7 +266,7 @@ int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t version, char* msg, int32_t msg int32_t tqProcessTaskCheckReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskCheckRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg); -int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec); +int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg); @@ -448,13 +452,16 @@ struct SVnode { SVBufPool* recycleTail; SVBufPool* onRecycle; + // commit variables + int64_t commitChannel; + int64_t commitTask; + SMeta* pMeta; SSma* pSma; STsdb* pTsdb; SWal* pWal; STQ* pTq; SSink* pSink; - tsem_t canCommit; int64_t sync; TdThreadMutex lock; bool blocked; @@ -494,18 +501,18 @@ struct SSma { void* pRSmaEnv; }; -#define SMA_CFG(s) (&(s)->pVnode->config) -#define SMA_TSDB_CFG(s) (&(s)->pVnode->config.tsdbCfg) -#define SMA_RETENTION(s) ((SRetention*)&(s)->pVnode->config.tsdbCfg.retentions) -#define SMA_LOCKED(s) ((s)->locked) -#define SMA_META(s) ((s)->pVnode->pMeta) -#define SMA_VID(s) TD_VID((s)->pVnode) -#define SMA_TFS(s) ((s)->pVnode->pTfs) -#define SMA_TSMA_ENV(s) ((s)->pTSmaEnv) -#define SMA_RSMA_ENV(s) ((s)->pRSmaEnv) -#define SMA_RSMA_TSDB0(s) ((s)->pVnode->pTsdb) -#define SMA_RSMA_TSDB1(s) ((s)->pRSmaTsdb[TSDB_RETENTION_L0]) -#define SMA_RSMA_TSDB2(s) ((s)->pRSmaTsdb[TSDB_RETENTION_L1]) +#define SMA_CFG(s) (&(s)->pVnode->config) +#define SMA_TSDB_CFG(s) (&(s)->pVnode->config.tsdbCfg) +#define SMA_RETENTION(s) ((SRetention*)&(s)->pVnode->config.tsdbCfg.retentions) +#define SMA_LOCKED(s) ((s)->locked) +#define SMA_META(s) 
((s)->pVnode->pMeta) +#define SMA_VID(s) TD_VID((s)->pVnode) +#define SMA_TFS(s) ((s)->pVnode->pTfs) +#define SMA_TSMA_ENV(s) ((s)->pTSmaEnv) +#define SMA_RSMA_ENV(s) ((s)->pRSmaEnv) +#define SMA_RSMA_TSDB0(s) ((s)->pVnode->pTsdb) +#define SMA_RSMA_TSDB1(s) ((s)->pRSmaTsdb[TSDB_RETENTION_L0]) +#define SMA_RSMA_TSDB2(s) ((s)->pRSmaTsdb[TSDB_RETENTION_L1]) #define SMA_RSMA_GET_TSDB(pVnode, level) ((level == 0) ? pVnode->pTsdb : pVnode->pSma->pRSmaTsdb[level - 1]) // sma diff --git a/source/dnode/vnode/src/meta/metaCommit.c b/source/dnode/vnode/src/meta/metaCommit.c index f5572e68dd..f8b41e413b 100644 --- a/source/dnode/vnode/src/meta/metaCommit.c +++ b/source/dnode/vnode/src/meta/metaCommit.c @@ -56,7 +56,7 @@ int metaPrepareAsyncCommit(SMeta *pMeta) { code = ttlMgrFlush(pMeta->pTtlMgr, pMeta->txn); metaULock(pMeta); code = tdbCommit(pMeta->pEnv, pMeta->txn); - + pMeta->changed = false; return code; } diff --git a/source/dnode/vnode/src/meta/metaTable.c b/source/dnode/vnode/src/meta/metaTable.c index f600925a0b..976ee616f9 100644 --- a/source/dnode/vnode/src/meta/metaTable.c +++ b/source/dnode/vnode/src/meta/metaTable.c @@ -251,6 +251,7 @@ int metaCreateSTable(SMeta *pMeta, int64_t version, SVCreateStbReq *pReq) { ++pMeta->pVnode->config.vndStats.numOfSTables; + pMeta->changed = true; metaDebug("vgId:%d, stb:%s is created, suid:%" PRId64, TD_VID(pMeta->pVnode), pReq->name, pReq->suid); return 0; @@ -325,6 +326,8 @@ _drop_super_table: metaUpdTimeSeriesNum(pMeta); + pMeta->changed = true; + _exit: tdbFree(pKey); tdbFree(pData); @@ -424,6 +427,8 @@ int metaAlterSTable(SMeta *pMeta, int64_t version, SVCreateStbReq *pReq) { metaTimeSeriesNotifyCheck(pMeta); } + pMeta->changed = true; + _exit: if (oStbEntry.pBuf) taosMemoryFree(oStbEntry.pBuf); tDecoderClear(&dc); @@ -847,6 +852,7 @@ int metaCreateTable(SMeta *pMeta, int64_t ver, SVCreateTbReq *pReq, STableMetaRs } } + pMeta->changed = true; metaDebug("vgId:%d, table:%s uid %" PRId64 " is created, type:%" PRId8, TD_VID(pMeta->pVnode), pReq->name, pReq->uid, pReq->type); return 0; @@ -895,6 +901,7 @@ int metaDropTable(SMeta *pMeta, int64_t version, SVDropTbReq *pReq, SArray *tbUi *tbUid = uid; } + pMeta->changed = true; _exit: tdbFree(pData); return rc; @@ -938,6 +945,8 @@ void metaDropTables(SMeta *pMeta, SArray *tbUids) { } } tSimpleHashCleanup(suidHash); + + pMeta->changed = true; } static int32_t metaFilterTableByHash(SMeta *pMeta, SArray *uidList) { @@ -1233,6 +1242,7 @@ static int metaAlterTableColumn(SMeta *pMeta, int64_t version, SVAlterTbReq *pAl if (pAlterTbReq->colName == NULL) { terrno = TSDB_CODE_INVALID_MSG; + metaError("meta/table: null pAlterTbReq->colName"); return -1; } @@ -1300,20 +1310,27 @@ static int metaAlterTableColumn(SMeta *pMeta, int64_t version, SVAlterTbReq *pAl SMetaEntry oldEntry = {.type = TSDB_NORMAL_TABLE, .uid = entry.uid}; oldEntry.ntbEntry.schemaRow.nCols = pSchema->nCols; - int32_t iCol = 0; + int32_t rowLen = -1; + if (pAlterTbReq->action == TSDB_ALTER_TABLE_ADD_COLUMN || + pAlterTbReq->action == TSDB_ALTER_TABLE_UPDATE_COLUMN_BYTES) { + rowLen = 0; + } + + int32_t iCol = 0, jCol = 0; + SSchema *qColumn = NULL; for (;;) { - pColumn = NULL; + qColumn = NULL; - if (iCol >= pSchema->nCols) break; - pColumn = &pSchema->pSchema[iCol]; + if (jCol >= pSchema->nCols) break; + qColumn = &pSchema->pSchema[jCol]; - if (NULL == pAlterTbReq->colName) { - metaError("meta/table: null pAlterTbReq->colName"); - return -1; + if (!pColumn && (strcmp(qColumn->name, pAlterTbReq->colName) == 0)) { + pColumn = qColumn; + iCol = 
jCol; + if (rowLen < 0) break; } - - if (strcmp(pColumn->name, pAlterTbReq->colName) == 0) break; - iCol++; + rowLen += qColumn->bytes; + ++jCol; } entry.version = version; @@ -1328,6 +1345,10 @@ static int metaAlterTableColumn(SMeta *pMeta, int64_t version, SVAlterTbReq *pAl if ((terrno = grantCheck(TSDB_GRANT_TIMESERIES)) < 0) { goto _err; } + if (rowLen + pAlterTbReq->bytes > TSDB_MAX_BYTES_PER_ROW) { + terrno = TSDB_CODE_PAR_INVALID_ROW_LENGTH; + goto _err; + } pSchema->version++; pSchema->nCols++; pNewSchema = taosMemoryMalloc(sizeof(SSchema) * pSchema->nCols); @@ -1369,10 +1390,14 @@ static int metaAlterTableColumn(SMeta *pMeta, int64_t version, SVAlterTbReq *pAl terrno = TSDB_CODE_VND_COL_NOT_EXISTS; goto _err; } - if (!IS_VAR_DATA_TYPE(pColumn->type) || pColumn->bytes > pAlterTbReq->colModBytes) { + if (!IS_VAR_DATA_TYPE(pColumn->type) || pColumn->bytes >= pAlterTbReq->colModBytes) { terrno = TSDB_CODE_VND_INVALID_TABLE_ACTION; goto _err; } + if (rowLen + pAlterTbReq->colModBytes - pColumn->bytes > TSDB_MAX_BYTES_PER_ROW) { + terrno = TSDB_CODE_PAR_INVALID_ROW_LENGTH; + goto _err; + } if (tqCheckColModifiable(pMeta->pVnode->pTq, uid, pColumn->colId) != 0) { terrno = TSDB_CODE_VND_COL_SUBSCRIBED; goto _err; @@ -1970,6 +1995,7 @@ _err: } int metaAlterTable(SMeta *pMeta, int64_t version, SVAlterTbReq *pReq, STableMetaRsp *pMetaRsp) { + pMeta->changed = true; switch (pReq->action) { case TSDB_ALTER_TABLE_ADD_COLUMN: case TSDB_ALTER_TABLE_DROP_COLUMN: diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 5dc29509a0..abe4c3f2fc 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -61,7 +61,7 @@ struct SRSmaQTaskInfoItem { int32_t len; int8_t type; int64_t suid; - void *qTaskInfo; + void * qTaskInfo; }; static void tdRSmaQTaskInfoFree(qTaskInfo_t *taskHandle, int32_t vgId, int32_t level) { @@ -185,7 +185,7 @@ int32_t tdUpdateTbUidList(SSma *pSma, STbUidStore *pStore, bool isAdd) { void *pIter = NULL; while ((pIter = taosHashIterate(pStore->uidHash, pIter))) { tb_uid_t *pTbSuid = (tb_uid_t *)taosHashGetKey(pIter, NULL); - SArray *pTbUids = *(SArray **)pIter; + SArray * pTbUids = *(SArray **)pIter; if (tdUpdateTbUidListImpl(pSma, pTbSuid, pTbUids, isAdd) != TSDB_CODE_SUCCESS) { taosHashCancelIterate(pStore->uidHash, pIter); @@ -213,7 +213,7 @@ int32_t tdFetchTbUidList(SSma *pSma, STbUidStore **ppStore, tb_uid_t suid, tb_ui } SRSmaStat *pStat = (SRSmaStat *)SMA_ENV_STAT(pEnv); - SHashObj *infoHash = NULL; + SHashObj * infoHash = NULL; if (!pStat || !(infoHash = RSMA_INFO_HASH(pStat))) { terrno = TSDB_CODE_RSMA_INVALID_STAT; return TSDB_CODE_FAILED; @@ -264,11 +264,11 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat int8_t idx) { if ((param->qmsgLen > 0) && param->qmsg[idx]) { SRSmaInfoItem *pItem = &(pRSmaInfo->items[idx]); - SRetention *pRetention = SMA_RETENTION(pSma); - STsdbCfg *pTsdbCfg = SMA_TSDB_CFG(pSma); - SVnode *pVnode = pSma->pVnode; + SRetention * pRetention = SMA_RETENTION(pSma); + STsdbCfg * pTsdbCfg = SMA_TSDB_CFG(pSma); + SVnode * pVnode = pSma->pVnode; char taskInfDir[TSDB_FILENAME_LEN] = {0}; - void *pStreamState = NULL; + void * pStreamState = NULL; // set the backend of stream state tdRSmaQTaskInfoGetFullPath(pVnode, pRSmaInfo->suid, idx + 1, pVnode->pTfs, taskInfDir); @@ -297,6 +297,8 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat sprintf(pStreamTask->exec.qmsg, "%s", RSMA_EXEC_TASK_FLAG); 
pStreamTask->chkInfo.checkpointId = streamMetaGetLatestCheckpointId(pStreamTask->pMeta); tdRSmaTaskInit(pStreamTask->pMeta, pItem, &pStreamTask->id); + pStreamTask->status.pSM = streamCreateStateMachine(pStreamTask); + pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); if (!pStreamState) { terrno = TSDB_CODE_RSMA_STREAM_STATE_OPEN; @@ -372,7 +374,7 @@ int32_t tdRSmaProcessCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, con } #endif - SSmaEnv *pEnv = SMA_RSMA_ENV(pSma); + SSmaEnv * pEnv = SMA_RSMA_ENV(pSma); SRSmaStat *pStat = (SRSmaStat *)SMA_ENV_STAT(pEnv); SRSmaInfo *pRSmaInfo = NULL; @@ -651,9 +653,7 @@ static int32_t tdRSmaProcessDelReq(SSma *pSma, int64_t suid, int8_t level, SBatc ((SMsgHead *)pBuf)->vgId = TD_VID(pSma->pVnode); - SRpcMsg delMsg = {.msgType = TDMT_VND_BATCH_DEL, - .pCont = pBuf, - .contLen = len + sizeof(SMsgHead)}; + SRpcMsg delMsg = {.msgType = TDMT_VND_BATCH_DEL, .pCont = pBuf, .contLen = len + sizeof(SMsgHead)}; code = tmsgPutToQueue(&pSma->pVnode->msgCb, WRITE_QUEUE, &delMsg); TSDB_CHECK_CODE(code, lino, _exit); } @@ -673,8 +673,8 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma int32_t code = 0; int32_t lino = 0; SSDataBlock *output = NULL; - SArray *pResList = pItem->pResList; - STSchema *pTSchema = pInfo->pTSchema; + SArray * pResList = pItem->pResList; + STSchema * pTSchema = pInfo->pTSchema; int64_t suid = pInfo->suid; while (1) { @@ -733,7 +733,7 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma } } - STsdb *sinkTsdb = (pItem->level == TSDB_RETENTION_L1 ? pSma->pRSmaTsdb[0] : pSma->pRSmaTsdb[1]); + STsdb * sinkTsdb = (pItem->level == TSDB_RETENTION_L1 ? pSma->pRSmaTsdb[0] : pSma->pRSmaTsdb[1]); SSubmitReq2 *pReq = NULL; if (buildSubmitReqFromDataBlock(&pReq, output, pTSchema, output->info.id.groupId, SMA_VID(pSma), suid) < 0) { @@ -795,7 +795,7 @@ _exit: static int32_t tdExecuteRSmaImplAsync(SSma *pSma, int64_t version, const void *pMsg, int32_t len, int32_t inputType, SRSmaInfo *pInfo, tb_uid_t suid) { int32_t size = RSMA_EXEC_MSG_HLEN + len; // header + payload - void *qItem = taosAllocateQitem(size, DEF_QITEM, 0); + void * qItem = taosAllocateQitem(size, DEF_QITEM, 0); if (!qItem) { return TSDB_CODE_FAILED; @@ -870,10 +870,10 @@ static int32_t tdRsmaPrintSubmitReq(SSma *pSma, SSubmitReq *pReq) { * @param level * @return int32_t */ -static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int64_t version, int32_t inputType, SRSmaInfo *pInfo, - ERsmaExecType type, int8_t level) { +static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int64_t version, int32_t inputType, + SRSmaInfo *pInfo, ERsmaExecType type, int8_t level) { int32_t idx = level - 1; - void *qTaskInfo = RSMA_INFO_QTASK(pInfo, idx); + void * qTaskInfo = RSMA_INFO_QTASK(pInfo, idx); SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, idx); if (!qTaskInfo) { @@ -887,8 +887,9 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, return TSDB_CODE_FAILED; } - smaDebug("vgId:%d, execute rsma %" PRIi8 " task for qTaskInfo:%p, suid:%" PRIu64 ", nMsg:%d, submitReqVer:%" PRIi64 ", inputType:%d", SMA_VID(pSma), level, - RSMA_INFO_QTASK(pInfo, idx), pInfo->suid, msgSize, version, inputType); + smaDebug("vgId:%d, execute rsma %" PRIi8 " task for qTaskInfo:%p, suid:%" PRIu64 ", nMsg:%d, submitReqVer:%" PRIi64 + ", inputType:%d", + SMA_VID(pSma), level, RSMA_INFO_QTASK(pInfo, idx), pInfo->suid, msgSize, version, inputType); if ((terrno = 
qSetSMAInput(qTaskInfo, pMsg, msgSize, inputType)) < 0) { smaError("vgId:%d, rsma %" PRIi8 " qSetStreamInput failed since %s", SMA_VID(pSma), level, tstrerror(terrno)); @@ -912,7 +913,7 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid) { int32_t code = 0; int32_t lino = 0; - SSmaEnv *pEnv = SMA_RSMA_ENV(pSma); + SSmaEnv * pEnv = SMA_RSMA_ENV(pSma); SRSmaStat *pStat = NULL; SRSmaInfo *pRSmaInfo = NULL; @@ -1067,8 +1068,8 @@ _err: static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables) { int32_t code = 0; int32_t lino = 0; - SVnode *pVnode = pSma->pVnode; - SArray *suidList = NULL; + SVnode * pVnode = pSma->pVnode; + SArray * suidList = NULL; STbUidStore uidStore = {0}; SMetaReader mr = {0}; tb_uid_t suid = 0; @@ -1196,7 +1197,7 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { int32_t code = 0; int32_t lino = 0; int32_t nTaskInfo = 0; - SSma *pSma = pRSmaStat->pSma; + SSma * pSma = pRSmaStat->pSma; SVnode *pVnode = pSma->pVnode; if (taosHashGetSize(pInfoHash) <= 0) { @@ -1229,7 +1230,7 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) { do { int32_t nStreamFlushed = 0; int32_t nSleep = 0; - void *infoHash = NULL; + void * infoHash = NULL; while (true) { while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; @@ -1271,7 +1272,7 @@ _checkpoint: SStreamMeta *pMeta = NULL; int64_t checkpointId = taosGetTimestampNs(); bool checkpointBuilt = false; - void *infoHash = NULL; + void * infoHash = NULL; while ((infoHash = taosHashIterate(pInfoHash, infoHash))) { SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash; if (RSMA_INFO_IS_DEL(pRSmaInfo)) { @@ -1282,11 +1283,12 @@ _checkpoint: SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pRSmaInfo, i); if (pItem && pItem->pStreamTask) { SStreamTask *pTask = pItem->pStreamTask; - atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); - pTask->checkpointingId = checkpointId; - pTask->chkInfo.checkpointId = pTask->checkpointingId; + // atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); + pTask->chkInfo.checkpointingId = checkpointId; + pTask->chkInfo.checkpointId = checkpointId; // 1pTask->checkpointingId; pTask->chkInfo.checkpointVer = pItem->submitReqVer; pTask->info.triggerParam = pItem->fetchResultVer; + pTask->info.taskLevel = TASK_LEVEL_SMA; if (!checkpointBuilt) { // the stream states share one checkpoint @@ -1342,10 +1344,10 @@ _exit: * @param tmrId */ static void tdRSmaFetchTrigger(void *param, void *tmrId) { - SRSmaRef *pRSmaRef = NULL; - SSma *pSma = NULL; - SRSmaStat *pStat = NULL; - SRSmaInfo *pRSmaInfo = NULL; + SRSmaRef * pRSmaRef = NULL; + SSma * pSma = NULL; + SRSmaStat * pStat = NULL; + SRSmaInfo * pRSmaInfo = NULL; SRSmaInfoItem *pItem = NULL; if (!(pRSmaRef = taosHashGet(smaMgmt.refHash, ¶m, POINTER_BYTES))) { @@ -1513,7 +1515,7 @@ _err: } static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SArray *pSubmitArr, ERsmaExecType type) { - void *msg = NULL; + void * msg = NULL; int8_t resume = 0; int32_t nSubmit = 0; int32_t nDelete = 0; @@ -1628,11 +1630,11 @@ _err: int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type) { int32_t code = 0; int32_t lino = 0; - SVnode *pVnode = pSma->pVnode; - SSmaEnv *pEnv = SMA_RSMA_ENV(pSma); + SVnode * pVnode = pSma->pVnode; + SSmaEnv * pEnv = SMA_RSMA_ENV(pSma); SRSmaStat *pRSmaStat = (SRSmaStat *)SMA_ENV_STAT(pEnv); - SHashObj *infoHash = NULL; - SArray *pSubmitArr = 
NULL; + SHashObj * infoHash = NULL; + SArray * pSubmitArr = NULL; bool isFetchAll = false; if (!pRSmaStat || !(infoHash = RSMA_INFO_HASH(pRSmaStat))) { @@ -1731,4 +1733,4 @@ _exit: smaError("vgId:%d, %s failed at line %d since %s", TD_VID(pVnode), __func__, lino, tstrerror(code)); } return code; -} +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 6dbeaef6cb..2b35628663 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -15,17 +15,12 @@ #include "tq.h" #include "vnd.h" +#include "tqCommon.h" typedef struct { int8_t inited; } STqMgmt; -typedef struct STaskUpdateEntry { - int64_t streamId; - int32_t taskId; - int32_t transId; -} STaskUpdateEntry; - static STqMgmt tqMgmt = {0}; // 0: not init @@ -88,7 +83,7 @@ void tqDestroyTqHandle(void* data) { taosMemoryFree(pData->msg); pData->msg = NULL; } - if (pData->block != NULL){ + if (pData->block != NULL) { blockDataDestroy(pData->block); } } @@ -591,9 +586,9 @@ int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg taosWLockLatch(&pTq->lock); bool exec = tqIsHandleExec(pHandle); - if(exec){ + if (exec) { tqInfo("vgId:%d, topic:%s, subscription is executing, delete wait for 10ms and retry, pHandle:%p", vgId, - pHandle->subKey, pHandle); + pHandle->subKey, pHandle); taosWUnLockLatch(&pTq->lock); taosMsleep(10); continue; @@ -710,12 +705,12 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg ret = tqMetaSaveHandle(pTq, req.subKey, &handle); taosWUnLockLatch(&pTq->lock); } else { - while(1){ + while (1) { taosWLockLatch(&pTq->lock); bool exec = tqIsHandleExec(pHandle); - if(exec){ - tqInfo("vgId:%d, topic:%s, subscription is executing, sub wait for 10ms and retry, pHandle:%p", pTq->pVnode->config.vgId, - pHandle->subKey, pHandle); + if (exec) { + tqInfo("vgId:%d, topic:%s, subscription is executing, sub wait for 10ms and retry, pHandle:%p", + pTq->pVnode->config.vgId, pHandle->subKey, pHandle); taosWUnLockLatch(&pTq->lock); taosMsleep(10); continue; @@ -724,7 +719,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg tqInfo("vgId:%d no switch consumer:0x%" PRIx64 " remains, because redo wal log", req.vgId, req.newConsumerId); } else { tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId, - req.newConsumerId); + req.newConsumerId); atomic_store_64(&pHandle->consumerId, req.newConsumerId); atomic_store_32(&pHandle->epoch, 0); tqUnregisterPushHandle(pTq, pHandle); @@ -755,21 +750,27 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) { if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { SStreamTask* pStateTask = pTask; - SStreamTask task = {0}; + + STaskId taskId = {.streamId = 0, .taskId = 0}; if (pTask->info.fillHistory) { - task.id.streamId = pTask->streamTaskId.streamId; - task.id.taskId = pTask->streamTaskId.taskId; - task.pMeta = pTask->pMeta; - pStateTask = &task; + taskId.streamId = pTask->id.streamId; + taskId.taskId = pTask->id.taskId; + + pTask->id.streamId = pTask->streamTaskId.streamId; + pTask->id.taskId = pTask->streamTaskId.taskId; } - pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pStateTask, false, -1, -1); + pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pTask, false, -1, -1); if (pTask->pState == NULL) { tqError("s-task:%s (vgId:%d) failed to open state for task", pTask->id.idStr, vgId); return -1; } else { tqDebug("s-task:%s state:%p", pTask->id.idStr, 
pTask->pState); } + if (pTask->info.fillHistory) { + pTask->id.streamId = taskId.streamId; + pTask->id.taskId = taskId.taskId; + } SReadHandle handle = { .checkpointId = pTask->chkInfo.checkpointId, @@ -790,15 +791,17 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) { qSetTaskId(pTask->exec.pExecutor, pTask->id.taskId, pTask->id.streamId); } else if (pTask->info.taskLevel == TASK_LEVEL__AGG) { SStreamTask* pSateTask = pTask; - SStreamTask task = {0}; + // SStreamTask task = {0}; + + STaskId taskId = {.streamId = 0, .taskId = 0}; if (pTask->info.fillHistory) { - task.id.streamId = pTask->streamTaskId.streamId; - task.id.taskId = pTask->streamTaskId.taskId; - task.pMeta = pTask->pMeta; - pSateTask = &task; + taskId.streamId = pTask->id.streamId; + taskId.taskId = pTask->id.taskId; + pTask->id.streamId = pTask->streamTaskId.streamId; + pTask->id.taskId = pTask->streamTaskId.taskId; } - pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pSateTask, false, -1, -1); + pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pTask, false, -1, -1); if (pTask->pState == NULL) { tqError("s-task:%s (vgId:%d) failed to open state for task", pTask->id.idStr, vgId); return -1; @@ -806,6 +809,11 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) { tqDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState); } + if (pTask->info.fillHistory) { + pTask->id.streamId = taskId.streamId; + pTask->id.taskId = taskId.taskId; + } + int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->upstreamInfo.pList); SReadHandle handle = { .checkpointId = pTask->chkInfo.checkpointId, @@ -856,11 +864,11 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) { pTask->exec.pWalReader = walOpenReader(pTq->pVnode->pWal, &cond, pTask->id.taskId); } -// // reset the task status from unfinished transaction -// if (pTask->status.taskStatus == TASK_STATUS__PAUSE) { -// tqWarn("s-task:%s reset task status to be normal, status kept in taskMeta: Paused", pTask->id.idStr); -// pTask->status.taskStatus = TASK_STATUS__READY; -// } + // // reset the task status from unfinished transaction + // if (pTask->status.taskStatus == TASK_STATUS__PAUSE) { + // tqWarn("s-task:%s reset task status to be normal, status kept in taskMeta: Paused", pTask->id.idStr); + // pTask->status.taskStatus = TASK_STATUS__READY; + // } streamTaskResetUpstreamStageInfo(pTask); streamSetupScheduleTrigger(pTask); @@ -896,172 +904,15 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) { } int32_t tqProcessTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { - char* msgStr = pMsg->pCont; - char* msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); - int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); - SStreamMeta* pMeta = pTq->pStreamMeta; - - SStreamTaskCheckReq req; - SDecoder decoder; - - tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen); - tDecodeStreamTaskCheckReq(&decoder, &req); - tDecoderClear(&decoder); - - int32_t taskId = req.downstreamTaskId; - - SStreamTaskCheckRsp rsp = { - .reqId = req.reqId, - .streamId = req.streamId, - .childId = req.childId, - .downstreamNodeId = req.downstreamNodeId, - .downstreamTaskId = req.downstreamTaskId, - .upstreamNodeId = req.upstreamNodeId, - .upstreamTaskId = req.upstreamTaskId, - }; - - // only the leader node handle the check request - if (pMeta->role == NODE_ROLE_FOLLOWER) { - tqError("s-task:0x%x invalid check msg from upstream:0x%x(vgId:%d), vgId:%d is follower, not handle check status msg", - taskId, req.upstreamTaskId, 
req.upstreamNodeId, pMeta->vgId); - rsp.status = TASK_DOWNSTREAM_NOT_LEADER; - } else { - SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, taskId); - if (pTask != NULL) { - rsp.status = streamTaskCheckStatus(pTask, req.upstreamTaskId, req.upstreamNodeId, req.stage); - streamMetaReleaseTask(pMeta, pTask); - - char* p = NULL; - streamTaskGetStatus(pTask, &p); - tqDebug("s-task:%s status:%s, stage:%d recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), check_status:%d", - pTask->id.idStr, p, rsp.oldStage, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); - } else { - rsp.status = TASK_DOWNSTREAM_NOT_READY; - tqDebug("tq recv task check(taskId:0x%" PRIx64 "-0x%x not built yet) req(reqId:0x%" PRIx64 - ") from task:0x%x (vgId:%d), rsp check_status %d", - req.streamId, taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); - } - } - - return streamSendCheckRsp(pMeta, &req, &rsp, &pMsg->info, taskId); + return tqStreamTaskProcessCheckReq(pTq->pStreamMeta, pMsg); } int32_t tqProcessTaskCheckRsp(STQ* pTq, SRpcMsg* pMsg) { - char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); - int32_t len = pMsg->contLen - sizeof(SMsgHead); - int32_t vgId = pTq->pStreamMeta->vgId; - - int32_t code; - SStreamTaskCheckRsp rsp; - - SDecoder decoder; - tDecoderInit(&decoder, (uint8_t*)pReq, len); - code = tDecodeStreamTaskCheckRsp(&decoder, &rsp); - if (code < 0) { - terrno = TSDB_CODE_INVALID_MSG; - tDecoderClear(&decoder); - tqError("vgId:%d failed to parse check rsp msg, code:%s", vgId, tstrerror(terrno)); - return -1; - } - - tDecoderClear(&decoder); - tqDebug("tq task:0x%x (vgId:%d) recv check rsp(reqId:0x%" PRIx64 ") from 0x%x (vgId:%d) status %d", - rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.status); - - if (!vnodeIsRoleLeader(pTq->pVnode)) { - tqError("vgId:%d not leader, task:0x%x not handle the check rsp, downstream:0x%x (vgId:%d)", vgId, - rsp.upstreamTaskId, rsp.downstreamTaskId, rsp.downstreamNodeId); - return code; - } - - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, rsp.streamId, rsp.upstreamTaskId); - if (pTask == NULL) { - tqError("tq failed to locate the stream task:0x%" PRIx64 "-0x%x (vgId:%d), it may have been destroyed or stopped", - rsp.streamId, rsp.upstreamTaskId, pTq->pStreamMeta->vgId); - terrno = TSDB_CODE_STREAM_TASK_NOT_EXIST; - return -1; - } - - code = streamProcessCheckRsp(pTask, &rsp); - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - return code; + return tqStreamTaskProcessCheckRsp(pTq->pStreamMeta, pMsg, vnodeIsRoleLeader(pTq->pVnode)); } int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { - int32_t code = 0; - int32_t vgId = TD_VID(pTq->pVnode); - - if (tsDisableStream) { - tqInfo("vgId:%d stream disabled, not deploy stream tasks", vgId); - return code; - } - - tqDebug("vgId:%d receive new stream task deploy msg, start to build stream task", vgId); - - // 1.deserialize msg and build task - int32_t size = sizeof(SStreamTask); - SStreamTask* pTask = taosMemoryCalloc(1, size); - if (pTask == NULL) { - tqError("vgId:%d failed to create stream task due to out of memory, alloc size:%d", vgId, size); - return TSDB_CODE_OUT_OF_MEMORY; - } - - SDecoder decoder; - tDecoderInit(&decoder, (uint8_t*)msg, msgLen); - code = tDecodeStreamTask(&decoder, pTask); - tDecoderClear(&decoder); - - if (code != TSDB_CODE_SUCCESS) { - taosMemoryFree(pTask); - return TSDB_CODE_INVALID_MSG; - } - - SStreamMeta* pStreamMeta = pTq->pStreamMeta; 
- - // 2.save task, use the latest commit version as the initial start version of stream task. - int32_t taskId = pTask->id.taskId; - int64_t streamId = pTask->id.streamId; - bool added = false; - - streamMetaWLock(pStreamMeta); - code = streamMetaRegisterTask(pStreamMeta, sversion, pTask, &added); - int32_t numOfTasks = streamMetaGetNumOfTasks(pStreamMeta); - streamMetaWUnLock(pStreamMeta); - - if (code < 0) { - tqError("failed to add s-task:0x%x into vgId:%d meta, total:%d, code:%s", vgId, taskId, numOfTasks, tstrerror(code)); - tFreeStreamTask(pTask); - return code; - } - - // added into meta store, pTask cannot be reference since it may have been destroyed by other threads already now if - // it is added into the meta store - if (added) { - // only handled in the leader node - if (vnodeIsRoleLeader(pTq->pVnode)) { - tqDebug("vgId:%d s-task:0x%x is deployed and add into meta, numOfTasks:%d", vgId, taskId, numOfTasks); - SStreamTask* p = streamMetaAcquireTask(pStreamMeta, streamId, taskId); - - bool restored = pTq->pVnode->restored; - if (p != NULL && restored && p->info.fillHistory == 0) { - EStreamTaskEvent event = (HAS_RELATED_FILLHISTORY_TASK(p)) ? TASK_EVENT_INIT_STREAM_SCANHIST : TASK_EVENT_INIT; - streamTaskHandleEvent(p->status.pSM, event); - } else if (!restored) { - tqWarn("s-task:%s not launched since vnode(vgId:%d) not ready", p->id.idStr, vgId); - } - - if (p != NULL) { - streamMetaReleaseTask(pStreamMeta, p); - } - } else { - tqDebug("vgId:%d not leader, not launch stream task s-task:0x%x", vgId, taskId); - } - } else { - tqWarn("vgId:%d failed to add s-task:0x%x, since already exists in meta store", vgId, taskId); - tFreeStreamTask(pTask); - } - - return code; + return tqStreamTaskProcessDeployReq(pTq->pStreamMeta, sversion, msg, msgLen, vnodeIsRoleLeader(pTq->pVnode), pTq->pVnode->restored); } static void doStartFillhistoryStep2(SStreamTask* pTask, SStreamTask* pStreamTask, STQ* pTq) { @@ -1069,7 +920,7 @@ static void doStartFillhistoryStep2(SStreamTask* pTask, SStreamTask* pStreamTask int64_t nextProcessedVer = pStreamTask->hTaskInfo.haltVer; // if it's an source task, extract the last version in wal. - SVersionRange *pRange = &pTask->dataRange.range; + SVersionRange* pRange = &pTask->dataRange.range; bool done = streamHistoryTaskSetVerRangeStep2(pTask, nextProcessedVer); pTask->execInfo.step2Start = taosGetTimestampMs(); @@ -1081,7 +932,7 @@ static void doStartFillhistoryStep2(SStreamTask* pTask, SStreamTask* pStreamTask } else { STimeWindow* pWindow = &pTask->dataRange.window; tqDebug("s-task:%s level:%d verRange:%" PRId64 " - %" PRId64 " window:%" PRId64 "-%" PRId64 - ", do secondary scan-history from WAL after halt the related stream task:%s", + ", do secondary scan-history from WAL after halt the related stream task:%s", id, pTask->info.taskLevel, pRange->minVer, pRange->maxVer, pWindow->skey, pWindow->ekey, pStreamTask->id.idStr); ASSERT(pTask->status.schedStatus == TASK_SCHED_STATUS__WAITING); @@ -1095,7 +946,7 @@ static void doStartFillhistoryStep2(SStreamTask* pTask, SStreamTask* pStreamTask tqDebug("s-task:%s wal reader start scan WAL verRange:%" PRId64 "-%" PRId64 ", set sched-status:%d", id, dstVer, pTask->dataRange.range.maxVer, TASK_SCHED_STATUS__INACTIVE); - /*int8_t status = */streamTaskSetSchedStatusInactive(pTask); + /*int8_t status = */ streamTaskSetSchedStatusInactive(pTask); // now the fill-history task starts to scan data from wal files. 
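/*
 * Illustrative sketch, not part of this patch: the two-step fill-history scan that
 * doStartFillhistoryStep2() continues at this point. Step 1 reads already-committed
 * history data up to the version recorded in the task's data range; step 2 replays the
 * WAL from that point up to the halt version of the related stream task
 * (hTaskInfo.haltVer). The struct layout and names below are simplified stand-ins, not
 * the real SStreamTask fields.
 */
#include <inttypes.h>
#include <stdio.h>

typedef struct {
  int64_t minVer;   /* start of the history range (step 1) */
  int64_t maxVer;   /* end of step 1; step 2 starts right after it */
  int64_t haltVer;  /* version at which the related stream task was halted */
} FillHistoryRange;

/* print the version range each step is responsible for */
static void planFillHistory(const FillHistoryRange *r) {
  printf("step 1: scan committed data, ver [%" PRId64 ", %" PRId64 "]\n", r->minVer, r->maxVer);
  if (r->haltVer > r->maxVer) {
    printf("step 2: scan WAL, ver (%" PRId64 ", %" PRId64 "]\n", r->maxVer, r->haltVer);
  } else {
    printf("step 2: nothing to do, stream task halted at ver %" PRId64 "\n", r->haltVer);
  }
}

int main(void) {
  FillHistoryRange r = {.minVer = 0, .maxVer = 100, .haltVer = 250};
  planFillHistory(&r);
  return 0;
}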
int32_t code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_SCANHIST_DONE); @@ -1124,7 +975,7 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { streamTaskGetStatus(pTask, &pStatus); // avoid multi-thread exec - while(1) { + while (1) { int32_t sentinel = atomic_val_compare_exchange_32(&pTask->status.inScanHistorySentinel, 0, 1); if (sentinel != 0) { tqDebug("s-task:%s already in scan-history func, wait for 100ms, and try again", id); @@ -1136,16 +987,10 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { // let's decide which step should be executed now if (pTask->execInfo.step1Start == 0) { - ASSERT(pTask->status.pauseAllowed == false); int64_t ts = taosGetTimestampMs(); pTask->execInfo.step1Start = ts; tqDebug("s-task:%s start scan-history stage(step 1), status:%s, step1 startTs:%" PRId64, id, pStatus, ts); - - // NOTE: in case of stream task, scan-history data in wal is not allowed to pause - if (pTask->info.fillHistory == 1) { - streamTaskEnablePause(pTask); - } } else { if (pTask->execInfo.step2Start == 0) { tqDebug("s-task:%s continue exec scan-history(step1), original step1 startTs:%" PRId64 ", already elapsed:%.2fs", @@ -1171,7 +1016,7 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { return 0; } - int64_t st = taosGetTimestampMs(); + int64_t st = taosGetTimestampMs(); SScanhistoryDataInfo retInfo = streamScanHistoryData(pTask, st); double el = (taosGetTimestampMs() - st) / 1000.0; @@ -1204,13 +1049,13 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { tqDebug("s-task:%s scan-history(step 1) ended, elapsed time:%.2fs", id, pTask->execInfo.step1El); if (pTask->info.fillHistory) { - SStreamTask* pStreamTask = NULL; + SStreamTask* pStreamTask = NULL; // 1. get the related stream task pStreamTask = streamMetaAcquireTask(pMeta, pTask->streamTaskId.streamId, pTask->streamTaskId.taskId); if (pStreamTask == NULL) { - tqError("failed to find s-task:0x%"PRIx64", it may have been destroyed, drop related fill-history task:%s", - pTask->streamTaskId.taskId, pTask->id.idStr); + tqError("failed to find s-task:0x%" PRIx64 ", it may have been destroyed, drop related fill-history task:%s", + pTask->streamTaskId.taskId, pTask->id.idStr); tqDebug("s-task:%s fill-history task set status to be dropping", id); streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &pTask->id); @@ -1249,189 +1094,39 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { // only the agg tasks and the sink tasks will receive this message from upstream tasks int32_t tqProcessTaskScanHistoryFinishReq(STQ* pTq, SRpcMsg* pMsg) { - char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); - int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); - - // deserialize - SStreamScanHistoryFinishReq req = {0}; - - SDecoder decoder; - tDecoderInit(&decoder, (uint8_t*)msg, msgLen); - tDecodeStreamScanHistoryFinishReq(&decoder, &req); - tDecoderClear(&decoder); - - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, req.downstreamTaskId); - if (pTask == NULL) { - tqError("vgId:%d process scan history finish msg, failed to find task:0x%x, it may be destroyed", - pTq->pStreamMeta->vgId, req.downstreamTaskId); - return -1; - } - - tqDebug("s-task:%s receive scan-history finish msg from task:0x%x", pTask->id.idStr, req.upstreamTaskId); - - int32_t code = streamProcessScanHistoryFinishReq(pTask, &req, &pMsg->info); - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - return code; + return tqStreamTaskProcessScanHistoryFinishReq(pTq->pStreamMeta, pMsg); } 
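/*
 * Illustrative sketch, not part of this patch: the shape of the one-line wrappers that
 * replace the old handler bodies in tq.c. The shared tqCommon handlers take only the
 * stream meta and the RPC message, so the same logic can serve different callers of the
 * stream runtime. The stub types below are simplified stand-ins for SStreamMeta and
 * SRpcMsg, not the real definitions.
 */
#include <stdio.h>

typedef struct { int vgId; } StreamMetaStub;  /* stand-in for SStreamMeta */
typedef struct { int contLen; } RpcMsgStub;   /* stand-in for SRpcMsg */

/* shared handler: knows nothing about the vnode, only about the stream meta */
static int sharedProcessScanHistoryFinishReq(StreamMetaStub *pMeta, RpcMsgStub *pMsg) {
  printf("vgId:%d handle scan-history finish req, contLen:%d\n", pMeta->vgId, pMsg->contLen);
  return 0;
}

/* thin vnode-side wrapper, mirroring the wrappers introduced by this patch */
static int vnodeProcessScanHistoryFinishReq(StreamMetaStub *pStreamMeta, RpcMsgStub *pMsg) {
  return sharedProcessScanHistoryFinishReq(pStreamMeta, pMsg);
}

int main(void) {
  StreamMetaStub meta = {.vgId = 2};
  RpcMsgStub    msg  = {.contLen = 128};
  return vnodeProcessScanHistoryFinishReq(&meta, &msg);
}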
int32_t tqProcessTaskScanHistoryFinishRsp(STQ* pTq, SRpcMsg* pMsg) { - char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); - int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); - - // deserialize - SStreamCompleteHistoryMsg req = {0}; - - SDecoder decoder; - tDecoderInit(&decoder, (uint8_t*)msg, msgLen); - tDecodeCompleteHistoryDataMsg(&decoder, &req); - tDecoderClear(&decoder); - - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, req.upstreamTaskId); - if (pTask == NULL) { - tqError("vgId:%d process scan history finish rsp, failed to find task:0x%x, it may be destroyed", - pTq->pStreamMeta->vgId, req.upstreamTaskId); - return -1; - } - - int32_t remain = atomic_sub_fetch_32(&pTask->notReadyTasks, 1); - if (remain > 0) { - tqDebug("s-task:%s scan-history finish rsp received from downstream task:0x%x, unfinished remain:%d", - pTask->id.idStr, req.downstreamId, remain); - } else { - tqDebug( - "s-task:%s scan-history finish rsp received from downstream task:0x%x, all downstream tasks rsp scan-history " - "completed msg", - pTask->id.idStr, req.downstreamId); - streamProcessScanHistoryFinishRsp(pTask); - } - - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - return 0; + return tqStreamTaskProcessScanHistoryFinishRsp(pTq->pStreamMeta, pMsg); } int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { SStreamTaskRunReq* pReq = pMsg->pCont; int32_t taskId = pReq->taskId; - int32_t vgId = TD_VID(pTq->pVnode); if (taskId == STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID) { // all tasks are extracted submit data from the wal tqScanWal(pTq); return 0; - } else if (taskId == STREAM_EXEC_START_ALL_TASKS_ID) { - tqStartStreamTasks(pTq); - return 0; - } else if (taskId == STREAM_EXEC_RESTART_ALL_TASKS_ID) { - tqRestartStreamTasks(pTq); - return 0; } - - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->streamId, taskId); - if (pTask != NULL) { // even in halt status, the data in inputQ must be processed - char* p = NULL; - if (streamTaskReadyToRun(pTask, &p)) { - tqDebug("vgId:%d s-task:%s start to process block from inputQ, next checked ver:%" PRId64, vgId, pTask->id.idStr, - pTask->chkInfo.nextProcessVer); - streamExecTask(pTask); - } else { - int8_t status = streamTaskSetSchedStatusInactive(pTask); - tqDebug("vgId:%d s-task:%s ignore run req since not in ready state, status:%s, sched-status:%d", vgId, - pTask->id.idStr, p, status); - } - - streamMetaReleaseTask(pTq->pStreamMeta, pTask); + int32_t code = tqStreamTaskProcessRunReq(pTq->pStreamMeta, pMsg, vnodeIsRoleLeader(pTq->pVnode)); + if(code == 0 && taskId > 0){ tqScanWalAsync(pTq, false); - return 0; - } else { // NOTE: pTask->status.schedStatus is not updated since it is not be handled by the run exec. 
- // todo add one function to handle this - tqError("vgId:%d failed to found s-task, taskId:0x%x may have been dropped", vgId, taskId); - return -1; } + return code; } -int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec) { - char* msgStr = pMsg->pCont; - char* msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); - int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); - - SStreamDispatchReq req = {0}; - - SDecoder decoder; - tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen); - tDecodeStreamDispatchReq(&decoder, &req); - tDecoderClear(&decoder); - - tqDebug("s-task:0x%x recv dispatch msg from 0x%x(vgId:%d)", req.taskId, req.upstreamTaskId, req.upstreamNodeId); - - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, req.taskId); - if (pTask) { - SRpcMsg rsp = {.info = pMsg->info, .code = 0}; - streamProcessDispatchMsg(pTask, &req, &rsp); - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - return 0; - } else { - tqError("vgId:%d failed to find task:0x%x to handle the dispatch req, it may have been destroyed already", - pTq->pStreamMeta->vgId, req.taskId); - tDeleteStreamDispatchReq(&req); - return -1; - } +int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg) { + return tqStreamTaskProcessDispatchReq(pTq->pStreamMeta, pMsg); } int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) { - SStreamDispatchRsp* pRsp = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); - - int32_t vgId = pTq->pStreamMeta->vgId; - pRsp->upstreamTaskId = htonl(pRsp->upstreamTaskId); - pRsp->streamId = htobe64(pRsp->streamId); - pRsp->downstreamTaskId = htonl(pRsp->downstreamTaskId); - pRsp->downstreamNodeId = htonl(pRsp->downstreamNodeId); - pRsp->stage = htobe64(pRsp->stage); - pRsp->msgId = htonl(pRsp->msgId); - - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pRsp->streamId, pRsp->upstreamTaskId); - if (pTask) { - streamProcessDispatchRsp(pTask, pRsp, pMsg->code); - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - return TSDB_CODE_SUCCESS; - } else { - tqDebug("vgId:%d failed to handle the dispatch rsp, since find task:0x%x failed", vgId, pRsp->upstreamTaskId); - terrno = TSDB_CODE_STREAM_TASK_NOT_EXIST; - return terrno; - } + return tqStreamTaskProcessDispatchRsp(pTq->pStreamMeta, pMsg); } int32_t tqProcessTaskDropReq(STQ* pTq, char* msg, int32_t msgLen) { - SVDropStreamTaskReq* pReq = (SVDropStreamTaskReq*)msg; - - int32_t vgId = TD_VID(pTq->pVnode); - SStreamMeta* pMeta = pTq->pStreamMeta; - tqDebug("vgId:%d receive msg to drop s-task:0x%x", vgId, pReq->taskId); - - SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); - if (pTask != NULL) { - // drop the related fill-history task firstly - if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { - STaskId* pHTaskId = &pTask->hTaskInfo.id; - streamMetaUnregisterTask(pMeta, pHTaskId->streamId, pHTaskId->taskId); - tqDebug("vgId:%d drop fill-history task:0x%x dropped firstly", vgId, (int32_t)pHTaskId->taskId); - } - streamMetaReleaseTask(pMeta, pTask); - } - - // drop the stream task now - streamMetaUnregisterTask(pMeta, pReq->streamId, pReq->taskId); - - // commit the update - streamMetaWLock(pMeta); - int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); - tqDebug("vgId:%d task:0x%x dropped, remain tasks:%d", vgId, pReq->taskId, numOfTasks); - - if (streamMetaCommit(pMeta) < 0) { - // persist to disk - } - streamMetaWUnLock(pMeta); - - return 0; + return tqStreamTaskProcessDropReq(pTq->pStreamMeta, msg, msgLen); } int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { 
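/*
 * Illustrative sketch, not part of this patch: how the run-request handler above
 * multiplexes sentinel task ids (e.g. "extract data from WAL") and real task ids, and
 * only schedules a follow-up WAL scan after a real task ran successfully. The sentinel
 * value and helper names below are stand-ins for the real STREAM_EXEC_* constants and
 * tq functions.
 */
#include <stdio.h>

#define RUN_REQ_SCAN_WAL (-1)  /* stand-in for STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID */

static int  scanWal(void)          { printf("extract submit data from WAL\n"); return 0; }
static int  runOneTask(int taskId) { printf("execute stream task 0x%x\n", taskId); return 0; }
static void scanWalAsync(void)     { printf("schedule async WAL scan\n"); }

static int processRunReq(int taskId) {
  if (taskId == RUN_REQ_SCAN_WAL) {  /* sentinel id: not a real task */
    return scanWal();
  }
  int code = runOneTask(taskId);
  if (code == 0 && taskId > 0) {     /* only a real, successfully run task triggers a WAL scan */
    scanWalAsync();
  }
  return code;
}

int main(void) {
  processRunReq(RUN_REQ_SCAN_WAL);
  return processRunReq(0x1234);
}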
@@ -1484,7 +1179,6 @@ int32_t tqProcessTaskResumeImpl(STQ* pTq, SStreamTask* pTask, int64_t sversion, int32_t level = pTask->info.taskLevel; if (level == TASK_LEVEL__SINK) { if (status == TASK_STATUS__UNINIT) { - } streamMetaReleaseTask(pTq->pStreamMeta, pTask); return 0; @@ -1526,12 +1220,12 @@ int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms SVResumeStreamTaskReq* pReq = (SVResumeStreamTaskReq*)msg; SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->streamId, pReq->taskId); - int32_t code = tqProcessTaskResumeImpl(pTq, pTask, sversion, pReq->igUntreated); + int32_t code = tqProcessTaskResumeImpl(pTq, pTask, sversion, pReq->igUntreated); if (code != 0) { return code; } - STaskId* pHTaskId = &pTask->hTaskInfo.id; + STaskId* pHTaskId = &pTask->hTaskInfo.id; SStreamTask* pHistoryTask = streamMetaAcquireTask(pTq->pStreamMeta, pHTaskId->streamId, pHTaskId->taskId); if (pHistoryTask) { code = tqProcessTaskResumeImpl(pTq, pHistoryTask, sversion, pReq->igUntreated); @@ -1541,30 +1235,7 @@ int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms } int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg) { - char* msgStr = pMsg->pCont; - char* msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); - int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); - SDecoder decoder; - - SStreamRetrieveReq req; - tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen); - tDecodeStreamRetrieveReq(&decoder, &req); - tDecoderClear(&decoder); - - int32_t vgId = pTq->pStreamMeta->vgId; - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, req.dstTaskId); - if (pTask == NULL) { - tqError("vgId:%d process retrieve req, failed to acquire task:0x%x, it may have been dropped already", vgId, - req.dstTaskId); - return -1; - } - - SRpcMsg rsp = {.info = pMsg->info, .code = 0}; - streamProcessRetrieveReq(pTask, &req, &rsp); - - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - tDeleteStreamRetrieveReq(&req); - return 0; + return tqStreamTaskProcessRetrieveReq(pTq->pStreamMeta, pMsg); } int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg) { @@ -1572,88 +1243,6 @@ int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg) { return 0; } -// todo refactor. 
-int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) { - STQ* pTq = pVnode->pTq; - int32_t vgId = pVnode->config.vgId; - - SMsgHead* msgStr = pMsg->pCont; - char* msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); - int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); - int32_t code = 0; - - SStreamDispatchReq req; - SDecoder decoder; - tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen); - if (tDecodeStreamDispatchReq(&decoder, &req) < 0) { - code = TSDB_CODE_MSG_DECODE_ERROR; - tDecoderClear(&decoder); - goto FAIL; - } - tDecoderClear(&decoder); - - int32_t taskId = req.taskId; - tqDebug("vgId:%d receive dispatch msg to s-task:0x%" PRIx64 "-0x%x", vgId, req.streamId, taskId); - - // for test purpose -// if (req.type == STREAM_INPUT__CHECKPOINT_TRIGGER) { -// code = TSDB_CODE_STREAM_TASK_NOT_EXIST; -// goto FAIL; -// } - - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, taskId); - if (pTask != NULL) { - SRpcMsg rsp = {.info = pMsg->info, .code = 0}; - streamProcessDispatchMsg(pTask, &req, &rsp); - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - rpcFreeCont(pMsg->pCont); - taosFreeQitem(pMsg); - return 0; - } else { - tDeleteStreamDispatchReq(&req); - } - - code = TSDB_CODE_STREAM_TASK_NOT_EXIST; - -FAIL: - if (pMsg->info.handle == NULL) { - tqError("s-task:0x%x vgId:%d msg handle is null, abort enqueue dispatch msg", vgId, taskId); - return -1; - } - - SMsgHead* pRspHead = rpcMallocCont(sizeof(SMsgHead) + sizeof(SStreamDispatchRsp)); - if (pRspHead == NULL) { - SRpcMsg rsp = {.code = TSDB_CODE_OUT_OF_MEMORY, .info = pMsg->info}; - tqError("s-task:0x%x send dispatch error rsp, code:%s", taskId, tstrerror(code)); - tmsgSendRsp(&rsp); - rpcFreeCont(pMsg->pCont); - taosFreeQitem(pMsg); - return -1; - } - - pRspHead->vgId = htonl(req.upstreamNodeId); - ASSERT(pRspHead->vgId != 0); - - SStreamDispatchRsp* pRsp = POINTER_SHIFT(pRspHead, sizeof(SMsgHead)); - pRsp->streamId = htobe64(req.streamId); - pRsp->upstreamTaskId = htonl(req.upstreamTaskId); - pRsp->upstreamNodeId = htonl(req.upstreamNodeId); - pRsp->downstreamNodeId = htonl(pVnode->config.vgId); - pRsp->downstreamTaskId = htonl(req.taskId); - pRsp->msgId = htonl(req.msgId); - pRsp->stage = htobe64(req.stage); - pRsp->inputStatus = TASK_OUTPUT_STATUS__NORMAL; - - int32_t len = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp); - SRpcMsg rsp = {.code = code, .info = pMsg->info, .contLen = len, .pCont = pRspHead}; - tqError("s-task:0x%x send dispatch error rsp, code:%s", taskId, tstrerror(code)); - - tmsgSendRsp(&rsp); - rpcFreeCont(pMsg->pCont); - taosFreeQitem(pMsg); - return -1; -} - int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) { int32_t vgId = TD_VID(pTq->pVnode); SStreamMeta* pMeta = pTq->pStreamMeta; @@ -1669,7 +1258,7 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) tqDebug("vgId:%d not leader, ignore checkpoint-source msg, s-task:0x%x", vgId, req.taskId); SRpcMsg rsp = {0}; buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); - tmsgSendRsp(&rsp); // error occurs + tmsgSendRsp(&rsp); // error occurs return TSDB_CODE_SUCCESS; } @@ -1677,7 +1266,7 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) tqDebug("vgId:%d checkpoint-source msg received during restoring, s-task:0x%x ignore it", vgId, req.taskId); SRpcMsg rsp = {0}; buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); - tmsgSendRsp(&rsp); // error occurs + tmsgSendRsp(&rsp); // error occurs return TSDB_CODE_SUCCESS; } @@ -1689,7 +1278,7 @@ 
int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) tqError("vgId:%d failed to decode checkpoint-source msg, code:%s", vgId, tstrerror(code)); SRpcMsg rsp = {0}; buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); - tmsgSendRsp(&rsp); // error occurs + tmsgSendRsp(&rsp); // error occurs return code; } tDecoderClear(&decoder); @@ -1700,22 +1289,22 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) req.taskId); SRpcMsg rsp = {0}; buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); - tmsgSendRsp(&rsp); // error occurs + tmsgSendRsp(&rsp); // error occurs return TSDB_CODE_SUCCESS; } - // downstream not ready, current the stream tasks are not all ready. Ignore this checkpoint req. if (pTask->status.downstreamReady != 1) { - pTask->chkInfo.failedId = req.checkpointId; // record the latest failed checkpoint id - pTask->checkpointingId = req.checkpointId; + pTask->chkInfo.failedId = req.checkpointId; // record the latest failed checkpoint id + pTask->chkInfo.checkpointingId = req.checkpointId; - qError("s-task:%s not ready for checkpoint, since downstream not ready, ignore this checkpoint:%" PRId64 - ", set it failure", pTask->id.idStr, req.checkpointId); + tqError("s-task:%s not ready for checkpoint, since downstream not ready, ignore this checkpoint:%" PRId64 + ", set it failure", + pTask->id.idStr, req.checkpointId); streamMetaReleaseTask(pMeta, pTask); SRpcMsg rsp = {0}; buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); - tmsgSendRsp(&rsp); // error occurs + tmsgSendRsp(&rsp); // error occurs return TSDB_CODE_SUCCESS; } @@ -1725,24 +1314,24 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) if (status == TASK_STATUS__HALT || status == TASK_STATUS__PAUSE) { tqError("s-task:%s not ready for checkpoint, since it is halt, ignore this checkpoint:%" PRId64 ", set it failure", - pTask->id.idStr, req.checkpointId); + pTask->id.idStr, req.checkpointId); taosThreadMutexUnlock(&pTask->lock); streamMetaReleaseTask(pMeta, pTask); SRpcMsg rsp = {0}; buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); - tmsgSendRsp(&rsp); // error occurs + tmsgSendRsp(&rsp); // error occurs return TSDB_CODE_SUCCESS; } // check if the checkpoint msg already sent or not. 
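/*
 * Illustrative sketch, not part of this patch: the reject-and-reply pattern repeated in
 * tqProcessTaskCheckPointSourceReq. Whenever the vnode cannot start the requested
 * checkpoint (follower role, still restoring, task missing, halted/paused, downstream
 * not ready), it still answers the mnode with a rsp so the checkpoint transaction does
 * not hang. The enum and helper names below are stand-ins, not the real TDengine API.
 */
#include <stdbool.h>
#include <stdio.h>

typedef enum { CKPT_OK = 0, CKPT_NOT_LEADER, CKPT_RESTORING, CKPT_TASK_NOT_READY } CkptReject;

/* stand-in for buildCheckpointSourceRsp() followed by tmsgSendRsp() */
static void replyCheckpointSource(int taskId, bool success) {
  printf("reply checkpoint-source rsp for task 0x%x, success:%d\n", taskId, (int)success);
}

static int handleCheckpointSource(int taskId, CkptReject reject) {
  if (reject != CKPT_OK) {
    replyCheckpointSource(taskId, false);  /* always answer, even when rejecting */
    return 0;                              /* rejection is reported in the rsp, not as an RPC error */
  }
  /* accepted: the real code registers the rsp and sends it once the checkpoint completes */
  printf("start checkpoint procedure for task 0x%x\n", taskId);
  return 0;
}

int main(void) {
  handleCheckpointSource(0x2001, CKPT_NOT_LEADER);
  return handleCheckpointSource(0x2001, CKPT_OK);
}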
if (status == TASK_STATUS__CK) { - ASSERT(pTask->checkpointingId == req.checkpointId); + ASSERT(pTask->chkInfo.checkpointingId == req.checkpointId); tqWarn("s-task:%s recv checkpoint-source msg again checkpointId:%" PRId64 " already received, ignore this msg and continue process checkpoint", - pTask->id.idStr, pTask->checkpointingId); + pTask->id.idStr, pTask->chkInfo.checkpointingId); taosThreadMutexUnlock(&pTask->lock); streamMetaReleaseTask(pMeta, pTask); @@ -1758,21 +1347,17 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) // set the initial value for generating check point // set the mgmt epset info according to the checkout source msg from mnode, todo update mgmt epset if needed - if (pMeta->chkptNotReadyTasks == 0) { - pMeta->chkptNotReadyTasks = pMeta->numOfStreamTasks; - } - total = pMeta->numOfStreamTasks; streamMetaWUnLock(pMeta); qInfo("s-task:%s (vgId:%d) level:%d receive checkpoint-source msg chkpt:%" PRId64 ", total checkpoint reqs:%d", - pTask->id.idStr, vgId, pTask->info.taskLevel, req.checkpointId, total); + pTask->id.idStr, vgId, pTask->info.taskLevel, req.checkpointId, total); code = streamAddCheckpointSourceRspMsg(&req, &pMsg->info, pTask, 1); if (code != TSDB_CODE_SUCCESS) { SRpcMsg rsp = {0}; buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); - tmsgSendRsp(&rsp); // error occurs + tmsgSendRsp(&rsp); // error occurs return code; } @@ -1782,229 +1367,21 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) // downstream task has complete the stream task checkpoint procedure, let's start the handle the rsp by execute task int32_t tqProcessTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg) { - int32_t vgId = TD_VID(pTq->pVnode); - SStreamMeta* pMeta = pTq->pStreamMeta; - char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); - int32_t len = pMsg->contLen - sizeof(SMsgHead); - int32_t code = 0; - - SStreamCheckpointReadyMsg req = {0}; - - SDecoder decoder; - tDecoderInit(&decoder, (uint8_t*)msg, len); - if (tDecodeStreamCheckpointReadyMsg(&decoder, &req) < 0) { - code = TSDB_CODE_MSG_DECODE_ERROR; - tDecoderClear(&decoder); - return code; - } - tDecoderClear(&decoder); - - SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, req.upstreamTaskId); - if (pTask == NULL) { - tqError("vgId:%d failed to find s-task:0x%x, it may have been destroyed already", vgId, req.downstreamTaskId); - return code; - } - - tqDebug("vgId:%d s-task:%s received the checkpoint ready msg from task:0x%x (vgId:%d), handle it", vgId, - pTask->id.idStr, req.downstreamTaskId, req.downstreamNodeId); - - streamProcessCheckpointReadyMsg(pTask); - streamMetaReleaseTask(pMeta, pTask); - return code; + return tqStreamTaskProcessCheckpointReadyMsg(pTq->pStreamMeta, pMsg); } int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) { - SStreamMeta* pMeta = pTq->pStreamMeta; - int32_t vgId = TD_VID(pTq->pVnode); - char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); - int32_t len = pMsg->contLen - sizeof(SMsgHead); - SRpcMsg rsp = {.info = pMsg->info, .code = TSDB_CODE_SUCCESS}; - - SStreamTaskNodeUpdateMsg req = {0}; - - SDecoder decoder; - tDecoderInit(&decoder, (uint8_t*)msg, len); - if (tDecodeStreamTaskUpdateMsg(&decoder, &req) < 0) { - rsp.code = TSDB_CODE_MSG_DECODE_ERROR; - tqError("vgId:%d failed to decode task update msg, code:%s", vgId, tstrerror(rsp.code)); - tDecoderClear(&decoder); - return rsp.code; - } - - tDecoderClear(&decoder); - - // update the nodeEpset when it exists - streamMetaWLock(pMeta); - - // the task 
epset may be updated again and again, when replaying the WAL, the task may be in stop status. - STaskId id = {.streamId = req.streamId, .taskId = req.taskId}; - SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); - if (ppTask == NULL || *ppTask == NULL) { - tqError("vgId:%d failed to acquire task:0x%x when handling update, it may have been dropped already", pMeta->vgId, - req.taskId); - rsp.code = TSDB_CODE_SUCCESS; - streamMetaWUnLock(pMeta); - - taosArrayDestroy(req.pNodeList); - return rsp.code; - } - - SStreamTask* pTask = *ppTask; - - if (pMeta->updateInfo.transId != req.transId) { - pMeta->updateInfo.transId = req.transId; - tqInfo("s-task:%s receive new trans to update nodeEp msg from mnode, transId:%d", pTask->id.idStr, req.transId); - // info needs to be kept till the new trans to update the nodeEp arrived. - taosHashClear(pMeta->updateInfo.pTasks); - } else { - tqDebug("s-task:%s recv trans to update nodeEp from mnode, transId:%d", pTask->id.idStr, req.transId); - } - - STaskUpdateEntry entry = {.streamId = req.streamId, .taskId = req.taskId, .transId = req.transId}; - void* exist = taosHashGet(pMeta->updateInfo.pTasks, &entry, sizeof(STaskUpdateEntry)); - if (exist != NULL) { - tqDebug("s-task:%s (vgId:%d) already update in trans:%d, discard the nodeEp update msg", pTask->id.idStr, vgId, - req.transId); - rsp.code = TSDB_CODE_SUCCESS; - streamMetaWUnLock(pMeta); - taosArrayDestroy(req.pNodeList); - return rsp.code; - } - - streamMetaWUnLock(pMeta); - - // the following two functions should not be executed within the scope of meta lock to avoid deadlock - streamTaskUpdateEpsetInfo(pTask, req.pNodeList); - streamTaskResetStatus(pTask); - - // continue after lock the meta again - streamMetaWLock(pMeta); - - SStreamTask** ppHTask = NULL; - if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { - ppHTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &pTask->hTaskInfo.id, sizeof(pTask->hTaskInfo.id)); - if (ppHTask == NULL || *ppHTask == NULL) { - tqError("vgId:%d failed to acquire fill-history task:0x%x when handling update, it may have been dropped already", - pMeta->vgId, req.taskId); - CLEAR_RELATED_FILLHISTORY_TASK(pTask); - } else { - tqDebug("s-task:%s fill-history task update nodeEp along with stream task", (*ppHTask)->id.idStr); - streamTaskUpdateEpsetInfo(*ppHTask, req.pNodeList); - } - } - - { - streamMetaSaveTask(pMeta, pTask); - if (ppHTask != NULL) { - streamMetaSaveTask(pMeta, *ppHTask); - } - - if (streamMetaCommit(pMeta) < 0) { - // persist to disk - } - } - - streamTaskStop(pTask); - - // keep the already handled info - taosHashPut(pMeta->updateInfo.pTasks, &entry, sizeof(entry), NULL, 0); - - if (ppHTask != NULL) { - streamTaskStop(*ppHTask); - tqDebug("s-task:%s task nodeEp update completed, streamTask and related fill-history task closed", pTask->id.idStr); - taosHashPut(pMeta->updateInfo.pTasks, &(*ppHTask)->id, sizeof(pTask->id), NULL, 0); - } else { - tqDebug("s-task:%s task nodeEp update completed, streamTask closed", pTask->id.idStr); - } - - rsp.code = 0; - - // possibly only handle the stream task. 
- int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); - int32_t updateTasks = taosHashGetSize(pMeta->updateInfo.pTasks); - - pMeta->startInfo.tasksWillRestart = 1; - - if (updateTasks < numOfTasks) { - tqDebug("vgId:%d closed tasks:%d, unclosed:%d, all tasks will be started when nodeEp update completed", vgId, - updateTasks, (numOfTasks - updateTasks)); - streamMetaWUnLock(pMeta); - } else { - if (!pTq->pVnode->restored) { - tqDebug("vgId:%d vnode restore not completed, not restart the tasks, clear the start after nodeUpdate flag", vgId); - pMeta->startInfo.tasksWillRestart = 0; - streamMetaWUnLock(pMeta); - } else { - tqDebug("vgId:%d all %d task(s) nodeEp updated and closed", vgId, numOfTasks); - -#if 1 - tqStartStreamTaskAsync(pTq, true); - streamMetaWUnLock(pMeta); -#else - streamMetaWUnLock(pMeta); - - // For debug purpose. - // the following procedure consume many CPU resource, result in the re-election of leader - // with high probability. So we employ it as a test case for the stream processing framework, with - // checkpoint/restart/nodeUpdate etc. - while(1) { - int32_t startVal = atomic_val_compare_exchange_32(&pMeta->startInfo.taskStarting, 0, 1); - if (startVal == 0) { - break; - } - - tqDebug("vgId:%d in start stream tasks procedure, wait for 500ms and recheck", vgId); - taosMsleep(500); - } - - while (streamMetaTaskInTimer(pMeta)) { - tqDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId); - taosMsleep(100); - } - - streamMetaWLock(pMeta); - - int32_t code = streamMetaReopen(pMeta); - if (code != 0) { - tqError("vgId:%d failed to reopen stream meta", vgId); - streamMetaWUnLock(pMeta); - taosArrayDestroy(req.pNodeList); - return -1; - } - - streamMetaInitBackend(pMeta); - - if (streamMetaLoadAllTasks(pTq->pStreamMeta) < 0) { - tqError("vgId:%d failed to load stream tasks", vgId); - streamMetaWUnLock(pMeta); - taosArrayDestroy(req.pNodeList); - return -1; - } - - if (vnodeIsRoleLeader(pTq->pVnode) && !tsDisableStream) { - tqInfo("vgId:%d start all stream tasks after all being updated", vgId); - tqResetStreamTaskStatus(pTq); - tqStartStreamTaskAsync(pTq, false); - } else { - tqInfo("vgId:%d, follower node not start stream tasks", vgId); - } - streamMetaWUnLock(pMeta); -#endif - } - } - - taosArrayDestroy(req.pNodeList); - return rsp.code; + return tqStreamTaskProcessUpdateReq(pTq->pStreamMeta, &pTq->pVnode->msgCb, pMsg, pTq->pVnode->restored); } int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg) { - SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*) pMsg->pCont; + SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*)pMsg->pCont; SStreamMeta* pMeta = pTq->pStreamMeta; SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); if (pTask == NULL) { - tqError("vgId:%d process task-reset req, failed to acquire task:0x%x, it may have been dropped already", pMeta->vgId, - pReq->taskId); + tqError("vgId:%d process task-reset req, failed to acquire task:0x%x, it may have been dropped already", + pMeta->vgId, pReq->taskId); return TSDB_CODE_SUCCESS; } @@ -2012,11 +1389,44 @@ int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg) { // clear flag set during do checkpoint, and open inputQ for all upstream tasks if (streamTaskGetStatus(pTask, NULL) == TASK_STATUS__CK) { - streamTaskClearCheckInfo(pTask); - taosArrayClear(pTask->pReadyMsgList); + streamTaskClearCheckInfo(pTask, true); streamTaskSetStatusReady(pTask); } streamMetaReleaseTask(pMeta, pTask); return TSDB_CODE_SUCCESS; } + +int32_t tqProcessTaskDropHTask(STQ* pTq, SRpcMsg* pMsg) 
{ + SVDropHTaskReq* pReq = (SVDropHTaskReq*)pMsg->pCont; + + SStreamMeta* pMeta = pTq->pStreamMeta; + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); + if (pTask == NULL) { + tqError("vgId:%d process drop fill-history task req, failed to acquire task:0x%x, it may have been dropped already", + pMeta->vgId, pReq->taskId); + return TSDB_CODE_SUCCESS; + } + + tqDebug("s-task:%s receive drop fill-history msg from mnode", pTask->id.idStr); + if (pTask->hTaskInfo.id.taskId == 0) { + tqError("vgId:%d s-task:%s not have related fill-history task", pMeta->vgId, pTask->id.idStr); + streamMetaReleaseTask(pMeta, pTask); + return TSDB_CODE_SUCCESS; + } + + ETaskStatus status = streamTaskGetStatus(pTask, NULL); + ASSERT(status == TASK_STATUS__STREAM_SCAN_HISTORY); + + streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_SCANHIST_DONE); + + SStreamTaskId id = {.streamId = pTask->hTaskInfo.id.streamId, .taskId = pTask->hTaskInfo.id.taskId}; + streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &id); + + // clear the scheduler status + streamTaskSetSchedStatusInactive(pTask); + tqDebug("s-task:%s set scheduler status:%d after drop fill-history task", pTask->id.idStr, pTask->status.schedStatus); + streamMetaReleaseTask(pMeta, pTask); + return TSDB_CODE_SUCCESS; +} + diff --git a/source/dnode/vnode/src/tq/tqStreamStateSnap.c b/source/dnode/vnode/src/tq/tqStreamStateSnap.c index 7a8147f83b..2ab710176d 100644 --- a/source/dnode/vnode/src/tq/tqStreamStateSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamStateSnap.c @@ -104,8 +104,8 @@ int32_t streamStateSnapRead(SStreamStateReader* pReader, uint8_t** ppData) { pHdr->type = SNAP_DATA_STREAM_STATE_BACKEND; pHdr->size = len; memcpy(pHdr->data, rowData, len); - tqDebug("vgId:%d, vnode stream-state snapshot read data success", TD_VID(pReader->pTq->pVnode)); taosMemoryFree(rowData); + tqDebug("vgId:%d, vnode stream-state snapshot read data success", TD_VID(pReader->pTq->pVnode)); return code; _err: @@ -139,7 +139,7 @@ int32_t streamStateSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamS pWriter->sver = sver; pWriter->ever = ever; - sprintf(tdir, "%s%s%s%s%s", pTq->path, TD_DIRSEP, VNODE_TQ_STREAM, TD_DIRSEP, "received"); + sprintf(tdir, "%s%s%s", pTq->path, TD_DIRSEP, VNODE_TQ_STREAM); taosMkDir(tdir); SStreamSnapWriter* pSnapWriter = NULL; @@ -167,25 +167,19 @@ int32_t streamStateSnapWriterClose(SStreamStateWriter* pWriter, int8_t rollback) return code; } -int32_t streamStateRebuildFromSnap(SStreamStateWriter* pWriter, int64_t chkpId) { - tqDebug("vgId:%d, vnode %s start to rebuild stream-state", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); - - streamMetaWLock(pWriter->pTq->pStreamMeta); - int32_t code = streamMetaReopen(pWriter->pTq->pStreamMeta); - if (code == 0) { - streamMetaInitBackend(pWriter->pTq->pStreamMeta); - code = streamStateLoadTasks(pWriter); - } - - streamMetaWUnLock(pWriter->pTq->pStreamMeta); - tqDebug("vgId:%d, vnode %s succ to rebuild stream-state", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); - taosMemoryFree(pWriter); - return code; -} - -int32_t streamStateLoadTasks(SStreamStateWriter* pWriter) { return streamMetaLoadAllTasks(pWriter->pTq->pStreamMeta); } int32_t streamStateSnapWrite(SStreamStateWriter* pWriter, uint8_t* pData, uint32_t nData) { tqDebug("vgId:%d, vnode %s snapshot write data", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); return streamSnapWrite(pWriter->pWriterImpl, pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr)); } +int32_t 
streamStateRebuildFromSnap(SStreamStateWriter* pWriter, int64_t chkpId) { + tqDebug("vgId:%d, vnode %s start to rebuild stream-state", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); + int32_t code = streamStateLoadTasks(pWriter); + tqDebug("vgId:%d, vnode %s succ to rebuild stream-state", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); + taosMemoryFree(pWriter); + return code; +} + +int32_t streamStateLoadTasks(SStreamStateWriter* pWriter) { + return streamMetaReloadAllTasks(pWriter->pTq->pStreamMeta); +} diff --git a/source/dnode/vnode/src/tq/tqStreamTask.c b/source/dnode/vnode/src/tq/tqStreamTask.c index 4c0491da86..1b0a76e81c 100644 --- a/source/dnode/vnode/src/tq/tqStreamTask.c +++ b/source/dnode/vnode/src/tq/tqStreamTask.c @@ -60,160 +60,14 @@ int32_t tqScanWal(STQ* pTq) { return 0; } -int32_t tqStartStreamTasks(STQ* pTq) { - int32_t code = TSDB_CODE_SUCCESS; - int32_t vgId = TD_VID(pTq->pVnode); - SStreamMeta* pMeta = pTq->pStreamMeta; - - int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); - tqDebug("vgId:%d start to check all %d stream task(s) downstream status", vgId, numOfTasks); - if (numOfTasks == 0) { - return TSDB_CODE_SUCCESS; - } - - SArray* pTaskList = NULL; - streamMetaWLock(pMeta); - pTaskList = taosArrayDup(pMeta->pTaskList, NULL); - taosHashClear(pMeta->startInfo.pReadyTaskSet); - taosHashClear(pMeta->startInfo.pFailedTaskSet); - pMeta->startInfo.startTs = taosGetTimestampMs(); - streamMetaWUnLock(pMeta); - - // broadcast the check downstream tasks msg - for (int32_t i = 0; i < numOfTasks; ++i) { - SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i); - SStreamTask* pTask = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId); - if (pTask == NULL) { - continue; - } - - // fill-history task can only be launched by related stream tasks. - if (pTask->info.fillHistory == 1) { - streamMetaReleaseTask(pMeta, pTask); - continue; - } - - if (pTask->status.downstreamReady == 1) { - if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { - tqDebug("s-task:%s downstream ready, no need to check downstream, check only related fill-history task", - pTask->id.idStr); - streamLaunchFillHistoryTask(pTask); - } - - streamMetaUpdateTaskDownstreamStatus(pTask, pTask->execInfo.init, pTask->execInfo.start, true); - streamMetaReleaseTask(pMeta, pTask); - continue; - } - - EStreamTaskEvent event = (HAS_RELATED_FILLHISTORY_TASK(pTask)) ? 
TASK_EVENT_INIT_STREAM_SCANHIST : TASK_EVENT_INIT; - int32_t ret = streamTaskHandleEvent(pTask->status.pSM, event); - if (ret != TSDB_CODE_SUCCESS) { - code = ret; - } - - streamMetaReleaseTask(pMeta, pTask); - } - - taosArrayDestroy(pTaskList); - return code; -} - -int32_t tqRestartStreamTasks(STQ* pTq) { - SStreamMeta* pMeta = pTq->pStreamMeta; - int32_t vgId = pMeta->vgId; - int32_t code = 0; - int64_t st = taosGetTimestampMs(); - - while(1) { - int32_t startVal = atomic_val_compare_exchange_32(&pMeta->startInfo.taskStarting, 0, 1); - if (startVal == 0) { - break; - } - - tqDebug("vgId:%d in start stream tasks procedure, wait for 500ms and recheck", vgId); - taosMsleep(500); - } - - terrno = 0; - tqInfo("vgId:%d tasks are all updated and stopped, restart all tasks, triggered by transId:%d", vgId, - pMeta->updateInfo.transId); - - while (streamMetaTaskInTimer(pMeta)) { - tqDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId); - taosMsleep(100); - } - - streamMetaWLock(pMeta); - code = streamMetaReopen(pMeta); - if (code != TSDB_CODE_SUCCESS) { - tqError("vgId:%d failed to reopen stream meta", vgId); - streamMetaWUnLock(pMeta); - code = terrno; - return code; - } - - streamMetaInitBackend(pMeta); - int64_t el = taosGetTimestampMs() - st; - - tqInfo("vgId:%d close&reload state elapsed time:%.3fs", vgId, el/1000.); - - code = streamMetaLoadAllTasks(pTq->pStreamMeta); - if (code != TSDB_CODE_SUCCESS) { - tqError("vgId:%d failed to load stream tasks, code:%s", vgId, tstrerror(terrno)); - streamMetaWUnLock(pMeta); - code = terrno; - return code; - } - - if (vnodeIsRoleLeader(pTq->pVnode) && !tsDisableStream) { - tqInfo("vgId:%d restart all stream tasks after all tasks being updated", vgId); - tqResetStreamTaskStatus(pTq); - - streamMetaWUnLock(pMeta); - tqStartStreamTasks(pTq); - } else { - streamMetaResetStartInfo(&pMeta->startInfo); - streamMetaWUnLock(pMeta); - tqInfo("vgId:%d, follower node not start stream tasks", vgId); - } - - code = terrno; - return code; -} - -int32_t tqStartStreamTaskAsync(STQ* pTq, bool restart) { - SStreamMeta* pMeta = pTq->pStreamMeta; - int32_t vgId = pMeta->vgId; - - int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); - if (numOfTasks == 0) { - tqDebug("vgId:%d no stream tasks existed to run", vgId); - return 0; - } - - SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); - if (pRunReq == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - tqError("vgId:%d failed to create msg to start wal scanning to launch stream tasks, code:%s", vgId, terrstr()); - return -1; - } - - tqDebug("vgId:%d start all %d stream task(s) async", vgId, numOfTasks); - pRunReq->head.vgId = vgId; - pRunReq->streamId = 0; - pRunReq->taskId = restart? STREAM_EXEC_RESTART_ALL_TASKS_ID:STREAM_EXEC_START_ALL_TASKS_ID; - - SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; - tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg); - return 0; -} - int32_t tqScanWalAsync(STQ* pTq, bool ckPause) { int32_t vgId = TD_VID(pTq->pVnode); SStreamMeta* pMeta = pTq->pStreamMeta; + bool alreadyRestored = pTq->pVnode->restored; + // do not launch the stream tasks, if it is a follower or not restored vnode. 
- if (!(vnodeIsRoleLeader(pTq->pVnode) && pTq->pVnode->restored)) { + if (!(vnodeIsRoleLeader(pTq->pVnode) && alreadyRestored)) { return TSDB_CODE_SUCCESS; } @@ -255,7 +109,9 @@ int32_t tqScanWalAsync(STQ* pTq, bool ckPause) { return -1; } - tqDebug("vgId:%d create msg to start wal scan to launch stream tasks, numOfTasks:%d", vgId, numOfTasks); + tqDebug("vgId:%d create msg to start wal scan to launch stream tasks, numOfTasks:%d, restored:%d", vgId, numOfTasks, + alreadyRestored); + pRunReq->head.vgId = vgId; pRunReq->streamId = 0; pRunReq->taskId = STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID; @@ -297,27 +153,6 @@ int32_t tqStopStreamTasks(STQ* pTq) { return 0; } -int32_t tqResetStreamTaskStatus(STQ* pTq) { - SStreamMeta* pMeta = pTq->pStreamMeta; - int32_t vgId = TD_VID(pTq->pVnode); - int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); - - tqDebug("vgId:%d reset all %d stream task(s) status to be uninit", vgId, numOfTasks); - if (numOfTasks == 0) { - return TSDB_CODE_SUCCESS; - } - - for (int32_t i = 0; i < numOfTasks; ++i) { - SStreamTaskId* pTaskId = taosArrayGet(pMeta->pTaskList, i); - - STaskId id = {.streamId = pTaskId->streamId, .taskId = pTaskId->taskId}; - SStreamTask** pTask = taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); - streamTaskResetStatus(*pTask); - } - - return 0; -} - int32_t setWalReaderStartOffset(SStreamTask* pTask, int32_t vgId) { // seek the stored version and extract data from WAL int64_t firstVer = walReaderGetValidFirstVer(pTask->exec.pWalReader); diff --git a/source/dnode/vnode/src/tq/tqStreamTaskSnap.c b/source/dnode/vnode/src/tq/tqStreamTaskSnap.c index f22ecc3daf..f966e90b9a 100644 --- a/source/dnode/vnode/src/tq/tqStreamTaskSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamTaskSnap.c @@ -238,7 +238,6 @@ int32_t streamTaskSnapWrite(SStreamTaskWriter* pWriter, uint8_t* pData, uint32_t goto _err; } tDecoderClear(&decoder); - // tdbTbInsert(TTB *pTb, const void *pKey, int keyLen, const void *pVal, int valLen, TXN *pTxn) int64_t key[2] = {taskId.streamId, taskId.taskId}; taosWLockLatch(&pTq->pStreamMeta->lock); diff --git a/source/dnode/vnode/src/tqCommon/CMakeLists.txt b/source/dnode/vnode/src/tqCommon/CMakeLists.txt new file mode 100644 index 0000000000..aea0e709e3 --- /dev/null +++ b/source/dnode/vnode/src/tqCommon/CMakeLists.txt @@ -0,0 +1,20 @@ +aux_source_directory(. TQ_SOURCE_FILES) +add_library(tqCommon STATIC ${TQ_SOURCE_FILES}) +target_include_directories( + tqCommon + PUBLIC "../inc" + PUBLIC "../../inc" +) + +target_link_libraries( + tqCommon + PRIVATE stream + PRIVATE common + PRIVATE transport + PRIVATE executor + PRIVATE index + PRIVATE qcom + PRIVATE qworker + PRIVATE sync + PRIVATE tfs +) diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c new file mode 100644 index 0000000000..aee2aaa244 --- /dev/null +++ b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -0,0 +1,809 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "tstream.h" +#include "tmsgcb.h" +#include "tq.h" + +typedef struct STaskUpdateEntry { + int64_t streamId; + int32_t taskId; + int32_t transId; +} STaskUpdateEntry; + +int32_t tqStreamTaskStartAsync(SStreamMeta* pMeta, SMsgCb* cb, bool restart) { + int32_t vgId = pMeta->vgId; + + int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); + if (numOfTasks == 0) { + tqDebug("vgId:%d no stream tasks existed to run", vgId); + return 0; + } + + SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); + if (pRunReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + tqError("vgId:%d failed to create msg to start wal scanning to launch stream tasks, code:%s", vgId, terrstr()); + return -1; + } + + tqDebug("vgId:%d start all %d stream task(s) async", vgId, numOfTasks); + pRunReq->head.vgId = vgId; + pRunReq->streamId = 0; + pRunReq->taskId = restart? STREAM_EXEC_RESTART_ALL_TASKS_ID:STREAM_EXEC_START_ALL_TASKS_ID; + + SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; + tmsgPutToQueue(cb, STREAM_QUEUE, &msg); + return 0; +} + +int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pMsg, bool restored) { + int32_t vgId = pMeta->vgId; + char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t len = pMsg->contLen - sizeof(SMsgHead); + SRpcMsg rsp = {.info = pMsg->info, .code = TSDB_CODE_SUCCESS}; + + SStreamTaskNodeUpdateMsg req = {0}; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)msg, len); + if (tDecodeStreamTaskUpdateMsg(&decoder, &req) < 0) { + rsp.code = TSDB_CODE_MSG_DECODE_ERROR; + tqError("vgId:%d failed to decode task update msg, code:%s", vgId, tstrerror(rsp.code)); + tDecoderClear(&decoder); + return rsp.code; + } + + tDecoderClear(&decoder); + + // update the nodeEpset when it exists + streamMetaWLock(pMeta); + + // the task epset may be updated again and again, when replaying the WAL, the task may be in stop status. + STaskId id = {.streamId = req.streamId, .taskId = req.taskId}; + SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); + if (ppTask == NULL || *ppTask == NULL) { + tqError("vgId:%d failed to acquire task:0x%x when handling update, it may have been dropped already", pMeta->vgId, + req.taskId); + rsp.code = TSDB_CODE_SUCCESS; + streamMetaWUnLock(pMeta); + + taosArrayDestroy(req.pNodeList); + return rsp.code; + } + + SStreamTask* pTask = *ppTask; + + if (pMeta->updateInfo.transId != req.transId) { + pMeta->updateInfo.transId = req.transId; + tqInfo("s-task:%s receive new trans to update nodeEp msg from mnode, transId:%d", pTask->id.idStr, req.transId); + // info needs to be kept till the new trans to update the nodeEp arrived. 
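/*
 * Illustrative sketch, not part of this patch: the per-transaction dedup used right
 * below. A node-epset update carries a transId; a new transId clears the "already
 * handled" set, and a repeated (streamId, taskId, transId) entry is discarded so that
 * replayed WAL entries do not update the same task twice. A plain array replaces the
 * real taosHash for brevity.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef struct { int64_t streamId; int32_t taskId; int32_t transId; } UpdateEntryStub;

static UpdateEntryStub handled[64];
static int             nHandled = 0;
static int32_t         currentTransId = -1;

static void applyEpsetUpdate(UpdateEntryStub e) {
  if (e.transId != currentTransId) {    /* a new update transaction resets the set */
    currentTransId = e.transId;
    nHandled = 0;
  }
  for (int i = 0; i < nHandled; ++i) {  /* discard duplicates within one transaction */
    if (memcmp(&handled[i], &e, sizeof(e)) == 0) {
      printf("task 0x%x already updated in trans %d, discard\n", e.taskId, e.transId);
      return;
    }
  }
  handled[nHandled++] = e;
  printf("update epset of task 0x%x in trans %d\n", e.taskId, e.transId);
}

int main(void) {
  UpdateEntryStub e = {.streamId = 1, .taskId = 0x10, .transId = 7};
  applyEpsetUpdate(e);
  applyEpsetUpdate(e);  /* duplicate within the same trans: discarded */
  return 0;
}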
+ taosHashClear(pMeta->updateInfo.pTasks); + } else { + tqDebug("s-task:%s recv trans to update nodeEp from mnode, transId:%d", pTask->id.idStr, req.transId); + } + + STaskUpdateEntry entry = {.streamId = req.streamId, .taskId = req.taskId, .transId = req.transId}; + void* exist = taosHashGet(pMeta->updateInfo.pTasks, &entry, sizeof(STaskUpdateEntry)); + if (exist != NULL) { + tqDebug("s-task:%s (vgId:%d) already update in trans:%d, discard the nodeEp update msg", pTask->id.idStr, vgId, + req.transId); + rsp.code = TSDB_CODE_SUCCESS; + streamMetaWUnLock(pMeta); + taosArrayDestroy(req.pNodeList); + return rsp.code; + } + + streamMetaWUnLock(pMeta); + + // the following two functions should not be executed within the scope of meta lock to avoid deadlock + streamTaskUpdateEpsetInfo(pTask, req.pNodeList); + streamTaskResetStatus(pTask); + + // continue after lock the meta again + streamMetaWLock(pMeta); + + SStreamTask** ppHTask = NULL; + if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { + ppHTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &pTask->hTaskInfo.id, sizeof(pTask->hTaskInfo.id)); + if (ppHTask == NULL || *ppHTask == NULL) { + tqError("vgId:%d failed to acquire fill-history task:0x%x when handling update, it may have been dropped already", + pMeta->vgId, req.taskId); + CLEAR_RELATED_FILLHISTORY_TASK(pTask); + } else { + tqDebug("s-task:%s fill-history task update nodeEp along with stream task", (*ppHTask)->id.idStr); + streamTaskUpdateEpsetInfo(*ppHTask, req.pNodeList); + } + } + + { + streamMetaSaveTask(pMeta, pTask); + if (ppHTask != NULL) { + streamMetaSaveTask(pMeta, *ppHTask); + } + + if (streamMetaCommit(pMeta) < 0) { + // persist to disk + } + } + + streamTaskStop(pTask); + + // keep the already handled info + taosHashPut(pMeta->updateInfo.pTasks, &entry, sizeof(entry), NULL, 0); + + if (ppHTask != NULL) { + streamTaskStop(*ppHTask); + tqDebug("s-task:%s task nodeEp update completed, streamTask and related fill-history task closed", pTask->id.idStr); + taosHashPut(pMeta->updateInfo.pTasks, &(*ppHTask)->id, sizeof(pTask->id), NULL, 0); + } else { + tqDebug("s-task:%s task nodeEp update completed, streamTask closed", pTask->id.idStr); + } + + rsp.code = 0; + + // possibly only handle the stream task. + int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); + int32_t updateTasks = taosHashGetSize(pMeta->updateInfo.pTasks); + + pMeta->startInfo.tasksWillRestart = 1; + + if (updateTasks < numOfTasks) { + tqDebug("vgId:%d closed tasks:%d, unclosed:%d, all tasks will be started when nodeEp update completed", vgId, + updateTasks, (numOfTasks - updateTasks)); + streamMetaWUnLock(pMeta); + } else { + if (!restored) { + tqDebug("vgId:%d vnode restore not completed, not restart the tasks, clear the start after nodeUpdate flag", vgId); + pMeta->startInfo.tasksWillRestart = 0; + streamMetaWUnLock(pMeta); + } else { + tqDebug("vgId:%d all %d task(s) nodeEp updated and closed", vgId, numOfTasks); +#if 1 + tqStreamTaskStartAsync(pMeta, cb, true); + streamMetaWUnLock(pMeta); +#else + streamMetaWUnLock(pMeta); + + // For debug purpose. + // the following procedure consume many CPU resource, result in the re-election of leader + // with high probability. So we employ it as a test case for the stream processing framework, with + // checkpoint/restart/nodeUpdate etc. 
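/*
 * Illustrative sketch, not part of this patch: the compare-and-swap guard used in the
 * disabled debug path below to ensure only one "start all stream tasks" procedure runs
 * at a time. C11 atomics stand in for the real atomic_val_compare_exchange_32 helper on
 * startInfo.taskStarting.
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int taskStarting = 0;  /* stand-in for pMeta->startInfo.taskStarting */

/* returns 1 once the caller owns the start procedure */
static int acquireStartProcedure(void) {
  int expected = 0;
  /* the real code loops with a 500ms sleep until the CAS succeeds; one attempt is shown here */
  if (atomic_compare_exchange_strong(&taskStarting, &expected, 1)) {
    return 1;  /* we own the start procedure now */
  }
  printf("another start procedure is running, retry later\n");
  return 0;
}

static void releaseStartProcedure(void) { atomic_store(&taskStarting, 0); }

int main(void) {
  if (acquireStartProcedure()) {
    printf("restart all stream tasks\n");
    releaseStartProcedure();
  }
  return 0;
}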
+ while (1) { + int32_t startVal = atomic_val_compare_exchange_32(&pMeta->startInfo.taskStarting, 0, 1); + if (startVal == 0) { + break; + } + + tqDebug("vgId:%d in start stream tasks procedure, wait for 500ms and recheck", vgId); + taosMsleep(500); + } + + while (streamMetaTaskInTimer(pMeta)) { + tqDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId); + taosMsleep(100); + } + + streamMetaWLock(pMeta); + + int32_t code = streamMetaReopen(pMeta); + if (code != 0) { + tqError("vgId:%d failed to reopen stream meta", vgId); + streamMetaWUnLock(pMeta); + taosArrayDestroy(req.pNodeList); + return -1; + } + + streamMetaInitBackend(pMeta); + + if (streamMetaLoadAllTasks(pTq->pStreamMeta) < 0) { + tqError("vgId:%d failed to load stream tasks", vgId); + streamMetaWUnLock(pMeta); + taosArrayDestroy(req.pNodeList); + return -1; + } + + if (vnodeIsRoleLeader(pTq->pVnode) && !tsDisableStream) { + tqInfo("vgId:%d start all stream tasks after all being updated", vgId); + resetStreamTaskStatus(pTq->pStreamMeta); + tqStartStreamTaskAsync(pTq, false); + } else { + tqInfo("vgId:%d, follower node not start stream tasks", vgId); + } + streamMetaWUnLock(pMeta); +#endif + } + } + + taosArrayDestroy(req.pNodeList); + return rsp.code; +} + +int32_t tqStreamTaskProcessDispatchReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { + char* msgStr = pMsg->pCont; + char* msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); + int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); + + SStreamDispatchReq req = {0}; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen); + if (tDecodeStreamDispatchReq(&decoder, &req) < 0) { + tDecoderClear(&decoder); + return TSDB_CODE_MSG_DECODE_ERROR; + } + tDecoderClear(&decoder); + + tqDebug("s-task:0x%x recv dispatch msg from 0x%x(vgId:%d)", req.taskId, req.upstreamTaskId, req.upstreamNodeId); + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, req.taskId); + if (pTask) { + SRpcMsg rsp = {.info = pMsg->info, .code = 0}; + if (streamProcessDispatchMsg(pTask, &req, &rsp) != 0){ + return -1; + } + tDeleteStreamDispatchReq(&req); + streamMetaReleaseTask(pMeta, pTask); + return 0; + } else { + tqError("vgId:%d failed to find task:0x%x to handle the dispatch req, it may have been destroyed already", + pMeta->vgId, req.taskId); + + SMsgHead* pRspHead = rpcMallocCont(sizeof(SMsgHead) + sizeof(SStreamDispatchRsp)); + if (pRspHead == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + tqError("s-task:0x%x send dispatch error rsp, out of memory", req.taskId); + return -1; + } + + pRspHead->vgId = htonl(req.upstreamNodeId); + ASSERT(pRspHead->vgId != 0); + + SStreamDispatchRsp* pRsp = POINTER_SHIFT(pRspHead, sizeof(SMsgHead)); + pRsp->streamId = htobe64(req.streamId); + pRsp->upstreamTaskId = htonl(req.upstreamTaskId); + pRsp->upstreamNodeId = htonl(req.upstreamNodeId); + pRsp->downstreamNodeId = htonl(pMeta->vgId); + pRsp->downstreamTaskId = htonl(req.taskId); + pRsp->msgId = htonl(req.msgId); + pRsp->stage = htobe64(req.stage); + pRsp->inputStatus = TASK_OUTPUT_STATUS__NORMAL; + + int32_t len = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp); + SRpcMsg rsp = {.code = TSDB_CODE_STREAM_TASK_NOT_EXIST, .info = pMsg->info, .contLen = len, .pCont = pRspHead}; + tqError("s-task:0x%x send dispatch error rsp, no task", req.taskId); + + tmsgSendRsp(&rsp); + tDeleteStreamDispatchReq(&req); + + return 0; + } +} + +int32_t tqStreamTaskProcessDispatchRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { + SStreamDispatchRsp* pRsp = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + + int32_t 
vgId = pMeta->vgId; + pRsp->upstreamTaskId = htonl(pRsp->upstreamTaskId); + pRsp->streamId = htobe64(pRsp->streamId); + pRsp->downstreamTaskId = htonl(pRsp->downstreamTaskId); + pRsp->downstreamNodeId = htonl(pRsp->downstreamNodeId); + pRsp->stage = htobe64(pRsp->stage); + pRsp->msgId = htonl(pRsp->msgId); + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pRsp->streamId, pRsp->upstreamTaskId); + if (pTask) { + streamProcessDispatchRsp(pTask, pRsp, pMsg->code); + streamMetaReleaseTask(pMeta, pTask); + return TSDB_CODE_SUCCESS; + } else { + tqDebug("vgId:%d failed to handle the dispatch rsp, since find task:0x%x failed", vgId, pRsp->upstreamTaskId); + terrno = TSDB_CODE_STREAM_TASK_NOT_EXIST; + return terrno; + } +} + +int32_t tqStreamTaskProcessRetrieveReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { + char* msgStr = pMsg->pCont; + char* msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); + int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); + SDecoder decoder; + + SStreamRetrieveReq req; + tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen); + tDecodeStreamRetrieveReq(&decoder, &req); + tDecoderClear(&decoder); + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, req.dstTaskId); + if (pTask == NULL) { + tqError("vgId:%d process retrieve req, failed to acquire task:0x%x, it may have been dropped already", pMeta->vgId, + req.dstTaskId); + return -1; + } + + SRpcMsg rsp = {.info = pMsg->info, .code = 0}; + streamProcessRetrieveReq(pTask, &req, &rsp); + + streamMetaReleaseTask(pMeta, pTask); + tDeleteStreamRetrieveReq(&req); + return 0; +} + +int32_t tqStreamTaskProcessScanHistoryFinishReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { + char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); + + // deserialize + SStreamScanHistoryFinishReq req = {0}; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)msg, msgLen); + tDecodeStreamScanHistoryFinishReq(&decoder, &req); + tDecoderClear(&decoder); + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, req.downstreamTaskId); + if (pTask == NULL) { + tqError("vgId:%d process scan history finish msg, failed to find task:0x%x, it may be destroyed", + pMeta->vgId, req.downstreamTaskId); + return -1; + } + + tqDebug("s-task:%s receive scan-history finish msg from task:0x%x", pTask->id.idStr, req.upstreamTaskId); + + int32_t code = streamProcessScanHistoryFinishReq(pTask, &req, &pMsg->info); + streamMetaReleaseTask(pMeta, pTask); + return code; +} + +int32_t tqStreamTaskProcessScanHistoryFinishRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { + char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); + + // deserialize + SStreamCompleteHistoryMsg req = {0}; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)msg, msgLen); + tDecodeCompleteHistoryDataMsg(&decoder, &req); + tDecoderClear(&decoder); + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, req.upstreamTaskId); + if (pTask == NULL) { + tqError("vgId:%d process scan history finish rsp, failed to find task:0x%x, it may be destroyed", + pMeta->vgId, req.upstreamTaskId); + return -1; + } + + int32_t remain = atomic_sub_fetch_32(&pTask->notReadyTasks, 1); + if (remain > 0) { + tqDebug("s-task:%s scan-history finish rsp received from downstream task:0x%x, unfinished remain:%d", + pTask->id.idStr, req.downstreamId, remain); + } else { + tqDebug( + "s-task:%s scan-history finish rsp received from downstream task:0x%x, all downstream tasks rsp scan-history " + "completed 
msg", + pTask->id.idStr, req.downstreamId); + streamProcessScanHistoryFinishRsp(pTask); + } + + streamMetaReleaseTask(pMeta, pTask); + return 0; +} + +int32_t tqStreamTaskProcessCheckReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { + char* msgStr = pMsg->pCont; + char* msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); + int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); + + SStreamTaskCheckReq req; + SDecoder decoder; + + tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen); + tDecodeStreamTaskCheckReq(&decoder, &req); + tDecoderClear(&decoder); + + int32_t taskId = req.downstreamTaskId; + + SStreamTaskCheckRsp rsp = { + .reqId = req.reqId, + .streamId = req.streamId, + .childId = req.childId, + .downstreamNodeId = req.downstreamNodeId, + .downstreamTaskId = req.downstreamTaskId, + .upstreamNodeId = req.upstreamNodeId, + .upstreamTaskId = req.upstreamTaskId, + }; + + // only the leader node handle the check request + if (pMeta->role == NODE_ROLE_FOLLOWER) { + tqError("s-task:0x%x invalid check msg from upstream:0x%x(vgId:%d), vgId:%d is follower, not handle check status msg", + taskId, req.upstreamTaskId, req.upstreamNodeId, pMeta->vgId); + rsp.status = TASK_DOWNSTREAM_NOT_LEADER; + } else { + SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, taskId); + if (pTask != NULL) { + rsp.status = streamTaskCheckStatus(pTask, req.upstreamTaskId, req.upstreamNodeId, req.stage, &rsp.oldStage); + streamMetaReleaseTask(pMeta, pTask); + + char* p = NULL; + streamTaskGetStatus(pTask, &p); + tqDebug("s-task:%s status:%s, stage:%"PRId64" recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), check_status:%d", + pTask->id.idStr, p, rsp.oldStage, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); + } else { + rsp.status = TASK_DOWNSTREAM_NOT_READY; + tqDebug("tq recv task check(taskId:0x%" PRIx64 "-0x%x not built yet) req(reqId:0x%" PRIx64 + ") from task:0x%x (vgId:%d), rsp check_status %d", + req.streamId, taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); + } + } + + return streamSendCheckRsp(pMeta, &req, &rsp, &pMsg->info, taskId); +} + +int32_t tqStreamTaskProcessCheckRsp(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLeader) { + char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t len = pMsg->contLen - sizeof(SMsgHead); + int32_t vgId = pMeta->vgId; + + int32_t code; + SStreamTaskCheckRsp rsp; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)pReq, len); + code = tDecodeStreamTaskCheckRsp(&decoder, &rsp); + if (code < 0) { + terrno = TSDB_CODE_INVALID_MSG; + tDecoderClear(&decoder); + tqError("vgId:%d failed to parse check rsp msg, code:%s", vgId, tstrerror(terrno)); + return -1; + } + + tDecoderClear(&decoder); + tqDebug("tq task:0x%x (vgId:%d) recv check rsp(reqId:0x%" PRIx64 ") from 0x%x (vgId:%d) status %d", + rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.status); + + if (!isLeader) { + tqError("vgId:%d not leader, task:0x%x not handle the check rsp, downstream:0x%x (vgId:%d)", vgId, + rsp.upstreamTaskId, rsp.downstreamTaskId, rsp.downstreamNodeId); + return code; + } + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, rsp.streamId, rsp.upstreamTaskId); + if (pTask == NULL) { + tqError("tq failed to locate the stream task:0x%" PRIx64 "-0x%x (vgId:%d), it may have been destroyed or stopped", + rsp.streamId, rsp.upstreamTaskId, vgId); + terrno = TSDB_CODE_STREAM_TASK_NOT_EXIST; + return -1; + } + + code = streamProcessCheckRsp(pTask, &rsp); + streamMetaReleaseTask(pMeta, pTask); + 
return code; +} + +int32_t tqStreamTaskProcessCheckpointReadyMsg(SStreamMeta* pMeta, SRpcMsg* pMsg) { + int32_t vgId = pMeta->vgId; + char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t len = pMsg->contLen - sizeof(SMsgHead); + int32_t code = 0; + + SStreamCheckpointReadyMsg req = {0}; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)msg, len); + if (tDecodeStreamCheckpointReadyMsg(&decoder, &req) < 0) { + code = TSDB_CODE_MSG_DECODE_ERROR; + tDecoderClear(&decoder); + return code; + } + tDecoderClear(&decoder); + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, req.upstreamTaskId); + if (pTask == NULL) { + tqError("vgId:%d failed to find s-task:0x%x, it may have been destroyed already", vgId, req.upstreamTaskId); + return code; + } + + tqDebug("vgId:%d s-task:%s received the checkpoint ready msg from task:0x%x (vgId:%d), handle it", vgId, + pTask->id.idStr, req.downstreamTaskId, req.downstreamNodeId); + + streamProcessCheckpointReadyMsg(pTask); + streamMetaReleaseTask(pMeta, pTask); + return code; +} + +int32_t tqStreamTaskProcessDeployReq(SStreamMeta* pMeta, int64_t sversion, char* msg, int32_t msgLen, bool isLeader, bool restored) { + int32_t code = 0; + int32_t vgId = pMeta->vgId; + + if (tsDisableStream) { + tqInfo("vgId:%d stream disabled, not deploying stream tasks", vgId); + return code; + } + + tqDebug("vgId:%d received new stream task deploy msg, start to build stream task", vgId); + + // 1.deserialize msg and build task + int32_t size = sizeof(SStreamTask); + SStreamTask* pTask = taosMemoryCalloc(1, size); + if (pTask == NULL) { + tqError("vgId:%d failed to create stream task due to out of memory, alloc size:%d", vgId, size); + return TSDB_CODE_OUT_OF_MEMORY; + } + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)msg, msgLen); + code = tDecodeStreamTask(&decoder, pTask); + tDecoderClear(&decoder); + + if (code != TSDB_CODE_SUCCESS) { + taosMemoryFree(pTask); + return TSDB_CODE_INVALID_MSG; + } + + // 2.save task, use the latest commit version as the initial start version of stream task. + int32_t taskId = pTask->id.taskId; + int64_t streamId = pTask->id.streamId; + bool added = false; + + streamMetaWLock(pMeta); + code = streamMetaRegisterTask(pMeta, sversion, pTask, &added); + int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); + streamMetaWUnLock(pMeta); + + if (code < 0) { + tqError("failed to add s-task:0x%x into vgId:%d meta, total:%d, code:%s", taskId, vgId, numOfTasks, tstrerror(code)); + tFreeStreamTask(pTask); + return code; + } + + // once pTask has been added into the meta store, it must not be referenced again, since it may already have been + // destroyed by another thread + if (added) { + // only handled in the leader node + if (isLeader) { + tqDebug("vgId:%d s-task:0x%x is deployed and added into meta, numOfTasks:%d", vgId, taskId, numOfTasks); + SStreamTask* p = streamMetaAcquireTask(pMeta, streamId, taskId); + + if (p != NULL && restored && p->info.fillHistory == 0) { + EStreamTaskEvent event = (HAS_RELATED_FILLHISTORY_TASK(p)) ?
TASK_EVENT_INIT_STREAM_SCANHIST : TASK_EVENT_INIT; + streamTaskHandleEvent(p->status.pSM, event); + } else if (!restored) { + tqWarn("s-task:%s not launched since vnode(vgId:%d) not ready", p->id.idStr, vgId); + } + + if (p != NULL) { + streamMetaReleaseTask(pMeta, p); + } + } else { + tqDebug("vgId:%d not leader, not launch stream task s-task:0x%x", vgId, taskId); + } + } else { + tqWarn("vgId:%d failed to add s-task:0x%x, since already exists in meta store", vgId, taskId); + tFreeStreamTask(pTask); + } + + return code; +} + +int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen) { + SVDropStreamTaskReq* pReq = (SVDropStreamTaskReq*)msg; + + int32_t vgId = pMeta->vgId; + tqDebug("vgId:%d receive msg to drop s-task:0x%x", vgId, pReq->taskId); + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); + if (pTask != NULL) { + // drop the related fill-history task firstly + if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { + STaskId* pHTaskId = &pTask->hTaskInfo.id; + streamMetaUnregisterTask(pMeta, pHTaskId->streamId, pHTaskId->taskId); + tqDebug("vgId:%d drop fill-history task:0x%x dropped firstly", vgId, (int32_t)pHTaskId->taskId); + } + streamMetaReleaseTask(pMeta, pTask); + } + + // drop the stream task now + streamMetaUnregisterTask(pMeta, pReq->streamId, pReq->taskId); + + // commit the update + streamMetaWLock(pMeta); + int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); + tqDebug("vgId:%d task:0x%x dropped, remain tasks:%d", vgId, pReq->taskId, numOfTasks); + + if (streamMetaCommit(pMeta) < 0) { + // persist to disk + } + streamMetaWUnLock(pMeta); + + return 0; +} + +int32_t startStreamTasks(SStreamMeta* pMeta) { + int32_t code = TSDB_CODE_SUCCESS; + int32_t vgId = pMeta->vgId; + + int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); + tqDebug("vgId:%d start to check all %d stream task(s) downstream status", vgId, numOfTasks); + if (numOfTasks == 0) { + return TSDB_CODE_SUCCESS; + } + + SArray* pTaskList = NULL; + streamMetaWLock(pMeta); + pTaskList = taosArrayDup(pMeta->pTaskList, NULL); + taosHashClear(pMeta->startInfo.pReadyTaskSet); + taosHashClear(pMeta->startInfo.pFailedTaskSet); + pMeta->startInfo.startTs = taosGetTimestampMs(); + streamMetaWUnLock(pMeta); + + // broadcast the check downstream tasks msg + for (int32_t i = 0; i < numOfTasks; ++i) { + SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i); + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId); + if (pTask == NULL) { + continue; + } + + // fill-history task can only be launched by related stream tasks. + if (pTask->info.fillHistory == 1) { + streamMetaReleaseTask(pMeta, pTask); + continue; + } + + if (pTask->status.downstreamReady == 1) { + if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { + tqDebug("s-task:%s downstream ready, no need to check downstream, check only related fill-history task", + pTask->id.idStr); + streamLaunchFillHistoryTask(pTask); + } + + streamMetaUpdateTaskDownstreamStatus(pTask, pTask->execInfo.init, pTask->execInfo.start, true); + streamMetaReleaseTask(pMeta, pTask); + continue; + } + + EStreamTaskEvent event = (HAS_RELATED_FILLHISTORY_TASK(pTask)) ? 
TASK_EVENT_INIT_STREAM_SCANHIST : TASK_EVENT_INIT; + int32_t ret = streamTaskHandleEvent(pTask->status.pSM, event); + if (ret != TSDB_CODE_SUCCESS) { + code = ret; + } + + streamMetaReleaseTask(pMeta, pTask); + } + + taosArrayDestroy(pTaskList); + return code; +} + +int32_t resetStreamTaskStatus(SStreamMeta* pMeta) { + int32_t vgId = pMeta->vgId; + int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); + + tqDebug("vgId:%d reset all %d stream task(s) status to be uninit", vgId, numOfTasks); + if (numOfTasks == 0) { + return TSDB_CODE_SUCCESS; + } + + for (int32_t i = 0; i < numOfTasks; ++i) { + SStreamTaskId* pTaskId = taosArrayGet(pMeta->pTaskList, i); + + STaskId id = {.streamId = pTaskId->streamId, .taskId = pTaskId->taskId}; + SStreamTask** pTask = taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); + streamTaskResetStatus(*pTask); + } + + return 0; +} + +static int32_t restartStreamTasks(SStreamMeta* pMeta, bool isLeader) { + int32_t vgId = pMeta->vgId; + int32_t code = 0; + int64_t st = taosGetTimestampMs(); + + while(1) { + int32_t startVal = atomic_val_compare_exchange_32(&pMeta->startInfo.taskStarting, 0, 1); + if (startVal == 0) { + break; + } + + tqDebug("vgId:%d in start stream tasks procedure, wait for 500ms and recheck", vgId); + taosMsleep(500); + } + + terrno = 0; + tqInfo("vgId:%d tasks are all updated and stopped, restart all tasks, triggered by transId:%d", vgId, + pMeta->updateInfo.transId); + + while (streamMetaTaskInTimer(pMeta)) { + tqDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId); + taosMsleep(100); + } + + streamMetaWLock(pMeta); + code = streamMetaReopen(pMeta); + if (code != TSDB_CODE_SUCCESS) { + tqError("vgId:%d failed to reopen stream meta", vgId); + streamMetaWUnLock(pMeta); + code = terrno; + return code; + } + + streamMetaInitBackend(pMeta); + int64_t el = taosGetTimestampMs() - st; + + tqInfo("vgId:%d close&reload state elapsed time:%.3fs", vgId, el/1000.); + + code = streamMetaLoadAllTasks(pMeta); + if (code != TSDB_CODE_SUCCESS) { + tqError("vgId:%d failed to load stream tasks, code:%s", vgId, tstrerror(terrno)); + streamMetaWUnLock(pMeta); + code = terrno; + return code; + } + + if (isLeader && !tsDisableStream) { + tqInfo("vgId:%d restart all stream tasks after all tasks being updated", vgId); + resetStreamTaskStatus(pMeta); + + streamMetaWUnLock(pMeta); + startStreamTasks(pMeta); + } else { + streamMetaResetStartInfo(&pMeta->startInfo); + streamMetaWUnLock(pMeta); + tqInfo("vgId:%d, follower node not start stream tasks", vgId); + } + + code = terrno; + return code; +} + +int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLeader) { + SStreamTaskRunReq* pReq = pMsg->pCont; + + int32_t taskId = pReq->taskId; + int32_t vgId = pMeta->vgId; + + if (taskId == STREAM_EXEC_START_ALL_TASKS_ID) { + startStreamTasks(pMeta); + return 0; + } else if (taskId == STREAM_EXEC_RESTART_ALL_TASKS_ID) { + restartStreamTasks(pMeta, isLeader); + return 0; + } + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, taskId); + if (pTask != NULL) { // even in halt status, the data in inputQ must be processed + char* p = NULL; + if (streamTaskReadyToRun(pTask, &p)) { + tqDebug("vgId:%d s-task:%s start to process block from inputQ, next checked ver:%" PRId64, vgId, pTask->id.idStr, + pTask->chkInfo.nextProcessVer); + streamExecTask(pTask); + } else { + int8_t status = streamTaskSetSchedStatusInactive(pTask); + tqDebug("vgId:%d s-task:%s ignore run req since not in ready state, status:%s, sched-status:%d", vgId, + 
pTask->id.idStr, p, status); + } + + streamMetaReleaseTask(pMeta, pTask); + return 0; + } else { // NOTE: pTask->status.schedStatus is not updated since it is not handled by the run exec. + // todo add one function to handle this + tqError("vgId:%d failed to find s-task, taskId:0x%x may have been dropped", vgId, taskId); + return -1; + } +} + + diff --git a/source/dnode/vnode/src/tsdb/tsdbCache.c b/source/dnode/vnode/src/tsdb/tsdbCache.c index 918d0bd7d0..5076599753 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCache.c +++ b/source/dnode/vnode/src/tsdb/tsdbCache.c @@ -1131,9 +1131,13 @@ int32_t tsdbCacheDel(STsdb *pTsdb, tb_uid_t suid, tb_uid_t uid, TSKEY sKey, TSKE char **values_list = taosMemoryCalloc(num_keys * 2, sizeof(char *)); size_t *values_list_sizes = taosMemoryCalloc(num_keys * 2, sizeof(size_t)); char **errs = taosMemoryCalloc(num_keys * 2, sizeof(char *)); + + (void)tsdbCacheCommit(pTsdb); + taosThreadMutexLock(&pTsdb->lruMutex); + taosThreadMutexLock(&pTsdb->rCache.rMutex); - rocksMayWrite(pTsdb, true, false, false); + // rocksMayWrite(pTsdb, true, false, false); rocksdb_multi_get(pTsdb->rCache.db, pTsdb->rCache.readoptions, num_keys * 2, (const char *const *)keys_list, keys_list_sizes, values_list, values_list_sizes, errs); taosThreadMutexUnlock(&pTsdb->rCache.rMutex); diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit2.c b/source/dnode/vnode/src/tsdb/tsdbCommit2.c index 48b622e324..a974eb27bf 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit2.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit2.c @@ -421,7 +421,7 @@ static int32_t tsdbCommitFileSetBegin(SCommitter2 *committer) { if (mtime < committer->ctx->now - tsS3UploadDelaySec) { committer->ctx->skipTsRow = true; } - } else if (s3Size(object_name) > 0) { + } else /*if (s3Size(object_name) > 0) */ { committer->ctx->skipTsRow = true; } } diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c index df6b85a889..e1625c9ddb 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c @@ -45,7 +45,7 @@ static int32_t tsdbDataFileReadHeadFooter(SDataFileReader *reader) { int32_t ftype = TSDB_FTYPE_HEAD; if (reader->fd[ftype]) { code = tsdbReadFile(reader->fd[ftype], reader->config->files[ftype].file.size - sizeof(SHeadFooter), - (uint8_t *)reader->headFooter, sizeof(SHeadFooter)); + (uint8_t *)reader->headFooter, sizeof(SHeadFooter), 0); TSDB_CHECK_CODE(code, lino, _exit); } @@ -67,7 +67,7 @@ static int32_t tsdbDataFileReadTombFooter(SDataFileReader *reader) { int32_t ftype = TSDB_FTYPE_TOMB; if (reader->fd[ftype]) { code = tsdbReadFile(reader->fd[ftype], reader->config->files[ftype].file.size - sizeof(STombFooter), - (uint8_t *)reader->tombFooter, sizeof(STombFooter)); + (uint8_t *)reader->tombFooter, sizeof(STombFooter), 0); TSDB_CHECK_CODE(code, lino, _exit); } reader->ctx->tombFooterLoaded = true; @@ -161,7 +161,7 @@ int32_t tsdbDataFileReadBrinBlk(SDataFileReader *reader, const TBrinBlkArray **b } code = tsdbReadFile(reader->fd[TSDB_FTYPE_HEAD], reader->headFooter->brinBlkPtr->offset, data, - reader->headFooter->brinBlkPtr->size); + reader->headFooter->brinBlkPtr->size, 0); if (code) { taosMemoryFree(data); TSDB_CHECK_CODE(code, lino, _exit); @@ -191,7 +191,8 @@ int32_t tsdbDataFileReadBrinBlock(SDataFileReader *reader, const SBrinBlk *brinB code = tRealloc(&reader->config->bufArr[0], brinBlk->dp->size); TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbReadFile(reader->fd[TSDB_FTYPE_HEAD], brinBlk->dp->offset,
reader->config->bufArr[0], brinBlk->dp->size); + code = + tsdbReadFile(reader->fd[TSDB_FTYPE_HEAD], brinBlk->dp->offset, reader->config->bufArr[0], brinBlk->dp->size, 0); TSDB_CHECK_CODE(code, lino, _exit); int32_t size = 0; @@ -232,7 +233,8 @@ int32_t tsdbDataFileReadBlockData(SDataFileReader *reader, const SBrinRecord *re code = tRealloc(&reader->config->bufArr[0], record->blockSize); TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbReadFile(reader->fd[TSDB_FTYPE_DATA], record->blockOffset, reader->config->bufArr[0], record->blockSize); + code = + tsdbReadFile(reader->fd[TSDB_FTYPE_DATA], record->blockOffset, reader->config->bufArr[0], record->blockSize, 0); TSDB_CHECK_CODE(code, lino, _exit); code = tDecmprBlockData(reader->config->bufArr[0], record->blockSize, bData, &reader->config->bufArr[1]); @@ -257,8 +259,8 @@ int32_t tsdbDataFileReadBlockDataByColumn(SDataFileReader *reader, const SBrinRe code = tRealloc(&reader->config->bufArr[0], record->blockKeySize); TSDB_CHECK_CODE(code, lino, _exit); - code = - tsdbReadFile(reader->fd[TSDB_FTYPE_DATA], record->blockOffset, reader->config->bufArr[0], record->blockKeySize); + code = tsdbReadFile(reader->fd[TSDB_FTYPE_DATA], record->blockOffset, reader->config->bufArr[0], record->blockKeySize, + 0); TSDB_CHECK_CODE(code, lino, _exit); // hdr @@ -296,10 +298,46 @@ int32_t tsdbDataFileReadBlockDataByColumn(SDataFileReader *reader, const SBrinRe TSDB_CHECK_CODE(code, lino, _exit); code = tsdbReadFile(reader->fd[TSDB_FTYPE_DATA], record->blockOffset + record->blockKeySize, - reader->config->bufArr[0], hdr->szBlkCol); + reader->config->bufArr[0], hdr->szBlkCol, 0); TSDB_CHECK_CODE(code, lino, _exit); } + int64_t szHint = 0; + if (bData->nColData > 3) { + int64_t offset = 0; + SBlockCol bc = {.cid = 0}; + SBlockCol *blockCol = &bc; + + size = 0; + SColData *colData = tBlockDataGetColDataByIdx(bData, 0); + while (blockCol && blockCol->cid < colData->cid) { + if (size < hdr->szBlkCol) { + size += tGetBlockCol(reader->config->bufArr[0] + size, blockCol); + } else { + ASSERT(size == hdr->szBlkCol); + blockCol = NULL; + } + } + + if (blockCol && blockCol->flag == HAS_VALUE) { + offset = blockCol->offset; + + SColData *colDataEnd = tBlockDataGetColDataByIdx(bData, bData->nColData - 1); + while (blockCol && blockCol->cid < colDataEnd->cid) { + if (size < hdr->szBlkCol) { + size += tGetBlockCol(reader->config->bufArr[0] + size, blockCol); + } else { + ASSERT(size == hdr->szBlkCol); + blockCol = NULL; + } + } + + if (blockCol && blockCol->flag == HAS_VALUE) { + szHint = blockCol->offset + blockCol->szBitmap + blockCol->szOffset + blockCol->szValue - offset; + } + } + } + SBlockCol bc[1] = {{.cid = 0}}; SBlockCol *blockCol = bc; @@ -338,7 +376,7 @@ int32_t tsdbDataFileReadBlockDataByColumn(SDataFileReader *reader, const SBrinRe code = tsdbReadFile(reader->fd[TSDB_FTYPE_DATA], record->blockOffset + record->blockKeySize + hdr->szBlkCol + blockCol->offset, - reader->config->bufArr[1], size1); + reader->config->bufArr[1], size1, i > 0 ? 
0 : szHint); TSDB_CHECK_CODE(code, lino, _exit); code = tsdbDecmprColData(reader->config->bufArr[1], blockCol, hdr->cmprAlg, hdr->nRow, colData, @@ -366,7 +404,7 @@ int32_t tsdbDataFileReadBlockSma(SDataFileReader *reader, const SBrinRecord *rec code = tRealloc(&reader->config->bufArr[0], record->smaSize); TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbReadFile(reader->fd[TSDB_FTYPE_SMA], record->smaOffset, reader->config->bufArr[0], record->smaSize); + code = tsdbReadFile(reader->fd[TSDB_FTYPE_SMA], record->smaOffset, reader->config->bufArr[0], record->smaSize, 0); TSDB_CHECK_CODE(code, lino, _exit); // decode sma data @@ -405,7 +443,7 @@ int32_t tsdbDataFileReadTombBlk(SDataFileReader *reader, const TTombBlkArray **t } code = tsdbReadFile(reader->fd[TSDB_FTYPE_TOMB], reader->tombFooter->tombBlkPtr->offset, data, - reader->tombFooter->tombBlkPtr->size); + reader->tombFooter->tombBlkPtr->size, 0); if (code) { taosMemoryFree(data); TSDB_CHECK_CODE(code, lino, _exit); @@ -435,7 +473,8 @@ int32_t tsdbDataFileReadTombBlock(SDataFileReader *reader, const STombBlk *tombB code = tRealloc(&reader->config->bufArr[0], tombBlk->dp->size); TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbReadFile(reader->fd[TSDB_FTYPE_TOMB], tombBlk->dp->offset, reader->config->bufArr[0], tombBlk->dp->size); + code = + tsdbReadFile(reader->fd[TSDB_FTYPE_TOMB], tombBlk->dp->offset, reader->config->bufArr[0], tombBlk->dp->size, 0); TSDB_CHECK_CODE(code, lino, _exit); int32_t size = 0; @@ -488,8 +527,8 @@ struct SDataFileWriter { STombBlock tombBlock[1]; int32_t tombBlockIdx; // range - SVersionRange range; - SVersionRange tombRange; + SVersionRange range; + SVersionRange tombRange; } ctx[1]; STFile files[TSDB_FTYPE_MAX]; diff --git a/source/dnode/vnode/src/tsdb/tsdbDef.h b/source/dnode/vnode/src/tsdb/tsdbDef.h index da2445dee5..0f512e1306 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDef.h +++ b/source/dnode/vnode/src/tsdb/tsdbDef.h @@ -34,7 +34,7 @@ typedef struct SFDataPtr { extern int32_t tsdbOpenFile(const char *path, STsdb *pTsdb, int32_t flag, STsdbFD **ppFD); extern void tsdbCloseFile(STsdbFD **ppFD); extern int32_t tsdbWriteFile(STsdbFD *pFD, int64_t offset, const uint8_t *pBuf, int64_t size); -extern int32_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size); +extern int32_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size, int64_t szHint); extern int32_t tsdbFsyncFile(STsdbFD *pFD); #ifdef __cplusplus diff --git a/source/dnode/vnode/src/tsdb/tsdbFS.c b/source/dnode/vnode/src/tsdb/tsdbFS.c index c0c74d6b87..732f46467e 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS.c @@ -104,7 +104,7 @@ static int32_t tsdbSaveFSToFile(STsdbFS *pFS, const char *fname) { taosCalcChecksumAppend(0, pData, size); // save to file - TdFilePtr pFD = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); + TdFilePtr pFD = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (pFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); TSDB_CHECK_CODE(code, lino, _exit); diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index 70a83ebdbb..add8da52e0 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -20,8 +20,6 @@ #define BLOCK_COMMIT_FACTOR 3 -extern int vnodeScheduleTask(int (*execute)(void *), void *arg); -extern int vnodeScheduleTaskEx(int tpid, int (*execute)(void *), void *arg); extern void remove_file(const char 
*fname, bool last_level); #define TSDB_FS_EDIT_MIN TSDB_FEDIT_COMMIT @@ -85,7 +83,7 @@ static int32_t save_json(const cJSON *json, const char *fname) { char *data = cJSON_PrintUnformatted(json); if (data == NULL) return TSDB_CODE_OUT_OF_MEMORY; - TdFilePtr fp = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); + TdFilePtr fp = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (fp == NULL) { code = TAOS_SYSTEM_ERROR(code); goto _exit; @@ -651,7 +649,6 @@ _exit: static int32_t close_file_system(STFileSystem *fs) { TARRAY2_CLEAR(fs->fSetArr, tsdbTFileSetClear); TARRAY2_CLEAR(fs->fSetArrTmp, tsdbTFileSetClear); - // TODO return 0; } @@ -748,36 +745,31 @@ _exit: return code; } -static void tsdbDoWaitBgTask(STFileSystem *fs, STFSBgTask *task) { - task->numWait++; - taosThreadCondWait(task->done, &fs->tsdb->mutex); - task->numWait--; +int32_t tsdbFSCancelAllBgTask(STFileSystem *fs) { + TARRAY2(int64_t) channelArr = {0}; - if (task->numWait == 0) { - taosThreadCondDestroy(task->done); - if (task->destroy) { - task->destroy(task->arg); + // collect all open channels + taosThreadMutexLock(&fs->tsdb->mutex); + STFileSet *fset; + TARRAY2_FOREACH(fs->fSetArr, fset) { + if (VNODE_ASYNC_VALID_CHANNEL_ID(fset->bgTaskChannel)) { + TARRAY2_APPEND(&channelArr, fset->bgTaskChannel); + fset->bgTaskChannel = 0; } - taosMemoryFree(task); } -} + taosThreadMutexUnlock(&fs->tsdb->mutex); -static void tsdbDoDoneBgTask(STFileSystem *fs, STFSBgTask *task) { - if (task->numWait > 0) { - taosThreadCondBroadcast(task->done); - } else { - taosThreadCondDestroy(task->done); - if (task->destroy) { - task->destroy(task->arg); - } - taosMemoryFree(task); - } + // destroy all channels + int64_t channel; + TARRAY2_FOREACH(&channelArr, channel) { vnodeAChannelDestroy(vnodeAsyncHandle[1], channel, true); } + TARRAY2_DESTROY(&channelArr, NULL); + return 0; } int32_t tsdbCloseFS(STFileSystem **fs) { if (fs[0] == NULL) return 0; - tsdbFSDisableBgTask(fs[0]); + tsdbFSCancelAllBgTask(*fs); close_file_system(fs[0]); destroy_fs(fs); return 0; @@ -901,7 +893,7 @@ int32_t tsdbFSEditCommit(STFileSystem *fs) { if (mtime < now - tsS3UploadDelaySec) { skipMerge = true; } - } else if (s3Size(object_name) > 0) { + } else /* if (s3Size(object_name) > 0) */ { skipMerge = true; } } @@ -910,7 +902,20 @@ int32_t tsdbFSEditCommit(STFileSystem *fs) { } if (!skipMerge) { - code = tsdbSchedMerge(fs->tsdb, fset->fid); + code = tsdbTFileSetOpenChannel(fset); + TSDB_CHECK_CODE(code, lino, _exit); + + SMergeArg *arg = taosMemoryMalloc(sizeof(*arg)); + if (arg == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + arg->tsdb = fs->tsdb; + arg->fid = fset->fid; + + code = vnodeAsyncC(vnodeAsyncHandle[1], fset->bgTaskChannel, EVA_PRIORITY_HIGH, tsdbMerge, taosMemoryFree, + arg, NULL); TSDB_CHECK_CODE(code, lino, _exit); } } @@ -939,7 +944,11 @@ int32_t tsdbFSEditCommit(STFileSystem *fs) { } } - if (tsdbTFileSetIsEmpty(fset) && fset->bgTaskRunning == NULL) { + if (tsdbTFileSetIsEmpty(fset)) { + if (VNODE_ASYNC_VALID_CHANNEL_ID(fset->bgTaskChannel)) { + vnodeAChannelDestroy(vnodeAsyncHandle[1], fset->bgTaskChannel, false); + fset->bgTaskChannel = 0; + } TARRAY2_REMOVE(fs->fSetArr, i, tsdbTFileSetClear); } else { i++; @@ -1179,137 +1188,4 @@ _out: pHash = NULL; } return code; -} - -const char *gFSBgTaskName[] = {NULL, "MERGE", "RETENTION", "COMPACT"}; - -static int32_t tsdbFSRunBgTask(void *arg) { - STFSBgTask *task = (STFSBgTask *)arg; - STFileSystem *fs = 
task->fs; - STFileSet *fset; - - tsdbFSGetFSet(fs, task->fid, &fset); - - ASSERT(fset != NULL && fset->bgTaskRunning == task); - - task->launchTime = taosGetTimestampMs(); - task->run(task->arg); - task->finishTime = taosGetTimestampMs(); - - tsdbDebug("vgId:%d bg task:%s task id:%" PRId64 " finished, schedule time:%" PRId64 " launch time:%" PRId64 - " finish time:%" PRId64, - TD_VID(fs->tsdb->pVnode), gFSBgTaskName[task->type], task->taskid, task->scheduleTime, task->launchTime, - task->finishTime); - - taosThreadMutexLock(&fs->tsdb->mutex); - - // free last - tsdbDoDoneBgTask(fs, task); - fset->bgTaskRunning = NULL; - - // schedule next - if (fset->bgTaskNum > 0) { - if (fs->stop) { - while (fset->bgTaskNum > 0) { - STFSBgTask *nextTask = fset->bgTaskQueue->next; - nextTask->prev->next = nextTask->next; - nextTask->next->prev = nextTask->prev; - fset->bgTaskNum--; - tsdbDoDoneBgTask(fs, nextTask); - } - } else { - // pop task from head - fset->bgTaskRunning = fset->bgTaskQueue->next; - fset->bgTaskRunning->prev->next = fset->bgTaskRunning->next; - fset->bgTaskRunning->next->prev = fset->bgTaskRunning->prev; - fset->bgTaskNum--; - vnodeScheduleTaskEx(1, tsdbFSRunBgTask, fset->bgTaskRunning); - } - } - - taosThreadMutexUnlock(&fs->tsdb->mutex); - return 0; -} - -// IMPORTANT: the caller must hold the fs->tsdb->mutex -int32_t tsdbFSScheduleBgTask(STFileSystem *fs, int32_t fid, EFSBgTaskT type, int32_t (*run)(void *), - void (*destroy)(void *), void *arg, int64_t *taskid) { - if (fs->stop) { - if (destroy) { - destroy(arg); - } - return 0; - } - - STFileSet *fset; - tsdbFSGetFSet(fs, fid, &fset); - - ASSERT(fset != NULL); - - for (STFSBgTask *task = fset->bgTaskQueue->next; task != fset->bgTaskQueue; task = task->next) { - if (task->type == type) { - if (destroy) { - destroy(arg); - } - return 0; - } - } - - // do schedule task - STFSBgTask *task = taosMemoryCalloc(1, sizeof(STFSBgTask)); - if (task == NULL) return TSDB_CODE_OUT_OF_MEMORY; - taosThreadCondInit(task->done, NULL); - - task->fs = fs; - task->fid = fid; - task->type = type; - task->run = run; - task->destroy = destroy; - task->arg = arg; - task->scheduleTime = taosGetTimestampMs(); - task->taskid = ++fs->taskid; - - if (fset->bgTaskRunning == NULL && fset->bgTaskNum == 0) { - // launch task directly - fset->bgTaskRunning = task; - vnodeScheduleTaskEx(1, tsdbFSRunBgTask, task); - } else { - // add to the queue tail - fset->bgTaskNum++; - task->next = fset->bgTaskQueue; - task->prev = fset->bgTaskQueue->prev; - task->prev->next = task; - task->next->prev = task; - } - - if (taskid) *taskid = task->taskid; - return 0; -} - -int32_t tsdbFSDisableBgTask(STFileSystem *fs) { - taosThreadMutexLock(&fs->tsdb->mutex); - for (;;) { - fs->stop = true; - bool done = true; - - STFileSet *fset; - TARRAY2_FOREACH(fs->fSetArr, fset) { - if (fset->bgTaskRunning) { - tsdbDoWaitBgTask(fs, fset->bgTaskRunning); - done = false; - break; - } - } - - if (done) break; - } - taosThreadMutexUnlock(&fs->tsdb->mutex); - return 0; -} - -int32_t tsdbFSEnableBgTask(STFileSystem *fs) { - taosThreadMutexLock(&fs->tsdb->mutex); - fs->stop = false; - taosThreadMutexUnlock(&fs->tsdb->mutex); - return 0; -} +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.h b/source/dnode/vnode/src/tsdb/tsdbFS2.h index a3a8e2f575..74453126cf 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.h @@ -55,11 +55,6 @@ int64_t tsdbFSAllocEid(STFileSystem *fs); int32_t tsdbFSEditBegin(STFileSystem *fs, const 
TFileOpArray *opArray, EFEditT etype); int32_t tsdbFSEditCommit(STFileSystem *fs); int32_t tsdbFSEditAbort(STFileSystem *fs); -// background task -int32_t tsdbFSScheduleBgTask(STFileSystem *fs, int32_t fid, EFSBgTaskT type, int32_t (*run)(void *), - void (*destroy)(void *), void *arg, int64_t *taskid); -int32_t tsdbFSDisableBgTask(STFileSystem *fs); -int32_t tsdbFSEnableBgTask(STFileSystem *fs); // other int32_t tsdbFSGetFSet(STFileSystem *fs, int32_t fid, STFileSet **fset); int32_t tsdbFSCheckCommit(STsdb *tsdb, int32_t fid); diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.c b/source/dnode/vnode/src/tsdb/tsdbFSet2.c index 61bedcb996..025671ff3d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSet2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.c @@ -14,6 +14,7 @@ */ #include "tsdbFSet2.h" +#include "vnd.h" int32_t tsdbSttLvlInit(int32_t level, SSttLvl **lvl) { if (!(lvl[0] = taosMemoryMalloc(sizeof(SSttLvl)))) return TSDB_CODE_OUT_OF_MEMORY; @@ -451,10 +452,7 @@ int32_t tsdbTFileSetInit(int32_t fid, STFileSet **fset) { TARRAY2_INIT(fset[0]->lvlArr); // background task queue - fset[0]->bgTaskNum = 0; - fset[0]->bgTaskQueue->next = fset[0]->bgTaskQueue; - fset[0]->bgTaskQueue->prev = fset[0]->bgTaskQueue; - fset[0]->bgTaskRunning = NULL; + fset[0]->bgTaskChannel = 0; // block commit variables taosThreadCondInit(&fset[0]->canCommit, NULL); @@ -650,3 +648,8 @@ bool tsdbTFileSetIsEmpty(const STFileSet *fset) { } return TARRAY2_SIZE(fset->lvlArr) == 0; } + +int32_t tsdbTFileSetOpenChannel(STFileSet *fset) { + if (VNODE_ASYNC_VALID_CHANNEL_ID(fset->bgTaskChannel)) return 0; + return vnodeAChannelInit(vnodeAsyncHandle[1], &fset->bgTaskChannel); +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.h b/source/dnode/vnode/src/tsdb/tsdbFSet2.h index 34f174ade7..32028db352 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSet2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.h @@ -22,14 +22,12 @@ extern "C" { #endif -typedef struct STFileSet STFileSet; -typedef struct STFileOp STFileOp; -typedef struct SSttLvl SSttLvl; +typedef struct STFileOp STFileOp; +typedef struct SSttLvl SSttLvl; typedef TARRAY2(STFileObj *) TFileObjArray; typedef TARRAY2(SSttLvl *) TSttLvlArray; typedef TARRAY2(STFileOp) TFileOpArray; typedef struct STFileSystem STFileSystem; -typedef struct STFSBgTask STFSBgTask; typedef enum { TSDB_FOP_NONE = 0, @@ -72,33 +70,8 @@ bool tsdbTFileSetIsEmpty(const STFileSet *fset); // stt int32_t tsdbSttLvlInit(int32_t level, SSttLvl **lvl); int32_t tsdbSttLvlClear(SSttLvl **lvl); - -typedef enum { - TSDB_BG_TASK_MERGER = 1, - TSDB_BG_TASK_RETENTION, - TSDB_BG_TASK_COMPACT, -} EFSBgTaskT; - -struct STFSBgTask { - STFileSystem *fs; - int32_t fid; - - EFSBgTaskT type; - int32_t (*run)(void *arg); - void (*destroy)(void *arg); - void *arg; - - TdThreadCond done[1]; - int32_t numWait; - - int64_t taskid; - int64_t scheduleTime; - int64_t launchTime; - int64_t finishTime; - - struct STFSBgTask *prev; - struct STFSBgTask *next; -}; +// open channel +int32_t tsdbTFileSetOpenChannel(STFileSet *fset); struct STFileOp { tsdb_fop_t optype; @@ -118,10 +91,8 @@ struct STFileSet { STFileObj *farr[TSDB_FTYPE_MAX]; // file array TSttLvlArray lvlArr[1]; // level array - // background task queue - int32_t bgTaskNum; - STFSBgTask bgTaskQueue[1]; - STFSBgTask *bgTaskRunning; + // background task channel + int64_t bgTaskChannel; // block commit variables TdThreadCond canCommit; diff --git a/source/dnode/vnode/src/tsdb/tsdbFile.c b/source/dnode/vnode/src/tsdb/tsdbFile.c index 
62b37cd0a6..3ee0c482a7 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFile.c +++ b/source/dnode/vnode/src/tsdb/tsdbFile.c @@ -177,7 +177,7 @@ int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT ftype) { taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); // open - pFD = taosOpenFile(fname, TD_FILE_WRITE); + pFD = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_WRITE_THROUGH); if (pFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); goto _err; diff --git a/source/dnode/vnode/src/tsdb/tsdbMerge.c b/source/dnode/vnode/src/tsdb/tsdbMerge.c index 7babaa6e28..b47b951b2b 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMerge.c +++ b/source/dnode/vnode/src/tsdb/tsdbMerge.c @@ -17,11 +17,6 @@ #define TSDB_MAX_LEVEL 2 // means max level is 3 -typedef struct { - STsdb *tsdb; - int32_t fid; -} SMergeArg; - typedef struct { STsdb *tsdb; int32_t fid; @@ -528,7 +523,7 @@ static int32_t tsdbMergeGetFSet(SMerger *merger) { return 0; } -static int32_t tsdbMerge(void *arg) { +int32_t tsdbMerge(void *arg) { int32_t code = 0; int32_t lino = 0; SMergeArg *mergeArg = (SMergeArg *)arg; @@ -568,7 +563,7 @@ static int32_t tsdbMerge(void *arg) { if (mtime < now - tsS3UploadDelaySec) { skipMerge = true; } - } else if (s3Size(object_name) > 0) { + } else /* if (s3Size(object_name) > 0) */ { skipMerge = true; } } @@ -597,18 +592,3 @@ _exit: tsdbTFileSetClear(&merger->fset); return code; } - -int32_t tsdbSchedMerge(STsdb *tsdb, int32_t fid) { - SMergeArg *arg = taosMemoryMalloc(sizeof(*arg)); - if (arg == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - arg->tsdb = tsdb; - arg->fid = fid; - - int32_t code = tsdbFSScheduleBgTask(tsdb->pFS, fid, TSDB_BG_TASK_MERGER, tsdbMerge, taosMemoryFree, arg, NULL); - if (code) taosMemoryFree(arg); - - return code; -} diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index e7018638a6..853d0d7374 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -48,7 +48,7 @@ static int32_t doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScan static int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBlockScanInfo, int64_t key, STsdbReader* pReader); -static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SCostSummary* pCost); +static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SReadCostSummary* pCost); static STsdb* getTsdbByRetentions(SVnode* pVnode, SQueryTableDataCond* pCond, SRetention* retentions, const char* idstr, int8_t* pLevel); static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level); @@ -60,6 +60,7 @@ static void initBlockDumpInfo(STsdbReader* pReader, SDataBlockIter* pBl static int32_t getInitialDelIndex(const SArray* pDelSkyline, int32_t order); static void resetTableListIndex(SReaderStatus* pStatus); static void getMemTableTimeRange(STsdbReader* pReader, int64_t* pMaxKey, int64_t* pMinKey); +static void updateComposedBlockInfo(STsdbReader* pReader, double el, STableBlockScanInfo* pBlockScanInfo); static bool outOfTimeWindow(int64_t ts, STimeWindow* pWindow) { return (ts > pWindow->ekey) || (ts < pWindow->skey); } @@ -170,7 +171,7 @@ static int32_t filesetIteratorNext(SFilesetIter* pIter, STsdbReader* pReader, bo return TSDB_CODE_SUCCESS; } - SCostSummary* pCost = &pReader->cost; + SReadCostSummary* pCost = &pReader->cost; pIter->pLastBlockReader->uid = 0; tMergeTreeClose(&pIter->pLastBlockReader->mergeTree); @@ -294,11 +295,7 @@ static 
SSDataBlock* createResBlock(SQueryTableDataCond* pCond, int32_t capacity) } static int32_t tsdbInitReaderLock(STsdbReader* pReader) { - int32_t code = -1; - qTrace("tsdb/read: %p, pre-init read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - code = taosThreadMutexInit(&pReader->readerMutex, NULL); - + int32_t code = taosThreadMutexInit(&pReader->readerMutex, NULL); qTrace("tsdb/read: %p, post-init read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); return code; @@ -327,22 +324,14 @@ static int32_t tsdbAcquireReader(STsdbReader* pReader) { } static int32_t tsdbTryAcquireReader(STsdbReader* pReader) { - int32_t code = -1; - qTrace("tsdb/read: %p, pre-trytake read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - code = taosThreadMutexTryLock(&pReader->readerMutex); - + int32_t code = taosThreadMutexTryLock(&pReader->readerMutex); qTrace("tsdb/read: %p, post-trytake read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); return code; } static int32_t tsdbReleaseReader(STsdbReader* pReader) { - int32_t code = -1; - qTrace("tsdb/read: %p, pre-untake read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); - - code = taosThreadMutexUnlock(&pReader->readerMutex); - + int32_t code = taosThreadMutexUnlock(&pReader->readerMutex); qTrace("tsdb/read: %p, post-untake read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); return code; @@ -437,6 +426,7 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, void pReader->bDurationOrder = true; tsdbInitReaderLock(pReader); + tsem_init(&pReader->resumeAfterSuspend, 0, 0); *ppReader = pReader; return code; @@ -577,7 +567,12 @@ static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, SBlockN if (isEmptyQueryTimeWindow(&w)) { k += 1; - continue; + + if (k >= numOfTables) { + break; + } else { + continue; + } } // 1. time range check @@ -1015,8 +1010,8 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader) { // check if current block are all handled if (pDumpInfo->rowIndex >= 0 && pDumpInfo->rowIndex < pRecord->numRow) { int64_t ts = pBlockData->aTSKEY[pDumpInfo->rowIndex]; - if (outOfTimeWindow(ts, - &pReader->info.window)) { // the remain data has out of query time window, ignore current block + if (outOfTimeWindow(ts, &pReader->info.window)) { + // the remain data has out of query time window, ignore current block setBlockAllDumped(pDumpInfo, ts, pReader->info.order); } } else { @@ -1123,16 +1118,12 @@ static bool getNeighborBlockOfSameTable(SDataBlockIter* pBlockIter, SFileDataBlo } int32_t step = asc ? 
1 : -1; - // *nextIndex = pBlockInfo->tbBlockIdx + step; - // *pBlockIndex = *(SBlockIndex*)taosArrayGet(pTableBlockScanInfo->pBlockList, *nextIndex); STableDataBlockIdx* pTableDataBlockIdx = taosArrayGet(pTableBlockScanInfo->pBlockIdxList, pBlockInfo->tbBlockIdx + step); SFileDataBlockInfo* p = taosArrayGet(pBlockIter->blockList, pTableDataBlockIdx->globalIndex); memcpy(pRecord, &p->record, sizeof(SBrinRecord)); *nextIndex = pBlockInfo->tbBlockIdx + step; - - // tMapDataGetItemByIdx(&pTableBlockScanInfo->mapData, pIndex->ordinalIndex, pBlock, tGetDataBlk); return true; } @@ -1376,23 +1367,19 @@ static int32_t buildDataBlockFromBuf(STsdbReader* pReader, STableBlockScanInfo* return TSDB_CODE_SUCCESS; } - SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; - int64_t st = taosGetTimestampUs(); + SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; int32_t code = buildDataBlockFromBufImpl(pBlockScanInfo, endKey, pReader->resBlockInfo.capacity, pReader); - blockDataUpdateTsWindow(pBlock, pReader->suppInfo.slotId[0]); - pBlock->info.id.uid = pBlockScanInfo->uid; + double el = (taosGetTimestampUs() - st) / 1000.0; + updateComposedBlockInfo(pReader, el, pBlockScanInfo); - setComposedBlockFlag(pReader, true); - - double elapsedTime = (taosGetTimestampUs() - st) / 1000.0; tsdbDebug("%p build data block from cache completed, elapsed time:%.2f ms, numOfRows:%" PRId64 ", brange:%" PRId64 " - %" PRId64 ", uid:%" PRIu64 ", %s", - pReader, elapsedTime, pBlock->info.rows, pBlock->info.window.skey, pBlock->info.window.ekey, + pReader, el, pBlock->info.rows, pBlock->info.window.skey, pBlock->info.window.ekey, pBlockScanInfo->uid, pReader->idStr); - pReader->cost.buildmemBlock += elapsedTime; + pReader->cost.buildmemBlock += el; return code; } @@ -2293,13 +2280,12 @@ static int32_t loadNeighborIfOverlap(SFileDataBlockInfo* pBlockInfo, STableBlock return code; } -static void updateComposedBlockInfo(STsdbReader* pReader, double el, STableBlockScanInfo* pBlockScanInfo) { +void updateComposedBlockInfo(STsdbReader* pReader, double el, STableBlockScanInfo* pBlockScanInfo) { SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; pResBlock->info.id.uid = (pBlockScanInfo != NULL) ? pBlockScanInfo->uid : 0; pResBlock->info.dataLoad = 1; blockDataUpdateTsWindow(pResBlock, pReader->suppInfo.slotId[0]); - setComposedBlockFlag(pReader, true); pReader->cost.composedBlocks += 1; @@ -2356,7 +2342,6 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader) { pBlockScanInfo = *pReader->status.pTableIter; if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pBlockScanInfo->uid, sizeof(pBlockScanInfo->uid))) { - // setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->info.order); return code; } } @@ -2436,7 +2421,7 @@ int32_t getInitialDelIndex(const SArray* pDelSkyline, int32_t order) { return ASCENDING_TRAVERSE(order) ? 
0 : taosArrayGetSize(pDelSkyline) - 1; } -int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SCostSummary* pCost) { +int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SReadCostSummary* pCost) { int32_t code = 0; int32_t newDelDataInFile = taosArrayGetSize(pBlockScanInfo->pFileDelData); if (newDelDataInFile == 0 && @@ -2962,6 +2947,8 @@ static int32_t buildBlockFromBufferSequentially(STsdbReader* pReader, int64_t en SReaderStatus* pStatus = &pReader->status; STableUidList* pUidList = &pStatus->uidList; + tsdbDebug("seq load data blocks from cache, %s", pReader->idStr); + while (1) { if (pReader->code != TSDB_CODE_SUCCESS) { tsdbWarn("tsdb reader is stopped ASAP, code:%s, %s", strerror(pReader->code), pReader->idStr); @@ -3069,6 +3056,8 @@ static ERetrieveType doReadDataFromLastFiles(STsdbReader* pReader) { SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; SDataBlockIter* pBlockIter = &pReader->status.blockIter; + tsdbDebug("seq load data blocks from stt files %s", pReader->idStr); + while (1) { terrno = 0; @@ -3800,7 +3789,6 @@ int32_t buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t e int32_t code = TSDB_CODE_SUCCESS; do { - // SRow* pTSRow = NULL; TSDBROW row = {.type = -1}; bool freeTSRow = false; tsdbGetNextRowInMem(pBlockScanInfo, pReader, &row, endKey, &freeTSRow); @@ -3809,6 +3797,7 @@ int32_t buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t e } if (row.type == TSDBROW_ROW_FMT) { + int64_t ts = row.pTSRow->ts;; code = doAppendRowFromTSRow(pBlock, pReader, row.pTSRow, pBlockScanInfo); if (freeTSRow) { @@ -3818,13 +3807,17 @@ int32_t buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t e if (code) { return code; } + + pBlockScanInfo->lastProcKey = ts; } else { code = doAppendRowFromFileBlock(pBlock, pReader, row.pBlockData, row.iRow); if (code) { break; } + pBlockScanInfo->lastProcKey = row.pBlockData->aTSKEY[row.iRow]; } + // no data in buffer, return immediately if (!(pBlockScanInfo->iter.hasVal || pBlockScanInfo->iiter.hasVal)) { break; @@ -4138,7 +4131,7 @@ void tsdbReaderClose2(STsdbReader* pReader) { tsdbDataFileReaderClose(&pReader->pFileReader); } - SCostSummary* pCost = &pReader->cost; + SReadCostSummary* pCost = &pReader->cost; SFilesetIter* pFilesetIter = &pReader->status.fileIter; if (pFilesetIter->pLastBlockReader != NULL) { SLastBlockReader* pLReader = pFilesetIter->pLastBlockReader; @@ -4153,6 +4146,7 @@ void tsdbReaderClose2(STsdbReader* pReader) { tsdbUntakeReadSnap2(pReader, pReader->pReadSnap, true); pReader->pReadSnap = NULL; + tsem_destroy(&pReader->resumeAfterSuspend); tsdbReleaseReader(pReader); tsdbUninitReaderLock(pReader); @@ -4185,6 +4179,8 @@ int32_t tsdbReaderSuspend2(STsdbReader* pReader) { SReaderStatus* pStatus = &pReader->status; STableBlockScanInfo* pBlockScanInfo = NULL; + pReader->status.suspendInvoked = true; // record the suspend status + if (pStatus->loadFromFile) { SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); if (pBlockInfo != NULL) { @@ -4198,80 +4194,26 @@ int32_t tsdbReaderSuspend2(STsdbReader* pReader) { tsdbDataFileReaderClose(&pReader->pFileReader); - SCostSummary* pCost = &pReader->cost; + SReadCostSummary* pCost = &pReader->cost; pReader->status.pLDataIterArray = destroySttBlockReader(pReader->status.pLDataIterArray, &pCost->sttCost); pReader->status.pLDataIterArray = taosArrayInit(4, POINTER_BYTES); - // resetDataBlockScanInfo excluding lastKey - STableBlockScanInfo** 
p = NULL; - int32_t iter = 0; - - while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) { - STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; - - pInfo->iterInit = false; - pInfo->iter.hasVal = false; - pInfo->iiter.hasVal = false; - - if (pInfo->iter.iter != NULL) { - pInfo->iter.iter = tsdbTbDataIterDestroy(pInfo->iter.iter); - } - - if (pInfo->iiter.iter != NULL) { - pInfo->iiter.iter = tsdbTbDataIterDestroy(pInfo->iiter.iter); - } - - pInfo->delSkyline = taosArrayDestroy(pInfo->delSkyline); - pInfo->pFileDelData = taosArrayDestroy(pInfo->pFileDelData); - } - } else { - // resetDataBlockScanInfo excluding lastKey - STableBlockScanInfo** p = NULL; - int32_t iter = 0; - - while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) { - STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; - - pInfo->iterInit = false; - pInfo->iter.hasVal = false; - pInfo->iiter.hasVal = false; - - if (pInfo->iter.iter != NULL) { - pInfo->iter.iter = tsdbTbDataIterDestroy(pInfo->iter.iter); - } - - if (pInfo->iiter.iter != NULL) { - pInfo->iiter.iter = tsdbTbDataIterDestroy(pInfo->iiter.iter); - } - - pInfo->delSkyline = taosArrayDestroy(pInfo->delSkyline); - } - - pBlockScanInfo = pStatus->pTableIter == NULL ? NULL : *pStatus->pTableIter; - if (pBlockScanInfo) { - // save lastKey to restore memory iterator - STimeWindow w = pReader->resBlockInfo.pResBlock->info.window; - pBlockScanInfo->lastProcKey = ASCENDING_TRAVERSE(pReader->info.order) ? w.ekey : w.skey; - - // reset current current table's data block scan info, - pBlockScanInfo->iterInit = false; - - pBlockScanInfo->iter.hasVal = false; - pBlockScanInfo->iiter.hasVal = false; - if (pBlockScanInfo->iter.iter != NULL) { - pBlockScanInfo->iter.iter = tsdbTbDataIterDestroy(pBlockScanInfo->iter.iter); - } - - if (pBlockScanInfo->iiter.iter != NULL) { - pBlockScanInfo->iiter.iter = tsdbTbDataIterDestroy(pBlockScanInfo->iiter.iter); - } - - pBlockScanInfo->pBlockList = taosArrayDestroy(pBlockScanInfo->pBlockList); - pBlockScanInfo->pBlockIdxList = taosArrayDestroy(pBlockScanInfo->pBlockIdxList); - // TODO: keep skyline for reuse - pBlockScanInfo->delSkyline = taosArrayDestroy(pBlockScanInfo->delSkyline); - } } + // resetDataBlockScanInfo excluding lastKey + STableBlockScanInfo** p = NULL; + + int32_t step = ASCENDING_TRAVERSE(pReader->info.order)? 1:-1; + + int32_t iter = 0; + while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) { + STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; + clearBlockScanInfo(pInfo); + pInfo->sttKeyInfo.nextProcKey = pInfo->lastProcKey + step; + } + + pStatus->uidList.currentIndex = 0; + initReaderStatus(pStatus); + tsdbUntakeReadSnap2(pReader, pReader->pReadSnap, false); pReader->pReadSnap = NULL; if (pReader->bDurationOrder) { @@ -4280,6 +4222,10 @@ int32_t tsdbReaderSuspend2(STsdbReader* pReader) { } pReader->flag = READER_STATUS_SUSPEND; +#if SUSPEND_RESUME_TEST + tsem_post(&pReader->resumeAfterSuspend); +#endif + tsdbDebug("reader: %p suspended uid %" PRIu64 " in this query %s", pReader, pBlockScanInfo ? 
pBlockScanInfo->uid : 0, pReader->idStr); return code; @@ -4491,6 +4437,16 @@ int32_t tsdbNextDataBlock2(STsdbReader* pReader, bool* hasNext) { SReaderStatus* pStatus = &pReader->status; + // NOTE: the following codes is used to perform test for suspend/resume for tsdbReader when it blocks the commit + // the data should be ingested in round-robin and all the child tables should be createted before ingesting data + // the version range of query will be used to identify the correctness of suspend/resume functions. + // this function will blocked before loading the SECOND block from vnode-buffer, and restart itself from sst-files +#if SUSPEND_RESUME_TEST + if (!pReader->status.suspendInvoked && !pReader->status.loadFromFile) { + tsem_wait(&pReader->resumeAfterSuspend); + } +#endif + code = tsdbAcquireReader(pReader); qTrace("tsdb/read: %p, take read mutex, code: %d", pReader, code); @@ -5174,4 +5130,4 @@ void tsdbReaderSetCloseFlag(STsdbReader* pReader) { /*pReader->code = TSDB_CODE_ void tsdbReaderSetNotifyCb(STsdbReader* pReader, TsdReaderNotifyCbFn notifyFn, void* param) { pReader->notifyFn = notifyFn; pReader->notifyParam = param; -} \ No newline at end of file +} diff --git a/source/dnode/vnode/src/tsdb/tsdbReadUtil.c b/source/dnode/vnode/src/tsdb/tsdbReadUtil.c index 305399e0af..24c526a906 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReadUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbReadUtil.c @@ -210,6 +210,7 @@ void clearBlockScanInfo(STableBlockScanInfo* p) { p->iterInit = false; p->iter.hasVal = false; p->iiter.hasVal = false; + p->sttKeyInfo.status = STT_FILE_READER_UNINIT; if (p->iter.iter != NULL) { p->iter.iter = tsdbTbDataIterDestroy(p->iter.iter); diff --git a/source/dnode/vnode/src/tsdb/tsdbReadUtil.h b/source/dnode/vnode/src/tsdb/tsdbReadUtil.h index c7331e1913..87c5e1b508 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReadUtil.h +++ b/source/dnode/vnode/src/tsdb/tsdbReadUtil.h @@ -97,7 +97,7 @@ typedef struct SResultBlockInfo { int64_t capacity; } SResultBlockInfo; -typedef struct SCostSummary { +typedef struct SReadCostSummary { int64_t numOfBlocks; double blockLoadTime; double buildmemBlock; @@ -111,7 +111,7 @@ typedef struct SCostSummary { double createScanInfoList; double createSkylineIterTime; double initLastBlockReader; -} SCostSummary; +} SReadCostSummary; typedef struct STableUidList { uint64_t* tableUidList; // access table uid list in uid ascending order list @@ -123,12 +123,6 @@ typedef struct { int32_t numOfSttFiles; } SBlockNumber; -typedef struct SBlockIndex { - int32_t ordinalIndex; - int64_t inFileOffset; - STimeWindow window; // todo replace it with overlap flag. -} SBlockIndex; - typedef struct SBlockOrderWrapper { int64_t uid; int64_t offset; @@ -193,6 +187,7 @@ typedef struct SFileBlockDumpInfo { } SFileBlockDumpInfo; typedef struct SReaderStatus { + bool suspendInvoked; bool loadFromFile; // check file stage bool composedDataBlock; // the returned data block is a composed block or not SSHashObj* pTableMap; // SHash @@ -227,7 +222,8 @@ struct STsdbReader { int32_t type; // query type: 1. retrieve all data blocks, 2. 
retrieve direct prev|next rows SBlockLoadSuppInfo suppInfo; STsdbReadSnap* pReadSnap; - SCostSummary cost; + tsem_t resumeAfterSuspend; + SReadCostSummary cost; SHashObj** pIgnoreTables; SSHashObj* pSchemaMap; // keep the retrieved schema info, to avoid the overhead by repeatly load schema SDataFileReader* pFileReader; // the file reader diff --git a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c index 8b9cae42fc..e6f419362c 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c +++ b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c @@ -26,7 +26,17 @@ static int32_t tsdbOpenFileImpl(STsdbFD *pFD) { if (pFD->pFD == NULL) { int errsv = errno; const char *object_name = taosDirEntryBaseName((char *)path); - long s3_size = tsS3Enabled ? s3Size(object_name) : 0; + long s3_size = 0; + if (tsS3Enabled) { + long size = s3Size(object_name); + if (size < 0) { + code = terrno = TSDB_CODE_FAILED_TO_CONNECT_S3; + goto _exit; + } + + s3_size = size; + } + if (tsS3Enabled && !strncmp(path + strlen(path) - 5, ".data", 5) && s3_size > 0) { #ifndef S3_BLOCK_CACHE s3EvictCache(path, s3_size); @@ -48,6 +58,7 @@ static int32_t tsdbOpenFileImpl(STsdbFD *pFD) { // pFD->szFile = s3_size; #endif } else { + tsdbInfo("no file: %s", path); code = TAOS_SYSTEM_ERROR(errsv); // taosMemoryFree(pFD); goto _exit; @@ -283,7 +294,7 @@ _exit: return code; } -static int32_t tsdbReadFileS3(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size) { +static int32_t tsdbReadFileS3(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size, int64_t szHint) { int32_t code = 0; int64_t n = 0; int32_t szPgCont = PAGE_CONTENT_SIZE(pFD->szPage); @@ -330,7 +341,7 @@ static int32_t tsdbReadFileS3(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64 memcpy(pBuf + n, pFD->pBuf + bOffset, nRead); n += nRead; - pgno++; + ++pgno; bOffset = 0; } @@ -339,7 +350,12 @@ static int32_t tsdbReadFileS3(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64 uint8_t *pBlock = NULL; int64_t retrieve_offset = PAGE_OFFSET(pgno, pFD->szPage); int64_t pgnoEnd = pgno - 1 + (bOffset + size - n + szPgCont - 1) / szPgCont; - int64_t retrieve_size = (pgnoEnd - pgno + 1) * pFD->szPage; + + if (szHint > 0) { + pgnoEnd = pgno - 1 + (bOffset + szHint - n + szPgCont - 1) / szPgCont; + } + + int64_t retrieve_size = (pgnoEnd - pgno + 1) * pFD->szPage; code = s3GetObjectBlock(pFD->objName, retrieve_offset, retrieve_size, 1, &pBlock); if (code != TSDB_CODE_SUCCESS) { goto _exit; @@ -350,6 +366,10 @@ static int32_t tsdbReadFileS3(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64 for (int i = 0; i < nPage; ++i) { tsdbCacheSetPageS3(pFD->pTsdb->pgCache, pFD, pgno, pBlock + i * pFD->szPage); + if (szHint > 0 && n >= size) { + ++pgno; + continue; + } memcpy(pFD->pBuf, pBlock + i * pFD->szPage, pFD->szPage); // check @@ -364,7 +384,7 @@ static int32_t tsdbReadFileS3(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64 memcpy(pBuf + n, pFD->pBuf + bOffset, nRead); n += nRead; - pgno++; + ++pgno; bOffset = 0; } @@ -375,7 +395,7 @@ _exit: return code; } -int32_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size) { +int32_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size, int64_t szHint) { int32_t code = 0; if (!pFD->pFD) { code = tsdbOpenFileImpl(pFD); @@ -385,7 +405,7 @@ int32_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size) } if (pFD->s3File && tsS3BlockSize < 0) { - return tsdbReadFileS3(pFD, offset, pBuf, size); + return tsdbReadFileS3(pFD, offset, pBuf, size, 
szHint); } else { return tsdbReadFileImp(pFD, offset, pBuf, size); } @@ -1141,7 +1161,7 @@ int32_t tsdbReadBlockIdx(SDataFReader *pReader, SArray *aBlockIdx) { if (code) goto _err; // read - code = tsdbReadFile(pReader->pHeadFD, offset, pReader->aBuf[0], size); + code = tsdbReadFile(pReader->pHeadFD, offset, pReader->aBuf[0], size, 0); if (code) goto _err; // decode @@ -1178,7 +1198,7 @@ int32_t tsdbReadSttBlk(SDataFReader *pReader, int32_t iStt, SArray *aSttBlk) { if (code) goto _err; // read - code = tsdbReadFile(pReader->aSttFD[iStt], offset, pReader->aBuf[0], size); + code = tsdbReadFile(pReader->aSttFD[iStt], offset, pReader->aBuf[0], size, 0); if (code) goto _err; // decode @@ -1211,7 +1231,7 @@ int32_t tsdbReadDataBlk(SDataFReader *pReader, SBlockIdx *pBlockIdx, SMapData *m if (code) goto _err; // read - code = tsdbReadFile(pReader->pHeadFD, offset, pReader->aBuf[0], size); + code = tsdbReadFile(pReader->pHeadFD, offset, pReader->aBuf[0], size, 0); if (code) goto _err; // decode @@ -1242,7 +1262,7 @@ int32_t tsdbReadBlockSma(SDataFReader *pReader, SDataBlk *pDataBlk, SArray *aCol if (code) goto _err; // read - code = tsdbReadFile(pReader->pSmaFD, pSmaInfo->offset, pReader->aBuf[0], pSmaInfo->size); + code = tsdbReadFile(pReader->pSmaFD, pSmaInfo->offset, pReader->aBuf[0], pSmaInfo->size, 0); if (code) goto _err; // decode @@ -1276,7 +1296,7 @@ static int32_t tsdbReadBlockDataImpl(SDataFReader *pReader, SBlockInfo *pBlkInfo code = tRealloc(&pReader->aBuf[0], pBlkInfo->szKey); if (code) goto _err; - code = tsdbReadFile(pFD, pBlkInfo->offset, pReader->aBuf[0], pBlkInfo->szKey); + code = tsdbReadFile(pFD, pBlkInfo->offset, pReader->aBuf[0], pBlkInfo->szKey, 0); if (code) goto _err; SDiskDataHdr hdr; @@ -1322,7 +1342,7 @@ static int32_t tsdbReadBlockDataImpl(SDataFReader *pReader, SBlockInfo *pBlkInfo code = tRealloc(&pReader->aBuf[0], hdr.szBlkCol); if (code) goto _err; - code = tsdbReadFile(pFD, offset, pReader->aBuf[0], hdr.szBlkCol); + code = tsdbReadFile(pFD, offset, pReader->aBuf[0], hdr.szBlkCol, 0); if (code) goto _err; } @@ -1366,7 +1386,7 @@ static int32_t tsdbReadBlockDataImpl(SDataFReader *pReader, SBlockInfo *pBlkInfo code = tRealloc(&pReader->aBuf[1], size); if (code) goto _err; - code = tsdbReadFile(pFD, offset, pReader->aBuf[1], size); + code = tsdbReadFile(pFD, offset, pReader->aBuf[1], size, 0); if (code) goto _err; code = tsdbDecmprColData(pReader->aBuf[1], pBlockCol, hdr.cmprAlg, hdr.nRow, pColData, &pReader->aBuf[2]); @@ -1392,7 +1412,7 @@ int32_t tsdbReadDataBlockEx(SDataFReader *pReader, SDataBlk *pDataBlk, SBlockDat if (code) goto _err; // read - code = tsdbReadFile(pReader->pDataFD, pBlockInfo->offset, pReader->aBuf[0], pBlockInfo->szBlock); + code = tsdbReadFile(pReader->pDataFD, pBlockInfo->offset, pReader->aBuf[0], pBlockInfo->szBlock, 0); if (code) goto _err; // decmpr @@ -1444,7 +1464,7 @@ int32_t tsdbReadSttBlockEx(SDataFReader *pReader, int32_t iStt, SSttBlk *pSttBlk TSDB_CHECK_CODE(code, lino, _exit); // read - code = tsdbReadFile(pReader->aSttFD[iStt], pSttBlk->bInfo.offset, pReader->aBuf[0], pSttBlk->bInfo.szBlock); + code = tsdbReadFile(pReader->aSttFD[iStt], pSttBlk->bInfo.offset, pReader->aBuf[0], pSttBlk->bInfo.szBlock, 0); TSDB_CHECK_CODE(code, lino, _exit); // decmpr @@ -1700,7 +1720,7 @@ int32_t tsdbReadDelDatav1(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelDa if (code) goto _err; // read - code = tsdbReadFile(pReader->pReadH, offset, pReader->aBuf[0], size); + code = tsdbReadFile(pReader->pReadH, offset, pReader->aBuf[0], size, 0); if 
(code) goto _err; // // decode @@ -1740,7 +1760,7 @@ int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx) { if (code) goto _err; // read - code = tsdbReadFile(pReader->pReadH, offset, pReader->aBuf[0], size); + code = tsdbReadFile(pReader->pReadH, offset, pReader->aBuf[0], size, 0); if (code) goto _err; // decode diff --git a/source/dnode/vnode/src/tsdb/tsdbRetention.c b/source/dnode/vnode/src/tsdb/tsdbRetention.c index f6888ba9cb..d8f1ad7c6c 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRetention.c +++ b/source/dnode/vnode/src/tsdb/tsdbRetention.c @@ -249,7 +249,7 @@ _exit: if (code) { TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); } else { - tsdbInfo("vid:%d, cid:%" PRId64 ", %s done", TD_VID(rtner->tsdb->pVnode), rtner->cid, __func__); + tsdbDebug("vid:%d, cid:%" PRId64 ", %s done", TD_VID(rtner->tsdb->pVnode), rtner->cid, __func__); } return code; } @@ -279,7 +279,7 @@ _exit: if (code) { TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); } else { - tsdbInfo("vid:%d, cid:%" PRId64 ", %s done", TD_VID(rtner->tsdb->pVnode), rtner->cid, __func__); + tsdbDebug("vid:%d, cid:%" PRId64 ", %s done", TD_VID(rtner->tsdb->pVnode), rtner->cid, __func__); } tsdbFSDestroyCopySnapshot(&rtner->fsetArr); return code; @@ -391,32 +391,6 @@ _exit: static void tsdbFreeRtnArg(void *arg) { taosMemoryFree(arg); } -static int32_t tsdbDoRetentionSync(void *arg) { - int32_t code = 0; - int32_t lino = 0; - SRTNer rtner[1] = {0}; - - code = tsdbDoRetentionBegin(arg, rtner); - TSDB_CHECK_CODE(code, lino, _exit); - - STFileSet *fset; - TARRAY2_FOREACH(rtner->fsetArr, fset) { - code = tsdbDoRetentionOnFileSet(rtner, fset); - TSDB_CHECK_CODE(code, lino, _exit); - } - - code = tsdbDoRetentionEnd(rtner); - TSDB_CHECK_CODE(code, lino, _exit); - -_exit: - if (code) { - TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); - } - tsem_post(&((SRtnArg *)arg)->tsdb->pVnode->canCommit); - tsdbFreeRtnArg(arg); - return code; -} - static int32_t tsdbDoRetentionAsync(void *arg) { int32_t code = 0; int32_t lino = 0; @@ -454,49 +428,41 @@ _exit: int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync) { int32_t code = 0; - if (sync) { // sync retention + taosThreadMutexLock(&tsdb->mutex); + + STFileSet *fset; + TARRAY2_FOREACH(tsdb->pFS->fSetArr, fset) { + code = tsdbTFileSetOpenChannel(fset); + if (code) { + taosThreadMutexUnlock(&tsdb->mutex); + return code; + } + SRtnArg *arg = taosMemoryMalloc(sizeof(*arg)); if (arg == NULL) { + taosThreadMutexUnlock(&tsdb->mutex); return TSDB_CODE_OUT_OF_MEMORY; } arg->tsdb = tsdb; arg->now = now; - arg->fid = INT32_MAX; + arg->fid = fset->fid; - tsem_wait(&tsdb->pVnode->canCommit); - code = vnodeScheduleTask(tsdbDoRetentionSync, arg); + if (sync) { + code = vnodeAsyncC(vnodeAsyncHandle[0], tsdb->pVnode->commitChannel, EVA_PRIORITY_LOW, tsdbDoRetentionAsync, + tsdbFreeRtnArg, arg, NULL); + } else { + code = vnodeAsyncC(vnodeAsyncHandle[1], fset->bgTaskChannel, EVA_PRIORITY_LOW, tsdbDoRetentionAsync, + tsdbFreeRtnArg, arg, NULL); + } if (code) { - tsem_post(&tsdb->pVnode->canCommit); - taosMemoryFree(arg); + tsdbFreeRtnArg(arg); + taosThreadMutexUnlock(&tsdb->mutex); return code; } - } else { // async retention - taosThreadMutexLock(&tsdb->mutex); - - STFileSet *fset; - TARRAY2_FOREACH(tsdb->pFS->fSetArr, fset) { - SRtnArg *arg = taosMemoryMalloc(sizeof(*arg)); - if (arg == NULL) { - taosThreadMutexUnlock(&tsdb->mutex); - return TSDB_CODE_OUT_OF_MEMORY; - } - - arg->tsdb = tsdb; - arg->now = now; - arg->fid = fset->fid; - - code = tsdbFSScheduleBgTask(tsdb->pFS, 
fset->fid, TSDB_BG_TASK_RETENTION, tsdbDoRetentionAsync, tsdbFreeRtnArg, - arg, NULL); - if (code) { - tsdbFreeRtnArg(arg); - taosThreadMutexUnlock(&tsdb->mutex); - return code; - } - } - - taosThreadMutexUnlock(&tsdb->mutex); } + taosThreadMutexUnlock(&tsdb->mutex); + return code; } diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index a9da0fbcec..104c9b2f35 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -1032,9 +1032,6 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, void* pRang int32_t code = 0; int32_t lino = 0; - // disable background tasks - tsdbFSDisableBgTask(pTsdb->pFS); - // start to write writer[0] = taosMemoryCalloc(1, sizeof(*writer[0])); if (writer[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; @@ -1107,7 +1104,6 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** writer, int8_t rollback) { taosThreadMutexUnlock(&writer[0]->tsdb->mutex); } - tsdbFSEnableBgTask(tsdb->pFS); tsdbIterMergerClose(&writer[0]->ctx->tombIterMerger); tsdbIterMergerClose(&writer[0]->ctx->dataIterMerger); @@ -1595,3 +1591,6 @@ _out: return code; } + +extern int32_t tsdbFSCancelAllBgTask(STFileSystem* fs); +int32_t tsdbCancelAllBgTask(STsdb* tsdb) { return tsdbFSCancelAllBgTask(tsdb->pFS); } \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c index 7c3b185e20..f26c6540df 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c @@ -60,7 +60,7 @@ int32_t tsdbSttFileReaderOpen(const char *fname, const SSttFileReaderConfig *con int64_t offset = config->file->size - sizeof(SSttFooter); ASSERT(offset >= TSDB_FHDR_SIZE); - code = tsdbReadFile(reader[0]->fd, offset, (uint8_t *)(reader[0]->footer), sizeof(SSttFooter)); + code = tsdbReadFile(reader[0]->fd, offset, (uint8_t *)(reader[0]->footer), sizeof(SSttFooter), 0); TSDB_CHECK_CODE(code, lino, _exit); _exit: @@ -97,7 +97,7 @@ int32_t tsdbSttFileReadStatisBlk(SSttFileReader *reader, const TStatisBlkArray * if (!data) return TSDB_CODE_OUT_OF_MEMORY; int32_t code = - tsdbReadFile(reader->fd, reader->footer->statisBlkPtr->offset, data, reader->footer->statisBlkPtr->size); + tsdbReadFile(reader->fd, reader->footer->statisBlkPtr->offset, data, reader->footer->statisBlkPtr->size, 0); if (code) { taosMemoryFree(data); return code; @@ -125,7 +125,7 @@ int32_t tsdbSttFileReadTombBlk(SSttFileReader *reader, const TTombBlkArray **tom if (!data) return TSDB_CODE_OUT_OF_MEMORY; int32_t code = - tsdbReadFile(reader->fd, reader->footer->tombBlkPtr->offset, data, reader->footer->tombBlkPtr->size); + tsdbReadFile(reader->fd, reader->footer->tombBlkPtr->offset, data, reader->footer->tombBlkPtr->size, 0); if (code) { taosMemoryFree(data); return code; @@ -152,7 +152,8 @@ int32_t tsdbSttFileReadSttBlk(SSttFileReader *reader, const TSttBlkArray **sttBl void *data = taosMemoryMalloc(reader->footer->sttBlkPtr->size); if (!data) return TSDB_CODE_OUT_OF_MEMORY; - int32_t code = tsdbReadFile(reader->fd, reader->footer->sttBlkPtr->offset, data, reader->footer->sttBlkPtr->size); + int32_t code = + tsdbReadFile(reader->fd, reader->footer->sttBlkPtr->offset, data, reader->footer->sttBlkPtr->size, 0); if (code) { taosMemoryFree(data); return code; @@ -177,7 +178,7 @@ int32_t tsdbSttFileReadBlockData(SSttFileReader *reader, const SSttBlk *sttBlk, code = tRealloc(&reader->config->bufArr[0], sttBlk->bInfo.szBlock); TSDB_CHECK_CODE(code, 
lino, _exit); - code = tsdbReadFile(reader->fd, sttBlk->bInfo.offset, reader->config->bufArr[0], sttBlk->bInfo.szBlock); + code = tsdbReadFile(reader->fd, sttBlk->bInfo.offset, reader->config->bufArr[0], sttBlk->bInfo.szBlock, 0); TSDB_CHECK_CODE(code, lino, _exit); code = tDecmprBlockData(reader->config->bufArr[0], sttBlk->bInfo.szBlock, bData, &reader->config->bufArr[1]); @@ -209,7 +210,7 @@ int32_t tsdbSttFileReadBlockDataByColumn(SSttFileReader *reader, const SSttBlk * code = tRealloc(&reader->config->bufArr[0], sttBlk->bInfo.szKey); TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbReadFile(reader->fd, sttBlk->bInfo.offset, reader->config->bufArr[0], sttBlk->bInfo.szKey); + code = tsdbReadFile(reader->fd, sttBlk->bInfo.offset, reader->config->bufArr[0], sttBlk->bInfo.szKey, 0); TSDB_CHECK_CODE(code, lino, _exit); // hdr @@ -255,7 +256,7 @@ int32_t tsdbSttFileReadBlockDataByColumn(SSttFileReader *reader, const SSttBlk * TSDB_CHECK_CODE(code, lino, _exit); code = tsdbReadFile(reader->fd, sttBlk->bInfo.offset + sttBlk->bInfo.szKey, reader->config->bufArr[0], - hdr->szBlkCol); + hdr->szBlkCol, 0); TSDB_CHECK_CODE(code, lino, _exit); } @@ -296,7 +297,7 @@ int32_t tsdbSttFileReadBlockDataByColumn(SSttFileReader *reader, const SSttBlk * TSDB_CHECK_CODE(code, lino, _exit); code = tsdbReadFile(reader->fd, sttBlk->bInfo.offset + sttBlk->bInfo.szKey + hdr->szBlkCol + blockCol->offset, - reader->config->bufArr[1], size1); + reader->config->bufArr[1], size1, 0); TSDB_CHECK_CODE(code, lino, _exit); code = tsdbDecmprColData(reader->config->bufArr[1], blockCol, hdr->cmprAlg, hdr->nRow, colData, @@ -321,7 +322,7 @@ int32_t tsdbSttFileReadTombBlock(SSttFileReader *reader, const STombBlk *tombBlk code = tRealloc(&reader->config->bufArr[0], tombBlk->dp->size); TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbReadFile(reader->fd, tombBlk->dp->offset, reader->config->bufArr[0], tombBlk->dp->size); + code = tsdbReadFile(reader->fd, tombBlk->dp->offset, reader->config->bufArr[0], tombBlk->dp->size, 0); if (code) TSDB_CHECK_CODE(code, lino, _exit); int64_t size = 0; @@ -352,7 +353,7 @@ int32_t tsdbSttFileReadStatisBlock(SSttFileReader *reader, const SStatisBlk *sta code = tRealloc(&reader->config->bufArr[0], statisBlk->dp->size); TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbReadFile(reader->fd, statisBlk->dp->offset, reader->config->bufArr[0], statisBlk->dp->size); + code = tsdbReadFile(reader->fd, statisBlk->dp->offset, reader->config->bufArr[0], statisBlk->dp->size, 0); TSDB_CHECK_CODE(code, lino, _exit); int64_t size = 0; @@ -405,7 +406,7 @@ struct SSttFileWriter { }; static int32_t tsdbFileDoWriteSttBlockData(STsdbFD *fd, SBlockData *blockData, int8_t cmprAlg, int64_t *fileSize, - TSttBlkArray *sttBlkArray, uint8_t **bufArr, SVersionRange *range) { + TSttBlkArray *sttBlkArray, uint8_t **bufArr, SVersionRange *range) { if (blockData->nRow == 0) return 0; int32_t code = 0; diff --git a/source/dnode/vnode/src/vnd/vnodeAsync.c b/source/dnode/vnode/src/vnd/vnodeAsync.c new file mode 100644 index 0000000000..c95d2324aa --- /dev/null +++ b/source/dnode/vnode/src/vnd/vnodeAsync.c @@ -0,0 +1,719 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "vnd.h" +#include "vnodeHash.h" + +typedef struct SVATask SVATask; +typedef struct SVAChannel SVAChannel; + +#define VNODE_ASYNC_DEFAULT_WORKERS 4 +#define VNODE_ASYNC_MAX_WORKERS 256 + +// priority + +#define EVA_PRIORITY_MAX (EVA_PRIORITY_LOW + 1) + +// worker +typedef enum { + EVA_WORKER_STATE_UINIT = 0, + EVA_WORKER_STATE_ACTIVE, + EVA_WORKER_STATE_IDLE, + EVA_WORKER_STATE_STOP, +} EVWorkerState; + +typedef struct { + SVAsync *async; + int32_t workerId; + EVWorkerState state; + TdThread thread; + SVATask *runningTask; +} SVWorker; + +// task +typedef enum { + EVA_TASK_STATE_WAITTING = 0, + EVA_TASK_STATE_RUNNING, +} EVATaskState; + +struct SVATask { + int64_t taskId; + EVAPriority priority; + int32_t priorScore; + SVAChannel *channel; + int32_t (*execute)(void *); + void (*complete)(void *); + void *arg; + EVATaskState state; + + // wait + int32_t numWait; + TdThreadCond waitCond; + + // queue + struct SVATask *prev; + struct SVATask *next; +}; + +#define VATASK_PIORITY(task_) ((task_)->priority - ((task_)->priorScore / 4)) + +// async channel +typedef enum { + EVA_CHANNEL_STATE_OPEN = 0, + EVA_CHANNEL_STATE_CLOSE, +} EVAChannelState; + +struct SVAChannel { + int64_t channelId; + EVAChannelState state; + SVATask queue[EVA_PRIORITY_MAX]; + SVATask *scheduled; + + SVAChannel *prev; + SVAChannel *next; +}; + +// async handle +struct SVAsync { + const char *label; + + TdThreadMutex mutex; + TdThreadCond hasTask; + bool stop; + + // worker + int32_t numWorkers; + int32_t numLaunchWorkers; + int32_t numIdleWorkers; + SVWorker workers[VNODE_ASYNC_MAX_WORKERS]; + + // channel + int64_t nextChannelId; + int32_t numChannels; + SVAChannel chList; + SVHashTable *channelTable; + + // task + int64_t nextTaskId; + int32_t numTasks; + SVATask queue[EVA_PRIORITY_MAX]; + SVHashTable *taskTable; +}; + +static int32_t vnodeAsyncTaskDone(SVAsync *async, SVATask *task) { + int32_t ret; + + if (task->channel != NULL && task->channel->scheduled == task) { + task->channel->scheduled = NULL; + if (task->channel->state == EVA_CHANNEL_STATE_CLOSE) { + taosMemoryFree(task->channel); + } else { + for (int32_t i = 0; i < EVA_PRIORITY_MAX; i++) { + SVATask *nextTask = task->channel->queue[i].next; + if (nextTask != &task->channel->queue[i]) { + if (task->channel->scheduled == NULL) { + task->channel->scheduled = nextTask; + nextTask->next->prev = nextTask->prev; + nextTask->prev->next = nextTask->next; + } else { + nextTask->priorScore++; + int32_t newPriority = VATASK_PIORITY(nextTask); + if (newPriority != i) { + // remove from current priority queue + nextTask->prev->next = nextTask->next; + nextTask->next->prev = nextTask->prev; + // add to new priority queue + nextTask->next = &task->channel->queue[newPriority]; + nextTask->prev = task->channel->queue[newPriority].prev; + nextTask->next->prev = nextTask; + nextTask->prev->next = nextTask; + } + } + } + } + + if (task->channel->scheduled != NULL) { + int32_t priority = VATASK_PIORITY(task->channel->scheduled); + task->channel->scheduled->next = &async->queue[priority]; + task->channel->scheduled->prev = async->queue[priority].prev; + task->channel->scheduled->next->prev = task->channel->scheduled; + 
task->channel->scheduled->prev->next = task->channel->scheduled; + } + } + } + + ret = vHashDrop(async->taskTable, task); + if (ret != 0) { + ASSERT(0); + } + async->numTasks--; + + // call complete callback + if (task->complete) { + task->complete(task->arg); + } + + if (task->numWait == 0) { + taosThreadCondDestroy(&task->waitCond); + taosMemoryFree(task); + } else if (task->numWait == 1) { + taosThreadCondSignal(&task->waitCond); + } else { + taosThreadCondBroadcast(&task->waitCond); + } + return 0; +} + +static int32_t vnodeAsyncCancelAllTasks(SVAsync *async) { + for (int32_t i = 0; i < EVA_PRIORITY_MAX; i++) { + while (async->queue[i].next != &async->queue[i]) { + SVATask *task = async->queue[i].next; + task->prev->next = task->next; + task->next->prev = task->prev; + vnodeAsyncTaskDone(async, task); + } + } + return 0; +} + +static void *vnodeAsyncLoop(void *arg) { + SVWorker *worker = (SVWorker *)arg; + SVAsync *async = worker->async; + + setThreadName(async->label); + + for (;;) { + taosThreadMutexLock(&async->mutex); + + // finish last running task + if (worker->runningTask != NULL) { + vnodeAsyncTaskDone(async, worker->runningTask); + worker->runningTask = NULL; + } + + for (;;) { + if (async->stop || worker->workerId >= async->numWorkers) { + if (async->stop) { // cancel all tasks + vnodeAsyncCancelAllTasks(async); + } + worker->state = EVA_WORKER_STATE_STOP; + async->numLaunchWorkers--; + taosThreadMutexUnlock(&async->mutex); + return NULL; + } + + for (int32_t i = 0; i < EVA_PRIORITY_MAX; i++) { + SVATask *task = async->queue[i].next; + if (task != &async->queue[i]) { + if (worker->runningTask == NULL) { + worker->runningTask = task; + task->prev->next = task->next; + task->next->prev = task->prev; + } else { // promote priority + task->priorScore++; + int32_t priority = VATASK_PIORITY(task); + if (priority != i) { + // remove from current priority queue + task->prev->next = task->next; + task->next->prev = task->prev; + // add to new priority queue + task->next = &async->queue[priority]; + task->prev = async->queue[priority].prev; + task->next->prev = task; + task->prev->next = task; + } + } + } + } + + if (worker->runningTask == NULL) { + worker->state = EVA_WORKER_STATE_IDLE; + async->numIdleWorkers++; + taosThreadCondWait(&async->hasTask, &async->mutex); + async->numIdleWorkers--; + worker->state = EVA_WORKER_STATE_ACTIVE; + } else { + worker->runningTask->state = EVA_TASK_STATE_RUNNING; + break; + } + } + + taosThreadMutexUnlock(&async->mutex); + + // do run the task + worker->runningTask->execute(worker->runningTask->arg); + } + + return NULL; +} + +static uint32_t vnodeAsyncTaskHash(const void *obj) { + SVATask *task = (SVATask *)obj; + return MurmurHash3_32((const char *)(&task->taskId), sizeof(task->taskId)); +} + +static int32_t vnodeAsyncTaskCompare(const void *obj1, const void *obj2) { + SVATask *task1 = (SVATask *)obj1; + SVATask *task2 = (SVATask *)obj2; + if (task1->taskId < task2->taskId) { + return -1; + } else if (task1->taskId > task2->taskId) { + return 1; + } + return 0; +} + +static uint32_t vnodeAsyncChannelHash(const void *obj) { + SVAChannel *channel = (SVAChannel *)obj; + return MurmurHash3_32((const char *)(&channel->channelId), sizeof(channel->channelId)); +} + +static int32_t vnodeAsyncChannelCompare(const void *obj1, const void *obj2) { + SVAChannel *channel1 = (SVAChannel *)obj1; + SVAChannel *channel2 = (SVAChannel *)obj2; + if (channel1->channelId < channel2->channelId) { + return -1; + } else if (channel1->channelId > channel2->channelId) { + 
return 1; + } + return 0; +} + +int32_t vnodeAsyncInit(SVAsync **async, char *label) { + int32_t ret; + + if (async == NULL) { + return TSDB_CODE_INVALID_PARA; + } + + if (label == NULL) { + label = "anonymous"; + } + + (*async) = (SVAsync *)taosMemoryCalloc(1, sizeof(SVAsync) + strlen(label) + 1); + if ((*async) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + strcpy((char *)((*async) + 1), label); + (*async)->label = (const char *)((*async) + 1); + + taosThreadMutexInit(&(*async)->mutex, NULL); + taosThreadCondInit(&(*async)->hasTask, NULL); + (*async)->stop = false; + + // worker + (*async)->numWorkers = VNODE_ASYNC_DEFAULT_WORKERS; + (*async)->numLaunchWorkers = 0; + (*async)->numIdleWorkers = 0; + for (int32_t i = 0; i < VNODE_ASYNC_MAX_WORKERS; i++) { + (*async)->workers[i].async = (*async); + (*async)->workers[i].workerId = i; + (*async)->workers[i].state = EVA_WORKER_STATE_UINIT; + (*async)->workers[i].runningTask = NULL; + } + + // channel + (*async)->nextChannelId = 0; + (*async)->numChannels = 0; + (*async)->chList.prev = &(*async)->chList; + (*async)->chList.next = &(*async)->chList; + ret = vHashInit(&(*async)->channelTable, vnodeAsyncChannelHash, vnodeAsyncChannelCompare); + if (ret != 0) { + taosThreadMutexDestroy(&(*async)->mutex); + taosThreadCondDestroy(&(*async)->hasTask); + taosMemoryFree(*async); + return ret; + } + + // task + (*async)->nextTaskId = 0; + (*async)->numTasks = 0; + for (int32_t i = 0; i < EVA_PRIORITY_MAX; i++) { + (*async)->queue[i].next = &(*async)->queue[i]; + (*async)->queue[i].prev = &(*async)->queue[i]; + } + ret = vHashInit(&(*async)->taskTable, vnodeAsyncTaskHash, vnodeAsyncTaskCompare); + if (ret != 0) { + vHashDestroy(&(*async)->channelTable); + taosThreadMutexDestroy(&(*async)->mutex); + taosThreadCondDestroy(&(*async)->hasTask); + taosMemoryFree(*async); + return ret; + } + + return 0; +} + +int32_t vnodeAsyncDestroy(SVAsync **async) { + if ((*async) == NULL) { + return TSDB_CODE_INVALID_PARA; + } + + // set stop and broadcast + taosThreadMutexLock(&(*async)->mutex); + (*async)->stop = true; + taosThreadCondBroadcast(&(*async)->hasTask); + taosThreadMutexUnlock(&(*async)->mutex); + + // join all workers + for (int32_t i = 0; i < VNODE_ASYNC_MAX_WORKERS; i++) { + taosThreadMutexLock(&(*async)->mutex); + EVWorkerState state = (*async)->workers[i].state; + taosThreadMutexUnlock(&(*async)->mutex); + + if (state == EVA_WORKER_STATE_UINIT) { + continue; + } + + taosThreadJoin((*async)->workers[i].thread, NULL); + ASSERT((*async)->workers[i].state == EVA_WORKER_STATE_STOP); + (*async)->workers[i].state = EVA_WORKER_STATE_UINIT; + } + + // close all channels + for (SVAChannel *channel = (*async)->chList.next; channel != &(*async)->chList; channel = (*async)->chList.next) { + channel->next->prev = channel->prev; + channel->prev->next = channel->next; + + int32_t ret = vHashDrop((*async)->channelTable, channel); + if (ret) { + ASSERT(0); + } + (*async)->numChannels--; + taosMemoryFree(channel); + } + + ASSERT((*async)->numLaunchWorkers == 0); + ASSERT((*async)->numIdleWorkers == 0); + ASSERT((*async)->numChannels == 0); + ASSERT((*async)->numTasks == 0); + + taosThreadMutexDestroy(&(*async)->mutex); + taosThreadCondDestroy(&(*async)->hasTask); + + vHashDestroy(&(*async)->channelTable); + vHashDestroy(&(*async)->taskTable); + taosMemoryFree(*async); + *async = NULL; + + return 0; +} + +static int32_t vnodeAsyncLaunchWorker(SVAsync *async) { + for (int32_t i = 0; i < async->numWorkers; i++) { + ASSERT(async->workers[i].state != 
EVA_WORKER_STATE_IDLE); + if (async->workers[i].state == EVA_WORKER_STATE_ACTIVE) { + continue; + } else if (async->workers[i].state == EVA_WORKER_STATE_STOP) { + taosThreadJoin(async->workers[i].thread, NULL); + async->workers[i].state = EVA_WORKER_STATE_UINIT; + } + + taosThreadCreate(&async->workers[i].thread, NULL, vnodeAsyncLoop, &async->workers[i]); + async->workers[i].state = EVA_WORKER_STATE_ACTIVE; + async->numLaunchWorkers++; + break; + } + return 0; +} + +int32_t vnodeAsync(SVAsync *async, EVAPriority priority, int32_t (*execute)(void *), void (*complete)(void *), + void *arg, int64_t *taskId) { + return vnodeAsyncC(async, 0, priority, execute, complete, arg, taskId); +} + +int32_t vnodeAsyncC(SVAsync *async, int64_t channelId, EVAPriority priority, int32_t (*execute)(void *), + void (*complete)(void *), void *arg, int64_t *taskId) { + if (async == NULL || execute == NULL || channelId < 0) { + return TSDB_CODE_INVALID_PARA; + } + + int64_t id; + + // create task object + SVATask *task = (SVATask *)taosMemoryCalloc(1, sizeof(SVATask)); + if (task == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + task->priority = priority; + task->priorScore = 0; + task->execute = execute; + task->complete = complete; + task->arg = arg; + task->state = EVA_TASK_STATE_WAITTING; + task->numWait = 0; + taosThreadCondInit(&task->waitCond, NULL); + + // schedule task + taosThreadMutexLock(&async->mutex); + + if (channelId == 0) { + task->channel = NULL; + } else { + SVAChannel channel = {.channelId = channelId}; + vHashGet(async->channelTable, &channel, (void **)&task->channel); + if (task->channel == NULL) { + taosThreadMutexUnlock(&async->mutex); + taosThreadCondDestroy(&task->waitCond); + taosMemoryFree(task); + return TSDB_CODE_INVALID_PARA; + } + } + + task->taskId = id = ++async->nextTaskId; + + // add task to hash table + int32_t ret = vHashPut(async->taskTable, task); + if (ret != 0) { + taosThreadMutexUnlock(&async->mutex); + taosThreadCondDestroy(&task->waitCond); + taosMemoryFree(task); + return ret; + } + + async->numTasks++; + + // add task to queue + if (task->channel == NULL || task->channel->scheduled == NULL) { + // add task to async->queue + if (task->channel) { + task->channel->scheduled = task; + } + + task->next = &async->queue[priority]; + task->prev = async->queue[priority].prev; + task->next->prev = task; + task->prev->next = task; + + // signal worker or launch new worker + if (async->numIdleWorkers > 0) { + taosThreadCondSignal(&(async->hasTask)); + } else if (async->numLaunchWorkers < async->numWorkers) { + vnodeAsyncLaunchWorker(async); + } + } else if (task->channel->scheduled->state == EVA_TASK_STATE_RUNNING || + priority >= VATASK_PIORITY(task->channel->scheduled)) { + // add task to task->channel->queue + task->next = &task->channel->queue[priority]; + task->prev = task->channel->queue[priority].prev; + task->next->prev = task; + task->prev->next = task; + } else { + // remove task->channel->scheduled from queue + task->channel->scheduled->prev->next = task->channel->scheduled->next; + task->channel->scheduled->next->prev = task->channel->scheduled->prev; + + // promote priority and add task->channel->scheduled to task->channel->queue + task->channel->scheduled->priorScore++; + int32_t newPriority = VATASK_PIORITY(task->channel->scheduled); + task->channel->scheduled->next = &task->channel->queue[newPriority]; + task->channel->scheduled->prev = task->channel->queue[newPriority].prev; + task->channel->scheduled->next->prev = task->channel->scheduled; + 
task->channel->scheduled->prev->next = task->channel->scheduled; + + // add task to queue + task->channel->scheduled = task; + task->next = &async->queue[priority]; + task->prev = async->queue[priority].prev; + task->next->prev = task; + task->prev->next = task; + } + + taosThreadMutexUnlock(&async->mutex); + + if (taskId != NULL) { + *taskId = id; + } + + return 0; +} + +int32_t vnodeAWait(SVAsync *async, int64_t taskId) { + if (async == NULL || taskId <= 0) { + return TSDB_CODE_INVALID_PARA; + } + + SVATask *task = NULL; + SVATask task2 = {.taskId = taskId}; + + taosThreadMutexLock(&async->mutex); + + vHashGet(async->taskTable, &task2, (void **)&task); + if (task) { + task->numWait++; + taosThreadCondWait(&task->waitCond, &async->mutex); + task->numWait--; + + if (task->numWait == 0) { + taosThreadCondDestroy(&task->waitCond); + taosMemoryFree(task); + } + } + + taosThreadMutexUnlock(&async->mutex); + + return 0; +} + +int32_t vnodeACancel(SVAsync *async, int64_t taskId) { + if (async == NULL) { + return TSDB_CODE_INVALID_PARA; + } + + int32_t ret = 0; + SVATask *task = NULL; + SVATask task2 = {.taskId = taskId}; + + taosThreadMutexLock(&async->mutex); + + vHashGet(async->taskTable, &task2, (void **)&task); + if (task) { + if (task->state == EVA_TASK_STATE_WAITTING) { + // remove from queue + task->next->prev = task->prev; + task->prev->next = task->next; + vnodeAsyncTaskDone(async, task); + } else { + ret = 0; // task is running, should return code TSDB_CODE_BUSY ?? + } + } + + taosThreadMutexUnlock(&async->mutex); + + return ret; +} + +int32_t vnodeAsyncSetWorkers(SVAsync *async, int32_t numWorkers) { + if (async == NULL || numWorkers <= 0 || numWorkers > VNODE_ASYNC_MAX_WORKERS) { + return TSDB_CODE_INVALID_PARA; + } + + taosThreadMutexLock(&async->mutex); + async->numWorkers = numWorkers; + if (async->numIdleWorkers > 0) { + taosThreadCondBroadcast(&async->hasTask); + } + taosThreadMutexUnlock(&async->mutex); + + return 0; +} + +int32_t vnodeAChannelInit(SVAsync *async, int64_t *channelId) { + if (async == NULL || channelId == NULL) { + return TSDB_CODE_INVALID_PARA; + } + + // create channel object + SVAChannel *channel = (SVAChannel *)taosMemoryMalloc(sizeof(SVAChannel)); + if (channel == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + channel->state = EVA_CHANNEL_STATE_OPEN; + for (int32_t i = 0; i < EVA_PRIORITY_MAX; i++) { + channel->queue[i].next = &channel->queue[i]; + channel->queue[i].prev = &channel->queue[i]; + } + channel->scheduled = NULL; + + // register channel + taosThreadMutexLock(&async->mutex); + + channel->channelId = *channelId = ++async->nextChannelId; + + // add to hash table + int32_t ret = vHashPut(async->channelTable, channel); + if (ret != 0) { + taosThreadMutexUnlock(&async->mutex); + taosMemoryFree(channel); + return ret; + } + + // add to list + channel->next = &async->chList; + channel->prev = async->chList.prev; + channel->next->prev = channel; + channel->prev->next = channel; + + async->numChannels++; + + taosThreadMutexUnlock(&async->mutex); + + return 0; +} + +int32_t vnodeAChannelDestroy(SVAsync *async, int64_t channelId, bool waitRunning) { + if (async == NULL || channelId <= 0) { + return TSDB_CODE_INVALID_PARA; + } + + SVAChannel *channel = NULL; + SVAChannel channel2 = {.channelId = channelId}; + + taosThreadMutexLock(&async->mutex); + + vHashGet(async->channelTable, &channel2, (void **)&channel); + if (channel) { + // unregister channel + channel->next->prev = channel->prev; + channel->prev->next = channel->next; + 
vHashDrop(async->channelTable, channel); + async->numChannels--; + + // cancel all waiting tasks + for (int32_t i = 0; i < EVA_PRIORITY_MAX; i++) { + while (channel->queue[i].next != &channel->queue[i]) { + SVATask *task = channel->queue[i].next; + task->prev->next = task->next; + task->next->prev = task->prev; + vnodeAsyncTaskDone(async, task); + } + } + + // cancel or wait the scheduled task + if (channel->scheduled == NULL || channel->scheduled->state == EVA_TASK_STATE_WAITTING) { + if (channel->scheduled) { + channel->scheduled->prev->next = channel->scheduled->next; + channel->scheduled->next->prev = channel->scheduled->prev; + vnodeAsyncTaskDone(async, channel->scheduled); + } + taosMemoryFree(channel); + } else { + if (waitRunning) { + // wait task + SVATask *task = channel->scheduled; + task->numWait++; + taosThreadCondWait(&task->waitCond, &async->mutex); + task->numWait--; + if (task->numWait == 0) { + taosThreadCondDestroy(&task->waitCond); + taosMemoryFree(task); + } + + taosMemoryFree(channel); + } else { + channel->state = EVA_CHANNEL_STATE_CLOSE; + } + } + } else { + taosThreadMutexUnlock(&async->mutex); + return TSDB_CODE_INVALID_PARA; + } + + taosThreadMutexUnlock(&async->mutex); + + return 0; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 50ca2f5d03..c8cd167393 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -13,9 +13,10 @@ * along with this program. If not, see . */ +#include "meta.h" +#include "sync.h" #include "vnd.h" #include "vnodeInt.h" -#include "sync.h" extern int32_t tsdbPreCommit(STsdb *pTsdb); extern int32_t tsdbCommitBegin(STsdb *pTsdb, SCommitInfo *pInfo); @@ -155,7 +156,8 @@ int vnodeShouldCommit(SVnode *pVnode, bool atExit) { taosThreadMutexLock(&pVnode->mutex); if (pVnode->inUse && diskAvail) { - needCommit = (pVnode->inUse->size > pVnode->inUse->node.size) || (pVnode->inUse->size > 0 && atExit); + needCommit = (pVnode->inUse->size > pVnode->inUse->node.size) || + (atExit && (pVnode->inUse->size > 0 || pVnode->pMeta->changed)); } taosThreadMutexUnlock(&pVnode->mutex); return needCommit; @@ -177,7 +179,7 @@ int vnodeSaveInfo(const char *dir, const SVnodeInfo *pInfo) { } // save info to a vnode_tmp.json - pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (pFile == NULL) { vError("failed to open info file:%s for write:%s", fname, terrstr()); terrno = TAOS_SYSTEM_ERROR(errno); @@ -201,10 +203,8 @@ int vnodeSaveInfo(const char *dir, const SVnodeInfo *pInfo) { // free info binary taosMemoryFree(data); - vInfo("vgId:%d, vnode info is saved, fname:%s replica:%d selfIndex:%d changeVersion:%d", - pInfo->config.vgId, fname, - pInfo->config.syncCfg.replicaNum, pInfo->config.syncCfg.myIndex, - pInfo->config.syncCfg.changeVersion); + vInfo("vgId:%d, vnode info is saved, fname:%s replica:%d selfIndex:%d changeVersion:%d", pInfo->config.vgId, fname, + pInfo->config.syncCfg.replicaNum, pInfo->config.syncCfg.myIndex, pInfo->config.syncCfg.changeVersion); return 0; @@ -287,9 +287,10 @@ static int32_t vnodePrepareCommit(SVnode *pVnode, SCommitInfo *pInfo) { char dir[TSDB_FILENAME_LEN] = {0}; int64_t lastCommitted = pInfo->info.state.committed; - tsem_wait(&pVnode->canCommit); + // wait last commit task + vnodeAWait(vnodeAsyncHandle[0], pVnode->commitTask); - if(syncNodeGetConfig(pVnode->sync, 
&pVnode->config.syncCfg) != 0) goto _exit; + if (syncNodeGetConfig(pVnode->sync, &pVnode->config.syncCfg) != 0) goto _exit; pVnode->state.commitTerm = pVnode->state.applyTerm; @@ -377,12 +378,11 @@ static int32_t vnodeCommitTask(void *arg) { vnodeReturnBufPool(pVnode); _exit: - // end commit - tsem_post(&pVnode->canCommit); - taosMemoryFree(pInfo); return code; } +static void vnodeCompleteCommit(void *arg) { taosMemoryFree(arg); } + int vnodeAsyncCommit(SVnode *pVnode) { int32_t code = 0; @@ -399,14 +399,14 @@ int vnodeAsyncCommit(SVnode *pVnode) { } // schedule the task - code = vnodeScheduleTask(vnodeCommitTask, pInfo); + code = vnodeAsyncC(vnodeAsyncHandle[0], pVnode->commitChannel, EVA_PRIORITY_HIGH, vnodeCommitTask, + vnodeCompleteCommit, pInfo, &pVnode->commitTask); _exit: if (code) { if (NULL != pInfo) { taosMemoryFree(pInfo); } - tsem_post(&pVnode->canCommit); vError("vgId:%d, %s failed since %s, commit id:%" PRId64, TD_VID(pVnode), __func__, tstrerror(code), pVnode->state.commitID); } else { @@ -418,8 +418,7 @@ _exit: int vnodeSyncCommit(SVnode *pVnode) { vnodeAsyncCommit(pVnode); - tsem_wait(&pVnode->canCommit); - tsem_post(&pVnode->canCommit); + vnodeAWait(vnodeAsyncHandle[0], pVnode->commitTask); return 0; } @@ -499,7 +498,7 @@ _exit: } bool vnodeShouldRollback(SVnode *pVnode) { - char tFName[TSDB_FILENAME_LEN] = {0}; + char tFName[TSDB_FILENAME_LEN] = {0}; int32_t offset = 0; vnodeGetPrimaryDir(pVnode->path, pVnode->diskPrimary, pVnode->pTfs, tFName, TSDB_FILENAME_LEN); @@ -510,7 +509,7 @@ bool vnodeShouldRollback(SVnode *pVnode) { } void vnodeRollback(SVnode *pVnode) { - char tFName[TSDB_FILENAME_LEN] = {0}; + char tFName[TSDB_FILENAME_LEN] = {0}; int32_t offset = 0; vnodeGetPrimaryDir(pVnode->path, pVnode->diskPrimary, pVnode->pTfs, tFName, TSDB_FILENAME_LEN); diff --git a/source/dnode/vnode/src/vnd/vnodeHash.c b/source/dnode/vnode/src/vnd/vnodeHash.c new file mode 100644 index 0000000000..33602f6581 --- /dev/null +++ b/source/dnode/vnode/src/vnd/vnodeHash.c @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "vnodeHash.h" + +#define VNODE_HASH_DEFAULT_NUM_BUCKETS 1024 + +typedef struct SVHashEntry SVHashEntry; + +struct SVHashEntry { + SVHashEntry* next; + void* obj; +}; + +struct SVHashTable { + uint32_t (*hash)(const void*); + int32_t (*compare)(const void*, const void*); + int32_t numEntries; + uint32_t numBuckets; + SVHashEntry** buckets; +}; + +static int32_t vHashRehash(SVHashTable* ht, uint32_t newNumBuckets) { + SVHashEntry** newBuckets = (SVHashEntry**)taosMemoryCalloc(newNumBuckets, sizeof(SVHashEntry*)); + if (newBuckets == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + for (int32_t i = 0; i < ht->numBuckets; i++) { + SVHashEntry* entry = ht->buckets[i]; + while (entry != NULL) { + SVHashEntry* next = entry->next; + uint32_t bucketIndex = ht->hash(entry->obj) % newNumBuckets; + entry->next = newBuckets[bucketIndex]; + newBuckets[bucketIndex] = entry; + entry = next; + } + } + + taosMemoryFree(ht->buckets); + ht->buckets = newBuckets; + ht->numBuckets = newNumBuckets; + + return 0; +} + +int32_t vHashInit(SVHashTable** ht, uint32_t (*hash)(const void*), int32_t (*compare)(const void*, const void*)) { + if (ht == NULL || hash == NULL || compare == NULL) { + return TSDB_CODE_INVALID_PARA; + } + + (*ht) = (SVHashTable*)taosMemoryMalloc(sizeof(SVHashTable)); + if (*ht == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + (*ht)->hash = hash; + (*ht)->compare = compare; + (*ht)->numEntries = 0; + (*ht)->numBuckets = VNODE_HASH_DEFAULT_NUM_BUCKETS; + (*ht)->buckets = (SVHashEntry**)taosMemoryCalloc((*ht)->numBuckets, sizeof(SVHashEntry*)); + if ((*ht)->buckets == NULL) { + taosMemoryFree(*ht); + return TSDB_CODE_OUT_OF_MEMORY; + } + + return 0; +} + +int32_t vHashDestroy(SVHashTable** ht) { + if (ht == NULL) { + return TSDB_CODE_INVALID_PARA; + } + + if (*ht) { + ASSERT((*ht)->numEntries == 0); + taosMemoryFree((*ht)->buckets); + taosMemoryFree(*ht); + (*ht) = NULL; + } + return 0; +} + +int32_t vHashPut(SVHashTable* ht, void* obj) { + if (ht == NULL || obj == NULL) { + return TSDB_CODE_INVALID_PARA; + } + + uint32_t bucketIndex = ht->hash(obj) % ht->numBuckets; + for (SVHashEntry* entry = ht->buckets[bucketIndex]; entry != NULL; entry = entry->next) { + if (ht->compare(entry->obj, obj) == 0) { + return TSDB_CODE_DUP_KEY; + } + } + + if (ht->numEntries >= ht->numBuckets) { + vHashRehash(ht, ht->numBuckets * 2); + bucketIndex = ht->hash(obj) % ht->numBuckets; + } + + SVHashEntry* entry = (SVHashEntry*)taosMemoryMalloc(sizeof(SVHashEntry)); + if (entry == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + entry->obj = obj; + entry->next = ht->buckets[bucketIndex]; + ht->buckets[bucketIndex] = entry; + ht->numEntries++; + + return 0; +} + +int32_t vHashGet(SVHashTable* ht, const void* obj, void** retObj) { + if (ht == NULL || obj == NULL || retObj == NULL) { + return TSDB_CODE_INVALID_PARA; + } + + uint32_t bucketIndex = ht->hash(obj) % ht->numBuckets; + for (SVHashEntry* entry = ht->buckets[bucketIndex]; entry != NULL; entry = entry->next) { + if (ht->compare(entry->obj, obj) == 0) { + *retObj = entry->obj; + return 0; + } + } + + *retObj = NULL; + return TSDB_CODE_NOT_FOUND; +} + +int32_t vHashDrop(SVHashTable* ht, const void* obj) { + if (ht == NULL || obj == NULL) { + return TSDB_CODE_INVALID_PARA; + } + + uint32_t bucketIndex = ht->hash(obj) % ht->numBuckets; + for (SVHashEntry** entry = &ht->buckets[bucketIndex]; *entry != NULL; entry = &(*entry)->next) { + if (ht->compare((*entry)->obj, obj) == 0) { + SVHashEntry* tmp = *entry; + *entry = (*entry)->next; + 
taosMemoryFree(tmp); + ht->numEntries--; + if (ht->numBuckets > VNODE_HASH_DEFAULT_NUM_BUCKETS && ht->numEntries < ht->numBuckets / 4) { + vHashRehash(ht, ht->numBuckets / 2); + } + return 0; + } + } + + return TSDB_CODE_NOT_FOUND; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/vnd/vnodeHash.h b/source/dnode/vnode/src/vnd/vnodeHash.h new file mode 100644 index 0000000000..86f6f9ac87 --- /dev/null +++ b/source/dnode/vnode/src/vnd/vnodeHash.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _VNODE_HAS_H_ +#define _VNODE_HAS_H_ + +#include "vnd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct SVHashTable SVHashTable; + +int32_t vHashInit(SVHashTable** ht, uint32_t (*hash)(const void*), int32_t (*compare)(const void*, const void*)); +int32_t vHashDestroy(SVHashTable** ht); +int32_t vHashPut(SVHashTable* ht, void* obj); +int32_t vHashGet(SVHashTable* ht, const void* obj, void** retObj); +int32_t vHashDrop(SVHashTable* ht, const void* obj); + +#ifdef __cplusplus +} +#endif + +#endif /*_VNODE_HAS_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/vnd/vnodeModule.c b/source/dnode/vnode/src/vnd/vnodeModule.c index df08fb8a2b..4e3cee42c6 100644 --- a/source/dnode/vnode/src/vnd/vnodeModule.c +++ b/source/dnode/vnode/src/vnd/vnodeModule.c @@ -16,65 +16,25 @@ #include "cos.h" #include "vnd.h" -typedef struct SVnodeTask SVnodeTask; -struct SVnodeTask { - SVnodeTask* next; - SVnodeTask* prev; - int (*execute)(void*); - void* arg; -}; +static volatile int32_t VINIT = 0; -typedef struct { - int nthreads; - TdThread* threads; - TdThreadMutex mutex; - TdThreadCond hasTask; - SVnodeTask queue; -} SVnodeThreadPool; - -struct SVnodeGlobal { - int8_t init; - int8_t stop; - SVnodeThreadPool tp[2]; -}; - -struct SVnodeGlobal vnodeGlobal; - -static void* loop(void* arg); +SVAsync* vnodeAsyncHandle[2]; int vnodeInit(int nthreads) { - int8_t init; - int ret; + int32_t init; - init = atomic_val_compare_exchange_8(&(vnodeGlobal.init), 0, 1); + init = atomic_val_compare_exchange_32(&VINIT, 0, 1); if (init) { return 0; } - vnodeGlobal.stop = 0; - for (int32_t i = 0; i < ARRAY_SIZE(vnodeGlobal.tp); i++) { - taosThreadMutexInit(&vnodeGlobal.tp[i].mutex, NULL); - taosThreadCondInit(&vnodeGlobal.tp[i].hasTask, NULL); + // vnode-commit + vnodeAsyncInit(&vnodeAsyncHandle[0], "vnode-commit"); + vnodeAsyncSetWorkers(vnodeAsyncHandle[0], nthreads); - taosThreadMutexLock(&vnodeGlobal.tp[i].mutex); - - vnodeGlobal.tp[i].queue.next = &vnodeGlobal.tp[i].queue; - vnodeGlobal.tp[i].queue.prev = &vnodeGlobal.tp[i].queue; - - taosThreadMutexUnlock(&(vnodeGlobal.tp[i].mutex)); - - vnodeGlobal.tp[i].nthreads = nthreads; - vnodeGlobal.tp[i].threads = taosMemoryCalloc(nthreads, sizeof(TdThread)); - if (vnodeGlobal.tp[i].threads == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - vError("failed to init vnode module since:%s", tstrerror(terrno)); - return -1; - } - - for (int j = 0; j < nthreads; j++) { - 
taosThreadCreate(&(vnodeGlobal.tp[i].threads[j]), NULL, loop, &vnodeGlobal.tp[i]); - } - } + // vnode-merge + vnodeAsyncInit(&vnodeAsyncHandle[1], "vnode-merge"); + vnodeAsyncSetWorkers(vnodeAsyncHandle[1], nthreads); if (walInit() < 0) { return -1; @@ -90,99 +50,15 @@ int vnodeInit(int nthreads) { } void vnodeCleanup() { - int8_t init; - - init = atomic_val_compare_exchange_8(&(vnodeGlobal.init), 1, 0); + int32_t init = atomic_val_compare_exchange_32(&VINIT, 1, 0); if (init == 0) return; // set stop - vnodeGlobal.stop = 1; - for (int32_t i = 0; i < ARRAY_SIZE(vnodeGlobal.tp); i++) { - taosThreadMutexLock(&(vnodeGlobal.tp[i].mutex)); - taosThreadCondBroadcast(&(vnodeGlobal.tp[i].hasTask)); - taosThreadMutexUnlock(&(vnodeGlobal.tp[i].mutex)); - - // wait for threads - for (int j = 0; j < vnodeGlobal.tp[i].nthreads; j++) { - taosThreadJoin(vnodeGlobal.tp[i].threads[j], NULL); - } - - // clear source - taosMemoryFreeClear(vnodeGlobal.tp[i].threads); - taosThreadCondDestroy(&(vnodeGlobal.tp[i].hasTask)); - taosThreadMutexDestroy(&(vnodeGlobal.tp[i].mutex)); - } + vnodeAsyncDestroy(&vnodeAsyncHandle[0]); + vnodeAsyncDestroy(&vnodeAsyncHandle[1]); walCleanUp(); tqCleanUp(); smaCleanUp(); s3CleanUp(); } - -int vnodeScheduleTaskEx(int tpid, int (*execute)(void*), void* arg) { - SVnodeTask* pTask; - - ASSERT(!vnodeGlobal.stop); - - pTask = taosMemoryMalloc(sizeof(*pTask)); - if (pTask == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - pTask->execute = execute; - pTask->arg = arg; - - taosThreadMutexLock(&(vnodeGlobal.tp[tpid].mutex)); - pTask->next = &vnodeGlobal.tp[tpid].queue; - pTask->prev = vnodeGlobal.tp[tpid].queue.prev; - vnodeGlobal.tp[tpid].queue.prev->next = pTask; - vnodeGlobal.tp[tpid].queue.prev = pTask; - taosThreadCondSignal(&(vnodeGlobal.tp[tpid].hasTask)); - taosThreadMutexUnlock(&(vnodeGlobal.tp[tpid].mutex)); - - return 0; -} - -int vnodeScheduleTask(int (*execute)(void*), void* arg) { return vnodeScheduleTaskEx(0, execute, arg); } - -/* ------------------------ STATIC METHODS ------------------------ */ -static void* loop(void* arg) { - SVnodeThreadPool* tp = (SVnodeThreadPool*)arg; - SVnodeTask* pTask; - int ret; - - if (tp == &vnodeGlobal.tp[0]) { - setThreadName("vnode-commit"); - } else if (tp == &vnodeGlobal.tp[1]) { - setThreadName("vnode-merge"); - } - - for (;;) { - taosThreadMutexLock(&(tp->mutex)); - for (;;) { - pTask = tp->queue.next; - if (pTask == &tp->queue) { - // no task - if (vnodeGlobal.stop) { - taosThreadMutexUnlock(&(tp->mutex)); - return NULL; - } else { - taosThreadCondWait(&(tp->hasTask), &(tp->mutex)); - } - } else { - // has task - pTask->prev->next = pTask->next; - pTask->next->prev = pTask->prev; - break; - } - } - - taosThreadMutexUnlock(&(tp->mutex)); - - pTask->execute(pTask->arg); - taosMemoryFree(pTask); - } - - return NULL; -} diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index ff79e83d72..946ce9d278 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -129,8 +129,8 @@ int32_t vnodeAlterReplica(const char *path, SAlterVnodeReplicaReq *pReq, int32_t } pCfg->changeVersion = pReq->changeVersion; - vInfo("vgId:%d, save config while alter, replicas:%d totalReplicas:%d selfIndex:%d changeVersion:%d", - pReq->vgId, pCfg->replicaNum, pCfg->totalReplicaNum, pCfg->myIndex, pCfg->changeVersion); + vInfo("vgId:%d, save config while alter, replicas:%d totalReplicas:%d selfIndex:%d changeVersion:%d", pReq->vgId, + pCfg->replicaNum, 
pCfg->totalReplicaNum, pCfg->myIndex, pCfg->changeVersion); info.config.syncCfg = *pCfg; ret = vnodeSaveInfo(dir, &info); @@ -396,10 +396,14 @@ SVnode *vnodeOpen(const char *path, int32_t diskPrimary, STfs *pTfs, SMsgCb msgC pVnode->blocked = false; tsem_init(&pVnode->syncSem, 0, 0); - tsem_init(&(pVnode->canCommit), 0, 1); taosThreadMutexInit(&pVnode->mutex, NULL); taosThreadCondInit(&pVnode->poolNotEmpty, NULL); + if (vnodeAChannelInit(vnodeAsyncHandle[0], &pVnode->commitChannel) != 0) { + vError("vgId:%d, failed to init commit channel", TD_VID(pVnode)); + goto _err; + } + int8_t rollback = vnodeShouldRollback(pVnode); // open buffer pool @@ -487,7 +491,6 @@ _err: if (pVnode->pMeta) metaClose(&pVnode->pMeta); if (pVnode->freeList) vnodeCloseBufPool(pVnode); - tsem_destroy(&(pVnode->canCommit)); taosMemoryFree(pVnode); return NULL; } @@ -501,7 +504,8 @@ void vnodePostClose(SVnode *pVnode) { vnodeSyncPostClose(pVnode); } void vnodeClose(SVnode *pVnode) { if (pVnode) { - tsem_wait(&pVnode->canCommit); + vnodeAWait(vnodeAsyncHandle[0], pVnode->commitTask); + vnodeAChannelDestroy(vnodeAsyncHandle[0], pVnode->commitChannel, true); vnodeSyncClose(pVnode); vnodeQueryClose(pVnode); tqClose(pVnode->pTq); @@ -510,10 +514,8 @@ void vnodeClose(SVnode *pVnode) { smaClose(pVnode->pSma); if (pVnode->pMeta) metaClose(&pVnode->pMeta); vnodeCloseBufPool(pVnode); - tsem_post(&pVnode->canCommit); // destroy handle - tsem_destroy(&(pVnode->canCommit)); tsem_destroy(&pVnode->syncSem); taosThreadCondDestroy(&pVnode->poolNotEmpty); taosThreadMutexDestroy(&pVnode->mutex); diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index 91244e321f..f2ef11e9ed 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -13,8 +13,8 @@ * along with this program. If not, see . 
*/ -#include "vnd.h" #include "tsdb.h" +#include "vnd.h" // SVSnapReader ======================================================== struct SVSnapReader { @@ -32,11 +32,11 @@ struct SVSnapReader { TSnapRangeArray *pRanges; STsdbSnapReader *pTsdbReader; // tq - int8_t tqHandleDone; - STqSnapReader *pTqSnapReader; - int8_t tqOffsetDone; - STqOffsetReader *pTqOffsetReader; - int8_t tqCheckInfoDone; + int8_t tqHandleDone; + STqSnapReader *pTqSnapReader; + int8_t tqOffsetDone; + STqOffsetReader *pTqOffsetReader; + int8_t tqCheckInfoDone; STqCheckInfoReader *pTqCheckInfoReader; // stream int8_t streamTaskDone; @@ -458,8 +458,8 @@ struct SVSnapWriter { TSnapRangeArray *pRanges; STsdbSnapWriter *pTsdbSnapWriter; // tq - STqSnapWriter *pTqSnapWriter; - STqOffsetWriter *pTqOffsetWriter; + STqSnapWriter *pTqSnapWriter; + STqOffsetWriter *pTqOffsetWriter; STqCheckInfoWriter *pTqCheckInfoWriter; // stream SStreamTaskWriter *pStreamTaskWriter; @@ -519,6 +519,8 @@ _out: return code; } +extern int32_t tsdbCancelAllBgTask(STsdb *tsdb); + int32_t vnodeSnapWriterOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapWriter **ppWriter) { int32_t code = 0; SVSnapWriter *pWriter = NULL; @@ -526,8 +528,8 @@ int32_t vnodeSnapWriterOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapWriter int64_t ever = pParam->end; // commit memory data - vnodeAsyncCommit(pVnode); - tsem_wait(&pVnode->canCommit); + vnodeSyncCommit(pVnode); + tsdbCancelAllBgTask(pVnode->pTsdb); // alloc pWriter = (SVSnapWriter *)taosMemoryCalloc(1, sizeof(*pWriter)); @@ -657,7 +659,6 @@ _exit: vInfo("vgId:%d, vnode snapshot writer closed, rollback:%d", TD_VID(pVnode), rollback); taosMemoryFree(pWriter); } - tsem_post(&pVnode->canCommit); return code; } diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 33b4114009..1f951097a4 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -463,7 +463,6 @@ int32_t vnodePreProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg) { break; } -_exit: if (code) { vError("vgId:%d, failed to preprocess write request since %s, msg type:%s", TD_VID(pVnode), tstrerror(code), TMSG_INFO(pMsg->msgType)); @@ -595,6 +594,11 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg tqProcessTaskResetReq(pVnode->pTq, pMsg); } } break; + case TDMT_STREAM_HTASK_DROP: { + if (pVnode->restored && vnodeIsLeader(pVnode)) { + tqProcessTaskDropHTask(pVnode->pTq, pMsg); + } + } break; case TDMT_VND_ALTER_CONFIRM: needCommit = pVnode->config.hashChange; if (vnodeProcessAlterConfirmReq(pVnode, ver, pReq, len, pRsp) < 0) { @@ -762,7 +766,7 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) case TDMT_STREAM_TASK_RUN: return tqProcessTaskRunReq(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_DISPATCH: - return tqProcessTaskDispatchReq(pVnode->pTq, pMsg, true); + return tqProcessTaskDispatchReq(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_DISPATCH_RSP: return tqProcessTaskDispatchRsp(pVnode->pTq, pMsg); case TDMT_VND_STREAM_TASK_CHECK: diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 3944f8ed91..817d5124a2 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -18,6 +18,7 @@ #include "sync.h" #include "tsdb.h" #include "vnd.h" +#include "tqCommon.h" #define BATCH_ENABLE 0 @@ -569,8 +570,8 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) vInfo("vgId:%d, sync restore finished, not launch stream 
tasks, since stream tasks are disabled", vgId); } else { vInfo("vgId:%d sync restore finished, start to launch stream tasks", pVnode->config.vgId); - tqResetStreamTaskStatus(pVnode->pTq); - tqStartStreamTaskAsync(pVnode->pTq, false); + resetStreamTaskStatus(pVnode->pTq->pStreamMeta); + tqStreamTaskStartAsync(pMeta, &pVnode->msgCb, false); } } else { vInfo("vgId:%d, sync restore finished, not launch stream tasks since not leader", vgId); diff --git a/source/libs/executor/src/dynqueryctrloperator.c b/source/libs/executor/src/dynqueryctrloperator.c index 8fc46e0239..9e7f1144f8 100755 --- a/source/libs/executor/src/dynqueryctrloperator.c +++ b/source/libs/executor/src/dynqueryctrloperator.c @@ -503,15 +503,15 @@ static void seqJoinLaunchNewRetrieveImpl(SOperatorInfo* pOperator, SSDataBlock** T_LONG_JMP(pOperator->pTaskInfo->env, pOperator->pTaskInfo->code); } - qError("dynamic post task begin"); + qDebug("%s dynamic post task begin", GET_TASKID(pOperator->pTaskInfo)); *ppRes = pOperator->pDownstream[1]->fpSet.getNextExtFn(pOperator->pDownstream[1], pParam); if (*ppRes) { pPost->isStarted = true; pStbJoin->execInfo.postBlkNum++; pStbJoin->execInfo.postBlkRows += (*ppRes)->info.rows; - qError("join res block retrieved"); + qDebug("%s join res block retrieved", GET_TASKID(pOperator->pTaskInfo)); } else { - qError("Empty join res block retrieved"); + qDebug("%s Empty join res block retrieved", GET_TASKID(pOperator->pTaskInfo)); } } diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 8af3765f87..34f35c3ba4 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1525,6 +1525,17 @@ static int32_t generateIntervalScanRange(SStreamScanInfo* pInfo, SSDataBlock* pS return TSDB_CODE_SUCCESS; } +static void calBlockTbName(SStreamScanInfo* pInfo, SSDataBlock* pBlock) { + SExprSupp* pTbNameCalSup = &pInfo->tbnameCalSup; + blockDataCleanup(pInfo->pCreateTbRes); + if (pInfo->tbnameCalSup.numOfExprs == 0 && pInfo->tagCalSup.numOfExprs == 0) { + pBlock->info.parTbName[0] = 0; + } else { + appendCreateTableRow(pInfo->pStreamScanOp->pTaskInfo->streamInfo.pState, &pInfo->tbnameCalSup, &pInfo->tagCalSup, + pBlock->info.id.groupId, pBlock, 0, pInfo->pCreateTbRes, &pInfo->stateStore); + } +} + static int32_t generateDeleteResultBlock(SStreamScanInfo* pInfo, SSDataBlock* pSrcBlock, SSDataBlock* pDestBlock) { blockDataCleanup(pDestBlock); int32_t rows = pSrcBlock->info.rows; @@ -1549,15 +1560,21 @@ static int32_t generateDeleteResultBlock(SStreamScanInfo* pInfo, SSDataBlock* pS for (int32_t i = 0; i < pSrcBlock->info.rows; i++) { uint64_t srcUid = srcUidData[i]; uint64_t groupId = srcGp[i]; - char* tbname[VARSTR_HEADER_SIZE + TSDB_TABLE_NAME_LEN] = {0}; + char tbname[VARSTR_HEADER_SIZE + TSDB_TABLE_NAME_LEN] = {0}; if (groupId == 0) { groupId = getGroupIdByData(pInfo, srcUid, srcStartTsCol[i], ver); } if (pInfo->tbnameCalSup.pExprInfo) { void* parTbname = NULL; - pInfo->stateStore.streamStateGetParName(pInfo->pStreamScanOp->pTaskInfo->streamInfo.pState, groupId, &parTbname); - - memcpy(varDataVal(tbname), parTbname, TSDB_TABLE_NAME_LEN); + code = pInfo->stateStore.streamStateGetParName(pInfo->pStreamScanOp->pTaskInfo->streamInfo.pState, groupId, &parTbname); + if (code != TSDB_CODE_SUCCESS) { + SSDataBlock* pPreRes = readPreVersionData(pInfo->pTableScanOp, srcUid, srcStartTsCol[i], srcStartTsCol[i], ver); + printDataBlock(pPreRes, "pre res", GET_TASKID(pInfo->pStreamScanOp->pTaskInfo)); + calBlockTbName(pInfo, pPreRes); + 
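In generateDeleteResultBlock above, the declaration of `tbname` changes from `char* tbname[...]` to `char tbname[...]`. The old form declared an array of pointers and then copied string bytes into pointer storage, which only worked by accident. A compact standalone demo of why the two types differ (nothing TDengine-specific here):

    /* Why `char *buf[N]` is the wrong buffer type for string bytes. */
    #include <stdio.h>
    #include <string.h>

    #define N 16

    int main(void) {
      char *wrong[N];  /* N pointers: sizeof == N * sizeof(char *) */
      char  right[N];  /* N bytes:    sizeof == N                  */

      printf("sizeof wrong = %zu, sizeof right = %zu\n",
             sizeof wrong, sizeof right);

      /* Copying string bytes into `wrong` reinterprets pointer slots as raw
       * storage; it may appear to work, but the declared type says something
       * else and the object is 8x larger than intended on 64-bit targets. */
      memcpy(right, "tbname", sizeof "tbname");
      printf("%s\n", right);
      return 0;
    }

With the corrected declaration, varDataVal/varDataSetLen operate on a byte buffer of the intended size.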
memcpy(varDataVal(tbname), pPreRes->info.parTbName, strlen(pPreRes->info.parTbName)); + } else { + memcpy(varDataVal(tbname), parTbname, TSDB_TABLE_NAME_LEN); + } varDataSetLen(tbname, strlen(varDataVal(tbname))); pInfo->stateStore.streamStateFreeVal(parTbname); } @@ -1583,17 +1600,6 @@ static int32_t generateScanRange(SStreamScanInfo* pInfo, SSDataBlock* pSrcBlock, return code; } -static void calBlockTbName(SStreamScanInfo* pInfo, SSDataBlock* pBlock) { - SExprSupp* pTbNameCalSup = &pInfo->tbnameCalSup; - blockDataCleanup(pInfo->pCreateTbRes); - if (pInfo->tbnameCalSup.numOfExprs == 0 && pInfo->tagCalSup.numOfExprs == 0) { - pBlock->info.parTbName[0] = 0; - } else { - appendCreateTableRow(pInfo->pStreamScanOp->pTaskInfo->streamInfo.pState, &pInfo->tbnameCalSup, &pInfo->tagCalSup, - pBlock->info.id.groupId, pBlock, 0, pInfo->pCreateTbRes, &pInfo->stateStore); - } -} - void appendOneRowToStreamSpecialBlock(SSDataBlock* pBlock, TSKEY* pStartTs, TSKEY* pEndTs, uint64_t* pUid, uint64_t* pGp, void* pTbName) { SColumnInfoData* pStartTsCol = taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); diff --git a/source/libs/executor/src/streameventwindowoperator.c b/source/libs/executor/src/streameventwindowoperator.c index 9b987ff1a4..9f1610e08d 100644 --- a/source/libs/executor/src/streameventwindowoperator.c +++ b/source/libs/executor/src/streameventwindowoperator.c @@ -610,6 +610,13 @@ void streamEventReloadState(SOperatorInfo* pOperator) { compactEventWindow(pOperator, &curInfo, pInfo->pSeUpdated, pInfo->pSeDeleted, false); qDebug("===stream=== reload state. save result %" PRId64 ", %" PRIu64, curInfo.winInfo.sessionWin.win.skey, curInfo.winInfo.sessionWin.groupId); + if (IS_VALID_SESSION_WIN(curInfo.winInfo)) { + saveSessionOutputBuf(pAggSup, &curInfo.winInfo); + } + + if (!curInfo.pWinFlag->endFlag) { + continue; + } if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE) { saveResult(curInfo.winInfo, pInfo->pSeUpdated); @@ -621,10 +628,6 @@ void streamEventReloadState(SOperatorInfo* pOperator) { getSessionHashKey(&curInfo.winInfo.sessionWin, &key); tSimpleHashPut(pAggSup->pResultRows, &key, sizeof(SSessionKey), &curInfo.winInfo, sizeof(SResultWindowInfo)); } - - if (IS_VALID_SESSION_WIN(curInfo.winInfo)) { - saveSessionOutputBuf(pAggSup, &curInfo.winInfo); - } } taosMemoryFree(pBuf); diff --git a/source/libs/executor/src/streamtimewindowoperator.c b/source/libs/executor/src/streamtimewindowoperator.c index 86d83e05ad..893848f010 100644 --- a/source/libs/executor/src/streamtimewindowoperator.c +++ b/source/libs/executor/src/streamtimewindowoperator.c @@ -276,15 +276,15 @@ static int32_t getAllIntervalWindow(SSHashObj* pHashMap, SSHashObj* resWins) { void* pIte = NULL; int32_t iter = 0; while ((pIte = tSimpleHashIterate(pHashMap, pIte, &iter)) != NULL) { - SWinKey* pKey = tSimpleHashGetKey(pIte, NULL); - uint64_t groupId = pKey->groupId; - TSKEY ts = pKey->ts; + SWinKey* pKey = tSimpleHashGetKey(pIte, NULL); + uint64_t groupId = pKey->groupId; + TSKEY ts = pKey->ts; SRowBuffPos* pPos = *(SRowBuffPos**)pIte; if (!pPos->beUpdated) { continue; } pPos->beUpdated = false; - int32_t code = saveWinResultInfo(ts, groupId, pPos, resWins); + int32_t code = saveWinResultInfo(ts, groupId, pPos, resWins); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -1091,10 +1091,10 @@ void doStreamIntervalDecodeOpState(void* buf, int32_t len, SOperatorInfo* pOpera int32_t mapSize = 0; buf = taosDecodeFixedI32(buf, &mapSize); for (int32_t i = 0; i < mapSize; i++) { - SWinKey key = {0}; + SWinKey key = {0}; buf = 
decodeSWinKey(buf, &key); SRowBuffPos* pPos = NULL; - int32_t resSize = pInfo->aggSup.resultRowSize; + int32_t resSize = pInfo->aggSup.resultRowSize; pInfo->stateStore.streamStateAddIfNotExist(pInfo->pState, &key, (void**)&pPos, &resSize); tSimpleHashPut(pInfo->aggSup.pResultRowHashTable, &key, sizeof(SWinKey), &pPos, POINTER_BYTES); } @@ -1165,7 +1165,7 @@ static SSDataBlock* buildIntervalResult(SOperatorInfo* pOperator) { return NULL; } -int32_t copyUpdateResult(SSHashObj** ppWinUpdated, SArray* pUpdated, __compar_fn_t compar) { +int32_t copyUpdateResult(SSHashObj** ppWinUpdated, SArray* pUpdated, __compar_fn_t compar) { void* pIte = NULL; int32_t iter = 0; while ((pIte = tSimpleHashIterate(*ppWinUpdated, pIte, &iter)) != NULL) { @@ -1402,10 +1402,12 @@ void streamIntervalReloadState(SOperatorInfo* pOperator) { void* pBuf = NULL; int32_t code = pInfo->stateStore.streamStateGetInfo(pInfo->pState, STREAM_INTERVAL_OP_STATE_NAME, strlen(STREAM_INTERVAL_OP_STATE_NAME), &pBuf, &size); - TSKEY ts = *(TSKEY*)pBuf; - taosMemoryFree(pBuf); - pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, ts); - pInfo->stateStore.streamStateReloadInfo(pInfo->pState, ts); + if (code == 0) { + TSKEY ts = *(TSKEY*)pBuf; + taosMemoryFree(pBuf); + pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, ts); + pInfo->stateStore.streamStateReloadInfo(pInfo->pState, ts); + } } SOperatorInfo* downstream = pOperator->pDownstream[0]; if (downstream->fpSet.reloadStreamStateFn) { @@ -1723,8 +1725,8 @@ void setSessionOutputBuf(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endT pCurWin->sessionWin.win.skey = startTs; pCurWin->sessionWin.win.ekey = endTs; int32_t size = pAggSup->resultRowSize; - int32_t code = pAggSup->stateStore.streamStateSessionAddIfNotExist(pAggSup->pState, &pCurWin->sessionWin, - pAggSup->gap, (void**)&pCurWin->pStatePos, &size); + int32_t code = pAggSup->stateStore.streamStateSessionAddIfNotExist(pAggSup->pState, &pCurWin->sessionWin, + pAggSup->gap, (void**)&pCurWin->pStatePos, &size); if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &pCurWin->sessionWin.win)) { code = TSDB_CODE_FAILED; clearOutputBuf(pAggSup->pState, pCurWin->pStatePos, &pAggSup->pSessionAPI->stateStore); @@ -1822,9 +1824,9 @@ void removeSessionResults(SStreamAggSupporter* pAggSup, SSHashObj* pHashMap, SAr } } -int32_t updateSessionWindowInfo(SStreamAggSupporter* pAggSup, SResultWindowInfo* pWinInfo, TSKEY* pStartTs, TSKEY* pEndTs, uint64_t groupId, - int32_t rows, int32_t start, int64_t gap, SSHashObj* pResultRows, SSHashObj* pStUpdated, - SSHashObj* pStDeleted) { +int32_t updateSessionWindowInfo(SStreamAggSupporter* pAggSup, SResultWindowInfo* pWinInfo, TSKEY* pStartTs, + TSKEY* pEndTs, uint64_t groupId, int32_t rows, int32_t start, int64_t gap, + SSHashObj* pResultRows, SSHashObj* pStUpdated, SSHashObj* pStDeleted) { for (int32_t i = start; i < rows; ++i) { if (!isInWindow(pWinInfo, pStartTs[i], gap) && (!pEndTs || !isInWindow(pWinInfo, pEndTs[i], gap))) { return i - start; @@ -1856,8 +1858,8 @@ static int32_t initSessionOutputBuf(SResultWindowInfo* pWinInfo, SResultRow** pR } int32_t doOneWindowAggImpl(SColumnInfoData* pTimeWindowData, SResultWindowInfo* pCurWin, SResultRow** pResult, - int32_t startIndex, int32_t winRows, int32_t rows, int32_t numOutput, - SOperatorInfo* pOperator, int64_t winDelta) { + int32_t startIndex, int32_t winRows, int32_t rows, int32_t numOutput, + SOperatorInfo* pOperator, int64_t winDelta) { SExprSupp* pSup = &pOperator->exprSupp; SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; 
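streamIntervalReloadState in the hunk above now dereferences `pBuf` only when streamStateGetInfo returns 0, instead of unconditionally. The guard-the-out-parameter rule it applies is sketched below with invented names (`get_info` is not a real API):

    /* Check the status code before trusting an out-parameter. */
    #include <stdio.h>
    #include <stdlib.h>

    /* pretend lookup: fills *out only on success (return 0) */
    static int get_info(int key, long **out) {
      if (key != 42) return -1;          /* not found: *out left untouched */
      *out = malloc(sizeof(long));
      if (*out == NULL) return -1;
      **out = 1700000000L;
      return 0;
    }

    int main(void) {
      long *p = NULL;
      if (get_info(7, &p) == 0) {        /* guard first, then dereference */
        printf("ts=%ld\n", *p);
        free(p);
      } else {
        puts("no saved state, skip reload");
      }
      return 0;
    }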
int32_t code = initSessionOutputBuf(pCurWin, pResult, pSup->pCtx, numOutput, pSup->rowEntryInfoOffset); @@ -1981,9 +1983,10 @@ static void compactSessionSemiWindow(SOperatorInfo* pOperator, SResultWindowInfo } int32_t saveSessionOutputBuf(SStreamAggSupporter* pAggSup, SResultWindowInfo* pWinInfo) { - qDebug("===stream===try save session result skey:%" PRId64 ", ekey:%" PRId64 ".pos%d", - pWinInfo->sessionWin.win.skey, pWinInfo->sessionWin.win.ekey, pWinInfo->pStatePos->needFree); - return pAggSup->stateStore.streamStateSessionPut(pAggSup->pState, &pWinInfo->sessionWin, pWinInfo->pStatePos, pAggSup->resultRowSize); + qDebug("===stream===try save session result skey:%" PRId64 ", ekey:%" PRId64 ".pos%d", pWinInfo->sessionWin.win.skey, + pWinInfo->sessionWin.win.ekey, pWinInfo->pStatePos->needFree); + return pAggSup->stateStore.streamStateSessionPut(pAggSup->pState, &pWinInfo->sessionWin, pWinInfo->pStatePos, + pAggSup->resultRowSize); } static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBlock, SSHashObj* pStUpdated, @@ -2045,7 +2048,8 @@ static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE && pStUpdated) { code = saveResult(winInfo, pStUpdated); if (code != TSDB_CODE_SUCCESS) { - qError("%s do stream session aggregate impl, set result error, code %s", GET_TASKID(pTaskInfo), tstrerror(code)); + qError("%s do stream session aggregate impl, set result error, code %s", GET_TASKID(pTaskInfo), + tstrerror(code)); T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); } } @@ -2084,8 +2088,8 @@ void doDeleteTimeWindows(SStreamAggSupporter* pAggSup, SSDataBlock* pBlock, SArr inline int32_t sessionKeyCompareAsc(const void* pKey1, const void* pKey2) { SResultWindowInfo* pWinInfo1 = (SResultWindowInfo*)pKey1; SResultWindowInfo* pWinInfo2 = (SResultWindowInfo*)pKey2; - SSessionKey* pWin1 = &pWinInfo1->sessionWin; - SSessionKey* pWin2 = &pWinInfo2->sessionWin; + SSessionKey* pWin1 = &pWinInfo1->sessionWin; + SSessionKey* pWin2 = &pWinInfo2->sessionWin; if (pWin1->groupId > pWin2->groupId) { return 1; @@ -2290,9 +2294,9 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa for (int32_t i = pGroupResInfo->index; i < numOfRows; i += 1) { SResultWindowInfo* pWinInfo = taosArrayGet(pGroupResInfo->pRows, i); - SRowBuffPos* pPos = pWinInfo->pStatePos; - SResultRow* pRow = NULL; - SSessionKey* pKey = (SSessionKey*) pPos->pKey; + SRowBuffPos* pPos = pWinInfo->pStatePos; + SResultRow* pRow = NULL; + SSessionKey* pKey = (SSessionKey*)pPos->pKey; if (pBlock->info.id.groupId == 0) { pBlock->info.id.groupId = pKey->groupId; @@ -2312,7 +2316,7 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa } } - int32_t code = pAPI->stateStore.streamStateGetByPos(pState, pPos, (void**)&pRow); + int32_t code = pAPI->stateStore.streamStateGetByPos(pState, pPos, (void**)&pRow); if (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) { ASSERT(pBlock->info.rows > 0); break; @@ -2325,7 +2329,7 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa pGroupResInfo->index += 1; continue; } - + doUpdateNumOfRows(pCtx, pRow, numOfExprs, rowEntryOffset); // no results, continue to check the next one if (pRow->numOfRows == 0) { @@ -2409,7 +2413,7 @@ void getMaxTsWins(const SArray* pAllWins, SArray* pMaxWins) { return; } SResultWindowInfo* pWinInfo = taosArrayGet(pAllWins, size - 1); - SSessionKey* pSeKey = pWinInfo->pStatePos->pKey; + 
SSessionKey* pSeKey = pWinInfo->pStatePos->pKey; taosArrayPush(pMaxWins, pSeKey); if (pSeKey->groupId == 0) { return; @@ -2716,7 +2720,8 @@ void resetWinRange(STimeWindow* winRange) { void getSessionWindowInfoByKey(SStreamAggSupporter* pAggSup, SSessionKey* pKey, SResultWindowInfo* pWinInfo) { int32_t rowSize = pAggSup->resultRowSize; - int32_t code = pAggSup->stateStore.streamStateSessionGet(pAggSup->pState, pKey, (void**)&pWinInfo->pStatePos, &rowSize); + int32_t code = + pAggSup->stateStore.streamStateSessionGet(pAggSup->pState, pKey, (void**)&pWinInfo->pStatePos, &rowSize); if (code == TSDB_CODE_SUCCESS) { pWinInfo->sessionWin = *pKey; pWinInfo->isOutput = true; @@ -2730,16 +2735,16 @@ void getSessionWindowInfoByKey(SStreamAggSupporter* pAggSup, SSessionKey* pKey, void streamSessionSemiReloadState(SOperatorInfo* pOperator) { SStreamSessionAggOperatorInfo* pInfo = pOperator->info; - SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; resetWinRange(&pAggSup->winRange); SResultWindowInfo winInfo = {0}; - int32_t size = 0; - void* pBuf = NULL; - int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_SESSION_OP_STATE_NAME, + int32_t size = 0; + void* pBuf = NULL; + int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_SESSION_OP_STATE_NAME, strlen(STREAM_SESSION_OP_STATE_NAME), &pBuf, &size); - int32_t num = (size - sizeof(TSKEY)) / sizeof(SSessionKey); - SSessionKey* pSeKeyBuf = (SSessionKey*) pBuf; + int32_t num = (size - sizeof(TSKEY)) / sizeof(SSessionKey); + SSessionKey* pSeKeyBuf = (SSessionKey*)pBuf; ASSERT(size == num * sizeof(SSessionKey) + sizeof(TSKEY)); for (int32_t i = 0; i < num; i++) { SResultWindowInfo winInfo = {0}; @@ -2763,12 +2768,12 @@ void streamSessionReloadState(SOperatorInfo* pOperator) { SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; resetWinRange(&pAggSup->winRange); - int32_t size = 0; - void* pBuf = NULL; - int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_SESSION_OP_STATE_NAME, - strlen(STREAM_SESSION_OP_STATE_NAME), &pBuf, &size); - int32_t num = (size - sizeof(TSKEY)) / sizeof(SSessionKey); - SSessionKey* pSeKeyBuf = (SSessionKey*)pBuf; + int32_t size = 0; + void* pBuf = NULL; + int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_SESSION_OP_STATE_NAME, + strlen(STREAM_SESSION_OP_STATE_NAME), &pBuf, &size); + int32_t num = (size - sizeof(TSKEY)) / sizeof(SSessionKey); + SSessionKey* pSeKeyBuf = (SSessionKey*)pBuf; ASSERT(size == num * sizeof(SSessionKey) + sizeof(TSKEY)); TSKEY ts = *(TSKEY*)((char*)pBuf + size - sizeof(TSKEY)); @@ -2887,7 +2892,7 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh pInfo->recvGetAll = false; pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION; - // for stream + // for stream void* buff = NULL; int32_t len = 0; int32_t res = @@ -2924,7 +2929,8 @@ static void clearStreamSessionOperator(SStreamSessionAggOperatorInfo* pInfo) { pInfo->streamAggSup.stateStore.streamStateSessionClear(pInfo->streamAggSup.pState); } -void deleteSessionWinState(SStreamAggSupporter* pAggSup, SSDataBlock* pBlock, SSHashObj* pMapUpdate, SSHashObj* pMapDelete) { +void deleteSessionWinState(SStreamAggSupporter* pAggSup, SSDataBlock* pBlock, SSHashObj* pMapUpdate, + SSHashObj* pMapDelete) { SArray* pWins = taosArrayInit(16, sizeof(SSessionKey)); doDeleteTimeWindows(pAggSup, pBlock, pWins); removeSessionResults(pAggSup, pMapUpdate, pWins); @@ -3023,7 
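The session reload functions shown above read a saved blob laid out as `num` fixed-size session keys followed by one trailing TSKEY, which is why they compute `num = (size - sizeof(TSKEY)) / sizeof(SSessionKey)` and assert the total size. A toy encoder/decoder for that layout, with simplified stand-in types rather than the real SSessionKey/TSKEY definitions:

    /* Pack/unpack of "N fixed-size keys + trailing timestamp" (simplified types). */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    typedef struct { uint64_t groupId; int64_t skey, ekey; } Key;  /* stand-in for SSessionKey */
    typedef int64_t Ts;                                            /* stand-in for TSKEY */

    int main(void) {
      Key keys[2] = {{1, 10, 20}, {2, 30, 40}};
      Ts  maxTs   = 40;

      unsigned char buf[sizeof keys + sizeof maxTs];
      memcpy(buf, keys, sizeof keys);
      memcpy(buf + sizeof keys, &maxTs, sizeof maxTs);

      /* decode, mirroring the reload code */
      size_t size = sizeof buf;
      size_t num  = (size - sizeof(Ts)) / sizeof(Key);
      assert(size == num * sizeof(Key) + sizeof(Ts));

      Ts tail;
      memcpy(&tail, buf + size - sizeof(Ts), sizeof tail);
      printf("num=%zu, trailing ts=%lld\n", num, (long long)tail);
      return 0;
    }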
+3029,7 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) { copyUpdateResult(&pInfo->pStUpdated, pInfo->pUpdated, sessionKeyCompareAsc); removeSessionDeleteResults(pInfo->pStDeleted, pInfo->pUpdated); - if(pInfo->isHistoryOp) { + if (pInfo->isHistoryOp) { getMaxTsWins(pInfo->pUpdated, pInfo->historyWins); } @@ -3057,8 +3063,9 @@ SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream pOperator->operatorType = pPhyNode->type; if (pPhyNode->type != QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) { - pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamSessionSemiAgg, NULL, - destroyStreamSessionAggOperatorInfo, optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); + pOperator->fpSet = + createOperatorFpSet(optrDummyOpenFn, doStreamSessionSemiAgg, NULL, destroyStreamSessionAggOperatorInfo, + optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); setOperatorStreamStateFn(pOperator, streamSessionReleaseState, streamSessionSemiReloadState); } setOperatorInfo(pOperator, getStreamOpName(pOperator->operatorType), pPhyNode->type, false, OP_NOT_OPENED, pInfo, @@ -3174,7 +3181,7 @@ void getStateWindowInfoByKey(SStreamAggSupporter* pAggSup, SSessionKey* pKey, SS pAggSup->stateStore.streamStateSessionSeekKeyNext(pAggSup->pState, &pNextWin->winInfo.sessionWin); int32_t nextSize = pAggSup->resultRowSize; int32_t code = pAggSup->stateStore.streamStateSessionGetKVByCur(pCur, &pNextWin->winInfo.sessionWin, - (void**)&pNextWin->winInfo.pStatePos, &nextSize); + (void**)&pNextWin->winInfo.pStatePos, &nextSize); if (code != TSDB_CODE_SUCCESS) { SET_SESSION_WIN_INVALID(pNextWin->winInfo); } else { @@ -3187,8 +3194,8 @@ void getStateWindowInfoByKey(SStreamAggSupporter* pAggSup, SSessionKey* pKey, SS pNextWin->winInfo.isOutput = true; } pAggSup->stateStore.streamStateFreeCur(pCur); - qDebug("===stream===get state next win buff. skey:%" PRId64 ", endkey:%" PRId64, pNextWin->winInfo.sessionWin.win.skey, - pNextWin->winInfo.sessionWin.win.ekey); + qDebug("===stream===get state next win buff. skey:%" PRId64 ", endkey:%" PRId64, + pNextWin->winInfo.sessionWin.win.skey, pNextWin->winInfo.sessionWin.win.ekey); } void setStateOutputBuf(SStreamAggSupporter* pAggSup, TSKEY ts, uint64_t groupId, char* pKeyData, @@ -3257,13 +3264,13 @@ void setStateOutputBuf(SStreamAggSupporter* pAggSup, TSKEY ts, uint64_t groupId, pNextWin->winInfo.isOutput = true; } pAggSup->stateStore.streamStateFreeCur(pCur); - qDebug("===stream===set state next win buff. skey:%" PRId64 ", endkey:%" PRId64, pNextWin->winInfo.sessionWin.win.skey, - pNextWin->winInfo.sessionWin.win.ekey); + qDebug("===stream===set state next win buff. 
skey:%" PRId64 ", endkey:%" PRId64, + pNextWin->winInfo.sessionWin.win.skey, pNextWin->winInfo.sessionWin.win.ekey); } -int32_t updateStateWindowInfo(SStreamAggSupporter* pAggSup, SStateWindowInfo* pWinInfo, SStateWindowInfo* pNextWin, TSKEY* pTs, uint64_t groupId, - SColumnInfoData* pKeyCol, int32_t rows, int32_t start, bool* allEqual, - SSHashObj* pResultRows, SSHashObj* pSeUpdated, SSHashObj* pSeDeleted) { +int32_t updateStateWindowInfo(SStreamAggSupporter* pAggSup, SStateWindowInfo* pWinInfo, SStateWindowInfo* pNextWin, + TSKEY* pTs, uint64_t groupId, SColumnInfoData* pKeyCol, int32_t rows, int32_t start, + bool* allEqual, SSHashObj* pResultRows, SSHashObj* pSeUpdated, SSHashObj* pSeDeleted) { *allEqual = true; for (int32_t i = start; i < rows; ++i) { char* pKeyData = colDataGetData(pKeyCol, i); @@ -3338,7 +3345,7 @@ static void doStreamStateAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBl SStateWindowInfo nextWin = {0}; setStateOutputBuf(pAggSup, tsCols[i], groupId, pKeyData, &curWin, &nextWin); releaseOutputBuf(pAggSup->pState, nextWin.winInfo.pStatePos, &pAPI->stateStore); - + setSessionWinOutputInfo(pSeUpdated, &curWin.winInfo); winRows = updateStateWindowInfo(pAggSup, &curWin, &nextWin, tsCols, groupId, pKeyColInfo, rows, i, &allEqual, pAggSup->pResultRows, pSeUpdated, pStDeleted); @@ -3475,6 +3482,7 @@ void doStreamStateSaveCheckpoint(SOperatorInfo* pOperator) { len = doStreamStateEncodeOpState(&pBuf, len, pOperator, true); pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_STATE_OP_CHECKPOINT_NAME, strlen(STREAM_STATE_OP_CHECKPOINT_NAME), buf, len); + taosMemoryFree(buf); } static SSDataBlock* buildStateResult(SOperatorInfo* pOperator) { @@ -3614,10 +3622,11 @@ void streamStateReleaseState(SOperatorInfo* pOperator) { static void compactStateWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCurWin, SResultWindowInfo* pNextWin, SSHashObj* pStUpdated, SSHashObj* pStDeleted) { - SExprSupp* pSup = &pOperator->exprSupp; - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SExprSupp* pSup = &pOperator->exprSupp; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SStreamStateAggOperatorInfo* pInfo = pOperator->info; - compactTimeWindow(pSup, &pInfo->streamAggSup, &pInfo->twAggSup, pTaskInfo, pCurWin, pNextWin, pStUpdated, pStDeleted, false); + compactTimeWindow(pSup, &pInfo->streamAggSup, &pInfo->twAggSup, pTaskInfo, pCurWin, pNextWin, pStUpdated, pStDeleted, + false); } void streamStateReloadState(SOperatorInfo* pOperator) { @@ -3629,7 +3638,7 @@ void streamStateReloadState(SOperatorInfo* pOperator) { int32_t size = 0; void* pBuf = NULL; int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_STATE_OP_STATE_NAME, - strlen(STREAM_STATE_OP_STATE_NAME), &pBuf, &size); + strlen(STREAM_STATE_OP_STATE_NAME), &pBuf, &size); int32_t num = (size - sizeof(TSKEY)) / sizeof(SSessionKey); qDebug("===stream=== reload state. 
get result count:%d", num); SSessionKey* pSeKeyBuf = (SSessionKey*)pBuf; @@ -4010,8 +4019,9 @@ SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhys setOperatorInfo(pOperator, "StreamIntervalOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL, true, OP_NOT_OPENED, pInfo, pTaskInfo); - pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamIntervalAgg, NULL, - destroyStreamFinalIntervalOperatorInfo, optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); + pOperator->fpSet = + createOperatorFpSet(optrDummyOpenFn, doStreamIntervalAgg, NULL, destroyStreamFinalIntervalOperatorInfo, + optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); setOperatorStreamStateFn(pOperator, streamIntervalReleaseState, streamIntervalReloadState); pInfo->stateStore = pTaskInfo->storageAPI.stateStore; diff --git a/source/libs/index/src/indexFilter.c b/source/libs/index/src/indexFilter.c index bfdcd2b030..7ed36fbf9e 100644 --- a/source/libs/index/src/indexFilter.c +++ b/source/libs/index/src/indexFilter.c @@ -328,6 +328,7 @@ static int32_t sifInitParam(SNode *node, SIFParam *param, SIFCtx *ctx) { SIF_ERR_RET(scalarGenerateSetFromList((void **)¶m->pFilter, node, nl->node.resType.type)); if (taosHashPut(ctx->pRes, &node, POINTER_BYTES, param, sizeof(*param))) { taosHashCleanup(param->pFilter); + param->pFilter = NULL; indexError("taosHashPut nodeList failed, size:%d", (int32_t)sizeof(*param)); SIF_ERR_RET(TSDB_CODE_OUT_OF_MEMORY); } diff --git a/source/libs/nodes/src/nodesUtilFuncs.c b/source/libs/nodes/src/nodesUtilFuncs.c index d167d81c82..e730ccf21b 100644 --- a/source/libs/nodes/src/nodesUtilFuncs.c +++ b/source/libs/nodes/src/nodesUtilFuncs.c @@ -887,6 +887,7 @@ void nodesDestroyNode(SNode* pNode) { taosHashCleanup(pStmt->pSubTableHashObj); taosHashCleanup(pStmt->pTableNameHashObj); taosHashCleanup(pStmt->pDbFNameHashObj); + taosHashCleanup(pStmt->pTableCxtHashObj); if (pStmt->freeHashFunc) { pStmt->freeHashFunc(pStmt->pTableBlockHashObj); } diff --git a/source/libs/parser/inc/parInsertUtil.h b/source/libs/parser/inc/parInsertUtil.h index ce8c2d8a3d..b20587dd43 100644 --- a/source/libs/parser/inc/parInsertUtil.h +++ b/source/libs/parser/inc/parInsertUtil.h @@ -50,7 +50,7 @@ void insCheckTableDataOrder(STableDataCxt *pTableCxt, TSKEY tsKey); int32_t insGetTableDataCxt(SHashObj *pHash, void *id, int32_t idLen, STableMeta *pTableMeta, SVCreateTbReq **pCreateTbReq, STableDataCxt **pTableCxt, bool colMode, bool ignoreColVals); int32_t initTableColSubmitData(STableDataCxt *pTableCxt); -int32_t insMergeTableDataCxt(SHashObj *pTableHash, SArray **pVgDataBlocks); +int32_t insMergeTableDataCxt(SHashObj *pTableHash, SArray **pVgDataBlocks, bool isRebuild); int32_t insBuildVgDataBlocks(SHashObj *pVgroupsHashObj, SArray *pVgDataBlocks, SArray **pDataBlocks); void insDestroyTableDataCxtHashMap(SHashObj *pTableCxtHash); void insDestroyVgroupDataCxt(SVgroupDataCxt *pVgCxt); diff --git a/source/libs/parser/inc/parTranslater.h b/source/libs/parser/inc/parTranslater.h index e06626fb00..55ea71a368 100644 --- a/source/libs/parser/inc/parTranslater.h +++ b/source/libs/parser/inc/parTranslater.h @@ -23,6 +23,7 @@ extern "C" { #include "parToken.h" #include "parUtil.h" #include "parser.h" +#include "cmdnodes.h" typedef struct STranslateContext { SParseContext* pParseCxt; @@ -46,7 +47,9 @@ typedef struct STranslateContext { SNode* pPostRoot; } STranslateContext; +bool biRewriteToTbnameFunc(STranslateContext* pCxt, SNode** ppNode); int32_t biRewriteSelectStar(STranslateContext* pCxt, SSelectStmt* 
pSelect); +int32_t biCheckCreateTableTbnameCol(STranslateContext* pCxt, SCreateTableStmt* pStmt); int32_t findTable(STranslateContext* pCxt, const char* pTableAlias, STableNode** pOutput); int32_t getTargetMetaImpl(SParseContext* pParCxt, SParseMetaCache* pMetaCache, const SName* pName, STableMeta** pMeta, bool couldBeView); diff --git a/source/libs/parser/src/parInsertSml.c b/source/libs/parser/src/parInsertSml.c index f2194402da..2dbba38212 100644 --- a/source/libs/parser/src/parInsertSml.c +++ b/source/libs/parser/src/parInsertSml.c @@ -425,7 +425,7 @@ SQuery* smlInitHandle() { int32_t smlBuildOutput(SQuery* handle, SHashObj* pVgHash) { SVnodeModifyOpStmt* pStmt = (SVnodeModifyOpStmt*)(handle)->pRoot; // merge according to vgId - int32_t code = insMergeTableDataCxt(pStmt->pTableBlockHashObj, &pStmt->pVgDataBlocks); + int32_t code = insMergeTableDataCxt(pStmt->pTableBlockHashObj, &pStmt->pVgDataBlocks, true); if (code != TSDB_CODE_SUCCESS) { uError("insMergeTableDataCxt failed"); return code; diff --git a/source/libs/parser/src/parInsertSql.c b/source/libs/parser/src/parInsertSql.c index 2b8516d37b..31b016458a 100644 --- a/source/libs/parser/src/parInsertSql.c +++ b/source/libs/parser/src/parInsertSql.c @@ -55,6 +55,7 @@ typedef struct SInsertParseContext { bool usingDuplicateTable; bool forceUpdate; bool needTableTagVal; + bool needRequest; // whether or not request server } SInsertParseContext; typedef int32_t (*_row_append_fn_t)(SMsgBuf* pMsgBuf, const void* value, int32_t len, void* param); @@ -652,6 +653,10 @@ static int32_t parseTagValue(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pStm } static int32_t buildCreateTbReq(SVnodeModifyOpStmt* pStmt, STag* pTag, SArray* pTagName) { + if (pStmt->pCreateTblReq) { + tdDestroySVCreateTbReq(pStmt->pCreateTblReq); + taosMemoryFreeClear(pStmt->pCreateTblReq); + } pStmt->pCreateTblReq = taosMemoryCalloc(1, sizeof(SVCreateTbReq)); if (NULL == pStmt->pCreateTblReq) { return TSDB_CODE_OUT_OF_MEMORY; @@ -1797,9 +1802,10 @@ static void clearStbRowsDataContext(SStbRowsDataContext* pStbRowsCxt) { taosMemoryFreeClear(pStbRowsCxt->pCreateCtbReq); } -static int32_t parseOneStbRow(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pStmt, const char** ppSql, - SStbRowsDataContext* pStbRowsCxt, bool* pGotRow, SToken* pToken) { - bool bFirstTable = false; +static int32_t parseOneStbRow(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pStmt, const char** ppSql, + SStbRowsDataContext* pStbRowsCxt, bool* pGotRow, SToken* pToken, + STableDataCxt** ppTableDataCxt) { + bool bFirstTable = false; int32_t code = getStbRowValues(pCxt, pStmt, ppSql, pStbRowsCxt, pGotRow, pToken, &bFirstTable); if (code != TSDB_CODE_SUCCESS || !*pGotRow) { return code; @@ -1809,15 +1815,14 @@ static int32_t parseOneStbRow(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pSt code = processCtbAutoCreationAndCtbMeta(pCxt, pStmt, pStbRowsCxt); } - STableDataCxt* pTableDataCxt = NULL; code = insGetTableDataCxt(pStmt->pTableBlockHashObj, &pStbRowsCxt->pCtbMeta->uid, sizeof(pStbRowsCxt->pCtbMeta->uid), - pStbRowsCxt->pCtbMeta, &pStbRowsCxt->pCreateCtbReq, &pTableDataCxt, false, true); - initTableColSubmitData(pTableDataCxt); + pStbRowsCxt->pCtbMeta, &pStbRowsCxt->pCreateCtbReq, ppTableDataCxt, false, true); + initTableColSubmitData(*ppTableDataCxt); if (code == TSDB_CODE_SUCCESS) { - SRow** pRow = taosArrayReserve(pTableDataCxt->pData->aRowP, 1); - code = tRowBuild(pStbRowsCxt->aColVals, pTableDataCxt->pSchema, pRow); + SRow** pRow = taosArrayReserve((*ppTableDataCxt)->pData->aRowP, 1); + code = 
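buildCreateTbReq in parInsertSql.c now destroys and frees any `pCreateTblReq` left over from a previous call before allocating a new one, which matters when the same statement object is reused across several auto-create rows. A reduced reset-before-reallocate sketch with invented types (CreateReq and Stmt are placeholders):

    /* Reset-before-reallocate: drop the previous object before building a new one. */
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct { char name[32]; } CreateReq;   /* stand-in for the create-table request */
    typedef struct { CreateReq *req; } Stmt;

    static int build_create_req(Stmt *stmt, const char *tbname) {
      if (stmt->req != NULL) {   /* re-entry: free the request built last time */
        free(stmt->req);
        stmt->req = NULL;
      }
      stmt->req = calloc(1, sizeof *stmt->req);
      if (stmt->req == NULL) return -1;
      snprintf(stmt->req->name, sizeof stmt->req->name, "%s", tbname);
      return 0;
    }

    int main(void) {
      Stmt stmt = {0};
      build_create_req(&stmt, "d0");   /* first auto-create row */
      build_create_req(&stmt, "d1");   /* second row: old request is released first */
      puts(stmt.req->name);
      free(stmt.req);
      return 0;
    }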
tRowBuild(pStbRowsCxt->aColVals, (*ppTableDataCxt)->pSchema, pRow); if (TSDB_CODE_SUCCESS == code) { - insCheckTableDataOrder(pTableDataCxt, TD_ROW_KEY(*pRow)); + insCheckTableDataOrder(*ppTableDataCxt, TD_ROW_KEY(*pRow)); } } @@ -1915,7 +1920,8 @@ static int32_t parseValues(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pStmt, if (!pStmt->stbSyntax) { code = parseOneRow(pCxt, &pStmt->pSql, rowsDataCxt.pTableDataCxt, &gotRow, pToken); } else { - code = parseOneStbRow(pCxt, pStmt, &pStmt->pSql, rowsDataCxt.pStbRowsCxt, &gotRow, pToken); + STableDataCxt* pTableDataCxt = NULL; + code = parseOneStbRow(pCxt, pStmt, &pStmt->pSql, rowsDataCxt.pStbRowsCxt, &gotRow, pToken, &pTableDataCxt); } } @@ -1979,7 +1985,14 @@ static int32_t parseCsvFile(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pStmt if (!pStmt->stbSyntax) { code = parseOneRow(pCxt, (const char**)&pRow, rowsDataCxt.pTableDataCxt, &gotRow, &token); } else { - code = parseOneStbRow(pCxt, pStmt, (const char**)&pRow, rowsDataCxt.pStbRowsCxt, &gotRow, &token); + STableDataCxt* pTableDataCxt = NULL; + code = parseOneStbRow(pCxt, pStmt, (const char**)&pRow, rowsDataCxt.pStbRowsCxt, &gotRow, &token, &pTableDataCxt); + if (code == TSDB_CODE_SUCCESS) { + SStbRowsDataContext* pStbRowsCxt = rowsDataCxt.pStbRowsCxt; + void* pData = pTableDataCxt; + taosHashPut(pStmt->pTableCxtHashObj, &pStbRowsCxt->pCtbMeta->uid, sizeof(pStbRowsCxt->pCtbMeta->uid), &pData, + POINTER_BYTES); + } } if (code && firstLine) { firstLine = false; @@ -1992,7 +2005,7 @@ static int32_t parseCsvFile(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pStmt (*pNumOfRows)++; } - if (TSDB_CODE_SUCCESS == code && (*pNumOfRows) > tsMaxInsertBatchRows) { + if (TSDB_CODE_SUCCESS == code && (*pNumOfRows) >= tsMaxInsertBatchRows) { pStmt->fileProcessing = true; break; } @@ -2003,7 +2016,7 @@ static int32_t parseCsvFile(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pStmt parserDebug("0x%" PRIx64 " %d rows have been parsed", pCxt->pComCxt->requestId, *pNumOfRows); - if (TSDB_CODE_SUCCESS == code && 0 == (*pNumOfRows) && + if (TSDB_CODE_SUCCESS == code && 0 == (*pNumOfRows) && 0 == pStmt->totalRowsNum && (!TSDB_QUERY_HAS_TYPE(pStmt->insertType, TSDB_QUERY_TYPE_STMT_INSERT)) && !pStmt->fileProcessing) { code = buildSyntaxErrMsg(&pCxt->msg, "no any data points", NULL); } @@ -2011,6 +2024,12 @@ static int32_t parseCsvFile(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pStmt } static int32_t parseDataFromFileImpl(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pStmt, SRowsDataContext rowsDataCxt) { + // init only for file + if (NULL == pStmt->pTableCxtHashObj) { + pStmt->pTableCxtHashObj = + taosHashInit(128, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK); + } + int32_t numOfRows = 0; int32_t code = parseCsvFile(pCxt, pStmt, rowsDataCxt, &numOfRows); if (TSDB_CODE_SUCCESS == code) { @@ -2022,7 +2041,18 @@ static int32_t parseDataFromFileImpl(SInsertParseContext* pCxt, SVnodeModifyOpSt } else { parserDebug("0x%" PRIx64 " insert from csv. 
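parseCsvFile above now stops a batch once `*pNumOfRows >= tsMaxInsertBatchRows` (previously `>`), marks `fileProcessing`, and records each touched table context in `pTableCxtHashObj` so later merging can be limited to tables that actually received rows. The batching control flow, boiled down to a standalone loop with a made-up limit; the outer loop stands in for repeated calls while fileProcessing stays true:

    /* Batched file ingestion: stop at the batch limit and submit what was parsed. */
    #include <stdio.h>

    #define MAX_BATCH_ROWS 3            /* stands in for tsMaxInsertBatchRows */

    int main(void) {
      int total_rows_in_file = 10;
      int consumed = 0;

      while (consumed < total_rows_in_file) {
        int rows = 0, more = 0;
        /* one batch: parse rows until the limit, then hand off for submit */
        while (consumed < total_rows_in_file) {
          rows++; consumed++;
          if (rows >= MAX_BATCH_ROWS) { more = 1; break; }  /* fileProcessing = true */
        }
        printf("submit batch of %d rows%s\n", rows, more ? ", file not finished" : "");
      }
      return 0;
    }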
File is too large, do it in batches.", pCxt->pComCxt->requestId); } + if (pStmt->insertType != TSDB_QUERY_TYPE_FILE_INSERT) { + return buildSyntaxErrMsg(&pCxt->msg, "keyword VALUES or FILE is exclusive", NULL); + } } + + // just record pTableCxt whose data come from file + if (!pStmt->stbSyntax && numOfRows > 0) { + void* pData = rowsDataCxt.pTableDataCxt; + taosHashPut(pStmt->pTableCxtHashObj, &pStmt->pTableMeta->uid, sizeof(pStmt->pTableMeta->uid), &pData, + POINTER_BYTES); + } + return code; } @@ -2061,6 +2091,9 @@ static int32_t parseDataClause(SInsertParseContext* pCxt, SVnodeModifyOpStmt* pS NEXT_TOKEN(pStmt->pSql, token); switch (token.type) { case TK_VALUES: + if (TSDB_QUERY_HAS_TYPE(pStmt->insertType, TSDB_QUERY_TYPE_FILE_INSERT)) { + return buildSyntaxErrMsg(&pCxt->msg, "keyword VALUES or FILE is exclusive", token.z); + } return parseValuesClause(pCxt, pStmt, rowsDataCxt, &token); case TK_FILE: return parseFileClause(pCxt, pStmt, rowsDataCxt, &token); @@ -2275,8 +2308,25 @@ static int32_t parseInsertBodyBottom(SInsertParseContext* pCxt, SVnodeModifyOpSt return setStmtInfo(pCxt, pStmt); } + // release old array alloced by merge + pStmt->freeArrayFunc(pStmt->pVgDataBlocks); + pStmt->pVgDataBlocks = NULL; + + bool fileOnly = (pStmt->insertType == TSDB_QUERY_TYPE_FILE_INSERT); + if (fileOnly) { + // none data, skip merge & buildvgdata + if (0 == taosHashGetSize(pStmt->pTableCxtHashObj)) { + pCxt->needRequest = false; + return TSDB_CODE_SUCCESS; + } + } + // merge according to vgId - int32_t code = insMergeTableDataCxt(pStmt->pTableBlockHashObj, &pStmt->pVgDataBlocks); + int32_t code = insMergeTableDataCxt(fileOnly ? pStmt->pTableCxtHashObj : pStmt->pTableBlockHashObj, + &pStmt->pVgDataBlocks, pStmt->fileProcessing); + // clear tmp hashobj only + taosHashClear(pStmt->pTableCxtHashObj); + if (TSDB_CODE_SUCCESS == code) { code = insBuildVgDataBlocks(pStmt->pVgroupsHashObj, pStmt->pVgDataBlocks, &pStmt->pDataBlocks); } @@ -2718,6 +2768,7 @@ int32_t parseInsertSql(SParseContext* pCxt, SQuery** pQuery, SCatalogReq* pCatal .msg = {.buf = pCxt->pMsg, .len = pCxt->msgLen}, .missCache = false, .usingDuplicateTable = false, + .needRequest = true, .forceUpdate = (NULL != pCatalogReq ? 
pCatalogReq->forceUpdate : false)}; int32_t code = initInsertQuery(&context, pCatalogReq, pMetaData, pQuery); @@ -2732,5 +2783,10 @@ int32_t parseInsertSql(SParseContext* pCxt, SQuery** pQuery, SCatalogReq* pCatal code = setRefreshMeta(*pQuery); } insDestroyBoundColInfo(&context.tags); + + // if no data to insert, set emptyMode to avoid request server + if (!context.needRequest) { + (*pQuery)->execMode = QUERY_EXEC_MODE_EMPTY_RESULT; + } return code; } diff --git a/source/libs/parser/src/parInsertStmt.c b/source/libs/parser/src/parInsertStmt.c index 5137deca2e..a88aec20b3 100644 --- a/source/libs/parser/src/parInsertStmt.c +++ b/source/libs/parser/src/parInsertStmt.c @@ -58,7 +58,7 @@ int32_t qBuildStmtOutput(SQuery* pQuery, SHashObj* pVgHash, SHashObj* pBlockHash // merge according to vgId if (taosHashGetSize(pBlockHash) > 0) { - code = insMergeTableDataCxt(pBlockHash, &pVgDataBlocks); + code = insMergeTableDataCxt(pBlockHash, &pVgDataBlocks, true); } if (TSDB_CODE_SUCCESS == code) { code = insBuildVgDataBlocks(pVgHash, pVgDataBlocks, &pStmt->pDataBlocks); diff --git a/source/libs/parser/src/parInsertUtil.c b/source/libs/parser/src/parInsertUtil.c index 21b093c76c..a924ed68b0 100644 --- a/source/libs/parser/src/parInsertUtil.c +++ b/source/libs/parser/src/parInsertUtil.c @@ -289,6 +289,14 @@ static int32_t rebuildTableData(SSubmitTbData* pSrc, SSubmitTbData** pDst) { pTmp->uid = pSrc->uid; pTmp->sver = pSrc->sver; pTmp->pCreateTbReq = NULL; + if (pTmp->flags & SUBMIT_REQ_AUTO_CREATE_TABLE) { + if (pSrc->pCreateTbReq) { + cloneSVreateTbReq(pSrc->pCreateTbReq, &pTmp->pCreateTbReq); + } else { + pTmp->flags &= ~SUBMIT_REQ_AUTO_CREATE_TABLE; + } + } + if (pTmp->flags & SUBMIT_REQ_COLUMN_DATA_FORMAT) { pTmp->aCol = taosArrayInit(128, sizeof(SColData)); if (NULL == pTmp->aCol) { @@ -416,15 +424,21 @@ void insDestroyTableDataCxtHashMap(SHashObj* pTableCxtHash) { taosHashCleanup(pTableCxtHash); } -static int32_t fillVgroupDataCxt(STableDataCxt* pTableCxt, SVgroupDataCxt* pVgCxt) { +static int32_t fillVgroupDataCxt(STableDataCxt* pTableCxt, SVgroupDataCxt* pVgCxt, bool isRebuild) { if (NULL == pVgCxt->pData->aSubmitTbData) { pVgCxt->pData->aSubmitTbData = taosArrayInit(128, sizeof(SSubmitTbData)); if (NULL == pVgCxt->pData->aSubmitTbData) { return TSDB_CODE_OUT_OF_MEMORY; } } + + // push data to submit, rebuild empty data for next submit taosArrayPush(pVgCxt->pData->aSubmitTbData, pTableCxt->pData); - rebuildTableData(pTableCxt->pData, &pTableCxt->pData); + if (isRebuild) { + rebuildTableData(pTableCxt->pData, &pTableCxt->pData); + } else { + taosMemoryFreeClear(pTableCxt->pData); + } qDebug("add tableDataCxt uid:%" PRId64 " to vgId:%d", pTableCxt->pMeta->uid, pVgCxt->vgId); @@ -467,7 +481,7 @@ int insColDataComp(const void* lp, const void* rp) { return 0; } -int32_t insMergeTableDataCxt(SHashObj* pTableHash, SArray** pVgDataBlocks) { +int32_t insMergeTableDataCxt(SHashObj* pTableHash, SArray** pVgDataBlocks, bool isRebuild) { SHashObj* pVgroupHash = taosHashInit(128, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false); SArray* pVgroupList = taosArrayInit(8, POINTER_BYTES); if (NULL == pVgroupHash || NULL == pVgroupList) { @@ -502,6 +516,13 @@ int32_t insMergeTableDataCxt(SHashObj* pTableHash, SArray** pVgDataBlocks) { tColDataSortMerge(pTableCxt->pData->aCol); } else { + // skip the table has no data to insert + // eg: import a csv without valid data + // if (0 == taosArrayGetSize(pTableCxt->pData->aRowP)) { + // qWarn("no row in tableDataCxt uid:%" PRId64 " ", pTableCxt->pMeta->uid); + 
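fillVgroupDataCxt (continued below) now takes an `isRebuild` flag: after a table's submit data is pushed into the vgroup payload, the per-table container is either rebuilt empty for the next batch (file imports that keep going) or simply released. A loose sketch of that push-then-rebuild-or-free choice; TbData and flush_table are invented names and the ownership details are simplified:

    /* Push the filled buffer into the output, then either rebuild or drop it
     * depending on whether another batch will follow. */
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct { int nrows; } TbData;

    static TbData *tbdata_new(void) { return calloc(1, sizeof(TbData)); }

    static void flush_table(TbData **slot, int isRebuild) {
      printf("append %d rows to vgroup payload\n", (*slot)->nrows);  /* payload takes a copy */
      free(*slot);                               /* release the per-table container */
      *slot = isRebuild ? tbdata_new() : NULL;   /* fresh buffer only if more data follows */
    }

    int main(void) {
      TbData *ctx = tbdata_new();
      if (ctx == NULL) return 1;
      ctx->nrows = 100;
      flush_table(&ctx, 1);   /* mid-file batch: rebuild for the next chunk */
      if (ctx == NULL) return 1;
      ctx->nrows = 20;
      flush_table(&ctx, 0);   /* final batch: nothing more to parse, no rebuild */
      return 0;
    }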
// p = taosHashIterate(pTableHash, p); + // continue; + // } if (!pTableCxt->ordered) { code = tRowSort(pTableCxt->pData->aRowP); } @@ -520,7 +541,7 @@ int32_t insMergeTableDataCxt(SHashObj* pTableHash, SArray** pVgDataBlocks) { pVgCxt = *(SVgroupDataCxt**)pp; } if (TSDB_CODE_SUCCESS == code) { - code = fillVgroupDataCxt(pTableCxt, pVgCxt); + code = fillVgroupDataCxt(pTableCxt, pVgCxt, isRebuild); } } if (TSDB_CODE_SUCCESS == code) { diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 1a65a29259..5c30384a6b 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -263,6 +263,7 @@ static const SSysTableShowAdapter sysTableShowAdapter[] = { static int32_t translateSubquery(STranslateContext* pCxt, SNode* pNode); static int32_t translateQuery(STranslateContext* pCxt, SNode* pNode); static EDealRes translateValue(STranslateContext* pCxt, SValueNode* pVal); +static EDealRes translateFunction(STranslateContext* pCxt, SFunctionNode** pFunc); static int32_t createSimpleSelectStmtFromProjList(const char* pDb, const char* pTable, SNodeList* pProjectionList, SSelectStmt** pStmt); static int32_t createLastTsSelectStmt(char* pDb, char* pTable, STableMeta* pMeta, SNode** pQuery); @@ -1091,6 +1092,12 @@ static EDealRes translateColumnUseAlias(STranslateContext* pCxt, SColumnNode** p return DEAL_RES_CONTINUE; } +#ifndef TD_ENTERPRISE +bool biRewriteToTbnameFunc(STranslateContext* pCxt, SNode** ppNode) { + return false; +} +#endif + static EDealRes translateColumn(STranslateContext* pCxt, SColumnNode** pCol) { if (NULL == pCxt->pCurrStmt || (isSelectStmt(pCxt->pCurrStmt) && NULL == ((SSelectStmt*)pCxt->pCurrStmt)->pFromTable)) { @@ -1102,6 +1109,13 @@ static EDealRes translateColumn(STranslateContext* pCxt, SColumnNode** pCol) { return DEAL_RES_CONTINUE; } + if (pCxt->pParseCxt->biMode) { + SNode** ppNode = (SNode**)pCol; + if (biRewriteToTbnameFunc(pCxt, ppNode)) { + return translateFunction(pCxt, (SFunctionNode**)ppNode); + } + } + EDealRes res = DEAL_RES_CONTINUE; if ('\0' != (*pCol)->tableAlias[0]) { res = translateColumnWithPrefix(pCxt, pCol); @@ -1433,14 +1447,18 @@ static int32_t dataTypeComp(const SDataType* l, const SDataType* r) { static EDealRes translateOperator(STranslateContext* pCxt, SOperatorNode* pOp) { if (isMultiResFunc(pOp->pLeft)) { - return generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, ((SExprNode*)(pOp->pLeft))->aliasName); + generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, ((SExprNode*)(pOp->pLeft))->aliasName); + return DEAL_RES_ERROR; } if (isMultiResFunc(pOp->pRight)) { - return generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, ((SExprNode*)(pOp->pRight))->aliasName); + generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, ((SExprNode*)(pOp->pRight))->aliasName); + return DEAL_RES_ERROR; } - if (TSDB_CODE_SUCCESS != scalarGetOperatorResultType(pOp)) { - return generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, pOp->node.aliasName); + int32_t res = scalarGetOperatorResultType(pOp); + if (TSDB_CODE_SUCCESS != res) { + pCxt->errCode = res; + return DEAL_RES_ERROR; } return DEAL_RES_CONTINUE; @@ -2208,7 +2226,8 @@ static EDealRes translateFunction(STranslateContext* pCxt, SFunctionNode** pFunc SNode* pParam = NULL; FOREACH(pParam, (*pFunc)->pParameterList) { if (isMultiResFunc(pParam)) { - return generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, ((SExprNode*)pParam)->aliasName); + pCxt->errCode = TSDB_CODE_FUNC_FUNTION_PARA_NUM; + 
return DEAL_RES_ERROR; } } @@ -5708,6 +5727,12 @@ static int32_t checkTableDeleteMarkOption(STranslateContext* pCxt, STableOptions return code; } +#ifndef TD_ENTERPRISE +int32_t biCheckCreateTableTbnameCol(STranslateContext* pCxt, SCreateTableStmt* pStmt) { + return TSDB_CODE_SUCCESS; +} +#endif + static int32_t checkCreateTable(STranslateContext* pCxt, SCreateTableStmt* pStmt, bool createStable) { if (NULL != strchr(pStmt->tableName, '.')) { return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_IDENTIFIER_NAME, @@ -5746,7 +5771,9 @@ static int32_t checkCreateTable(STranslateContext* pCxt, SCreateTableStmt* pStmt "configured with the 'TTL' option"); } } - + if (pCxt->pParseCxt->biMode != 0 && TSDB_CODE_SUCCESS == code) { + code = biCheckCreateTableTbnameCol(pCxt, pStmt); + } return code; } @@ -7958,7 +7985,7 @@ static int32_t translateDropStream(STranslateContext* pCxt, SDropStreamStmt* pSt tNameGetFullDbName(&name, dropReq.name); dropReq.igNotExists = pStmt->ignoreNotExists; int32_t code = buildCmdMsg(pCxt, TDMT_MND_DROP_STREAM, (FSerializeFunc)tSerializeSMDropStreamReq, &dropReq); - tFreeSMDropStreamReq(&dropReq); + tFreeMDropStreamReq(&dropReq); return code; } diff --git a/source/libs/parser/test/parSelectTest.cpp b/source/libs/parser/test/parSelectTest.cpp index 68ded3afdd..53d97d0699 100644 --- a/source/libs/parser/test/parSelectTest.cpp +++ b/source/libs/parser/test/parSelectTest.cpp @@ -383,7 +383,7 @@ TEST_F(ParserSelectTest, semanticCheck) { run("SELECT LAST(*) + SUM(c1) FROM t1", TSDB_CODE_PAR_WRONG_VALUE_TYPE); - run("SELECT CEIL(LAST(ts, c1)) FROM t1", TSDB_CODE_PAR_WRONG_VALUE_TYPE); + run("SELECT CEIL(LAST(ts, c1)) FROM t1", TSDB_CODE_FUNC_FUNTION_PARA_NUM); // TSDB_CODE_PAR_ILLEGAL_USE_AGG_FUNCTION run("SELECT c2 FROM t1 tt1 join t1 tt2 on COUNT(*) > 0", TSDB_CODE_PAR_ILLEGAL_USE_AGG_FUNCTION); diff --git a/source/libs/scalar/src/scalar.c b/source/libs/scalar/src/scalar.c index 3e003234cf..90cec4522f 100644 --- a/source/libs/scalar/src/scalar.c +++ b/source/libs/scalar/src/scalar.c @@ -208,6 +208,7 @@ void sclFreeParam(SScalarParam *param) { if (param->columnData != NULL) { colDataDestroy(param->columnData); taosMemoryFreeClear(param->columnData); + param->columnData = NULL; } if (param->pHashFilter != NULL) { @@ -845,6 +846,7 @@ int32_t sclExecOperator(SOperatorNode *node, SScalarCtx *ctx, SScalarParam *outp SScalarParam *params = NULL; int32_t rowNum = 0; int32_t code = 0; + int32_t paramNum = 0; // json not support in in operator if (nodeType(node->pLeft) == QUERY_NODE_VALUE) { @@ -865,7 +867,7 @@ int32_t sclExecOperator(SOperatorNode *node, SScalarCtx *ctx, SScalarParam *outp _bin_scalar_fn_t OperatorFn = getBinScalarOperatorFn(node->opType); - int32_t paramNum = scalarGetOperatorParamNum(node->opType); + paramNum = scalarGetOperatorParamNum(node->opType); SScalarParam *pLeft = ¶ms[0]; SScalarParam *pRight = paramNum > 1 ? 
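The translateOperator change above stops replacing the code returned by scalarGetOperatorResultType with a generic "wrong value type" error and stores the specific code in `pCxt->errCode`; the updated parSelectTest expectation reflects the same idea for function parameters. A small standalone example of propagating the underlying code instead of masking it (the numeric codes here are invented):

    /* Propagate the callee's error code instead of masking it with a generic one. */
    #include <stdio.h>

    #define ERR_GENERIC        0x2603   /* illustrative values, not the real codes */
    #define ERR_TYPE_MISMATCH  0x2E01

    static int get_result_type(int ltype, int rtype) {
      return (ltype == rtype) ? 0 : ERR_TYPE_MISMATCH;
    }

    static int translate_op(int ltype, int rtype, int *errCode) {
      int code = get_result_type(ltype, rtype);
      if (code != 0) {
        *errCode = code;       /* keep the specific reason for callers and tests */
        return -1;
      }
      return 0;
    }

    int main(void) {
      int err = 0;
      if (translate_op(1, 2, &err) != 0)
        printf("translate failed, code=0x%X (specific, not 0x%X)\n", err, ERR_GENERIC);
      return 0;
    }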
¶ms[1] : NULL; diff --git a/source/libs/scalar/src/sclfunc.c b/source/libs/scalar/src/sclfunc.c index 6144ebd340..734d1e7d17 100644 --- a/source/libs/scalar/src/sclfunc.c +++ b/source/libs/scalar/src/sclfunc.c @@ -1457,13 +1457,29 @@ int32_t timeTruncateFunction(SScalarParam *pInput, int32_t inputNum, SScalarPara } case 604800000: { /* 1w */ if (tsDigits == TSDB_TIME_PRECISION_MILLI_DIGITS) { - timeVal = timeVal / 1000 / 604800 * 604800 * 1000; + if (ignoreTz) { + timeVal = timeVal - (timeVal + offsetFromTz(timezone, 1000)) % (((int64_t)604800) * 1000); + } else { + timeVal = timeVal / 1000 / 604800 * 604800 * 1000; + } } else if (tsDigits == TSDB_TIME_PRECISION_MICRO_DIGITS) { - timeVal = timeVal / 1000000 / 604800 * 604800 * 1000000; + if (ignoreTz) { + timeVal = timeVal - (timeVal + offsetFromTz(timezone, 1000000)) % (((int64_t)604800) * 1000000); + } else { + timeVal = timeVal / 1000000 / 604800 * 604800 * 1000000; + } } else if (tsDigits == TSDB_TIME_PRECISION_NANO_DIGITS) { - timeVal = timeVal / 1000000000 / 604800 * 604800 * 1000000000; + if (ignoreTz) { + timeVal = timeVal - (timeVal + offsetFromTz(timezone, 1000000000)) % (((int64_t)604800) * 1000000000); + } else { + timeVal = timeVal / 1000000000 / 604800 * 604800 * 1000000000; + } } else if (tsDigits <= TSDB_TIME_PRECISION_SEC_DIGITS) { - timeVal = timeVal * factor / factor / 604800 * 604800 * factor; + if (ignoreTz) { + timeVal = timeVal - (timeVal + offsetFromTz(timezone, 1)) % (((int64_t)604800L) * factor); + } else { + timeVal = timeVal * factor / factor / 604800 * 604800 * factor; + } } else { colDataSetNULL(pOutput->columnData, i); continue; diff --git a/source/libs/stream/inc/streamBackendRocksdb.h b/source/libs/stream/inc/streamBackendRocksdb.h index 3eadea3cdd..bed0f79f02 100644 --- a/source/libs/stream/inc/streamBackendRocksdb.h +++ b/source/libs/stream/inc/streamBackendRocksdb.h @@ -17,6 +17,7 @@ #define _STREAM_BACKEDN_ROCKSDB_H_ #include "rocksdb/c.h" +//#include "streamInt.h" #include "streamState.h" #include "tcoding.h" #include "tcommon.h" @@ -42,15 +43,110 @@ typedef struct { TdThreadMutex cfMutex; SHashObj* cfInst; int64_t defaultCfInit; + } SBackendWrapper; +typedef struct { + void* tableOpt; +} RocksdbCfParam; + +typedef struct { + rocksdb_t* db; + rocksdb_writeoptions_t* writeOpt; + rocksdb_readoptions_t* readOpt; + rocksdb_options_t* dbOpt; + rocksdb_env_t* env; + rocksdb_cache_t* cache; + + rocksdb_column_family_handle_t** pCf; + rocksdb_comparator_t** pCompares; + rocksdb_options_t** pCfOpts; + RocksdbCfParam* pCfParams; + + rocksdb_compactionfilterfactory_t* filterFactory; + TdThreadMutex mutex; + char* idstr; + char* path; + int64_t refId; + + void* pTask; + int64_t streamId; + int64_t taskId; + int64_t chkpId; + SArray* chkpSaved; + SArray* chkpInUse; + int32_t chkpCap; + TdThreadRwlock chkpDirLock; + int64_t dataWritten; + +} STaskDbWrapper; + +typedef struct SDbChkp { + int8_t init; + char* pCurrent; + char* pManifest; + SArray* pSST; + int64_t preCkptId; + int64_t curChkpId; + char* path; + + char* buf; + int32_t len; + + // ping-pong buf + SHashObj* pSstTbl[2]; + int8_t idx; + + SArray* pAdd; + SArray* pDel; + int8_t update; + + TdThreadRwlock rwLock; +} SDbChkp; +typedef struct { + int8_t init; + char* pCurrent; + char* pManifest; + SArray* pSST; + int64_t preCkptId; + int64_t curChkpId; + char* path; + + char* buf; + int32_t len; + + // ping-pong buf + SHashObj* pSstTbl[2]; + int8_t idx; + + SArray* pAdd; + SArray* pDel; + int8_t update; + + SHashObj* pDbChkpTbl; + + TdThreadRwlock rwLock; +} 
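The timeTruncateFunction change above extends 1-week truncation to honor the ignore-timezone flag: instead of `timeVal / unit / 604800 * 604800 * unit`, it computes `timeVal - (timeVal + offsetFromTz(...)) % (604800 * unit)` so the week boundary is shifted by the timezone offset. The arithmetic, shown standalone for the millisecond case; the offset value and sign convention below are illustrative and not taken from offsetFromTz:

    /* Week truncation with and without a timezone offset (epoch milliseconds).
     * 604800 s = 7 * 24 * 3600; without the offset, truncation aligns to the
     * Unix epoch's week boundary in UTC, with it the boundary is shifted to a
     * local-time boundary. */
    #include <stdint.h>
    #include <stdio.h>

    #define WEEK_MS (604800LL * 1000)

    static int64_t trunc_week_utc(int64_t ms)               { return ms / WEEK_MS * WEEK_MS; }
    static int64_t trunc_week_tz(int64_t ms, int64_t offMs) { return ms - (ms + offMs) % WEEK_MS; }

    int main(void) {
      int64_t ts  = 1700000000000LL;      /* 2023-11-14T22:13:20Z */
      int64_t off = 8LL * 3600 * 1000;    /* e.g. a UTC+8 offset, expressed in ms */
      printf("utc-aligned : %lld\n", (long long)trunc_week_utc(ts));
      printf("tz-adjusted : %lld\n", (long long)trunc_week_tz(ts, off));
      return 0;
    }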
SBkdMgt; + +bool streamBackendDataIsExist(const char* path, int64_t chkpId, int32_t vgId); void* streamBackendInit(const char* path, int64_t chkpId, int32_t vgId); void streamBackendCleanup(void* arg); void streamBackendHandleCleanup(void* arg); int32_t streamBackendLoadCheckpointInfo(void* pMeta); -int32_t streamBackendDoCheckpoint(void* pMeta, uint64_t checkpointId); +int32_t streamBackendDoCheckpoint(void* pMeta, int64_t checkpointId); SListNode* streamBackendAddCompare(void* backend, void* arg); void streamBackendDelCompare(void* backend, void* arg); +int32_t streamStateCvtDataFormat(char* path, char* key, void* cfInst); + +STaskDbWrapper* taskDbOpen(char* path, char* key, int64_t chkpId); +void taskDbDestroy(void* pBackend, bool flush); +void taskDbDestroy2(void* pBackend); +int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId); + +void taskDbUpdateChkpId(void* pTaskDb, int64_t chkpId); + +void* taskDbAddRef(void* pTaskDb); +void taskDbRemoveRef(void* pTaskDb); int streamStateOpenBackend(void* backend, SStreamState* pState); void streamStateCloseBackend(SStreamState* pState, bool remove); @@ -122,7 +218,7 @@ int32_t streamDefaultGet_rocksdb(SStreamState* pState, const void* key, void** p int32_t streamDefaultDel_rocksdb(SStreamState* pState, const void* key); int32_t streamDefaultIterGet_rocksdb(SStreamState* pState, const void* start, const void* end, SArray* result); void* streamDefaultIterCreate_rocksdb(SStreamState* pState); -bool streamDefaultIterValid_rocksdb(void* iter); +bool streamDefaultIterValid_rocksdb(void* iter); void streamDefaultIterSeek_rocksdb(void* iter, const char* key); void streamDefaultIterNext_rocksdb(void* iter); char* streamDefaultIterKey_rocksdb(void* iter, int32_t* len); @@ -146,5 +242,20 @@ int32_t streamBackendTriggerChkp(void* pMeta, char* dst); int32_t streamBackendAddInUseChkp(void* arg, int64_t chkpId); int32_t streamBackendDelInUseChkp(void* arg, int64_t chkpId); +int32_t taskDbBuildSnap(void* arg, SArray* pSnap); + // int32_t streamDefaultIter_rocksdb(SStreamState* pState, const void* start, const void* end, SArray* result); + +// STaskDbWrapper* taskDbOpen(char* path, char* key, int64_t chkpId); +// void taskDbDestroy(void* pDb, bool flush); + +int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId); + +SBkdMgt* bkdMgtCreate(char* path); +int32_t bkdMgtAddChkp(SBkdMgt* bm, char* task, char* path); +int32_t bkdMgtGetDelta(SBkdMgt* bm, char* taskId, int64_t chkpId, SArray* list, char* name); +int32_t bkdMgtDumpTo(SBkdMgt* bm, char* taskId, char* dname); +void bkdMgtDestroy(SBkdMgt* bm); + +int32_t taskDbGenChkpUploadData(void* arg, void* bkdMgt, int64_t chkpId, int8_t type, char** path, SArray* list); #endif \ No newline at end of file diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index b76a967d0d..7b8dae8be7 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -18,9 +18,9 @@ #include "executor.h" #include "query.h" -#include "tstream.h" #include "streamBackendRocksdb.h" #include "trpc.h" +#include "tstream.h" #ifdef __cplusplus extern "C" { @@ -32,13 +32,13 @@ extern "C" { #define MAX_RETRY_LAUNCH_HISTORY_TASK 40 #define RETRY_LAUNCH_INTERVAL_INC_RATE 1.2 -#define MAX_BLOCK_NAME_NUM 1024 -#define DISPATCH_RETRY_INTERVAL_MS 300 -#define MAX_CONTINUE_RETRY_COUNT 5 +#define MAX_BLOCK_NAME_NUM 1024 +#define DISPATCH_RETRY_INTERVAL_MS 300 +#define MAX_CONTINUE_RETRY_COUNT 5 -#define META_HB_CHECK_INTERVAL 200 -#define META_HB_SEND_IDLE_COUNTER 25 // send hb every 5 sec -#define 
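The new SDbChkp/SBkdMgt structures above carry two SST-name hash tables (`pSstTbl[2]` plus an `idx` toggle) and `pAdd`/`pDel` arrays, which suggests a ping-pong scheme: keep the previous checkpoint's file list in one slot, the current one in the other, and diff them to find files to upload or delete. A fixed-size standalone version of that diff; the .sst file names are made up:

    /* Ping-pong file lists: diff the current checkpoint's file set against the
     * previous one to produce add/del deltas (fixed-size sets for brevity). */
    #include <stdio.h>
    #include <string.h>

    #define MAXF 8

    typedef struct { char name[MAXF][32]; int n; } FileSet;

    static int contains(const FileSet *s, const char *f) {
      for (int i = 0; i < s->n; i++)
        if (strcmp(s->name[i], f) == 0) return 1;
      return 0;
    }

    int main(void) {
      FileSet snap[2];
      memset(snap, 0, sizeof snap);
      int idx = 0;

      /* checkpoint N */
      snap[idx].n = 2;
      strcpy(snap[idx].name[0], "000012.sst");
      strcpy(snap[idx].name[1], "000013.sst");

      /* checkpoint N+1 goes into the other slot */
      int cur = idx ^ 1;
      snap[cur].n = 2;
      strcpy(snap[cur].name[0], "000013.sst");
      strcpy(snap[cur].name[1], "000015.sst");

      for (int i = 0; i < snap[cur].n; i++)          /* pAdd: new in current */
        if (!contains(&snap[idx], snap[cur].name[i])) printf("add %s\n", snap[cur].name[i]);
      for (int i = 0; i < snap[idx].n; i++)          /* pDel: gone from previous */
        if (!contains(&snap[cur], snap[idx].name[i])) printf("del %s\n", snap[idx].name[i]);

      idx = cur;   /* flip for the next round */
      (void)idx;
      return 0;
    }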
STREAM_TASK_KEY_LEN ((sizeof(int64_t)) << 1) +#define META_HB_CHECK_INTERVAL 200 +#define META_HB_SEND_IDLE_COUNTER 25 // send hb every 5 sec +#define STREAM_TASK_KEY_LEN ((sizeof(int64_t)) << 1) #define STREAM_TASK_QUEUE_CAPACITY 20480 #define STREAM_TASK_QUEUE_CAPACITY_IN_SIZE (30) @@ -68,14 +68,21 @@ typedef struct SStreamContinueExecInfo { SRpcMsg msg; } SStreamContinueExecInfo; +typedef struct { + int64_t streamId; + int64_t taskId; + int64_t chkpId; + char* dbPrefixPath; +} SStreamTaskSnap; struct STokenBucket { - int32_t numCapacity; // total capacity, available token per second - int32_t numOfToken; // total available tokens - int32_t numRate; // number of token per second - double quotaCapacity; // available capacity for maximum input size, KiloBytes per Second - double quotaRemain; // not consumed bytes per second - double quotaRate; // number of token per second - int64_t fillTimestamp; // fill timestamp + int32_t numCapacity; // total capacity, available token per second + int32_t numOfToken; // total available tokens + int32_t numRate; // number of token per second + double quotaCapacity; // available capacity for maximum input size, KiloBytes per Second + double quotaRemain; // not consumed bytes per second + double quotaRate; // number of token per second + int64_t tokenFillTimestamp; // fill timestamp + int64_t quotaFillTimestamp; // fill timestamp }; struct SStreamQueue { @@ -86,13 +93,14 @@ struct SStreamQueue { }; extern SStreamGlobalEnv streamEnv; -extern int32_t streamBackendId; -extern int32_t streamBackendCfWrapperId; +extern int32_t streamBackendId; +extern int32_t streamBackendCfWrapperId; +extern int32_t taskDbWrapperId; -void streamRetryDispatchData(SStreamTask* pTask, int64_t waitDuration); -int32_t streamDispatchStreamBlock(SStreamTask* pTask); -void destroyDispatchMsg(SStreamDispatchReq* pReq, int32_t numOfVgroups); -int32_t getNumOfDispatchBranch(SStreamTask* pTask); +void streamRetryDispatchData(SStreamTask* pTask, int64_t waitDuration); +int32_t streamDispatchStreamBlock(SStreamTask* pTask); +void destroyDispatchMsg(SStreamDispatchReq* pReq, int32_t numOfVgroups); +int32_t getNumOfDispatchBranch(SStreamTask* pTask); int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBlock); SStreamDataBlock* createStreamBlockFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg); @@ -105,6 +113,8 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* pReq); +int32_t streamSaveTaskCheckpointInfo(SStreamTask* p, int64_t checkpointId); +int32_t streamTaskBuildCheckpoint(SStreamTask* pTask); int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId); int32_t streamSendCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet); @@ -112,25 +122,37 @@ int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t srcTaskId, int32 int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask); int32_t streamTaskSendCheckpointSourceRsp(SStreamTask* pTask); int32_t streamTaskGetNumOfDownstream(const SStreamTask* pTask); - -int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks, int32_t* blockSize); -int32_t streamQueueItemGetSize(const SStreamQueueItem* pItem); -void streamQueueItemIncSize(const SStreamQueueItem* pItem, int32_t size); -const char* streamQueueItemGetTypeStr(int32_t type); - -SStreamQueueItem* 
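STokenBucket above splits the single `fillTimestamp` into `tokenFillTimestamp` and `quotaFillTimestamp`, letting the message-count tokens and the byte quota refill on independent clocks. A compact, self-contained refill routine in that spirit; the field names mirror the struct, but the refill policy shown (whole seconds, hard cap) is an assumption:

    /* Token bucket with independent refill clocks for message tokens and byte quota. */
    #include <stdint.h>
    #include <stdio.h>

    typedef struct {
      int32_t numCapacity, numOfToken, numRate;       /* messages per second */
      double  quotaCapacity, quotaRemain, quotaRate;  /* KiB per second */
      int64_t tokenFillTs, quotaFillTs;               /* last refill time of each pool, ms */
    } Bucket;

    static void refill(Bucket *b, int64_t nowMs) {
      int64_t dTok = nowMs - b->tokenFillTs;
      if (dTok >= 1000) {                             /* whole elapsed seconds only */
        int32_t add = (int32_t)(dTok / 1000) * b->numRate;
        b->numOfToken = (b->numOfToken + add > b->numCapacity) ? b->numCapacity : b->numOfToken + add;
        b->tokenFillTs = nowMs;
      }
      int64_t dQuota = nowMs - b->quotaFillTs;
      if (dQuota >= 1000) {
        double add = (double)(dQuota / 1000) * b->quotaRate;
        b->quotaRemain = (b->quotaRemain + add > b->quotaCapacity) ? b->quotaCapacity : b->quotaRemain + add;
        b->quotaFillTs = nowMs;
      }
    }

    int main(void) {
      Bucket b = {.numCapacity = 50, .numOfToken = 0, .numRate = 50,
                  .quotaCapacity = 1024, .quotaRemain = 0, .quotaRate = 1024,
                  .tokenFillTs = 0, .quotaFillTs = 500};  /* pools last refilled at different times */
      refill(&b, 1200);
      printf("tokens=%d quota=%.0fKiB\n", b.numOfToken, b.quotaRemain);  /* tokens refilled, quota not yet */
      return 0;
    }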
streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem); - -int32_t streamTaskBuildScanhistoryRspMsg(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, void** pBuffer, int32_t* pLen); -int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, SStreamScanHistoryFinishReq* pReq); -int32_t streamNotifyUpstreamContinue(SStreamTask* pTask); -int32_t streamTaskFillHistoryFinished(SStreamTask* pTask); -int32_t streamTransferStateToStreamTask(SStreamTask* pTask); - int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t numRate, float quotaRate, const char*); STaskId streamTaskExtractKey(const SStreamTask* pTask); void streamTaskInitForLaunchHTask(SHistoryTaskInfo* pInfo); void streamTaskSetRetryInfoForLaunch(SHistoryTaskInfo* pInfo); +int32_t streamTaskBuildScanhistoryRspMsg(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, void** pBuffer, + int32_t* pLen); +int32_t streamTaskFillHistoryFinished(SStreamTask* pTask); +void streamClearChkptReadyMsg(SStreamTask* pTask); +int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks, + int32_t* blockSize); +int32_t streamQueueItemGetSize(const SStreamQueueItem* pItem); +void streamQueueItemIncSize(const SStreamQueueItem* pItem, int32_t size); +const char* streamQueueItemGetTypeStr(int32_t type); +SStreamQueueItem* streamQueueMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem); + +int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, SStreamScanHistoryFinishReq* pReq); +int32_t streamNotifyUpstreamContinue(SStreamTask* pTask); +int32_t streamTransferStateToStreamTask(SStreamTask* pTask); + +// <<<<<<< HEAD +// void streamClearChkptReadyMsg(SStreamTask* pTask); + +// int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t numRate, float quotaRate, const +// char*); STaskId streamTaskExtractKey(const SStreamTask* pTask); void streamTaskInitForLaunchHTask(SHistoryTaskInfo* +// pInfo); void streamTaskSetRetryInfoForLaunch(SHistoryTaskInfo* pInfo); + +// void streamMetaResetStartInfo(STaskStartInfo* pMeta); + +// ======= +// >>>>>>> 3.0 SStreamQueue* streamQueueOpen(int64_t cap); void streamQueueClose(SStreamQueue* pQueue, int32_t taskId); void streamQueueProcessSuccess(SStreamQueue* queue); @@ -139,21 +161,25 @@ void* streamQueueNextItem(SStreamQueue* pQueue); void streamFreeQitem(SStreamQueueItem* data); int32_t streamQueueGetItemSize(const SStreamQueue* pQueue); -typedef enum UPLOAD_TYPE{ +typedef enum UPLOAD_TYPE { UPLOAD_DISABLE = -1, UPLOAD_S3 = 0, UPLOAD_RSYNC = 1, } UPLOAD_TYPE; UPLOAD_TYPE getUploadType(); -int uploadCheckpoint(char* id, char* path); -int downloadCheckpoint(char* id, char* path); -int deleteCheckpoint(char* id); -int deleteCheckpointFile(char* id, char* name); +int uploadCheckpoint(char* id, char* path); +int downloadCheckpoint(char* id, char* path); +int deleteCheckpoint(char* id); +int deleteCheckpointFile(char* id, char* name); +int downloadCheckpointByName(char* id, char* fname, char* dstName); -int32_t onNormalTaskReady(SStreamTask* pTask); -int32_t onScanhistoryTaskReady(SStreamTask* pTask); +int32_t streamTaskOnNormalTaskReady(SStreamTask* pTask); +int32_t streamTaskOnScanhistoryTaskReady(SStreamTask* pTask); +typedef int32_t (*__stream_async_exec_fn_t)(void* param); + +int32_t streamMetaAsyncExec(SStreamMeta* pMeta, __stream_async_exec_fn_t fn, void* param, int32_t* code); #ifdef __cplusplus } #endif diff --git a/source/libs/stream/inc/streamsm.h 
b/source/libs/stream/inc/streamsm.h index be3665fde7..7be655fbed 100644 --- a/source/libs/stream/inc/streamsm.h +++ b/source/libs/stream/inc/streamsm.h @@ -32,8 +32,8 @@ typedef int32_t (*__state_trans_fn)(SStreamTask*); typedef int32_t (*__state_trans_succ_fn)(SStreamTask*); typedef struct SAttachedEventInfo { - ETaskStatus status; // required status that this event can be handled - EStreamTaskEvent event; // the delayed handled event + ETaskStatus status; // required status that this event can be handled + EStreamTaskEvent event; // the delayed handled event } SAttachedEventInfo; typedef struct STaskStateTrans { @@ -64,8 +64,8 @@ typedef struct SStreamEventInfo { const char* name; } SStreamEventInfo; -SStreamTaskSM* streamCreateStateMachine(SStreamTask* pTask); -void* streamDestroyStateMachine(SStreamTaskSM* pSM); +// SStreamTaskSM* streamCreateStateMachine(SStreamTask* pTask); +// void* streamDestroyStateMachine(SStreamTaskSM* pSM); #ifdef __cplusplus } #endif diff --git a/source/libs/stream/src/stream.c b/source/libs/stream/src/stream.c index 34b4677235..1c874f34de 100644 --- a/source/libs/stream/src/stream.c +++ b/source/libs/stream/src/stream.c @@ -276,6 +276,7 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S int32_t code = buildDispatchRsp(pTask, pReq, status, &pRsp->pCont); if (code != TSDB_CODE_SUCCESS) { stError("s-task:%s failed to build dispatch rsp, msgId:%d, code:%s", id, pReq->msgId, tstrerror(code)); + terrno = code; return code; } @@ -283,7 +284,6 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S tmsgSendRsp(pRsp); } - tDeleteStreamDispatchReq(pReq); streamSchedExec(pTask); return 0; diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index c23483fffb..630650025d 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -20,34 +20,10 @@ #include "tcommon.h" #include "tref.h" -typedef struct { - int8_t init; - char* pCurrent; - char* pManifest; - SArray* pSST; - int64_t preCkptId; - int64_t curChkpId; - char* path; - - char* buf; - int32_t len; - - // ping-pong buf - SHashObj* pSstTbl[2]; - int8_t idx; - - SArray* pAdd; - SArray* pDel; - int8_t update; -} SBackendManager; - typedef struct SCompactFilteFactory { void* status; } SCompactFilteFactory; -typedef struct { - void* tableOpt; -} RocksdbCfParam; typedef struct { rocksdb_t* db; rocksdb_column_family_handle_t** pHandle; @@ -61,46 +37,65 @@ typedef struct { rocksdb_comparator_t** pCompares; } RocksdbCfInst; -uint32_t nextPow2(uint32_t x); - int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t nCf); -void destroyRocksdbCfInst(RocksdbCfInst* inst); +void destroyRocksdbCfInst(RocksdbCfInst* inst); +int32_t getCfIdx(const char* cfName); +STaskDbWrapper* taskDbOpenImpl(char* key, char* statePath, char* dbPath); + +void destroyCompactFilteFactory(void* arg); +void destroyCompactFilte(void* arg); +const char* compactFilteFactoryName(void* arg); +const char* compactFilteFactoryNameSess(void* arg); +const char* compactFilteFactoryNameState(void* arg); +const char* compactFilteFactoryNameFunc(void* arg); +const char* compactFilteFactoryNameFill(void* arg); + +const char* compactFilteName(void* arg); +const char* compactFilteNameSess(void* arg); +const char* compactFilteNameState(void* arg); +const char* compactFilteNameFill(void* arg); +const char* compactFilteNameFunc(void* arg); -void destroyCompactFilteFactory(void* arg); 
-void destroyCompactFilte(void* arg); -const char* compactFilteFactoryName(void* arg); -const char* compactFilteName(void* arg); unsigned char compactFilte(void* arg, int level, const char* key, size_t klen, const char* val, size_t vlen, char** newval, size_t* newvlen, unsigned char* value_changed); rocksdb_compactionfilter_t* compactFilteFactoryCreateFilter(void* arg, rocksdb_compactionfiltercontext_t* ctx); +rocksdb_compactionfilter_t* compactFilteFactoryCreateFilterSess(void* arg, rocksdb_compactionfiltercontext_t* ctx); +rocksdb_compactionfilter_t* compactFilteFactoryCreateFilterState(void* arg, rocksdb_compactionfiltercontext_t* ctx); +rocksdb_compactionfilter_t* compactFilteFactoryCreateFilterFunc(void* arg, rocksdb_compactionfiltercontext_t* ctx); +rocksdb_compactionfilter_t* compactFilteFactoryCreateFilterFill(void* arg, rocksdb_compactionfiltercontext_t* ctx); -const char* cfName[] = {"default", "state", "fill", "sess", "func", "parname", "partag"}; +typedef int (*__db_key_encode_fn_t)(void* key, char* buf); +typedef int (*__db_key_decode_fn_t)(void* key, char* buf); +typedef int (*__db_key_tostr_fn_t)(void* key, char* buf); +typedef const char* (*__db_key_cmpname_fn_t)(void* statue); +typedef int (*__db_key_cmp_fn_t)(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen); +typedef void (*__db_key_cmp_destroy_fn_t)(void* state); +typedef int32_t (*__db_value_encode_fn_t)(void* value, int32_t vlen, int64_t ttl, char** dest); +typedef int32_t (*__db_value_decode_fn_t)(void* value, int32_t vlen, int64_t* ttl, char** dest); -typedef int (*EncodeFunc)(void* key, char* buf); -typedef int (*DecodeFunc)(void* key, char* buf); -typedef int (*ToStringFunc)(void* key, char* buf); -typedef const char* (*CompareName)(void* statue); -typedef int (*BackendCmpFunc)(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen); -typedef void (*DestroyFunc)(void* state); -typedef int32_t (*EncodeValueFunc)(void* value, int32_t vlen, int64_t ttl, char** dest); -typedef int32_t (*DecodeValueFunc)(void* value, int32_t vlen, int64_t* ttl, char** dest); +typedef rocksdb_compactionfilter_t* (*__db_factory_create_fn_t)(void* arg, rocksdb_compactionfiltercontext_t* ctx); +typedef const char* (*__db_factory_name_fn_t)(void* arg); +typedef void (*__db_factory_destroy_fn_t)(void* arg); typedef struct { - const char* key; - int32_t len; - int idx; - BackendCmpFunc cmpFunc; - EncodeFunc enFunc; - DecodeFunc deFunc; - ToStringFunc toStrFunc; - CompareName cmpName; - DestroyFunc detroyFunc; - EncodeValueFunc enValueFunc; - DecodeValueFunc deValueFunc; + const char* key; + int32_t len; + int idx; + __db_key_cmp_fn_t cmpKey; + __db_key_encode_fn_t enFunc; + __db_key_decode_fn_t deFunc; + __db_key_tostr_fn_t toStrFunc; + __db_key_cmpname_fn_t cmpName; + __db_key_cmp_destroy_fn_t destroyCmp; + __db_value_encode_fn_t enValueFunc; + __db_value_decode_fn_t deValueFunc; + + __db_factory_create_fn_t createFilter; + __db_factory_destroy_fn_t destroyFilter; + __db_factory_name_fn_t funcName; } SCfInit; -#define GEN_COLUMN_FAMILY_NAME(name, idstr, SUFFIX) sprintf(name, "%s_%s", idstr, (SUFFIX)); const char* compareDefaultName(void* name); const char* compareStateName(void* name); const char* compareWinKeyName(void* name); @@ -139,296 +134,71 @@ int parKeyEncode(void* k, char* buf); int parKeyDecode(void* k, char* buf); int parKeyToString(void* k, char* buf); -int stremaValueEncode(void* k, char* buf); -int streamValueDecode(void* k, char* buf); -int32_t streamValueToString(void* k, char* 
buf); -int32_t streaValueIsStale(void* k, int64_t ts); -void destroyFunc(void* arg); +int32_t valueEncode(void* value, int32_t vlen, int64_t ttl, char** dest); +int32_t valueDecode(void* value, int32_t vlen, int64_t* ttl, char** dest); +int32_t valueToString(void* k, char* buf); +int32_t valueIsStale(void* k, int64_t ts); -int32_t encodeValueFunc(void* value, int32_t vlen, int64_t ttl, char** dest); -int32_t decodeValueFunc(void* value, int32_t vlen, int64_t* ttl, char** dest); +void destroyCompare(void* arg); -SBackendManager* bkdMgtCreate(char* path) { - SBackendManager* p = taosMemoryCalloc(1, sizeof(SBackendManager)); - p->curChkpId = 0; - p->preCkptId = 0; - p->pSST = taosArrayInit(64, sizeof(void*)); - p->path = taosStrdup(path); - p->len = strlen(path) + 128; - p->buf = taosMemoryCalloc(1, p->len); +static bool streamStateIterSeekAndValid(rocksdb_iterator_t* iter, char* buf, size_t len); +static rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfName, + rocksdb_snapshot_t** snapshot, rocksdb_readoptions_t** readOpt); - p->idx = 0; - p->pSstTbl[0] = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); - p->pSstTbl[1] = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); - - p->pAdd = taosArrayInit(64, sizeof(void*)); - p->pDel = taosArrayInit(64, sizeof(void*)); - p->update = 0; - return p; -} -void bkdMgtDestroy(SBackendManager* bm) { - if (bm == NULL) return; - - taosMemoryFree(bm->buf); - taosMemoryFree(bm->path); - - taosArrayDestroyP(bm->pSST, taosMemoryFree); - taosArrayDestroyP(bm->pAdd, taosMemoryFree); - taosArrayDestroyP(bm->pDel, taosMemoryFree); - - taosHashCleanup(bm->pSstTbl[0]); - taosHashCleanup(bm->pSstTbl[1]); - - taosMemoryFree(bm->pCurrent); - taosMemoryFree(bm->pManifest); - - taosMemoryFree(bm); -} - -int32_t compareHashTableImpl(SHashObj* p1, SHashObj* p2, SArray* diff) { - int32_t code = 0; - size_t len = 0; - void* pIter = taosHashIterate(p2, NULL); - while (pIter) { - char* name = taosHashGetKey(pIter, &len); - if (!taosHashGet(p1, name, len)) { - char* p = taosStrdup(name); - taosArrayPush(diff, &p); - } - pIter = taosHashIterate(p2, pIter); - } - return code; -} -int32_t compareHashTable(SHashObj* p1, SHashObj* p2, SArray* add, SArray* del) { - int32_t code = 0; - - code = compareHashTableImpl(p1, p2, add); - code = compareHashTableImpl(p2, p1, del); - - return code; -} -int32_t bkdMgtGetDelta(SBackendManager* bm, int64_t chkpId, SArray* list) { - const char* pCurrent = "CURRENT"; - int32_t currLen = strlen(pCurrent); - - const char* pManifest = "MANIFEST-"; - int32_t maniLen = strlen(pManifest); - - const char* pSST = ".sst"; - int32_t sstLen = strlen(pSST); - - memset(bm->buf, 0, bm->len); - sprintf(bm->buf, "%s%scheckpoint%" PRId64 "", bm->path, TD_DIRSEP, chkpId); - - taosArrayClearP(bm->pAdd, taosMemoryFree); - taosArrayClearP(bm->pDel, taosMemoryFree); - - TdDirPtr pDir = taosOpenDir(bm->buf); - TdDirEntryPtr de = NULL; - int8_t dummy = 0; - while ((de = taosReadDir(pDir)) != NULL) { - char* name = taosGetDirEntryName(de); - if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; - if (strlen(name) == currLen && strcmp(name, pCurrent) == 0) { - taosMemoryFreeClear(bm->pCurrent); - bm->pCurrent = taosStrdup(name); - taosHashPut(bm->pSstTbl[1 - bm->idx], name, strlen(name), &dummy, sizeof(dummy)); - continue; - } - - if (strlen(name) >= maniLen && strncmp(name, pManifest, maniLen) == 0) { - taosMemoryFreeClear(bm->pManifest); - 
bm->pManifest = taosStrdup(name); - taosHashPut(bm->pSstTbl[1 - bm->idx], name, strlen(name), &dummy, sizeof(dummy)); - continue; - } - if (strlen(name) >= sstLen && strncmp(name + strlen(name) - 4, pSST, sstLen) == 0) { - // char* p = taosStrdup(name); - taosHashPut(bm->pSstTbl[1 - bm->idx], name, strlen(name), &dummy, sizeof(dummy)); - continue; - } - } - if (bm->init == 0) { - bm->preCkptId = -1; - bm->curChkpId = chkpId; - bm->init = 1; - - void* pIter = taosHashIterate(bm->pSstTbl[1 - bm->idx], NULL); - while (pIter) { - size_t len; - char* name = taosHashGetKey(pIter, &len); - if (name != NULL && len != 0) { - taosArrayPush(bm->pAdd, &name); - } - pIter = taosHashIterate(bm->pSstTbl[1 - bm->idx], pIter); - } - if (taosArrayGetSize(bm->pAdd) > 0) bm->update = 1; - } else { - int32_t code = compareHashTable(bm->pSstTbl[bm->idx], bm->pSstTbl[1 - bm->idx], bm->pAdd, bm->pDel); - if (code != 0) { - // dead code - taosArrayClearP(bm->pAdd, taosMemoryFree); - taosArrayClearP(bm->pDel, taosMemoryFree); - taosHashClear(bm->pSstTbl[1 - bm->idx]); - bm->update = 0; - taosCloseDir(&pDir); - return code; - } - - bm->preCkptId = bm->curChkpId; - bm->curChkpId = chkpId; - if (taosArrayGetSize(bm->pAdd) == 0 && taosArrayGetSize(bm->pDel) == 0) { - bm->update = 0; - } - } - taosHashClear(bm->pSstTbl[bm->idx]); - bm->idx = 1 - bm->idx; - - taosCloseDir(&pDir); - - return 0; -} - -int32_t bkdMgtDumpTo(SBackendManager* bm, char* dname) { - int32_t code = 0; - int32_t len = bm->len + 128; - - char* srcBuf = taosMemoryCalloc(1, len); - char* dstBuf = taosMemoryCalloc(1, len); - - char* srcDir = taosMemoryCalloc(1, len); - char* dstDir = taosMemoryCalloc(1, len); - - sprintf(srcDir, "%s%s%s%" PRId64 "", bm->path, TD_DIRSEP, "checkpoint", bm->curChkpId); - sprintf(dstDir, "%s%s%s", bm->path, TD_DIRSEP, dname); - - if (!taosDirExist(srcDir)) { - stError("failed to dump srcDir %s, reason: not exist such dir", srcDir); - code = -1; - goto _ERROR; - } - - code = taosMkDir(dstDir); - if (code != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - stError("failed to mkdir srcDir %s, reason: %s", dstDir, terrstr()); - goto _ERROR; - } - - // clear current file - memset(dstBuf, 0, len); - sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, bm->pCurrent); - taosRemoveFile(dstBuf); - - memset(dstBuf, 0, len); - sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, bm->pManifest); - taosRemoveFile(dstBuf); - - // add file to $name dir - for (int i = 0; i < taosArrayGetSize(bm->pAdd); i++) { - memset(dstBuf, 0, len); - memset(srcBuf, 0, len); - - char* filename = taosArrayGetP(bm->pAdd, i); - sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, filename); - sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, filename); - - taosCopyFile(srcBuf, dstBuf); - } - // del file in $name - for (int i = 0; i < taosArrayGetSize(bm->pDel); i++) { - memset(dstBuf, 0, len); - memset(srcBuf, 0, len); - - char* filename = taosArrayGetP(bm->pDel, i); - sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, filename); - taosRemoveFile(dstBuf); - } - - // copy current file to dst dir - memset(srcBuf, 0, len); - memset(dstBuf, 0, len); - sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, bm->pCurrent); - sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, bm->pCurrent); - taosCopyFile(srcBuf, dstBuf); - - // copy manifest file to dst dir - memset(srcBuf, 0, len); - memset(dstBuf, 0, len); - sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, bm->pManifest); - sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, bm->pManifest); - taosCopyFile(srcBuf, dstBuf); - - // clear delta data buf - taosArrayClearP(bm->pAdd, 
taosMemoryFree); - taosArrayClearP(bm->pDel, taosMemoryFree); - -_ERROR: - taosMemoryFree(srcBuf); - taosMemoryFree(dstBuf); - taosMemoryFree(srcDir); - taosMemoryFree(dstDir); - return code; -} +#define GEN_COLUMN_FAMILY_NAME(name, idstr, SUFFIX) sprintf(name, "%s_%s", idstr, (SUFFIX)); +int32_t copyFiles(const char* src, const char* dst); +uint32_t nextPow2(uint32_t x); SCfInit ginitDict[] = { {"default", 7, 0, defaultKeyComp, defaultKeyEncode, defaultKeyDecode, defaultKeyToString, compareDefaultName, - destroyFunc, encodeValueFunc, decodeValueFunc}, - {"state", 5, 1, stateKeyDBComp, stateKeyEncode, stateKeyDecode, stateKeyToString, compareStateName, destroyFunc, - encodeValueFunc, decodeValueFunc}, - {"fill", 4, 2, winKeyDBComp, winKeyEncode, winKeyDecode, winKeyToString, compareWinKeyName, destroyFunc, - encodeValueFunc, decodeValueFunc}, + destroyCompare, valueEncode, valueDecode, compactFilteFactoryCreateFilter, destroyCompactFilteFactory, + compactFilteFactoryName}, + + {"state", 5, 1, stateKeyDBComp, stateKeyEncode, stateKeyDecode, stateKeyToString, compareStateName, destroyCompare, + valueEncode, valueDecode, compactFilteFactoryCreateFilterState, destroyCompactFilteFactory, + compactFilteFactoryNameState}, + + {"fill", 4, 2, winKeyDBComp, winKeyEncode, winKeyDecode, winKeyToString, compareWinKeyName, destroyCompare, + valueEncode, valueDecode, compactFilteFactoryCreateFilterFill, destroyCompactFilteFactory, + compactFilteFactoryNameFill}, + {"sess", 4, 3, stateSessionKeyDBComp, stateSessionKeyEncode, stateSessionKeyDecode, stateSessionKeyToString, - compareSessionKeyName, destroyFunc, encodeValueFunc, decodeValueFunc}, - {"func", 4, 4, tupleKeyDBComp, tupleKeyEncode, tupleKeyDecode, tupleKeyToString, compareFuncKeyName, destroyFunc, - encodeValueFunc, decodeValueFunc}, - {"parname", 7, 5, parKeyDBComp, parKeyEncode, parKeyDecode, parKeyToString, compareParKeyName, destroyFunc, - encodeValueFunc, decodeValueFunc}, - {"partag", 6, 6, parKeyDBComp, parKeyEncode, parKeyDecode, parKeyToString, comparePartagKeyName, destroyFunc, - encodeValueFunc, decodeValueFunc}, + compareSessionKeyName, destroyCompare, valueEncode, valueDecode, compactFilteFactoryCreateFilterSess, + destroyCompactFilteFactory, compactFilteFactoryNameSess}, + + {"func", 4, 4, tupleKeyDBComp, tupleKeyEncode, tupleKeyDecode, tupleKeyToString, compareFuncKeyName, destroyCompare, + valueEncode, valueDecode, compactFilteFactoryCreateFilterFunc, destroyCompactFilteFactory, + compactFilteFactoryNameFunc}, + + {"parname", 7, 5, parKeyDBComp, parKeyEncode, parKeyDecode, parKeyToString, compareParKeyName, destroyCompare, + valueEncode, valueDecode, compactFilteFactoryCreateFilter, destroyCompactFilteFactory, compactFilteFactoryName}, + + {"partag", 6, 6, parKeyDBComp, parKeyEncode, parKeyDecode, parKeyToString, comparePartagKeyName, destroyCompare, + valueEncode, valueDecode, compactFilteFactoryCreateFilter, destroyCompactFilteFactory, compactFilteFactoryName}, }; -bool isValidCheckpoint(const char* dir) { return true; } - -int32_t copyFiles(const char* src, const char* dst) { - int32_t code = 0; - // opt later, just hard link - int32_t sLen = strlen(src); - int32_t dLen = strlen(dst); - char* srcName = taosMemoryCalloc(1, sLen + 64); - char* dstName = taosMemoryCalloc(1, dLen + 64); - - TdDirPtr pDir = taosOpenDir(src); - if (pDir == NULL) { - taosMemoryFree(srcName); - taosMemoryFree(dstName); - return -1; - } - - TdDirEntryPtr de = NULL; - while ((de = taosReadDir(pDir)) != NULL) { - char* name = taosGetDirEntryName(de); - 
if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; - - sprintf(srcName, "%s%s%s", src, TD_DIRSEP, name); - sprintf(dstName, "%s%s%s", dst, TD_DIRSEP, name); - if (!taosDirEntryIsDir(de)) { - code = taosCopyFile(srcName, dstName); - if (code == -1) { - goto _err; - } +int32_t getCfIdx(const char* cfName) { + int idx = -1; + size_t len = strlen(cfName); + for (int i = 0; i < sizeof(ginitDict) / sizeof(ginitDict[0]); i++) { + if (len == ginitDict[i].len && strncmp(cfName, ginitDict[i].key, strlen(cfName)) == 0) { + idx = i; + break; } - - memset(srcName, 0, sLen + 64); - memset(dstName, 0, dLen + 64); } - -_err: - taosMemoryFreeClear(srcName); - taosMemoryFreeClear(dstName); - taosCloseDir(&pDir); - return code >= 0 ? 0 : -1; + return idx; } + +bool isValidCheckpoint(const char* dir) { + return true; + STaskDbWrapper* pDb = taskDbOpenImpl(NULL, NULL, (char*)dir); + if (pDb == NULL) { + return false; + } + taskDbDestroy(pDb, false); + return true; +} + int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { // impl later int32_t code = 0; @@ -451,6 +221,7 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { } taosMkDir(state); code = copyFiles(chkp, state); + stInfo("copy snap file from %s to %s", chkp, state); if (code != 0) { stError("failed to restart stream backend from %s, reason: %s", chkp, tstrerror(TAOS_SYSTEM_ERROR(errno))); } else { @@ -468,7 +239,246 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { return 0; } +int32_t remoteChkp_readMetaData(char* path, SArray* list) { + char* metaPath = taosMemoryCalloc(1, strlen(path)); + sprintf(metaPath, "%s%s%s", path, TD_DIRSEP, "META"); + TdFilePtr pFile = taosOpenFile(path, TD_FILE_READ); + + char buf[128] = {0}; + if (taosReadFile(pFile, buf, sizeof(buf)) <= 0) { + taosMemoryFree(metaPath); + taosCloseFile(&pFile); + return -1; + } + int32_t len = strlen(buf); + for (int i = 0; i < len; i++) { + if (buf[i] == '\n') { + char* item = taosMemoryCalloc(1, i + 1); + memcpy(item, buf, i); + taosArrayPush(list, &item); + + item = taosMemoryCalloc(1, len - i); + memcpy(item, buf + i + 1, len - i - 1); + taosArrayPush(list, &item); + } + } + + taosCloseFile(&pFile); + taosMemoryFree(metaPath); + return 0; +} +int32_t remoteChkp_validMetaFile(char* name, char* prename, int64_t chkpId) { + int8_t valid = 0; + for (int i = 0; i < strlen(name); i++) { + if (name[i] == '_') { + memcpy(prename, name, i); + if (taosStr2int64(name + i + 1) != chkpId) { + break; + } else { + valid = 1; + } + } + } + return valid; +} +int32_t remoteChkp_validAndCvtMeta(char* path, SArray* list, int64_t chkpId) { + int32_t complete = 1; + int32_t len = strlen(path) + 32; + char* src = taosMemoryCalloc(1, len); + char* dst = taosMemoryCalloc(1, len); + + int8_t count = 0; + for (int i = 0; i < taosArrayGetSize(list); i++) { + char* p = taosArrayGetP(list, i); + sprintf(src, "%s%s%s", path, TD_DIRSEP, p); + + // check file exist + if (taosStatFile(src, NULL, NULL, NULL) != 0) { + complete = 0; + break; + } + + // check file name + char temp[64] = {0}; + if (remoteChkp_validMetaFile(p, temp, chkpId)) { + count++; + } + + // rename file + sprintf(dst, "%s%s%s", path, TD_DIRSEP, temp); + taosRenameFile(src, dst); + + memset(src, 0, len); + memset(dst, 0, len); + } + if (count != taosArrayGetSize(list)) { + complete = 0; + } + + taosMemoryFree(src); + taosMemoryFree(dst); + + return complete == 1 ? 
0 : -1; +} + +int32_t rebuildFromRemoteChkp_rsync(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { + // impl later + int32_t code = 0; + if (taosIsDir(chkpPath)) { + taosRemoveDir(chkpPath); + } + if (taosIsDir(defaultPath)) { + taosRemoveDir(defaultPath); + } + + code = downloadCheckpoint(key, chkpPath); + if (code != 0) { + return code; + } + code = copyFiles(chkpPath, defaultPath); + + return code; +} +int32_t rebuildFromRemoteChkp_s3(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { + int32_t code = downloadCheckpoint(key, chkpPath); + if (code != 0) { + return code; + } + + int32_t len = strlen(defaultPath) + 32; + char* tmp = taosMemoryCalloc(1, len); + sprintf(tmp, "%s%s", defaultPath, "_tmp"); + if (taosIsDir(tmp)) taosRemoveDir(tmp); + if (taosIsDir(defaultPath)) taosRenameFile(defaultPath, tmp); + + SArray* list = taosArrayInit(2, sizeof(void*)); + code = remoteChkp_readMetaData(chkpPath, list); + if (code == 0) { + code = remoteChkp_validAndCvtMeta(chkpPath, list, chkpId); + } + taosArrayDestroyP(list, taosMemoryFree); + + if (code == 0) { + taosMkDir(defaultPath); + code = copyFiles(chkpPath, defaultPath); + } + + if (code != 0) { + if (taosIsDir(defaultPath)) taosRemoveDir(defaultPath); + if (taosIsDir(tmp)) taosRenameFile(tmp, defaultPath); + } else { + taosRemoveDir(tmp); + } + + taosMemoryFree(tmp); + return code; +} +int32_t rebuildFromRemoteChkp(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { + UPLOAD_TYPE type = getUploadType(); + if (type == UPLOAD_S3) { + return rebuildFromRemoteChkp_s3(key, chkpPath, chkpId, defaultPath); + } else if (type == UPLOAD_RSYNC) { + return rebuildFromRemoteChkp_rsync(key, chkpPath, chkpId, defaultPath); + } + return -1; +} + +int32_t rebuildFromLocalChkp(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { + int32_t code = -1; + int32_t len = strlen(defaultPath) + 32; + char* tmp = taosMemoryCalloc(1, len); + sprintf(tmp, "%s%s", defaultPath, "_tmp"); + + if (taosIsDir(tmp)) taosRemoveDir(tmp); + if (taosIsDir(defaultPath)) taosRenameFile(defaultPath, tmp); + + if (taosIsDir(chkpPath) && isValidCheckpoint(chkpPath)) { + if (taosIsDir(tmp)) { + taosRemoveDir(tmp); + } + taosMkDir(defaultPath); + code = copyFiles(chkpPath, defaultPath); + if (code != 0) { + stError("failed to restart stream backend from %s, reason: %s", chkpPath, tstrerror(TAOS_SYSTEM_ERROR(errno))); + } else { + stInfo("start to restart stream backend at checkpoint path: %s", chkpPath); + } + } + if (code != 0) { + if (taosIsDir(defaultPath)) taosRemoveDir(defaultPath); + if (taosIsDir(tmp)) taosRenameFile(tmp, defaultPath); + } else { + taosRemoveDir(tmp); + } + + taosMemoryFree(tmp); + return code; +} + +int32_t rebuildFromlocalDefault(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { + int32_t code = 0; + return code; +} + +int32_t rebuildDirFromChkp2(const char* path, char* key, int64_t chkpId, char** dbPrefixPath, char** dbPath) { + // impl later + int32_t code = 0; + + char* prefixPath = taosMemoryCalloc(1, strlen(path) + 128); + sprintf(prefixPath, "%s%s%s", path, TD_DIRSEP, key); + + if (!taosIsDir(prefixPath)) { + code = taosMkDir(prefixPath); + ASSERT(code == 0); + } + + char* defaultPath = taosMemoryCalloc(1, strlen(path) + 256); + sprintf(defaultPath, "%s%s%s", prefixPath, TD_DIRSEP, "state"); + if (!taosIsDir(defaultPath)) { + taosMulMkDir(defaultPath); + } + + char* chkpPath = taosMemoryCalloc(1, strlen(path) + 256); + if (chkpId != 0) { + sprintf(chkpPath, "%s%s%s%s%s%" PRId64 "", 
prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkpId); + code = rebuildFromLocalChkp(key, chkpPath, chkpId, defaultPath); + if (code != 0) { + code = rebuildFromRemoteChkp(key, chkpPath, chkpId, defaultPath); + } + + if (code != 0) { + stInfo("failed to start stream backend at %s, reason: %s, restart from default defaultPath dir:%s", chkpPath, + tstrerror(TAOS_SYSTEM_ERROR(errno)), defaultPath); + code = taosMkDir(defaultPath); + } + } else { + sprintf(chkpPath, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", + (int64_t)-1); + + code = rebuildFromLocalChkp(key, chkpPath, -1, defaultPath); + if (code != 0) { + code = taosMkDir(defaultPath); + } + } + taosMemoryFree(chkpPath); + + *dbPath = defaultPath; + *dbPrefixPath = prefixPath; + + return code; +} + +bool streamBackendDataIsExist(const char* path, int64_t chkpId, int32_t vgId) { + bool exist = true; + char* state = taosMemoryCalloc(1, strlen(path) + 32); + sprintf(state, "%s%s%s", path, TD_DIRSEP, "state"); + if (!taosDirExist(state)) { + exist = false; + } + taosMemoryFree(state); + return exist; +} void* streamBackendInit(const char* streamPath, int64_t chkpId, int32_t vgId) { char* backendPath = NULL; int32_t code = rebuildDirFromCheckpoint(streamPath, chkpId, &backendPath); @@ -519,6 +529,7 @@ void* streamBackendInit(const char* streamPath, int64_t chkpId, int32_t vgId) { if (err != NULL) { stError("failed to open rocksdb, path:%s, reason:%s", backendPath, err); taosMemoryFreeClear(err); + rocksdb_list_column_families_destroy(cfs, nCf); goto _EXIT; } } else { @@ -647,7 +658,7 @@ void streamBackendHandleCleanup(void* arg) { taosThreadRwlockDestroy(&wrapper->rwLock); wrapper->rocksdb = NULL; - taosReleaseRef(streamBackendId, wrapper->backendId); + // taosReleaseRef(streamBackendId, wrapper->backendId); stDebug("end to do-close backendwrapper %p, %s", wrapper, wrapper->idstr); taosMemoryFree(wrapper); @@ -727,8 +738,65 @@ int32_t delObsoleteCheckpoint(void* arg, const char* path) { taosArrayDestroy(chkpDel); return 0; } +/* + * checkpointSave |--cp1--|--cp2--|--cp3--|--cp4--|--cp5--| + * chkpInUse: |--cp2--|--cp4--| + * chkpInUse is doing translation, cannot del until + * replication is finished + */ +int32_t chkpMayDelObsolete(void* arg, int64_t chkpId, char* path) { + STaskDbWrapper* pBackend = arg; -static int32_t compareCheckpoint(const void* a, const void* b) { + taosThreadRwlockWrlock(&pBackend->chkpDirLock); + + taosArrayPush(pBackend->chkpSaved, &chkpId); + + SArray* chkpDel = taosArrayInit(8, sizeof(int64_t)); + SArray* chkpDup = taosArrayInit(8, sizeof(int64_t)); + + int64_t firsId = 0; + if (taosArrayGetSize(pBackend->chkpInUse) >= 1) { + firsId = *(int64_t*)taosArrayGet(pBackend->chkpInUse, 0); + + for (int i = 0; i < taosArrayGetSize(pBackend->chkpSaved); i++) { + int64_t id = *(int64_t*)taosArrayGet(pBackend->chkpSaved, i); + if (id >= firsId) { + taosArrayPush(chkpDup, &id); + } else { + taosArrayPush(chkpDel, &id); + } + } + } else { + int32_t sz = taosArrayGetSize(pBackend->chkpSaved); + int32_t dsz = sz - pBackend->chkpCap; // del size + + for (int i = 0; i < dsz; i++) { + int64_t id = *(int64_t*)taosArrayGet(pBackend->chkpSaved, i); + taosArrayPush(chkpDel, &id); + } + for (int i = dsz < 0 ? 
0 : dsz; i < sz; i++) { + int64_t id = *(int64_t*)taosArrayGet(pBackend->chkpSaved, i); + taosArrayPush(chkpDup, &id); + } + } + taosArrayDestroy(pBackend->chkpSaved); + pBackend->chkpSaved = chkpDup; + + taosThreadRwlockUnlock(&pBackend->chkpDirLock); + + for (int i = 0; i < taosArrayGetSize(chkpDel); i++) { + int64_t id = *(int64_t*)taosArrayGet(chkpDel, i); + char tbuf[256] = {0}; + sprintf(tbuf, "%s%scheckpoint%" PRId64 "", path, TD_DIRSEP, id); + if (taosIsDir(tbuf)) { + taosRemoveDir(tbuf); + } + } + taosArrayDestroy(chkpDel); + return 0; +} + +static int32_t chkpIdComp(const void* a, const void* b) { int64_t x = *(int64_t*)a; int64_t y = *(int64_t*)b; return x < y ? -1 : 1; @@ -773,7 +841,7 @@ int32_t streamBackendLoadCheckpointInfo(void* arg) { continue; } } - taosArraySort(suffix, compareCheckpoint); + taosArraySort(suffix, chkpIdComp); // free previous chkpSaved taosArrayClear(pMeta->chkpSaved); for (int i = 0; i < taosArrayGetSize(suffix); i++) { @@ -788,30 +856,50 @@ int32_t streamBackendLoadCheckpointInfo(void* arg) { } int32_t chkpGetAllDbCfHandle(SStreamMeta* pMeta, rocksdb_column_family_handle_t*** ppHandle, SArray* refs) { - SArray* pHandle = taosArrayInit(16, POINTER_BYTES); - void* pIter = taosHashIterate(pMeta->pTaskBackendUnique, NULL); - while (pIter) { - int64_t id = *(int64_t*)pIter; + return 0; + // SArray* pHandle = taosArrayInit(16, POINTER_BYTES); + // void* pIter = taosHashIterate(pMeta->pTaskDbUnique, NULL); + // while (pIter) { + // int64_t id = *(int64_t*)pIter; - SBackendCfWrapper* wrapper = taosAcquireRef(streamBackendCfWrapperId, id); - if (wrapper == NULL) { - pIter = taosHashIterate(pMeta->pTaskBackendUnique, pIter); - continue; + // SBackendCfWrapper* wrapper = taosAcquireRef(streamBackendCfWrapperId, id); + // if (wrapper == NULL) { + // pIter = taosHashIterate(pMeta->pTaskDbUnique, pIter); + // continue; + // } + + // taosThreadRwlockRdlock(&wrapper->rwLock); + // for (int i = 0; i < sizeof(ginitDict) / sizeof(ginitDict[0]); i++) { + // if (wrapper->pHandle[i]) { + // rocksdb_column_family_handle_t* p = wrapper->pHandle[i]; + // taosArrayPush(pHandle, &p); + // } + // } + // taosThreadRwlockUnlock(&wrapper->rwLock); + + // taosArrayPush(refs, &id); + // } + + // int32_t nCf = taosArrayGetSize(pHandle); + + // rocksdb_column_family_handle_t** ppCf = taosMemoryCalloc(nCf, sizeof(rocksdb_column_family_handle_t*)); + // for (int i = 0; i < nCf; i++) { + // ppCf[i] = taosArrayGetP(pHandle, i); + // } + // taosArrayDestroy(pHandle); + + // *ppHandle = ppCf; + // return nCf; +} + +int32_t chkpGetAllDbCfHandle2(STaskDbWrapper* pBackend, rocksdb_column_family_handle_t*** ppHandle) { + SArray* pHandle = taosArrayInit(8, POINTER_BYTES); + for (int i = 0; i < sizeof(ginitDict) / sizeof(ginitDict[0]); i++) { + if (pBackend->pCf[i]) { + rocksdb_column_family_handle_t* p = pBackend->pCf[i]; + taosArrayPush(pHandle, &p); } - - taosThreadRwlockRdlock(&wrapper->rwLock); - for (int i = 0; i < sizeof(ginitDict) / sizeof(ginitDict[0]); i++) { - if (wrapper->pHandle[i]) { - rocksdb_column_family_handle_t* p = wrapper->pHandle[i]; - taosArrayPush(pHandle, &p); - } - } - taosThreadRwlockUnlock(&wrapper->rwLock); - - taosArrayPush(refs, &id); - pIter = taosHashIterate(pMeta->pTaskBackendUnique, pIter); } - int32_t nCf = taosArrayGetSize(pHandle); if (nCf == 0) { taosArrayDestroy(pHandle); @@ -827,6 +915,7 @@ int32_t chkpGetAllDbCfHandle(SStreamMeta* pMeta, rocksdb_column_family_handle_t* *ppHandle = ppCf; return nCf; } + int32_t chkpDoDbCheckpoint(rocksdb_t* db, char* path) 
{ int32_t code = -1; char* err = NULL; @@ -865,7 +954,8 @@ int32_t chkpPreFlushDb(rocksdb_t* db, rocksdb_column_family_handle_t** cf, int32 rocksdb_flushoptions_destroy(flushOpt); return code; } -int32_t chkpPreCheckDir(char* path, int64_t chkpId, char** chkpDir, char** chkpIdDir) { + +int32_t chkpPreBuildDir(char* path, int64_t chkpId, char** chkpDir, char** chkpIdDir) { int32_t code = 0; char* pChkpDir = taosMemoryCalloc(1, 256); char* pChkpIdDir = taosMemoryCalloc(1, 256); @@ -880,145 +970,111 @@ int32_t chkpPreCheckDir(char* path, int64_t chkpId, char** chkpDir, char** chkpI return code; } - sprintf(pChkpIdDir, "%s%scheckpoint%" PRId64, pChkpDir, TD_DIRSEP, chkpId); + sprintf(pChkpIdDir, "%s%s%s%" PRId64, pChkpDir, TD_DIRSEP, "checkpoint", chkpId); if (taosIsDir(pChkpIdDir)) { stInfo("stream rm exist checkpoint%s", pChkpIdDir); - taosRemoveFile(pChkpIdDir); + taosRemoveDir(pChkpIdDir); } *chkpDir = pChkpDir; *chkpIdDir = pChkpIdDir; return 0; } - -int32_t streamBackendTriggerChkp(void* arg, char* dst) { +int32_t taskDbBuildSnap(void* arg, SArray* pSnap) { SStreamMeta* pMeta = arg; - int64_t backendRid = pMeta->streamBackendRid; - int32_t code = -1; + void* pIter = taosHashIterate(pMeta->pTaskDbUnique, NULL); + int32_t code = 0; - SArray* refs = taosArrayInit(16, sizeof(int64_t)); - rocksdb_column_family_handle_t** ppCf = NULL; + while (pIter) { + STaskDbWrapper* pTaskDb = *(STaskDbWrapper**)pIter; + taskDbAddRef(pTaskDb); - int64_t st = taosGetTimestampMs(); - SBackendWrapper* pHandle = taosAcquireRef(streamBackendId, backendRid); + code = taskDbDoCheckpoint(pTaskDb, pTaskDb->chkpId); + taskDbRemoveRef(pTaskDb); - if (pHandle == NULL || pHandle->db == NULL) { - goto _ERROR; + SStreamTask* pTask = pTaskDb->pTask; + SStreamTaskSnap snap = {.streamId = pTask->id.streamId, + .taskId = pTask->id.taskId, + .chkpId = pTaskDb->chkpId, + .dbPrefixPath = taosStrdup(pTaskDb->path)}; + taosArrayPush(pSnap, &snap); + pIter = taosHashIterate(pMeta->pTaskDbUnique, pIter); } - int32_t nCf = chkpGetAllDbCfHandle(pMeta, &ppCf, refs); - stDebug("stream backend:%p start to do checkpoint at:%s, cf num: %d ", pHandle, dst, nCf); - - code = chkpPreFlushDb(pHandle->db, ppCf, nCf); - if (code == 0) { - code = chkpDoDbCheckpoint(pHandle->db, dst); - if (code != 0) { - stError("stream backend:%p failed to do checkpoint at:%s", pHandle, dst); - } else { - stDebug("stream backend:%p end to do checkpoint at:%s, time cost:%" PRId64 "ms", pHandle, dst, - taosGetTimestampMs() - st); - } - } else { - stError("stream backend:%p failed to flush db at:%s", pHandle, dst); - } - - // release all ref to cfWrapper; - for (int i = 0; i < taosArrayGetSize(refs); i++) { - int64_t id = *(int64_t*)taosArrayGet(refs, i); - taosReleaseRef(streamBackendCfWrapperId, id); - } - -_ERROR: - taosMemoryFree(ppCf); - taosReleaseRef(streamBackendId, backendRid); - taosArrayDestroy(refs); return code; } int32_t streamBackendAddInUseChkp(void* arg, int64_t chkpId) { - if (arg == NULL) return 0; + // if (arg == NULL) return 0; - SStreamMeta* pMeta = arg; - taosWLockLatch(&pMeta->chkpDirLock); - taosArrayPush(pMeta->chkpInUse, &chkpId); - taosWUnLockLatch(&pMeta->chkpDirLock); + // SStreamMeta* pMeta = arg; + // taosWLockLatch(&pMeta->chkpDirLock); + // taosArrayPush(pMeta->chkpInUse, &chkpId); + // taosWUnLockLatch(&pMeta->chkpDirLock); return 0; } int32_t streamBackendDelInUseChkp(void* arg, int64_t chkpId) { - if (arg == NULL) return 0; - - SStreamMeta* pMeta = arg; - taosWLockLatch(&pMeta->chkpDirLock); - if 
(taosArrayGetSize(pMeta->chkpInUse) > 0) { - int64_t id = *(int64_t*)taosArrayGet(pMeta->chkpInUse, 0); - if (id == chkpId) { - taosArrayPopFrontBatch(pMeta->chkpInUse, 1); - } - } - taosWUnLockLatch(&pMeta->chkpDirLock); return 0; + // if (arg == NULL) return 0; + + // SStreamMeta* pMeta = arg; + // taosWLockLatch(&pMeta->chkpDirLock); + // if (taosArrayGetSize(pMeta->chkpInUse) > 0) { + // int64_t id = *(int64_t*)taosArrayGet(pMeta->chkpInUse, 0); + // if (id == chkpId) { + // taosArrayPopFrontBatch(pMeta->chkpInUse, 1); + // } + // } + // taosWUnLockLatch(&pMeta->chkpDirLock); } -int32_t streamBackendDoCheckpoint(void* arg, uint64_t checkpointId) { - SStreamMeta* pMeta = arg; - int64_t backendRid = pMeta->streamBackendRid; - int64_t st = taosGetTimestampMs(); - int32_t code = -1; +/* + 0 +*/ +int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId) { + STaskDbWrapper* pTaskDb = arg; + int64_t st = taosGetTimestampMs(); + int32_t code = -1; + int64_t refId = pTaskDb->refId; - SArray* refs = taosArrayInit(16, sizeof(int64_t)); - - rocksdb_column_family_handle_t** ppCf = NULL; + if (taosAcquireRef(taskDbWrapperId, refId) == NULL) { + return -1; + } char* pChkpDir = NULL; char* pChkpIdDir = NULL; - if (chkpPreCheckDir(pMeta->path, checkpointId, &pChkpDir, &pChkpIdDir) != 0) { - taosArrayDestroy(refs); - return code; + if (chkpPreBuildDir(pTaskDb->path, chkpId, &pChkpDir, &pChkpIdDir) != 0) { + code = -1; + goto _EXIT; } - - SBackendWrapper* pHandle = taosAcquireRef(streamBackendId, backendRid); - if (pHandle == NULL || pHandle->db == NULL) { - stError("failed to acquire state-backend handle"); - goto _ERROR; - } - // Get all cf and acquire cfWrappter - int32_t nCf = chkpGetAllDbCfHandle(pMeta, &ppCf, refs); - stDebug("stream backend:%p start to do checkpoint at:%s, cf num: %d ", pHandle, pChkpIdDir, nCf); + rocksdb_column_family_handle_t** ppCf = NULL; - code = chkpPreFlushDb(pHandle->db, ppCf, nCf); - if (code == 0) { - code = chkpDoDbCheckpoint(pHandle->db, pChkpIdDir); - if (code != 0) { - stError("stream backend:%p failed to do checkpoint at:%s", pHandle, pChkpIdDir); + int32_t nCf = chkpGetAllDbCfHandle2(pTaskDb, &ppCf); + qDebug("stream backend:%p start to do checkpoint at:%s, cf num: %d ", pTaskDb, pChkpIdDir, nCf); + + if ((code = chkpPreFlushDb(pTaskDb->db, ppCf, nCf)) == 0) { + if ((code = chkpDoDbCheckpoint(pTaskDb->db, pChkpIdDir)) != 0) { + stError("stream backend:%p failed to do checkpoint at:%s", pTaskDb, pChkpIdDir); } else { - stDebug("stream backend:%p end to do checkpoint at:%s, time cost:%" PRId64 "ms", pHandle, pChkpIdDir, - taosGetTimestampMs() - st); + qDebug("stream backend:%p end to do checkpoint at:%s, time cost:%" PRId64 "ms", pTaskDb, pChkpIdDir, + taosGetTimestampMs() - st); } } else { - stError("stream backend:%p failed to flush db at:%s", pHandle, pChkpIdDir); - } - // release all ref to cfWrapper; - for (int i = 0; i < taosArrayGetSize(refs); i++) { - int64_t id = *(int64_t*)taosArrayGet(refs, i); - taosReleaseRef(streamBackendCfWrapperId, id); - } - if (code == 0) { - taosWLockLatch(&pMeta->chkpDirLock); - taosArrayPush(pMeta->chkpSaved, &checkpointId); - taosWUnLockLatch(&pMeta->chkpDirLock); - - // delete obsolte checkpoint - delObsoleteCheckpoint(arg, pChkpDir); - pMeta->chkpId = checkpointId; + stError("stream backend:%p failed to flush db at:%s", pTaskDb, pChkpIdDir); } -_ERROR: - taosReleaseRef(streamBackendId, backendRid); - taosArrayDestroy(refs); - taosMemoryFree(ppCf); + code = chkpMayDelObsolete(pTaskDb, chkpId, pChkpDir); + pTaskDb->dataWritten = 0; 
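+ // at this point chkpMayDelObsolete() has recorded chkpId in chkpSaved and removed checkpoint dirs that fall out of the retention window (older than the first in-use checkpoint, or beyond chkpCap when none is in use); the new chkpId is published below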
+ + pTaskDb->chkpId = chkpId; + +_EXIT: taosMemoryFree(pChkpDir); taosMemoryFree(pChkpIdDir); + taosReleaseRef(taskDbWrapperId, refId); + taosMemoryFree(ppCf); return code; } +int32_t streamBackendDoCheckpoint(void* arg, int64_t chkpId) { return taskDbDoCheckpoint(arg, chkpId); } SListNode* streamBackendAddCompare(void* backend, void* arg) { SBackendWrapper* pHandle = (SBackendWrapper*)backend; @@ -1039,15 +1095,33 @@ void streamBackendDelCompare(void* backend, void* arg) { taosMemoryFree(node); } } -void streamStateDestroy_rocksdb(SStreamState* pState, bool remove) { streamStateCloseBackend(pState, remove); } -static bool streamStateIterSeekAndValid(rocksdb_iterator_t* iter, char* buf, size_t len); +void streamStateDestroy_rocksdb(SStreamState* pState, bool remove) { streamStateCloseBackend(pState, remove); } +void destroyRocksdbCfInst(RocksdbCfInst* inst) { + int cfLen = sizeof(ginitDict) / sizeof(ginitDict[0]); + if (inst->pHandle) { + for (int i = 0; i < cfLen; i++) { + if (inst->pHandle[i]) rocksdb_column_family_handle_destroy((inst->pHandle)[i]); + } + taosMemoryFree(inst->pHandle); + } + + if (inst->cfOpt) { + for (int i = 0; i < cfLen; i++) { + rocksdb_options_destroy(inst->cfOpt[i]); + rocksdb_block_based_options_destroy(((RocksdbCfParam*)inst->param)[i].tableOpt); + } + taosMemoryFreeClear(inst->cfOpt); + taosMemoryFreeClear(inst->param); + } + if (inst->wOpt) rocksdb_writeoptions_destroy(inst->wOpt); + if (inst->rOpt) rocksdb_readoptions_destroy(inst->rOpt); + + taosMemoryFree(inst); +} // |key|-----value------| // |key|ttl|len|userData| -static rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfName, - rocksdb_snapshot_t** snapshot, rocksdb_readoptions_t** readOpt); - int defaultKeyComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen) { int len = aLen < bLen ? 
aLen : bLen; int ret = memcmp(aBuf, bBuf, len); @@ -1330,23 +1404,7 @@ int parKeyToString(void* k, char* buf) { n = sprintf(buf + n, "[groupId:%" PRIi64 "]", *key); return n; } -int stremaValueEncode(void* k, char* buf) { - int len = 0; - SStreamValue* key = k; - len += taosEncodeFixedI64((void**)&buf, key->unixTimestamp); - len += taosEncodeFixedI32((void**)&buf, key->len); - len += taosEncodeBinary((void**)&buf, key->data, key->len); - return len; -} -int streamValueDecode(void* k, char* buf) { - SStreamValue* key = k; - char* p = buf; - p = taosDecodeFixedI64(p, &key->unixTimestamp); - p = taosDecodeFixedI32(p, &key->len); - p = taosDecodeBinary(p, (void**)&key->data, key->len); - return p - buf; -} -int32_t streamValueToString(void* k, char* buf) { +int32_t valueToString(void* k, char* buf) { SStreamValue* key = k; int n = 0; n += sprintf(buf + n, "[unixTimestamp:%" PRIi64 ",", key->unixTimestamp); @@ -1356,7 +1414,7 @@ int32_t streamValueToString(void* k, char* buf) { } /*1: stale, 0: no stale*/ -int32_t streaValueIsStale(void* k, int64_t ts) { +int32_t valueIsStale(void* k, int64_t ts) { SStreamValue* key = k; if (key->unixTimestamp < ts) { return 1; @@ -1364,12 +1422,12 @@ int32_t streaValueIsStale(void* k, int64_t ts) { return 0; } -void destroyFunc(void* arg) { +void destroyCompare(void* arg) { (void)arg; return; } -int32_t encodeValueFunc(void* value, int32_t vlen, int64_t ttl, char** dest) { +int32_t valueEncode(void* value, int32_t vlen, int64_t ttl, char** dest) { SStreamValue key = {.unixTimestamp = ttl, .len = vlen, .data = (char*)(value)}; int32_t len = 0; if (*dest == NULL) { @@ -1391,7 +1449,7 @@ int32_t encodeValueFunc(void* value, int32_t vlen, int64_t ttl, char** dest) { * ret >= 0 : found valid value * ret < 0 : error or timeout */ -int32_t decodeValueFunc(void* value, int32_t vlen, int64_t* ttl, char** dest) { +int32_t valueDecode(void* value, int32_t vlen, int64_t* ttl, char** dest) { SStreamValue key = {0}; char* p = value; if (streamStateValueIsStale(p)) { @@ -1448,7 +1506,23 @@ void destroyCompactFilteFactory(void* arg) { } const char* compactFilteFactoryName(void* arg) { SCompactFilteFactory* state = arg; - return "stream_compact_filter"; + return "stream_compact_factory_filter_default"; +} +const char* compactFilteFactoryNameSess(void* arg) { + SCompactFilteFactory* state = arg; + return "stream_compact_factory_filter_sess"; +} +const char* compactFilteFactoryNameState(void* arg) { + SCompactFilteFactory* state = arg; + return "stream_compact_factory_filter_state"; +} +const char* compactFilteFactoryNameFill(void* arg) { + SCompactFilteFactory* state = arg; + return "stream_compact_factory_filter_fill"; +} +const char* compactFilteFactoryNameFunc(void* arg) { + SCompactFilteFactory* state = arg; + return "stream_compact_factory_filter_func"; } void destroyCompactFilte(void* arg) { (void)arg; } @@ -1456,38 +1530,499 @@ unsigned char compactFilte(void* arg, int level, const char* key, size_t klen, c char** newval, size_t* newvlen, unsigned char* value_changed) { return streamStateValueIsStale((char*)val) ? 
1 : 0; } -const char* compactFilteName(void* arg) { return "stream_filte"; } +const char* compactFilteName(void* arg) { return "stream_filte_default"; } +const char* compactFilteNameSess(void* arg) { return "stream_filte_sess"; } +const char* compactFilteNameState(void* arg) { return "stream_filte_state"; } +const char* compactFilteNameFill(void* arg) { return "stream_filte_fill"; } +const char* compactFilteNameFunc(void* arg) { return "stream_filte_func"; } + +unsigned char compactFilteSess(void* arg, int level, const char* key, size_t klen, const char* val, size_t vlen, + char** newval, size_t* newvlen, unsigned char* value_changed) { + // not impl yet + return 0; +} + +unsigned char compactFilteState(void* arg, int level, const char* key, size_t klen, const char* val, size_t vlen, + char** newval, size_t* newvlen, unsigned char* value_changed) { + // not impl yet + return 0; +} + +unsigned char compactFilteFill(void* arg, int level, const char* key, size_t klen, const char* val, size_t vlen, + char** newval, size_t* newvlen, unsigned char* value_changed) { + // not impl yet + return 0; +} + +unsigned char compactFilteFunc(void* arg, int level, const char* key, size_t klen, const char* val, size_t vlen, + char** newval, size_t* newvlen, unsigned char* value_changed) { + // not impl yet + return 0; + // return streamStateValueIsStale((char*)val) ? 1 : 0; +} rocksdb_compactionfilter_t* compactFilteFactoryCreateFilter(void* arg, rocksdb_compactionfiltercontext_t* ctx) { SCompactFilteFactory* state = arg; rocksdb_compactionfilter_t* filter = - rocksdb_compactionfilter_create(NULL, destroyCompactFilte, compactFilte, compactFilteName); + rocksdb_compactionfilter_create(state, destroyCompactFilte, compactFilte, compactFilteName); + return filter; +} +rocksdb_compactionfilter_t* compactFilteFactoryCreateFilterSess(void* arg, rocksdb_compactionfiltercontext_t* ctx) { + SCompactFilteFactory* state = arg; + rocksdb_compactionfilter_t* filter = + rocksdb_compactionfilter_create(state, destroyCompactFilte, compactFilteSess, compactFilteNameSess); + return filter; +} +rocksdb_compactionfilter_t* compactFilteFactoryCreateFilterState(void* arg, rocksdb_compactionfiltercontext_t* ctx) { + SCompactFilteFactory* state = arg; + rocksdb_compactionfilter_t* filter = + rocksdb_compactionfilter_create(state, destroyCompactFilte, compactFilteState, compactFilteNameState); + return filter; +} +rocksdb_compactionfilter_t* compactFilteFactoryCreateFilterFill(void* arg, rocksdb_compactionfiltercontext_t* ctx) { + SCompactFilteFactory* state = arg; + rocksdb_compactionfilter_t* filter = + rocksdb_compactionfilter_create(state, destroyCompactFilte, compactFilteFill, compactFilteNameFill); + return filter; +} +rocksdb_compactionfilter_t* compactFilteFactoryCreateFilterFunc(void* arg, rocksdb_compactionfiltercontext_t* ctx) { + SCompactFilteFactory* state = arg; + rocksdb_compactionfilter_t* filter = + rocksdb_compactionfilter_create(state, destroyCompactFilte, compactFilteFunc, compactFilteNameFunc); return filter; } -void destroyRocksdbCfInst(RocksdbCfInst* inst) { - int cfLen = sizeof(ginitDict) / sizeof(ginitDict[0]); - if (inst->pHandle) { - for (int i = 0; i < cfLen; i++) { - if (inst->pHandle[i]) rocksdb_column_family_handle_destroy((inst->pHandle)[i]); - } - taosMemoryFree(inst->pHandle); +int32_t taskDbOpenCfs(STaskDbWrapper* pTask, char* path, char** pCfNames, int32_t nCf) { + int32_t code = -1; + char* err = NULL; + + rocksdb_options_t** cfOpts = taosMemoryCalloc(nCf, sizeof(rocksdb_options_t*)); + 
rocksdb_column_family_handle_t** cfHandle = taosMemoryCalloc(nCf, sizeof(rocksdb_column_family_handle_t*)); + + for (int i = 0; i < nCf; i++) { + int32_t idx = getCfIdx(pCfNames[i]); + cfOpts[i] = pTask->pCfOpts[idx]; } - if (inst->cfOpt) { - for (int i = 0; i < cfLen; i++) { - rocksdb_options_destroy(inst->cfOpt[i]); - rocksdb_block_based_options_destroy(((RocksdbCfParam*)inst->param)[i].tableOpt); - } - taosMemoryFreeClear(inst->cfOpt); - taosMemoryFreeClear(inst->param); - } - if (inst->wOpt) rocksdb_writeoptions_destroy(inst->wOpt); - if (inst->rOpt) rocksdb_readoptions_destroy(inst->rOpt); + rocksdb_t* db = rocksdb_open_column_families(pTask->dbOpt, path, nCf, (const char* const*)pCfNames, + (const rocksdb_options_t* const*)cfOpts, cfHandle, &err); - taosMemoryFree(inst); + if (err != NULL) { + stError("failed to open cf path: %s", err); + taosMemoryFree(err); + goto _EXIT; + } + + for (int i = 0; i < nCf; i++) { + int32_t idx = getCfIdx(pCfNames[i]); + pTask->pCf[idx] = cfHandle[i]; + } + + pTask->db = db; + code = 0; + +_EXIT: + taosMemoryFree(cfOpts); + taosMemoryFree(cfHandle); + return code; +} +void* taskDbAddRef(void* pTaskDb) { + STaskDbWrapper* pBackend = pTaskDb; + return taosAcquireRef(taskDbWrapperId, pBackend->refId); +} +void taskDbRemoveRef(void* pTaskDb) { + if (pTaskDb == NULL) return; + STaskDbWrapper* pBackend = pTaskDb; + taosReleaseRef(taskDbWrapperId, pBackend->refId); } +void taskDbInitOpt(STaskDbWrapper* pTaskDb) { + rocksdb_env_t* env = rocksdb_create_default_env(); + + rocksdb_cache_t* cache = rocksdb_cache_create_lru(256); + rocksdb_options_t* opts = rocksdb_options_create(); + rocksdb_options_set_env(opts, env); + rocksdb_options_set_create_if_missing(opts, 1); + rocksdb_options_set_create_missing_column_families(opts, 1); + // rocksdb_options_set_max_total_wal_size(opts, dbMemLimit); + rocksdb_options_set_recycle_log_file_num(opts, 6); + rocksdb_options_set_max_write_buffer_number(opts, 3); + rocksdb_options_set_info_log_level(opts, 1); + rocksdb_options_set_db_write_buffer_size(opts, 64 << 20); + rocksdb_options_set_write_buffer_size(opts, 32 << 20); + rocksdb_options_set_atomic_flush(opts, 1); + + pTaskDb->dbOpt = opts; + pTaskDb->env = env; + pTaskDb->cache = cache; + pTaskDb->filterFactory = rocksdb_compactionfilterfactory_create( + NULL, destroyCompactFilteFactory, compactFilteFactoryCreateFilter, compactFilteFactoryName); + rocksdb_options_set_compaction_filter_factory(pTaskDb->dbOpt, pTaskDb->filterFactory); + pTaskDb->readOpt = rocksdb_readoptions_create(); + pTaskDb->writeOpt = rocksdb_writeoptions_create(); + + size_t nCf = sizeof(ginitDict) / sizeof(ginitDict[0]); + pTaskDb->pCf = taosMemoryCalloc(nCf, sizeof(rocksdb_column_family_handle_t*)); + pTaskDb->pCfParams = taosMemoryCalloc(nCf, sizeof(RocksdbCfParam)); + pTaskDb->pCfOpts = taosMemoryCalloc(nCf, sizeof(rocksdb_options_t*)); + pTaskDb->pCompares = taosMemoryCalloc(nCf, sizeof(rocksdb_comparator_t*)); + + for (int i = 0; i < nCf; i++) { + rocksdb_options_t* opt = rocksdb_options_create_copy(pTaskDb->dbOpt); + rocksdb_block_based_table_options_t* tableOpt = rocksdb_block_based_options_create(); + rocksdb_block_based_options_set_block_cache(tableOpt, pTaskDb->cache); + rocksdb_block_based_options_set_partition_filters(tableOpt, 1); + + rocksdb_filterpolicy_t* filter = rocksdb_filterpolicy_create_bloom(15); + rocksdb_block_based_options_set_filter_policy(tableOpt, filter); + + rocksdb_options_set_block_based_table_factory((rocksdb_options_t*)opt, tableOpt); + + SCfInit* cfPara = 
&ginitDict[i]; + + rocksdb_comparator_t* compare = + rocksdb_comparator_create(NULL, cfPara->destroyCmp, cfPara->cmpKey, cfPara->cmpName); + rocksdb_options_set_comparator((rocksdb_options_t*)opt, compare); + + rocksdb_compactionfilterfactory_t* filterFactory = + rocksdb_compactionfilterfactory_create(NULL, cfPara->destroyFilter, cfPara->createFilter, cfPara->funcName); + rocksdb_options_set_compaction_filter_factory(opt, filterFactory); + + pTaskDb->pCompares[i] = compare; + pTaskDb->pCfOpts[i] = opt; + pTaskDb->pCfParams[i].tableOpt = tableOpt; + } + return; +} +void taskDbInitChkpOpt(STaskDbWrapper* pTaskDb) { + pTaskDb->chkpId = -1; + pTaskDb->chkpCap = 4; + pTaskDb->chkpSaved = taosArrayInit(4, sizeof(int64_t)); + pTaskDb->chkpInUse = taosArrayInit(4, sizeof(int64_t)); + + taosThreadRwlockInit(&pTaskDb->chkpDirLock, NULL); +} + +void taskDbDestroyChkpOpt(STaskDbWrapper* pTaskDb) { + taosArrayDestroy(pTaskDb->chkpSaved); + taosArrayDestroy(pTaskDb->chkpInUse); + taosThreadRwlockDestroy(&pTaskDb->chkpDirLock); +} + +int32_t taskDbBuildFullPath(char* path, char* key, char** dbFullPath, char** stateFullPath) { + int32_t code = 0; + + char* statePath = taosMemoryCalloc(1, strlen(path) + 128); + sprintf(statePath, "%s%s%s", path, TD_DIRSEP, key); + if (!taosDirExist(statePath)) { + code = taosMulMkDir(statePath); + if (code != 0) { + stError("failed to create dir: %s, reason:%s", statePath, tstrerror(code)); + taosMemoryFree(statePath); + return code; + } + } + + char* dbPath = taosMemoryCalloc(1, strlen(statePath) + 128); + sprintf(dbPath, "%s%s%s", statePath, TD_DIRSEP, "state"); + if (!taosDirExist(dbPath)) { + code = taosMulMkDir(dbPath); + if (code != 0) { + stError("failed to create dir: %s, reason:%s", dbPath, tstrerror(code)); + taosMemoryFree(statePath); + taosMemoryFree(dbPath); + return code; + } + } + + *dbFullPath = dbPath; + *stateFullPath = statePath; + return 0; +} +void taskDbUpdateChkpId(void* pTaskDb, int64_t chkpId) { + STaskDbWrapper* p = pTaskDb; + taosThreadMutexLock(&p->mutex); + p->chkpId = chkpId; + taosThreadMutexUnlock(&p->mutex); +} + +STaskDbWrapper* taskDbOpenImpl(char* key, char* statePath, char* dbPath) { + char* err = NULL; + char** cfNames = NULL; + size_t nCf = 0; + + STaskDbWrapper* pTaskDb = taosMemoryCalloc(1, sizeof(STaskDbWrapper)); + pTaskDb->idstr = key ? taosStrdup(key) : NULL; + pTaskDb->path = statePath ? 
taosStrdup(statePath) : NULL; + + taosThreadMutexInit(&pTaskDb->mutex, NULL); + taskDbInitChkpOpt(pTaskDb); + taskDbInitOpt(pTaskDb); + + cfNames = rocksdb_list_column_families(pTaskDb->dbOpt, dbPath, &nCf, &err); + if (nCf == 0) { + stInfo("newly create db, need to restart"); + // pre create db + pTaskDb->db = rocksdb_open(pTaskDb->pCfOpts[0], dbPath, &err); + rocksdb_close(pTaskDb->db); + + if (cfNames != NULL) { + rocksdb_list_column_families_destroy(cfNames, nCf); + } + taosMemoryFree(err); + err = NULL; + + cfNames = rocksdb_list_column_families(pTaskDb->dbOpt, dbPath, &nCf, &err); + ASSERT(err == NULL); + } + + if (taskDbOpenCfs(pTaskDb, dbPath, cfNames, nCf) != 0) { + goto _EXIT; + } + + if (cfNames != NULL) { + rocksdb_list_column_families_destroy(cfNames, nCf); + cfNames = NULL; + } + + qDebug("succ to init stream backend at %s, backend:%p", dbPath, pTaskDb); + return pTaskDb; +_EXIT: + + taskDbDestroy(pTaskDb, false); + if (err) taosMemoryFree(err); + if (cfNames) rocksdb_list_column_families_destroy(cfNames, nCf); + return NULL; +} +STaskDbWrapper* taskDbOpen(char* path, char* key, int64_t chkpId) { + char* statePath = NULL; + char* dbPath = NULL; + + if (rebuildDirFromChkp2(path, key, chkpId, &statePath, &dbPath) != 0) { + return NULL; + } + + STaskDbWrapper* pTaskDb = taskDbOpenImpl(key, statePath, dbPath); + taosMemoryFree(dbPath); + taosMemoryFree(statePath); + return pTaskDb; +} + +void taskDbDestroy(void* pDb, bool flush) { + STaskDbWrapper* wrapper = pDb; + qDebug("succ to destroy stream backend:%p", wrapper); + + int8_t nCf = sizeof(ginitDict) / sizeof(ginitDict[0]); + + if (wrapper == NULL) return; + + if (flush) { + if (wrapper->db && wrapper->pCf) { + rocksdb_flushoptions_t* flushOpt = rocksdb_flushoptions_create(); + rocksdb_flushoptions_set_wait(flushOpt, 1); + + char* err = NULL; + rocksdb_column_family_handle_t** cfs = taosMemoryCalloc(1, sizeof(rocksdb_column_family_handle_t*) * nCf); + int numOfFlushCf = 0; + for (int i = 0; i < nCf; i++) { + if (wrapper->pCf[i] != NULL) { + cfs[numOfFlushCf++] = wrapper->pCf[i]; + } + } + if (numOfFlushCf != 0) { + rocksdb_flush_cfs(wrapper->db, flushOpt, cfs, numOfFlushCf, &err); + if (err != NULL) { + stError("failed to flush all cfs, reason:%s", err); + taosMemoryFreeClear(err); + } + } + taosMemoryFree(cfs); + rocksdb_flushoptions_destroy(flushOpt); + } + } + for (int i = 0; i < nCf; i++) { + if (wrapper->pCf[i] != NULL) { + rocksdb_column_family_handle_destroy(wrapper->pCf[i]); + } + } + + if (wrapper->db) rocksdb_close(wrapper->db); + + rocksdb_options_destroy(wrapper->dbOpt); + rocksdb_readoptions_destroy(wrapper->readOpt); + rocksdb_writeoptions_destroy(wrapper->writeOpt); + rocksdb_env_destroy(wrapper->env); + rocksdb_cache_destroy(wrapper->cache); + + taosMemoryFree(wrapper->pCf); + for (int i = 0; i < nCf; i++) { + rocksdb_options_t* opt = wrapper->pCfOpts[i]; + rocksdb_comparator_t* compare = wrapper->pCompares[i]; + rocksdb_block_based_table_options_t* tblOpt = wrapper->pCfParams[i].tableOpt; + + rocksdb_options_destroy(opt); + rocksdb_comparator_destroy(compare); + rocksdb_block_based_options_destroy(tblOpt); + } + taosMemoryFree(wrapper->pCompares); + taosMemoryFree(wrapper->pCfOpts); + taosMemoryFree(wrapper->pCfParams); + + taosThreadMutexDestroy(&wrapper->mutex); + + taskDbDestroyChkpOpt(wrapper); + + taosMemoryFree(wrapper->idstr); + taosMemoryFree(wrapper->path); + taosMemoryFree(wrapper); + + return; +} + +void taskDbDestroy2(void* pDb) { taskDbDestroy(pDb, true); } + +int32_t 
taskDbGenChkpUploadData__rsync(STaskDbWrapper* pDb, int64_t chkpId, char** path) { + int64_t st = taosGetTimestampMs(); + int32_t code = -1; + int64_t refId = pDb->refId; + + if (taosAcquireRef(taskDbWrapperId, refId) == NULL) { + return -1; + } + + char* buf = taosMemoryCalloc(1, strlen(pDb->path) + 128); + sprintf(buf, "%s%s%s%s%s%" PRId64 "", pDb->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkpId); + if (taosIsDir(buf)) { + code = 0; + *path = buf; + } else { + taosMemoryFree(buf); + } + + taosReleaseRef(taskDbWrapperId, refId); + return code; +} + +int32_t taskDbGenChkpUploadData__s3(STaskDbWrapper* pDb, void* bkdChkpMgt, int64_t chkpId, char** path, SArray* list) { + int32_t code = 0; + SBkdMgt* p = (SBkdMgt*)bkdChkpMgt; + + char* temp = taosMemoryCalloc(1, strlen(pDb->path) + 32); + sprintf(temp, "%s%s%s%" PRId64 "", pDb->path, TD_DIRSEP, "tmp", chkpId); + + if (taosDirExist(temp)) { + taosRemoveDir(temp); + taosMkDir(temp); + } else { + taosMkDir(temp); + } + code = bkdMgtGetDelta(p, pDb->idstr, chkpId, list, temp); + + *path = temp; + + return code; +} +int32_t taskDbGenChkpUploadData(void* arg, void* mgt, int64_t chkpId, int8_t type, char** path, SArray* list) { + STaskDbWrapper* pDb = arg; + UPLOAD_TYPE utype = type; + + if (utype == UPLOAD_RSYNC) { + return taskDbGenChkpUploadData__rsync(pDb, chkpId, path); + } else if (utype == UPLOAD_S3) { + return taskDbGenChkpUploadData__s3(pDb, mgt, chkpId, path, list); + } + return -1; +} + +int32_t taskDbOpenCfByKey(STaskDbWrapper* pDb, const char* key) { + int32_t code = 0; + char* err = NULL; + int8_t idx = getCfIdx(key); + + if (idx == -1) return -1; + + if (pDb->pCf[idx] != NULL) return code; + + rocksdb_column_family_handle_t* cf = + rocksdb_create_column_family(pDb->db, pDb->pCfOpts[idx], ginitDict[idx].key, &err); + if (err != NULL) { + stError("failed to open cf, key:%s, reason: %s", key, err); + taosMemoryFree(err); + code = -1; + return code; + } + + pDb->pCf[idx] = cf; + return code; +} +int32_t copyDataAt(RocksdbCfInst* pSrc, STaskDbWrapper* pDst, int8_t i) { + int32_t WRITE_BATCH = 1024; + char* err = NULL; + int code = 0; + + rocksdb_readoptions_t* pRdOpt = rocksdb_readoptions_create(); + + rocksdb_writebatch_t* wb = rocksdb_writebatch_create(); + rocksdb_iterator_t* pIter = rocksdb_create_iterator_cf(pSrc->db, pRdOpt, pSrc->pHandle[i]); + rocksdb_iter_seek_to_first(pIter); + while (rocksdb_iter_valid(pIter)) { + if (rocksdb_writebatch_count(wb) >= WRITE_BATCH) { + rocksdb_write(pDst->db, pDst->writeOpt, wb, &err); + if (err != NULL) { + code = -1; + goto _EXIT; + } + rocksdb_writebatch_clear(wb); + } + + size_t klen = 0, vlen = 0; + char* key = (char*)rocksdb_iter_key(pIter, &klen); + char* val = (char*)rocksdb_iter_value(pIter, &vlen); + + rocksdb_writebatch_put_cf(wb, pDst->pCf[i], key, klen, val, vlen); + rocksdb_iter_next(pIter); + } + + if (rocksdb_writebatch_count(wb) > 0) { + rocksdb_write(pDst->db, pDst->writeOpt, wb, &err); + if (err != NULL) { + code = -1; + goto _EXIT; + } + } + +_EXIT: + rocksdb_iter_destroy(pIter); + rocksdb_readoptions_destroy(pRdOpt); + taosMemoryFree(err); + + return code; +} + +int32_t streamStateCvtDataFormat(char* path, char* key, void* pCfInst) { + int nCf = sizeof(ginitDict) / sizeof(ginitDict[0]); + + int32_t code = 0; + + STaskDbWrapper* pTaskDb = taskDbOpen(path, key, 0); + RocksdbCfInst* pSrcBackend = pCfInst; + + for (int i = 0; i < nCf; i++) { + rocksdb_column_family_handle_t* pSrcCf = pSrcBackend->pHandle[i]; + if (pSrcCf == NULL) continue; + + code = 
taskDbOpenCfByKey(pTaskDb, ginitDict[i].key); + if (code != 0) goto _EXIT; + + code = copyDataAt(pSrcBackend, pTaskDb, i); + if (code != 0) goto _EXIT; + } + +_EXIT: + taskDbDestroy(pTaskDb, true); + + return code; +} int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t nCf) { SBackendWrapper* handle = backend; char* err = NULL; @@ -1524,7 +2059,7 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t SCfInit* cfPara = &ginitDict[idx]; rocksdb_comparator_t* compare = - rocksdb_comparator_create(NULL, cfPara->detroyFunc, cfPara->cmpFunc, cfPara->cmpName); + rocksdb_comparator_create(NULL, cfPara->destroyCmp, cfPara->cmpKey, cfPara->cmpName); rocksdb_options_set_comparator((rocksdb_options_t*)cfOpts[i], compare); pCompare[i] = compare; } @@ -1608,7 +2143,7 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t SCfInit* cfPara = &ginitDict[i]; rocksdb_comparator_t* compare = - rocksdb_comparator_create(NULL, cfPara->detroyFunc, cfPara->cmpFunc, cfPara->cmpName); + rocksdb_comparator_create(NULL, cfPara->destroyCmp, cfPara->cmpKey, cfPara->cmpName); rocksdb_options_set_comparator((rocksdb_options_t*)opt, compare); inst->pCompares[i] = compare; @@ -1688,7 +2223,7 @@ int streamStateOpenBackend(void* backend, SStreamState* pState) { for (int i = 0; i < cfLen; i++) { SCfInit* cf = &ginitDict[i]; - rocksdb_comparator_t* compare = rocksdb_comparator_create(NULL, cf->detroyFunc, cf->cmpFunc, cf->cmpName); + rocksdb_comparator_t* compare = rocksdb_comparator_create(NULL, cf->destroyCmp, cf->cmpKey, cf->cmpName); rocksdb_options_set_comparator((rocksdb_options_t*)cfOpt[i], compare); pCompare[i] = compare; } @@ -1754,26 +2289,25 @@ int streamStateGetCfIdx(SStreamState* pState, const char* funcName) { } } if (pState != NULL && idx != -1) { - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; rocksdb_column_family_handle_t* cf = NULL; - taosThreadRwlockWrlock(&wrapper->rwLock); - cf = wrapper->pHandle[idx]; - if (cf == NULL) { - char buf[128] = {0}; - GEN_COLUMN_FAMILY_NAME(buf, wrapper->idstr, ginitDict[idx].key); - char* err = NULL; - cf = rocksdb_create_column_family(wrapper->rocksdb, wrapper->cfOpts[idx], buf, &err); + taosThreadMutexLock(&wrapper->mutex); + + cf = wrapper->pCf[idx]; + if (cf == NULL) { + char* err = NULL; + cf = rocksdb_create_column_family(wrapper->db, wrapper->pCfOpts[idx], ginitDict[idx].key, &err); if (err != NULL) { idx = -1; stError("failed to open cf, %p %s_%s, reason:%s", pState, wrapper->idstr, funcName, err); taosMemoryFree(err); } else { - stDebug("succ to to open cf, %p %s_%s", pState, wrapper->idstr, funcName); - wrapper->pHandle[idx] = cf; + qDebug("succ to open cf, %p %s_%s", pState, wrapper->idstr, funcName); + wrapper->pCf[idx] = cf; } } - taosThreadRwlockUnlock(&wrapper->rwLock); + taosThreadMutexUnlock(&wrapper->mutex); } return idx; @@ -1794,118 +2328,120 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfKe *readOpt = rocksdb_readoptions_create(); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; if (snapshot != NULL) { - *snapshot = (rocksdb_snapshot_t*)rocksdb_create_snapshot(wrapper->rocksdb); + *snapshot = (rocksdb_snapshot_t*)rocksdb_create_snapshot(wrapper->db); rocksdb_readoptions_set_snapshot(*readOpt, *snapshot); rocksdb_readoptions_set_fill_cache(*readOpt, 0); } - return 
rocksdb_create_iterator_cf(wrapper->rocksdb, *readOpt, - ((rocksdb_column_family_handle_t**)wrapper->pHandle)[idx]); + return rocksdb_create_iterator_cf(wrapper->db, *readOpt, ((rocksdb_column_family_handle_t**)wrapper->pCf)[idx]); } -#define STREAM_STATE_PUT_ROCKSDB(pState, funcname, key, value, vLen) \ - do { \ - code = 0; \ - char buf[128] = {0}; \ - char* err = NULL; \ - int i = streamStateGetCfIdx(pState, funcname); \ - if (i < 0) { \ - stWarn("streamState failed to get cf name: %s", funcname); \ - code = -1; \ - break; \ - } \ - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; \ - char toString[128] = {0}; \ - if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ - int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ - rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pHandle)[ginitDict[i].idx]; \ - rocksdb_t* db = wrapper->rocksdb; \ - rocksdb_writeoptions_t* opts = wrapper->writeOpts; \ - char* ttlV = NULL; \ - int32_t ttlVLen = ginitDict[i].enValueFunc((char*)value, vLen, 0, &ttlV); \ - rocksdb_put_cf(db, opts, pHandle, (const char*)buf, klen, (const char*)ttlV, (size_t)ttlVLen, &err); \ - if (err != NULL) { \ - stError("streamState str: %s failed to write to %s, err: %s", toString, funcname, err); \ - taosMemoryFree(err); \ - code = -1; \ - } else { \ - stTrace("streamState str:%s succ to write to %s, rowValLen:%d, ttlValLen:%d", toString, funcname, vLen, \ - ttlVLen); \ - } \ - taosMemoryFree(ttlV); \ +#define STREAM_STATE_PUT_ROCKSDB(pState, funcname, key, value, vLen) \ + do { \ + code = 0; \ + char buf[128] = {0}; \ + char* err = NULL; \ + int i = streamStateGetCfIdx(pState, funcname); \ + if (i < 0) { \ + qWarn("streamState failed to get cf name: %s", funcname); \ + code = -1; \ + break; \ + } \ + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; \ + wrapper->dataWritten += 1; \ + char toString[128] = {0}; \ + if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ + int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ + rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pCf)[ginitDict[i].idx]; \ + rocksdb_writeoptions_t* opts = wrapper->writeOpt; \ + rocksdb_t* db = wrapper->db; \ + char* ttlV = NULL; \ + int32_t ttlVLen = ginitDict[i].enValueFunc((char*)value, vLen, 0, &ttlV); \ + rocksdb_put_cf(db, opts, pHandle, (const char*)buf, klen, (const char*)ttlV, (size_t)ttlVLen, &err); \ + if (err != NULL) { \ + stError("streamState str: %s failed to write to %s, err: %s", toString, funcname, err); \ + taosMemoryFree(err); \ + code = -1; \ + } else { \ + qTrace("streamState str:%s succ to write to %s, rowValLen:%d, ttlValLen:%d, %p", toString, funcname, vLen, \ + ttlVLen, wrapper); \ + } \ + taosMemoryFree(ttlV); \ } while (0); -#define STREAM_STATE_GET_ROCKSDB(pState, funcname, key, pVal, vLen) \ - do { \ - code = 0; \ - char buf[128] = {0}; \ - char* err = NULL; \ - int i = streamStateGetCfIdx(pState, funcname); \ - if (i < 0) { \ - stWarn("streamState failed to get cf name: %s", funcname); \ - code = -1; \ - break; \ - } \ - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; \ - char toString[128] = {0}; \ - if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ - int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ - rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pHandle)[ginitDict[i].idx]; \ - rocksdb_t* db = wrapper->rocksdb; \ - 
rocksdb_readoptions_t* opts = wrapper->readOpts; \ - size_t len = 0; \ - char* val = rocksdb_get_cf(db, opts, pHandle, (const char*)buf, klen, (size_t*)&len, &err); \ - if (val == NULL || len == 0) { \ - if (err == NULL) { \ - stTrace("streamState str: %s failed to read from %s_%s, err: not exist", toString, wrapper->idstr, funcname); \ - } else { \ - stError("streamState str: %s failed to read from %s_%s, err: %s", toString, wrapper->idstr, funcname, err); \ - taosMemoryFreeClear(err); \ - } \ - code = -1; \ - } else { \ - char* p = NULL; \ - int32_t tlen = ginitDict[i].deValueFunc(val, len, NULL, (char**)pVal); \ - if (tlen <= 0) { \ - stError("streamState str: %s failed to read from %s_%s, err: already ttl ", toString, wrapper->idstr, \ - funcname); \ - code = -1; \ - } else { \ - stTrace("streamState str: %s succ to read from %s_%s, valLen:%d", toString, wrapper->idstr, funcname, tlen); \ - } \ - taosMemoryFree(val); \ - if (vLen != NULL) *vLen = tlen; \ - } \ +#define STREAM_STATE_GET_ROCKSDB(pState, funcname, key, pVal, vLen) \ + do { \ + code = 0; \ + char buf[128] = {0}; \ + char* err = NULL; \ + int i = streamStateGetCfIdx(pState, funcname); \ + if (i < 0) { \ + qWarn("streamState failed to get cf name: %s", funcname); \ + code = -1; \ + break; \ + } \ + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; \ + char toString[128] = {0}; \ + if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ + int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ + rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pCf)[ginitDict[i].idx]; \ + rocksdb_t* db = wrapper->db; \ + rocksdb_readoptions_t* opts = wrapper->readOpt; \ + size_t len = 0; \ + char* val = rocksdb_get_cf(db, opts, pHandle, (const char*)buf, klen, (size_t*)&len, &err); \ + if (val == NULL || len == 0) { \ + if (err == NULL) { \ + qTrace("streamState str: %s failed to read from %s_%s, err: not exist", toString, wrapper->idstr, funcname); \ + } else { \ + stError("streamState str: %s failed to read from %s_%s, err: %s", toString, wrapper->idstr, funcname, err); \ + taosMemoryFreeClear(err); \ + } \ + code = -1; \ + } else { \ + char* p = NULL; \ + int32_t tlen = ginitDict[i].deValueFunc(val, len, NULL, (char**)pVal); \ + if (tlen <= 0) { \ + stError("streamState str: %s failed to read from %s_%s, err: already ttl ", toString, wrapper->idstr, \ + funcname); \ + code = -1; \ + } else { \ + qTrace("streamState str: %s succ to read from %s_%s, valLen:%d, %p", toString, wrapper->idstr, funcname, tlen, \ + wrapper); \ + } \ + taosMemoryFree(val); \ + if (vLen != NULL) *vLen = tlen; \ + } \ } while (0); -#define STREAM_STATE_DEL_ROCKSDB(pState, funcname, key) \ - do { \ - code = 0; \ - char buf[128] = {0}; \ - char* err = NULL; \ - int i = streamStateGetCfIdx(pState, funcname); \ - if (i < 0) { \ - stWarn("streamState failed to get cf name: %s_%s", pState->pTdbState->idstr, funcname); \ - code = -1; \ - break; \ - } \ - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; \ - char toString[128] = {0}; \ - if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ - int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ - rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pHandle)[ginitDict[i].idx]; \ - rocksdb_t* db = wrapper->rocksdb; \ - rocksdb_writeoptions_t* opts = wrapper->writeOpts; \ - rocksdb_delete_cf(db, opts, pHandle, (const char*)buf, klen, &err); \ - if (err != NULL) { \ - 
stError("streamState str: %s failed to del from %s_%s, err: %s", toString, wrapper->idstr, funcname, err); \ - taosMemoryFree(err); \ - code = -1; \ - } else { \ - stTrace("streamState str: %s succ to del from %s_%s", toString, wrapper->idstr, funcname); \ - } \ +#define STREAM_STATE_DEL_ROCKSDB(pState, funcname, key) \ + do { \ + code = 0; \ + char buf[128] = {0}; \ + char* err = NULL; \ + int i = streamStateGetCfIdx(pState, funcname); \ + if (i < 0) { \ + qWarn("streamState failed to get cf name: %s_%s", pState->pTdbState->idstr, funcname); \ + code = -1; \ + break; \ + } \ + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; \ + wrapper->dataWritten += 1; \ + char toString[128] = {0}; \ + if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ + int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ + rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pCf)[ginitDict[i].idx]; \ + rocksdb_t* db = wrapper->db; \ + rocksdb_writeoptions_t* opts = wrapper->writeOpt; \ + rocksdb_delete_cf(db, opts, pHandle, (const char*)buf, klen, &err); \ + if (err != NULL) { \ + stError("streamState str: %s failed to del from %s_%s, err: %s", toString, wrapper->idstr, funcname, err); \ + taosMemoryFree(err); \ + code = -1; \ + } else { \ + qTrace("streamState str: %s succ to del from %s_%s", toString, wrapper->idstr, funcname); \ + } \ } while (0); // state cf @@ -1931,7 +2467,8 @@ int32_t streamStateDel_rocksdb(SStreamState* pState, const SWinKey* key) { int32_t streamStateClear_rocksdb(SStreamState* pState) { stDebug("streamStateClear_rocksdb"); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; + wrapper->dataWritten += 1; char sKeyStr[128] = {0}; char eKeyStr[128] = {0}; @@ -1941,10 +2478,9 @@ int32_t streamStateClear_rocksdb(SStreamState* pState) { int sLen = stateKeyEncode(&sKey, sKeyStr); int eLen = stateKeyEncode(&eKey, eKeyStr); - if (wrapper->pHandle[1] != NULL) { + if (wrapper->pCf[1] != NULL) { char* err = NULL; - rocksdb_delete_range_cf(wrapper->rocksdb, wrapper->writeOpts, wrapper->pHandle[1], sKeyStr, sLen, eKeyStr, eLen, - &err); + rocksdb_delete_range_cf(wrapper->db, wrapper->writeOpt, wrapper->pCf[1], sKeyStr, sLen, eKeyStr, eLen, &err); if (err != NULL) { char toStringStart[128] = {0}; char toStringEnd[128] = {0}; @@ -1954,7 +2490,7 @@ int32_t streamStateClear_rocksdb(SStreamState* pState) { stWarn("failed to delete range cf(state) start: %s, end:%s, reason:%s", toStringStart, toStringEnd, err); taosMemoryFree(err); } else { - rocksdb_compact_range_cf(wrapper->rocksdb, wrapper->pHandle[1], sKeyStr, sLen, eKeyStr, eLen); + rocksdb_compact_range_cf(wrapper->db, wrapper->pCf[1], sKeyStr, sLen, eKeyStr, eLen); } } @@ -2028,7 +2564,7 @@ int32_t streamStateGetKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey, cons if (pVLen != NULL) { size_t vlen = 0; const char* valStr = rocksdb_iter_value(pCur->iter, &vlen); - *pVLen = decodeValueFunc((void*)valStr, vlen, NULL, (char**)pVal); + *pVLen = valueDecode((void*)valStr, vlen, NULL, (char**)pVal); } *pKey = pKtmp->key; @@ -2051,9 +2587,9 @@ SStreamStateCur* streamStateSeekKeyNext_rocksdb(SStreamState* pState, const SWin if (pCur == NULL) { return NULL; } - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; pCur->number = pState->number; - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = 
streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); @@ -2103,7 +2639,7 @@ SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState) { if (pCur == NULL) return NULL; pCur->number = pState->number; - pCur->db = ((SBackendCfWrapper*)pState->pTdbState->pBackendCfWrapper)->rocksdb; + pCur->db = ((STaskDbWrapper*)pState->pTdbState->pOwner->pBackend)->db; pCur->iter = streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); @@ -2125,12 +2661,13 @@ SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState) { } SStreamStateCur* streamStateGetCur_rocksdb(SStreamState* pState, const SWinKey* key) { - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + qDebug("streamStateGetCur_rocksdb"); + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) return NULL; - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); pCur->number = pState->number; @@ -2177,6 +2714,9 @@ int32_t streamStateFuncDel_rocksdb(SStreamState* pState, const STupleKey* key) { int32_t streamStateSessionPut_rocksdb(SStreamState* pState, const SSessionKey* key, const void* value, int32_t vLen) { int code = 0; SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; + if (value == NULL || vLen == 0) { + stError("streamStateSessionPut_rocksdb val: %p, len: %d", value, vLen); + } STREAM_STATE_PUT_ROCKSDB(pState, "sess", &sKey, value, vLen); return code; } @@ -2225,11 +2765,11 @@ SStreamStateCur* streamStateSessionSeekToLast_rocksdb(SStreamState* pState) { if (code != 0) { return NULL; } + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = createStreamStateCursor(); + SStreamStateCur* pCur = createStreamStateCursor(); pCur->number = pState->number; - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); @@ -2261,13 +2801,14 @@ int32_t streamStateSessionCurPrev_rocksdb(SStreamStateCur* pCur) { SStreamStateCur* streamStateSessionSeekKeyCurrentPrev_rocksdb(SStreamState* pState, const SSessionKey* key) { stDebug("streamStateSessionSeekKeyCurrentPrev_rocksdb"); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = createStreamStateCursor(); + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; + SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { return NULL; } + pCur->number = pState->number; - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); @@ -2300,13 +2841,13 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentPrev_rocksdb(SStreamState* pSta return pCur; } SStreamStateCur* streamStateSessionSeekKeyCurrentNext_rocksdb(SStreamState* pState, SSessionKey* key) { - stDebug("streamStateSessionSeekKeyCurrentNext_rocksdb"); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = createStreamStateCursor(); + qDebug("streamStateSessionSeekKeyCurrentNext_rocksdb"); + STaskDbWrapper* wrapper = 
pState->pTdbState->pOwner->pBackend; + SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { return NULL; } - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); pCur->number = pState->number; @@ -2338,13 +2879,13 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentNext_rocksdb(SStreamState* pSta } SStreamStateCur* streamStateSessionSeekKeyNext_rocksdb(SStreamState* pState, const SSessionKey* key) { - stDebug("streamStateSessionSeekKeyNext_rocksdb"); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = createStreamStateCursor(); + qDebug("streamStateSessionSeekKeyNext_rocksdb"); + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; + SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { return NULL; } - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); pCur->number = pState->number; @@ -2397,7 +2938,7 @@ int32_t streamStateSessionGetKVByCur_rocksdb(SStreamStateCur* pCur, SSessionKey* SStateSessionKey* pKTmp = &ktmp; const char* vval = rocksdb_iter_value(pCur->iter, (size_t*)&vLen); char* val = NULL; - int32_t len = decodeValueFunc((void*)vval, vLen, NULL, &val); + int32_t len = valueDecode((void*)vval, vLen, NULL, &val); if (len < 0) { taosMemoryFree(val); return -1; @@ -2442,13 +2983,13 @@ int32_t streamStateFillDel_rocksdb(SStreamState* pState, const SWinKey* key) { } SStreamStateCur* streamStateFillGetCur_rocksdb(SStreamState* pState, const SWinKey* key) { - stDebug("streamStateFillGetCur_rocksdb"); - SStreamStateCur* pCur = createStreamStateCursor(); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + qDebug("streamStateFillGetCur_rocksdb"); + SStreamStateCur* pCur = createStreamStateCursor(); + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; if (pCur == NULL) return NULL; - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); pCur->number = pState->number; @@ -2491,7 +3032,7 @@ int32_t streamStateFillGetKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey, winKeyDecode(&winKey, keyStr); const char* valStr = rocksdb_iter_value(pCur->iter, &vlen); - int32_t len = decodeValueFunc((void*)valStr, vlen, NULL, (char**)pVal); + int32_t len = valueDecode((void*)valStr, vlen, NULL, (char**)pVal); if (len < 0) { return -1; } @@ -2502,14 +3043,14 @@ int32_t streamStateFillGetKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey, } SStreamStateCur* streamStateFillSeekKeyNext_rocksdb(SStreamState* pState, const SWinKey* key) { - stDebug("streamStateFillSeekKeyNext_rocksdb"); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = createStreamStateCursor(); + qDebug("streamStateFillSeekKeyNext_rocksdb"); + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; + SStreamStateCur* pCur = createStreamStateCursor(); if (!pCur) { return NULL; } - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); pCur->number = pState->number; @@ -2540,14 +3081,14 @@ SStreamStateCur* 
streamStateFillSeekKeyNext_rocksdb(SStreamState* pState, const return NULL; } SStreamStateCur* streamStateFillSeekKeyPrev_rocksdb(SStreamState* pState, const SWinKey* key) { - stDebug("streamStateFillSeekKeyPrev_rocksdb"); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = createStreamStateCursor(); + qDebug("streamStateFillSeekKeyPrev_rocksdb"); + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; + SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { return NULL; } - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); pCur->number = pState->number; @@ -2579,12 +3120,12 @@ SStreamStateCur* streamStateFillSeekKeyPrev_rocksdb(SStreamState* pState, const } int32_t streamStateSessionGetKeyByRange_rocksdb(SStreamState* pState, const SSessionKey* key, SSessionKey* curKey) { stDebug("streamStateSessionGetKeyByRange_rocksdb"); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = createStreamStateCursor(); + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; + SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { return -1; } - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); pCur->number = pState->number; @@ -2811,7 +3352,7 @@ int32_t streamDefaultIterGet_rocksdb(SStreamState* pState, const void* start, co int code = 0; char* err = NULL; - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; rocksdb_snapshot_t* snapshot = NULL; rocksdb_readoptions_t* readopts = NULL; rocksdb_iterator_t* pIter = streamStateIterCreate(pState, "default", &snapshot, &readopts); @@ -2825,7 +3366,7 @@ int32_t streamDefaultIterGet_rocksdb(SStreamState* pState, const void* start, co int32_t vlen = 0; const char* vval = rocksdb_iter_value(pIter, (size_t*)&vlen); char* val = NULL; - int32_t len = decodeValueFunc((void*)vval, vlen, NULL, NULL); + int32_t len = valueDecode((void*)vval, vlen, NULL, NULL); if (len < 0) { rocksdb_iter_next(pIter); continue; @@ -2844,16 +3385,16 @@ int32_t streamDefaultIterGet_rocksdb(SStreamState* pState, const void* start, co } rocksdb_iter_next(pIter); } - rocksdb_release_snapshot(wrapper->rocksdb, snapshot); + rocksdb_release_snapshot(wrapper->db, snapshot); rocksdb_readoptions_destroy(readopts); rocksdb_iter_destroy(pIter); return code; } void* streamDefaultIterCreate_rocksdb(SStreamState* pState) { - SStreamStateCur* pCur = createStreamStateCursor(); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + SStreamStateCur* pCur = createStreamStateCursor(); + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; - pCur->db = wrapper->rocksdb; + pCur->db = wrapper->db; pCur->iter = streamStateIterCreate(pState, "default", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); pCur->number = pState->number; @@ -2884,7 +3425,7 @@ char* streamDefaultIterVal_rocksdb(void* iter, int32_t* len) { int32_t vlen = 0; const char* val = rocksdb_iter_value(pCur->iter, (size_t*)&vlen); - *len = decodeValueFunc((void*)val, vlen, NULL, &ret); + *len = valueDecode((void*)val, vlen, NULL, &ret); if (*len < 0) { taosMemoryFree(ret); return NULL; @@ -2906,7 +3447,8 @@ void 
streamStateClearBatch(void* pBatch) { rocksdb_writebatch_clear((rocksdb_ void streamStateDestroyBatch(void* pBatch) { rocksdb_writebatch_destroy((rocksdb_writebatch_t*)pBatch); } int32_t streamStatePutBatch(SStreamState* pState, const char* cfKeyName, rocksdb_writebatch_t* pBatch, void* key, void* val, int32_t vlen, int64_t ttl) { - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; + wrapper->dataWritten += 1; int i = streamStateGetCfIdx(pState, cfKeyName); if (i < 0) { @@ -2920,7 +3462,7 @@ int32_t streamStatePutBatch(SStreamState* pState, const char* cfKeyName, rocksdb char* ttlV = NULL; int32_t ttlVLen = ginitDict[i].enValueFunc(val, vlen, ttl, &ttlV); - rocksdb_column_family_handle_t* pCf = wrapper->pHandle[ginitDict[i].idx]; + rocksdb_column_family_handle_t* pCf = wrapper->pCf[ginitDict[i].idx]; rocksdb_writebatch_put_cf((rocksdb_writebatch_t*)pBatch, pCf, buf, (size_t)klen, ttlV, (size_t)ttlVLen); taosMemoryFree(ttlV); @@ -2939,8 +3481,10 @@ int32_t streamStatePutBatchOptimize(SStreamState* pState, int32_t cfIdx, rocksdb char* ttlV = tmpBuf; int32_t ttlVLen = ginitDict[cfIdx].enValueFunc(val, vlen, ttl, &ttlV); - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - rocksdb_column_family_handle_t* pCf = wrapper->pHandle[ginitDict[cfIdx].idx]; + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; + wrapper->dataWritten += 1; + + rocksdb_column_family_handle_t* pCf = wrapper->pCf[ginitDict[cfIdx].idx]; rocksdb_writebatch_put_cf((rocksdb_writebatch_t*)pBatch, pCf, buf, (size_t)klen, ttlV, (size_t)ttlVLen); if (tmpBuf == NULL) { @@ -2955,15 +3499,16 @@ int32_t streamStatePutBatchOptimize(SStreamState* pState, int32_t cfIdx, rocksdb return 0; } int32_t streamStatePutBatch_rocksdb(SStreamState* pState, void* pBatch) { - char* err = NULL; - SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - rocksdb_write(wrapper->rocksdb, wrapper->writeOpts, (rocksdb_writebatch_t*)pBatch, &err); + char* err = NULL; + STaskDbWrapper* wrapper = pState->pTdbState->pOwner->pBackend; + wrapper->dataWritten += 1; + rocksdb_write(wrapper->db, wrapper->writeOpt, (rocksdb_writebatch_t*)pBatch, &err); if (err != NULL) { stError("streamState failed to write batch, err:%s", err); taosMemoryFree(err); return -1; } else { - stDebug("write batch to backend:%p", wrapper->pBackend); + stDebug("write batch to backend:%p", wrapper->db); } return 0; } @@ -2977,3 +3522,446 @@ uint32_t nextPow2(uint32_t x) { x = x | (x >> 16); return x + 1; } +int32_t copyFiles(const char* src, const char* dst) { + int32_t code = 0; + // opt later, just hard link + int32_t sLen = strlen(src); + int32_t dLen = strlen(dst); + char* srcName = taosMemoryCalloc(1, sLen + 64); + char* dstName = taosMemoryCalloc(1, dLen + 64); + + TdDirPtr pDir = taosOpenDir(src); + if (pDir == NULL) { + taosMemoryFree(srcName); + taosMemoryFree(dstName); + return -1; + } + + TdDirEntryPtr de = NULL; + while ((de = taosReadDir(pDir)) != NULL) { + char* name = taosGetDirEntryName(de); + if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; + + sprintf(srcName, "%s%s%s", src, TD_DIRSEP, name); + sprintf(dstName, "%s%s%s", dst, TD_DIRSEP, name); + if (!taosDirEntryIsDir(de)) { + code = taosCopyFile(srcName, dstName); + if (code == -1) { + goto _err; + } + } + + memset(srcName, 0, sLen + 64); + memset(dstName, 0, dLen + 64); + } + +_err: + taosMemoryFreeClear(srcName); + taosMemoryFreeClear(dstName); + taosCloseDir(&pDir); + return code 
>= 0 ? 0 : -1; +} + +int32_t isBkdDataMeta(char* name, int32_t len) { + const char* pCurrent = "CURRENT"; + int32_t currLen = strlen(pCurrent); + + const char* pManifest = "MANIFEST-"; + int32_t maniLen = strlen(pManifest); + + if (len >= maniLen && strncmp(name, pManifest, maniLen) == 0) { + return 1; + } else if (len == currLen && strcmp(name, pCurrent) == 0) { + return 1; + } + return 0; +} +int32_t compareHashTableImpl(SHashObj* p1, SHashObj* p2, SArray* diff) { + int32_t code = 0; + size_t len = 0; + void* pIter = taosHashIterate(p2, NULL); + while (pIter) { + char* name = taosHashGetKey(pIter, &len); + if (!isBkdDataMeta(name, len) && !taosHashGet(p1, name, len)) { + char* fname = taosMemoryCalloc(1, len + 1); + strncpy(fname, name, len); + taosArrayPush(diff, &fname); + } + pIter = taosHashIterate(p2, pIter); + } + return code; +} +int32_t compareHashTable(SHashObj* p1, SHashObj* p2, SArray* add, SArray* del) { + int32_t code = 0; + + code = compareHashTableImpl(p1, p2, add); + code = compareHashTableImpl(p2, p1, del); + + return code; +} + +void hashTableToDebug(SHashObj* pTbl, char** buf) { + size_t sz = taosHashGetSize(pTbl); + int32_t total = 0; + char* p = taosMemoryCalloc(1, sz * 16 + 4); + void* pIter = taosHashIterate(pTbl, NULL); + while (pIter) { + size_t len = 0; + char* name = taosHashGetKey(pIter, &len); + char* tname = taosMemoryCalloc(1, len + 1); + memcpy(tname, name, len); + total += sprintf(p + total, "%s,", tname); + + pIter = taosHashIterate(pTbl, pIter); + taosMemoryFree(tname); + } + if (total > 0) { + p[total - 1] = 0; + } + *buf = p; +} +void strArrayDebugInfo(SArray* pArr, char** buf) { + int32_t sz = taosArrayGetSize(pArr); + if (sz <= 0) return; + + char* p = (char*)taosMemoryCalloc(1, 64 + sz * 64); + int32_t total = 0; + + for (int i = 0; i < sz; i++) { + char* name = taosArrayGetP(pArr, i); + total += sprintf(p + total, "%s,", name); + } + p[total - 1] = 0; + + *buf = p; +} +void dbChkpDebugInfo(SDbChkp* pDb) { + // stTrace("chkp get file list: curr"); + char* p[4] = {NULL}; + + hashTableToDebug(pDb->pSstTbl[pDb->idx], &p[0]); + stTrace("chkp previous file: [%s]", p[0]); + + hashTableToDebug(pDb->pSstTbl[1 - pDb->idx], &p[1]); + stTrace("chkp curr file: [%s]", p[1]); + + strArrayDebugInfo(pDb->pAdd, &p[2]); + stTrace("chkp newly addded file: [%s]", p[2]); + + strArrayDebugInfo(pDb->pDel, &p[3]); + stTrace("chkp newly deleted file: [%s]", p[3]); + + for (int i = 0; i < 4; i++) { + taosMemoryFree(p[i]); + } +} +int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { + taosThreadRwlockWrlock(&p->rwLock); + + p->preCkptId = p->curChkpId; + p->curChkpId = chkpId; + const char* pCurrent = "CURRENT"; + int32_t currLen = strlen(pCurrent); + + const char* pManifest = "MANIFEST-"; + int32_t maniLen = strlen(pManifest); + + const char* pSST = ".sst"; + int32_t sstLen = strlen(pSST); + + memset(p->buf, 0, p->len); + sprintf(p->buf, "%s%s%s%scheckpoint%" PRId64 "", p->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, chkpId); + + taosArrayClearP(p->pAdd, taosMemoryFree); + taosArrayClearP(p->pDel, taosMemoryFree); + taosHashClear(p->pSstTbl[1 - p->idx]); + + TdDirPtr pDir = taosOpenDir(p->buf); + TdDirEntryPtr de = NULL; + int8_t dummy = 0; + while ((de = taosReadDir(pDir)) != NULL) { + char* name = taosGetDirEntryName(de); + if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; + if (strlen(name) == currLen && strcmp(name, pCurrent) == 0) { + taosMemoryFreeClear(p->pCurrent); + p->pCurrent = taosStrdup(name); + // taosHashPut(p->pSstTbl[1 - p->idx], 
name, strlen(name), &dummy, sizeof(dummy)); + continue; + } + + if (strlen(name) >= maniLen && strncmp(name, pManifest, maniLen) == 0) { + taosMemoryFreeClear(p->pManifest); + p->pManifest = taosStrdup(name); + // taosHashPut(p->pSstTbl[1 - p->idx], name, strlen(name), &dummy, sizeof(dummy)); + continue; + } + if (strlen(name) >= sstLen && strncmp(name + strlen(name) - 4, pSST, sstLen) == 0) { + taosHashPut(p->pSstTbl[1 - p->idx], name, strlen(name), &dummy, sizeof(dummy)); + continue; + } + } + taosCloseDir(&pDir); + + if (p->init == 0) { + void* pIter = taosHashIterate(p->pSstTbl[1 - p->idx], NULL); + while (pIter) { + size_t len = 0; + char* name = taosHashGetKey(pIter, &len); + if (name != NULL && !isBkdDataMeta(name, len)) { + char* fname = taosMemoryCalloc(1, len + 1); + strncpy(fname, name, len); + taosArrayPush(p->pAdd, &fname); + } + pIter = taosHashIterate(p->pSstTbl[1 - p->idx], pIter); + } + if (taosArrayGetSize(p->pAdd) > 0) p->update = 1; + + p->init = 1; + p->preCkptId = -1; + p->curChkpId = chkpId; + } else { + int32_t code = compareHashTable(p->pSstTbl[p->idx], p->pSstTbl[1 - p->idx], p->pAdd, p->pDel); + if (code != 0) { + // dead code + taosArrayClearP(p->pAdd, taosMemoryFree); + taosArrayClearP(p->pDel, taosMemoryFree); + taosHashClear(p->pSstTbl[1 - p->idx]); + p->update = 0; + return code; + } + + if (taosArrayGetSize(p->pAdd) == 0 && taosArrayGetSize(p->pDel) == 0) { + p->update = 0; + } + + p->preCkptId = p->curChkpId; + p->curChkpId = chkpId; + } + + dbChkpDebugInfo(p); + + p->idx = 1 - p->idx; + + taosThreadRwlockUnlock(&p->rwLock); + + return 0; +} + +SDbChkp* dbChkpCreate(char* path, int64_t initChkpId) { + SDbChkp* p = taosMemoryCalloc(1, sizeof(SDbChkp)); + p->curChkpId = initChkpId; + p->preCkptId = -1; + p->pSST = taosArrayInit(64, sizeof(void*)); + p->path = path; + p->len = strlen(path) + 128; + p->buf = taosMemoryCalloc(1, p->len); + + p->idx = 0; + p->pSstTbl[0] = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); + p->pSstTbl[1] = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); + + p->pAdd = taosArrayInit(64, sizeof(void*)); + p->pDel = taosArrayInit(64, sizeof(void*)); + p->update = 0; + taosThreadRwlockInit(&p->rwLock, NULL); + + SArray* list = NULL; + int32_t code = dbChkpGetDelta(p, initChkpId, list); + + return p; +} + +void dbChkpDestroy(SDbChkp* pChkp) { + taosMemoryFree(pChkp->buf); + taosMemoryFree(pChkp->path); + + taosArrayDestroyP(pChkp->pSST, taosMemoryFree); + taosArrayDestroyP(pChkp->pAdd, taosMemoryFree); + taosArrayDestroyP(pChkp->pDel, taosMemoryFree); + + taosHashCleanup(pChkp->pSstTbl[0]); + taosHashCleanup(pChkp->pSstTbl[1]); + + taosMemoryFree(pChkp->pCurrent); + taosMemoryFree(pChkp->pManifest); + taosMemoryFree(pChkp); +} + +int32_t dbChkpInit(SDbChkp* p) { + if (p == NULL) return 0; + return 0; +} +int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { + taosThreadRwlockRdlock(&p->rwLock); + int32_t code = -1; + int32_t len = p->len + 128; + + char* srcBuf = taosMemoryCalloc(1, len); + char* dstBuf = taosMemoryCalloc(1, len); + + char* srcDir = taosMemoryCalloc(1, len); + char* dstDir = taosMemoryCalloc(1, len); + + sprintf(srcDir, "%s%s%s%s%s%" PRId64 "", p->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", p->curChkpId); + sprintf(dstDir, "%s", dname); + + if (!taosDirExist(srcDir)) { + stError("failed to dump srcDir %s, reason: not exist such dir", srcDir); + goto _ERROR; + } + + // add file to $name dir + for (int i = 0; i < 
taosArrayGetSize(p->pAdd); i++) { + memset(srcBuf, 0, len); + memset(dstBuf, 0, len); + + char* filename = taosArrayGetP(p->pAdd, i); + sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, filename); + sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, filename); + + if (taosCopyFile(srcBuf, dstBuf) < 0) { + stError("failed to copy file from %s to %s", srcBuf, dstBuf); + goto _ERROR; + } + } + // del file in $name + for (int i = 0; i < taosArrayGetSize(p->pDel); i++) { + char* filename = taosArrayGetP(p->pDel, i); + char* p = taosStrdup(filename); + taosArrayPush(list, &p); + } + + // copy current file to dst dir + memset(srcBuf, 0, len); + memset(dstBuf, 0, len); + sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, p->pCurrent); + sprintf(dstBuf, "%s%s%s_%" PRId64 "", dstDir, TD_DIRSEP, p->pCurrent, p->curChkpId); + if (taosCopyFile(srcBuf, dstBuf) < 0) { + stError("failed to copy file from %s to %s", srcBuf, dstBuf); + goto _ERROR; + } + + // copy manifest file to dst dir + memset(srcBuf, 0, len); + memset(dstBuf, 0, len); + sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, p->pManifest); + sprintf(dstBuf, "%s%s%s_%" PRId64 "", dstDir, TD_DIRSEP, p->pManifest, p->curChkpId); + if (taosCopyFile(srcBuf, dstBuf) < 0) { + stError("failed to copy file from %s to %s", srcBuf, dstBuf); + goto _ERROR; + } + + static char* chkpMeta = "META"; + memset(dstBuf, 0, len); + sprintf(dstDir, "%s%s%s", dstDir, TD_DIRSEP, chkpMeta); + + TdFilePtr pFile = taosOpenFile(dstDir, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + if (pFile == NULL) { + stError("chkp failed to create meta file: %s", dstDir); + goto _ERROR; + } + char content[128] = {0}; + snprintf(content, sizeof(content), "%s_%" PRId64 "\n%s_%" PRId64 "", p->pCurrent, p->curChkpId, p->pManifest, + p->curChkpId); + if (taosWriteFile(pFile, content, strlen(content)) <= 0) { + stError("chkp failed to write meta file: %s", dstDir); + taosCloseFile(&pFile); + goto _ERROR; + } + taosCloseFile(&pFile); + + // clear delta data buf + taosArrayClearP(p->pAdd, taosMemoryFree); + taosArrayClearP(p->pDel, taosMemoryFree); + code = 0; + +_ERROR: + taosThreadRwlockUnlock(&p->rwLock); + taosMemoryFree(srcBuf); + taosMemoryFree(dstBuf); + taosMemoryFree(srcDir); + taosMemoryFree(dstDir); + return code; +} +SBkdMgt* bkdMgtCreate(char* path) { + SBkdMgt* p = taosMemoryCalloc(1, sizeof(SBkdMgt)); + p->pDbChkpTbl = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); + p->path = taosStrdup(path); + taosThreadRwlockInit(&p->rwLock, NULL); + return p; +} + +void bkdMgtDestroy(SBkdMgt* bm) { + if (bm == NULL) return; + void* pIter = taosHashIterate(bm->pDbChkpTbl, NULL); + while (pIter) { + SDbChkp* pChkp = *(SDbChkp**)(pIter); + dbChkpDestroy(pChkp); + + pIter = taosHashIterate(bm->pDbChkpTbl, pIter); + } + + taosThreadRwlockDestroy(&bm->rwLock); + taosMemoryFree(bm->path); + taosHashCleanup(bm->pDbChkpTbl); + + taosMemoryFree(bm); +} +int32_t bkdMgtGetDelta(SBkdMgt* bm, char* taskId, int64_t chkpId, SArray* list, char* dname) { + int32_t code = 0; + + taosThreadRwlockWrlock(&bm->rwLock); + SDbChkp** ppChkp = taosHashGet(bm->pDbChkpTbl, taskId, strlen(taskId)); + SDbChkp* pChkp = ppChkp != NULL ? 
*ppChkp : NULL; + + if (pChkp == NULL) { + char* path = taosMemoryCalloc(1, strlen(bm->path) + 64); + sprintf(path, "%s%s%s", bm->path, TD_DIRSEP, taskId); + + SDbChkp* p = dbChkpCreate(path, chkpId); + taosHashPut(bm->pDbChkpTbl, taskId, strlen(taskId), &p, sizeof(void*)); + + pChkp = p; + + code = dbChkpDumpTo(pChkp, dname, list); + taosThreadRwlockUnlock(&bm->rwLock); + return code; + } + + code = dbChkpGetDelta(pChkp, chkpId, NULL); + code = dbChkpDumpTo(pChkp, dname, list); + + taosThreadRwlockUnlock(&bm->rwLock); + return code; +} + +int32_t bkdMgtAddChkp(SBkdMgt* bm, char* task, char* path) { + int32_t code = -1; + + taosThreadRwlockWrlock(&bm->rwLock); + SDbChkp** pp = taosHashGet(bm->pDbChkpTbl, task, strlen(task)); + if (pp == NULL) { + SDbChkp* p = dbChkpCreate(path, 0); + if (p != NULL) { + taosHashPut(bm->pDbChkpTbl, task, strlen(task), &p, sizeof(void*)); + code = 0; + } + } else { + stError("task chkp already exists"); + } + + taosThreadRwlockUnlock(&bm->rwLock); + + return code; +} + +int32_t bkdMgtDumpTo(SBkdMgt* bm, char* taskId, char* dname) { + int32_t code = 0; + taosThreadRwlockRdlock(&bm->rwLock); + + SDbChkp* p = taosHashGet(bm->pDbChkpTbl, taskId, strlen(taskId)); + code = dbChkpDumpTo(p, dname, NULL); + + taosThreadRwlockUnlock(&bm->rwLock); + return code; +} \ No newline at end of file diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 6201329b95..e2561de841 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -13,10 +13,18 @@ * along with this program. If not, see . */ -#include "streamInt.h" -#include "rsync.h" #include "cos.h" +#include "rsync.h" +#include "streamBackendRocksdb.h" +#include "streamInt.h" +typedef struct { + UPLOAD_TYPE type; + char* taskId; + int64_t chkpId; + + SStreamTask* pTask; +} SAsyncUploadArg; int32_t tEncodeStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; @@ -95,12 +103,12 @@ int32_t tDecodeStreamCheckpointReadyMsg(SDecoder* pDecoder, SStreamCheckpointRea static int32_t streamAlignCheckpoint(SStreamTask* pTask) { int32_t num = taosArrayGetSize(pTask->upstreamInfo.pList); - int64_t old = atomic_val_compare_exchange_32(&pTask->checkpointAlignCnt, 0, num); + int64_t old = atomic_val_compare_exchange_32(&pTask->chkInfo.downstreamAlignNum, 0, num); if (old == 0) { stDebug("s-task:%s set initial align upstream num:%d", pTask->id.idStr, num); } - return atomic_sub_fetch_32(&pTask->checkpointAlignCnt, 1); + return atomic_sub_fetch_32(&pTask->chkInfo.downstreamAlignNum, 1); } static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType) { @@ -118,11 +126,11 @@ static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpoint } pBlock->info.type = STREAM_CHECKPOINT; - pBlock->info.version = pTask->checkpointingId; + pBlock->info.version = pTask->chkInfo.checkpointingId; pBlock->info.rows = 1; pBlock->info.childId = pTask->info.selfChildId; - pChkpoint->blocks = taosArrayInit(4, sizeof(SSDataBlock));//pBlock; + pChkpoint->blocks = taosArrayInit(4, sizeof(SSDataBlock)); // pBlock; taosArrayPush(pChkpoint->blocks, pBlock); taosMemoryFree(pBlock); @@ -141,13 +149,12 @@ int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSo // 1. set task status to be prepared for check point, no data are allowed to put into inputQ. 
streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_GEN_CHECKPOINT); - pTask->checkpointingId = pReq->checkpointId; - pTask->checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask); + pTask->chkInfo.checkpointingId = pReq->checkpointId; + pTask->chkInfo.checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask); pTask->chkInfo.startTs = taosGetTimestampMs(); pTask->execInfo.checkpoint += 1; // 2. Put the checkpoint block into inputQ, to make sure all blocks with less version have been handled by this task - // already. int32_t code = appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER); return code; } @@ -158,6 +165,7 @@ static int32_t continueDispatchCheckpointBlock(SStreamDataBlock* pBlock, SStream int32_t code = taosWriteQitem(pTask->outputq.queue->pQueue, pBlock); if (code == 0) { + ASSERT(pTask->chkInfo.dispatchCheckpointTrigger == false); streamDispatchStreamBlock(pTask); } else { stError("s-task:%s failed to put checkpoint into outputQ, code:%s", pTask->id.idStr, tstrerror(code)); @@ -169,14 +177,13 @@ static int32_t continueDispatchCheckpointBlock(SStreamDataBlock* pBlock, SStream int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBlock) { SSDataBlock* pDataBlock = taosArrayGet(pBlock->blocks, 0); - int64_t checkpointId = pDataBlock->info.version; - - const char* id = pTask->id.idStr; - int32_t code = TSDB_CODE_SUCCESS; + int64_t checkpointId = pDataBlock->info.version; + const char* id = pTask->id.idStr; + int32_t code = TSDB_CODE_SUCCESS; // set task status if (streamTaskGetStatus(pTask, NULL) != TASK_STATUS__CK) { - pTask->checkpointingId = checkpointId; + pTask->chkInfo.checkpointingId = checkpointId; code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_GEN_CHECKPOINT); if (code != TSDB_CODE_SUCCESS) { stError("s-task:%s handle checkpoint-trigger block failed, code:%s", id, tstrerror(code)); @@ -184,25 +191,15 @@ int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBloc } } - { // todo: remove this when the pipeline checkpoint generating is used. 
- SStreamMeta* pMeta = pTask->pMeta; - streamMetaWLock(pMeta); - - if (pMeta->chkptNotReadyTasks == 0) { - pMeta->chkptNotReadyTasks = pMeta->numOfStreamTasks; - } - - streamMetaWUnLock(pMeta); - } - - //todo fix race condition: set the status and append checkpoint block + // todo fix race condition: set the status and append checkpoint block int32_t taskLevel = pTask->info.taskLevel; if (taskLevel == TASK_LEVEL__SOURCE) { - if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { + int8_t type = pTask->outputInfo.type; + if (type == TASK_OUTPUT__FIXED_DISPATCH || type == TASK_OUTPUT__SHUFFLE_DISPATCH) { stDebug("s-task:%s set childIdx:%d, and add checkpoint-trigger block into outputQ", id, pTask->info.selfChildId); continueDispatchCheckpointBlock(pBlock, pTask); } else { // only one task exists, no need to dispatch downstream info - atomic_add_fetch_32(&pTask->checkpointNotReadyTasks, 1); + atomic_add_fetch_32(&pTask->chkInfo.checkpointNotReadyTasks, 1); streamProcessCheckpointReadyMsg(pTask); streamFreeQitem((SStreamQueueItem*)pBlock); } @@ -221,24 +218,23 @@ int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBloc int32_t num = taosArrayGetSize(pTask->upstreamInfo.pList); if (notReady > 0) { stDebug("s-task:%s received checkpoint block, idx:%d, %d upstream tasks not send checkpoint info yet, total:%d", - id, pTask->info.selfChildId, notReady, num); + id, pTask->info.selfChildId, notReady, num); streamFreeQitem((SStreamQueueItem*)pBlock); return code; } if (taskLevel == TASK_LEVEL__SINK) { stDebug("s-task:%s process checkpoint block, all %d upstreams sent checkpoint msgs, send ready msg to upstream", - id, num); + id, num); streamFreeQitem((SStreamQueueItem*)pBlock); streamTaskBuildCheckpoint(pTask); - } else { - stDebug( - "s-task:%s process checkpoint block, all %d upstreams sent checkpoint msgs, dispatch checkpoint msg " - "downstream", id, num); + } else { // source & agg tasks need to forward the checkpoint msg downwards + stDebug("s-task:%s process checkpoint block, all %d upstreams sent checkpoint msgs, continue forwards msg", id, + num); // set the needed checked downstream tasks, only when all downstream tasks do checkpoint complete, this task // can start local checkpoint procedure - pTask->checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask); + pTask->chkInfo.checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask); // Put the checkpoint block into inputQ, to make sure all blocks with less version have been handled by this task // already. 
And then, dispatch check point msg to all downstream tasks @@ -257,12 +253,12 @@ int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask) { ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE || pTask->info.taskLevel == TASK_LEVEL__AGG); // only when all downstream tasks are send checkpoint rsp, we can start the checkpoint procedure for the agg task - int32_t notReady = atomic_sub_fetch_32(&pTask->checkpointNotReadyTasks, 1); + int32_t notReady = atomic_sub_fetch_32(&pTask->chkInfo.checkpointNotReadyTasks, 1); ASSERT(notReady >= 0); if (notReady == 0) { stDebug("s-task:%s all downstream tasks have completed the checkpoint, start to do checkpoint for current task", - pTask->id.idStr); + pTask->id.idStr); appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT); } else { int32_t total = streamTaskGetNumOfDownstream(pTask); @@ -272,196 +268,343 @@ int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask) { return 0; } -void streamTaskClearCheckInfo(SStreamTask* pTask) { - pTask->checkpointingId = 0; // clear the checkpoint id +void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg) { + pTask->chkInfo.checkpointingId = 0; // clear the checkpoint id pTask->chkInfo.failedId = 0; pTask->chkInfo.startTs = 0; // clear the recorded start time - pTask->checkpointNotReadyTasks = 0; - pTask->checkpointAlignCnt = 0; - streamTaskOpenAllUpstreamInput(pTask); // open inputQ for all upstream tasks + pTask->chkInfo.checkpointNotReadyTasks = 0; + // pTask->chkInfo.checkpointAlignCnt = 0; + pTask->chkInfo.dispatchCheckpointTrigger = false; + streamTaskOpenAllUpstreamInput(pTask); // open inputQ for all upstream tasks + if (clearChkpReadyMsg) { + streamClearChkptReadyMsg(pTask); + } } -int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId) { - int32_t vgId = pMeta->vgId; - int32_t code = 0; +int32_t streamSaveTaskCheckpointInfo(SStreamTask* p, int64_t checkpointId) { + SStreamMeta* pMeta = p->pMeta; + int32_t vgId = pMeta->vgId; + const char* id = p->id.idStr; + int32_t code = 0; - streamMetaWLock(pMeta); + if (p->info.fillHistory == 1) { + return code; + } - for (int32_t i = 0; i < taosArrayGetSize(pMeta->pTaskList); ++i) { - STaskId* pId = taosArrayGet(pMeta->pTaskList, i); - SStreamTask** ppTask = taosHashGet(pMeta->pTasksMap, pId, sizeof(*pId)); - if (ppTask == NULL) { - continue; - } + if (p->info.taskLevel > TASK_LEVEL__SINK) { + return code; + } - SStreamTask* p = *ppTask; - if (p->info.fillHistory == 1) { - continue; - } + taosThreadMutexLock(&p->lock); - ASSERT(p->chkInfo.checkpointId <= p->checkpointingId && p->checkpointingId == checkpointId && - p->chkInfo.checkpointVer <= p->chkInfo.processedVer); + ASSERT(p->chkInfo.checkpointId <= p->chkInfo.checkpointingId && p->chkInfo.checkpointingId == checkpointId && + p->chkInfo.checkpointVer <= p->chkInfo.processedVer); + p->chkInfo.checkpointId = p->chkInfo.checkpointingId; + p->chkInfo.checkpointVer = p->chkInfo.processedVer; - p->chkInfo.checkpointId = p->checkpointingId; - p->chkInfo.checkpointVer = p->chkInfo.processedVer; + streamTaskClearCheckInfo(p, false); + char* str = NULL; + streamTaskGetStatus(p, &str); - streamTaskClearCheckInfo(p); + code = streamTaskHandleEvent(p->status.pSM, TASK_EVENT_CHECKPOINT_DONE); + taosThreadMutexUnlock(&p->lock); - char* str = NULL; - streamTaskGetStatus(p, &str); + if (code != TSDB_CODE_SUCCESS) { + stDebug("s-task:%s vgId:%d handle event:checkpoint-done failed", id, vgId); + return -1; + } - code = streamTaskHandleEvent(p->status.pSM, TASK_EVENT_CHECKPOINT_DONE); + 
stDebug("vgId:%d s-task:%s level:%d open upstream inputQ, save status after checkpoint, checkpointId:%" PRId64 + ", Ver(saved):%" PRId64 " currentVer:%" PRId64 ", status: normal, prev:%s", + vgId, id, p->info.taskLevel, checkpointId, p->chkInfo.checkpointVer, p->chkInfo.nextProcessVer, str); + + // save the task if not sink task + if (p->info.taskLevel != TASK_LEVEL__SINK) { + streamMetaWLock(pMeta); + + code = streamMetaSaveTask(pMeta, p); if (code != TSDB_CODE_SUCCESS) { - stDebug("s-task:%s vgId:%d save task status failed, since handle event failed", p->id.idStr, vgId); streamMetaWUnLock(pMeta); - return -1; - } else { // save the task - streamMetaSaveTask(pMeta, p); + stError("s-task:%s vgId:%d failed to save task info after do checkpoint, checkpointId:%" PRId64 ", since %s", id, + vgId, checkpointId, terrstr()); + return code; } - stDebug( - "vgId:%d s-task:%s level:%d open upstream inputQ, commit task status after checkpoint completed, " - "checkpointId:%" PRId64 ", Ver(saved):%" PRId64 " currentVer:%" PRId64 ", status to be normal, prev:%s", - pMeta->vgId, p->id.idStr, p->info.taskLevel, checkpointId, p->chkInfo.checkpointVer, p->chkInfo.nextProcessVer, - str); - } + code = streamMetaCommit(pMeta); + if (code != TSDB_CODE_SUCCESS) { + stError("s-task:%s vgId:%d failed to commit stream meta after do checkpoint, checkpointId:%" PRId64 ", since %s", + id, vgId, checkpointId, terrstr()); + } - code = streamMetaCommit(pMeta); - if (code < 0) { - stError("vgId:%d failed to commit stream meta after do checkpoint, checkpointId:%" PRId64 ", since %s", pMeta->vgId, - checkpointId, terrstr()); - } else { - stInfo("vgId:%d commit stream meta after do checkpoint, checkpointId:%" PRId64 " DONE", pMeta->vgId, checkpointId); + streamMetaWUnLock(pMeta); } - - streamMetaWUnLock(pMeta); return code; } -int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { - int32_t code = 0; +void streamTaskSetFailedId(SStreamTask* pTask) { + pTask->chkInfo.failedId = pTask->chkInfo.checkpointingId; + pTask->chkInfo.checkpointId = pTask->chkInfo.checkpointingId; +} - // check for all tasks, and do generate the vnode-wide checkpoint data. 
-  SStreamMeta* pMeta = pTask->pMeta;
-  int32_t      remain = atomic_sub_fetch_32(&pMeta->chkptNotReadyTasks, 1);
-  ASSERT(remain >= 0);
-
-  double el = (taosGetTimestampMs() - pTask->chkInfo.startTs) / 1000.0;
-  if (remain == 0) {  // all tasks are ready
-    stDebug("s-task:%s all downstreams are ready, ready for do checkpoint", pTask->id.idStr);
-    streamBackendDoCheckpoint(pMeta, pTask->checkpointingId);
-    streamSaveAllTaskStatus(pMeta, pTask->checkpointingId);
-    stInfo(
-        "vgId:%d vnode wide checkpoint completed, save all tasks status, last:%s, level:%d elapsed time:%.2f Sec "
-        "checkpointId:%" PRId64,
-        pMeta->vgId, pTask->id.idStr, pTask->info.taskLevel, el, pTask->checkpointingId);
+int32_t getChkpMeta(char* id, char* path, SArray* list) {
+  char*   file = taosMemoryCalloc(1, strlen(path) + 32);
+  sprintf(file, "%s%s%s", path, TD_DIRSEP, "META_TMP");
+  int32_t code = downloadCheckpointByName(id, "META", file);
+  if (code != 0) {
+    stDebug("chkp failed to download meta file:%s", file);
+    taosMemoryFree(file);
+    return code;
+  }
+  TdFilePtr pFile = taosOpenFile(file, TD_FILE_READ);
+  char      buf[128] = {0};
+  if (taosReadFile(pFile, buf, sizeof(buf)) <= 0) {
+    stError("chkp failed to read meta file:%s", file);
+    code = -1;
   } else {
-    stInfo(
-        "vgId:%d vnode wide tasks not reach checkpoint ready status, ready s-task:%s, level:%d elapsed time:%.2f Sec "
-        "not ready:%d/%d",
-        pMeta->vgId, pTask->id.idStr, pTask->info.taskLevel, el, remain, pMeta->numOfStreamTasks);
+    int32_t len = strlen(buf);
+    for (int i = 0; i < len; i++) {
+      if (buf[i] == '\n') {
+        char* item = taosMemoryCalloc(1, i + 1);
+        memcpy(item, buf, i);
+        taosArrayPush(list, &item);
+
+        item = taosMemoryCalloc(1, len - i);
+        memcpy(item, buf + i + 1, len - i - 1);
+        taosArrayPush(list, &item);
+      }
+    }
+  }
+  taosCloseFile(&pFile);
+  taosRemoveFile(file);
+  taosMemoryFree(file);
+  return code;
+}
+int32_t doUploadChkp(void* param) {
+  SAsyncUploadArg* arg = param;
+  char*            path = NULL;
+  int32_t          code = 0;
+  SArray*          toDelFiles = taosArrayInit(4, sizeof(void*));
+
+  if ((code = taskDbGenChkpUploadData(arg->pTask->pBackend, arg->pTask->pMeta->bkdChkptMgt, arg->chkpId,
+                                      (int8_t)(arg->type), &path, toDelFiles)) != 0) {
+    stError("s-task:%s failed to gen upload checkpoint:%" PRId64 "", arg->pTask->id.idStr, arg->chkpId);
+  }
+  if (arg->type == UPLOAD_S3) {
+    if (code == 0 && (code = getChkpMeta(arg->taskId, path, toDelFiles)) != 0) {
+      stError("s-task:%s failed to get checkpoint:%" PRId64 " meta", arg->pTask->id.idStr, arg->chkpId);
+    }
+  }
+
+  if (code == 0 && (code = uploadCheckpoint(arg->taskId, path)) != 0) {
+    stError("s-task:%s failed to upload checkpoint:%" PRId64, arg->pTask->id.idStr, arg->chkpId);
+  }
+
+  if (code == 0) {
+    for (int i = 0; i < taosArrayGetSize(toDelFiles); i++) {
+      char* p = taosArrayGetP(toDelFiles, i);
+      code = deleteCheckpointFile(arg->taskId, p);
+      stDebug("s-task:%s try to del file: %s", arg->pTask->id.idStr, p);
+      if (code != 0) {
+        break;
+      }
+    }
+  }
+
+  taosArrayDestroyP(toDelFiles, taosMemoryFree);
+
+  taosRemoveDir(path);
+  taosMemoryFree(path);
+
+  taosMemoryFree(arg->taskId);
+  taosMemoryFree(arg);
+  return code;
+}
+int32_t streamTaskUploadChkp(SStreamTask* pTask, int64_t chkpId, char* taskId) {
+  // async upload
+  UPLOAD_TYPE type = getUploadType();
+  if (type == UPLOAD_DISABLE) {
+    return 0;
+  }
+  SAsyncUploadArg* arg = taosMemoryCalloc(1, sizeof(SAsyncUploadArg));
+  arg->type = type;
+  arg->taskId = taosStrdup(taskId);
+  arg->chkpId = chkpId;
+  arg->pTask = pTask;
+
+  return streamMetaAsyncExec(pTask->pMeta, doUploadChkp, arg, NULL);
+}
+int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) {
+  int32_t code = TSDB_CODE_SUCCESS;
+  int64_t startTs = pTask->chkInfo.startTs;
+  int64_t ckId = pTask->chkInfo.checkpointingId;
+
+  // sink task does not need to save the status or generate the checkpoint
+  if (pTask->info.taskLevel != TASK_LEVEL__SINK) {
+    stDebug("s-task:%s level:%d start gen checkpoint", pTask->id.idStr, pTask->info.taskLevel);
+    code = streamBackendDoCheckpoint(pTask->pBackend, ckId);
+    if (code != TSDB_CODE_SUCCESS) {
+      stError("s-task:%s gen checkpoint:%" PRId64 " failed, code:%s", pTask->id.idStr, ckId, tstrerror(terrno));
+    }
   }
 
   // send check point response to upstream task
-  if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
-    code = streamTaskSendCheckpointSourceRsp(pTask);
-  } else {
-    code = streamTaskSendCheckpointReadyMsg(pTask);
+  if (code == TSDB_CODE_SUCCESS) {
+    if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
+      code = streamTaskSendCheckpointSourceRsp(pTask);
+    } else {
+      code = streamTaskSendCheckpointReadyMsg(pTask);
+    }
+
+    if (code != TSDB_CODE_SUCCESS) {
+      // todo: let's retry send rsp to upstream/mnode
+      stError("s-task:%s failed to send checkpoint rsp to upstream, checkpointId:%" PRId64 ", code:%s", pTask->id.idStr,
+              ckId, tstrerror(code));
+    }
   }
 
-  if (code != TSDB_CODE_SUCCESS) {
-    // todo: let's retry send rsp to upstream/mnode
-    stError("s-task:%s failed to send checkpoint rsp to upstream, checkpointId:%" PRId64 ", code:%s", pTask->id.idStr,
-            pTask->checkpointingId, tstrerror(code));
+  // clear the checkpoint info, and commit the newest checkpoint info if all work is done successfully
+  if (code == TSDB_CODE_SUCCESS) {
+    code = streamSaveTaskCheckpointInfo(pTask, ckId);
+    if (code != TSDB_CODE_SUCCESS) {
+      stError("s-task:%s commit taskInfo failed, checkpointId:%" PRId64 ", code:%s", pTask->id.idStr, ckId,
+              tstrerror(terrno));
+    } else {
+      code = streamTaskUploadChkp(pTask, ckId, (char*)pTask->id.idStr);
+      if (code != 0) {
+        stError("s-task:%s failed to upload checkpoint:%" PRId64, pTask->id.idStr, ckId);
+      }
+    }
   }
 
+  if (code != TSDB_CODE_SUCCESS) {  // clear the checkpoint info if failed
+    taosThreadMutexLock(&pTask->lock);
+    streamTaskClearCheckInfo(pTask, false);
+    code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_CHECKPOINT_DONE);
+    taosThreadMutexUnlock(&pTask->lock);
+
+    streamTaskSetFailedId(pTask);
+    stDebug("s-task:%s clear checkpoint flag since gen checkpoint failed, checkpointId:%" PRId64, pTask->id.idStr,
+            ckId);
+  }
+
+  double el = (taosGetTimestampMs() - startTs) / 1000.0;
+  stInfo("s-task:%s vgId:%d level:%d, checkpointId:%" PRId64 " ver:%" PRId64 " elapsed time:%.2f Sec, %s ",
+         pTask->id.idStr, pTask->pMeta->vgId, pTask->info.taskLevel, ckId, pTask->chkInfo.checkpointVer, el,
+         (code == TSDB_CODE_SUCCESS) ? "succ" : "failed");
+  return code;
 }
 
-static int uploadCheckpointToS3(char* id, char* path){
+static int uploadCheckpointToS3(char* id, char* path) {
   TdDirPtr pDir = taosOpenDir(path);
   if (pDir == NULL) return -1;
 
   TdDirEntryPtr de = NULL;
+  s3Init();
   while ((de = taosReadDir(pDir)) != NULL) {
     char* name = taosGetDirEntryName(de);
-    if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0 ||
-        taosDirEntryIsDir(de)) continue;
+    if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0 || taosDirEntryIsDir(de)) continue;
 
     char filename[PATH_MAX] = {0};
-    if(path[strlen(path) - 1] == TD_DIRSEP_CHAR){
+    if (path[strlen(path) - 1] == TD_DIRSEP_CHAR) {
       snprintf(filename, sizeof(filename), "%s%s", path, name);
-    }else{
+    } else {
       snprintf(filename, sizeof(filename), "%s%s%s", path, TD_DIRSEP, name);
     }
 
     char object[PATH_MAX] = {0};
     snprintf(object, sizeof(object), "%s%s%s", id, TD_DIRSEP, name);
 
-    if(s3PutObjectFromFile2(filename, object) != 0){
+    if (s3PutObjectFromFile2(filename, object) != 0) {
       taosCloseDir(&pDir);
       return -1;
     }
     stDebug("[s3] upload checkpoint:%s", filename);
+    // break;
   }
 
   taosCloseDir(&pDir);
   return 0;
 }
 
-UPLOAD_TYPE getUploadType(){
-  if(strlen(tsSnodeAddress) != 0){
+static int downloadCheckpointByNameS3(char* id, char* fname, char* dstName) {
+  int   code = 0;
+  char* buf = taosMemoryCalloc(1, strlen(id) + strlen(fname) + 4);
+  sprintf(buf, "%s/%s", id, fname);
+  if (s3GetObjectToFile(buf, dstName) != 0) {
+    code = -1;
+  }
+  taosMemoryFree(buf);
+  return code;
+}
+
+UPLOAD_TYPE getUploadType() {
+  if (strlen(tsSnodeAddress) != 0) {
     return UPLOAD_RSYNC;
-  }else if(tsS3StreamEnabled){
+  } else if (tsS3StreamEnabled) {
     return UPLOAD_S3;
-  }else{
+  } else {
     return UPLOAD_DISABLE;
   }
 }
 
-int uploadCheckpoint(char* id, char* path){
-  if(id == NULL || path == NULL || strlen(id) == 0 || strlen(path) == 0 || strlen(path) >= PATH_MAX){
+int uploadCheckpoint(char* id, char* path) {
+  if (id == NULL || path == NULL || strlen(id) == 0 || strlen(path) == 0 || strlen(path) >= PATH_MAX) {
     stError("uploadCheckpoint parameters invalid");
     return -1;
   }
-  if(strlen(tsSnodeAddress) != 0){
+  if (strlen(tsSnodeAddress) != 0) {
     return uploadRsync(id, path);
-  }else if(tsS3StreamEnabled){
+  } else if (tsS3StreamEnabled) {
     return uploadCheckpointToS3(id, path);
   }
   return 0;
 }
 
-int downloadCheckpoint(char* id, char* path){
-  if(id == NULL || path == NULL || strlen(id) == 0 || strlen(path) == 0 || strlen(path) >= PATH_MAX){
+// fileName: CURRENT
+int downloadCheckpointByName(char* id, char* fname, char* dstName) {
+  if (id == NULL || fname == NULL || strlen(id) == 0 || strlen(fname) == 0 || strlen(fname) >= PATH_MAX) {
+    stError("downloadCheckpointByName parameters invalid");
+    return -1;
+  }
+  if (strlen(tsSnodeAddress) != 0) {
+    return 0;
+  } else if (tsS3StreamEnabled) {
+    return downloadCheckpointByNameS3(id, fname, dstName);
+  }
+  return 0;
+}
+
+int downloadCheckpoint(char* id, char* path) {
+  if (id == NULL || path == NULL || strlen(id) == 0 || strlen(path) == 0 || strlen(path) >= PATH_MAX) {
     stError("downloadCheckpoint parameters invalid");
     return -1;
   }
-  if(strlen(tsSnodeAddress) != 0){
+  if (strlen(tsSnodeAddress) != 0) {
     return downloadRsync(id, path);
-  }else if(tsS3StreamEnabled){
+  } else if (tsS3StreamEnabled) {
     return s3GetObjectsByPrefix(id, path);
   }
   return 0;
 }
 
-int deleteCheckpoint(char* id){
-  if(id == NULL || strlen(id) == 0){
+int deleteCheckpoint(char* id) {
+  if (id == NULL || strlen(id) == 0) {
     stError("deleteCheckpoint parameters invalid");
     return -1;
   }
-  if(strlen(tsSnodeAddress) != 0){
+
if (strlen(tsSnodeAddress) != 0) { return deleteRsync(id); - }else if(tsS3StreamEnabled){ + } else if (tsS3StreamEnabled) { s3DeleteObjectsByPrefix(id); } return 0; } -int deleteCheckpointFile(char* id, char* name){ +int deleteCheckpointFile(char* id, char* name) { char object[128] = {0}; snprintf(object, sizeof(object), "%s/%s", id, name); - char *tmp = object; + char* tmp = object; s3DeleteObjects((const char**)&tmp, 1); return 0; } diff --git a/source/libs/stream/src/streamData.c b/source/libs/stream/src/streamData.c index f6ec6e9fdb..bcda85e7a7 100644 --- a/source/libs/stream/src/streamData.c +++ b/source/libs/stream/src/streamData.c @@ -158,7 +158,7 @@ int32_t streamMergeSubmit(SStreamMergedSubmit* pMerged, SStreamDataSubmit* pSubm } // todo handle memory error -SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem) { +SStreamQueueItem* streamQueueMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem) { terrno = 0; if (dst->type == STREAM_INPUT__DATA_BLOCK && pElem->type == STREAM_INPUT__DATA_BLOCK) { diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 42280b0d0f..6247d4ed53 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -14,9 +14,9 @@ */ #include "streamInt.h" +#include "tmisce.h" #include "trpc.h" #include "ttimer.h" -#include "tmisce.h" typedef struct SBlockName { uint32_t hashValue; @@ -231,7 +231,7 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) buf = NULL; stDebug("s-task:%s (child %d) send retrieve req to task:0x%x (vgId:%d), reqId:0x%" PRIx64, pTask->id.idStr, - pTask->info.selfChildId, pEpInfo->taskId, pEpInfo->nodeId, req.reqId); + pTask->info.selfChildId, pEpInfo->taskId, pEpInfo->nodeId, req.reqId); } code = 0; @@ -270,7 +270,7 @@ int32_t streamSendCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, initRpcMsg(&msg, TDMT_VND_STREAM_TASK_CHECK, buf, tlen + sizeof(SMsgHead)); stDebug("s-task:%s (level:%d) send check msg to s-task:0x%" PRIx64 ":0x%x (vgId:%d)", pTask->id.idStr, - pTask->info.taskLevel, pReq->streamId, pReq->downstreamTaskId, nodeId); + pTask->info.taskLevel, pReq->streamId, pReq->downstreamTaskId, nodeId); tmsgSendReq(pEpSet, &msg); return 0; @@ -343,7 +343,8 @@ static int32_t doBuildDispatchMsg(SStreamTask* pTask, const SStreamDataBlock* pD SSDataBlock* pDataBlock = taosArrayGet(pData->blocks, i); // TODO: do not use broadcast - if (pDataBlock->info.type == STREAM_DELETE_RESULT || pDataBlock->info.type == STREAM_CHECKPOINT || pDataBlock->info.type == STREAM_TRANS_STATE) { + if (pDataBlock->info.type == STREAM_DELETE_RESULT || pDataBlock->info.type == STREAM_CHECKPOINT || + pDataBlock->info.type == STREAM_TRANS_STATE) { for (int32_t j = 0; j < numOfVgroups; j++) { code = streamAddBlockIntoDispatchMsg(pDataBlock, &pReqs[j]); if (code != 0) { @@ -362,7 +363,7 @@ static int32_t doBuildDispatchMsg(SStreamTask* pTask, const SStreamDataBlock* pD } code = streamSearchAndAddBlock(pTask, pReqs, pDataBlock, numOfVgroups, pDataBlock->info.id.groupId); - if(code != 0) { + if (code != 0) { destroyDispatchMsg(pReqs, numOfVgroups); return code; } @@ -371,13 +372,14 @@ static int32_t doBuildDispatchMsg(SStreamTask* pTask, const SStreamDataBlock* pD pTask->msgInfo.pData = pReqs; } - stDebug("s-task:%s build dispatch msg success, msgId:%d", pTask->id.idStr, pTask->execInfo.dispatch); + stDebug("s-task:%s build dispatch msg success, msgId:%d, stage:%" PRId64, pTask->id.idStr, 
pTask->execInfo.dispatch, + pTask->pMeta->stage); return code; } static int32_t sendDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pDispatchMsg) { - int32_t code = 0; - int32_t msgId = pTask->execInfo.dispatch; + int32_t code = 0; + int32_t msgId = pTask->execInfo.dispatch; const char* id = pTask->id.idStr; if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { @@ -393,8 +395,8 @@ static int32_t sendDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pDispatch SArray* vgInfo = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos; int32_t numOfVgroups = taosArrayGetSize(vgInfo); - stDebug("s-task:%s (child taskId:%d) start to shuffle-dispatch blocks to %d vgroup(s), msgId:%d", - id, pTask->info.selfChildId, numOfVgroups, msgId); + stDebug("s-task:%s (child taskId:%d) start to shuffle-dispatch blocks to %d vgroup(s), msgId:%d", id, + pTask->info.selfChildId, numOfVgroups, msgId); for (int32_t i = 0; i < numOfVgroups; i++) { if (pDispatchMsg[i].blockNum > 0) { @@ -409,7 +411,8 @@ static int32_t sendDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pDispatch } } - stDebug("s-task:%s complete shuffle-dispatch blocks to all %d vnodes, msgId:%d", pTask->id.idStr, numOfVgroups, msgId); + stDebug("s-task:%s complete shuffle-dispatch blocks to all %d vnodes, msgId:%d", pTask->id.idStr, numOfVgroups, + msgId); } return code; @@ -434,20 +437,20 @@ static void doRetryDispatchData(void* param, void* tmrId) { SArray* pList = taosArrayDup(pTask->msgInfo.pRetryList, NULL); taosArrayClear(pTask->msgInfo.pRetryList); - SStreamDispatchReq *pReq = pTask->msgInfo.pData; + SStreamDispatchReq* pReq = pTask->msgInfo.pData; if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { SArray* vgInfo = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos; int32_t numOfVgroups = taosArrayGetSize(vgInfo); int32_t numOfFailed = taosArrayGetSize(pList); - stDebug("s-task:%s (child taskId:%d) retry shuffle-dispatch blocks to %d vgroup(s), msgId:%d", - id, pTask->info.selfChildId, numOfFailed, msgId); + stDebug("s-task:%s (child taskId:%d) retry shuffle-dispatch blocks to %d vgroup(s), msgId:%d", id, + pTask->info.selfChildId, numOfFailed, msgId); for (int32_t i = 0; i < numOfFailed; i++) { - int32_t vgId = *(int32_t*) taosArrayGet(pList, i); + int32_t vgId = *(int32_t*)taosArrayGet(pList, i); - for(int32_t j = 0; j < numOfVgroups; ++j) { + for (int32_t j = 0; j < numOfVgroups; ++j) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, j); if (pVgInfo->vgId == vgId) { stDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to vgId:%d", pTask->id.idStr, @@ -461,7 +464,8 @@ static void doRetryDispatchData(void* param, void* tmrId) { } } - stDebug("s-task:%s complete re-try shuffle-dispatch blocks to all %d vnodes, msgId:%d", pTask->id.idStr, numOfFailed, msgId); + stDebug("s-task:%s complete re-try shuffle-dispatch blocks to all %d vnodes, msgId:%d", pTask->id.idStr, + numOfFailed, msgId); } else { int32_t vgId = pTask->outputInfo.fixedDispatcher.nodeId; SEpSet* pEpSet = &pTask->outputInfo.fixedDispatcher.epSet; @@ -478,8 +482,8 @@ static void doRetryDispatchData(void* param, void* tmrId) { if (code != TSDB_CODE_SUCCESS) { if (!streamTaskShouldStop(pTask)) { -// stDebug("s-task:%s reset the waitRspCnt to be 0 before launch retry dispatch", pTask->id.idStr); -// atomic_store_32(&pTask->outputInfo.shuffleDispatcher.waitingRspCnt, 0); + // stDebug("s-task:%s reset the waitRspCnt to be 0 before launch retry dispatch", pTask->id.idStr); + // atomic_store_32(&pTask->outputInfo.shuffleDispatcher.waitingRspCnt, 
0); if (streamTaskShouldPause(pTask)) { streamRetryDispatchData(pTask, DISPATCH_RETRY_INTERVAL_MS * 10); } else { @@ -531,10 +535,12 @@ int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, S } if (pDataBlock->info.parTbName[0]) { - snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->outputInfo.shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName); + snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->outputInfo.shuffleDispatcher.dbInfo.db, + pDataBlock->info.parTbName); } else { buildCtbNameByGroupIdImpl(pTask->outputInfo.shuffleDispatcher.stbFullName, groupId, pDataBlock->info.parTbName); - snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->outputInfo.shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName); + snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->outputInfo.shuffleDispatcher.dbInfo.db, + pDataBlock->info.parTbName); } /*uint32_t hashValue = MurmurHash3_32(ctbName, strlen(ctbName));*/ @@ -576,13 +582,15 @@ int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, S } int32_t streamDispatchStreamBlock(SStreamTask* pTask) { - ASSERT((pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH)); + ASSERT((pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH || + pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH)); const char* id = pTask->id.idStr; - int32_t numOfElems = streamQueueGetNumOfItems(pTask->outputq.queue); + int32_t numOfElems = streamQueueGetNumOfItems(pTask->outputq.queue); if (numOfElems > 0) { double size = SIZE_IN_MiB(taosQueueMemorySize(pTask->outputq.queue->pQueue)); - stDebug("s-task:%s start to dispatch intermediate block to downstream, elem in outputQ:%d, size:%.2fMiB", id, numOfElems, size); + stDebug("s-task:%s start to dispatch intermediate block to downstream, elem in outputQ:%d, size:%.2fMiB", id, + numOfElems, size); } // to make sure only one dispatch is running @@ -593,6 +601,12 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { return 0; } + if (pTask->chkInfo.dispatchCheckpointTrigger) { + stDebug("s-task:%s already send checkpoint trigger, not dispatch anymore", id); + atomic_store_8(&pTask->outputq.status, TASK_OUTPUT_STATUS__NORMAL); + return 0; + } + ASSERT(pTask->msgInfo.pData == NULL); stDebug("s-task:%s start to dispatch msg, set output status:%d", id, pTask->outputq.status); @@ -612,7 +626,7 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { int32_t code = doBuildDispatchMsg(pTask, pBlock); if (code == 0) { destroyStreamDataBlock(pBlock); - } else { // todo handle build dispatch msg failed + } else { // todo handle build dispatch msg failed } int32_t retryCount = 0; @@ -635,8 +649,9 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { if (++retryCount > MAX_CONTINUE_RETRY_COUNT) { // add to timer to retry int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); - stDebug("s-task:%s failed to dispatch msg to downstream for %d times, code:%s, add timer to retry in %dms, ref:%d", - pTask->id.idStr, retryCount, tstrerror(terrno), DISPATCH_RETRY_INTERVAL_MS, ref); + stDebug( + "s-task:%s failed to dispatch msg to downstream for %d times, code:%s, add timer to retry in %dms, ref:%d", + pTask->id.idStr, retryCount, tstrerror(terrno), DISPATCH_RETRY_INTERVAL_MS, ref); streamRetryDispatchData(pTask, DISPATCH_RETRY_INTERVAL_MS); break; @@ -659,7 +674,8 @@ int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask) { if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { 
req.downstreamTaskId = pTask->outputInfo.fixedDispatcher.taskId; pTask->notReadyTasks = 1; - doDispatchScanHistoryFinishMsg(pTask, &req, pTask->outputInfo.fixedDispatcher.nodeId, &pTask->outputInfo.fixedDispatcher.epSet); + doDispatchScanHistoryFinishMsg(pTask, &req, pTask->outputInfo.fixedDispatcher.nodeId, + &pTask->outputInfo.fixedDispatcher.epSet); } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { SArray* vgInfo = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos; int32_t numOfVgs = taosArrayGetSize(vgInfo); @@ -667,8 +683,8 @@ int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask) { char* p = NULL; streamTaskGetStatus(pTask, &p); - stDebug("s-task:%s send scan-history data complete msg to downstream (shuffle-dispatch) %d tasks, status:%s", pTask->id.idStr, - numOfVgs, p); + stDebug("s-task:%s send scan-history data complete msg to downstream (shuffle-dispatch) %d tasks, status:%s", + pTask->id.idStr, numOfVgs, p); for (int32_t i = 0; i < numOfVgs; i++) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); req.downstreamTaskId = pVgInfo->taskId; @@ -692,11 +708,12 @@ int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask) { tmsgSendReq(&pInfo->upstreamNodeEpset, &pInfo->msg); stDebug("s-task:%s level:%d checkpoint ready msg sent to upstream:0x%x", pTask->id.idStr, pTask->info.taskLevel, - pInfo->upStreamTaskId); + pInfo->upStreamTaskId); } taosArrayClear(pTask->pReadyMsgList); - stDebug("s-task:%s level:%d checkpoint ready msg sent to all %d upstreams", pTask->id.idStr, pTask->info.taskLevel, num); + stDebug("s-task:%s level:%d checkpoint ready msg sent to all %d upstreams", pTask->id.idStr, pTask->info.taskLevel, + num); return TSDB_CODE_SUCCESS; } @@ -783,7 +800,7 @@ int32_t doDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamScanHist char* p = NULL; streamTaskGetStatus(pTask, &p); stDebug("s-task:%s status:%s dispatch scan-history finish msg to taskId:0x%x (vgId:%d)", pTask->id.idStr, p, - pReq->downstreamTaskId, vgId); + pReq->downstreamTaskId, vgId); return 0; } @@ -866,8 +883,8 @@ int32_t buildCheckpointSourceRsp(SStreamCheckpointSourceReq* pReq, SRpcHandleInf return 0; } -int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, - SStreamTask* pTask, int8_t isSucceed) { +int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SStreamTask* pTask, + int8_t isSucceed) { SStreamChkptReadyInfo info = {0}; buildCheckpointSourceRsp(pReq, pRpcInfo, &info.msg, isSucceed); @@ -876,7 +893,8 @@ int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHa } taosArrayPush(pTask->pReadyMsgList, &info); - stDebug("s-task:%s add checkpoint source rsp msg, total:%d", pTask->id.idStr, (int32_t)taosArrayGetSize(pTask->pReadyMsgList)); + stDebug("s-task:%s add checkpoint source rsp msg, total:%d", pTask->id.idStr, + (int32_t)taosArrayGetSize(pTask->pReadyMsgList)); return TSDB_CODE_SUCCESS; } @@ -926,8 +944,10 @@ int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId, initRpcMsg(&info.msg, TDMT_STREAM_TASK_CHECKPOINT_READY, buf, tlen + sizeof(SMsgHead)); info.msg.info.noResp = 1; // refactor later. 
- stDebug("s-task:%s (level:%d) prepare checkpoint ready msg to upstream s-task:0x%" PRIx64 ":0x%x (vgId:%d) idx:%d", - pTask->id.idStr, pTask->info.taskLevel, req.streamId, req.upstreamTaskId, req.upstreamNodeId, index); + stDebug("s-task:%s (level:%d) prepare checkpoint ready msg to upstream s-task:0x%" PRIx64 + ":0x%x (vgId:%d) idx:%d, vgId:%d", + pTask->id.idStr, pTask->info.taskLevel, req.streamId, req.upstreamTaskId, req.upstreamNodeId, index, + req.upstreamNodeId); if (pTask->pReadyMsgList == NULL) { pTask->pReadyMsgList = taosArrayInit(4, sizeof(SStreamChkptReadyInfo)); @@ -937,6 +957,18 @@ int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId, return 0; } +void streamClearChkptReadyMsg(SStreamTask* pTask) { + if (pTask->pReadyMsgList == NULL) { + return; + } + + for (int i = 0; i < taosArrayGetSize(pTask->pReadyMsgList); i++) { + SStreamChkptReadyInfo* pInfo = taosArrayGet(pTask->pReadyMsgList, i); + rpcFreeCont(pInfo->msg.pCont); + } + taosArrayClear(pTask->pReadyMsgList); +} + int32_t tEncodeCompleteHistoryDataMsg(SEncoder* pEncoder, const SStreamCompleteHistoryMsg* pReq) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; @@ -959,7 +991,8 @@ int32_t tDecodeCompleteHistoryDataMsg(SDecoder* pDecoder, SStreamCompleteHistory return 0; } -int32_t streamTaskBuildScanhistoryRspMsg(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, void** pBuffer, int32_t* pLen) { +int32_t streamTaskBuildScanhistoryRspMsg(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, void** pBuffer, + int32_t* pLen) { int32_t len = 0; int32_t code = 0; SEncoder encoder; @@ -1016,7 +1049,7 @@ int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, int32_t num = taosArrayGetSize(pTask->pRspMsgList); stDebug("s-task:%s add scan-history finish rsp msg for task:0x%x, total:%d", pTask->id.idStr, pReq->upstreamTaskId, - num); + num); return TSDB_CODE_SUCCESS; } @@ -1039,30 +1072,14 @@ int32_t streamNotifyUpstreamContinue(SStreamTask* pTask) { return 0; } -static void dispatchDataInFuture(void* param, void* tmrId) { - SStreamTask* pTask = param; - if (streamTaskShouldStop(pTask)) { - int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); - stDebug("s-task:%s should stop, abort from timer, ref:%d", pTask->id.idStr, ref); - return; - } - - ETaskStatus status = streamTaskGetStatus(pTask, NULL); - if (status == TASK_STATUS__CK) { - stDebug("s-task:%s in checkpoint status, wait for 500ms to dispatch data downstream", pTask->id.idStr); - taosTmrReset(dispatchDataInFuture, 500, pTask, streamEnv.timer, &pTask->msgInfo.pTimer); - } else { - int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); - stDebug("s-task:%s start to dispatch data, jump out of timer, ref:%d", pTask->id.idStr, ref); - streamDispatchStreamBlock(pTask); - } -} - // this message has been sent successfully, let's try next one. 
static int32_t handleDispatchSuccessRsp(SStreamTask* pTask, int32_t downstreamId) { destroyDispatchMsg(pTask->msgInfo.pData, getNumOfDispatchBranch(pTask)); bool delayDispatch = (pTask->msgInfo.dispatchMsgType == STREAM_INPUT__CHECKPOINT_TRIGGER); + if (delayDispatch) { + pTask->chkInfo.dispatchCheckpointTrigger = true; + } pTask->msgInfo.pData = NULL; pTask->msgInfo.dispatchMsgType = 0; @@ -1075,7 +1092,7 @@ static int32_t handleDispatchSuccessRsp(SStreamTask* pTask, int32_t downstreamId stDebug("s-task:%s downstream task:0x%x resume to normal from inputQ blocking, blocking time:%" PRId64 "ms", pTask->id.idStr, downstreamId, el); } else { - stDebug("s-task:%s dispatch completed, elapsed time:%"PRId64"ms", pTask->id.idStr, el); + stDebug("s-task:%s dispatch completed, elapsed time:%" PRId64 "ms", pTask->id.idStr, el); } // now ready for next data output @@ -1083,13 +1100,7 @@ static int32_t handleDispatchSuccessRsp(SStreamTask* pTask, int32_t downstreamId // otherwise, continue dispatch the first block to down stream task in pipeline if (delayDispatch) { - int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); - stDebug("s-task:%s in checkpoint status, add in timer, try dispatch data in 500ms, ref:%d", pTask->id.idStr, ref); - if (pTask->msgInfo.pTimer != NULL) { - taosTmrReset(dispatchDataInFuture, 500, pTask, streamEnv.timer, &pTask->msgInfo.pTimer); - } else { - pTask->msgInfo.pTimer = taosTmrStart(dispatchDataInFuture, 500, pTask, streamEnv.timer); - } + return 0; } else { streamDispatchStreamBlock(pTask); } @@ -1100,11 +1111,19 @@ static int32_t handleDispatchSuccessRsp(SStreamTask* pTask, int32_t downstreamId int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code) { const char* id = pTask->id.idStr; int32_t vgId = pTask->pMeta->vgId; - int32_t msgId = pTask->execInfo.dispatch; + int32_t msgId = pTask->execInfo.dispatch; + +#if 0 + // for test purpose, build the failure case + if (pTask->msgInfo.dispatchMsgType == STREAM_INPUT__CHECKPOINT_TRIGGER) { + pRsp->inputStatus = TASK_INPUT_STATUS__REFUSED; + } +#endif // follower not handle the dispatch rsp if ((pTask->pMeta->role == NODE_ROLE_FOLLOWER) || (pTask->status.downstreamReady != 1)) { - stError("s-task:%s vgId:%d is follower or task just re-launched, not handle the dispatch rsp, discard it", id, vgId); + stError("s-task:%s vgId:%d is follower or task just re-launched, not handle the dispatch rsp, discard it", id, + vgId); return TSDB_CODE_STREAM_TASK_NOT_EXIST; } @@ -1122,8 +1141,8 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i // flag. Here we need to retry dispatch this message to downstream task immediately. handle the case the failure // happened too fast. 
     if (code == TSDB_CODE_STREAM_TASK_NOT_EXIST) {  // destination task does not exist, not retry anymore
-      stError("s-task:%s failed to dispatch msg to task:0x%x(vgId:%d), msgId:%d no retry, since task destroyed already", id,
-              pRsp->downstreamTaskId, pRsp->downstreamNodeId, msgId);
+      stError("s-task:%s failed to dispatch msg to task:0x%x(vgId:%d), msgId:%d no retry, since task destroyed already",
+              id, pRsp->downstreamTaskId, pRsp->downstreamNodeId, msgId);
     } else {
       stError("s-task:%s failed to dispatch msgId:%d to task:0x%x(vgId:%d), code:%s, add to retry list", id, msgId,
               pRsp->downstreamTaskId, pRsp->downstreamNodeId, tstrerror(code));
@@ -1143,8 +1162,21 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i
       stWarn("s-task:%s inputQ of downstream task:0x%x(vgId:%d) is full, wait for %dms and retry dispatch", id,
              pRsp->downstreamTaskId, pRsp->downstreamNodeId, DISPATCH_RETRY_INTERVAL_MS);
     } else if (pRsp->inputStatus == TASK_INPUT_STATUS__REFUSED) {
-      stError("s-task:%s downstream task:0x%x(vgId:%d) refused the dispatch msg, treat it as success", id,
-              pRsp->downstreamTaskId, pRsp->downstreamNodeId);
+      // todo handle the agg task failure, add test case
+      if (pTask->msgInfo.dispatchMsgType == STREAM_INPUT__CHECKPOINT_TRIGGER &&
+          pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
+        stError("s-task:%s failed to dispatch checkpoint-trigger msg, checkpointId:%" PRId64
+                ", set the current checkpoint failed, and send rsp to mnode",
+                id, pTask->chkInfo.checkpointingId);
+        {  // send checkpoint failure msg to mnode directly
+          pTask->chkInfo.failedId = pTask->chkInfo.checkpointingId;  // record the latest failed checkpoint id
+          pTask->chkInfo.checkpointingId = pTask->chkInfo.checkpointingId;
+          streamTaskSendCheckpointSourceRsp(pTask);
+        }
+      } else {
+        stError("s-task:%s downstream task:0x%x(vgId:%d) refused the dispatch msg, treat it as success", id,
+                pRsp->downstreamTaskId, pRsp->downstreamNodeId);
+      }
     }
   }
 
@@ -1154,16 +1186,18 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i
     ASSERT(leftRsp >= 0);
 
     if (leftRsp > 0) {
-      stDebug( "s-task:%s recv dispatch rsp, msgId:%d from 0x%x(vgId:%d), downstream task input status:%d code:%s, waiting for %d rsp",
-          id, msgId, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->inputStatus, tstrerror(code), leftRsp);
+      stDebug(
+          "s-task:%s recv dispatch rsp, msgId:%d from 0x%x(vgId:%d), downstream task input status:%d code:%s, waiting "
+          "for %d rsp",
+          id, msgId, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->inputStatus, tstrerror(code), leftRsp);
     } else {
       stDebug(
           "s-task:%s recv dispatch rsp, msgId:%d from 0x%x(vgId:%d), downstream task input status:%d code:%s, all rsp",
          id, msgId, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->inputStatus, tstrerror(code));
     }
   } else {
-    stDebug("s-task:%s recv fix-dispatch rsp, msgId:%d from 0x%x(vgId:%d), downstream task input status:%d code:%s",
-            id, msgId, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->inputStatus, tstrerror(code));
+    stDebug("s-task:%s recv fix-dispatch rsp, msgId:%d from 0x%x(vgId:%d), downstream task input status:%d code:%s", id,
+            msgId, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->inputStatus, tstrerror(code));
   }
 
   ASSERT(leftRsp >= 0);
@@ -1185,7 +1219,7 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i
                pTask->id.idStr, DISPATCH_RETRY_INTERVAL_MS, ref);
 
       streamRetryDispatchData(pTask, DISPATCH_RETRY_INTERVAL_MS);
-    } else { // this message has been sent successfully, let's try
next one. + } else { // this message has been sent successfully, let's try next one. pTask->msgInfo.retryCount = 0; // transtate msg has been sent to downstream successfully. let's transfer the fill-history task state diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index cae537a860..25f32195be 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -48,6 +48,7 @@ static int32_t doOutputResultBlockImpl(SStreamTask* pTask, SStreamDataBlock* pBl return code; } + // checkpoint trigger will be checked streamDispatchStreamBlock(pTask); } @@ -251,14 +252,18 @@ static void streamScanHistoryDataImpl(SStreamTask* pTask, SArray* pRes, int32_t* SScanhistoryDataInfo streamScanHistoryData(SStreamTask* pTask, int64_t st) { ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); - void* exec = pTask->exec.pExecutor; - bool finished = false; + void* exec = pTask->exec.pExecutor; + bool finished = false; + const char* id = pTask->id.idStr; - qSetStreamOpOpen(exec); + if (!pTask->hTaskInfo.operatorOpen) { + qSetStreamOpOpen(exec); + pTask->hTaskInfo.operatorOpen = true; + } while (1) { if (streamTaskShouldPause(pTask)) { - stDebug("s-task:%s paused from the scan-history task", pTask->id.idStr); + stDebug("s-task:%s paused from the scan-history task", id); // quit from step1, not continue to handle the step2 return (SScanhistoryDataInfo){TASK_SCANHISTORY_QUIT, 0}; } @@ -266,8 +271,7 @@ SScanhistoryDataInfo streamScanHistoryData(SStreamTask* pTask, int64_t st) { SArray* pRes = taosArrayInit(0, sizeof(SSDataBlock)); if (pRes == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - stError("s-task:%s scan-history prepare result block failed, code:%s, retry later", pTask->id.idStr, - tstrerror(terrno)); + stError("s-task:%s scan-history prepare result block failed, code:%s, retry later", id, tstrerror(terrno)); continue; } @@ -280,12 +284,12 @@ SScanhistoryDataInfo streamScanHistoryData(SStreamTask* pTask, int64_t st) { } // dispatch the generated results - int32_t code = handleResultBlocks(pTask, pRes, size); + /*int32_t code = */handleResultBlocks(pTask, pRes, size); int64_t el = taosGetTimestampMs() - st; // downstream task input queue is full, try in 5sec - if (pTask->inputq.status == TASK_INPUT_STATUS__BLOCKED) { + if (pTask->inputq.status == TASK_INPUT_STATUS__BLOCKED && (pTask->info.fillHistory == 1)) { return (SScanhistoryDataInfo){TASK_SCANHISTORY_REXEC, 5000}; } @@ -293,9 +297,9 @@ SScanhistoryDataInfo streamScanHistoryData(SStreamTask* pTask, int64_t st) { return (SScanhistoryDataInfo){TASK_SCANHISTORY_CONT, 0}; } - if (el >= STREAM_SCAN_HISTORY_TIMESLICE) { - stDebug("s-task:%s fill-history:%d time slice exhausted, elapsed time:%.2fs, retry in 100ms", - pTask->id.idStr, pTask->info.fillHistory, el / 1000.0); + if (el >= STREAM_SCAN_HISTORY_TIMESLICE && (pTask->info.fillHistory == 1)) { + stDebug("s-task:%s fill-history:%d time slice exhausted, elapsed time:%.2fs, retry in 100ms", id, + pTask->info.fillHistory, el / 1000.0); return (SScanhistoryDataInfo){TASK_SCANHISTORY_REXEC, 100}; } } @@ -400,7 +404,7 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { // 5. save to disk pStreamTask->status.taskStatus = streamTaskGetStatus(pStreamTask, NULL); - // 6. pause allowed. + // 6. 
add empty delete block if ((pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE) && taosQueueEmpty(pStreamTask->inputq.queue->pQueue)) { SStreamRefDataBlock* pItem = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0); @@ -542,7 +546,7 @@ int32_t streamProcessTranstateBlock(SStreamTask* pTask, SStreamDataBlock* pBlock * todo: the batch of blocks should be tuned dynamic, according to the total elapsed time of each batch of blocks, the * appropriate batch of blocks should be handled in 5 to 10 sec. */ -int32_t streamExecForAll(SStreamTask* pTask) { +int32_t doStreamExecTask(SStreamTask* pTask) { const char* id = pTask->id.idStr; // merge multiple input data if possible in the input queue. @@ -653,7 +657,7 @@ int32_t streamExecTask(SStreamTask* pTask) { int8_t schedStatus = streamTaskSetSchedStatusActive(pTask); if (schedStatus == TASK_SCHED_STATUS__WAITING) { while (1) { - int32_t code = streamExecForAll(pTask); + int32_t code = doStreamExecTask(pTask); if (code < 0) { // todo this status should be removed atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__FAILED); return -1; diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index ae8c92d48e..bd23e41a84 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -18,6 +18,7 @@ #include "streamInt.h" #include "tmisce.h" #include "tref.h" +#include "tsched.h" #include "tstream.h" #include "ttimer.h" #include "wal.h" @@ -27,6 +28,7 @@ static TdThreadOnce streamMetaModuleInit = PTHREAD_ONCE_INIT; int32_t streamBackendId = 0; int32_t streamBackendCfWrapperId = 0; int32_t streamMetaId = 0; +int32_t taskDbWrapperId = 0; static void metaHbToMnode(void* param, void* tmrId); static void streamMetaClear(SStreamMeta* pMeta); @@ -55,6 +57,7 @@ int32_t metaRefMgtAdd(int64_t vgId, int64_t* rid); static void streamMetaEnvInit() { streamBackendId = taosOpenRef(64, streamBackendCleanup); streamBackendCfWrapperId = taosOpenRef(64, streamBackendHandleCleanup); + taskDbWrapperId = taosOpenRef(64, taskDbDestroy2); streamMetaId = taosOpenRef(64, streamMetaCloseImpl); @@ -62,6 +65,7 @@ static void streamMetaEnvInit() { } void streamMetaInit() { taosThreadOnce(&streamMetaModuleInit, streamMetaEnvInit); } + void streamMetaCleanup() { taosCloseRef(streamBackendId); taosCloseRef(streamBackendCfWrapperId); @@ -106,6 +110,174 @@ int32_t metaRefMgtAdd(int64_t vgId, int64_t* rid) { return 0; } +typedef struct { + int64_t chkpId; + char* path; + char* taskId; + + SArray* pChkpSave; + SArray* pChkpInUse; + int8_t chkpCap; + void* backend; + +} StreamMetaTaskState; + +int32_t streamMetaOpenTdb(SStreamMeta* pMeta) { + if (tdbOpen(pMeta->path, 16 * 1024, 1, &pMeta->db, 0) < 0) { + return -1; + // goto _err; + } + + if (tdbTbOpen("task.db", STREAM_TASK_KEY_LEN, -1, NULL, pMeta->db, &pMeta->pTaskDb, 0) < 0) { + return -1; + } + + if (tdbTbOpen("checkpoint.db", sizeof(int32_t), -1, NULL, pMeta->db, &pMeta->pCheckpointDb, 0) < 0) { + return -1; + } + return 0; +} + +// +// impl later +// +enum STREAM_STATE_VER { + STREAM_STATA_NO_COMPATIBLE, + STREAM_STATA_COMPATIBLE, + STREAM_STATA_NEED_CONVERT, +}; + +int32_t streamMetaCheckBackendCompatible(SStreamMeta* pMeta) { + int8_t ret = STREAM_STATA_COMPATIBLE; + TBC* pCur = NULL; + + if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) { + // no task info, no stream + return ret; + } + void* pKey = NULL; + int32_t kLen = 0; + void* pVal = NULL; + int32_t vLen = 0; + + tdbTbcMoveToFirst(pCur); + while (tdbTbcNext(pCur, &pKey, &kLen, &pVal, &vLen) == 0) { 
+ if (pVal == NULL || vLen == 0) { + break; + } + SDecoder decoder; + SCheckpointInfo info; + tDecoderInit(&decoder, (uint8_t*)pVal, vLen); + if (tDecodeStreamTaskChkInfo(&decoder, &info) < 0) { + continue; + } + if (info.msgVer <= SSTREAM_TASK_INCOMPATIBLE_VER) { + ret = STREAM_STATA_NO_COMPATIBLE; + } else if (info.msgVer == SSTREAM_TASK_NEED_CONVERT_VER) { + ret = STREAM_STATA_NEED_CONVERT; + } else if (info.msgVer == SSTREAM_TASK_VER) { + ret = STREAM_STATA_COMPATIBLE; + } + tDecoderClear(&decoder); + break; + } + tdbFree(pKey); + tdbFree(pVal); + tdbTbcClose(pCur); + return ret; +} + +int32_t streamMetaCvtDbFormat(SStreamMeta* pMeta) { + int32_t code = 0; + int64_t chkpId = streamMetaGetLatestCheckpointId(pMeta); + + bool exist = streamBackendDataIsExist(pMeta->path, chkpId, pMeta->vgId); + if (exist == false) { + return code; + } + SBackendWrapper* pBackend = streamBackendInit(pMeta->path, chkpId, pMeta->vgId); + + void* pIter = taosHashIterate(pBackend->cfInst, NULL); + while (pIter) { + void* key = taosHashGetKey(pIter, NULL); + code = streamStateCvtDataFormat(pMeta->path, key, *(void**)pIter); + if (code != 0) { + qError("failed to cvt data"); + goto _EXIT; + } + + pIter = taosHashIterate(pBackend->cfInst, pIter); + } + +_EXIT: + streamBackendCleanup((void*)pBackend); + + if (code == 0) { + char* state = taosMemoryCalloc(1, strlen(pMeta->path) + 32); + sprintf(state, "%s%s%s", pMeta->path, TD_DIRSEP, "state"); + taosRemoveDir(state); + taosMemoryFree(state); + } + + return code; +} +int32_t streamMetaMayCvtDbFormat(SStreamMeta* pMeta) { + int8_t compatible = streamMetaCheckBackendCompatible(pMeta); + if (compatible == STREAM_STATA_COMPATIBLE) { + return 0; + } else if (compatible == STREAM_STATA_NEED_CONVERT) { + qInfo("stream state need covert backend format"); + + return streamMetaCvtDbFormat(pMeta); + } else if (compatible == STREAM_STATA_NO_COMPATIBLE) { + qError( + "stream read incompatible data, rm %s/vnode/vnode*/tq/stream if taosd cannot start, and rebuild stream " + "manually", + tsDataDir); + + return -1; + } + return 0; +} + +int32_t streamTaskSetDb(SStreamMeta* pMeta, void* arg, char* key) { + SStreamTask* pTask = arg; + + int64_t chkpId = pTask->chkInfo.checkpointId; + + taosThreadMutexLock(&pMeta->backendMutex); + void** ppBackend = taosHashGet(pMeta->pTaskDbUnique, key, strlen(key)); + if (ppBackend != NULL && *ppBackend != NULL) { + taskDbAddRef(*ppBackend); + + STaskDbWrapper* pBackend = *ppBackend; + + pTask->backendRefId = pBackend->refId; + pTask->pBackend = pBackend; + taosThreadMutexUnlock(&pMeta->backendMutex); + + stDebug("s-task:0x%x set backend %p", pTask->id.taskId, pBackend); + return 0; + } + + STaskDbWrapper* pBackend = taskDbOpen(pMeta->path, key, chkpId); + if (pBackend == NULL) { + taosThreadMutexUnlock(&pMeta->backendMutex); + return -1; + } + + int64_t tref = taosAddRef(taskDbWrapperId, pBackend); + pTask->backendRefId = tref; + pTask->pBackend = pBackend; + pBackend->refId = tref; + pBackend->pTask = pTask; + + taosHashPut(pMeta->pTaskDbUnique, key, strlen(key), &pBackend, sizeof(void*)); + taosThreadMutexUnlock(&pMeta->backendMutex); + + stDebug("s-task:0x%x set backend %p", pTask->id.taskId, pBackend); + return 0; +} SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId, int64_t stage) { int32_t code = -1; SStreamMeta* pMeta = taosMemoryCalloc(1, sizeof(SStreamMeta)); @@ -121,15 +293,11 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF sprintf(tpath, "%s%s%s", path, 
TD_DIRSEP, "stream"); pMeta->path = tpath; - if (tdbOpen(pMeta->path, 16 * 1024, 1, &pMeta->db, 0) < 0) { + if (streamMetaOpenTdb(pMeta) < 0) { goto _err; } - if (tdbTbOpen("task.db", STREAM_TASK_KEY_LEN, -1, NULL, pMeta->db, &pMeta->pTaskDb, 0) < 0) { - goto _err; - } - - if (tdbTbOpen("checkpoint.db", sizeof(int32_t), -1, NULL, pMeta->db, &pMeta->pCheckpointDb, 0) < 0) { + if (streamMetaMayCvtDbFormat(pMeta) < 0) { goto _err; } if (streamMetaBegin(pMeta) < 0) { @@ -174,49 +342,43 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF pMeta->ahandle = ahandle; pMeta->expandFunc = expandFunc; pMeta->stage = stage; + pMeta->role = (vgId == SNODE_HANDLE) ? NODE_ROLE_LEADER : NODE_ROLE_UNINIT; - // send heartbeat every 5sec. - pMeta->rid = taosAddRef(streamMetaId, pMeta); - int64_t* pRid = taosMemoryMalloc(sizeof(int64_t)); - *pRid = pMeta->rid; + pMeta->pTaskDbUnique = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); - metaRefMgtAdd(pMeta->vgId, pRid); - - pMeta->pHbInfo->hbTmr = taosTmrStart(metaHbToMnode, META_HB_CHECK_INTERVAL, pRid, streamEnv.timer); - pMeta->pHbInfo->tickCounter = 0; - pMeta->pHbInfo->stopFlag = 0; - - pMeta->pTaskBackendUnique = - taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); - pMeta->chkpSaved = taosArrayInit(4, sizeof(int64_t)); - pMeta->chkpInUse = taosArrayInit(4, sizeof(int64_t)); - pMeta->chkpCap = 2; - taosInitRWLatch(&pMeta->chkpDirLock); - - pMeta->chkpId = streamMetaGetLatestCheckpointId(pMeta); - pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId, pMeta->vgId); - while (pMeta->streamBackend == NULL) { - taosMsleep(100); - pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId, vgId); - if (pMeta->streamBackend == NULL) { - stInfo("vgId:%d failed to init stream backend, retry in 100ms", pMeta->vgId); - } - } - pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend); - - pMeta->role = NODE_ROLE_UNINIT; - code = streamBackendLoadCheckpointInfo(pMeta); - - taosInitRWLatch(&pMeta->lock); - taosThreadMutexInit(&pMeta->backendMutex, NULL); + // pMeta->chkpId = streamGetLatestCheckpointId(pMeta); + // pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); + // while (pMeta->streamBackend == NULL) { + // qError("vgId:%d failed to init stream backend", pMeta->vgId); + // taosMsleep(2 * 1000); + // qInfo("vgId:%d retry to init stream backend", pMeta->vgId); + // pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); + // if (pMeta->streamBackend == NULL) { + // } + // } + // pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend); pMeta->numOfPausedTasks = 0; pMeta->numOfStreamTasks = 0; stInfo("vgId:%d open stream meta successfully, latest checkpoint:%" PRId64 ", stage:%" PRId64, vgId, pMeta->chkpId, stage); + + pMeta->rid = taosAddRef(streamMetaId, pMeta); + + int64_t* pRid = taosMemoryMalloc(sizeof(int64_t)); + memcpy(pRid, &pMeta->rid, sizeof(pMeta->rid)); + metaRefMgtAdd(pMeta->vgId, pRid); + + pMeta->pHbInfo->hbTmr = taosTmrStart(metaHbToMnode, META_HB_CHECK_INTERVAL, pRid, streamEnv.timer); + pMeta->pHbInfo->tickCounter = 0; + pMeta->pHbInfo->stopFlag = 0; + pMeta->qHandle = taosInitScheduler(32, 1, "stream-chkp", NULL); + + pMeta->bkdChkptMgt = bkdMgtCreate(tpath); + return pMeta; - _err: +_err: taosMemoryFree(pMeta->path); if (pMeta->pTasksMap) taosHashCleanup(pMeta->pTasksMap); if (pMeta->pTaskList) taosArrayDestroy(pMeta->pTaskList); @@ 
-311,14 +473,13 @@ void streamMetaClear(SStreamMeta* pMeta) { taosRemoveRef(streamBackendId, pMeta->streamBackendRid); taosHashClear(pMeta->pTasksMap); - taosHashClear(pMeta->pTaskBackendUnique); + taosHashClear(pMeta->pTaskDbUnique); taosArrayClear(pMeta->pTaskList); taosArrayClear(pMeta->chkpSaved); taosArrayClear(pMeta->chkpInUse); pMeta->numOfStreamTasks = 0; pMeta->numOfPausedTasks = 0; - pMeta->chkptNotReadyTasks = 0; // the willrestart/starting flag can NOT be cleared taosHashClear(pMeta->startInfo.pReadyTaskSet); @@ -360,7 +521,9 @@ void streamMetaCloseImpl(void* arg) { taosArrayDestroy(pMeta->chkpInUse); taosHashCleanup(pMeta->pTasksMap); - taosHashCleanup(pMeta->pTaskBackendUnique); + taosHashCleanup(pMeta->pTaskDbUnique); + taosHashCleanup(pMeta->pUpdateTaskSet); + // taosHashCleanup(pMeta->pTaskBackendUnique); taosHashCleanup(pMeta->updateInfo.pTasks); taosHashCleanup(pMeta->startInfo.pReadyTaskSet); taosHashCleanup(pMeta->startInfo.pFailedTaskSet); @@ -369,6 +532,11 @@ void streamMetaCloseImpl(void* arg) { taosMemoryFree(pMeta->path); taosThreadMutexDestroy(&pMeta->backendMutex); + taosCleanUpScheduler(pMeta->qHandle); + taosMemoryFree(pMeta->qHandle); + + bkdMgtDestroy(pMeta->bkdChkptMgt); + pMeta->role = NODE_ROLE_UNINIT; taosMemoryFree(pMeta); stDebug("end to close stream meta"); @@ -661,6 +829,11 @@ static void doClear(void* pKey, void* pVal, TBC* pCur, SArray* pRecycleList) { taosArrayDestroy(pRecycleList); } +int32_t streamMetaReloadAllTasks(SStreamMeta* pMeta) { + if (pMeta == NULL) return 0; + + return streamMetaLoadAllTasks(pMeta); +} int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) { TBC* pCur = NULL; int32_t vgId = pMeta->vgId; @@ -728,8 +901,6 @@ int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) { } else { // todo this should replace the existed object put by replay creating stream task msg from mnode stError("s-task:0x%x already added into table meta by replaying WAL, need check", pTask->id.taskId); - tdbFree(pKey); - tdbFree(pVal); taosMemoryFree(pTask); continue; } @@ -784,7 +955,7 @@ int32_t tEncodeStreamHbMsg(SEncoder* pEncoder, const SStreamHbMsg* pReq) { if (tEncodeI64(pEncoder, ps->id.streamId) < 0) return -1; if (tEncodeI32(pEncoder, ps->id.taskId) < 0) return -1; if (tEncodeI32(pEncoder, ps->status) < 0) return -1; - if (tEncodeI32(pEncoder, ps->stage) < 0) return -1; + if (tEncodeI64(pEncoder, ps->stage) < 0) return -1; if (tEncodeI32(pEncoder, ps->nodeId) < 0) return -1; if (tEncodeDouble(pEncoder, ps->inputQUsed) < 0) return -1; if (tEncodeDouble(pEncoder, ps->inputRate) < 0) return -1; @@ -822,7 +993,7 @@ int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pReq) { if (tDecodeI64(pDecoder, &entry.id.streamId) < 0) return -1; if (tDecodeI32(pDecoder, &taskId) < 0) return -1; if (tDecodeI32(pDecoder, &entry.status) < 0) return -1; - if (tDecodeI32(pDecoder, &entry.stage) < 0) return -1; + if (tDecodeI64(pDecoder, &entry.stage) < 0) return -1; if (tDecodeI32(pDecoder, &entry.nodeId) < 0) return -1; if (tDecodeDouble(pDecoder, &entry.inputQUsed) < 0) return -1; if (tDecodeDouble(pDecoder, &entry.inputRate) < 0) return -1; @@ -938,7 +1109,7 @@ void metaHbToMnode(void* param, void* tmrId) { SStreamHbMsg hbMsg = {0}; SEpSet epset = {0}; bool hasMnodeEpset = false; - int32_t stage = 0; + int64_t stage = 0; streamMetaRLock(pMeta); @@ -983,9 +1154,9 @@ void metaHbToMnode(void* param, void* tmrId) { entry.sinkDataSize = SIZE_IN_MiB((*pTask)->execInfo.sink.dataSize); } - if ((*pTask)->checkpointingId != 0) { - entry.checkpointFailed = 
((*pTask)->chkInfo.failedId >= (*pTask)->checkpointingId); - entry.activeCheckpointId = (*pTask)->checkpointingId; + if ((*pTask)->chkInfo.checkpointingId != 0) { + entry.checkpointFailed = ((*pTask)->chkInfo.failedId >= (*pTask)->chkInfo.checkpointingId); + entry.activeCheckpointId = (*pTask)->chkInfo.checkpointingId; } if ((*pTask)->exec.pWalReader != NULL) { @@ -1028,7 +1199,9 @@ void metaHbToMnode(void* param, void* tmrId) { } tEncoderClear(&encoder); - SRpcMsg msg = {.info.noResp = 1,}; + SRpcMsg msg = { + .info.noResp = 1, + }; initRpcMsg(&msg, TDMT_MND_STREAM_HEARTBEAT, buf, tlen); pMeta->pHbInfo->hbCount += 1; @@ -1040,7 +1213,7 @@ void metaHbToMnode(void* param, void* tmrId) { stDebug("vgId:%d no tasks and no mnd epset, not send stream hb to mnode", pMeta->vgId); } - _end: +_end: clearHbMsg(&hbMsg, pIdList); taosTmrReset(metaHbToMnode, META_HB_CHECK_INTERVAL, param, streamEnv.timer, &pMeta->pHbInfo->hbTmr); taosReleaseRef(streamMetaId, rid); @@ -1070,8 +1243,8 @@ bool streamMetaTaskInTimer(SStreamMeta* pMeta) { void streamMetaNotifyClose(SStreamMeta* pMeta) { int32_t vgId = pMeta->vgId; - stDebug("vgId:%d notify all stream tasks that the vnode is closing. isLeader:%d startHb:%" PRId64 ", totalHb:%d", vgId, - (pMeta->role == NODE_ROLE_LEADER), pMeta->pHbInfo->hbStart, pMeta->pHbInfo->hbCount); + stDebug("vgId:%d notify all stream tasks that the vnode is closing. isLeader:%d startHb:%" PRId64 ", totalHb:%d", + vgId, (pMeta->role == NODE_ROLE_LEADER), pMeta->pHbInfo->hbStart, pMeta->pHbInfo->hbCount); streamMetaWLock(pMeta); @@ -1117,11 +1290,6 @@ void streamMetaStartHb(SStreamMeta* pMeta) { metaHbToMnode(pRid, NULL); } -void streamMetaInitForSnode(SStreamMeta* pMeta) { - pMeta->stage = 0; - pMeta->role = NODE_ROLE_LEADER; -} - void streamMetaResetStartInfo(STaskStartInfo* pStartInfo) { taosHashClear(pStartInfo->pReadyTaskSet); taosHashClear(pStartInfo->pFailedTaskSet); @@ -1147,4 +1315,19 @@ void streamMetaWUnLock(SStreamMeta* pMeta) { stTrace("vgId:%d meta-wunlock", pMeta->vgId); taosWUnLockLatch(&pMeta->lock); } +static void execHelper(struct SSchedMsg* pSchedMsg) { + __async_exec_fn_t execFn = (__async_exec_fn_t)pSchedMsg->ahandle; + int32_t code = execFn(pSchedMsg->thandle); + if (code != 0 && pSchedMsg->msg != NULL) { + *(int32_t*)pSchedMsg->msg = code; + } +} +int32_t streamMetaAsyncExec(SStreamMeta* pMeta, __stream_async_exec_fn_t fn, void* param, int32_t* code) { + SSchedMsg schedMsg = {0}; + schedMsg.fp = execHelper; + schedMsg.ahandle = fn; + schedMsg.thandle = param; + schedMsg.msg = code; + return taosScheduleTask(pMeta->qHandle, &schedMsg); +} diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index 556de169b4..d1610362f9 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -221,7 +221,7 @@ int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInpu *pInput = qItem; } else { // merge current block failed, let's handle the already merged blocks. 
- void* newRet = streamMergeQueueItem(*pInput, qItem); + void* newRet = streamQueueMergeQueueItem(*pInput, qItem); if (newRet == NULL) { if (terrno != 0) { stError("s-task:%s failed to merge blocks from inputQ, numOfBlocks:%d, code:%s", id, *numOfBlocks, @@ -388,32 +388,36 @@ int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t pBucket->quotaCapacity = quotaRate * MAX_SMOOTH_BURST_RATIO; pBucket->quotaRemain = pBucket->quotaCapacity; - pBucket->fillTimestamp = taosGetTimestampMs(); + pBucket->tokenFillTimestamp = taosGetTimestampMs(); + pBucket->quotaFillTimestamp = taosGetTimestampMs(); stDebug("s-task:%s sink quotaRate:%.2fMiB, numRate:%d", id, quotaRate, numRate); return TSDB_CODE_SUCCESS; } static void fillTokenBucket(STokenBucket* pBucket, const char* id) { int64_t now = taosGetTimestampMs(); - int64_t delta = now - pBucket->fillTimestamp; + + int64_t deltaToken = now - pBucket->tokenFillTimestamp; ASSERT(pBucket->numOfToken >= 0); - int32_t incNum = (delta / 1000.0) * pBucket->numRate; + int32_t incNum = (deltaToken / 1000.0) * pBucket->numRate; if (incNum > 0) { pBucket->numOfToken = TMIN(pBucket->numOfToken + incNum, pBucket->numCapacity); - pBucket->fillTimestamp = now; + pBucket->tokenFillTimestamp = now; } // increase the new available quota as time goes on - double incSize = (delta / 1000.0) * pBucket->quotaRate; + int64_t deltaQuota = now - pBucket->quotaFillTimestamp; + double incSize = (deltaQuota / 1000.0) * pBucket->quotaRate; if (incSize > 0) { pBucket->quotaRemain = TMIN(pBucket->quotaRemain + incSize, pBucket->quotaCapacity); - pBucket->fillTimestamp = now; + pBucket->quotaFillTimestamp = now; } if (incNum > 0 || incSize > 0) { - stTrace("token/quota available, token:%d inc:%d, quota:%.2fMiB inc:%.3fMiB, ts:%" PRId64 " idle:%" PRId64 "ms, %s", - pBucket->numOfToken, incNum, pBucket->quotaRemain, incSize, now, delta, id); + stTrace("token/quota available, token:%d inc:%d, token_TsDelta:%" PRId64 + ", quota:%.2fMiB inc:%.3fMiB quotaTs:%" PRId64 " now:%" PRId64 "ms, %s", + pBucket->numOfToken, incNum, deltaToken, pBucket->quotaRemain, incSize, deltaQuota, now, id); } } diff --git a/source/libs/stream/src/streamSnapshot.c b/source/libs/stream/src/streamSnapshot.c index 5893bc14f1..e29f2ba7de 100644 --- a/source/libs/stream/src/streamSnapshot.c +++ b/source/libs/stream/src/streamSnapshot.c @@ -32,6 +32,7 @@ typedef struct SBackendFileItem { char* name; int8_t type; int64_t size; + int8_t ref; } SBackendFileItem; typedef struct SBackendFile { char* pCurrent; @@ -40,7 +41,28 @@ typedef struct SBackendFile { SArray* pSst; char* pCheckpointMeta; char* path; + } SBanckendFile; + +typedef struct SBackendSnapFiles2 { + char* pCurrent; + char* pMainfest; + char* pOptions; + SArray* pSst; + char* pCheckpointMeta; + char* path; + + int64_t checkpointId; + int64_t seraial; + int64_t offset; + TdFilePtr fd; + int8_t filetype; + SArray* pFileList; + int32_t currFileIdx; + SStreamTaskSnap snapInfo; + int8_t inited; + +} SBackendSnapFile2; struct SStreamSnapHandle { void* handle; SBanckendFile* pBackendFile; @@ -51,16 +73,23 @@ struct SStreamSnapHandle { int8_t filetype; SArray* pFileList; int32_t currFileIdx; - int8_t delFlag; // 0 : not del, 1: del + char* metaPath; + + SArray* pDbSnapSet; + int32_t currIdx; + int8_t delFlag; // 0 : not del, 1: del }; struct SStreamSnapBlockHdr { int8_t type; int8_t flag; int64_t index; - char name[128]; - int64_t totalSize; - int64_t size; - uint8_t data[]; + // int64_t streamId; + // int64_t taskId; + SStreamTaskSnap 
snapInfo; + char name[128]; + int64_t totalSize; + int64_t size; + uint8_t data[]; }; struct SStreamSnapReader { void* pMeta; @@ -82,7 +111,7 @@ const char* ROCKSDB_CURRENT = "CURRENT"; const char* ROCKSDB_CHECKPOINT_META = "CHECKPOINT"; static int64_t kBlockSize = 64 * 1024; -int32_t streamSnapHandleInit(SStreamSnapHandle* handle, char* path, int64_t chkpId, void* pMeta); +int32_t streamSnapHandleInit(SStreamSnapHandle* handle, char* path, void* pMeta); void streamSnapHandleDestroy(SStreamSnapHandle* handle); // static void streamBuildFname(char* path, char* file, char* fullname) @@ -106,195 +135,205 @@ int32_t streamGetFileSize(char* path, char* name, int64_t* sz) { TdFilePtr streamOpenFile(char* path, char* name, int32_t opt) { char fullname[256] = {0}; + STREAM_ROCKSDB_BUILD_FULLNAME(path, name, fullname); return taosOpenFile(fullname, opt); } -int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, int64_t chkpId, void* pMeta) { - // impl later - int len = strlen(path); - char* tdir = taosMemoryCalloc(1, len + 256); - memcpy(tdir, path, len); +int32_t streamTaskDbGetSnapInfo(void* arg, char* path, SArray* pSnap) { return taskDbBuildSnap(arg, pSnap); } - int32_t code = 0; +void snapFileDebugInfo(SBackendSnapFile2* pSnapFile) { + if (qDebugFlag & DEBUG_DEBUG) { + char* buf = taosMemoryCalloc(1, 512); + sprintf(buf + strlen(buf), "["); - int8_t validChkp = 0; - if (chkpId != 0) { - sprintf(tdir, "%s%s%s%s%s%scheckpoint%" PRId64 "", path, TD_DIRSEP, "stream", TD_DIRSEP, "checkpoints", TD_DIRSEP, - chkpId); - if (taosIsDir(tdir)) { - validChkp = 1; - stInfo("%s start to read snap %s", STREAM_STATE_TRANSFER, tdir); - streamBackendAddInUseChkp(pMeta, chkpId); - } else { - stWarn("%s failed to read from %s, reason: dir not exist,retry to default state dir", STREAM_STATE_TRANSFER, - tdir); + if (pSnapFile->pCurrent) sprintf(buf, "current: %s,", pSnapFile->pCurrent); + if (pSnapFile->pMainfest) sprintf(buf + strlen(buf), "MANIFEST: %s,", pSnapFile->pMainfest); + if (pSnapFile->pOptions) sprintf(buf + strlen(buf), "options: %s,", pSnapFile->pOptions); + if (pSnapFile->pSst) { + for (int i = 0; i < taosArrayGetSize(pSnapFile->pSst); i++) { + char* name = taosArrayGetP(pSnapFile->pSst, i); + sprintf(buf + strlen(buf), "%s,", name); + } } + sprintf(buf + strlen(buf) - 1, "]"); + + qInfo("%s %" PRId64 "-%" PRId64 " get file list: %s", STREAM_STATE_TRANSFER, pSnapFile->snapInfo.streamId, + pSnapFile->snapInfo.taskId, buf); + taosMemoryFree(buf); } +} - // no checkpoint specified or not exists invalid checkpoint, do checkpoint at default path and translate it - if (validChkp == 0) { - sprintf(tdir, "%s%s%s%s%s", path, TD_DIRSEP, "stream", TD_DIRSEP, "state"); - char* chkpdir = taosMemoryCalloc(1, len + 256); - sprintf(chkpdir, "%s%s%s", tdir, TD_DIRSEP, "tmp"); - taosMemoryFree(tdir); +int32_t snapFileGenMeta(SBackendSnapFile2* pSnapFile) { + SBackendFileItem item = {0}; + item.ref = 1; + // current + item.name = pSnapFile->pCurrent; + item.type = ROCKSDB_CURRENT_TYPE; + streamGetFileSize(pSnapFile->path, item.name, &item.size); + taosArrayPush(pSnapFile->pFileList, &item); - tdir = chkpdir; - stInfo("%s start to trigger checkpoint on %s", STREAM_STATE_TRANSFER, tdir); + // mainfest + item.name = pSnapFile->pMainfest; + item.type = ROCKSDB_MAINFEST_TYPE; + streamGetFileSize(pSnapFile->path, item.name, &item.size); + taosArrayPush(pSnapFile->pFileList, &item); - code = streamBackendTriggerChkp(pMeta, tdir); - if (code != 0) { - stError("%s failed to trigger chekckpoint at %s", 
STREAM_STATE_TRANSFER, tdir); - taosMemoryFree(tdir); - return code; - } - pHandle->delFlag = 1; - chkpId = 0; + // options + item.name = pSnapFile->pOptions; + item.type = ROCKSDB_OPTIONS_TYPE; + streamGetFileSize(pSnapFile->path, item.name, &item.size); + taosArrayPush(pSnapFile->pFileList, &item); + // sst + for (int i = 0; i < taosArrayGetSize(pSnapFile->pSst); i++) { + char* sst = taosArrayGetP(pSnapFile->pSst, i); + item.name = sst; + item.type = ROCKSDB_SST_TYPE; + streamGetFileSize(pSnapFile->path, item.name, &item.size); + taosArrayPush(pSnapFile->pFileList, &item); } - - stInfo("%s start to read dir: %s", STREAM_STATE_TRANSFER, tdir); - - TdDirPtr pDir = taosOpenDir(tdir); + // meta + item.name = pSnapFile->pCheckpointMeta; + item.type = ROCKSDB_CHECKPOINT_META_TYPE; + if (streamGetFileSize(pSnapFile->path, item.name, &item.size) == 0) { + taosArrayPush(pSnapFile->pFileList, &item); + } + return 0; +} +int32_t snapFileReadMeta(SBackendSnapFile2* pSnapFile) { + TdDirPtr pDir = taosOpenDir(pSnapFile->path); if (NULL == pDir) { - stError("%s failed to open %s", STREAM_STATE_TRANSFER, tdir); - goto _err; + qError("%s failed to open %s", STREAM_STATE_TRANSFER, pSnapFile->path); + return -1; } - SBanckendFile* pFile = taosMemoryCalloc(1, sizeof(SBanckendFile)); - pHandle->pBackendFile = pFile; - pHandle->checkpointId = chkpId; - pHandle->seraial = 0; - - pFile->path = tdir; - pFile->pSst = taosArrayInit(16, sizeof(void*)); - TdDirEntryPtr pDirEntry; while ((pDirEntry = taosReadDir(pDir)) != NULL) { char* name = taosGetDirEntryName(pDirEntry); if (strlen(name) >= strlen(ROCKSDB_CURRENT) && 0 == strncmp(name, ROCKSDB_CURRENT, strlen(ROCKSDB_CURRENT))) { - pFile->pCurrent = taosStrdup(name); + pSnapFile->pCurrent = taosStrdup(name); continue; } if (strlen(name) >= strlen(ROCKSDB_MAINFEST) && 0 == strncmp(name, ROCKSDB_MAINFEST, strlen(ROCKSDB_MAINFEST))) { - pFile->pMainfest = taosStrdup(name); + pSnapFile->pMainfest = taosStrdup(name); continue; } if (strlen(name) >= strlen(ROCKSDB_OPTIONS) && 0 == strncmp(name, ROCKSDB_OPTIONS, strlen(ROCKSDB_OPTIONS))) { - pFile->pOptions = taosStrdup(name); + pSnapFile->pOptions = taosStrdup(name); continue; } if (strlen(name) >= strlen(ROCKSDB_CHECKPOINT_META) && 0 == strncmp(name, ROCKSDB_CHECKPOINT_META, strlen(ROCKSDB_CHECKPOINT_META))) { - pFile->pCheckpointMeta = taosStrdup(name); + pSnapFile->pCheckpointMeta = taosStrdup(name); continue; } if (strlen(name) >= strlen(ROCKSDB_SST) && 0 == strncmp(name + strlen(name) - strlen(ROCKSDB_SST), ROCKSDB_SST, strlen(ROCKSDB_SST))) { char* sst = taosStrdup(name); - taosArrayPush(pFile->pSst, &sst); + taosArrayPush(pSnapFile->pSst, &sst); } } - if (qDebugFlag & DEBUG_TRACE) { - char* buf = taosMemoryCalloc(1, 128 + taosArrayGetSize(pFile->pSst) * 64); - sprintf(buf, "[current: %s,", pFile->pCurrent); - sprintf(buf + strlen(buf), "MANIFEST: %s,", pFile->pMainfest); - sprintf(buf + strlen(buf), "options: %s,", pFile->pOptions); - - for (int i = 0; i < taosArrayGetSize(pFile->pSst); i++) { - char* name = taosArrayGetP(pFile->pSst, i); - sprintf(buf + strlen(buf), "%s,", name); - } - sprintf(buf + strlen(buf) - 1, "]"); - - stInfo("%s get file list: %s", STREAM_STATE_TRANSFER, buf); - taosMemoryFree(buf); - } - taosCloseDir(&pDir); - - if (pFile->pCurrent == NULL) { - stError("%s failed to open %s, reason: no valid file", STREAM_STATE_TRANSFER, tdir); - code = -1; - tdir = NULL; - goto _err; - } - SArray* list = taosArrayInit(64, sizeof(SBackendFileItem)); - - SBackendFileItem item; - // current - item.name = 
pFile->pCurrent; - item.type = ROCKSDB_CURRENT_TYPE; - streamGetFileSize(pFile->path, item.name, &item.size); - taosArrayPush(list, &item); - - // mainfest - item.name = pFile->pMainfest; - item.type = ROCKSDB_MAINFEST_TYPE; - streamGetFileSize(pFile->path, item.name, &item.size); - taosArrayPush(list, &item); - - // options - item.name = pFile->pOptions; - item.type = ROCKSDB_OPTIONS_TYPE; - streamGetFileSize(pFile->path, item.name, &item.size); - taosArrayPush(list, &item); - // sst - for (int i = 0; i < taosArrayGetSize(pFile->pSst); i++) { - char* sst = taosArrayGetP(pFile->pSst, i); - item.name = sst; - item.type = ROCKSDB_SST_TYPE; - streamGetFileSize(pFile->path, item.name, &item.size); - taosArrayPush(list, &item); - } - // meta - item.name = pFile->pCheckpointMeta; - item.type = ROCKSDB_CHECKPOINT_META_TYPE; - if (streamGetFileSize(pFile->path, item.name, &item.size) == 0) { - taosArrayPush(list, &item); - } - - pHandle->pBackendFile = pFile; - - pHandle->currFileIdx = 0; - pHandle->pFileList = list; - pHandle->seraial = 0; - pHandle->offset = 0; - pHandle->handle = pMeta; return 0; +} +int32_t streamBackendSnapInitFile(char* metaPath, SStreamTaskSnap* pSnap, SBackendSnapFile2* pSnapFile) { + int32_t code = -1; + + char* path = taosMemoryCalloc(1, strlen(pSnap->dbPrefixPath) + 256); + // char idstr[64] = {0}; + sprintf(path, "%s%s%s%s%s%" PRId64 "", pSnap->dbPrefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", + pSnap->chkpId); + if (!taosIsDir(path)) { + goto _ERROR; + } + + pSnapFile->pSst = taosArrayInit(16, sizeof(void*)); + pSnapFile->pFileList = taosArrayInit(64, sizeof(SBackendFileItem)); + pSnapFile->path = path; + pSnapFile->snapInfo = *pSnap; + if ((code = snapFileReadMeta(pSnapFile)) != 0) { + goto _ERROR; + } + if ((code = snapFileGenMeta(pSnapFile)) != 0) { + goto _ERROR; + } + + snapFileDebugInfo(pSnapFile); + path = NULL; + code = 0; + +_ERROR: + taosMemoryFree(path); + return code; +} +void snapFileDestroy(SBackendSnapFile2* pSnap) { + taosMemoryFree(pSnap->pCheckpointMeta); + taosMemoryFree(pSnap->pCurrent); + taosMemoryFree(pSnap->pMainfest); + taosMemoryFree(pSnap->pOptions); + taosMemoryFree(pSnap->path); + for (int i = 0; i < taosArrayGetSize(pSnap->pSst); i++) { + char* sst = taosArrayGetP(pSnap->pSst, i); + taosMemoryFree(sst); + } + // unite read/write snap file + for (int i = 0; i < taosArrayGetSize(pSnap->pFileList); i++) { + SBackendFileItem* pItem = taosArrayGet(pSnap->pFileList, i); + if (pItem->ref == 0) { + taosMemoryFree(pItem->name); + } + } + taosArrayDestroy(pSnap->pFileList); + taosArrayDestroy(pSnap->pSst); + taosCloseFile(&pSnap->fd); + + return; +} +int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, void* pMeta) { + // impl later + SArray* pSnapSet = taosArrayInit(4, sizeof(SStreamTaskSnap)); + int32_t code = streamTaskDbGetSnapInfo(pMeta, path, pSnapSet); + if (code != 0) { + return -1; + } + + SArray* pDbSnapSet = taosArrayInit(8, sizeof(SBackendSnapFile2)); + + for (int i = 0; i < taosArrayGetSize(pSnapSet); i++) { + SStreamTaskSnap* pSnap = taosArrayGet(pSnapSet, i); + + SBackendSnapFile2 snapFile = {0}; + code = streamBackendSnapInitFile(path, pSnap, &snapFile); + ASSERT(code == 0); + taosArrayPush(pDbSnapSet, &snapFile); + } + for (int i = 0; i < taosArrayGetSize(pSnapSet); i++) { + SStreamTaskSnap* pSnap = taosArrayGet(pSnapSet, i); + taosMemoryFree(pSnap->dbPrefixPath); + } + taosArrayDestroy(pSnapSet); + + pHandle->pDbSnapSet = pDbSnapSet; + pHandle->currIdx = 0; + return 0; + _err: 
streamSnapHandleDestroy(pHandle); - taosMemoryFreeClear(tdir); code = -1; return code; } void streamSnapHandleDestroy(SStreamSnapHandle* handle) { - SBanckendFile* pFile = handle->pBackendFile; - - if (handle->checkpointId == 0) { - // del tmp dir - if (pFile && taosIsDir(pFile->path)) { - if (handle->delFlag) taosRemoveDir(pFile->path); + if (handle->pDbSnapSet) { + for (int i = 0; i < taosArrayGetSize(handle->pDbSnapSet); i++) { + SBackendSnapFile2* pSnapFile = taosArrayGet(handle->pDbSnapSet, i); + snapFileDebugInfo(pSnapFile); + snapFileDestroy(pSnapFile); } - } else { - streamBackendDelInUseChkp(handle->handle, handle->checkpointId); + taosArrayDestroy(handle->pDbSnapSet); } - if (pFile) { - taosMemoryFree(pFile->pCheckpointMeta); - taosMemoryFree(pFile->pCurrent); - taosMemoryFree(pFile->pMainfest); - taosMemoryFree(pFile->pOptions); - taosMemoryFree(pFile->path); - for (int i = 0; i < taosArrayGetSize(pFile->pSst); i++) { - char* sst = taosArrayGetP(pFile->pSst, i); - taosMemoryFree(sst); - } - taosArrayDestroy(pFile->pSst); - taosMemoryFree(pFile); - } - taosArrayDestroy(handle->pFileList); - taosCloseFile(&handle->fd); + taosMemoryFree(handle->metaPath); return; } @@ -305,7 +344,7 @@ int32_t streamSnapReaderOpen(void* pMeta, int64_t sver, int64_t chkpId, char* pa return TSDB_CODE_OUT_OF_MEMORY; } - if (streamSnapHandleInit(&pReader->handle, (char*)path, chkpId, pMeta) < 0) { + if (streamSnapHandleInit(&pReader->handle, (char*)path, pMeta) < 0) { taosMemoryFree(pReader); return -1; } @@ -321,34 +360,50 @@ int32_t streamSnapReaderClose(SStreamSnapReader* pReader) { taosMemoryFree(pReader); return 0; } + int32_t streamSnapRead(SStreamSnapReader* pReader, uint8_t** ppData, int64_t* size) { // impl later int32_t code = 0; SStreamSnapHandle* pHandle = &pReader->handle; - SBanckendFile* pFile = pHandle->pBackendFile; + int32_t idx = pHandle->currIdx; - SBackendFileItem* item = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx); + SBackendSnapFile2* pSnapFile = taosArrayGet(pHandle->pDbSnapSet, idx); + if (pSnapFile == NULL) { + return 0; + } + SBackendFileItem* item = NULL; + +_NEXT: + + if (pSnapFile->fd == NULL) { + if (pSnapFile->currFileIdx >= taosArrayGetSize(pSnapFile->pFileList)) { + if (pHandle->currIdx + 1 < taosArrayGetSize(pHandle->pDbSnapSet)) { + pHandle->currIdx += 1; + + pSnapFile = taosArrayGet(pHandle->pDbSnapSet, pHandle->currIdx); + goto _NEXT; + } else { + *ppData = NULL; + *size = 0; + return 0; + } - if (pHandle->fd == NULL) { - if (pHandle->currFileIdx >= taosArrayGetSize(pHandle->pFileList)) { - // finish - *ppData = NULL; - *size = 0; - return 0; } else { - pHandle->fd = streamOpenFile(pFile->path, item->name, TD_FILE_READ); + item = taosArrayGet(pSnapFile->pFileList, pSnapFile->currFileIdx); + pSnapFile->fd = streamOpenFile(pSnapFile->path, item->name, TD_FILE_READ); stDebug("%s open file %s, current offset:%" PRId64 ", size:% " PRId64 ", file no.%d", STREAM_STATE_TRANSFER, - item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); + item->name, (int64_t)pSnapFile->offset, item->size, pSnapFile->currFileIdx); } } + item = taosArrayGet(pSnapFile->pFileList, pSnapFile->currFileIdx); + + qDebug("%s start to read file %s, current offset:%" PRId64 ", size:%" PRId64 + ", file no.%d, total set:%d, current set idx: %d", + STREAM_STATE_TRANSFER, item->name, (int64_t)pSnapFile->offset, item->size, pSnapFile->currFileIdx, + (int)taosArrayGetSize(pHandle->pDbSnapSet), pHandle->currIdx); - stDebug("%s start to read file %s, current offset:%" PRId64 ", 
size:%" PRId64 ", file no.%d", STREAM_STATE_TRANSFER, - item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); uint8_t* buf = taosMemoryCalloc(1, sizeof(SStreamSnapBlockHdr) + kBlockSize); - if (buf == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - int64_t nread = taosPReadFile(pHandle->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pHandle->offset); + int64_t nread = taosPReadFile(pSnapFile->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pSnapFile->offset); if (nread == -1) { taosMemoryFree(buf); code = TAOS_SYSTEM_ERROR(terrno); @@ -358,44 +413,51 @@ int32_t streamSnapRead(SStreamSnapReader* pReader, uint8_t** ppData, int64_t* si } else if (nread > 0 && nread <= kBlockSize) { // left bytes less than kBlockSize stDebug("%s read file %s, current offset:%" PRId64 ",size:% " PRId64 ", file no.%d", STREAM_STATE_TRANSFER, - item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); - pHandle->offset += nread; - if (pHandle->offset >= item->size || nread < kBlockSize) { - taosCloseFile(&pHandle->fd); - pHandle->offset = 0; - pHandle->currFileIdx += 1; + item->name, (int64_t)pSnapFile->offset, item->size, pSnapFile->currFileIdx); + pSnapFile->offset += nread; + if (pSnapFile->offset >= item->size || nread < kBlockSize) { + taosCloseFile(&pSnapFile->fd); + pSnapFile->offset = 0; + pSnapFile->currFileIdx += 1; } } else { stDebug("%s no data read, close file no.%d, move to next file, open and read", STREAM_STATE_TRANSFER, - pHandle->currFileIdx); - taosCloseFile(&pHandle->fd); - pHandle->offset = 0; - pHandle->currFileIdx += 1; + pSnapFile->currFileIdx); + taosCloseFile(&pSnapFile->fd); + pSnapFile->offset = 0; + pSnapFile->currFileIdx += 1; - if (pHandle->currFileIdx >= taosArrayGetSize(pHandle->pFileList)) { + if (pSnapFile->currFileIdx >= taosArrayGetSize(pSnapFile->pFileList)) { // finish - *ppData = NULL; - *size = 0; - taosMemoryFree(buf); - return 0; + if (pHandle->currIdx + 1 < taosArrayGetSize(pHandle->pDbSnapSet)) { + // skip to next snap set + pHandle->currIdx += 1; + pSnapFile = taosArrayGet(pHandle->pDbSnapSet, pHandle->currIdx); + goto _NEXT; + } else { + *ppData = NULL; + *size = 0; + return 0; + } } - item = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx); - pHandle->fd = streamOpenFile(pFile->path, item->name, TD_FILE_READ); + item = taosArrayGet(pSnapFile->pFileList, pSnapFile->currFileIdx); + pSnapFile->fd = streamOpenFile(pSnapFile->path, item->name, TD_FILE_READ); - nread = taosPReadFile(pHandle->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pHandle->offset); - pHandle->offset += nread; + nread = taosPReadFile(pSnapFile->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pSnapFile->offset); + pSnapFile->offset += nread; stDebug("%s open file and read file %s, current offset:%" PRId64 ", size:% " PRId64 ", file no.%d", - STREAM_STATE_TRANSFER, item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); + STREAM_STATE_TRANSFER, item->name, (int64_t)pSnapFile->offset, item->size, pSnapFile->currFileIdx); } SStreamSnapBlockHdr* pHdr = (SStreamSnapBlockHdr*)buf; pHdr->size = nread; pHdr->type = item->type; pHdr->totalSize = item->size; + pHdr->snapInfo = pSnapFile->snapInfo; memcpy(pHdr->name, item->name, strlen(item->name)); - pHandle->seraial += nread; + pSnapFile->seraial += nread; *ppData = buf; *size = sizeof(SStreamSnapBlockHdr) + nread; @@ -408,101 +470,133 @@ int32_t streamSnapWriterOpen(void* pMeta, int64_t sver, int64_t ever, char* path if (pWriter == NULL) { return TSDB_CODE_OUT_OF_MEMORY; } + 
SStreamSnapHandle* pHandle = &pWriter->handle; + pHandle->currIdx = 0; + pHandle->metaPath = taosStrdup(path); + pHandle->pDbSnapSet = taosArrayInit(8, sizeof(SBackendSnapFile2)); - SBanckendFile* pFile = taosMemoryCalloc(1, sizeof(SBanckendFile)); - pFile->path = taosStrdup(path); - SArray* list = taosArrayInit(64, sizeof(SBackendFileItem)); - - SBackendFileItem item; - item.name = taosStrdup((char*)ROCKSDB_CURRENT); - item.type = ROCKSDB_CURRENT_TYPE; - taosArrayPush(list, &item); - - pHandle->pBackendFile = pFile; - - pHandle->pFileList = list; - pHandle->currFileIdx = 0; - pHandle->offset = 0; - pHandle->delFlag = 0; + SBackendSnapFile2 snapFile = {0}; + taosArrayPush(pHandle->pDbSnapSet, &snapFile); *ppWriter = pWriter; return 0; } +int32_t snapInfoEqual(SStreamTaskSnap* a, SStreamTaskSnap* b) { + if (a->streamId != b->streamId || a->taskId != b->taskId || a->chkpId != b->chkpId) { + return 0; + } + return 1; +} + +int32_t streamSnapWriteImpl(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t nData, SBackendSnapFile2* pSnapFile) { + int code = -1; + SStreamSnapBlockHdr* pHdr = (SStreamSnapBlockHdr*)pData; + SStreamSnapHandle* pHandle = &pWriter->handle; + SStreamTaskSnap snapInfo = pHdr->snapInfo; + + SStreamTaskSnap* pSnapInfo = &pSnapFile->snapInfo; + + SBackendFileItem* pItem = taosArrayGet(pSnapFile->pFileList, pSnapFile->currFileIdx); + + if (pSnapFile->fd == 0) { + pSnapFile->fd = streamOpenFile(pSnapFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); + if (pSnapFile->fd == NULL) { + code = TAOS_SYSTEM_ERROR(terrno); + stError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pHandle->metaPath, TD_DIRSEP, + pHdr->name, tstrerror(code)); + } + } + if (strlen(pHdr->name) == strlen(pItem->name) && strcmp(pHdr->name, pItem->name) == 0) { + int64_t bytes = taosPWriteFile(pSnapFile->fd, pHdr->data, pHdr->size, pSnapFile->offset); + if (bytes != pHdr->size) { + code = TAOS_SYSTEM_ERROR(terrno); + stError("%s failed to write snap, file name:%s, reason:%s", STREAM_STATE_TRANSFER, pHdr->name, tstrerror(code)); + return code; + } else { + qInfo("succ to write data %s", pItem->name); + } + pSnapFile->offset += bytes; + } else { + taosCloseFile(&pSnapFile->fd); + pSnapFile->offset = 0; + pSnapFile->currFileIdx += 1; + + SBackendFileItem item = {0}; + item.name = taosStrdup(pHdr->name); + item.type = pHdr->type; + + taosArrayPush(pSnapFile->pFileList, &item); + + SBackendFileItem* pItem = taosArrayGet(pSnapFile->pFileList, pSnapFile->currFileIdx); + pSnapFile->fd = streamOpenFile(pSnapFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); + if (pSnapFile->fd == NULL) { + code = TAOS_SYSTEM_ERROR(terrno); + stError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pSnapFile->path, TD_DIRSEP, + pHdr->name, tstrerror(code)); + } + + taosPWriteFile(pSnapFile->fd, pHdr->data, pHdr->size, pSnapFile->offset); + qInfo("succ to write data %s", pItem->name); + pSnapFile->offset += pHdr->size; + } + code = 0; +_EXIT: + return code; +} + int32_t streamSnapWrite(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { int32_t code = 0; SStreamSnapBlockHdr* pHdr = (SStreamSnapBlockHdr*)pData; SStreamSnapHandle* pHandle = &pWriter->handle; - SBanckendFile* pFile = pHandle->pBackendFile; - SBackendFileItem* pItem = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx); + SStreamTaskSnap snapInfo = pHdr->snapInfo; - if (pHandle->fd == NULL) { - pHandle->fd = streamOpenFile(pFile->path, pItem->name, TD_FILE_CREATE | 
TD_FILE_WRITE | TD_FILE_APPEND); - if (pHandle->fd == NULL) { - code = TAOS_SYSTEM_ERROR(terrno); - stError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pFile->path, TD_DIRSEP, - pHdr->name, tstrerror(code)); - } - } + SBackendSnapFile2* pDbSnapFile = taosArrayGet(pHandle->pDbSnapSet, pHandle->currIdx); + if (pDbSnapFile->inited == 0) { + char idstr[64] = {0}; + sprintf(idstr, "0x%" PRIx64 "-0x%x", snapInfo.streamId, (int32_t)(snapInfo.taskId)); - if (strlen(pHdr->name) == strlen(pItem->name) && strcmp(pHdr->name, pItem->name) == 0) { - int64_t bytes = taosPWriteFile(pHandle->fd, pHdr->data, pHdr->size, pHandle->offset); - if (bytes != pHdr->size) { - code = TAOS_SYSTEM_ERROR(terrno); - stError("%s failed to write snap, file name:%s, reason:%s", STREAM_STATE_TRANSFER, pHdr->name, tstrerror(code)); - return code; + char* path = taosMemoryCalloc(1, strlen(pHandle->metaPath) + 256); + sprintf(path, "%s%s%s%s%s%s%s%" PRId64 "", pHandle->metaPath, TD_DIRSEP, idstr, TD_DIRSEP, "checkpoints", TD_DIRSEP, + "checkpoint", snapInfo.chkpId); + if (!taosIsDir(path)) { + code = taosMulMkDir(path); + qInfo("%s mkdir %s", STREAM_STATE_TRANSFER, path); + ASSERT(code == 0); } - pHandle->offset += bytes; + + pDbSnapFile->path = path; + pDbSnapFile->snapInfo = snapInfo; + pDbSnapFile->pFileList = taosArrayInit(64, sizeof(SBackendFileItem)); + pDbSnapFile->currFileIdx = 0; + pDbSnapFile->offset = 0; + + SBackendFileItem item = {0}; + item.name = taosStrdup((char*)ROCKSDB_CURRENT); + item.type = ROCKSDB_CURRENT_TYPE; + + taosArrayPush(pDbSnapFile->pFileList, &item); + + pDbSnapFile->inited = 1; + return streamSnapWriteImpl(pWriter, pData, nData, pDbSnapFile); } else { - taosCloseFile(&pHandle->fd); - pHandle->offset = 0; - pHandle->currFileIdx += 1; + if (snapInfoEqual(&snapInfo, &pDbSnapFile->snapInfo)) { + return streamSnapWriteImpl(pWriter, pData, nData, pDbSnapFile); + } else { + SBackendSnapFile2 snapFile = {0}; + taosArrayPush(pHandle->pDbSnapSet, &snapFile); + pHandle->currIdx += 1; - SBackendFileItem item; - item.name = taosStrdup(pHdr->name); - item.type = pHdr->type; - taosArrayPush(pHandle->pFileList, &item); - - SBackendFileItem* pItem = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx); - pHandle->fd = streamOpenFile(pFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); - if (pHandle->fd == NULL) { - code = TAOS_SYSTEM_ERROR(terrno); - stError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pFile->path, TD_DIRSEP, - pHdr->name, tstrerror(code)); + return streamSnapWrite(pWriter, pData, nData); } - - taosPWriteFile(pHandle->fd, pHdr->data, pHdr->size, pHandle->offset); - pHandle->offset += pHdr->size; } - - // impl later - return 0; + return code; } int32_t streamSnapWriterClose(SStreamSnapWriter* pWriter, int8_t rollback) { - SStreamSnapHandle* handle = &pWriter->handle; - if (qDebugFlag & DEBUG_TRACE) { - char* buf = (char*)taosMemoryMalloc(128 + taosArrayGetSize(handle->pFileList) * 64); - int n = sprintf(buf, "["); - for (int i = 0; i < taosArrayGetSize(handle->pFileList); i++) { - SBackendFileItem* item = taosArrayGet(handle->pFileList, i); - if (i != taosArrayGetSize(handle->pFileList) - 1) { - n += sprintf(buf + n, "%s %" PRId64 ",", item->name, item->size); - } else { - n += sprintf(buf + n, "%s %" PRId64 "]", item->name, item->size); - } - } - stDebug("%s snap get file list, %s", STREAM_STATE_TRANSFER, buf); - taosMemoryFree(buf); - } - - for (int i = 0; i < taosArrayGetSize(handle->pFileList); i++) { - 
SBackendFileItem* item = taosArrayGet(handle->pFileList, i); - taosMemoryFree(item->name); - } - - streamSnapHandleDestroy(handle); + if (pWriter == NULL) return 0; + streamSnapHandleDestroy(&pWriter->handle); taosMemoryFree(pWriter); return 0; diff --git a/source/libs/stream/src/streamStart.c b/source/libs/stream/src/streamStart.c index 97eb7b79a2..a4c448f678 100644 --- a/source/libs/stream/src/streamStart.c +++ b/source/libs/stream/src/streamStart.c @@ -168,7 +168,6 @@ int32_t streamTaskStartScanHistory(SStreamTask* pTask) { } else if (level == TASK_LEVEL__AGG) { if (pTask->info.fillHistory) { streamSetParamForScanHistory(pTask); - streamTaskEnablePause(pTask); } } else if (level == TASK_LEVEL__SINK) { stDebug("s-task:%s sink task do nothing to handle scan-history", pTask->id.idStr); @@ -290,10 +289,11 @@ static void recheckDownstreamTasks(void* param, void* tmrId) { stDebug("s-task:%s complete send check in timer, ref:%d", pTask->id.idStr, ref); } -int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage) { +int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage, int64_t* oldStage) { SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId); ASSERT(pInfo != NULL); + *oldStage = pInfo->stage; const char* id = pTask->id.idStr; if (stage == -1) { stDebug("s-task:%s receive check msg from upstream task:0x%x(vgId:%d), invalid stageId:%" PRId64 ", not ready", id, @@ -323,7 +323,7 @@ int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_ } } -int32_t onNormalTaskReady(SStreamTask* pTask) { +int32_t streamTaskOnNormalTaskReady(SStreamTask* pTask) { const char* id = pTask->id.idStr; streamTaskSetReady(pTask); @@ -345,11 +345,10 @@ int32_t onNormalTaskReady(SStreamTask* pTask) { stDebug("s-task:%s level:%d status:%s sched-status:%d", id, pTask->info.taskLevel, p, pTask->status.schedStatus); } - streamTaskEnablePause(pTask); return TSDB_CODE_SUCCESS; } -int32_t onScanhistoryTaskReady(SStreamTask* pTask) { +int32_t streamTaskOnScanhistoryTaskReady(SStreamTask* pTask) { const char* id = pTask->id.idStr; // set the state to be ready @@ -410,7 +409,6 @@ static void addIntoNodeUpdateList(SStreamTask* pTask, int32_t nodeId) { int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp) { ASSERT(pTask->id.taskId == pRsp->upstreamTaskId); const char* id = pTask->id.idStr; - int32_t vgId = pTask->pMeta->vgId; if (streamTaskShouldStop(pTask)) { stDebug("s-task:%s should stop, do not do check downstream again", id); @@ -459,24 +457,33 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs if (pRsp->status == TASK_UPSTREAM_NEW_STAGE || pRsp->status == TASK_DOWNSTREAM_NOT_LEADER) { if (pRsp->status == TASK_UPSTREAM_NEW_STAGE) { stError( - "s-task:%s vgId:%d self vnode-transfer/leader-change/restart detected, old stage:%d, current stage:%d, " + "s-task:%s vgId:%d self vnode-transfer/leader-change/restart detected, old stage:%"PRId64", current stage:%"PRId64", " "not check wait for downstream task nodeUpdate, and all tasks restart", - id, pRsp->upstreamNodeId, pRsp->oldStage, (int32_t)pTask->pMeta->stage); + id, pRsp->upstreamNodeId, pRsp->oldStage, pTask->pMeta->stage); + addIntoNodeUpdateList(pTask, pRsp->upstreamNodeId); } else { stError( "s-task:%s downstream taskId:0x%x (vgId:%d) not leader, self dispatch epset needs to be updated, not check " "downstream again, nodeUpdate needed", id, pRsp->downstreamTaskId, 
pRsp->downstreamNodeId); + addIntoNodeUpdateList(pTask, pRsp->downstreamNodeId); } - addIntoNodeUpdateList(pTask, pRsp->downstreamNodeId); streamMetaUpdateTaskDownstreamStatus(pTask, pTask->execInfo.init, taosGetTimestampMs(), false); + // automatically set the related fill-history task to be failed. + if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { + STaskId* pId = &pTask->hTaskInfo.id; + + SStreamTask* pHTask = streamMetaAcquireTask(pTask->pMeta, pId->streamId, pId->taskId); + streamMetaUpdateTaskDownstreamStatus(pHTask, pHTask->execInfo.init, taosGetTimestampMs(), false); + streamMetaReleaseTask(pTask->pMeta, pHTask); + } } else { // TASK_DOWNSTREAM_NOT_READY, let's retry in 100ms STaskRecheckInfo* pInfo = createRecheckInfo(pTask, pRsp); int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); - stDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, stage:%d, retry in 100ms, ref:%d ", id, + stDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, stage:%"PRId64", retry in 100ms, ref:%d ", id, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->oldStage, ref); pInfo->checkTimer = taosTmrStart(recheckDownstreamTasks, CHECK_DOWNSTREAM_INTERVAL, pInfo, streamEnv.timer); } @@ -659,9 +666,6 @@ int32_t streamProcessScanHistoryFinishRsp(SStreamTask* pTask) { streamMetaCommit(pMeta); streamMetaWUnLock(pMeta); - // history data scan in the stream time window finished, now let's enable the pause - streamTaskEnablePause(pTask); - // for source tasks, let's continue execute. if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { streamSchedExec(pTask); @@ -926,7 +930,7 @@ int32_t tEncodeStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp* if (tEncodeI32(pEncoder, pRsp->downstreamNodeId) < 0) return -1; if (tEncodeI32(pEncoder, pRsp->downstreamTaskId) < 0) return -1; if (tEncodeI32(pEncoder, pRsp->childId) < 0) return -1; - if (tEncodeI32(pEncoder, pRsp->oldStage) < 0) return -1; + if (tEncodeI64(pEncoder, pRsp->oldStage) < 0) return -1; if (tEncodeI8(pEncoder, pRsp->status) < 0) return -1; tEndEncode(pEncoder); return pEncoder->pos; @@ -941,7 +945,7 @@ int32_t tDecodeStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp) if (tDecodeI32(pDecoder, &pRsp->downstreamNodeId) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->downstreamTaskId) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->childId) < 0) return -1; - if (tDecodeI32(pDecoder, &pRsp->oldStage) < 0) return -1; + if (tDecodeI64(pDecoder, &pRsp->oldStage) < 0) return -1; if (tDecodeI8(pDecoder, &pRsp->status) < 0) return -1; tEndDecode(pDecoder); return 0; @@ -1040,11 +1044,6 @@ void streamTaskResume(SStreamTask* pTask) { } } -void streamTaskEnablePause(SStreamTask* pTask) { - stDebug("s-task:%s enable task pause", pTask->id.idStr); - pTask->status.pauseAllowed = 1; -} - static void displayStatusInfo(SStreamMeta* pMeta, SHashObj* pTaskSet, bool succ) { int32_t vgId = pMeta->vgId; void* pIter = NULL; @@ -1081,8 +1080,9 @@ int32_t streamMetaUpdateTaskDownstreamStatus(SStreamTask* pTask, int64_t startTs taosHashPut(pDst, &id, sizeof(id), &initTs, sizeof(STaskInitTs)); int32_t numOfTotal = streamMetaGetNumOfTasks(pMeta); + int32_t numOfRecv = taosHashGetSize(pStartInfo->pReadyTaskSet) + taosHashGetSize(pStartInfo->pFailedTaskSet); - if (taosHashGetSize(pStartInfo->pReadyTaskSet) + taosHashGetSize(pStartInfo->pFailedTaskSet) == numOfTotal) { + if (numOfRecv == numOfTotal) { pStartInfo->readyTs = taosGetTimestampMs(); pStartInfo->elapsedTime = (pStartInfo->startTs != 0) ? 
pStartInfo->readyTs - pStartInfo->startTs : 0; @@ -1096,6 +1096,8 @@ int32_t streamMetaUpdateTaskDownstreamStatus(SStreamTask* pTask, int64_t startTs displayStatusInfo(pMeta, pStartInfo->pFailedTaskSet, false); streamMetaResetStartInfo(pStartInfo); + } else { + stDebug("vgId:%d recv check down results:%d, total:%d", pMeta->vgId, numOfRecv, numOfTotal); } streamMetaWUnLock(pMeta); diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index 2e51200fe4..276ed08785 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -106,51 +106,21 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz } SStreamTask* pStreamTask = pTask; - char statePath[1024]; - if (!specPath) { - sprintf(statePath, "%s%s%d", path, TD_DIRSEP, pStreamTask->id.taskId); - } else { - memset(statePath, 0, 1024); - tstrncpy(statePath, path, 1024); - } - pState->taskId = pStreamTask->id.taskId; pState->streamId = pStreamTask->id.streamId; - sprintf(pState->pTdbState->idstr, "0x%" PRIx64 "-%d", pState->streamId, pState->taskId); + sprintf(pState->pTdbState->idstr, "0x%" PRIx64 "-0x%x", pState->streamId, pState->taskId); + + streamTaskSetDb(pStreamTask->pMeta, pTask, pState->pTdbState->idstr); #ifdef USE_ROCKSDB SStreamMeta* pMeta = pStreamTask->pMeta; - pState->streamBackendRid = pMeta->streamBackendRid; - // streamMetaWLock(pMeta); - taosThreadMutexLock(&pMeta->backendMutex); - void* uniqueId = - taosHashGet(pMeta->pTaskBackendUnique, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1); - if (uniqueId == NULL) { - int code = streamStateOpenBackend(pMeta->streamBackend, pState); - if (code == -1) { - taosThreadMutexUnlock(&pMeta->backendMutex); - taosMemoryFree(pState); - return NULL; - } - taosHashPut(pMeta->pTaskBackendUnique, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1, - &pState->pTdbState->backendCfWrapperId, sizeof(pState->pTdbState->backendCfWrapperId)); - } else { - int64_t id = *(int64_t*)uniqueId; - pState->pTdbState->backendCfWrapperId = id; - pState->pTdbState->pBackendCfWrapper = taosAcquireRef(streamBackendCfWrapperId, id); - // already exist stream task for - stInfo("already exist stream-state for %s", pState->pTdbState->idstr); - // taosAcquireRef(streamBackendId, pState->streamBackendRid); - } - taosThreadMutexUnlock(&pMeta->backendMutex); - pState->pTdbState->pOwner = pTask; pState->pFileState = NULL; _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT); pState->parNameMap = tSimpleHashInit(1024, hashFn); stInfo("succ to open state %p on backend %p 0x%" PRIx64 "-%d", pState, pMeta->streamBackend, pState->streamId, - pState->taskId); + pState->taskId); return pState; #else @@ -237,6 +207,12 @@ _err: #endif } +int32_t streamStateDelTaskDb(SStreamState* pState) { + SStreamTask* pTask = pState->pTdbState->pOwner; + taskDbRemoveRef(pTask->pBackend); + taosMemoryFree(pTask); + return 0; +} void streamStateClose(SStreamState* pState, bool remove) { SStreamTask* pTask = pState->pTdbState->pOwner; #ifdef USE_ROCKSDB @@ -692,8 +668,7 @@ void streamStateResetCur(SStreamStateCur* pCur) { } void streamStateFreeCur(SStreamStateCur* pCur) { - if (!pCur || pCur->buffIndex >= 0) { - taosMemoryFree(pCur); + if (!pCur) { return; } qDebug("streamStateFreeCur"); @@ -722,7 +697,7 @@ int32_t streamStateSessionPut(SStreamState* pState, const SSessionKey* key, void streamStateReleaseBuf(pState, pos, true); putFreeBuff(pState->pFileState, pos); stDebug("===stream===save skey:%" PRId64 ", 
ekey:%" PRId64 ", groupId:%" PRIu64 ".code:%d", key->win.skey, - key->win.ekey, key->groupId, code); + key->win.ekey, key->groupId, code); } else { code = putSessionWinResultBuff(pState->pFileState, value); } @@ -768,7 +743,7 @@ int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVa int32_t streamStateSessionDel(SStreamState* pState, const SSessionKey* key) { #ifdef USE_ROCKSDB - stDebug("===stream===delete skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey, key->win.ekey, + qDebug("===stream===delete skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey, key->win.ekey, key->groupId); return deleteRowBuff(pState->pFileState, key, sizeof(SSessionKey)); #else @@ -1086,7 +1061,6 @@ _end: } int32_t streamStatePutParName(SStreamState* pState, int64_t groupId, const char tbname[TSDB_TABLE_NAME_LEN]) { - stDebug("try to write to cf parname"); #ifdef USE_ROCKSDB if (tSimpleHashGetSize(pState->parNameMap) > MAX_TABLE_NAME_NUM) { if (tSimpleHashGet(pState->parNameMap, &groupId, sizeof(int64_t)) == NULL) { @@ -1123,7 +1097,7 @@ int32_t streamStateGetParName(SStreamState* pState, int64_t groupId, void** pVal void streamStateDestroy(SStreamState* pState, bool remove) { #ifdef USE_ROCKSDB streamFileStateDestroy(pState->pFileState); - streamStateDestroy_rocksdb(pState, remove); + // streamStateDestroy_rocksdb(pState, remove); tSimpleHashCleanup(pState->parNameMap); // do nothong #endif @@ -1233,4 +1207,4 @@ char* streamStateIntervalDump(SStreamState* pState) { streamStateFreeCur(pCur); return dumpBuf; } -#endif +#endif \ No newline at end of file diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 24228c0307..db0217f000 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -15,11 +15,11 @@ #include "executor.h" #include "streamInt.h" +#include "streamsm.h" #include "tmisce.h" #include "tstream.h" #include "ttimer.h" #include "wal.h" -#include "streamsm.h" static void streamTaskDestroyUpstreamInfo(SUpstreamInfo* pUpstreamInfo); @@ -250,9 +250,8 @@ int32_t tDecodeStreamTaskChkInfo(SDecoder* pDecoder, SCheckpointInfo* pChkpInfo) SEpSet epSet; if (tStartDecode(pDecoder) < 0) return -1; - if (tDecodeI64(pDecoder, &ver) < 0) return -1; - - if (ver != SSTREAM_TASK_VER) return -1; + if (tDecodeI64(pDecoder, &pChkpInfo->msgVer) < 0) return -1; + // if (ver != SSTREAM_TASK_VER) return -1; if (tDecodeI64(pDecoder, &skip64) < 0) return -1; if (tDecodeI32(pDecoder, &skip32) < 0) return -1; @@ -309,11 +308,11 @@ void tFreeStreamTask(SStreamTask* pTask) { stDebug("start to free s-task:0x%x, %p, state:%p", taskId, pTask, pTask->pState); stDebug("s-task:0x%x task exec summary: create:%" PRId64 ", init:%" PRId64 ", start:%" PRId64 - ", updateCount:%d latestUpdate:%" PRId64 ", latestCheckPoint:%" PRId64 ", ver:%" PRId64 - " nextProcessVer:%" PRId64", checkpointCount:%d", - taskId, pStatis->created, pStatis->init, pStatis->start, pStatis->updateCount, pStatis->latestUpdateTs, - pTask->chkInfo.checkpointId, pTask->chkInfo.checkpointVer, pTask->chkInfo.nextProcessVer, - pStatis->checkpoint); + ", updateCount:%d latestUpdate:%" PRId64 ", latestCheckPoint:%" PRId64 ", ver:%" PRId64 + " nextProcessVer:%" PRId64 ", checkpointCount:%d", + taskId, pStatis->created, pStatis->init, pStatis->start, pStatis->updateCount, pStatis->latestUpdateTs, + pTask->chkInfo.checkpointId, pTask->chkInfo.checkpointVer, pTask->chkInfo.nextProcessVer, + pStatis->checkpoint); // remove the ref by timer while 
(pTask->status.timerActive > 0) { @@ -358,7 +357,9 @@ void tFreeStreamTask(SStreamTask* pTask) { walCloseReader(pTask->exec.pWalReader); } + streamClearChkptReadyMsg(pTask); pTask->pReadyMsgList = taosArrayDestroy(pTask->pReadyMsgList); + if (pTask->msgInfo.pData != NULL) { destroyDispatchMsg(pTask->msgInfo.pData, getNumOfDispatchBranch(pTask)); pTask->msgInfo.pData = NULL; @@ -377,6 +378,8 @@ void tFreeStreamTask(SStreamTask* pTask) { if (pTask->pState) { stDebug("s-task:0x%x start to free task state", taskId); streamStateClose(pTask->pState, status == TASK_STATUS__DROPPING); + taskDbRemoveRef(pTask->pBackend); + } if (pTask->id.idStr != NULL) { @@ -422,7 +425,7 @@ int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, i pTask->status.pSM = streamCreateStateMachine(pTask); if (pTask->status.pSM == NULL) { stError("s-task:%s failed create state-machine for stream task, initialization failed, code:%s", pTask->id.idStr, - tstrerror(terrno)); + tstrerror(terrno)); return terrno; } @@ -434,7 +437,7 @@ int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, i pTask->chkInfo.checkpointVer = ver - 1; // only update when generating checkpoint pTask->chkInfo.processedVer = ver - 1; // already processed version - pTask->chkInfo.nextProcessVer = ver; // next processed version + pTask->chkInfo.nextProcessVer = ver; // next processed version pTask->dataRange.range.maxVer = ver; pTask->dataRange.range.minVer = ver; pTask->pMsgCb = pMsgCb; @@ -442,16 +445,17 @@ int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, i pTask->outputInfo.pTokenBucket = taosMemoryCalloc(1, sizeof(STokenBucket)); if (pTask->outputInfo.pTokenBucket == NULL) { - stError("s-task:%s failed to prepare the tokenBucket, code:%s", pTask->id.idStr, tstrerror(TSDB_CODE_OUT_OF_MEMORY)); + stError("s-task:%s failed to prepare the tokenBucket, code:%s", pTask->id.idStr, + tstrerror(TSDB_CODE_OUT_OF_MEMORY)); return TSDB_CODE_OUT_OF_MEMORY; } // 2MiB per second for sink task // 50 times sink operator per second - streamTaskInitTokenBucket(pTask->outputInfo.pTokenBucket, 50, 50, tsSinkDataRate, pTask->id.idStr); + streamTaskInitTokenBucket(pTask->outputInfo.pTokenBucket, 35, 35, tsSinkDataRate, pTask->id.idStr); TdThreadMutexAttr attr = {0}; - int code = taosThreadMutexAttrInit(&attr); + int code = taosThreadMutexAttrInit(&attr); if (code != 0) { stError("s-task:%s initElapsed mutex attr failed, code:%s", pTask->id.idStr, tstrerror(code)); return code; @@ -464,6 +468,14 @@ int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, i } taosThreadMutexInit(&pTask->lock, &attr); + // if (pTask->info.fillHistory == 1) { + // // + // } else { + + // } + // if (streamTaskSetDb(pMeta, pTask) != 0) { + // return -1; + // } streamTaskOpenAllUpstreamInput(pTask); pTask->outputInfo.pDownstreamUpdateList = taosArrayInit(4, sizeof(SDownstreamTaskEpset)); @@ -529,8 +541,8 @@ void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpS SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); if (pInfo->nodeId == nodeId) { epsetAssign(&pInfo->epSet, pEpSet); - stDebug("s-task:0x%x update the upstreamInfo taskId:0x%x(nodeId:%d) newEpset:%s", pTask->id.taskId, - pInfo->taskId, nodeId, buf); + stDebug("s-task:0x%x update the upstreamInfo taskId:0x%x(nodeId:%d) newEpset:%s", pTask->id.taskId, pInfo->taskId, + nodeId, buf); break; } } @@ -569,7 +581,7 @@ void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SE if 
(pVgInfo->vgId == nodeId) { epsetAssign(&pVgInfo->epSet, pEpSet); stDebug("s-task:0x%x update the dispatch info, task:0x%x(nodeId:%d) newEpset:%s", pTask->id.taskId, - pVgInfo->taskId, nodeId, buf); + pVgInfo->taskId, nodeId, buf); break; } } @@ -578,7 +590,7 @@ void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SE if (pDispatcher->nodeId == nodeId) { epsetAssign(&pDispatcher->epSet, pEpSet); stDebug("s-task:0x%x update the dispatch info, task:0x%x(nodeId:%d) newEpSet:%s", pTask->id.taskId, - pDispatcher->taskId, nodeId, buf); + pDispatcher->taskId, nodeId, buf); } } else { // do nothing @@ -586,9 +598,9 @@ void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SE } int32_t streamTaskStop(SStreamTask* pTask) { - int32_t vgId = pTask->pMeta->vgId; - int64_t st = taosGetTimestampMs(); - const char* id = pTask->id.idStr; + int32_t vgId = pTask->pMeta->vgId; + int64_t st = taosGetTimestampMs(); + const char* id = pTask->id.idStr; streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_STOP); qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS); @@ -635,7 +647,7 @@ int32_t streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList) { p->latestUpdateTs = taosGetTimestampMs(); p->updateCount += 1; stDebug("s-task:0x%x update task nodeEp epset, updatedNodes:%d, updateCount:%d, prevTs:%" PRId64, pTask->id.taskId, - numOfNodes, p->updateCount, prevTs); + numOfNodes, p->updateCount, prevTs); for (int32_t i = 0; i < taosArrayGetSize(pNodeList); ++i) { SNodeUpdateInfo* pInfo = taosArrayGet(pNodeList, i); @@ -706,7 +718,7 @@ int32_t streamTaskClearHTaskAttr(SStreamTask* pTask) { return TSDB_CODE_SUCCESS; } - STaskId sTaskId = {.streamId = pTask->streamTaskId.streamId, .taskId = pTask->streamTaskId.taskId}; + STaskId sTaskId = {.streamId = pTask->streamTaskId.streamId, .taskId = pTask->streamTaskId.taskId}; SStreamTask** ppStreamTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &sTaskId, sizeof(sTaskId)); if (ppStreamTask != NULL) { @@ -720,7 +732,7 @@ int32_t streamTaskClearHTaskAttr(SStreamTask* pTask) { } int32_t streamBuildAndSendDropTaskMsg(SMsgCb* pMsgCb, int32_t vgId, SStreamTaskId* pTaskId) { - SVDropStreamTaskReq *pReq = rpcMallocCont(sizeof(SVDropStreamTaskReq)); + SVDropStreamTaskReq* pReq = rpcMallocCont(sizeof(SVDropStreamTaskReq)); if (pReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; @@ -779,4 +791,5 @@ void streamTaskStatusCopy(STaskStatusEntry* pDst, const STaskStatusEntry* pSrc) pDst->sinkDataSize = pSrc->sinkDataSize; pDst->activeCheckpointId = pSrc->activeCheckpointId; pDst->checkpointFailed = pSrc->checkpointFailed; -} \ No newline at end of file +} + diff --git a/source/libs/stream/src/streamTaskSm.c b/source/libs/stream/src/streamTaskSm.c index 04b449aaaf..cac3766893 100644 --- a/source/libs/stream/src/streamTaskSm.c +++ b/source/libs/stream/src/streamTaskSm.c @@ -269,6 +269,7 @@ int32_t streamTaskHandleEvent(SStreamTaskSM* pSM, EStreamTaskEvent event) { pTask->id.idStr, pSM->current.name, GET_EVT_NAME(evt)); taosMsleep(100); } else { + // no active event trans exists, handle this event directly pTrans = streamTaskFindTransform(pSM->current.state, event); if (pTrans == NULL) { stDebug("s-task:%s failed to handle event:%s", pTask->id.idStr, GET_EVT_NAME(event)); @@ -451,60 +452,43 @@ int32_t initStateTransferTable() { return TSDB_CODE_SUCCESS; } +//clang-format off void doInitStateTransferTable(void) { streamTaskSMTrans = taosArrayInit(8, sizeof(STaskStateTrans)); // initialization event handle - STaskStateTrans trans = 
createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__READY, TASK_EVENT_INIT, - streamTaskInitStatus, onNormalTaskReady, false, false); + STaskStateTrans trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__READY, TASK_EVENT_INIT, streamTaskInitStatus, streamTaskOnNormalTaskReady, false, false); taosArrayPush(streamTaskSMTrans, &trans); - - trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__SCAN_HISTORY, TASK_EVENT_INIT_SCANHIST, - streamTaskInitStatus, onScanhistoryTaskReady, false, false); + trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__SCAN_HISTORY, TASK_EVENT_INIT_SCANHIST, streamTaskInitStatus, streamTaskOnScanhistoryTaskReady, false, false); taosArrayPush(streamTaskSMTrans, &trans); - - trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__STREAM_SCAN_HISTORY, TASK_EVENT_INIT_STREAM_SCANHIST, - streamTaskInitStatus, onScanhistoryTaskReady, false, false); + trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__STREAM_SCAN_HISTORY, TASK_EVENT_INIT_STREAM_SCANHIST, streamTaskInitStatus, streamTaskOnScanhistoryTaskReady, false, false); taosArrayPush(streamTaskSMTrans, &trans); // scan-history related event - trans = createStateTransform(TASK_STATUS__SCAN_HISTORY, TASK_STATUS__READY, TASK_EVENT_SCANHIST_DONE, NULL, NULL, - NULL, true); + trans = createStateTransform(TASK_STATUS__SCAN_HISTORY, TASK_STATUS__READY, TASK_EVENT_SCANHIST_DONE, NULL, NULL, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); - - trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__READY, TASK_EVENT_SCANHIST_DONE, NULL, - NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__READY, TASK_EVENT_SCANHIST_DONE, NULL, NULL, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); // halt stream task, from other task status - trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, - streamTaskKeepCurrentVerInWal, NULL, true); + trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); - - trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, - streamTaskKeepCurrentVerInWal, NULL, true); + trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); SAttachedEventInfo info = {.status = TASK_STATUS__READY, .event = TASK_EVENT_HALT}; - trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, - streamTaskKeepCurrentVerInWal, &info, true); - taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, - &info, true); + trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, &info, true); taosArrayPush(streamTaskSMTrans, &trans); - - trans = createStateTransform(TASK_STATUS__PAUSE, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, - streamTaskKeepCurrentVerInWal, NULL, true); + trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, &info, true); + taosArrayPush(streamTaskSMTrans, &trans); + trans = createStateTransform(TASK_STATUS__PAUSE, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, NULL, true); 
taosArrayPush(streamTaskSMTrans, &trans); // checkpoint related event - trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__CK, TASK_EVENT_GEN_CHECKPOINT, NULL, - streamTaskDoCheckpoint, NULL, true); + trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__CK, TASK_EVENT_GEN_CHECKPOINT, NULL, streamTaskDoCheckpoint, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); - - trans = - createStateTransform(TASK_STATUS__CK, TASK_STATUS__READY, TASK_EVENT_CHECKPOINT_DONE, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__READY, TASK_EVENT_CHECKPOINT_DONE, NULL, NULL, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); // pause & resume related event handle @@ -571,4 +555,5 @@ void doInitStateTransferTable(void) { taosArrayPush(streamTaskSMTrans, &trans); trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); -} \ No newline at end of file +} +//clang-format on \ No newline at end of file diff --git a/source/libs/sync/src/syncRaftCfg.c b/source/libs/sync/src/syncRaftCfg.c index 0dcc3eee29..0e98fe94eb 100644 --- a/source/libs/sync/src/syncRaftCfg.c +++ b/source/libs/sync/src/syncRaftCfg.c @@ -103,7 +103,7 @@ int32_t syncWriteCfgFile(SSyncNode *pNode) { if (buffer == NULL) goto _OVER; terrno = 0; - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (pFile == NULL) goto _OVER; int32_t len = strlen(buffer); diff --git a/source/libs/sync/src/syncRaftStore.c b/source/libs/sync/src/syncRaftStore.c index 051106b99d..c200c6cb4b 100644 --- a/source/libs/sync/src/syncRaftStore.c +++ b/source/libs/sync/src/syncRaftStore.c @@ -128,7 +128,7 @@ int32_t raftStoreWriteFile(SSyncNode *pNode) { if (buffer == NULL) goto _OVER; terrno = 0; - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (pFile == NULL) goto _OVER; int32_t len = strlen(buffer); diff --git a/source/libs/tfs/src/tfsTier.c b/source/libs/tfs/src/tfsTier.c index d4f228a537..911fdc52b7 100644 --- a/source/libs/tfs/src/tfsTier.c +++ b/source/libs/tfs/src/tfsTier.c @@ -112,7 +112,7 @@ int32_t tfsAllocDiskOnTier(STfsTier *pTier) { int32_t retId = -1; int64_t avail = 0; for (int32_t id = 0; id < TFS_MAX_DISKS_PER_TIER; ++id) { -#if 0 // round-robin +#if 1 // round-robin int32_t diskId = (pTier->nextid + id) % pTier->ndisk; STfsDisk *pDisk = pTier->disks[diskId]; diff --git a/source/libs/transport/src/thttp.c b/source/libs/transport/src/thttp.c index 65b0058cfe..afb982a50a 100644 --- a/source/libs/transport/src/thttp.c +++ b/source/libs/transport/src/thttp.c @@ -28,10 +28,12 @@ static int32_t httpRefMgt = 0; static int64_t httpRef = -1; +static int32_t FAST_FAILURE_LIMIT = 120; typedef struct SHttpModule { uv_loop_t* loop; SAsyncPool* asyncPool; TdThread thread; + SHashObj* connStatusTable; } SHttpModule; typedef struct SHttpMsg { @@ -64,6 +66,8 @@ static void httpHandleReq(SHttpMsg* msg); static void httpHandleQuit(SHttpMsg* msg); static int32_t httpSendQuit(); +static bool httpFailFastShoudIgnoreMsg(SHashObj* pTable, char* server, int16_t port); +static void httpFailFastMayUpdate(SHashObj* pTable, char* server, int16_t port, int8_t succ); static int32_t taosSendHttpReportImpl(const char* server, const char* uri, uint16_t port, 
char* pCont, int32_t contLen, EHttpCompFlag flag); @@ -193,11 +197,20 @@ static void httpAsyncCb(uv_async_t* handle) { SHttpMsg *msg = NULL, *quitMsg = NULL; queue wq; + QUEUE_INIT(&wq); + + static int32_t BATCH_SIZE = 5; + int32_t count = 0; + taosThreadMutexLock(&item->mtx); - QUEUE_MOVE(&item->qmsg, &wq); + + while (!QUEUE_IS_EMPTY(&item->qmsg) && count++ < BATCH_SIZE) { + queue* h = QUEUE_HEAD(&item->qmsg); + QUEUE_REMOVE(h); + QUEUE_PUSH(&wq, h); + } taosThreadMutexUnlock(&item->mtx); - int count = 0; while (!QUEUE_IS_EMPTY(&wq)) { queue* h = QUEUE_HEAD(&wq); QUEUE_REMOVE(h); @@ -262,14 +275,20 @@ static void clientSentCb(uv_write_t* req, int32_t status) { } } static void clientConnCb(uv_connect_t* req, int32_t status) { + SHttpModule* http = taosAcquireRef(httpRefMgt, httpRef); SHttpClient* cli = req->data; if (status != 0) { + httpFailFastMayUpdate(http->connStatusTable, cli->addr, cli->port, 0); + tError("http-report failed to conn to server, reason:%s, dst:%s:%d", uv_strerror(status), cli->addr, cli->port); if (!uv_is_closing((uv_handle_t*)&cli->tcp)) { uv_close((uv_handle_t*)&cli->tcp, clientCloseCb); } + taosReleaseRef(httpRefMgt, httpRef); return; } + httpFailFastMayUpdate(http->connStatusTable, cli->addr, cli->port, 1); + status = uv_write(&cli->req, (uv_stream_t*)&cli->tcp, cli->wbuf, 2, clientSentCb); if (0 != status) { tError("http-report failed to send data,reason:%s, dst:%s:%d", uv_strerror(status), cli->addr, cli->port); @@ -277,6 +296,7 @@ static void clientConnCb(uv_connect_t* req, int32_t status) { uv_close((uv_handle_t*)&cli->tcp, clientCloseCb); } } + taosReleaseRef(httpRefMgt, httpRef); } int32_t httpSendQuit() { @@ -349,16 +369,51 @@ static void httpHandleQuit(SHttpMsg* msg) { uv_walk(http->loop, httpWalkCb, NULL); taosReleaseRef(httpRefMgt, httpRef); } + +static bool httpFailFastShoudIgnoreMsg(SHashObj* pTable, char* server, int16_t port) { + char buf[256] = {0}; + sprintf(buf, "%s:%d", server, port); + + int32_t* failedTime = (int32_t*)taosHashGet(pTable, buf, strlen(buf)); + if (failedTime == NULL) { + return false; + } + + int32_t now = taosGetTimestampSec(); + if (*failedTime > now - FAST_FAILURE_LIMIT) { + tDebug("http-report succ to ignore msg,reason:connection timed out, dst:%s", buf); + return true; + } else { + return false; + } +} +static void httpFailFastMayUpdate(SHashObj* pTable, char* server, int16_t port, int8_t succ) { + char buf[256] = {0}; + sprintf(buf, "%s:%d", server, port); + + if (succ) { + taosHashRemove(pTable, buf, strlen(buf)); + } else { + int32_t st = taosGetTimestampSec(); + taosHashPut(pTable, buf, strlen(buf), &st, sizeof(st)); + } + return; +} static void httpHandleReq(SHttpMsg* msg) { + int32_t ignore = false; SHttpModule* http = taosAcquireRef(httpRefMgt, httpRef); if (http == NULL) { goto END; } - + if (httpFailFastShoudIgnoreMsg(http->connStatusTable, msg->server, msg->port)) { + ignore = true; + goto END; + } struct sockaddr_in dest = {0}; if (taosBuildDstAddr(msg->server, msg->port, &dest) < 0) { goto END; } + if (msg->flag == HTTP_GZIP) { int32_t dstLen = taosCompressHttpRport(msg->cont, msg->len); if (dstLen > 0) { @@ -399,11 +454,11 @@ static void httpHandleReq(SHttpMsg* msg) { uv_tcp_init(http->loop, &cli->tcp); // set up timeout to avoid stuck; - int32_t fd = taosCreateSocketWithTimeout(5); + int32_t fd = taosCreateSocketWithTimeout(5000); if (fd < 0) { tError("http-report failed to open socket, dst:%s:%d", cli->addr, cli->port); - taosReleaseRef(httpRefMgt, httpRef); destroyHttpClient(cli); + taosReleaseRef(httpRefMgt, 
httpRef); return; } int ret = uv_tcp_open((uv_tcp_t*)&cli->tcp, fd); @@ -418,13 +473,16 @@ static void httpHandleReq(SHttpMsg* msg) { if (ret != 0) { tError("http-report failed to connect to http-server, reason:%s, dst:%s:%d", uv_strerror(ret), cli->addr, cli->port); + httpFailFastMayUpdate(http->connStatusTable, cli->addr, cli->port, 0); destroyHttpClient(cli); } taosReleaseRef(httpRefMgt, httpRef); return; END: - tError("http-report failed to report, reason: %s, addr: %s:%d", terrstr(), msg->server, msg->port); + if (ignore == false) { + tError("http-report failed to report, reason: %s, addr: %s:%d", terrstr(), msg->server, msg->port); + } httpDestroyMsg(msg); taosReleaseRef(httpRefMgt, httpRef); } @@ -441,6 +499,8 @@ static void transHttpEnvInit() { SHttpModule* http = taosMemoryMalloc(sizeof(SHttpModule)); http->loop = taosMemoryMalloc(sizeof(uv_loop_t)); + http->connStatusTable = taosHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); + uv_loop_init(http->loop); http->asyncPool = transAsyncPoolCreate(http->loop, 1, http, httpAsyncCb); @@ -474,6 +534,8 @@ void transHttpEnvDestroy() { uv_loop_close(load->loop); taosMemoryFree(load->loop); + taosHashCleanup(load->connStatusTable); + taosReleaseRef(httpRefMgt, httpRef); taosRemoveRef(httpRefMgt, httpRef); } diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index ef60c8a94e..e51c61c49d 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -957,8 +957,8 @@ static void cliSendCb(uv_write_t* req, int status) { SCliMsg* pMsg = !transQueueEmpty(&pConn->cliMsgs) ? transQueueGet(&pConn->cliMsgs, 0) : NULL; if (pMsg != NULL) { int64_t cost = taosGetTimestampUs() - pMsg->st; - if (cost > 1000 * 20) { - tWarn("%s conn %p send cost:%dus, send exception", CONN_GET_INST_LABEL(pConn), pConn, (int)cost); + if (cost > 1000 * 50) { + tTrace("%s conn %p send cost:%dus ", CONN_GET_INST_LABEL(pConn), pConn, (int)cost); } } diff --git a/source/libs/transport/src/transSvr.c b/source/libs/transport/src/transSvr.c index bf73c253bc..017969b4e5 100644 --- a/source/libs/transport/src/transSvr.c +++ b/source/libs/transport/src/transSvr.c @@ -159,7 +159,7 @@ static void uvStartSendResp(SSvrMsg* msg); static void uvNotifyLinkBrokenToApp(SSvrConn* conn); -static FORCE_INLINE void destroySmsg(SSvrMsg* smsg); +static FORCE_INLINE void destroySmsg(SSvrMsg* smsg); static FORCE_INLINE SSvrConn* createConn(void* hThrd); static FORCE_INLINE void destroyConn(SSvrConn* conn, bool clear /*clear handle or not*/); static FORCE_INLINE void destroyConnRegArg(SSvrConn* conn); @@ -1499,6 +1499,7 @@ int transSendResponse(const STransMsg* msg) { } SExHandle* exh = msg->info.handle; if (exh == NULL) { + rpcFreeCont(msg->pCont); return 0; } int64_t refId = msg->info.refId; diff --git a/source/libs/transport/test/CMakeLists.txt b/source/libs/transport/test/CMakeLists.txt index b0548149d0..da4cda5dc7 100644 --- a/source/libs/transport/test/CMakeLists.txt +++ b/source/libs/transport/test/CMakeLists.txt @@ -2,6 +2,7 @@ add_executable(transportTest "") add_executable(transUT "") add_executable(svrBench "") add_executable(cliBench "") +add_executable(httpBench "") target_sources(transUT PRIVATE @@ -21,6 +22,10 @@ target_sources(cliBench PRIVATE "cliBench.c" ) +target_sources(httpBench + PRIVATE + "http_test.c" +) target_include_directories(transportTest PUBLIC @@ -51,11 +56,6 @@ target_include_directories(transUT "${CMAKE_CURRENT_SOURCE_DIR}/../inc" ) 
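The thttp.c hunks above add a fail-fast path to the monitoring reporter, and the httpBench target being wired into this CMakeLists.txt (http_test.c) exercises it: each destination is keyed as "host:port" in connStatusTable, a failed connect records the failure time, further reports to that destination are dropped until FAST_FAILURE_LIMIT (120 s) has passed, and a successful connect clears the entry. The following is a minimal standalone sketch of that cooldown logic only; it uses a plain array instead of TDengine's hash table, and FailEntry, should_skip, and record_result are placeholder names, not APIs from this patch.

    #include <stdio.h>
    #include <string.h>
    #include <time.h>

    #define MAX_DEST          16
    #define FAIL_COOLDOWN_SEC 120   /* mirrors FAST_FAILURE_LIMIT in the patch */

    typedef struct {
      char   key[256];   /* "host:port" */
      time_t failedAt;   /* last failed connect, 0 == healthy */
    } FailEntry;

    static FailEntry gTable[MAX_DEST];

    static FailEntry *find_entry(const char *host, int port) {
      char key[256];
      snprintf(key, sizeof(key), "%s:%d", host, port);
      for (int i = 0; i < MAX_DEST; i++) {
        if (gTable[i].key[0] != 0 && strcmp(gTable[i].key, key) == 0) return &gTable[i];
      }
      for (int i = 0; i < MAX_DEST; i++) {          /* reuse the first free slot */
        if (gTable[i].key[0] == 0) {
          snprintf(gTable[i].key, sizeof(gTable[i].key), "%s", key);
          return &gTable[i];
        }
      }
      return NULL;
    }

    /* counterpart of httpFailFastShoudIgnoreMsg: drop the report while the cooldown is active */
    static int should_skip(const char *host, int port) {
      FailEntry *e = find_entry(host, port);
      if (e == NULL || e->failedAt == 0) return 0;
      return (time(NULL) - e->failedAt) < FAIL_COOLDOWN_SEC;
    }

    /* counterpart of httpFailFastMayUpdate: record the outcome of a connect attempt */
    static void record_result(const char *host, int port, int succeeded) {
      FailEntry *e = find_entry(host, port);
      if (e != NULL) e->failedAt = succeeded ? 0 : time(NULL);
    }

    int main(void) {
      record_result("monitor.example.com", 6043, 0);               /* hypothetical host/port: simulated failure */
      printf("skip=%d\n", should_skip("monitor.example.com", 6043)); /* 1 while cooling down */
      record_result("monitor.example.com", 6043, 1);               /* a later successful connect clears it */
      printf("skip=%d\n", should_skip("monitor.example.com", 6043)); /* 0 again */
      return 0;
    }

The design choice is the same as in the patch: a cheap per-destination timestamp keeps a dead monitoring endpoint from stalling every report for the full connect timeout.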
-target_include_directories(svrBench - PUBLIC - "${TD_SOURCE_DIR}/include/libs/transport" - "${CMAKE_CURRENT_SOURCE_DIR}/../inc" -) target_include_directories(svrBench PUBLIC "${TD_SOURCE_DIR}/include/libs/transport" @@ -75,7 +75,8 @@ target_include_directories(cliBench "${TD_SOURCE_DIR}/include/libs/transport" "${CMAKE_CURRENT_SOURCE_DIR}/../inc" ) -target_include_directories(cliBench + +target_include_directories(httpBench PUBLIC "${TD_SOURCE_DIR}/include/libs/transport" "${CMAKE_CURRENT_SOURCE_DIR}/../inc" @@ -89,6 +90,14 @@ target_link_libraries (cliBench transport ) +target_link_libraries(httpBench + os + util + common + gtest_main + transport +) + add_test( NAME transUT COMMAND transUT diff --git a/source/libs/transport/test/http_test.c b/source/libs/transport/test/http_test.c new file mode 100644 index 0000000000..d04fb84843 --- /dev/null +++ b/source/libs/transport/test/http_test.c @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ +#include "os.h" +#include "taoserror.h" +#include "tglobal.h" +#include "thttp.h" +#include "transLog.h" +#include "trpc.h" +#include "tutil.h" +#include "tversion.h" + +void initLogEnv() { + const char * logDir = "/tmp/trans_cli"; + const char * defaultLogFileNamePrefix = "taoslog"; + const int32_t maxLogFileNum = 1000000; + tsAsyncLog = 0; + // rpcDebugflag = 143; + strcpy(tsLogDir, (char *)logDir); + taosRemoveDir(tsLogDir); + taosMkDir(tsLogDir); + + if (taosInitLog(defaultLogFileNamePrefix, maxLogFileNum) < 0) { + printf("failed to open log file in directory:%s\n", tsLogDir); + } +} +typedef struct TThread { + TdThread thread; + int idx; +} TThread; + +void *proces(void *arg) { + char *monitor = "172.26.10.94"; + while (1) { + int32_t len = 512; + char * msg = taosMemoryCalloc(1, len); + memset(msg, 1, len); + int32_t code = taosSendHttpReport(monitor, "/crash", 6050, msg, 10, HTTP_FLAT); + taosMemoryFree(msg); + taosUsleep(10); + } +} +int main(int argc, char *argv[]) { + initLogEnv(); + int32_t numOfThreads = 10; + TThread *thread = taosMemoryCalloc(1, sizeof(TThread) * numOfThreads); + + for (int i = 0; i < numOfThreads; i++) { + thread[i].idx = i; + taosThreadCreate(&(thread[i].thread), NULL, proces, (void *)&thread[i]); + } + while (1) { + taosMsleep(5000); + } + + taosCloseLog(); + + return 0; +} diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c index f5e5427c68..933014466a 100644 --- a/source/libs/wal/src/walMeta.c +++ b/source/libs/wal/src/walMeta.c @@ -873,7 +873,7 @@ int walSaveMeta(SWal* pWal) { return -1; } - TdFilePtr pMetaFile = taosOpenFile(tmpFnameStr, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + TdFilePtr pMetaFile = taosOpenFile(tmpFnameStr, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (pMetaFile == NULL) { wError("vgId:%d, failed to open file due to %s. 
file:%s", pWal->cfg.vgId, strerror(errno), tmpFnameStr); terrno = TAOS_SYSTEM_ERROR(errno); diff --git a/source/libs/wal/src/walWrite.c b/source/libs/wal/src/walWrite.c index ef97bff896..341d989f8f 100644 --- a/source/libs/wal/src/walWrite.c +++ b/source/libs/wal/src/walWrite.c @@ -133,6 +133,7 @@ int32_t walRollback(SWal *pWal, int64_t ver) { } walBuildIdxName(pWal, walGetCurFileFirstVer(pWal), fnameStr); + taosCloseFile(&pWal->pIdxFile); TdFilePtr pIdxFile = taosOpenFile(fnameStr, TD_FILE_WRITE | TD_FILE_READ | TD_FILE_APPEND); if (pIdxFile == NULL) { @@ -153,6 +154,7 @@ int32_t walRollback(SWal *pWal, int64_t ver) { } walBuildLogName(pWal, walGetCurFileFirstVer(pWal), fnameStr); + taosCloseFile(&pWal->pLogFile); TdFilePtr pLogFile = taosOpenFile(fnameStr, TD_FILE_WRITE | TD_FILE_READ | TD_FILE_APPEND); wDebug("vgId:%d, wal truncate file %s", pWal->cfg.vgId, fnameStr); if (pLogFile == NULL) { @@ -204,6 +206,7 @@ int32_t walRollback(SWal *pWal, int64_t ver) { pWal->vers.lastVer = ver - 1; ((SWalFileInfo *)taosArrayGetLast(pWal->fileInfoSet))->lastVer = ver - 1; ((SWalFileInfo *)taosArrayGetLast(pWal->fileInfoSet))->fileSize = entry.offset; + taosCloseFile(&pIdxFile); taosCloseFile(&pLogFile); @@ -324,15 +327,19 @@ int32_t walEndSnapshot(SWal *pWal) { // iterate files, until the searched result // delete according to file size or close time + SWalFileInfo *pUntil = NULL; for (SWalFileInfo *iter = pWal->fileInfoSet->pData; iter < pInfo; iter++) { if ((pWal->cfg.retentionSize > 0 && newTotSize > pWal->cfg.retentionSize) || (pWal->cfg.retentionPeriod == 0 || pWal->cfg.retentionPeriod > 0 && iter->closeTs >= 0 && iter->closeTs + pWal->cfg.retentionPeriod < ts)) { - deleteCnt++; newTotSize -= iter->fileSize; - taosArrayPush(pWal->toDeleteFiles, iter); + pUntil = iter; } } + for (SWalFileInfo *iter = pWal->fileInfoSet->pData; iter <= pUntil; iter++) { + deleteCnt++; + taosArrayPush(pWal->toDeleteFiles, iter); + } // make new array, remove files taosArrayPopFrontBatch(pWal->fileInfoSet, deleteCnt); @@ -605,7 +612,7 @@ int32_t walWriteWithSyncInfo(SWal *pWal, int64_t index, tmsg_t msgType, SWalSync return -1; } - if (pWal->pIdxFile == NULL || pWal->pIdxFile == NULL || pWal->writeCur < 0) { + if (pWal->pIdxFile == NULL || pWal->pLogFile == NULL || pWal->writeCur < 0) { if (walInitWriteFile(pWal) < 0) { taosThreadMutexUnlock(&pWal->mutex); return -1; diff --git a/source/os/src/osFile.c b/source/os/src/osFile.c index 30f079d10d..15aca85fc2 100644 --- a/source/os/src/osFile.c +++ b/source/os/src/osFile.c @@ -21,11 +21,12 @@ #include #include #include +#include #define F_OK 0 #define W_OK 2 #define R_OK 4 -#define _SEND_FILE_STEP_ 1000 +#define _SEND_FILE_STEP_ 1024 #else #include @@ -44,12 +45,22 @@ typedef int32_t FileFd; +#ifdef WINDOWS +typedef struct TdFile { + TdThreadRwlock rwlock; + int refId; + HANDLE hFile; + FILE* fp; + int32_t tdFileOptions; +} TdFile; +#else typedef struct TdFile { TdThreadRwlock rwlock; int refId; FileFd fd; FILE *fp; } TdFile; +#endif // WINDOWS #define FILE_WITH_LOCK 1 @@ -240,15 +251,12 @@ int32_t taosStatFile(const char *path, int64_t *size, int32_t *mtime, int32_t *a return 0; } int32_t taosDevInoFile(TdFilePtr pFile, int64_t *stDev, int64_t *stIno) { - if (pFile == NULL || pFile->fd < 0) { +#ifdef WINDOWS + if (pFile == NULL || pFile->hFile == NULL) { return -1; } - -#ifdef WINDOWS - BY_HANDLE_FILE_INFORMATION bhfi; - HANDLE handle = (HANDLE)_get_osfhandle(pFile->fd); - if (GetFileInformationByHandle(handle, &bhfi) == FALSE) { + if 
(GetFileInformationByHandle(pFile->hFile, &bhfi) == FALSE) { printf("taosFStatFile get file info fail."); return -1; } @@ -262,7 +270,9 @@ int32_t taosDevInoFile(TdFilePtr pFile, int64_t *stDev, int64_t *stIno) { } #else - + if (pFile == NULL || pFile->fd < 0) { + return -1; + } struct stat fileStat; int32_t code = fstat(pFile->fd, &fileStat); if (code < 0) { @@ -282,116 +292,365 @@ int32_t taosDevInoFile(TdFilePtr pFile, int64_t *stDev, int64_t *stIno) { return 0; } -TdFilePtr taosOpenFile(const char *path, int32_t tdFileOptions) { - int fd = -1; - FILE *fp = NULL; - if (tdFileOptions & TD_FILE_STREAM) { - char *mode = NULL; - if (tdFileOptions & TD_FILE_APPEND) { - mode = (tdFileOptions & TD_FILE_TEXT) ? "at+" : "ab+"; - } else if (tdFileOptions & TD_FILE_TRUNC) { - mode = (tdFileOptions & TD_FILE_TEXT) ? "wt+" : "wb+"; - } else if ((tdFileOptions & TD_FILE_READ) && !(tdFileOptions & TD_FILE_WRITE)) { - mode = (tdFileOptions & TD_FILE_TEXT) ? "rt" : "rb"; - } else { - mode = (tdFileOptions & TD_FILE_TEXT) ? "rt+" : "rb+"; - } - ASSERT(!(tdFileOptions & TD_FILE_EXCL)); - if (tdFileOptions & TD_FILE_EXCL) { - return NULL; - } - fp = fopen(path, mode); - if (fp == NULL) { - return NULL; - } +FILE *taosOpenFileForStream(const char *path, int32_t tdFileOptions) { + char *mode = NULL; + if (tdFileOptions & TD_FILE_APPEND) { + mode = (tdFileOptions & TD_FILE_TEXT) ? "at+" : "ab+"; + } else if (tdFileOptions & TD_FILE_TRUNC) { + mode = (tdFileOptions & TD_FILE_TEXT) ? "wt+" : "wb+"; + } else if ((tdFileOptions & TD_FILE_READ) && !(tdFileOptions & TD_FILE_WRITE)) { + mode = (tdFileOptions & TD_FILE_TEXT) ? "rt" : "rb"; } else { - int access = O_BINARY; - access |= (tdFileOptions & TD_FILE_CREATE) ? O_CREAT : 0; - if ((tdFileOptions & TD_FILE_WRITE) && (tdFileOptions & TD_FILE_READ)) { - access |= O_RDWR; - } else if (tdFileOptions & TD_FILE_WRITE) { - access |= O_WRONLY; - } else if (tdFileOptions & TD_FILE_READ) { - access |= O_RDONLY; - } - access |= (tdFileOptions & TD_FILE_TRUNC) ? O_TRUNC : 0; - access |= (tdFileOptions & TD_FILE_APPEND) ? O_APPEND : 0; - access |= (tdFileOptions & TD_FILE_TEXT) ? O_TEXT : 0; - access |= (tdFileOptions & TD_FILE_EXCL) ? O_EXCL : 0; -#ifdef WINDOWS - int32_t pmode = _S_IREAD | _S_IWRITE; - if (tdFileOptions & TD_FILE_AUTO_DEL) { - pmode |= _O_TEMPORARY; - } - fd = _open(path, access, pmode); -#else - fd = open(path, access, S_IRWXU | S_IRWXG | S_IRWXO); -#endif - if (fd == -1) { - return NULL; - } + mode = (tdFileOptions & TD_FILE_TEXT) ? "rt+" : "rb+"; } - - TdFilePtr pFile = (TdFilePtr)taosMemoryMalloc(sizeof(TdFile)); - if (pFile == NULL) { - if (fd >= 0) close(fd); - if (fp != NULL) fclose(fp); + ASSERT(!(tdFileOptions & TD_FILE_EXCL)); + if (tdFileOptions & TD_FILE_EXCL) { return NULL; } - -#if FILE_WITH_LOCK - taosThreadRwlockInit(&(pFile->rwlock), NULL); -#endif - pFile->fd = fd; - pFile->fp = fp; - pFile->refId = 0; - - if (tdFileOptions & TD_FILE_AUTO_DEL) { -#ifdef WINDOWS - // do nothing, since the property of pmode is set with _O_TEMPORARY; the OS will recycle - // the file handle, as well as the space on disk. -#else - // Remove it instantly, so when the program exits normally/abnormally, the file - // will be automatically remove by OS. 
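The new taosOpenFileForStream above reduces stream opens to an fopen() mode-string lookup. A minimal standalone sketch of that mapping, using hypothetical OPT_* bits in place of the TD_FILE_* constants so it builds outside this tree:

#include <stdio.h>

/* Hypothetical stand-ins for the TD_FILE_* bits used in the hunk above. */
#define OPT_READ   0x01
#define OPT_WRITE  0x02
#define OPT_TRUNC  0x04
#define OPT_APPEND 0x08
#define OPT_TEXT   0x10

/* Pick an fopen() mode the same way taosOpenFileForStream does:
 * append -> "a+", truncate -> "w+", read-only -> "r", otherwise "r+",
 * with a text/binary suffix depending on the text flag. */
static const char *pickMode(int opts) {
  if (opts & OPT_APPEND) return (opts & OPT_TEXT) ? "at+" : "ab+";
  if (opts & OPT_TRUNC)  return (opts & OPT_TEXT) ? "wt+" : "wb+";
  if ((opts & OPT_READ) && !(opts & OPT_WRITE)) return (opts & OPT_TEXT) ? "rt" : "rb";
  return (opts & OPT_TEXT) ? "rt+" : "rb+";
}

int main(void) {
  /* hypothetical demo file; binary truncate-for-write maps to "wb+" */
  FILE *fp = fopen("/tmp/stream_demo.bin", pickMode(OPT_WRITE | OPT_TRUNC));
  if (fp == NULL) return 1;
  fputs("hello\n", fp);
  fclose(fp);
  return 0;
}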
- unlink(path); -#endif - } - - return pFile; + return fopen(path, mode); } -int32_t taosCloseFile(TdFilePtr *ppFile) { - int32_t code = 0; - if (ppFile == NULL || *ppFile == NULL) { +#ifdef WINDOWS +HANDLE taosOpenFileNotStream(const char *path, int32_t tdFileOptions) { + DWORD openMode = 0; + DWORD access = 0; + DWORD fileFlag = FILE_ATTRIBUTE_NORMAL; + DWORD shareMode = FILE_SHARE_READ; + + openMode = OPEN_EXISTING; + if (tdFileOptions & TD_FILE_CREATE) { + openMode = OPEN_ALWAYS; + } else if (tdFileOptions & TD_FILE_EXCL) { + openMode = CREATE_NEW; + } else if ((tdFileOptions & TD_FILE_TRUNC)) { + openMode = TRUNCATE_EXISTING; + access |= GENERIC_WRITE; + } + if (tdFileOptions & TD_FILE_APPEND) { + access |= FILE_APPEND_DATA; + } + if (tdFileOptions & TD_FILE_WRITE) { + access |= GENERIC_WRITE; + } + + shareMode |= FILE_SHARE_WRITE; + + access |= GENERIC_READ; + + if (tdFileOptions & TD_FILE_AUTO_DEL) { + fileFlag |= FILE_ATTRIBUTE_TEMPORARY; + } + if (tdFileOptions & TD_FILE_WRITE_THROUGH) { + fileFlag |= FILE_FLAG_WRITE_THROUGH; + } + + HANDLE h = CreateFile(path, access, shareMode, NULL, openMode, fileFlag, NULL); + if (h != INVALID_HANDLE_VALUE && (tdFileOptions & TD_FILE_APPEND) && (tdFileOptions & TD_FILE_WRITE)) { + SetFilePointer(h, 0, NULL, FILE_END); + } + if (h == INVALID_HANDLE_VALUE) { + DWORD dwError = GetLastError(); + LPVOID lpMsgBuf; + FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, NULL, dwError, + 0, + (LPTSTR)&lpMsgBuf, 0, NULL); + printf("CreateFile failed with error %d: %s", dwError, (char*)lpMsgBuf); + LocalFree(lpMsgBuf); + } + return h; +} + +int64_t taosReadFile(TdFilePtr pFile, void *buf, int64_t count) { +#if FILE_WITH_LOCK + taosThreadRwlockRdlock(&(pFile->rwlock)); +#endif + if (pFile->hFile == NULL) { +#if FILE_WITH_LOCK + taosThreadRwlockUnlock(&(pFile->rwlock)); +#endif + return -1; + } + + DWORD bytesRead; + if (!ReadFile(pFile->hFile, buf, count, &bytesRead, NULL)) { + bytesRead = -1; + } +#if FILE_WITH_LOCK + taosThreadRwlockUnlock(&(pFile->rwlock)); +#endif + return bytesRead; +} + +int64_t taosWriteFile(TdFilePtr pFile, const void *buf, int64_t count) { + if (pFile == NULL || pFile->hFile == NULL) { return 0; } #if FILE_WITH_LOCK - taosThreadRwlockWrlock(&((*ppFile)->rwlock)); + taosThreadRwlockWrlock(&(pFile->rwlock)); #endif - if ((*ppFile)->fp != NULL) { - fflush((*ppFile)->fp); - fclose((*ppFile)->fp); - (*ppFile)->fp = NULL; + + DWORD bytesWritten; + if (!WriteFile(pFile->hFile, buf, count, &bytesWritten, NULL)) { + bytesWritten = -1; } - if ((*ppFile)->fd >= 0) { -#ifdef WINDOWS - HANDLE h = (HANDLE)_get_osfhandle((*ppFile)->fd); - !FlushFileBuffers(h); -#else - // warning: never fsync silently in base lib - /*fsync((*ppFile)->fd);*/ -#endif - code = close((*ppFile)->fd); - (*ppFile)->fd = -1; - } - (*ppFile)->refId = 0; + #if FILE_WITH_LOCK - taosThreadRwlockUnlock(&((*ppFile)->rwlock)); - taosThreadRwlockDestroy(&((*ppFile)->rwlock)); + taosThreadRwlockUnlock(&(pFile->rwlock)); #endif - taosMemoryFree(*ppFile); - *ppFile = NULL; - return code; + return bytesWritten; +} + +int64_t taosPWriteFile(TdFilePtr pFile, const void *buf, int64_t count, int64_t offset) { + if (pFile == NULL) { + return 0; + } +#if FILE_WITH_LOCK + taosThreadRwlockWrlock(&(pFile->rwlock)); +#endif + ASSERT(pFile->hFile != NULL); // Please check if you have closed the file. 
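taosOpenFileNotStream on Windows translates the same option bits into CreateFile arguments, including FILE_FLAG_WRITE_THROUGH for TD_FILE_WRITE_THROUGH and an explicit seek to the end to emulate append mode. A hedged, Win32-only sketch of that core translation (hypothetical path, not the TDengine wrapper itself):

#ifdef _WIN32
#include <windows.h>
#include <stdio.h>

int main(void) {
  /* OPEN_ALWAYS plays the role of TD_FILE_CREATE, FILE_FLAG_WRITE_THROUGH the
   * role of TD_FILE_WRITE_THROUGH. */
  HANDLE h = CreateFileA("C:\\temp\\wt_demo.bin",
                         GENERIC_READ | GENERIC_WRITE,
                         FILE_SHARE_READ | FILE_SHARE_WRITE,
                         NULL,
                         OPEN_ALWAYS,
                         FILE_ATTRIBUTE_NORMAL | FILE_FLAG_WRITE_THROUGH,
                         NULL);
  if (h == INVALID_HANDLE_VALUE) {
    printf("CreateFile failed: %lu\n", GetLastError());
    return 1;
  }

  /* Emulate append mode: position at end of file before writing. */
  SetFilePointer(h, 0, NULL, FILE_END);

  const char msg[] = "append with write-through\n";
  DWORD written = 0;
  if (!WriteFile(h, msg, (DWORD)(sizeof(msg) - 1), &written, NULL)) {
    printf("WriteFile failed: %lu\n", GetLastError());
  }
  CloseHandle(h);
  return 0;
}
#else
int main(void) { return 0; } /* Win32-only demo. */
#endif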
+ if (pFile->hFile == NULL) { +#if FILE_WITH_LOCK + taosThreadRwlockUnlock(&(pFile->rwlock)); +#endif + return 0; + } + + DWORD ret = 0; + OVERLAPPED ol = {0}; + ol.OffsetHigh = (uint32_t)((offset & 0xFFFFFFFF00000000LL) >> 0x20); + ol.Offset = (uint32_t)(offset & 0xFFFFFFFFLL); + + SetLastError(0); + BOOL result = WriteFile(pFile->hFile, buf, count, &ret, &ol); + if (!result) { + errno = GetLastError(); + ret = -1; + } + +#if FILE_WITH_LOCK + taosThreadRwlockUnlock(&(pFile->rwlock)); +#endif + return ret; +} + +int64_t taosLSeekFile(TdFilePtr pFile, int64_t offset, int32_t whence) { + if (pFile == NULL || pFile->hFile == NULL) { + return -1; + } +#if FILE_WITH_LOCK + taosThreadRwlockRdlock(&(pFile->rwlock)); +#endif + + LARGE_INTEGER liOffset; + liOffset.QuadPart = offset; + if (!SetFilePointerEx(pFile->hFile, liOffset, NULL, whence)) { + return -1; + } + + liOffset.QuadPart = 0; + if (!SetFilePointerEx(pFile->hFile, liOffset, &liOffset, FILE_CURRENT)) { + return -1; + } +#if FILE_WITH_LOCK + taosThreadRwlockUnlock(&(pFile->rwlock)); +#endif + return liOffset.QuadPart; +} + +int32_t taosFStatFile(TdFilePtr pFile, int64_t *size, int32_t *mtime) { + if (pFile == NULL || pFile->hFile == NULL) { + return 0; + } + + if (size != NULL) { + LARGE_INTEGER fileSize; + if (!GetFileSizeEx(pFile->hFile, &fileSize)) { + return -1; // Error getting file size + } + *size = fileSize.QuadPart; + } + + if (mtime != NULL) { + FILETIME creationTime, lastAccessTime, lastWriteTime; + if (!GetFileTime(pFile->hFile, &creationTime, &lastAccessTime, &lastWriteTime)) { + return -1; // Error getting file time + } + // Convert the FILETIME structure to a time_t value + ULARGE_INTEGER ull; + ull.LowPart = lastWriteTime.dwLowDateTime; + ull.HighPart = lastWriteTime.dwHighDateTime; + *mtime = (int32_t)((ull.QuadPart - 116444736000000000ULL) / 10000000ULL); + } + return 0; +} + +int32_t taosLockFile(TdFilePtr pFile) { + if (pFile == NULL || pFile->hFile == NULL) { + return -1; + } + + BOOL fSuccess = FALSE; + LARGE_INTEGER fileSize; + OVERLAPPED overlapped = {0}; + + fSuccess = LockFileEx(pFile->hFile, LOCKFILE_EXCLUSIVE_LOCK | LOCKFILE_FAIL_IMMEDIATELY, + 0, // reserved + ~0, // number of bytes to lock low + ~0, // number of bytes to lock high + &overlapped // overlapped structure + ); + if (!fSuccess) { + return GetLastError(); + } + return 0; +} + +int32_t taosUnLockFile(TdFilePtr pFile) { + if (pFile == NULL || pFile->hFile == NULL) { + return 0; + } + BOOL fSuccess = FALSE; + OVERLAPPED overlapped = {0}; + + fSuccess = UnlockFileEx(pFile->hFile, 0, ~0, ~0, &overlapped); + if (!fSuccess) { + return GetLastError(); + } + return 0; +} + +int32_t taosFtruncateFile(TdFilePtr pFile, int64_t l_size) { + if (pFile == NULL) { + return 0; + } + if (pFile->hFile == NULL) { + printf("Ftruncate file error, hFile was null\n"); + return -1; + } + + LARGE_INTEGER li_0; + li_0.QuadPart = (int64_t)0; + BOOL cur = SetFilePointerEx(pFile->hFile, li_0, NULL, FILE_CURRENT); + if (!cur) { + printf("SetFilePointerEx Error getting current position in file.\n"); + return -1; + } + + LARGE_INTEGER li_size; + li_size.QuadPart = l_size; + BOOL cur2 = SetFilePointerEx(pFile->hFile, li_size, NULL, FILE_BEGIN); + if (cur2 == 0) { + int error = GetLastError(); + printf("SetFilePointerEx GetLastError is: %d\n", error); + switch (error) { + case ERROR_INVALID_HANDLE: + errno = EBADF; + break; + default: + errno = EIO; + break; + } + return -1; + } + + if (!SetEndOfFile(pFile->hFile)) { + int error = GetLastError(); + printf("SetEndOfFile GetLastError 
is:%d", error); + switch (error) { + case ERROR_INVALID_HANDLE: + errno = EBADF; + break; + default: + errno = EIO; + break; + } + return -1; + } + return 0; +} + +int64_t taosFSendFile(TdFilePtr pFileOut, TdFilePtr pFileIn, int64_t *offset, int64_t size) { + if (pFileOut == NULL || pFileIn == NULL) { + return 0; + } + if (pFileIn->hFile == NULL || pFileOut->hFile == NULL) { + return 0; + } + + LARGE_INTEGER fileOffset; + fileOffset.QuadPart = *offset; + + if (!SetFilePointerEx(pFileIn->hFile, fileOffset, &fileOffset, FILE_BEGIN)) { + return -1; + } + + int64_t writeLen = 0; + uint8_t buffer[_SEND_FILE_STEP_] = {0}; + + DWORD bytesRead; + DWORD bytesWritten; + for (int64_t len = 0; len < (size - _SEND_FILE_STEP_); len += _SEND_FILE_STEP_) { + if (!ReadFile(pFileIn->hFile, buffer, _SEND_FILE_STEP_, &bytesRead, NULL)) { + return writeLen; + } + + if (bytesRead <= 0) { + return writeLen; + } else if (bytesRead < _SEND_FILE_STEP_) { + if (!WriteFile(pFileOut->hFile, buffer, bytesRead, &bytesWritten, NULL)) { + return -1; + } else { + return (int64_t)(writeLen + bytesRead); + } + } else { + if (!WriteFile(pFileOut->hFile, buffer, _SEND_FILE_STEP_, &bytesWritten, NULL)) { + return -1; + } else { + writeLen += _SEND_FILE_STEP_; + } + } + } + + int64_t remain = size - writeLen; + if (remain > 0) { + DWORD bytesRead; + if (!ReadFile(pFileIn->hFile, buffer, (DWORD)remain, &bytesRead, NULL)) { + return -1; + } + + if (bytesRead <= 0) { + return writeLen; + } else { + DWORD bytesWritten; + if (!WriteFile(pFileOut->hFile, buffer, bytesRead, &bytesWritten, NULL)) { + return -1; + } else { + writeLen += bytesWritten; + } + } + } + return writeLen; +} + +#else +int taosOpenFileNotStream(const char *path, int32_t tdFileOptions) { + int access = O_BINARY; + access |= (tdFileOptions & TD_FILE_CREATE) ? O_CREAT : 0; + if ((tdFileOptions & TD_FILE_WRITE) && (tdFileOptions & TD_FILE_READ)) { + access |= O_RDWR; + } else if (tdFileOptions & TD_FILE_WRITE) { + access |= O_WRONLY; + } else if (tdFileOptions & TD_FILE_READ) { + access |= O_RDONLY; + } + access |= (tdFileOptions & TD_FILE_TRUNC) ? O_TRUNC : 0; + access |= (tdFileOptions & TD_FILE_APPEND) ? O_APPEND : 0; + access |= (tdFileOptions & TD_FILE_TEXT) ? O_TEXT : 0; + access |= (tdFileOptions & TD_FILE_EXCL) ? O_EXCL : 0; + access |= (tdFileOptions & TD_FILE_CLOEXEC) ? O_CLOEXEC : 0; + + int fd = open(path, access, S_IRWXU | S_IRWXG | S_IRWXO); + return fd; } int64_t taosReadFile(TdFilePtr pFile, void *buf, int64_t count) { @@ -407,7 +666,7 @@ int64_t taosReadFile(TdFilePtr pFile, void *buf, int64_t count) { } int64_t leftbytes = count; int64_t readbytes; - char *tbuf = (char *)buf; + char * tbuf = (char *)buf; while (leftbytes > 0) { #ifdef WINDOWS @@ -441,42 +700,6 @@ int64_t taosReadFile(TdFilePtr pFile, void *buf, int64_t count) { return count; } -int64_t taosPReadFile(TdFilePtr pFile, void *buf, int64_t count, int64_t offset) { - if (pFile == NULL) { - return 0; - } -#if FILE_WITH_LOCK - taosThreadRwlockRdlock(&(pFile->rwlock)); -#endif - ASSERT(pFile->fd >= 0); // Please check if you have closed the file. 
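Both the new Windows pwrite/pread wrappers and the removed taosPReadFile rely on the same positional-I/O pattern: pread() on POSIX, a synchronous ReadFile with an OVERLAPPED offset on Windows. A small sketch of just that pattern (hypothetical helper name and demo file):

#include <stdint.h>

#ifdef _WIN32
#include <windows.h>
/* Read `count` bytes at `offset` without disturbing the handle's file pointer,
 * the same OVERLAPPED trick used by the pread/pwrite wrappers above. */
static int64_t preadDemo(HANDLE h, void *buf, uint32_t count, int64_t offset) {
  OVERLAPPED ol = {0};
  ol.Offset     = (uint32_t)(offset & 0xFFFFFFFFLL);
  ol.OffsetHigh = (uint32_t)((uint64_t)offset >> 32);
  DWORD got = 0;
  if (!ReadFile(h, buf, count, &got, &ol) && GetLastError() != ERROR_HANDLE_EOF) {
    return -1;
  }
  return (int64_t)got;
}
int main(void) { return 0; } /* open a HANDLE with CreateFile before use */
#else
#include <fcntl.h>
#include <unistd.h>
static int64_t preadDemo(int fd, void *buf, uint32_t count, int64_t offset) {
  return (int64_t)pread(fd, buf, count, (off_t)offset);
}

int main(void) {
  int fd = open("/etc/hostname", O_RDONLY); /* any readable file works */
  if (fd < 0) return 1;
  char buf[16] = {0};
  preadDemo(fd, buf, sizeof(buf) - 1, 0);
  close(fd);
  return 0;
}
#endif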
- if (pFile->fd < 0) { -#if FILE_WITH_LOCK - taosThreadRwlockUnlock(&(pFile->rwlock)); -#endif - return -1; - } -#ifdef WINDOWS - DWORD ret = 0; - OVERLAPPED ol = {0}; - ol.OffsetHigh = (uint32_t)((offset & 0xFFFFFFFF00000000LL) >> 0x20); - ol.Offset = (uint32_t)(offset & 0xFFFFFFFFLL); - - HANDLE handle = (HANDLE)_get_osfhandle(pFile->fd); - SetLastError(0); - BOOL result = ReadFile(handle, buf, count, &ret, &ol); - if (!result && GetLastError() != ERROR_HANDLE_EOF) { - errno = GetLastError(); - ret = -1; - } -#else - int64_t ret = pread(pFile->fd, buf, count, offset); -#endif -#if FILE_WITH_LOCK - taosThreadRwlockUnlock(&(pFile->rwlock)); -#endif - return ret; -} - int64_t taosWriteFile(TdFilePtr pFile, const void *buf, int64_t count) { if (pFile == NULL) { return 0; @@ -493,7 +716,7 @@ int64_t taosWriteFile(TdFilePtr pFile, const void *buf, int64_t count) { int64_t nleft = count; int64_t nwritten = 0; - char *tbuf = (char *)buf; + char * tbuf = (char *)buf; while (nleft > 0) { nwritten = write(pFile->fd, (void *)tbuf, (uint32_t)nleft); @@ -706,25 +929,6 @@ int32_t taosFtruncateFile(TdFilePtr pFile, int64_t l_size) { #endif } -int32_t taosFsyncFile(TdFilePtr pFile) { - if (pFile == NULL) { - return 0; - } - - // this implementation is WRONG - // fflush is not a replacement of fsync - if (pFile->fp != NULL) return fflush(pFile->fp); - if (pFile->fd >= 0) { -#ifdef WINDOWS - HANDLE h = (HANDLE)_get_osfhandle(pFile->fd); - return !FlushFileBuffers(h); -#else - return fsync(pFile->fd); -#endif - } - return 0; -} - int64_t taosFSendFile(TdFilePtr pFileOut, TdFilePtr pFileIn, int64_t *offset, int64_t size) { if (pFileOut == NULL || pFileIn == NULL) { return 0; @@ -824,6 +1028,167 @@ int64_t taosFSendFile(TdFilePtr pFileOut, TdFilePtr pFileIn, int64_t *offset, in #endif } +#endif // WINDOWS + +TdFilePtr taosOpenFile(const char *path, int32_t tdFileOptions) { + FILE *fp = NULL; +#ifdef WINDOWS + HANDLE hFile = NULL; +#else + int fd = -1; +#endif + if (tdFileOptions & TD_FILE_STREAM) { + fp = taosOpenFileForStream(path, tdFileOptions); + if (fp == NULL) return NULL; + } else { +#ifdef WINDOWS + hFile = taosOpenFileNotStream(path, tdFileOptions); + if (hFile == INVALID_HANDLE_VALUE) return NULL; +#else + fd = taosOpenFileNotStream(path, tdFileOptions); + if (fd == -1) return NULL; +#endif + } + + TdFilePtr pFile = (TdFilePtr)taosMemoryMalloc(sizeof(TdFile)); + if (pFile == NULL) { +#ifdef WINDOWS + if (hFile != NULL) CloseHandle(hFile); +#else + if (fd >= 0) close(fd); +#endif + if (fp != NULL) fclose(fp); + return NULL; + } + +#if FILE_WITH_LOCK + taosThreadRwlockInit(&(pFile->rwlock), NULL); +#endif + pFile->fp = fp; + pFile->refId = 0; + + #ifdef WINDOWS + pFile->hFile = hFile; + pFile->tdFileOptions = tdFileOptions; + // do nothing, since the property of pmode is set with _O_TEMPORARY; the OS will recycle + // the file handle, as well as the space on disk. +#else + pFile->fd = fd; + // Remove it instantly, so when the program exits normally/abnormally, the file + // will be automatically remove by OS. 
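The POSIX taosWriteFile loop above exists because write() may return short counts, and the removed taosFsyncFile comment is the reminder that fflush() is not a substitute for fsync(). A compact sketch combining both points (hypothetical file path):

#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <unistd.h>

/* Write the whole buffer even if write() returns short counts, then fsync()
 * so the data is actually durable -- fflush() alone only drains stdio buffers. */
static int writeAllDurable(int fd, const char *buf, size_t len) {
  size_t left = len;
  while (left > 0) {
    ssize_t n = write(fd, buf, left);
    if (n < 0) {
      if (errno == EINTR) continue; /* retry if interrupted by a signal */
      return -1;
    }
    buf  += n;
    left -= (size_t)n;
  }
  return fsync(fd); /* flush kernel buffers to stable storage */
}

int main(void) {
  int fd = open("/tmp/durable_demo.bin", O_CREAT | O_WRONLY | O_TRUNC, 0644);
  if (fd < 0) return 1;
  const char msg[] = "written fully and fsync'ed\n";
  int rc = writeAllDurable(fd, msg, sizeof(msg) - 1);
  close(fd);
  return rc == 0 ? 0 : 1;
}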
+ if (tdFileOptions & TD_FILE_AUTO_DEL) { + unlink(path); + } +#endif + return pFile; +} + +int32_t taosCloseFile(TdFilePtr *ppFile) { + int32_t code = 0; + if (ppFile == NULL || *ppFile == NULL) { + return 0; + } +#if FILE_WITH_LOCK + taosThreadRwlockWrlock(&((*ppFile)->rwlock)); +#endif + if ((*ppFile)->fp != NULL) { + fflush((*ppFile)->fp); + fclose((*ppFile)->fp); + (*ppFile)->fp = NULL; + } +#ifdef WINDOWS + if ((*ppFile)->hFile != NULL) { + // FlushFileBuffers((*ppFile)->hFile); + if (!CloseHandle((*ppFile)->hFile)) { + code = -1; + } + (*ppFile)->hFile = NULL; +#else + if ((*ppFile)->fd >= 0) { + // warning: never fsync silently in base lib + /*fsync((*ppFile)->fd);*/ + code = close((*ppFile)->fd); + (*ppFile)->fd = -1; +#endif + } + (*ppFile)->refId = 0; +#if FILE_WITH_LOCK + taosThreadRwlockUnlock(&((*ppFile)->rwlock)); + taosThreadRwlockDestroy(&((*ppFile)->rwlock)); +#endif + taosMemoryFree(*ppFile); + *ppFile = NULL; + return code; +} + +int64_t taosPReadFile(TdFilePtr pFile, void *buf, int64_t count, int64_t offset) { + if (pFile == NULL) { + return 0; + } + +#ifdef WINDOWS +#if FILE_WITH_LOCK + taosThreadRwlockRdlock(&(pFile->rwlock)); +#endif + ASSERT(pFile->hFile != NULL); // Please check if you have closed the file. + if (pFile->hFile == NULL) { +#if FILE_WITH_LOCK + taosThreadRwlockUnlock(&(pFile->rwlock)); +#endif + return -1; + } + DWORD ret = 0; + OVERLAPPED ol = {0}; + ol.OffsetHigh = (uint32_t)((offset & 0xFFFFFFFF00000000LL) >> 0x20); + ol.Offset = (uint32_t)(offset & 0xFFFFFFFFLL); + + SetLastError(0); + BOOL result = ReadFile(pFile->hFile, buf, count, &ret, &ol); + if (!result && GetLastError() != ERROR_HANDLE_EOF) { + errno = GetLastError(); + ret = -1; + } +#else +#if FILE_WITH_LOCK + taosThreadRwlockRdlock(&(pFile->rwlock)); +#endif + ASSERT(pFile->fd >= 0); // Please check if you have closed the file. + if (pFile->fd < 0) { +#if FILE_WITH_LOCK + taosThreadRwlockUnlock(&(pFile->rwlock)); +#endif + return -1; + } + int64_t ret = pread(pFile->fd, buf, count, offset); +#endif +#if FILE_WITH_LOCK + taosThreadRwlockUnlock(&(pFile->rwlock)); +#endif + return ret; +} + +int32_t taosFsyncFile(TdFilePtr pFile) { + if (pFile == NULL) { + return 0; + } + + // this implementation is WRONG + // fflush is not a replacement of fsync + if (pFile->fp != NULL) return fflush(pFile->fp); +#ifdef WINDOWS + if (pFile->hFile != NULL) { + if (pFile->tdFileOptions & TD_FILE_WRITE_THROUGH) { + return 0; + } + return !FlushFileBuffers(pFile->hFile); +#else + if (pFile->fd >= 0) { + return fsync(pFile->fd); +#endif + } + return 0; +} + void taosFprintfFile(TdFilePtr pFile, const char *format, ...) { if (pFile == NULL || pFile->fp == NULL) { return; @@ -834,7 +1199,13 @@ void taosFprintfFile(TdFilePtr pFile, const char *format, ...) 
{ va_end(ap); } -bool taosValidFile(TdFilePtr pFile) { return pFile != NULL && pFile->fd > 0; } +bool taosValidFile(TdFilePtr pFile) { +#ifdef WINDOWS + return pFile != NULL && pFile->hFile != NULL; +#else + return pFile != NULL && pFile->fd > 0; +#endif +} int32_t taosUmaskFile(int32_t maskVal) { #ifdef WINDOWS @@ -960,14 +1331,20 @@ int32_t taosCompressFile(char *srcFileName, char *destFileName) { goto cmp_end; } - pFile = taosOpenFile(destFileName, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); - if (pFile == NULL) { + int access = O_BINARY | O_WRONLY | O_TRUNC | O_CREAT; +#ifdef WINDOWS + int32_t pmode = _S_IREAD | _S_IWRITE; +#else + int32_t pmode = S_IRWXU | S_IRWXG | S_IRWXO; +#endif + int fd = open(destFileName, access, pmode); + if (fd < 0) { ret = -2; goto cmp_end; } // Both gzclose() and fclose() will close the associated fd, so they need to have different fds. - FileFd gzFd = dup(pFile->fd); + FileFd gzFd = dup(fd); if (gzFd < 0) { ret = -4; goto cmp_end; diff --git a/source/os/test/osTests.cpp b/source/os/test/osTests.cpp index a2ccc4de02..e2185aeac2 100644 --- a/source/os/test/osTests.cpp +++ b/source/os/test/osTests.cpp @@ -15,6 +15,7 @@ #include #include +#include #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wwrite-strings" @@ -29,6 +30,10 @@ #include "os.h" #include "tlog.h" +#ifdef WINDOWS +#include +#endif // WINDOWS + TEST(osTest, osSystem) { const char *flags = "UTL FATAL "; ELogLevel level = DEBUG_FATAL; @@ -68,7 +73,8 @@ void fileOperateOnBusy(void *param) { char * fname = (char *)param; TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE); printf("On busy thread open file\n"); - ASSERT_NE(pFile, nullptr); + if (pFile == NULL) return; + // ASSERT_NE(pFile, nullptr); int ret = taosLockFile(pFile); printf("On busy thread lock file ret:%d\n", ret); @@ -97,6 +103,7 @@ TEST(osTest, osFile) { TdFilePtr pOutFD = taosCreateFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); ASSERT_NE(pOutFD, nullptr); printf("create file success\n"); + taosCloseFile(&pOutFD); TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE); printf("open file\n"); @@ -135,4 +142,178 @@ TEST(osTest, osFile) { //printf("remove file success"); } +#ifndef OSFILE_PERFORMANCE_TEST + +#define MAX_WORDS 100 +#define MAX_WORD_LENGTH 20 +#define MAX_TEST_FILE_SIZE 100000 +#define TESTTIMES 1000 + +char *getRandomWord() { + static char words[][MAX_WORD_LENGTH] = { + "Lorem", "ipsum", "dolor", "sit", "amet", "consectetur", "adipiscing", "elit", + "sed", "do", "eiusmod", "tempor", "incididunt", "ut", "labore", "et", "dolore", "magna", + "aliqua", "Ut", "enim", "ad", "minim", "veniam", "quis", "nostrud", "exercitation", "ullamco", + "Why", "do", "programmers", "prefer", "using", "dark", "mode?", "Because", "light", "attracts", + "bugs", "and", "they", "want", "to", "code", "in", "peace,", "like", "a", "ninja", "in", "the", "shadows." 
+ "aliqua", "Ut", "enim", "ad", "minim", "veniam", "quis", "nostrud", "exercitation", "ullamco", + "laboris", "nisi", "ut", "aliquip", "ex", "ea", "commodo", "consequat", "Duis", "aute", "irure", + "dolor", "in", "reprehenderit", "in", "voluptate", "velit", "esse", "cillum", "dolore", "eu", + "fugiat", "nulla", "pariatur", "Excepteur", "sint", "occaecat", "cupidatat", "non", "proident", + "sunt", "in", "culpa", "qui", "officia", "deserunt", "mollit", "anim", "id", "est", "laborum" + }; + + return words[taosRand() % MAX_WORDS]; +} + +int64_t fillBufferWithRandomWords(char *buffer, int64_t maxBufferSize) { + int64_t len = 0; + while (len < maxBufferSize) { + char * word = getRandomWord(); + size_t wordLen = strlen(word); + + if (len + wordLen + 1 < maxBufferSize) { + strcat(buffer, word); + strcat(buffer, " "); + len += wordLen + 1; + } else { + break; + } + } + return len; +} + +int64_t calculateAverage(int64_t arr[], int size) { + int64_t sum = 0; + for (int i = 0; i < size; i++) { + sum += arr[i]; + } + return sum / size; +} + +int64_t calculateMax(int64_t arr[], int size) { + int64_t max = arr[0]; + for (int i = 1; i < size; i++) { + if (arr[i] > max) { + max = arr[i]; + } + } + return max; +} + +int64_t calculateMin(int64_t arr[], int size) { + int64_t min = arr[0]; + for (int i = 1; i < size; i++) { + if (arr[i] < min) { + min = arr[i]; + } + } + return min; +} + +TEST(osTest, osFilePerformance) { + printf("os file performance testting...\n"); + int64_t WriteFileCost; + int64_t ReadFileCost; + int64_t OpenForWriteCloseFileCost; + int64_t OpenForReadCloseFileCost; + + char * buffer; + char * writeBuffer = (char *)taosMemoryCalloc(1, MAX_TEST_FILE_SIZE); + char * readBuffer = (char *)taosMemoryCalloc(1, MAX_TEST_FILE_SIZE); + int64_t size = fillBufferWithRandomWords(writeBuffer, MAX_TEST_FILE_SIZE); + char * fname = "./osFilePerformanceTest.txt"; + + TdFilePtr pOutFD = taosCreateFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); + ASSERT_NE(pOutFD, nullptr); + taosCloseFile(&pOutFD); + + printf("os file performance start write...\n"); + int64_t t1 = taosGetTimestampUs(); + for (int i = 0; i < TESTTIMES; ++i) { + TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_WRITE_THROUGH); + ASSERT_NE(pFile, nullptr); + taosWriteFile(pFile, writeBuffer, size); + taosFsyncFile(pFile); + taosCloseFile(&pFile); + } + + int64_t t2 = taosGetTimestampUs(); + WriteFileCost = t2 - t1; + + printf("os file performance start read...\n"); + for (int i = 0; i < TESTTIMES; ++i) { + TdFilePtr pFile = taosOpenFile(fname, TD_FILE_READ); + ASSERT_NE(pFile, nullptr); + taosReadFile(pFile, readBuffer, size); + taosCloseFile(&pFile); + int readLine = strlen(readBuffer); + ASSERT_EQ(size, readLine); + } + int64_t t3 = taosGetTimestampUs(); + ReadFileCost = t3 - t2; + + printf("os file performance start open1...\n"); + for (int i = 0; i < TESTTIMES; ++i) { + TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE); + ASSERT_NE(pFile, nullptr); + taosCloseFile(&pFile); + } + int64_t t4 = taosGetTimestampUs(); + OpenForWriteCloseFileCost = t4 - t3; + + printf("os file performance start open2...\n"); + for (int i = 0; i < TESTTIMES; ++i) { + TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_READ); + ASSERT_NE(pFile, nullptr); + taosCloseFile(&pFile); + } + int64_t t5 = taosGetTimestampUs(); + OpenForReadCloseFileCost = t5 - t4; + +#ifdef WINDOWS + printf("os file performance start window native...\n"); + for (int i = 0; i < TESTTIMES; ++i) { + HANDLE hFile = 
CreateFile(fname, // 文件名 + GENERIC_WRITE, // 写权限 + FILE_SHARE_READ, // 不共享 + NULL, // 默认安全描述符 + OPEN_ALWAYS, // 打开已存在的文件 + FILE_FLAG_WRITE_THROUGH, // 文件标志,可以根据实际需求调整 + NULL // 模板文件句柄,对于创建新文件不需要 + ); + + if (hFile == INVALID_HANDLE_VALUE) { + printf("Error opening file\n"); + break; + } + + // 写入数据 + DWORD bytesWritten; + if (!WriteFile(hFile, writeBuffer, size, &bytesWritten, NULL)) { + // 处理错误 + printf("Error writing to file\n"); + CloseHandle(hFile); + break; + } + // 关闭文件 + CloseHandle(hFile); + } + int64_t t6 = taosGetTimestampUs(); + int64_t nativeWritCost = t6 - t5; + + printf("Test Write file using native API %d times, cost: %" PRId64 "us\n", TESTTIMES, nativeWritCost); +#endif // WINDOWS + + taosMemoryFree(writeBuffer); + taosMemoryFree(readBuffer); + + printf("Test Write file %d times, cost: %" PRId64 "us\n", TESTTIMES, WriteFileCost); + printf("Test Read file %d times, cost: %" PRId64 "us\n", TESTTIMES, ReadFileCost); + printf("Test OpenForWrite & Close file %d times, cost: %" PRId64 "us\n", TESTTIMES, OpenForWriteCloseFileCost); + printf("Test OpenForRead & Close file %d times, cost: %" PRId64 "us\n", TESTTIMES, OpenForReadCloseFileCost); +} + +#endif OSFILE_PERFORMANCE_TEST + #pragma GCC diagnostic pop diff --git a/source/util/src/tcompression.c b/source/util/src/tcompression.c index 06b82f3ba1..3cc00ddc7f 100644 --- a/source/util/src/tcompression.c +++ b/source/util/src/tcompression.c @@ -280,8 +280,7 @@ int32_t tsDecompressINTImp(const char *const input, const int32_t nelements, cha #endif } -/* ----------------------------------------------Bool Compression - * ---------------------------------------------- */ +/* ----------------------------------------------Bool Compression ---------------------------------------------- */ // TODO: You can also implement it using RLE method. int32_t tsCompressBoolImp(const char *const input, const int32_t nelements, char *const output) { int32_t pos = -1; @@ -387,8 +386,7 @@ int32_t tsDecompressBoolRLEImp(const char *const input, const int32_t nelements, } #endif -/* ----------------------------------------------String Compression - * ---------------------------------------------- */ +/* ----------------------------------------------String Compression ---------------------------------------------- */ // Note: the size of the output must be larger than input_size + 1 and // LZ4_compressBound(size) + 1; // >= max(input_size, LZ4_compressBound(input_size)) + 1; @@ -430,8 +428,7 @@ int32_t tsDecompressStringImp(const char *const input, int32_t compressedSize, c } } -/* --------------------------------------------Timestamp Compression - * ---------------------------------------------- */ +/* --------------------------------------------Timestamp Compression ---------------------------------------------- */ // TODO: Take care here, we assumes little endian encoding. 
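The timestamp codec touched below stores zigzag-encoded delta-of-deltas rather than raw values. A scalar sketch of the encode-side recurrence, for intuition only; the real format additionally packs two values per flag byte with variable byte counts:

#include <stdint.h>
#include <stdio.h>

/* Zigzag maps signed values to unsigned so that small magnitudes stay small. */
static uint64_t zigzagEncode(int64_t v) { return ((uint64_t)v << 1) ^ (uint64_t)(v >> 63); }

int main(void) {
  int64_t ts[5] = {1700000000, 1700000100, 1700000200, 1700000290, 1700000400};

  int64_t prevVal = 0, prevDelta = 0;
  for (int i = 0; i < 5; i++) {
    int64_t deltaOfDelta;
    if (i == 0) {
      deltaOfDelta = ts[0];              /* first value is stored as-is            */
      prevDelta    = 0;
    } else {
      int64_t delta = ts[i] - prevVal;   /* first-order difference                 */
      deltaOfDelta  = delta - prevDelta; /* second-order difference (usually tiny) */
      prevDelta     = delta;
    }
    prevVal = ts[i];
    printf("ts=%lld dod=%lld zigzag=%llu\n", (long long)ts[i], (long long)deltaOfDelta,
           (unsigned long long)zigzagEncode(deltaOfDelta));
  }
  return 0;
}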
int32_t tsCompressTimestampImp(const char *const input, const int32_t nelements, char *const output) { int32_t _pos = 1; @@ -541,66 +538,71 @@ int32_t tsDecompressTimestampImp(const char *const input, const int32_t nelement memcpy(output, input + 1, nelements * longBytes); return nelements * longBytes; } else if (input[0] == 1) { // Decompress - int64_t *ostream = (int64_t *)output; + if (tsSIMDEnable && tsAVX512Enable) { + tsDecompressTimestampAvx512(input, nelements, output, false); + } else if (tsSIMDEnable && tsAVX2Enable) { + tsDecompressTimestampAvx2(input, nelements, output, false); + } else { + int64_t *ostream = (int64_t *)output; - int32_t ipos = 1, opos = 0; - int8_t nbytes = 0; - int64_t prev_value = 0; - int64_t prev_delta = 0; - int64_t delta_of_delta = 0; + int32_t ipos = 1, opos = 0; + int8_t nbytes = 0; + int64_t prev_value = 0; + int64_t prev_delta = 0; + int64_t delta_of_delta = 0; - while (1) { - uint8_t flags = input[ipos++]; - // Decode dd1 - uint64_t dd1 = 0; - nbytes = flags & INT8MASK(4); - if (nbytes == 0) { - delta_of_delta = 0; - } else { - if (is_bigendian()) { - memcpy(((char *)(&dd1)) + longBytes - nbytes, input + ipos, nbytes); + while (1) { + uint8_t flags = input[ipos++]; + // Decode dd1 + uint64_t dd1 = 0; + nbytes = flags & INT8MASK(4); + if (nbytes == 0) { + delta_of_delta = 0; } else { - memcpy(&dd1, input + ipos, nbytes); + if (is_bigendian()) { + memcpy(((char *)(&dd1)) + longBytes - nbytes, input + ipos, nbytes); + } else { + memcpy(&dd1, input + ipos, nbytes); + } + delta_of_delta = ZIGZAG_DECODE(int64_t, dd1); } - delta_of_delta = ZIGZAG_DECODE(int64_t, dd1); - } - ipos += nbytes; - if (opos == 0) { - prev_value = delta_of_delta; - prev_delta = 0; - ostream[opos++] = delta_of_delta; - } else { + + ipos += nbytes; + if (opos == 0) { + prev_value = delta_of_delta; + prev_delta = 0; + ostream[opos++] = delta_of_delta; + } else { + prev_delta = delta_of_delta + prev_delta; + prev_value = prev_value + prev_delta; + ostream[opos++] = prev_value; + } + if (opos == nelements) return nelements * longBytes; + + // Decode dd2 + uint64_t dd2 = 0; + nbytes = (flags >> 4) & INT8MASK(4); + if (nbytes == 0) { + delta_of_delta = 0; + } else { + if (is_bigendian()) { + memcpy(((char *)(&dd2)) + longBytes - nbytes, input + ipos, nbytes); + } else { + memcpy(&dd2, input + ipos, nbytes); + } + // zigzag_decoding + delta_of_delta = ZIGZAG_DECODE(int64_t, dd2); + } + ipos += nbytes; prev_delta = delta_of_delta + prev_delta; prev_value = prev_value + prev_delta; ostream[opos++] = prev_value; + if (opos == nelements) return nelements * longBytes; } - if (opos == nelements) return nelements * longBytes; - - // Decode dd2 - uint64_t dd2 = 0; - nbytes = (flags >> 4) & INT8MASK(4); - if (nbytes == 0) { - delta_of_delta = 0; - } else { - if (is_bigendian()) { - memcpy(((char *)(&dd2)) + longBytes - nbytes, input + ipos, nbytes); - } else { - memcpy(&dd2, input + ipos, nbytes); - } - // zigzag_decoding - delta_of_delta = ZIGZAG_DECODE(int64_t, dd2); - } - ipos += nbytes; - prev_delta = delta_of_delta + prev_delta; - prev_value = prev_value + prev_delta; - ostream[opos++] = prev_value; - if (opos == nelements) return nelements * longBytes; } - - } else { - ASSERT(0); - return -1; } + + return nelements * longBytes; } /* --------------------------------------------Double Compression ---------------------------------------------- */ @@ -751,8 +753,7 @@ int32_t tsDecompressDoubleImp(const char *const input, const int32_t nelements, return nelements * DOUBLE_BYTES; } -/* 
--------------------------------------------Float Compression - * ---------------------------------------------- */ +/* --------------------------------------------Float Compression ---------------------------------------------- */ void encodeFloatValue(uint32_t diff, uint8_t flag, char *const output, int32_t *const pos) { uint8_t nbytes = (flag & INT8MASK(3)) + 1; int32_t nshift = (FLOAT_BYTES * BITS_PER_BYTE - nbytes * BITS_PER_BYTE) * (flag >> 3); diff --git a/source/util/src/tdecompress.c b/source/util/src/tdecompress.c index f32a4014d6..f212bf5231 100644 --- a/source/util/src/tdecompress.c +++ b/source/util/src/tdecompress.c @@ -50,14 +50,11 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements, const char *ip = input + 1; int32_t count = 0; int32_t _pos = 0; - int64_t prev_value = 0; + int64_t prevValue = 0; #if __AVX2__ - while (1) { - if (_pos == nelements) break; - - uint64_t w = 0; - memcpy(&w, ip, LONG_BYTES); + while (_pos < nelements) { + uint64_t w = *(uint64_t*) ip; char selector = (char)(w & INT64MASK(4)); // selector = 4 char bit = bit_per_integer[(int32_t)selector]; // bit = 3 @@ -80,13 +77,13 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements, if (selector == 0 || selector == 1) { if (tsSIMDEnable && tsAVX2Enable) { for (int32_t i = 0; i < batch; ++i) { - __m256i prev = _mm256_set1_epi64x(prev_value); + __m256i prev = _mm256_set1_epi64x(prevValue); _mm256_storeu_si256((__m256i *)&p[_pos], prev); _pos += 4; } for (int32_t i = 0; i < remain; ++i) { - p[_pos++] = prev_value; + p[_pos++] = prevValue; } } else if (tsSIMDEnable && tsAVX512Enable) { #if __AVX512F__ @@ -94,7 +91,7 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements, #endif } else { // alternative implementation without SIMD instructions. 
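For the decode direction, the sketch below mirrors the scalar branch of the timestamp decompressor: zigzag-decode each delta-of-delta, treat the first entry as the absolute value, then integrate twice. The sample inputs are the values the encode sketch earlier would produce:

#include <stdint.h>
#include <stdio.h>

/* Inverse of the zigzag mapping used by the decoder above. */
static int64_t zigzagDecode(uint64_t u) { return (int64_t)(u >> 1) ^ -(int64_t)(u & 1); }

int main(void) {
  /* Zigzagged delta-of-deltas for 1700000000, 1700000100, 1700000200,
   * 1700000290, 1700000400; the first entry carries the absolute timestamp. */
  uint64_t zz[5] = {3400000000ULL, 200ULL, 0ULL, 19ULL, 40ULL};

  int64_t prevValue = 0, prevDelta = 0;
  for (int i = 0; i < 5; i++) {
    int64_t dod = zigzagDecode(zz[i]);
    if (i == 0) {
      prevValue = dod; /* first value is stored directly */
      prevDelta = 0;
    } else {
      prevDelta = dod + prevDelta;       /* recover the delta     */
      prevValue = prevValue + prevDelta; /* recover the timestamp */
    }
    printf("decoded ts[%d] = %lld\n", i, (long long)prevValue);
  }
  return 0;
}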
for (int32_t i = 0; i < elems && count < nelements; i++, count++) { - p[_pos++] = prev_value; + p[_pos++] = prevValue; v += bit; } } @@ -114,20 +111,20 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements, __m256i signmask = _mm256_and_si256(_mm256_set1_epi64x(1), zigzagVal); signmask = _mm256_sub_epi64(_mm256_setzero_si256(), signmask); - // get the four zigzag values here + // get four zigzag values here __m256i delta = _mm256_xor_si256(_mm256_srli_epi64(zigzagVal, 1), signmask); // calculate the cumulative sum (prefix sum) for each number - // decode[0] = prev_value + final[0] - // decode[1] = decode[0] + final[1] -----> prev_value + final[0] + final[1] - // decode[2] = decode[1] + final[2] -----> prev_value + final[0] + final[1] + final[2] - // decode[3] = decode[2] + final[3] -----> prev_value + final[0] + final[1] + final[2] + final[3] + // decode[0] = prevValue + final[0] + // decode[1] = decode[0] + final[1] -----> prevValue + final[0] + final[1] + // decode[2] = decode[1] + final[2] -----> prevValue + final[0] + final[1] + final[2] + // decode[3] = decode[2] + final[3] -----> prevValue + final[0] + final[1] + final[2] + final[3] // 1, 2, 3, 4 //+ 0, 1, 0, 3 // 1, 3, 3, 7 // shift and add for the first round - __m128i prev = _mm_set1_epi64x(prev_value); + __m128i prev = _mm_set1_epi64x(prevValue); __m256i x = _mm256_slli_si256(delta, 8); delta = _mm256_add_epi64(delta, x); @@ -148,16 +145,16 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements, _mm_storeu_si128((__m128i *)&p[_pos + 2], secPart); shiftBits = _mm256_add_epi64(shiftBits, inc); - prev_value = p[_pos + 3]; + prevValue = p[_pos + 3]; _pos += 4; } // handle the remain value for (int32_t i = 0; i < remain; i++) { zigzag_value = ((w >> (v + (batch * bit * 4))) & mask); - prev_value += ZIGZAG_DECODE(int64_t, zigzag_value); + prevValue += ZIGZAG_DECODE(int64_t, zigzag_value); - p[_pos++] = prev_value; + p[_pos++] = prevValue; v += bit; } } else if (tsSIMDEnable && tsAVX512Enable) { @@ -167,9 +164,9 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements, } else { // alternative implementation without SIMD instructions. 
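The AVX2 branch above turns four decoded deltas into absolute values with two shift-and-add rounds inside a register. A scalar rendering of roughly the same dataflow (the vector version additionally needs a cross-lane fix-up that is omitted here):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void) {
  int64_t prevValue = 1000;          /* running value carried across 4-element blocks */
  int64_t delta[4]  = {1, 2, 3, 4};  /* decoded zigzag deltas for one block            */

  /* Shift-and-add rounds: after round 1 each slot holds itself plus its left
   * neighbour, after round 2 it holds the full inclusive prefix sum -- the
   * dataflow the _mm256_slli_si256/_mm256_add_epi64 pair approximates. */
  int64_t pfx[4];
  memcpy(pfx, delta, sizeof(pfx));
  for (int i = 3; i >= 1; i--) pfx[i] += pfx[i - 1]; /* round 1: shift by one element  */
  for (int i = 3; i >= 2; i--) pfx[i] += pfx[i - 2]; /* round 2: shift by two elements */

  /* decode[i] = prevValue + delta[0] + ... + delta[i] */
  int64_t decoded[4];
  for (int i = 0; i < 4; i++) decoded[i] = prevValue + pfx[i];
  prevValue = decoded[3]; /* becomes the carry for the next block */

  for (int i = 0; i < 4; i++) printf("decoded[%d] = %lld\n", i, (long long)decoded[i]);
  return 0;
}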
for (int32_t i = 0; i < elems && count < nelements; i++, count++) { zigzag_value = ((w >> v) & mask); - prev_value += ZIGZAG_DECODE(int64_t, zigzag_value); + prevValue += ZIGZAG_DECODE(int64_t, zigzag_value); - p[_pos++] = prev_value; + p[_pos++] = prevValue; v += bit; } } @@ -180,14 +177,14 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements, if (selector == 0 || selector == 1) { for (int32_t i = 0; i < elems && count < nelements; i++, count++) { - p[_pos++] = (int32_t)prev_value; + p[_pos++] = (int32_t)prevValue; } } else { for (int32_t i = 0; i < elems && count < nelements; i++, count++) { zigzag_value = ((w >> v) & mask); - prev_value += ZIGZAG_DECODE(int64_t, zigzag_value); + prevValue += ZIGZAG_DECODE(int64_t, zigzag_value); - p[_pos++] = (int32_t)prev_value; + p[_pos++] = (int32_t)prevValue; v += bit; } } @@ -197,14 +194,14 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements, if (selector == 0 || selector == 1) { for (int32_t i = 0; i < elems && count < nelements; i++, count++) { - p[_pos++] = (int16_t)prev_value; + p[_pos++] = (int16_t)prevValue; } } else { for (int32_t i = 0; i < elems && count < nelements; i++, count++) { zigzag_value = ((w >> v) & mask); - prev_value += ZIGZAG_DECODE(int64_t, zigzag_value); + prevValue += ZIGZAG_DECODE(int64_t, zigzag_value); - p[_pos++] = (int16_t)prev_value; + p[_pos++] = (int16_t)prevValue; v += bit; } } @@ -215,14 +212,14 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements, if (selector == 0 || selector == 1) { for (int32_t i = 0; i < elems && count < nelements; i++, count++) { - p[_pos++] = (int8_t)prev_value; + p[_pos++] = (int8_t)prevValue; } } else { for (int32_t i = 0; i < elems && count < nelements; i++, count++) { zigzag_value = ((w >> v) & mask); - prev_value += ZIGZAG_DECODE(int64_t, zigzag_value); + prevValue += ZIGZAG_DECODE(int64_t, zigzag_value); - p[_pos++] = (int8_t)prev_value; + p[_pos++] = (int8_t)prevValue; v += bit; } } @@ -246,6 +243,268 @@ int32_t tsDecompressFloatImplAvx512(const char *const input, const int32_t nelem // todo add later int32_t tsDecompressFloatImplAvx2(const char *const input, const int32_t nelements, char *const output) { #if __AVX2__ +#endif + return 0; +} + +int32_t tsDecompressTimestampAvx2(const char *const input, const int32_t nelements, char *const output, + bool bigEndian) { +#if 0 + int64_t *ostream = (int64_t *)output; + int32_t ipos = 1, opos = 0; + __m128i prevVal = _mm_setzero_si128(); + __m128i prevDelta = _mm_setzero_si128(); + +#if __AVX2__ + int32_t batch = nelements >> 1; + int32_t remainder = nelements & 0x01; + __mmask16 mask2[16] = {0, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff}; + + int32_t i = 0; + if (batch > 1) { + // first loop + uint8_t flags = input[ipos++]; + + int8_t nbytes1 = flags & INT8MASK(4); // range of nbytes starts from 0 to 7 + int8_t nbytes2 = (flags >> 4) & INT8MASK(4); + + __m128i data1; + if (nbytes1 == 0) { + data1 = _mm_setzero_si128(); + } else { + memcpy(&data1, (const void*) (input + ipos), nbytes1); + } + + __m128i data2; + if (nbytes2 == 0) { + data2 = _mm_setzero_si128(); + } else { + memcpy(&data2, (const void*) (input + ipos + nbytes1), nbytes2); + } + + data2 = _mm_broadcastq_epi64(data2); + __m128i zzVal = _mm_blend_epi32(data2, data1, 0x03); + + // ZIGZAG_DECODE(T, v) (((v) >> 1) ^ -((T)((v)&1))) + __m128i signmask = _mm_and_si128(_mm_set1_epi64x(1), zzVal); + signmask = _mm_sub_epi64(_mm_setzero_si128(), signmask); + + // get two zigzag 
values here + __m128i deltaOfDelta = _mm_xor_si128(_mm_srli_epi64(zzVal, 1), signmask); + + __m128i deltaCurrent = _mm_add_epi64(deltaOfDelta, prevDelta); + deltaCurrent = _mm_add_epi64(_mm_slli_si128(deltaCurrent, 8), deltaCurrent); + + __m128i val = _mm_add_epi64(deltaCurrent, prevVal); + _mm_storeu_si128((__m128i *)&ostream[opos], val); + + // keep the previous value + prevVal = _mm_shuffle_epi32 (val, 0xEE); + + // keep the previous delta of delta, for the first item + prevDelta = _mm_shuffle_epi32(deltaOfDelta, 0xEE); + + opos += 2; + ipos += nbytes1 + nbytes2; + i += 1; + } + + // the remain + for(; i < batch; ++i) { + uint8_t flags = input[ipos++]; + + int8_t nbytes1 = flags & INT8MASK(4); // range of nbytes starts from 0 to 7 + int8_t nbytes2 = (flags >> 4) & INT8MASK(4); + +// __m128i data1 = _mm_maskz_loadu_epi8(mask2[nbytes1], (const void*)(input + ipos)); +// __m128i data2 = _mm_maskz_loadu_epi8(mask2[nbytes2], (const void*)(input + ipos + nbytes1)); + __m128i data1; + if (nbytes1 == 0) { + data1 = _mm_setzero_si128(); + } else { + int64_t dd = 0; + memcpy(&dd, (const void*) (input + ipos), nbytes1); + data1 = _mm_loadu_si64(&dd); + } + + __m128i data2; + if (nbytes2 == 0) { + data2 = _mm_setzero_si128(); + } else { + int64_t dd = 0; + memcpy(&dd, (const void*) (input + ipos + nbytes1), nbytes2); + data2 = _mm_loadu_si64(&dd); + } + + data2 = _mm_broadcastq_epi64(data2); + + __m128i zzVal = _mm_blend_epi32(data2, data1, 0x03); + + // ZIGZAG_DECODE(T, v) (((v) >> 1) ^ -((T)((v)&1))) + __m128i signmask = _mm_and_si128(_mm_set1_epi64x(1), zzVal); + signmask = _mm_sub_epi64(_mm_setzero_si128(), signmask); + + // get two zigzag values here + __m128i deltaOfDelta = _mm_xor_si128(_mm_srli_epi64(zzVal, 1), signmask); + + __m128i deltaCurrent = _mm_add_epi64(deltaOfDelta, prevDelta); + deltaCurrent = _mm_add_epi64(_mm_slli_si128(deltaCurrent, 8), deltaCurrent); + + __m128i val = _mm_add_epi64(deltaCurrent, prevVal); + _mm_storeu_si128((__m128i *)&ostream[opos], val); + + // keep the previous value + prevVal = _mm_shuffle_epi32 (val, 0xEE); + + // keep the previous delta of delta + __m128i delta = _mm_add_epi64(_mm_slli_si128(deltaOfDelta, 8), deltaOfDelta); + prevDelta = _mm_shuffle_epi32(_mm_add_epi64(delta, prevDelta), 0xEE); + + opos += 2; + ipos += nbytes1 + nbytes2; + } + + if (remainder > 0) { + uint64_t dd = 0; + uint8_t flags = input[ipos++]; + + int32_t nbytes = flags & INT8MASK(4); + int64_t deltaOfDelta = 0; + if (nbytes == 0) { + deltaOfDelta = 0; + } else { + // if (is_bigendian()) { + // memcpy(((char *)(&dd1)) + longBytes - nbytes, input + ipos, nbytes); + // } else { + memcpy(&dd, input + ipos, nbytes); + // } + deltaOfDelta = ZIGZAG_DECODE(int64_t, dd); + } + + ipos += nbytes; + if (opos == 0) { + ostream[opos++] = deltaOfDelta; + } else { + int64_t prevDeltaX = deltaOfDelta + prevDelta[1]; + ostream[opos++] = prevVal[1] + prevDeltaX; + } + } +#endif +#endif + return 0; +} + +int32_t tsDecompressTimestampAvx512(const char *const input, const int32_t nelements, char *const output, + bool UNUSED_PARAM(bigEndian)) { + int64_t *ostream = (int64_t *)output; + int32_t ipos = 1, opos = 0; + +#if __AVX512VL__ + + __m128i prevVal = _mm_setzero_si128(); + __m128i prevDelta = _mm_setzero_si128(); + + int32_t numOfBatch = nelements >> 1; + int32_t remainder = nelements & 0x01; + __mmask16 mask2[16] = {0, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff}; + + int32_t i = 0; + if (numOfBatch > 1) { + // first loop + uint8_t flags = input[ipos++]; + + int8_t nbytes1 = 
flags & INT8MASK(4); // range of nbytes starts from 0 to 7 + int8_t nbytes2 = (flags >> 4) & INT8MASK(4); + + __m128i data1 = _mm_maskz_loadu_epi8(mask2[nbytes1], (const void*)(input + ipos)); + __m128i data2 = _mm_maskz_loadu_epi8(mask2[nbytes2], (const void*)(input + ipos + nbytes1)); + data2 = _mm_broadcastq_epi64(data2); + + __m128i zzVal = _mm_blend_epi32(data2, data1, 0x03); + + // ZIGZAG_DECODE(T, v) (((v) >> 1) ^ -((T)((v)&1))) + __m128i signmask = _mm_and_si128(_mm_set1_epi64x(1), zzVal); + signmask = _mm_sub_epi64(_mm_setzero_si128(), signmask); + + // get two zigzag values here + __m128i deltaOfDelta = _mm_xor_si128(_mm_srli_epi64(zzVal, 1), signmask); + + __m128i deltaCurrent = _mm_add_epi64(deltaOfDelta, prevDelta); + deltaCurrent = _mm_add_epi64(_mm_slli_si128(deltaCurrent, 8), deltaCurrent); + + __m128i val = _mm_add_epi64(deltaCurrent, prevVal); + _mm_storeu_si128((__m128i *)&ostream[opos], val); + + // keep the previous value + prevVal = _mm_shuffle_epi32 (val, 0xEE); + + // keep the previous delta of delta, for the first item + prevDelta = _mm_shuffle_epi32(deltaOfDelta, 0xEE); + + opos += 2; + ipos += nbytes1 + nbytes2; + i += 1; + } + + // the remain + for(; i < numOfBatch; ++i) { + uint8_t flags = input[ipos++]; + + int8_t nbytes1 = flags & INT8MASK(4); // range of nbytes starts from 0 to 7 + int8_t nbytes2 = (flags >> 4) & INT8MASK(4); + + __m128i data1 = _mm_maskz_loadu_epi8(mask2[nbytes1], (const void*)(input + ipos)); + __m128i data2 = _mm_maskz_loadu_epi8(mask2[nbytes2], (const void*)(input + ipos + nbytes1)); + data2 = _mm_broadcastq_epi64(data2); + + __m128i zzVal = _mm_blend_epi32(data2, data1, 0x03); + + // ZIGZAG_DECODE(T, v) (((v) >> 1) ^ -((T)((v)&1))) + __m128i signmask = _mm_and_si128(_mm_set1_epi64x(1), zzVal); + signmask = _mm_sub_epi64(_mm_setzero_si128(), signmask); + + // get two zigzag values here + __m128i deltaOfDelta = _mm_xor_si128(_mm_srli_epi64(zzVal, 1), signmask); + + __m128i deltaCurrent = _mm_add_epi64(deltaOfDelta, prevDelta); + deltaCurrent = _mm_add_epi64(_mm_slli_si128(deltaCurrent, 8), deltaCurrent); + + __m128i val = _mm_add_epi64(deltaCurrent, prevVal); + _mm_storeu_si128((__m128i *)&ostream[opos], val); + + // keep the previous value + prevVal = _mm_shuffle_epi32 (val, 0xEE); + + // keep the previous delta of delta + __m128i delta = _mm_add_epi64(_mm_slli_si128(deltaOfDelta, 8), deltaOfDelta); + prevDelta = _mm_shuffle_epi32(_mm_add_epi64(delta, prevDelta), 0xEE); + + opos += 2; + ipos += nbytes1 + nbytes2; + } + + if (remainder > 0) { + uint64_t dd = 0; + uint8_t flags = input[ipos++]; + + int32_t nbytes = flags & INT8MASK(4); + int64_t deltaOfDelta = 0; + if (nbytes == 0) { + deltaOfDelta = 0; + } else { + memcpy(&dd, input + ipos, nbytes); + deltaOfDelta = ZIGZAG_DECODE(int64_t, dd); + } + + ipos += nbytes; + if (opos == 0) { + ostream[opos++] = deltaOfDelta; + } else { + int64_t prevDeltaX = deltaOfDelta + prevDelta[1]; + ostream[opos++] = prevVal[1] + prevDeltaX; + } + } + #endif return 0; } \ No newline at end of file diff --git a/source/util/src/terror.c b/source/util/src/terror.c index f310db53ef..cc6647e463 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -101,6 +101,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_APP_IS_STOPPING, "Database is closing d TAOS_DEFINE_ERROR(TSDB_CODE_INVALID_DATA_FMT, "Invalid data format") TAOS_DEFINE_ERROR(TSDB_CODE_INVALID_CFG_VALUE, "Invalid configuration value") TAOS_DEFINE_ERROR(TSDB_CODE_IP_NOT_IN_WHITE_LIST, "Not allowed to connect") 
+TAOS_DEFINE_ERROR(TSDB_CODE_FAILED_TO_CONNECT_S3, "Failed to connect to s3 server") //client TAOS_DEFINE_ERROR(TSDB_CODE_TSC_INVALID_OPERATION, "Invalid operation") @@ -444,7 +445,6 @@ TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_PAR_DEC_IVLD_KLEN, "Invalid klen to decod TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_GEN_IVLD_KEY, "Invalid key to gen active code") TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_GEN_APP_LIMIT, "Limited app num to gen active code") TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_GEN_ENC_IVLD_KLEN, "Invalid klen to encode active code") -TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_PAR_IVLD_DIST, "Invalid dist to parse active code") // sync TAOS_DEFINE_ERROR(TSDB_CODE_SYN_TIMEOUT, "Sync timeout") diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index aa6719f604..184e18fc67 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -110,6 +110,7 @@ int32_t metaDebugFlag = 131; int32_t udfDebugFlag = 131; int32_t smaDebugFlag = 131; int32_t idxDebugFlag = 131; +int32_t sndDebugFlag = 131; int64_t dbgEmptyW = 0; int64_t dbgWN = 0; @@ -153,7 +154,12 @@ int32_t taosInitSlowLog() { #endif if (strlen(tsLogDir) != 0) { - snprintf(fullName, PATH_MAX, "%s" TD_DIRSEP "%s", tsLogDir, logFileName); + char lastC = tsLogDir[strlen(tsLogDir) - 1]; + if (lastC == '\\' || lastC == '/') { + snprintf(fullName, PATH_MAX, "%s" "%s", tsLogDir, logFileName); + } else { + snprintf(fullName, PATH_MAX, "%s" TD_DIRSEP "%s", tsLogDir, logFileName); + } } else { snprintf(fullName, PATH_MAX, "%s", logFileName); } @@ -177,7 +183,12 @@ int32_t taosInitLog(const char *logName, int32_t maxFiles) { char fullName[PATH_MAX] = {0}; if (strlen(tsLogDir) != 0) { - snprintf(fullName, PATH_MAX, "%s" TD_DIRSEP "%s", tsLogDir, logName); + char lastC = tsLogDir[strlen(tsLogDir) - 1]; + if (lastC == '\\' || lastC == '/') { + snprintf(fullName, PATH_MAX, "%s" "%s", tsLogDir, logName); + } else { + snprintf(fullName, PATH_MAX, "%s" TD_DIRSEP "%s", tsLogDir, logName); + } } else { snprintf(fullName, PATH_MAX, "%s", logName); } diff --git a/source/util/test/decompressTest.cpp b/source/util/test/decompressTest.cpp new file mode 100644 index 0000000000..caf8df3ba8 --- /dev/null +++ b/source/util/test/decompressTest.cpp @@ -0,0 +1,94 @@ +#include +#include +#include +#include + +namespace {} // namespace + +TEST(utilTest, decompress_test) { + int64_t tsList[10] = {1700000000, 1700000100, 1700000200, 1700000300, 1700000400, + 1700000500, 1700000600, 1700000700, 1700000800, 1700000900}; + + char* pOutput[10 * sizeof(int64_t)] = {0}; + int32_t len = tsCompressTimestamp(tsList, sizeof(tsList), sizeof(tsList) / sizeof(tsList[0]), pOutput, 10, ONE_STAGE_COMP, NULL, 0); + + char* decompOutput[10 * 8] = {0}; + tsDecompressTimestamp(pOutput, len, 10, decompOutput, sizeof(int64_t)*10, ONE_STAGE_COMP, NULL, 0); + + for(int32_t i = 0; i < 10; ++i) { + std::cout<< ((int64_t*)decompOutput)[i] << std::endl; + } + + memset(decompOutput, 0, 10*8); + tsDecompressTimestampAvx512(reinterpret_cast(pOutput), 10, + reinterpret_cast(decompOutput), false); + + for(int32_t i = 0; i < 10; ++i) { + std::cout<<((int64_t*)decompOutput)[i] << std::endl; + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + int64_t tsList1[7] = {1700000000, 1700000000, 1700000000, 1700000000, 1700000000, 1700000000, 1700000900}; + int32_t len1 = tsCompressTimestamp(tsList1, sizeof(tsList1), sizeof(tsList1) / sizeof(tsList1[0]), pOutput, 7, ONE_STAGE_COMP, NULL, 0); + + memset(decompOutput, 0, 10*8); + 
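The tlog.c hunk above avoids a doubled separator by checking the last character of tsLogDir before appending TD_DIRSEP. A portable sketch of that join, with hypothetical paths and an extra guard for the empty-directory case:

#include <stdio.h>
#include <string.h>

#ifdef _WIN32
#define DIR_SEP "\\"
#else
#define DIR_SEP "/"
#endif

/* Join dir + name, skipping the separator when dir already ends with one,
 * the same check taosInitLog/taosInitSlowLog now perform on tsLogDir. */
static void joinPath(char *out, size_t cap, const char *dir, const char *name) {
  size_t len = strlen(dir);
  char   last = (len > 0) ? dir[len - 1] : '\0';
  if (len == 0) {
    snprintf(out, cap, "%s", name);
  } else if (last == '/' || last == '\\') {
    snprintf(out, cap, "%s%s", dir, name);
  } else {
    snprintf(out, cap, "%s" DIR_SEP "%s", dir, name);
  }
}

int main(void) {
  char full[512];
  joinPath(full, sizeof(full), "/var/log/taos/", "taoslog"); /* trailing separator kept as-is */
  printf("%s\n", full);
  joinPath(full, sizeof(full), "/var/log/taos", "taoslog");  /* separator inserted */
  printf("%s\n", full);
  return 0;
}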
tsDecompressTimestampAvx512(reinterpret_cast(pOutput), 7, + reinterpret_cast(decompOutput), false); + + for(int32_t i = 0; i < 7; ++i) { + std::cout<<((int64_t*)decompOutput)[i] << std::endl; + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + int64_t tsList2[1] = {1700000000}; + int32_t len2 = tsCompressTimestamp(tsList2, sizeof(tsList2), sizeof(tsList2) / sizeof(tsList2[0]), pOutput, 1, ONE_STAGE_COMP, NULL, 0); + + memset(decompOutput, 0, 10*8); + tsDecompressTimestampAvx512(reinterpret_cast(pOutput), 1, + reinterpret_cast(decompOutput), false); + + for(int32_t i = 0; i < 1; ++i) { + std::cout<<((int64_t*)decompOutput)[i] << std::endl; + } +} + +TEST(utilTest, decompress_perf_test) { + int32_t num = 10000; + + int64_t* pList = static_cast(taosMemoryCalloc(num, sizeof(int64_t))); + int64_t iniVal = 1700000000; + + uint32_t v = 100; + + for(int32_t i = 0; i < num; ++i) { + iniVal += taosRandR(&v)%10; + pList[i] = iniVal; + } + + char* px = static_cast(taosMemoryMalloc(num * sizeof(int64_t))); + int32_t len = tsCompressTimestamp(pList, num * sizeof(int64_t), num, px, num, ONE_STAGE_COMP, NULL, 0); + + char* pOutput = static_cast(taosMemoryMalloc(num * sizeof(int64_t))); + + int64_t st = taosGetTimestampUs(); + for(int32_t k = 0; k < 10000; ++k) { + tsDecompressTimestamp(px, len, num, pOutput, sizeof(int64_t) * num, ONE_STAGE_COMP, NULL, 0); + } + + int64_t el1 = taosGetTimestampUs() - st; + std::cout << "soft decompress elapsed time:" << el1 << " us" << std::endl; + + memset(pOutput, 0, num * sizeof(int64_t)); + st = taosGetTimestampUs(); + for(int32_t k = 0; k < 10000; ++k) { + tsDecompressTimestampAvx512(px, num, pOutput, false); + } + + int64_t el2 = taosGetTimestampUs() - st; + std::cout << "SIMD decompress elapsed time:" << el2 << " us" << std::endl; + + taosMemoryFree(pList); + taosMemoryFree(pOutput); + taosMemoryFree(px); +} + diff --git a/tests/develop-test/win-test-file b/tests/develop-test/win-test-file index b640ef6bfe..1da890fc36 100644 --- a/tests/develop-test/win-test-file +++ b/tests/develop-test/win-test-file @@ -1,4 +1,7 @@ python3 ./test.py -f 2-query/table_count_scan.py +python3 ./test.py -f 2-query/pseudo_column.py +python3 ./test.py -f 2-query/ts-range.py +python3 ./test.py -f 2-query/tag_scan.py python3 ./test.py -f 2-query/show_create_db.py python3 ./test.py -f 5-taos-tools/taosbenchmark/auto_create_table_json.py python3 ./test.py -f 5-taos-tools/taosbenchmark/custom_col_tag.py diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 92eaec52b5..7d3efbf181 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -20,6 +20,9 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/window_close_session_ext.py ,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/partition_interval.py ,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/pause_resume_test.py +#,,n,system-test,python3 ./test.py -f 8-stream/vnode_restart.py -N 4 +#,,n,system-test,python3 ./test.py -f 8-stream/snode_restart.py -N 4 +#,,n,system-test,python3 ./test.py -f 8-stream/snode_restart_with_checkpoint.py -N 4 ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/tbname_vgroup.py ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/stbJoin.py @@ -195,6 +198,7 @@ #,,y,system-test,./pytest.sh python3 test.py -f 7-tmq/tmqVnodeTransform-db.py -N 6 -n 3 ,,y,system-test,./pytest.sh python3 test.py -f 7-tmq/tmqVnodeSplit-stb-select.py -N 2 -n 1 
,,y,system-test,./pytest.sh python3 test.py -f 7-tmq/tmqVnodeSplit-stb-select-duplicatedata.py -N 3 -n 3 +,,y,system-test,./pytest.sh python3 test.py -f 7-tmq/tmqVnodeSplit-stb-select-duplicatedata-false.py -N 3 -n 3 ,,y,system-test,./pytest.sh python3 test.py -f 7-tmq/tmqVnodeSplit-stb-select.py -N 3 -n 3 ,,y,system-test,./pytest.sh python3 test.py -f 7-tmq/tmqVnodeSplit-stb.py -N 3 -n 3 ,,y,system-test,./pytest.sh python3 test.py -f 7-tmq/tmqVnodeSplit-column.py -N 3 -n 3 @@ -296,6 +300,9 @@ e ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/precisionUS.py ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/precisionNS.py ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/test_ts4219.py +,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/ts-4272.py +,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/test_ts4295.py +,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/test_td27388.py ,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/show.py ,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/show_tag_index.py ,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/information_schema.py @@ -873,7 +880,7 @@ e ,,y,script,./test.sh -f tsim/dnode/balance2.sim ,,y,script,./test.sh -f tsim/vnode/replica3_repeat.sim ,,y,script,./test.sh -f tsim/parser/col_arithmetic_operation.sim -,,y,script,./test.sh -f tsim/trans/create_db.sim +#,,y,script,./test.sh -f tsim/trans/create_db.sim ,,y,script,./test.sh -f tsim/dnode/balance3.sim ,,y,script,./test.sh -f tsim/vnode/replica3_many.sim ,,y,script,./test.sh -f tsim/stable/metrics_idx.sim @@ -1065,6 +1072,7 @@ e ,,y,script,./test.sh -f tsim/query/show_db_table_kind.sim ,,y,script,./test.sh -f tsim/query/bi_star_table.sim ,,y,script,./test.sh -f tsim/query/bi_tag_scan.sim +,,y,script,./test.sh -f tsim/query/bi_tbname_col.sim ,,y,script,./test.sh -f tsim/query/tag_scan.sim ,,y,script,./test.sh -f tsim/query/nullColSma.sim ,,y,script,./test.sh -f tsim/query/bug3398.sim @@ -1329,7 +1337,7 @@ e #docs-examples test ,,n,docs-examples-test,bash python.sh -,,n,docs-examples-test,bash node.sh +#,,n,docs-examples-test,bash node.sh ,,n,docs-examples-test,bash csharp.sh ,,n,docs-examples-test,bash jdbc.sh ,,n,docs-examples-test,bash go.sh diff --git a/tests/script/tsim/query/bi_tbname_col.sim b/tests/script/tsim/query/bi_tbname_col.sim new file mode 100644 index 0000000000..d351a92539 --- /dev/null +++ b/tests/script/tsim/query/bi_tbname_col.sim @@ -0,0 +1,36 @@ + +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/exec.sh -n dnode1 -s start +sql connect + +sql drop database if exists db1; +sql create database db1 vgroups 3; +sql create database db1; +sql use db1; +sql create stable sta (ts timestamp, f1 int, f2 binary(200)) tags(t1 int, t2 int, t3 int); +sql create stable stb (ts timestamp, f1 int, f2 binary(200)) tags(t1 int, t2 int, t3 int); +sql create table tba1 using sta tags(1, 1, 1); +sql create table tba2 using sta tags(2, 2, 2); +sql insert into tba1 values(now, 1, "1")(now+3s, 3, "3")(now+5s, 5, "5"); +sql insert into tba2 values(now + 1s, 2, "2")(now+2s, 2, "2")(now+4s, 4, "4"); +sql create table tbn1 (ts timestamp, f1 int); + +set_bi_mode 1 + +sql select `tbname`, f1, f2 from sta order by ts +print $rows +print $data00 $data01 $data02 $data10 $data11 $data12 +if $rows != 6 then + return -1 +endi +if $data00 != @tba1@ then + return -1 +endi +if $data10 != @tba2@ then + return -1 +endi + +sql_error create table stc(ts timestamp, `tbname` binary(200)); +sql_error create table std(ts 
timestamp, f1 int) tags(`tbname` binary(200)); +system sh/exec.sh -n dnode1 -s stop -x SIGINT diff --git a/tests/script/tsim/stream/pauseAndResume.sim b/tests/script/tsim/stream/pauseAndResume.sim index 673bc77c0f..5eb9eef010 100644 --- a/tests/script/tsim/stream/pauseAndResume.sim +++ b/tests/script/tsim/stream/pauseAndResume.sim @@ -16,9 +16,8 @@ sql create table ts2 using st tags(2,2,2); sql create table ts3 using st tags(3,2,2); sql create table ts4 using st tags(4,2,2); sql create stream streams1 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 watermark 1d into streamt1 as select _wstart, count(*) c1, sum(a) c3 from st interval(10s); -sleep 1000 +sleep 2000 -sleep 1000 sql pause stream streams1; sql insert into ts1 values(1648791213001,1,12,3,1.0); diff --git a/tests/script/win-test-file b/tests/script/win-test-file index b2d50ade8a..d9ff09f468 100644 --- a/tests/script/win-test-file +++ b/tests/script/win-test-file @@ -19,19 +19,18 @@ ./test.sh -f tsim/dnode/balance3.sim ./test.sh -f tsim/vnode/replica3_many.sim ./test.sh -f tsim/stable/metrics_idx.sim -./test.sh -f tsim/db/alter_replica_13.sim ./test.sh -f tsim/sync/3Replica1VgElect.sim ./test.sh -f tsim/sync/3Replica5VgElect.sim ./test.sh -f tsim/valgrind/checkError6.sim ./test.sh -f tsim/user/basic.sim ./test.sh -f tsim/user/password.sim +./test.sh -f tsim/user/whitelist.sim ./test.sh -f tsim/user/privilege_db.sim ./test.sh -f tsim/user/privilege_sysinfo.sim ./test.sh -f tsim/user/privilege_topic.sim ./test.sh -f tsim/user/privilege_table.sim ./test.sh -f tsim/user/privilege_create_db.sim ./test.sh -f tsim/db/alter_option.sim -./test.sh -f tsim/db/alter_replica_31.sim ./test.sh -f tsim/db/basic1.sim ./test.sh -f tsim/db/basic2.sim ./test.sh -f tsim/db/basic3.sim @@ -97,6 +96,7 @@ ./test.sh -f tsim/insert/delete0.sim ./test.sh -f tsim/insert/update1_sort_merge.sim ./test.sh -f tsim/insert/update2.sim +./test.sh -f tsim/insert/insert_stb.sim ./test.sh -f tsim/parser/alter__for_community_version.sim ./test.sh -f tsim/parser/alter_column.sim ./test.sh -f tsim/parser/alter_stable.sim @@ -186,6 +186,7 @@ ./test.sh -f tsim/query/session.sim ./test.sh -f tsim/query/join_interval.sim ./test.sh -f tsim/query/join_pk.sim +./test.sh -f tsim/query/count_spread.sim ./test.sh -f tsim/query/unionall_as_table.sim ./test.sh -f tsim/query/multi_order_by.sim ./test.sh -f tsim/query/sys_tbname.sim @@ -197,10 +198,14 @@ ./test.sh -f tsim/query/emptyTsRange_scl.sim ./test.sh -f tsim/query/partitionby.sim ./test.sh -f tsim/query/tableCount.sim +./test.sh -f tsim/query/show_db_table_kind.sim +./test.sh -f tsim/query/bi_star_table.sim +./test.sh -f tsim/query/bi_tag_scan.sim ./test.sh -f tsim/query/tag_scan.sim ./test.sh -f tsim/query/nullColSma.sim ./test.sh -f tsim/query/bug3398.sim ./test.sh -f tsim/query/explain_tsorder.sim +./test.sh -f tsim/query/apercentile.sim ./test.sh -f tsim/qnode/basic1.sim ./test.sh -f tsim/snode/basic1.sim ./test.sh -f tsim/mnode/basic1.sim @@ -237,52 +242,6 @@ ./test.sh -f tsim/table/table.sim ./test.sh -f tsim/table/tinyint.sim ./test.sh -f tsim/table/vgroup.sim -./test.sh -f tsim/stream/basic0.sim -g -./test.sh -f tsim/stream/basic1.sim -./test.sh -f tsim/stream/basic2.sim -./test.sh -f tsim/stream/basic3.sim -./test.sh -f tsim/stream/basic4.sim -./test.sh -f tsim/stream/checkpointInterval0.sim -./test.sh -f tsim/stream/checkStreamSTable1.sim -./test.sh -f tsim/stream/checkStreamSTable.sim -./test.sh -f tsim/stream/deleteInterval.sim -./test.sh -f tsim/stream/deleteSession.sim -./test.sh -f tsim/stream/deleteState.sim 
-./test.sh -f tsim/stream/distributeInterval0.sim -./test.sh -f tsim/stream/distributeIntervalRetrive0.sim -./test.sh -f tsim/stream/distributeSession0.sim -./test.sh -f tsim/stream/drop_stream.sim -./test.sh -f tsim/stream/fillHistoryBasic1.sim -./test.sh -f tsim/stream/fillHistoryBasic2.sim -./test.sh -f tsim/stream/fillHistoryBasic3.sim -./test.sh -f tsim/stream/fillIntervalDelete0.sim -./test.sh -f tsim/stream/fillIntervalDelete1.sim -./test.sh -f tsim/stream/fillIntervalLinear.sim -./test.sh -f tsim/stream/fillIntervalPartitionBy.sim -./test.sh -f tsim/stream/fillIntervalPrevNext1.sim -./test.sh -f tsim/stream/fillIntervalPrevNext.sim -./test.sh -f tsim/stream/fillIntervalRange.sim -./test.sh -f tsim/stream/fillIntervalValue.sim -./test.sh -f tsim/stream/ignoreCheckUpdate.sim -./test.sh -f tsim/stream/ignoreExpiredData.sim -./test.sh -f tsim/stream/partitionby1.sim -./test.sh -f tsim/stream/partitionbyColumnInterval.sim -./test.sh -f tsim/stream/partitionbyColumnSession.sim -./test.sh -f tsim/stream/partitionbyColumnState.sim -./test.sh -f tsim/stream/partitionby.sim -./test.sh -f tsim/stream/pauseAndResume.sim -./test.sh -f tsim/stream/schedSnode.sim -./test.sh -f tsim/stream/session0.sim -./test.sh -f tsim/stream/session1.sim -./test.sh -f tsim/stream/sliding.sim -./test.sh -f tsim/stream/state0.sim -./test.sh -f tsim/stream/state1.sim -./test.sh -f tsim/stream/triggerInterval0.sim -./test.sh -f tsim/stream/triggerSession0.sim -./test.sh -f tsim/stream/udTableAndTag0.sim -./test.sh -f tsim/stream/udTableAndTag1.sim -./test.sh -f tsim/stream/udTableAndTag2.sim -./test.sh -f tsim/stream/windowClose.sim ./test.sh -f tsim/trans/lossdata1.sim ./test.sh -f tsim/tmq/basic1.sim ./test.sh -f tsim/tmq/basic2.sim @@ -431,3 +390,8 @@ ./test.sh -f tsim/tag/drop_tag.sim ./test.sh -f tsim/tag/tbNameIn.sim ./test.sh -f tmp/monitor.sim +./test.sh -f tsim/tagindex/add_index.sim +./test.sh -f tsim/tagindex/sma_and_tag_index.sim +./test.sh -f tsim/view/view.sim +./test.sh -f tsim/query/cache_last.sim +./test.sh -f tsim/query/const.sim diff --git a/tests/system-test/0-others/compatibility.py b/tests/system-test/0-others/compatibility.py index 83bfb2bed7..d54c676c0d 100644 --- a/tests/system-test/0-others/compatibility.py +++ b/tests/system-test/0-others/compatibility.py @@ -90,7 +90,10 @@ class TDTestCase: packagePath = "/usr/local/src/" dataPath = cPath + "/../data/" - packageName = "TDengine-server-"+ BASEVERSION + "-Linux-x64.tar.gz" + if platform.system() == "Linux" and platform.machine() == "aarch64": + packageName = "TDengine-server-"+ BASEVERSION + "-Linux-arm64.tar.gz" + else: + packageName = "TDengine-server-"+ BASEVERSION + "-Linux-x64.tar.gz" packageTPath = packageName.split("-Linux-")[0] my_file = Path(f"{packagePath}/{packageName}") if not my_file.exists(): diff --git a/tests/system-test/0-others/information_schema.py b/tests/system-test/0-others/information_schema.py index 544a966960..2bfe33d0af 100644 --- a/tests/system-test/0-others/information_schema.py +++ b/tests/system-test/0-others/information_schema.py @@ -247,10 +247,7 @@ class TDTestCase: tdSql.error('alter all dnodes "activeCode" "' + self.str510 + '"') tdSql.query(f'select * from information_schema.ins_dnodes') tdSql.checkEqual(tdSql.queryResult[0][8],"") - tdSql.error('alter dnode 1 "activeCode" ""') - tdSql.error('alter dnode 1 "activeCode"') - tdSql.execute('alter all dnodes "activeCode" ""') - tdSql.execute('alter all dnodes "activeCode"') + tdSql.execute('alter dnode 1 "activeCode" ""') tdSql.query(f'select 
active_code,c_active_code from information_schema.ins_dnodes') tdSql.checkEqual(tdSql.queryResult[0][0],"") tdSql.checkEqual(tdSql.queryResult[0][1],'') @@ -262,10 +259,6 @@ class TDTestCase: tdSql.error('alter all dnodes "cActiveCode" "' + self.str257 + '"') tdSql.error('alter all dnodes "cActiveCode" "' + self.str254 + '"') tdSql.error('alter dnode 1 "cActiveCode" "' + self.str510 + '"') - tdSql.error('alter dnode 1 "cActiveCode" ""') - tdSql.error('alter dnode 1 "cActiveCode"') - tdSql.execute('alter all dnodes "cActiveCode" ""') - tdSql.execute('alter all dnodes "cActiveCode"') tdSql.query(f'select active_code,c_active_code from information_schema.ins_dnodes') tdSql.checkEqual(tdSql.queryResult[0][0],"") tdSql.checkEqual(tdSql.queryResult[0][1],"") diff --git a/tests/system-test/0-others/test_hot_refresh_configurations.py b/tests/system-test/0-others/test_hot_refresh_configurations.py index 7aed7274a4..cbde8c060e 100644 --- a/tests/system-test/0-others/test_hot_refresh_configurations.py +++ b/tests/system-test/0-others/test_hot_refresh_configurations.py @@ -2,7 +2,7 @@ import subprocess import random import time import os - +import platform from util.log import * from util.sql import * from util.cases import * @@ -190,6 +190,8 @@ class TDTestCase: for v in values: dnode = random.choice(p_list) tdSql.execute(f'alter {dnode} "{name} {v}";') + if platform.system() == "Linux" and platform.machine() == "aarch64": + continue value = self.get_param_value_with_gdb(alias, "taosd") if value: tdLog.debug(f"value: {value}") diff --git a/tests/system-test/0-others/user_privilege.py b/tests/system-test/0-others/user_privilege.py index d1b93f6942..a731e85ddb 100644 --- a/tests/system-test/0-others/user_privilege.py +++ b/tests/system-test/0-others/user_privilege.py @@ -27,6 +27,7 @@ class TDTestCase: tdSql.init(conn.cursor()) self.setsql = TDSetSql() self.stbname = 'stb' + self.user_name = 'test' self.binary_length = 20 # the length of binary for column_dict self.nchar_length = 20 # the length of nchar for column_dict self.dbnames = ['db1', 'db2'] @@ -54,12 +55,12 @@ class TDTestCase: ] self.tbnum = 4 + self.stbnum_grant = 200 def create_user(self): - user_name = 'test' - tdSql.execute(f'create user {user_name} pass "test"') - tdSql.execute(f'grant read on {self.dbnames[0]}.{self.stbname} with t2 = "Beijing" to {user_name}') - tdSql.execute(f'grant write on {self.dbnames[1]}.{self.stbname} with t1 = 2 to {user_name}') + tdSql.execute(f'create user {self.user_name} pass "test"') + tdSql.execute(f'grant read on {self.dbnames[0]}.{self.stbname} with t2 = "Beijing" to {self.user_name}') + tdSql.execute(f'grant write on {self.dbnames[1]}.{self.stbname} with t1 = 2 to {self.user_name}') def prepare_data(self): for db in self.dbnames: @@ -70,6 +71,8 @@ class TDTestCase: tdSql.execute(f'create table {self.stbname}_{i} using {self.stbname} tags({self.tag_list[i]})') for j in self.values_list: tdSql.execute(f'insert into {self.stbname}_{i} values({j})') + for i in range(self.stbnum_grant): + tdSql.execute(f'create table {self.stbname}_grant_{i} (ts timestamp, c0 int) tags(t0 int)') def user_read_privilege_check(self, dbname): testconn = taos.connect(user='test', password='test') @@ -128,12 +131,20 @@ class TDTestCase: tdLog.exit(f"{caller.filename}({caller.lineno}) failed: sql:{sql}, expect error not occured") pass + def user_privilege_grant_check(self): + for db in self.dbnames: + tdSql.execute(f"use {db}") + for i in range(self.stbnum_grant): + tdSql.execute(f'grant read on {db}.{self.stbname}_grant_{i} to 
{self.user_name}') + tdSql.execute(f'grant write on {db}.{self.stbname}_grant_{i} to {self.user_name}') + def run(self): self.prepare_data() self.create_user() self.user_read_privilege_check(self.dbnames[0]) self.user_write_privilege_check(self.dbnames[1]) self.user_privilege_error_check() + self.user_privilege_grant_check() def stop(self): tdSql.close() diff --git a/tests/system-test/0-others/view/non_marterial_view/test_view.py b/tests/system-test/0-others/view/non_marterial_view/test_view.py index afb2476305..4b829b4049 100644 --- a/tests/system-test/0-others/view/non_marterial_view/test_view.py +++ b/tests/system-test/0-others/view/non_marterial_view/test_view.py @@ -162,7 +162,7 @@ class TDTestCase: assert('TIMESTAMP' in data_type_list and 'INT' in data_type_list and 'INT UNSIGNED' in data_type_list and 'BIGINT' in data_type_list and 'BIGINT UNSIGNED' in data_type_list and 'FLOAT' in data_type_list and 'DOUBLE' in data_type_list and 'VARCHAR' in data_type_list and 'SMALLINT' in data_type_list and 'SMALLINT UNSIGNED' in data_type_list and 'TINYINT' in data_type_list and 'TINYINT UNSIGNED' in data_type_list and 'BOOL' in data_type_list and 'VARCHAR' in data_type_list and 'NCHAR' in data_type_list and 'GEOMETRY' in data_type_list and 'VARBINARY' in data_type_list) tdSql.execute("create view v2 as select * from tb where c1 >5 and c7 like '%ab%';") self.check_view_num(2) - tdSql.error("create view v3 as select * from tb where c1 like '%ab%';", expectErrInfo='Invalid value type') + tdSql.error("create view v3 as select * from tb where c1 like '%ab%';", expectErrInfo='Invalid operation') tdSql.execute("create view v3 as select first(ts), sum(c1) from tb group by c2 having avg(c4) > 0;") tdSql.execute("create view v4 as select _wstart,sum(c6) from tb interval(10s);") tdSql.execute("create view v5 as select * from tb join v2 on tb.ts = v2.ts;") diff --git a/tests/system-test/1-insert/alter_stable.py b/tests/system-test/1-insert/alter_stable.py index 52f185a868..40d7f04ceb 100644 --- a/tests/system-test/1-insert/alter_stable.py +++ b/tests/system-test/1-insert/alter_stable.py @@ -13,6 +13,7 @@ import random import string +import threading from util.log import * from util.cases import * from util.sql import * @@ -25,10 +26,24 @@ class TDTestCase: tdLog.debug("start to execute %s" % __file__) tdSql.init(conn.cursor()) self.setsql = TDSetSql() + self.fname = __file__ + '.tmp.sql' + self.dbname = 'db1' self.ntbname = 'ntb' self.stbname = 'stb' + self.stbnum = 10 + self.ntbnum = 10 + self.colnum = 52 + self.tagnum = 15 + self.collen = 320 + self.colnum_modify = 40 + self.tagnum_modify = 40 + self.collen_old_modify = 160 + self.collen_new_modify = 455 + self.taglen_old_modify = 80 + self.taglen_new_modify = 155 self.binary_length = 20 # the length of binary for column_dict self.nchar_length = 20 # the length of nchar for column_dict + self.threadnum = 2 self.column_dict = { 'ts' : 'timestamp', 'col1': 'tinyint', @@ -183,9 +198,114 @@ class TDTestCase: tdLog.info(res) assert(res[1][2] == 39001) + def prepareAlterEnv(self): + tdSql.execute(f'drop database if exists {self.dbname}') + tdSql.execute(f'create database if not exists {self.dbname} vgroups 2') + tdSql.execute(f'use {self.dbname}') + + def destroyAlterEnv(self): + tdSql.execute(f'drop database if exists {self.dbname}') + + def alterTableTask(self, i): + os.system(f'taos -f {self.fname}.{i};') + + def executeAlterTable(self, opt): + threads = [] + for i in range(self.threadnum): + thread = threading.Thread(target=self.alterTableTask, args=(i,)) 
+ threads.append(thread) + thread.start() + for i in range(self.threadnum): + threads[i].join() + + def checkAlterTable(self, opt): + if opt in ["stb_add_col", "stb_add_tag"]: + for i in range(self.stbnum): + tdSql.execute(f'desc {self.stbname}_{i}') + elif opt in ["stb_modify_col", "stb_modify_tag"]: + for i in range(self.stbnum): + tdSql.execute(f'desc {self.stbname}_{i}') + elif opt in ["ntb_add_col", "ntb_modify_col"]: + for i in range(self.ntbnum): + tdSql.execute(f'desc {self.ntbname}_{i}') + + def destroyAlterTable(self): + for i in range(self.threadnum): + if os.path.isfile(f'{self.fname}.{i}'): + os.remove(f'{self.fname}.{i}') + + def prepareAlterTable(self, opt): + self.destroyAlterTable() + lines = [f'use {self.dbname};\n'] + if opt in ["stb_add_col", "stb_add_tag"]: + for i in range(self.stbnum): + tdSql.execute(f'create table if not exists {self.stbname}_{i} (ts timestamp, c_0 NCHAR({self.collen})) tags(t0 nchar({self.collen}));') + for i in range(self.stbnum): + if opt == 'stb_add_col': + for c in range(1, self.colnum): + lines.append(f'alter table {self.stbname}_{i} add column c_{c} NCHAR({self.collen});\n') + else: + for c in range(1, self.tagnum): + lines.append(f'alter table {self.stbname}_{i} add tag t_{c} NCHAR({self.collen});\n') + elif opt in ["stb_modify_col", "stb_modify_tag"]: + for i in range(self.stbnum): + createTbSql = f'CREATE table if not exists {self.stbname}_{i} (ts timestamp' + for j in range(self.colnum_modify): + createTbSql += f',c_{j} NCHAR({self.collen_old_modify})' + createTbSql += f') tags(t_0 NCHAR({self.taglen_old_modify})' + for k in range(1,self.tagnum_modify): + createTbSql += f',t_{k} NCHAR({self.taglen_old_modify})' + createTbSql += f');' + tdLog.info(createTbSql) + tdSql.execute(createTbSql) + for i in range(self.stbnum): + if opt == 'stb_modify_col': + for c in range(self.colnum_modify): + lines.append(f'alter table {self.stbname}_{i} modify column c_{c} NCHAR({self.collen_new_modify});\n') + else: + for c in range(self.tagnum_modify): + lines.append(f'alter table {self.stbname}_{i} modify tag t_{c} NCHAR({self.taglen_new_modify});\n') + elif opt in ['ntb_add_col']: + for i in range(self.ntbnum): + tdSql.execute(f'create table if not exists {self.ntbname}_{i} (ts timestamp, c_0 NCHAR({self.collen}));') + for i in range(self.ntbnum): + for c in range(1, self.colnum): + lines.append(f'alter table {self.ntbname}_{i} add column c_{c} NCHAR({self.collen});\n') + elif opt in ['ntb_modify_col']: + for i in range(self.ntbnum): + createTbSql = f'CREATE table if not exists {self.ntbname}_{i} (ts timestamp' + for j in range(self.colnum_modify): + createTbSql += f',c_{j} NCHAR({self.collen_old_modify})' + createTbSql += f');' + tdLog.info(createTbSql) + tdSql.execute(createTbSql) + for i in range(self.ntbnum): + for c in range(self.colnum_modify): + lines.append(f'alter table {self.ntbname}_{i} modify column c_{c} NCHAR({self.collen_new_modify});\n') + # generate sql file + with open(f'{self.fname}.0', "a") as f: + f.writelines(lines) + # clone sql file in case of race condition + for i in range(1, self.threadnum): + shutil.copy(f'{self.fname}.0', f'{self.fname}.{i}') + + def alter_stable_multi_client_check(self): + """Check alter stable/ntable var type column/tag(PI-23) + """ + alter_table_check_type = ["stb_add_col", "stb_add_tag", "stb_modify_col", "stb_modify_tag", "ntb_add_col", "ntb_modify_col"] + + for opt in alter_table_check_type: + self.prepareAlterEnv() + self.prepareAlterTable(opt) + self.executeAlterTable(opt) + self.checkAlterTable(opt) + 
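+            # clean up the generated SQL files and drop the test database before checking the next option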
self.destroyAlterTable() + self.destroyAlterEnv() + def run(self): self.alter_stable_check() self.alter_stable_column_varchar_39001() + self.alter_stable_multi_client_check() def stop(self): tdSql.close() tdLog.success("%s successfully executed" % __file__) diff --git a/tests/system-test/1-insert/test_td27388.py b/tests/system-test/1-insert/test_td27388.py new file mode 100644 index 0000000000..7b49a63dbb --- /dev/null +++ b/tests/system-test/1-insert/test_td27388.py @@ -0,0 +1,97 @@ +import random +import string +from util.log import * +from util.cases import * +from util.sql import * +from util.sqlset import * +from util import constant +from util.common import * + + +class TDTestCase: + """Verify the insert with format exception for task TD-27388 + """ + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor()) + self.dbname = 'db' + self.stbname = 'st' + tdSql.execute("create database {};".format(self.dbname)) + tdSql.execute("use {};".format(self.dbname)) + tdSql.execute("create table st (ts timestamp, col1 int, col2 varchar(64)) tags (t1 int, t2 varchar(32));") + + def test_half_quotes(self): + sql_list = [ + "insert into t1 using st tags(1, 'tag1) values(now, 1, 'test msg');", + "insert into t1 using st tags(1, tag1') values(now, 1, 'test msg');", + "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg);", + "insert into t1 using st tags(1, 'tag1') values(now, 1, test msg');", + "insert into t1 using st tags(1, 'tag1' values(now, 1, test msg');", + "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg)';", + "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg);", + "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg') t2 using st tags(2, 'tag2) values(now, 2, 'test msg');", + "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg') t2 using st tags(2, tag2') values(now, 2, 'test msg');", + "insert into t1 using st tags(1, 'tag1) values(now, 1, 'test msg') t2 using st tags(2, 'tag2) values(now, 2, 'test msg');", + "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg) t2 using st tags(2, 'tag2') values(now, 2, test msg');", + "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg') t2 using st tags(2, 'tag2') values(now, 2, 'test msg);", + "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg') t2 using st tags(2, 'tag2') values(now, 2, 'test msg);" + ] + for sql in sql_list: + tdLog.debug("execute harlf quotes sql: %s" % sql) + tdSql.error(sql) + + def test_esc(self): + sql_list = [ + "insert into t1 using st tags(1, 'tag1\\') values(now, 1, 'test msg');", + "insert into t1 using st tags(1, \\'tag1') values(now, 1, 'test msg');", + "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg\\');", + "insert into t1 using st tags(1, 'tag1') values(now, 1, \\'test msg');", + "insert into t1 using st tags(1, \\'tag1\\') values(now, 1, 'test msg');", + "insert into t1 using st tags(1, 'tag1') values(now, 1, \\'test msg\\');", + "insert into t1 using st tags(1, \\'tag1\\') values(now, 1, \\'test msg\\');", + "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg') t2 using st tags(2, 'tag2\\') values(now, 2, 'test msg');", + "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg') t2 using st tags(2, \\'tag2') values(now, 2, 'test msg');", + "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg') t2 using st tags(2, 'tag2') values(now, 2, \\'test msg');", 
+ "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg') t2 using st tags(2, 'tag2') values(now, 2, 'test msg\\');", + "insert into t1 using st tags(1, \\'tag1') values(now, 1, 'test msg') t2 using st tags(2, 'tag2\\') values(now, 2, 'test msg');", + "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg') t2 using st tags(2, 'tag2') values(now, 2, \\'test msg\\');" + ] + for sql in sql_list: + tdLog.debug("execute escape character sql: %s" % sql) + tdSql.error(sql) + + def test_specific_character(self): + sql_list = [ + "insert into t1 using st tags(1, 'tag1$) values(now, 1, 'test msg');", + "insert into t1 using st tags(1, 'tag1,) values(now, 1, 'test msg');", + "insert into t1 using st tags(1, 'tag1'') values(now, 1, 'test msg');", + "insert into t1 using st tags(1, 'tag1() values(now, 1, 'test msg');", + "insert into t1 using st tags(1, 'tag1*) values(now, 1, 'test msg');", + "insert into t1 using st tags(1, 'tag1+) values(now, 1, 'test msg');", + "insert into t1 using st tags(1, 'tag1,) values(now, 1, 'test msg');", + "isnert into t1 using st tags(1, 'tag1-) values(now, 1, 'test msg');", + "insert into t1 using st tags(1, 'tag1.) values(now, 1, 'test msg');", + "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg$);", + "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg,);", + "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg+%+-.);", + "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg') t2 using st tags(2, 'tag2$) values(now, 2, 'test msg');", + "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg') t2 using st tags(2, 'tag2,) values(now, 2, 'test msg');", + "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg') t2 using st tags(2, 'tag2'') values(now, 2, 'test msg');", + "insert into t1 using st tags(1, 'tag1') values(now, 1, 'test msg') t2 using st tags(2, 'tag2() values(now, 2, 'test msg');" + ] + for sql in sql_list: + tdLog.debug("execute specific character sql: %s" % sql) + tdSql.error(sql) + + def run(self): + self.test_half_quotes() + self.test_esc() + self.test_specific_character() + + def stop(self): + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase()) diff --git a/tests/system-test/1-insert/test_ts4295.py b/tests/system-test/1-insert/test_ts4295.py new file mode 100644 index 0000000000..89e445f3c1 --- /dev/null +++ b/tests/system-test/1-insert/test_ts4295.py @@ -0,0 +1,49 @@ +import os +import sys +from util.log import * +from util.cases import * +from util.sql import * +from util.dnodes import tdDnodes +from math import inf +import taos + +class TDTestCase: + """Verify inserting varbinary type data of ts-4295 + """ + def init(self, conn, logSql, replicaVer=1): + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor(), True) + self.conn = conn + self.db_name = "db" + self.stable_name = "st" + + def run(self): + tdSql.execute("create database if not exists %s" % self.db_name) + tdSql.execute("use %s" % self.db_name) + # create super table + tdSql.execute("create table %s (ts timestamp, c1 varbinary(32)) tags (t1 int)" % self.stable_name) + # create child table + child_table_list = [] + for i in range(10): + child_table_name = "ct_" + str(i+1) + child_table_list.append(child_table_name) + tdSql.execute("create table %s using st tags(%s);" % (child_table_name, str(i+1))) + tdLog.info("create table %s successfully" % child_table_name) + # insert 
data + for i in range(100): + sql = "insert into table_name values" + for j in range(10000): + sql += "(now+%ss, '0x7661726331')," % str(j+1) + for child_table in child_table_list: + tdSql.execute(sql.replace("table_name", child_table)) + tdLog.info("Insert data into %s successfully" % child_table) + tdLog.info("Insert data round %s successfully" % str(i+1)) + tdSql.execute("flush database %s" % self.db_name) + + def stop(self): + tdSql.execute("drop database if exists %s" % self.db_name) + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase()) diff --git a/tests/system-test/1-insert/ts-4272.py b/tests/system-test/1-insert/ts-4272.py new file mode 100644 index 0000000000..bb81305eb3 --- /dev/null +++ b/tests/system-test/1-insert/ts-4272.py @@ -0,0 +1,205 @@ + +import csv +from datetime import datetime + +import taos +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * + +class TDTestCase: + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + self.testcasePath = os.path.split(__file__)[0] + self.testcasefilename = os.path.split(__file__)[-1] + self.ts = 1700638570000 # 2023-11-22T07:36:10.000Z + self.db = 'db1' + self.tb1 = 'd001' + self.tb2 = 'd002' + self.stable0 = "meters" + self.stable1 = "stb_1" + self.stable2 = "stb_null" + self.tag1 = f'using {self.stable0}(groupId) tags(1)' + self.tag2 = f'using {self.stable0}(groupId) tags(2)' + self.file1 = f"{self.testcasePath}/b.csv" + self.file2 = f"{self.testcasePath}/c.csv" + + #os.system("rm -rf %s/b.csv" %self.testcasePath) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), logSql) + + def check_count(self, rows, records): + tdSql.execute(f"use {self.db};") + tdSql.query(f"select tbname,count(*) from {self.stable0} group by tbname order by tbname;") + tdSql.checkRows(rows) + for i in range(rows): + tdSql.checkData(i, 1, records[i]) + + def reset_tb(self): + # create database and tables + # os.system("taos -s 'drop database if exists d1;'") + # os.system("taos -s 'create database d1;use d1;create stable meters (ts timestamp, current float, voltage int, phase float) tags (location binary(64), groupId int);'") + # os.system(f"taos -s 'use d1;create table d2001 using meters(groupId) tags(5);'") + # res = os.system(f"taos -s 'use d1;create table d2002 using meters(groupId) tags(6);'") + # if (0 != res): + # tdLog.exit(f"create tb error") + + tdSql.execute(f"drop database if exists {self.db};") + tdSql.execute(f"create database {self.db};") + tdSql.execute(f"use {self.db};") + tdSql.execute(f"create stable {self.stable0} (ts timestamp, current float, voltage int, phase float) tags (location binary(64), groupId int);") + tdSql.execute(f"create table {self.tb1} {self.tag1};") + tdSql.execute(f"create table {self.tb2} {self.tag2};") + tdSql.execute(f"create stable {self.stable1} (ts timestamp , q_int int , q_bigint bigint , q_smallint smallint , q_tinyint tinyint , q_float float , q_double double , q_bool bool , q_binary binary(100) , q_nchar nchar(100) , q_ts timestamp , q_int_null int , q_bigint_null bigint , q_smallint_null smallint , q_tinyint_null tinyint, q_float_null float , q_double_null double , q_bool_null bool , q_binary_null binary(20) , q_nchar_null nchar(20) , q_ts_null timestamp) tags(loc nchar(100) , t_int int , t_bigint bigint , t_smallint smallint , t_tinyint tinyint, t_bool bool , t_binary binary(100) , 
t_nchar nchar(100) ,t_float float , t_double double , t_ts timestamp);") + tdSql.execute(f"create stable {self.stable2} (ts timestamp , q_int int , q_bigint bigint , q_smallint smallint , q_tinyint tinyint , q_float float , q_double double , q_bool bool , q_binary binary(100) , q_nchar nchar(100) , q_ts timestamp , q_int_null int , q_bigint_null bigint , q_smallint_null smallint , q_tinyint_null tinyint, q_float_null float , q_double_null double , q_bool_null bool , q_binary_null binary(20) , q_nchar_null nchar(20) , q_ts_null timestamp) tags(loc nchar(100) , t_int int , t_bigint bigint , t_smallint smallint , t_tinyint tinyint, t_bool bool , t_binary binary(100) , t_nchar nchar(100) ,t_float float , t_double double , t_ts timestamp);") + + def test(self, sql): + sql = f"use {self.db};" + sql + res = os.system(f'taos -s "{sql}"') + # if (0 != res): + # tdLog.exit(f"taos sql error") + + + def check(self): + # same table, auto create + create + sql = f"insert into {self.tb1} {self.tag1} file '{self.file1}' {self.tb1} {self.tag1} file '{self.file2}';" + self.test(sql) + + # same table, create + insert + sql = f"insert into {self.tb1} {self.tag1} file '{self.file1}' {self.tb1} file '{self.file2}';" + self.test(sql) + + # same table, insert + create + sql = f"insert into {self.tb1} file '{self.file1}' {self.tb1} {self.tag1} file '{self.file2}';" + self.test(sql) + + # same table, insert + insert + sql = f"insert into {self.tb1} file '{self.file1}' {self.tb1} file '{self.file2}';" + self.test(sql) + + # diff table auto create + create + sql = f"insert into {self.tb1} {self.tag1} file '{self.file1}' {self.tb2} {self.tag2} file '{self.file2}';" + self.test(sql) + + # diff table, create + insert + sql = f"insert into {self.tb1} {self.tag1} file '{self.file1}' {self.tb2} file '{self.file2}';" + self.test(sql) + + # diff table, insert + create + sql = f"insert into {self.tb1} file '{self.file1}' {self.tb2} {self.tag2} file '{self.file2}';" + self.test(sql) + + # diff table, insert + insert + sql = f"insert into {self.tb1} file '{self.file1}' {self.tb2} file '{self.file2}';" + self.test(sql) + + # bigNum = 1010000 + # self.check_count(5, [2100, 2100, bigNum, bigNum, bigNum]) + + result = os.popen("taos -s 'select count(*) from %s.%s'" %(self.db, self.tb1)) + res = result.read() + if (f"OK" in res): + tdLog.info(f"check count success") + + def make_csv(self, filepath, once, qtime, startts): + f = open(filepath, 'w') + with f: + writer = csv.writer(f) + for j in range(qtime): + ts = startts + j*once + rows = [] + for i in range(once): + rows.append([ts + i, 0.3 + (i%10)/100.0, 210 + i%10, 10.0 + (i%20)/20.0]) + writer.writerows(rows) + f.close() + print(datetime.now(), filepath, " ready!") + + def test_mix(self): + #forbid use both value and file in one insert + result = os.popen(f"insert into {self.tb1} file '{self.file2}' {self.tb2} values('2021-07-13 14:06:34.630', 10.2, 219, 0.32);") + res = result.read() + if (f"error" in res): + tdLog.info(f"forbid success") + + def test_bigcsv(self): + # prepare csv + print("start csv data prepare") + once = 10000 + qtime1 = 101 + qtime2 = 100 + rowNum1 = qtime1 * once + rowNum2 = qtime2 * once + self.make_csv(self.file1, once, qtime1, self.ts - 86400000) + self.make_csv(self.file2, once, qtime2, self.ts) + print("end csv data prepare") + + # auto create + insert + sql = f"insert into {self.tb1} {self.tag1} file '{self.file1}';" + self.test(sql) + + # only insert + sql = f"insert into {self.tb2} file '{self.file2}';" + self.test(sql) + print("end insert to 
table") + + #tdSql.execute(f"use d1;") + tdSql.query(f"select tbname,count(*) from {self.stable0} group by tbname order by tbname;") + tdSql.checkRows(2) + tdSql.checkData(0, 1, rowNum1) + tdSql.checkData(1, 1, rowNum2) + print("check insert file to table success") + + def make_stable_csv(self, filepath, once, qtime, startts, table_name): + f = open(filepath, 'w') + with f: + writer = csv.writer(f) + for j in range(qtime): + offset = j*once + ts = startts + offset + rows = [] + for i in range(once): + rows.append([table_name, ts + i, offset + i, 'NULL']) + writer.writerows(rows) + f.close() + print(datetime.now(), filepath, " ready!") + + def test_stable_csv(self): + # prepare csv + print("start stable_csv data prepare") + once = 10000 + qtime1 = 101 + qtime2 = 100 + # rowNum1 = qtime1 * once + # rowNum2 = qtime2 * once + child_1 = f"{self.stable1}_1" + child_2 = f"{self.stable2}_1" + self.make_stable_csv(self.file1, once, qtime1, self.ts - 86400000, child_1) + self.make_stable_csv(self.file2, once, qtime2, self.ts, child_2) + print("end stable_csv data prepare") + + # insert create child table of stable + sql = f"insert into {self.db}.{self.stable1}(tbname,ts,q_int,q_binary) file '{self.file1}' {self.db}.{self.stable2}(tbname,ts,q_int,q_binary) file '{self.file2}';" + self.test(sql) + print("end insert to stable") + + #tdSql.execute(f"insert into {self.db}.{child_1}(ts, q_int) values(now, 1);") + tdSql.query(f"select tbname,count(*) from {self.stable1} group by tbname order by tbname;") + tdSql.checkRows(0) + print("check stable success") + + def run(self): + tdSql.prepare() + self.reset_tb() + self.test_stable_csv() + self.test_bigcsv() + self.test_mix() + self.check() + tdSql.close() + + def stop(self): + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/2-query/db.py b/tests/system-test/2-query/db.py index 6870c59a0d..0246626e40 100644 --- a/tests/system-test/2-query/db.py +++ b/tests/system-test/2-query/db.py @@ -55,7 +55,7 @@ class TDTestCase: tdSql.checkData(0, 2, 0) tdSql.query("show dnode 1 variables like '%debugFlag'") - tdSql.checkRows(22) + tdSql.checkRows(23) tdSql.query("show dnode 1 variables like '____debugFlag'") tdSql.checkRows(2) diff --git a/tests/system-test/2-query/timetruncate.py b/tests/system-test/2-query/timetruncate.py index a59180c2b1..09bdfcef63 100644 --- a/tests/system-test/2-query/timetruncate.py +++ b/tests/system-test/2-query/timetruncate.py @@ -58,7 +58,11 @@ class TDTestCase: elif unit.lower() == '1w': for i in range(len(self.ts_str)): ts_result = self.get_time.get_ms_timestamp(str(tdSql.queryResult[i][0])) - tdSql.checkEqual(ts_result,int(date_time[i]/1000/60/60/24/7)*7*24*60*60*1000) + if ignore_tz == 0: + tdSql.checkEqual(ts_result,int(date_time[i]/1000/60/60/24/7)*7*24*60*60*1000) + else: + # assuming the client timezone is UTC+0800 + tdSql.checkEqual(ts_result,int(date_time[i] - (date_time[i] + 8 * 3600 * 1000) % (86400 * 7 * 1000))) def check_us_timestamp(self,unit,date_time, ignore_tz): if unit.lower() == '1u': @@ -92,7 +96,11 @@ class TDTestCase: elif unit.lower() == '1w': for i in range(len(self.ts_str)): ts_result = self.get_time.get_us_timestamp(str(tdSql.queryResult[i][0])) - tdSql.checkEqual(ts_result,int(date_time[i]/1000/1000/60/60/24/7)*7*24*60*60*1000*1000) + if ignore_tz == 0: + tdSql.checkEqual(ts_result,int(date_time[i]/1000/1000/60/60/24/7)*7*24*60*60*1000*1000) + else: + # assuming the client timezone is UTC+0800 + 
tdSql.checkEqual(ts_result,int(date_time[i] - (date_time[i] + 8 * 3600 * 1000000) % (86400 * 7 * 1000000))) def check_ns_timestamp(self,unit,date_time, ignore_tz): if unit.lower() == '1b': @@ -130,7 +138,11 @@ class TDTestCase: elif unit.lower() == '1w': for i in range(len(self.ts_str)): if self.rest_tag != 'rest': - tdSql.checkEqual(tdSql.queryResult[i][0],int(date_time[i]*1000/1000/1000/1000/1000/60/60/24/7)*7*24*60*60*1000*1000*1000) + if ignore_tz == 0: + tdSql.checkEqual(tdSql.queryResult[i][0],int(date_time[i]*1000/1000/1000/1000/1000/60/60/24/7)*7*24*60*60*1000*1000*1000) + else: + # assuming the client timezone is UTC+0800 + tdSql.checkEqual(tdSql.queryResult[i][0],int(date_time[i] - (date_time[i] + 8 * 3600 * 1000000000) % (86400 * 7 * 1000000000))) def check_tb_type(self,unit,tb_type,ignore_tz): if tb_type.lower() == 'ntb': diff --git a/tests/system-test/6-cluster/5dnode3mnodeRoll.py b/tests/system-test/6-cluster/5dnode3mnodeRoll.py index 9d62eb3b4b..11a153c48f 100644 --- a/tests/system-test/6-cluster/5dnode3mnodeRoll.py +++ b/tests/system-test/6-cluster/5dnode3mnodeRoll.py @@ -4,7 +4,7 @@ import taos import sys import time import os - +import platform from util.log import * from util.sql import * from util.cases import * @@ -96,7 +96,10 @@ class TDTestCase: packagePath = "/usr/local/src/" dataPath = cPath + "/../data/" - packageName = "TDengine-server-"+ BASEVERSION + "-Linux-x64.tar.gz" + if platform.system() == "Linux" and platform.machine() == "aarch64": + packageName = "TDengine-server-"+ BASEVERSION + "-Linux-arm64.tar.gz" + else: + packageName = "TDengine-server-"+ BASEVERSION + "-Linux-x64.tar.gz" packageTPath = packageName.split("-Linux-")[0] my_file = Path(f"{packagePath}/{packageName}") if not my_file.exists(): diff --git a/tests/system-test/7-tmq/tmqCommon.py b/tests/system-test/7-tmq/tmqCommon.py index 059744caf0..66c88cf600 100644 --- a/tests/system-test/7-tmq/tmqCommon.py +++ b/tests/system-test/7-tmq/tmqCommon.py @@ -75,7 +75,7 @@ class TMQCom: if tdSql.getRows() == expectRows: break else: - time.sleep(5) + time.sleep(0.5) for i in range(expectRows): tdLog.info ("consume id: %d, consume msgs: %d, consume rows: %d"%(tdSql.getData(i , 1), tdSql.getData(i , 2), tdSql.getData(i , 3))) @@ -156,7 +156,7 @@ class TMQCom: tdLog.info("row: %d"%(actRows)) if (actRows >= rows): loopFlag = 0 - time.sleep(0.02) + time.sleep(0.5) return def getStartCommitNotifyFromTmqsim(self,cdbName='cdb',rows=1): @@ -167,7 +167,7 @@ class TMQCom: tdLog.info("row: %d"%(actRows)) if (actRows >= rows): loopFlag = 0 - time.sleep(0.02) + time.sleep(0.5) return def create_database(self,tsql, dbName,dropFlag=1,vgroups=4,replica=1): diff --git a/tests/system-test/7-tmq/tmqDnodeRestart.py b/tests/system-test/7-tmq/tmqDnodeRestart.py index 74aba31726..0ac8482163 100644 --- a/tests/system-test/7-tmq/tmqDnodeRestart.py +++ b/tests/system-test/7-tmq/tmqDnodeRestart.py @@ -4,6 +4,7 @@ import sys import time import socket import os +import platform import threading from enum import Enum @@ -184,6 +185,9 @@ class TDTestCase: paraDict['vgroups'] = self.vgroups paraDict['ctbNum'] = self.ctbNum paraDict['rowsPerTbl'] = self.rowsPerTbl + # ARM64: stopping taosd takes much longer on aarch64, so raise pollDelay to 300s + if platform.system() == "Linux" and platform.machine() == "aarch64": + paraDict['pollDelay'] = 300 tmqCom.initConsumerTable() # tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) diff --git a/tests/system-test/7-tmq/tmqVnodeReplicate.py 
b/tests/system-test/7-tmq/tmqVnodeReplicate.py index fd8ece02e0..0ee11781ed 100644 --- a/tests/system-test/7-tmq/tmqVnodeReplicate.py +++ b/tests/system-test/7-tmq/tmqVnodeReplicate.py @@ -105,7 +105,6 @@ class TDTestCase: topicNameList = ['topic1'] # expectRowsList = [] - tmqCom.initConsumerTable("cdb", self.replicaVar) tdLog.info("create topics from stb with filter") queryString = "select * from %s.%s"%(paraDict['dbName'], paraDict['stbName']) @@ -133,14 +132,15 @@ class TDTestCase: tmqCom.getStartConsumeNotifyFromTmqsim() tmqCom.getStartCommitNotifyFromTmqsim() - tdSql.query("select * from information_schema.ins_vnodes") - # tdLog.debug(tdSql.queryResult) - tdDnodes = cluster.dnodes - for result in tdSql.queryResult: - if result[2] == 'dbt' and result[3] == 'leader': - tdLog.debug("leader is %d"%(result[0] - 1)) - tdDnodes[result[0] - 1].stoptaosd() - break + tdSql.query("balance vgroup leader") + # tdSql.query("select * from information_schema.ins_vnodes") + # # tdLog.debug(tdSql.queryResult) + # tdDnodes = cluster.dnodes + # for result in tdSql.queryResult: + # if result[2] == 'dbt' and result[3] == 'leader': + # tdLog.debug("leader is %d"%(result[0] - 1)) + # tdDnodes[result[0] - 1].stoptaosd() + # break pInsertThread.join() expectRows = 1 @@ -159,7 +159,6 @@ class TDTestCase: tdLog.printNoPrefix("======== test case 1 end ...... ") def run(self): - tdSql.prepare() self.prepareTestEnv() self.tmqCase1() diff --git a/tests/system-test/7-tmq/tmqVnodeSplit-column.py b/tests/system-test/7-tmq/tmqVnodeSplit-column.py index 54a43465e7..87a73e981e 100644 --- a/tests/system-test/7-tmq/tmqVnodeSplit-column.py +++ b/tests/system-test/7-tmq/tmqVnodeSplit-column.py @@ -188,7 +188,7 @@ class TDTestCase: expectRows = 1 resultList = tmqCom.selectConsumeResult(expectRows) - if expectrowcnt / 2 >= resultList[0]: + if expectrowcnt / 2 > resultList[0]: tdLog.info("expect consume rows: %d, act consume rows: %d"%(expectrowcnt / 2, resultList[0])) tdLog.exit("%d tmq consume rows error!"%consumerId) diff --git a/tests/system-test/7-tmq/tmqVnodeSplit-stb-select-duplicatedata-false.py b/tests/system-test/7-tmq/tmqVnodeSplit-stb-select-duplicatedata-false.py new file mode 100644 index 0000000000..8276ae638b --- /dev/null +++ b/tests/system-test/7-tmq/tmqVnodeSplit-stb-select-duplicatedata-false.py @@ -0,0 +1,218 @@ + +import taos +import sys +import time +import socket +import os +import threading +import math + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * +from util.cluster import * +sys.path.append("./7-tmq") +from tmqCommon import * + +sys.path.append("./6-cluster") +from clusterCommonCreate import * +from clusterCommonCheck import clusterComCheck + + +class TDTestCase: + def __init__(self): + self.vgroups = 1 + self.ctbNum = 10 + self.rowsPerTbl = 10000 + + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), False) + + def getDataPath(self): + selfPath = tdCom.getBuildPath() + + return selfPath + '/../sim/dnode%d/data/vnode/vnode%d/wal/*'; + + def prepareTestEnv(self): + tdLog.printNoPrefix("======== prepare test env include database, stable, ctables, and insert data: ") + paraDict = {'dbName': 'dbt', + 'dropFlag': 1, + 'event': '', + 'vgroups': 1, + 'stbName': 'stb', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 
'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1},{'type': 'TIMESTAMP', 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], + 'ctbPrefix': 'ctb', + 'ctbStartIdx': 0, + 'ctbNum': 10, + 'rowsPerTbl': 10000, + 'batchNum': 10, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'pollDelay': 60, + 'showMsg': 1, + 'showRow': 1, + 'snapshot': 0} + + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + paraDict['rowsPerTbl'] = self.rowsPerTbl + + tdCom.drop_all_db() + tmqCom.initConsumerTable() + tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], wal_retention_period=36000,vgroups=paraDict["vgroups"],replica=self.replicaVar) + tdLog.info("create stb") + tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) + return + + def restartAndRemoveWal(self, deleteWal): + tdDnodes = cluster.dnodes + tdSql.query("select * from information_schema.ins_vnodes") + for result in tdSql.queryResult: + if result[2] == 'dbt': + tdLog.debug("dnode is %d"%(result[0])) + dnodeId = result[0] + vnodeId = result[1] + + tdDnodes[dnodeId - 1].stoptaosd() + time.sleep(1) + dataPath = self.getDataPath() + dataPath = dataPath%(dnodeId,vnodeId) + tdLog.debug("dataPath:%s"%dataPath) + if deleteWal: + if os.system('rm -rf ' + dataPath) != 0: + tdLog.exit("rm error") + + tdDnodes[dnodeId - 1].starttaosd() + time.sleep(1) + break + tdLog.debug("restart dnode ok") + + def splitVgroups(self): + tdSql.query("select * from information_schema.ins_vnodes") + vnodeId = 0 + for result in tdSql.queryResult: + if result[2] == 'dbt': + vnodeId = result[1] + tdLog.debug("vnode is %d"%(vnodeId)) + break + splitSql = "split vgroup %d" %(vnodeId) + tdLog.debug("splitSql:%s"%(splitSql)) + tdSql.query(splitSql) + tdLog.debug("splitSql ok") + + def tmqCase1(self, deleteWal=False): + tdLog.printNoPrefix("======== test case 1: ") + paraDict = {'dbName': 'dbt', + 'dropFlag': 1, + 'event': '', + 'vgroups': 1, + 'stbName': 'stb', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1},{'type': 'TIMESTAMP', 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], + 'ctbPrefix': 'ctb1', + 'ctbStartIdx': 0, + 'ctbNum': 10, + 'rowsPerTbl': 10000, + 'batchNum': 10, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'pollDelay': 120, + 'showMsg': 1, + 'showRow': 1, + 'snapshot': 0} + + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + paraDict['rowsPerTbl'] = self.rowsPerTbl + + topicNameList = ['topic1'] + # expectRowsList = [] + tmqCom.initConsumerTable() + + tdLog.info("create topics from stb with filter") + queryString = "select * from %s.%s"%(paraDict['dbName'], paraDict['stbName']) + # sqlString = "create topic %s as stable %s" %(topicNameList[0], paraDict['stbName']) + sqlString = "create topic %s as %s" %(topicNameList[0], queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.execute(sqlString) + # tdSql.query(queryString) + # expectRowsList.append(tdSql.getRows()) + + # init consume info, and start tmq_sim, then check consume result + tdLog.info("insert consume info to consume processor") + 
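+        # two insert batches are produced (one started before the vgroup split, one after), so expect rowsPerTbl * ctbNum * 2 rows in total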
consumerId = 0 + expectrowcnt = paraDict["rowsPerTbl"] * paraDict["ctbNum"] * 2 + topicList = topicNameList[0] + ifcheckdata = 1 + ifManualCommit = 1 + keyList = 'group.id:cgrp1, enable.auto.commit:true, auto.commit.interval.ms:200, auto.offset.reset:earliest' + tmqCom.insertConsumerInfo(consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifManualCommit) + + tdLog.info("start consume processor") + tmqCom.startTmqSimProcess(pollDelay=paraDict['pollDelay'],dbName=paraDict["dbName"],showMsg=paraDict['showMsg'], showRow=paraDict['showRow'],snapshot=paraDict['snapshot']) + tdLog.info("wait the consume result") + + tdLog.info("create ctb1") + tmqCom.create_ctable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix=paraDict['ctbPrefix'], + ctbNum=paraDict["ctbNum"],ctbStartIdx=paraDict['ctbStartIdx']) + + tdLog.info("create ctb2") + paraDict['ctbPrefix'] = "ctb2" + tmqCom.create_ctable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix=paraDict['ctbPrefix'], + ctbNum=paraDict["ctbNum"],ctbStartIdx=paraDict['ctbStartIdx']) + + tdLog.info("insert ctb1 data") + pInsertThread = tmqCom.asyncInsertDataByInterlace(paraDict) + + tmqCom.getStartConsumeNotifyFromTmqsim() + tmqCom.getStartCommitNotifyFromTmqsim() + + #restart dnode & remove wal + self.restartAndRemoveWal(deleteWal) + + # split vgroup + self.splitVgroups() + + + tdLog.info("insert ctb2 data") + pInsertThread1 = tmqCom.asyncInsertDataByInterlace(paraDict) + pInsertThread.join() + pInsertThread1.join() + + expectRows = 1 + resultList = tmqCom.selectConsumeResult(expectRows) + + if expectrowcnt / 2 >= resultList[0]: + tdLog.info("expect consume rows: %d, act consume rows: %d"%(expectrowcnt / 2, resultList[0])) + tdLog.exit("%d tmq consume rows error!"%consumerId) + + # tmqCom.checkFileContent(consumerId, queryString) + + time.sleep(2) + for i in range(len(topicNameList)): + tdSql.query("drop topic %s"%topicNameList[i]) + + if deleteWal == True: + clusterComCheck.check_vgroups_status(vgroup_numbers=2,db_replica=self.replicaVar,db_name="dbt",count_number=240) + + tdLog.printNoPrefix("======== test case 1 end ...... 
") + + def run(self): + self.prepareTestEnv() + self.tmqCase1(False) + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +event = threading.Event() + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/7-tmq/tmqVnodeSplit-stb-select-duplicatedata.py b/tests/system-test/7-tmq/tmqVnodeSplit-stb-select-duplicatedata.py index 4f3f46725a..0d247b2848 100644 --- a/tests/system-test/7-tmq/tmqVnodeSplit-stb-select-duplicatedata.py +++ b/tests/system-test/7-tmq/tmqVnodeSplit-stb-select-duplicatedata.py @@ -207,8 +207,6 @@ class TDTestCase: def run(self): self.prepareTestEnv() self.tmqCase1(True) - self.prepareTestEnv() - self.tmqCase1(False) def stop(self): tdSql.close() diff --git a/tests/system-test/7-tmq/tmq_per.py b/tests/system-test/7-tmq/tmq_per.py new file mode 100644 index 0000000000..f3701dacab --- /dev/null +++ b/tests/system-test/7-tmq/tmq_per.py @@ -0,0 +1,196 @@ + +import taos +import sys +import time +import socket +import os +import threading +import math + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * +from util.cluster import * +sys.path.append("./7-tmq") +from tmqCommon import * + +from util.cluster import * +sys.path.append("./6-cluster") +from clusterCommonCreate import * +from clusterCommonCheck import clusterComCheck + +class TDTestCase: + def __init__(self): + self.vgroups = 1 + self.ctbNum = 10000 + self.rowsPerTbl = 10000 + + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), False) + + def getDataPath(self): + selfPath = tdCom.getBuildPath() + + return selfPath + '/../sim/dnode%d/data/vnode/vnode%d/wal/*'; + + def prepareTestEnv(self): + tdLog.printNoPrefix("======== prepare test env include database, stable, ctables, and insert data: ") + paraDict = {'dbName': 'dbt', + 'dropFlag': 1, + 'event': '', + 'vgroups': 1, + 'stbName': 'stb', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1},{'type': 'TIMESTAMP', 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], + 'ctbPrefix': 'ctb', + 'ctbStartIdx': 0, + 'ctbNum': 10, + 'rowsPerTbl': 10000, + 'batchNum': 10, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'pollDelay': 60, + 'showMsg': 1, + 'showRow': 1, + 'snapshot': 0} + + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + paraDict['rowsPerTbl'] = self.rowsPerTbl + + tdCom.drop_all_db() + tmqCom.initConsumerTable() + tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], wal_retention_period=36000,vgroups=paraDict["vgroups"],replica=self.replicaVar) + tdLog.info("create stb") + tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) + return + + def restartAndRemoveWal(self, deleteWal): + tdDnodes = cluster.dnodes + tdSql.query("select * from information_schema.ins_vnodes") + for result in tdSql.queryResult: + if result[2] == 'dbt': + tdLog.debug("dnode is %d"%(result[0])) + dnodeId = result[0] + vnodeId = result[1] + + tdDnodes[dnodeId - 1].stoptaosd() + time.sleep(1) + dataPath = self.getDataPath() + dataPath = 
dataPath%(dnodeId,vnodeId) + tdLog.debug("dataPath:%s"%dataPath) + if deleteWal: + if os.system('rm -rf ' + dataPath) != 0: + tdLog.exit("rm error") + + tdDnodes[dnodeId - 1].starttaosd() + time.sleep(1) + break + tdLog.debug("restart dnode ok") + + def splitVgroups(self): + tdSql.query("select * from information_schema.ins_vnodes") + vnodeId = 0 + for result in tdSql.queryResult: + if result[2] == 'dbt': + vnodeId = result[1] + tdLog.debug("vnode is %d"%(vnodeId)) + break + splitSql = "split vgroup %d" %(vnodeId) + tdLog.debug("splitSql:%s"%(splitSql)) + tdSql.query(splitSql) + tdLog.debug("splitSql ok") + + def tmqCase1(self, deleteWal=False): + tdLog.printNoPrefix("======== test case 1: ") + paraDict = {'dbName': 'dbt', + 'dropFlag': 1, + 'event': '', + 'vgroups': 1, + 'stbName': 'stb', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1},{'type': 'TIMESTAMP', 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], + 'ctbPrefix': 'ctb1', + 'ctbStartIdx': 0, + 'ctbNum': 10000, + 'rowsPerTbl': 10000, + 'batchNum': 10000, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'pollDelay': 5, + 'showMsg': 1, + 'showRow': 1, + 'snapshot': 0} + + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + print + paraDict['rowsPerTbl'] = self.rowsPerTbl + + topicNameList = ['topic1'] + # expectRowsList = [] + tmqCom.initConsumerTable() + + tdLog.info("create topics from stb ") + queryString = "stable %s.%s"%(paraDict['dbName'], paraDict['stbName']) + # sqlString = "create topic %s as stable %s" %(topicNameList[0], paraDict['stbName']) + sqlString = "create topic %s as %s" %(topicNameList[0], queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.execute(sqlString) + # tdSql.query(queryString) + # expectRowsList.append(tdSql.getRows()) + + # init consume info, and start tmq_sim, then check consume result + tdLog.info("insert consume info to consume processor") + consumerId = 0 + expectrowcnt = paraDict["rowsPerTbl"] * paraDict["ctbNum"] + topicList = topicNameList[0] + ifcheckdata = 1 + ifManualCommit = 1 + keyList = 'group.id:cgrp1, enable.auto.commit:true, auto.commit.interval.ms:200, auto.offset.reset:earliest' + tmqCom.insertConsumerInfo(consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifManualCommit) + + tdLog.info("create ctb1") + tmqCom.create_ctable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix=paraDict['ctbPrefix'], + ctbNum=paraDict["ctbNum"],ctbStartIdx=paraDict['ctbStartIdx']) + + tdLog.info("insert ctb1 data") + pInsertThread = tmqCom.asyncInsertDataByInterlace(paraDict) + pInsertThread.join() + + + tdLog.info("start consume processor") + tmqCom.startTmqSimProcess(pollDelay=paraDict['pollDelay'],dbName=paraDict["dbName"],showMsg=paraDict['showMsg'], showRow=paraDict['showRow'],snapshot=paraDict['snapshot']) + tdLog.info("wait the consume result") + + tmqCom.getStartConsumeNotifyFromTmqsim() + tmqCom.getStartCommitNotifyFromTmqsim() + + expectRows = 1 + tdLog.info("expectRows:%d"%expectRows) + resultList = tmqCom.selectConsumeResult(expectRows) + # for i in range(len(topicNameList)): + # tdSql.query("drop topic %s"%topicNameList[i]) + + if deleteWal == True: + 
clusterComCheck.check_vgroups_status(vgroup_numbers=1,db_replica=self.replicaVar,db_name="dbt",count_number=240) + tdLog.printNoPrefix("======== test case 1 end ...... ") + + def run(self): + self.prepareTestEnv() + self.tmqCase1(True) + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +event = threading.Event() + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/8-stream/snode_restart.py b/tests/system-test/8-stream/snode_restart.py new file mode 100644 index 0000000000..3657163ab0 --- /dev/null +++ b/tests/system-test/8-stream/snode_restart.py @@ -0,0 +1,78 @@ + +import taos +import sys +import time +import socket +import os +import threading +import math + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * +from util.cluster import * + +class TDTestCase: + updatecfgDict = {'checkpointInterval': 1100} + print("===================: ", updatecfgDict) + + def init(self, conn, logSql, replicaVar=1): + tdLog.debug(f"start to execute {__file__}") + tdSql.init(conn.cursor(), False) + + + def case1(self): + tdLog.debug("========case1 start========") + + os.system("nohup taosBenchmark -y -B 1 -t 4 -S 1000 -n 1000 -i 1000 -v 2 > /dev/null 2>&1 &") + time.sleep(4) + tdSql.query("use test") + tdSql.query("create snode on dnode 4") + tdSql.query("create stream if not exists s1 trigger at_once ignore expired 0 ignore update 0 fill_history 1 into st1 as select _wstart,sum(voltage),groupid from meters partition by groupid interval(2s)") + tdLog.debug("========create stream using snode and insert data ok========") + time.sleep(4) + + tdDnodes = cluster.dnodes + tdDnodes[3].stoptaosd() + time.sleep(2) + tdDnodes[3].starttaosd() + tdLog.debug("========snode restart ok========") + + time.sleep(30) + os.system("kill -9 `pgrep taosBenchmark`") + tdLog.debug("========stop insert ok========") + time.sleep(2) + + tdSql.query("select _wstart,sum(voltage),groupid from meters partition by groupid interval(2s) order by groupid,_wstart") + rowCnt = tdSql.getRows() + results = [] + for i in range(rowCnt): + results.append(tdSql.getData(i,1)) + + tdSql.query("select * from st1 order by groupid,_wstart") + tdSql.checkRows(rowCnt) + for i in range(rowCnt): + data1 = tdSql.getData(i,1) + data2 = results[i] + if data1 != data2: + tdLog.info("num: %d, act data: %d, expect data: %d"%(i, data1, data2)) + tdLog.exit("check data error!") + + # tdLog.debug("========sleep 500s========") + # time.sleep(500) + + tdLog.debug("case1 end") + + def run(self): + self.case1() + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +event = threading.Event() + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/8-stream/snode_restart_with_checkpoint.py b/tests/system-test/8-stream/snode_restart_with_checkpoint.py new file mode 100644 index 0000000000..d7bfd7b407 --- /dev/null +++ b/tests/system-test/8-stream/snode_restart_with_checkpoint.py @@ -0,0 +1,78 @@ + +import taos +import sys +import time +import socket +import os +import threading +import math + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * +from util.cluster import * + +class TDTestCase: + # updatecfgDict = {'checkpointInterval': 5} + # print("===================: ", updatecfgDict) + + def init(self, conn, logSql, 
replicaVar=1): + tdLog.debug(f"start to execute {__file__}") + tdSql.init(conn.cursor(), False) + + + def case1(self): + tdLog.debug("========case1 start========") + + os.system("nohup taosBenchmark -y -B 1 -t 4 -S 1000 -n 1000 -i 1000 -v 2 > /dev/null 2>&1 &") + time.sleep(4) + tdSql.query("use test") + tdSql.query("create snode on dnode 4") + tdSql.query("create stream if not exists s1 trigger at_once ignore expired 0 ignore update 0 fill_history 1 into st1 as select _wstart,sum(voltage),groupid from meters partition by groupid interval(2s)") + tdLog.debug("========create stream using snode and insert data ok========") + time.sleep(60) + + tdDnodes = cluster.dnodes + tdDnodes[3].stoptaosd() + time.sleep(2) + tdDnodes[3].starttaosd() + tdLog.debug("========snode restart ok========") + + time.sleep(30) + os.system("kill -9 `pgrep taosBenchmark`") + tdLog.debug("========stop insert ok========") + time.sleep(2) + + tdSql.query("select _wstart,sum(voltage),groupid from meters partition by groupid interval(2s) order by groupid,_wstart") + rowCnt = tdSql.getRows() + results = [] + for i in range(rowCnt): + results.append(tdSql.getData(i,1)) + + tdSql.query("select * from st1 order by groupid,_wstart") + tdSql.checkRows(rowCnt) + for i in range(rowCnt): + data1 = tdSql.getData(i,1) + data2 = results[i] + if data1 != data2: + tdLog.info("num: %d, act data: %d, expect data: %d"%(i, data1, data2)) + tdLog.exit("check data error!") + + # tdLog.debug("========sleep 500s========") + # time.sleep(500) + + tdLog.debug("case1 end") + + def run(self): + self.case1() + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +event = threading.Event() + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/8-stream/vnode_restart.py b/tests/system-test/8-stream/vnode_restart.py new file mode 100644 index 0000000000..a53432b77a --- /dev/null +++ b/tests/system-test/8-stream/vnode_restart.py @@ -0,0 +1,77 @@ + +import taos +import sys +import time +import socket +import os +import threading
+import math + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * +from util.cluster import * + +class TDTestCase: + updatecfgDict = {'checkpointInterval': 1100} + print("===================: ", updatecfgDict) + + def init(self, conn, logSql, replicaVar=1): + tdLog.debug(f"start to execute {__file__}") + tdSql.init(conn.cursor(), False) + + + def case1(self): + tdLog.debug("========case1 start========") + + os.system("nohup taosBenchmark -y -B 1 -t 4 -S 1000 -n 1000 -i 1000 -v 2 > /dev/null 2>&1 &") + time.sleep(4) + tdSql.query("use test") + tdSql.query("create stream if not exists s1 trigger at_once ignore expired 0 ignore update 0 fill_history 1 into st1 as select _wstart,sum(voltage),groupid from meters partition by groupid interval(2s)") + tdLog.debug("========create stream and insert data ok========") + time.sleep(4) + + tdDnodes = cluster.dnodes + tdDnodes[2].stoptaosd() + time.sleep(2) + tdDnodes[2].starttaosd() + tdLog.debug("========vnode restart ok========") + + time.sleep(30) + os.system("kill -9 `pgrep taosBenchmark`") + tdLog.debug("========stop insert ok========") + time.sleep(2) + + tdSql.query("select _wstart,sum(voltage),groupid from meters partition by groupid interval(2s) order by groupid,_wstart") + rowCnt = tdSql.getRows() + results = [] + for i in range(rowCnt): + results.append(tdSql.getData(i,1)) + + 
tdSql.query("select * from st1 order by groupid,_wstart") + tdSql.checkRows(rowCnt) + for i in range(rowCnt): + data1 = tdSql.getData(i,1) + data2 = results[i] + if data1 != data2: + tdLog.info("num: %d, act data: %d, expect data: %d"%(i, data1, data2)) + tdLog.exit("check data error!") + + # tdLog.debug("========sleep 500s========") + # time.sleep(500) + + tdLog.debug("case1 end") + + def run(self): + self.case1() + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +event = threading.Event() + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/output.txt b/tests/system-test/output.txt deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/system-test/test.py b/tests/system-test/test.py index 81f98fea22..795132b14e 100644 --- a/tests/system-test/test.py +++ b/tests/system-test/test.py @@ -582,7 +582,7 @@ if __name__ == "__main__": tdDnodes.setAsan(asan) tdDnodes.stopAll() for dnode in tdDnodes.dnodes: - tdDnodes.deploy(dnode.index,{}) + tdDnodes.deploy(dnode.index,updateCfgDict) for dnode in tdDnodes.dnodes: tdDnodes.starttaosd(dnode.index) tdCases.logSql(logSql) diff --git a/tests/system-test/win-test-file b/tests/system-test/win-test-file index 3daf65b406..aefdb1e824 100644 --- a/tests/system-test/win-test-file +++ b/tests/system-test/win-test-file @@ -1,3 +1,12 @@ +python3 ./test.py -f 2-query/tbname_vgroup.py +python3 ./test.py -f 2-query/stbJoin.py +python3 ./test.py -f 2-query/stbJoin.py -Q 2 +python3 ./test.py -f 2-query/stbJoin.py -Q 3 +python3 ./test.py -f 2-query/stbJoin.py -Q 4 +python3 ./test.py -f 2-query/hint.py +python3 ./test.py -f 2-query/hint.py -Q 2 +python3 ./test.py -f 2-query/hint.py -Q 3 +python3 ./test.py -f 2-query/hint.py -Q 4 python3 ./test.py -f 2-query/nestedQuery.py python3 ./test.py -f 2-query/nestedQuery_str.py python3 ./test.py -f 2-query/nestedQuery_math.py @@ -18,7 +27,30 @@ python3 ./test.py -f 2-query/nestedQuery_math.py -Q 4 python3 ./test.py -f 2-query/nestedQuery_time.py -Q 4 python3 ./test.py -f 2-query/nestedQuery_26.py -Q 4 python3 ./test.py -f 2-query/interval_limit_opt.py -Q 4 +python3 ./test.py -f 2-query/interval_unit.py +python3 ./test.py -f 2-query/interval_unit.py -Q 2 +python3 ./test.py -f 2-query/interval_unit.py -Q 3 +python3 ./test.py -f 2-query/interval_unit.py -Q 4 python3 ./test.py -f 2-query/partition_by_col.py -Q 4 +python3 ./test.py -f 2-query/partition_by_col.py -Q 3 +python3 ./test.py -f 2-query/partition_by_col.py -Q 2 +python3 ./test.py -f 2-query/partition_by_col.py +python3 ./test.py -f 2-query/partition_by_col_agg.py +python3 ./test.py -f 2-query/partition_by_col_agg.py -Q 2 +python3 ./test.py -f 2-query/partition_by_col_agg.py -Q 3 +python3 ./test.py -f 2-query/partition_by_col_agg.py -Q 4 +python3 ./test.py -f 2-query/interval_limit_opt_2.py -Q 4 +python3 ./test.py -f 2-query/interval_limit_opt_2.py -Q 3 +python3 ./test.py -f 2-query/interval_limit_opt_2.py -Q 2 +python3 ./test.py -f 2-query/interval_limit_opt_2.py +python3 ./test.py -f 2-query/func_to_char_timestamp.py +python3 ./test.py -f 2-query/func_to_char_timestamp.py -Q 2 +python3 ./test.py -f 2-query/func_to_char_timestamp.py -Q 3 +python3 ./test.py -f 2-query/func_to_char_timestamp.py -Q 4 +python3 ./test.py -f 2-query/last_cache_scan.py +python3 ./test.py -f 2-query/last_cache_scan.py -Q 2 +python3 ./test.py -f 2-query/last_cache_scan.py -Q 3 +python3 ./test.py -f 2-query/last_cache_scan.py -Q 4 python3 ./test.py -f 7-tmq/tmqShow.py python3 
./test.py -f 7-tmq/tmqDropStb.py python3 ./test.py -f 7-tmq/subscribeStb0.py @@ -29,11 +61,13 @@ python3 ./test.py -f 7-tmq/subscribeDb0.py -N 3 -n 3 python3 ./test.py -f 7-tmq/ins_topics_test.py python3 ./test.py -f 7-tmq/tmqMaxTopic.py python3 ./test.py -f 7-tmq/tmqParamsTest.py +python3 ./test.py -f 7-tmq/tmqParamsTest.py -R python3 ./test.py -f 7-tmq/tmqClientConsLog.py python3 ./test.py -f 7-tmq/tmqMaxGroupIds.py python3 ./test.py -f 7-tmq/tmqConsumeDiscontinuousData.py python3 ./test.py -f 7-tmq/tmqOffset.py python3 ./test.py -f 7-tmq/tmqDropConsumer.py +python3 ./test.py -f 1-insert/insert_stb.py python3 ./test.py -f 1-insert/delete_stable.py python3 ./test.py -f 2-query/out_of_order.py -Q 3 python3 ./test.py -f 2-query/out_of_order.py @@ -61,10 +95,14 @@ python3 ./test.py -f 2-query/slimit.py -R python3 ./test.py -f 2-query/slimit.py -Q 2 python3 ./test.py -f 2-query/slimit.py -Q 3 python3 ./test.py -f 2-query/slimit.py -Q 4 -python3 ./test.py -f 3-enterprise/restore/restoreDnode.py -N 5 -M 3 -python3 ./test.py -f 3-enterprise/restore/restoreVnode.py -N 5 -M 3 -python3 ./test.py -f 3-enterprise/restore/restoreMnode.py -N 5 -M 3 -python3 ./test.py -f 3-enterprise/restore/restoreQnode.py -N 5 -M 3 +python3 ./test.py -f 2-query/ts-4233.py +python3 ./test.py -f 2-query/ts-4233.py -Q 2 +python3 ./test.py -f 2-query/ts-4233.py -Q 3 +python3 ./test.py -f 2-query/ts-4233.py -Q 4 +python3 ./test.py -f 3-enterprise/restore/restoreDnode.py -N 5 -M 3 -i False +python3 ./test.py -f 3-enterprise/restore/restoreVnode.py -N 5 -M 3 -i False +python3 ./test.py -f 3-enterprise/restore/restoreMnode.py -N 5 -M 3 -i False +python3 ./test.py -f 3-enterprise/restore/restoreQnode.py -N 5 -M 3 -i False python3 ./test.py -f 7-tmq/create_wrong_topic.py python3 ./test.py -f 7-tmq/dropDbR3ConflictTransaction.py -N 3 python3 ./test.py -f 7-tmq/basic5.py @@ -112,16 +150,30 @@ python3 ./test.py -f 7-tmq/stbTagFilter-1ctb.py python3 ./test.py -f 7-tmq/dataFromTsdbNWal.py python3 ./test.py -f 7-tmq/dataFromTsdbNWal-multiCtb.py python3 ./test.py -f 7-tmq/tmq_taosx.py +python3 ./test.py -f 7-tmq/tmq_replay.py +python3 ./test.py -f 7-tmq/tmqSeekAndCommit.py +python3 ./test.py -f 7-tmq/tmq_offset.py +python3 ./test.py -f 7-tmq/tmqDataPrecisionUnit.py python3 ./test.py -f 7-tmq/raw_block_interface_test.py python3 ./test.py -f 7-tmq/stbTagFilter-multiCtb.py python3 ./test.py -f 7-tmq/tmqSubscribeStb-r3.py -N 5 -python3 ./test.py -f 7-tmq/tmq3mnodeSwitch.py -N 6 -M 3 -python3 ./test.py -f 7-tmq/tmq3mnodeSwitch.py -N 6 -M 3 -n 3 +python3 ./test.py -f 7-tmq/tmq3mnodeSwitch.py -N 6 -M 3 -i True +python3 ./test.py -f 7-tmq/tmq3mnodeSwitch.py -N 6 -M 3 -n 3 -i True +python3 test.py -f 7-tmq/tmqVnodeTransform-stb.py -N 2 -n 1 +python3 test.py -f 7-tmq/tmqVnodeTransform-stb.py -N 6 -n 3 +python3 test.py -f 7-tmq/tmqVnodeSplit-stb-select.py -N 2 -n 1 +python3 test.py -f 7-tmq/tmqVnodeSplit-stb-select-duplicatedata.py -N 3 -n 3 +python3 test.py -f 7-tmq/tmqVnodeSplit-stb-select.py -N 3 -n 3 +python3 test.py -f 7-tmq/tmqVnodeSplit-stb.py -N 3 -n 3 +python3 test.py -f 7-tmq/tmqVnodeSplit-column.py -N 3 -n 3 +python3 test.py -f 7-tmq/tmqVnodeSplit-db.py -N 3 -n 3 +python3 test.py -f 7-tmq/tmqVnodeReplicate.py -M 3 -N 3 -n 3 python3 ./test.py -f 99-TDcase/TD-19201.py python3 ./test.py -f 99-TDcase/TD-21561.py python3 ./test.py -f 99-TDcase/TS-3404.py python3 ./test.py -f 99-TDcase/TS-3581.py python3 ./test.py -f 99-TDcase/TS-3311.py +python3 ./test.py -f 99-TDcase/TS-3821.py python3 ./test.py -f 0-others/balance_vgroups_r1.py -N 6 
python3 ./test.py -f 0-others/taosShell.py python3 ./test.py -f 0-others/taosShellError.py @@ -139,10 +191,19 @@ python3 ./test.py -f 0-others/user_privilege_show.py python3 ./test.py -f 0-others/user_privilege_all.py python3 ./test.py -f 0-others/fsync.py python3 ./test.py -f 0-others/multilevel.py +python3 ./test.py -f 0-others/ttl.py +python3 ./test.py -f 0-others/ttlChangeOnWrite.py +python3 ./test.py -f 0-others/compress_tsz1.py +python3 ./test.py -f 0-others/compress_tsz2.py +python3 ./test.py -f 0-others/view/non_marterial_view/test_view.py python3 ./test.py -f 0-others/compatibility.py python3 ./test.py -f 0-others/tag_index_basic.py python3 ./test.py -N 3 -f 0-others/walRetention.py +python3 ./test.py -f 0-others/splitVGroupRep1.py -N 3 +python3 ./test.py -f 0-others/splitVGroupRep3.py -N 3 python3 ./test.py -f 0-others/timeRangeWise.py -N 3 +python3 ./test.py -f 0-others/delete_check.py +python3 ./test.py -f 0-others/test_hot_refresh_configurations.py python3 ./test.py -f 1-insert/alter_database.py python3 ./test.py -f 1-insert/alter_replica.py -N 3 python3 ./test.py -f 1-insert/influxdb_line_taosc_insert.py @@ -190,6 +251,7 @@ python3 ./test.py -f 1-insert/rowlength64k_4.py -Q 3 python3 ./test.py -f 1-insert/rowlength64k_4.py -Q 4 python3 ./test.py -f 1-insert/precisionUS.py python3 ./test.py -f 1-insert/precisionNS.py +python3 ./test.py -f 1-insert/test_ts4219.py python3 ./test.py -f 0-others/show.py python3 ./test.py -f 0-others/show_tag_index.py python3 ./test.py -f 0-others/information_schema.py @@ -325,6 +387,8 @@ python3 ./test.py -f 2-query/smaTest.py python3 ./test.py -f 2-query/smaTest.py -R python3 ./test.py -f 0-others/sma_index.py python3 ./test.py -f 2-query/sml_TS-3724.py +python3 ./test.py -f 2-query/sml-TD19291.py +python3 ./test.py -f 2-query/varbinary.py python3 ./test.py -f 2-query/sml.py python3 ./test.py -f 2-query/sml.py -R python3 ./test.py -f 2-query/spread.py @@ -426,7 +490,6 @@ python3 ./test.py -f 6-cluster/5dnode3mnodeRestartDnodeInsertData.py -N 6 -M 3 python3 ./test.py -f 6-cluster/5dnode3mnodeRestartDnodeInsertData.py -N 6 -M 3 -n 3 python3 ./test.py -f 6-cluster/5dnode3mnodeRestartDnodeInsertDataAsync.py -N 6 -M 3 python3 ./test.py -f 6-cluster/manually-test/6dnode3mnodeInsertLessDataAlterRep3to1to3.py -N 6 -M 3 -python3 ./test.py -f 6-cluster/5dnode3mnodeRoll.py -N 3 -C 1 python3 ./test.py -f 6-cluster/5dnode3mnodeAdd1Ddnoe.py -N 7 -M 3 -C 6 python3 ./test.py -f 6-cluster/5dnode3mnodeAdd1Ddnoe.py -N 7 -M 3 -C 6 -n 3 python3 ./test.py -f 6-cluster/5dnode3mnodeRecreateMnode.py -N 6 -M 3 @@ -731,7 +794,9 @@ python3 ./test.py -f 2-query/out_of_order.py -R python3 ./test.py -f 2-query/blockSMA.py -Q 4 python3 ./test.py -f 2-query/projectionDesc.py -Q 4 python3 ./test.py -f 2-query/odbc.py +python3 ./test.py -f 2-query/fill_with_group.py python3 ./test.py -f 99-TDcase/TD-21561.py -Q 4 python3 ./test.py -f 99-TDcase/TD-20582.py python3 ./test.py -f 5-taos-tools/taosbenchmark/insertMix.py -N 3 python3 ./test.py -f 5-taos-tools/taosbenchmark/stt.py -N 3 +python3 ./test.py -f eco-system/meta/database/keep_time_offset.py diff --git a/utils/test/c/tmq_taosx_ci.c b/utils/test/c/tmq_taosx_ci.c index ff89bb1f75..8a7074844a 100644 --- a/utils/test/c/tmq_taosx_ci.c +++ b/utils/test/c/tmq_taosx_ci.c @@ -30,7 +30,7 @@ typedef struct { int meta; int srcVgroups; int dstVgroups; - char dir[64]; + char dir[256]; } Config; Config g_conf = {0}; @@ -409,6 +409,30 @@ int buildStable(TAOS* pConn, TAOS_RES* pRes) { } taos_free_result(pRes); +#ifdef WINDOWS + pRes = 
taos_query(pConn, + "CREATE STABLE `meters_summary` (`_wstart` TIMESTAMP, `current` FLOAT, `groupid` INT, `location` VARCHAR(16)) TAGS (`group_id` BIGINT UNSIGNED)"); + if (taos_errno(pRes) != 0) { + printf("failed to create super table meters_summary, reason:%s\n", taos_errstr(pRes)); + return -1; + } + taos_free_result(pRes); + + pRes = taos_query(pConn, + " CREATE TABLE `t_d2a450ee819dcf7576f0282d9ac22dbc` USING `meters_summary` (`group_id`) TAGS (13135550082773579308)"); + if (taos_errno(pRes) != 0) { + printf("failed to create child table t_d2a450ee819dcf7576f0282d9ac22dbc, reason:%s\n", taos_errstr(pRes)); + return -1; + } + taos_free_result(pRes); + + pRes = taos_query(pConn, "insert into t_d2a450ee819dcf7576f0282d9ac22dbc values (now, 120, 1, 'San Francisco')"); + if (taos_errno(pRes) != 0) { + printf("failed to insert into table t_d2a450ee819dcf7576f0282d9ac22dbc, reason:%s\n", taos_errstr(pRes)); + return -1; + } + taos_free_result(pRes); +#else pRes = taos_query(pConn, "create stream meters_summary_s trigger at_once IGNORE EXPIRED 0 into meters_summary as select _wstart, max(current) as current, " "groupid, location from meters partition by groupid, location interval(10m)"); @@ -417,6 +441,7 @@ int buildStable(TAOS* pConn, TAOS_RES* pRes) { return -1; } taos_free_result(pRes); +#endif pRes = taos_query(pConn, "insert into d0 (ts, current) values (now, 120)"); if (taos_errno(pRes) != 0) { @@ -598,8 +623,8 @@ void basic_consume_loop(tmq_t* tmq, tmq_list_t* topics) { } void initLogFile() { - char f1[256] = {0}; - char f2[256] = {0}; + char f1[1024] = {0}; + char f2[1024] = {0}; if (g_conf.snapShot) { sprintf(f1, "%s/../log/tmq_taosx_tmp_snapshot.source", g_conf.dir); diff --git a/utils/tsim/src/simExe.c b/utils/tsim/src/simExe.c index ac17fad36b..394b168b08 100644 --- a/utils/tsim/src/simExe.c +++ b/utils/tsim/src/simExe.c @@ -693,8 +693,8 @@ bool simExecuteNativeSqlCommand(SScript *script, char *rest, bool isSlow) { ret = 0; break; } else if (ret != 0) { - simDebug("script:%s, taos:%p, %s failed, ret:%d:%s, error:%s", script->fileName, script->taos, rest, ret & 0XFFFF, - tstrerror(ret), taos_errstr(pSql)); + simDebug("script:%s, taos:%p, %s failed, ret:%d:%s", script->fileName, script->taos, rest, ret & 0XFFFF, + tstrerror(ret)); if (line->errorJump == SQL_JUMP_TRUE) { script->linePos = line->jump;