diff --git a/.github/workflows/taosd-ci.yml b/.github/workflows/taosd-ci.yml
new file mode 100644
index 0000000000..5be958adf7
--- /dev/null
+++ b/.github/workflows/taosd-ci.yml
@@ -0,0 +1,315 @@
+name: TDengine CI Test
+
+on:
+  pull_request:
+    branches:
+      - 'main'
+      - '3.0'
+      - '3.1'
+    paths-ignore:
+      - 'packaging/**'
+      - 'docs/**'
+  repository_dispatch:
+    types: [run-tests]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  CONTAINER_NAME: 'taosd-test'
+  WKDIR: '/var/lib/jenkins/workspace'
+  WK: '/var/lib/jenkins/workspace/TDinternal'
+  WKC: '/var/lib/jenkins/workspace/TDinternal/community'
+
+jobs:
+  fetch-parameters:
+    runs-on:
+      group: CI
+      labels: [self-hosted, Linux, X64, testing]
+    outputs:
+      tdinternal: ${{ steps.parameters.outputs.tdinternal }}
+      run_function_test: ${{ steps.parameters.outputs.run_function_test }}
+      run_tdgpt_test: ${{ steps.parameters.outputs.run_tdgpt_test }}
+      source_branch: ${{ steps.parameters.outputs.source_branch }}
+      target_branch: ${{ steps.parameters.outputs.target_branch }}
+      pr_number: ${{ steps.parameters.outputs.pr_number }}
+    steps:
+      - name: Determine trigger source and fetch parameters
+        id: parameters
+        run: |
+          set -euo pipefail
+          # check the trigger source and get branch information
+          if [ "${{ github.event_name }}" == "repository_dispatch" ]; then
+            tdinternal="true"
+            source_branch=${{ github.event.client_payload.tdinternal_source_branch }}
+            target_branch=${{ github.event.client_payload.tdinternal_target_branch }}
+            pr_number=${{ github.event.client_payload.tdinternal_pr_number }}
+            run_tdgpt_test="true"
+            run_function_test="true"
+          else
+            tdinternal="false"
+            source_branch=${{ github.event.pull_request.head.ref }}
+            target_branch=${{ github.event.pull_request.base.ref }}
+            pr_number=${{ github.event.pull_request.number }}
+
+            # check whether to run tdgpt test cases
+            cd ${{ env.WKC }}
+            changed_files_non_doc=$(git --no-pager diff --name-only FETCH_HEAD `git merge-base FETCH_HEAD $target_branch`|grep -v "^docs/en/"|grep -v "^docs/zh/"|grep -v ".md$" | tr '\n' ' ' || :)
+
+            if [[ "$changed_files_non_doc" != '' && "$changed_files_non_doc" =~ /forecastoperator.c|anomalywindowoperator.c|tanalytics.h|tanalytics.c|tdgpt_cases.task|analytics/ ]]; then
+              run_tdgpt_test="true"
+            else
+              run_tdgpt_test="false"
+            fi
+
+            # check whether to run function test cases
+            changed_files_non_tdgpt=$(git --no-pager diff --name-only FETCH_HEAD `git merge-base FETCH_HEAD $target_branch`|grep -v "^docs/en/"|grep -v "^docs/zh/"|grep -v ".md$" | grep -Ev "forecastoperator.c|anomalywindowoperator.c|tanalytics.h|tanalytics.c|tdgpt_cases.task|analytics" | tr '\n' ' ' ||:)
+            if [ "$changed_files_non_tdgpt" != '' ]; then
+              run_function_test="true"
+            else
+              run_function_test="false"
+            fi
+          fi
+
+          echo "tdinternal=$tdinternal" >> $GITHUB_OUTPUT
+          echo "run_function_test=$run_function_test" >> $GITHUB_OUTPUT
+          echo "run_tdgpt_test=$run_tdgpt_test" >> $GITHUB_OUTPUT
+          echo "source_branch=$source_branch" >> $GITHUB_OUTPUT
+          echo "target_branch=$target_branch" >> $GITHUB_OUTPUT
+          echo "pr_number=$pr_number" >> $GITHUB_OUTPUT
+
+  run-tests-on-linux:
+    needs: fetch-parameters
+    runs-on:
+      group: CI
+      labels: [self-hosted, Linux, X64, testing]
+    timeout-minutes: 200
+    env:
+      IS_TDINTERNAL: ${{ needs.fetch-parameters.outputs.tdinternal }}
+      RUN_FUNCTION_TEST: ${{ needs.fetch-parameters.outputs.run_function_test }}
+      RUN_TDGPT_TEST: ${{ needs.fetch-parameters.outputs.run_tdgpt_test }}
+      SOURCE_BRANCH: ${{
needs.fetch-parameters.outputs.source_branch }} + TARGET_BRANCH: ${{ needs.fetch-parameters.outputs.target_branch }} + PR_NUMBER: ${{ needs.fetch-parameters.outputs.pr_number }} + steps: + - name: Output the environment information + run: | + echo "::group::Environment Info" + date + hostname + env + echo "Runner: ${{ runner.name }}" + echo "Trigger Source from TDinternal: ${{ env.IS_TDINTERNAL }}" + echo "Workspace: ${{ env.WKDIR }}" + git --version + echo "${{ env.WKDIR }}/restore.sh -p ${{ env.PR_NUMBER }} -n ${{ github.run_number }} -c ${{ env.CONTAINER_NAME }}" + echo "::endgroup::" + + - name: Prepare repositories + run: | + set -euo pipefail + prepare_environment() { + cd "$1" + git reset --hard + git clean -f + git remote prune origin + git fetch + git checkout "$2" + } + prepare_environment "${{ env.WK }}" "${{ env.TARGET_BRANCH }}" + prepare_environment "${{ env.WKC }}" "${{ env.TARGET_BRANCH }}" + + - name: Get latest codes and logs for TDinternal PR + if: ${{ env.IS_TDINTERNAL == 'true' }} + run: | + cd ${{ env.WK }} + git pull >/dev/null + git log -5 + echo "`date "+%Y%m%d-%H%M%S"` TDinternalTest/${{ env.PR_NUMBER }}:${{ github.run_number }}:${{ env.TARGET_BRANCH }}" >>${{ env.WKDIR }}/jenkins.log + echo "CHANGE_BRANCH:${{ env.SOURCE_BRANCH }}" >>${{ env.WKDIR }}/jenkins.log + echo "TDinternal log: `git log -5`" >>${{ env.WKDIR }}/jenkins.log + git fetch origin +refs/pull/${{ env.PR_NUMBER }}/merge + git checkout -qf FETCH_HEAD + git log -5 + echo "TDinternal log merged: `git log -5`" >>${{ env.WKDIR }}/jenkins.log + cd ${{ env.WKC }} + git remote prune origin + git pull >/dev/null + git log -5 + echo "community log: `git log -5`" >>${{ env.WKDIR }}/jenkins.log + - name: Get latest codes and logs for TDengine PR + if: ${{ env.IS_TDINTERNAL == 'false' }} + run: | + cd ${{ env.WKC }} + git remote prune origin + git pull >/dev/null + git log -5 + echo "`date "+%Y%m%d-%H%M%S"` TDengineTest/${{ env.PR_NUMBER }}:${{ github.run_number }}:${{ env.TARGET_BRANCH }}" >>${{ env.WKDIR }}/jenkins.log + echo "CHANGE_BRANCH:${{ env.SOURCE_BRANCH }}" >>${{ env.WKDIR }}/jenkins.log + echo "community log: `git log -5`" >>${{ env.WKDIR }}/jenkins.log + git fetch origin +refs/pull/${{ env.PR_NUMBER }}/merge + git checkout -qf FETCH_HEAD + git log -5 + echo "community log merged: `git log -5`" >>${{ env.WKDIR }}/jenkins.log + cd ${{ env.WK }} + git pull >/dev/null + git log -5 + echo "TDinternal log: `git log -5`" >>${{ env.WKDIR }}/jenkins.log + - name: Update submodule + run: | + cd ${{ env.WKC }} + git submodule update --init --recursive + - name: Output the 'file_no_doc_changed' information to the file + if: ${{ env.IS_TDINTERNAL == 'false' }} + run: | + mkdir -p ${{ env.WKDIR }}/tmp/${{ env.PR_NUMBER }}_${{ github.run_number }} + changed_files_non_doc=$(git --no-pager diff --name-only FETCH_HEAD `git merge-base FETCH_HEAD ${{ env.TARGET_BRANCH }}`|grep -v "^docs/en/"|grep -v "^docs/zh/"|grep -v ".md$" | tr '\n' ' ' || :) + echo $changed_files_non_doc > ${{ env.WKDIR }}/tmp/${{ env.PR_NUMBER }}_${{ github.run_number }}/docs_changed.txt + - name: Check assert testing + run: | + cd ${{ env.WKC }}/tests/parallel_test + ./run_check_assert_container.sh -d ${{ env.WKDIR }} + - name: Check void function testing + run: | + cd ${{ env.WKC }}/tests/parallel_test + ./run_check_void_container.sh -d ${{ env.WKDIR }} + - name: Build docker container + run: | + date + rm -rf ${{ env.WKC }}/debug + cd ${{ env.WKC }}/tests/parallel_test + time ./container_build.sh -w ${{ env.WKDIR }} -e + - name: Get parameters 
for testing
+        id: get_param
+        run: |
+          log_server_file="/home/log_server.json"
+          timeout_cmd=""
+          extra_param=""
+
+          if [ -f "$log_server_file" ]; then
+            log_server_enabled=$(jq '.enabled' "$log_server_file")
+            timeout_param=$(jq '.timeout' "$log_server_file")
+            if [ "$timeout_param" != "null" ] && [ "$timeout_param" != "0" ]; then
+              timeout_cmd="timeout $timeout_param"
+            fi
+
+            if [ "$log_server_enabled" == "1" ]; then
+              log_server=$(jq '.server' "$log_server_file" | sed 's/\\\"//g')
+              if [ "$log_server" != "null" ] && [ "$log_server" != "" ]; then
+                extra_param="-w $log_server"
+              fi
+            fi
+          fi
+          echo "timeout_cmd=$timeout_cmd" >> $GITHUB_OUTPUT
+          echo "extra_param=$extra_param" >> $GITHUB_OUTPUT
+      - name: Run null-pointer-return scan testing
+        run: |
+          cd ${{ env.WKC }}/tests/parallel_test
+          ./run_scan_container.sh -d ${{ env.WKDIR }} -b ${{ env.PR_NUMBER }}_${{ github.run_number }} -f ${{ env.WKDIR }}/tmp/${{ env.PR_NUMBER }}_${{ github.run_number }}/docs_changed.txt ${{ steps.get_param.outputs.extra_param }}
+      - name: Run tdgpt test cases
+        if: ${{ env.IS_TDINTERNAL == 'false' && env.RUN_TDGPT_TEST == 'true' }}
+        run: |
+          cd ${{ env.WKC }}/tests/parallel_test
+          export DEFAULT_RETRY_TIME=2
+          date
+          timeout 600 time ./run.sh -e -m /home/m.json -t tdgpt_cases.task -b ${{ env.PR_NUMBER }}_${{ github.run_number }} -l ${{ env.WKDIR }}/log -o 300 ${{ steps.get_param.outputs.extra_param }}
+      - name: Run function test cases
+        if: ${{ env.RUN_FUNCTION_TEST == 'true' }}
+        run: |
+          cd ${{ env.WKC }}/tests/parallel_test
+          export DEFAULT_RETRY_TIME=2
+          date
+          ${{ steps.get_param.outputs.timeout_cmd }} time ./run.sh -e -m /home/m.json -t cases.task -b ${{ env.PR_NUMBER }}_${{ github.run_number }} -l ${{ env.WKDIR }}/log -o 1200 ${{ steps.get_param.outputs.extra_param }}
+
+  run-tests-on-mac:
+    needs: fetch-parameters
+    if: ${{ needs.fetch-parameters.outputs.run_function_test == 'false' }}
+    runs-on:
+      group: CI
+      labels: [self-hosted, macOS, ARM64, testing]
+    timeout-minutes: 60
+    env:
+      IS_TDINTERNAL: ${{ needs.fetch-parameters.outputs.tdinternal }}
+      SOURCE_BRANCH: ${{ needs.fetch-parameters.outputs.source_branch }}
+      TARGET_BRANCH: ${{ needs.fetch-parameters.outputs.target_branch }}
+      PR_NUMBER: ${{ needs.fetch-parameters.outputs.pr_number }}
+    steps:
+      - name: Output the environment information
+        run: |
+          echo "::group::Environment Info"
+          date
+          hostname
+          env
+          echo "Runner: ${{ runner.name }}"
+          echo "Trigger Source from TDinternal: ${{ env.IS_TDINTERNAL }}"
+          echo "Workspace: ${{ env.WKDIR }}"
+          git --version
+          echo "${{ env.WKDIR }}/restore.sh -p ${{ env.PR_NUMBER }} -n ${{ github.run_number }} -c ${{ env.CONTAINER_NAME }}"
+          echo "::endgroup::"
+      - name: Prepare repositories
+        run: |
+          set -euo pipefail
+          prepare_environment() {
+            cd "$1"
+            git reset --hard
+            git clean -f
+            git remote prune origin
+            git fetch
+            git checkout "$2"
+          }
+          prepare_environment "${{ env.WK }}" "${{ env.TARGET_BRANCH }}"
+          prepare_environment "${{ env.WKC }}" "${{ env.TARGET_BRANCH }}"
+      - name: Get latest codes and logs for TDinternal PR
+        if: ${{ env.IS_TDINTERNAL == 'true' }}
+        run: |
+          cd ${{ env.WK }}
+          git pull >/dev/null
+          git log -5
+          echo "`date "+%Y%m%d-%H%M%S"` TDinternalTest/${{ env.PR_NUMBER }}:${{ github.run_number }}:${{ env.TARGET_BRANCH }}" >>${{ env.WKDIR }}/jenkins.log
+          echo "CHANGE_BRANCH:${{ env.SOURCE_BRANCH }}" >>${{ env.WKDIR }}/jenkins.log
+          echo "TDinternal log: `git log -5`" >>${{ env.WKDIR }}/jenkins.log
+          git fetch origin +refs/pull/${{ env.PR_NUMBER }}/merge
git checkout -qf FETCH_HEAD + git log -5 + echo "TDinternal log merged: `git log -5`" >>${{ env.WKDIR }}/jenkins.log + cd ${{ env.WKC }} + git remote prune origin + git pull >/dev/null + git log -5 + echo "community log: `git log -5`" >>${{ env.WKDIR }}/jenkins.log + - name: Get latest codes and logs for TDengine PR + if: ${{ env.IS_TDINTERNAL == 'false' }} + run: | + cd ${{ env.WKC }} + git remote prune origin + git pull >/dev/null + git log -5 + echo "`date "+%Y%m%d-%H%M%S"` TDengineTest/${{ env.PR_NUMBER }}:${{ github.run_number }}:${{ env.TARGET_BRANCH }}" >>${{ env.WKDIR }}/jenkins.log + echo "CHANGE_BRANCH:${{ env.SOURCE_BRANCH }}" >>${{ env.WKDIR }}/jenkins.log + echo "community log: `git log -5`" >>${{ env.WKDIR }}/jenkins.log + git fetch origin +refs/pull/${{ env.PR_NUMBER }}/merge + git checkout -qf FETCH_HEAD + git log -5 + echo "community log merged: `git log -5`" >>${{ env.WKDIR }}/jenkins.log + cd ${{ env.WK }} + git pull >/dev/null + git log -5 + echo "TDinternal log: `git log -5`" >>${{ env.WKDIR }}/jenkins.log + - name: Update submodule + run: | + cd ${{ env.WKC }} + git submodule update --init --recursive + - name: Run tests + run: | + date + cd ${{ env.WK }} + rm -rf debug + mkdir debug + cd ${{ env.WK }}/debug + echo $PATH + echo "PATH=/opt/homebrew/bin:$PATH" >> $GITHUB_ENV + cmake .. -DBUILD_TEST=true -DBUILD_HTTPS=false -DCMAKE_BUILD_TYPE=Release + make -j10 + ctest -j10 || exit 7 + date diff --git a/.github/workflows/taosd-doc-build.yml b/.github/workflows/taosd-doc-build.yml index 52a35fc3d5..1dffaa0aa7 100644 --- a/.github/workflows/taosd-doc-build.yml +++ b/.github/workflows/taosd-doc-build.yml @@ -1,17 +1,14 @@ name: TDengine Doc Build on: - workflow_call: - inputs: - target_branch: - description: "Target branch name of for building the document" - required: true - type: string - - target_pr_number: - description: "PR number of target branch to merge for building the document" - required: true - type: string + pull_request: + branches: + - 'main' + - '3.0' + - '3.1' + paths: + - 'docs/**' + - '*.md' env: DOC_WKC: "/root/doc_ci_work" @@ -21,81 +18,32 @@ env: TOOLS_REPO: "taos-tools" jobs: - check: - runs-on: - group: CI - labels: [self-hosted, doc-build] - outputs: - changed_files_zh: ${{ steps.set_output.outputs.changed_files_zh }} - changed_files_en: ${{ steps.set_output.outputs.changed_files_en }} - changed_files_non_doc: ${{ steps.set_output.outputs.changed_files_non_doc }} - changed_files_non_tdgpt: ${{ steps.set_output.outputs.changed_files_non_tdgpt }} - steps: - - name: Get the latest document contents from the repository - run: | - set -e - # ./.github/scripts/update_repo.sh ${{ env.DOC_WKC }}/${{ env.TD_REPO }} ${{ inputs.target_branch }} ${{ inputs.target_pr_number }} - cd ${{ env.DOC_WKC }}/${{ env.TD_REPO }} - git reset --hard - git clean -f - git remote prune origin - git fetch - git checkout ${{ inputs.target_branch }} - git pull >/dev/null - git fetch origin +refs/pull/${{ inputs.target_pr_number }}/merge - git checkout -qf FETCH_HEAD - - name: Check whether the document is changed and set output variables - id: set_output - run: | - set -e - cd ${{ env.DOC_WKC }}/${{ env.TD_REPO }} - changed_files_zh=$(git --no-pager diff --name-only FETCH_HEAD `git merge-base FETCH_HEAD ${{ inputs.tartget_branch }}`| grep "^docs/zh/" | tr '\n' ' ' || :) - changed_files_en=$(git --no-pager diff --name-only FETCH_HEAD `git merge-base FETCH_HEAD ${{ inputs.tartget_branch }}`| grep "^docs/en/" | tr '\n' ' ' || :) - changed_files_non_doc=$(git --no-pager diff 
--name-only FETCH_HEAD `git merge-base FETCH_HEAD ${{ inputs.tartget_branch }}`|grep -v "^docs/en/"|grep -v "^docs/zh/"|grep -v ".md$" | tr '\n' ' ' || :) - changed_files_non_tdgpt=$(git --no-pager diff --name-only FETCH_HEAD `git merge-base FETCH_HEAD ${{ inputs.tartget_branch }}`|grep -v "^docs/en/"|grep -v "^docs/zh/"|grep -v ".md$" | grep -Ev "forecastoperator.c|anomalywindowoperator.c|tanalytics.h|tanalytics.c|tdgpt_cases.task|analytics" | tr '\n' ' ' ||:) - echo "changed_files_zh=${changed_files_zh}" >> $GITHUB_OUTPUT - echo "changed_files_en=${changed_files_en}" >> $GITHUB_OUTPUT - echo "changed_files_non_doc=${changed_files_non_doc}" >> $GITHUB_OUTPUT - echo "changed_files_non_tdgpt=${changed_files_non_tdgpt}" >> $GITHUB_OUTPUT - - build: - needs: check + build-doc: runs-on: group: CI labels: [self-hosted, doc-build] - if: ${{ needs.check.outputs.changed_files_zh != '' || needs.check.outputs.changed_files_en != '' }} - steps: - name: Get the latest document contents run: | set -e - #./.github/scripts/update_repo.sh ${{ env.DOC_WKC }}/${{ env.TD_REPO }} ${{ inputs.target_branch }} ${{ inputs.target_pr_number }} cd ${{ env.DOC_WKC }}/${{ env.TD_REPO }} git reset --hard git clean -f git remote prune origin git fetch - git checkout ${{ inputs.target_branch }} + git checkout ${{ github.event.pull_request.base.ref }} git pull >/dev/null - git fetch origin +refs/pull/${{ inputs.target_pr_number }}/merge + git fetch origin +refs/pull/${{ github.event.pull_request.number }}/merge git checkout -qf FETCH_HEAD - name: Build the chinese document - if: ${{ needs.check.outputs.changed_files_zh != '' }} run: | cd ${{ env.DOC_WKC }}/${{ env.ZH_DOC_REPO }} yarn ass local yarn build - name: Build the english document - if: ${{ needs.check.outputs.changed_files_en != '' }} run: | cd ${{ env.DOC_WKC }}/${{ env.EN_DOC_REPO }} yarn ass local yarn build - - outputs: - changed_files_zh: ${{ needs.check.outputs.changed_files_zh }} - changed_files_en: ${{ needs.check.outputs.changed_files_en }} - changed_files_non_doc: ${{ needs.check.outputs.changed_files_non_doc }} - changed_files_non_tdgpt: ${{ needs.check.outputs.changed_files_non_tdgpt }} diff --git a/cmake/cmake.version b/cmake/cmake.version index 52730e84e5..622bd49fdf 100644 --- a/cmake/cmake.version +++ b/cmake/cmake.version @@ -2,7 +2,7 @@ IF (DEFINED VERNUMBER) SET(TD_VER_NUMBER ${VERNUMBER}) ELSE () - SET(TD_VER_NUMBER "3.3.5.2.alpha") + SET(TD_VER_NUMBER "3.3.5.8.alpha") ENDIF () IF (DEFINED VERCOMPATIBLE) diff --git a/cmake/taosws_CMakeLists.txt.in b/cmake/taosws_CMakeLists.txt.in index 17446d184d..b013d45911 100644 --- a/cmake/taosws_CMakeLists.txt.in +++ b/cmake/taosws_CMakeLists.txt.in @@ -2,7 +2,7 @@ # taosws-rs ExternalProject_Add(taosws-rs GIT_REPOSITORY https://github.com/taosdata/taos-connector-rust.git - GIT_TAG 3.0 + GIT_TAG main SOURCE_DIR "${TD_SOURCE_DIR}/tools/taosws-rs" BINARY_DIR "" #BUILD_IN_SOURCE TRUE diff --git a/docs/en/10-third-party/05-bi/11-superset.md b/docs/en/10-third-party/05-bi/11-superset.md index d8dc200fd6..2ec59a2172 100644 --- a/docs/en/10-third-party/05-bi/11-superset.md +++ b/docs/en/10-third-party/05-bi/11-superset.md @@ -7,7 +7,7 @@ Apache Superset is a modern enterprise level business intelligence (BI) web appl It is supported by the Apache Software Foundation and is an open source project with an active community and rich ecosystem. 
Apache Superset provides an intuitive user interface that makes creating, sharing, and visualizing data simple, while supporting multiple data sources and rich visualization options.

-Through the Python connector of TDengine, Superset can support TDengine data sources and provide functions such as data presentation and analysis
+Through the Python connector of TDengine, Superset can support TDengine data sources and provide functions such as data presentation and analysis.

## Prerequisites

diff --git a/docs/en/10-third-party/05-bi/12-tableau.md b/docs/en/10-third-party/05-bi/12-tableau.md
index 31292644b2..db8a569926 100644
--- a/docs/en/10-third-party/05-bi/12-tableau.md
+++ b/docs/en/10-third-party/05-bi/12-tableau.md
@@ -10,8 +10,8 @@ Tableau is a well-known business intelligence tool that supports multiple data s
 
 Prepare the following environment:
 
-- TDengine 3.3.5.4 and above version is installed and running normally (both Enterprise and Community versions are available)
-- taosAdapter is running normally, refer to [taosAdapter Reference](../../../tdengine-reference/components/taosadapter/)
+- TDengine version 3.3.5.8 or above is installed and running normally (both Enterprise and Community editions are supported).
+- taosAdapter is running normally, refer to [taosAdapter Reference](../../../tdengine-reference/components/taosadapter/).
 - Install and run Tableau Desktop (if not installed, please download and install Windows operating system 64-bit [Download Tableau Desktop](https://www.tableau.com/products/desktop/download)). Install Tableau please refer to [Tableau Desktop](https://www.tableau.com).
 - Download the latest Windows operating system X64 client driver from the TDengine official website and install it, refer to [Install ODBC Driver](../../../tdengine-reference/client-libraries/odbc/#Installation).
 
@@ -19,6 +19,10 @@ Prepare the following environment:
 
 **Step 1**, Search and open the "ODBC Data Source (64 bit)" management tool in the Start menu of the Windows operating system and configure it, refer to [Install ODBC Driver](../../../tdengine-reference/client-libraries/odbc/#Installation).
 
+:::tip
+When configuring the ODBC data source for Tableau, the [Database] field on the TDengine ODBC data source configuration page is required: select a database that can be connected successfully.
+:::
+
 **Step 2**, Start Tableau in the Windows system environment, then search for "ODBC" on its connection page and select "Other Databases (ODBC)".
 
 **Step 3**, Click the `DSN` radio button, then select the configured data source (MyTDengine), and click the `Connect` button. After the connection is successful, delete the content of the string attachment, and finally click the `Sign In` button.
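The DSN configured in Step 1 can be sanity-checked outside Tableau before connecting. Below is a minimal C sketch using the standard ODBC API (unixODBC or the Windows Driver Manager); the DSN name `MyTDengine` comes from Step 3, and the `root`/`taosdata` credentials are TDengine defaults that may differ in your deployment:

```c
#include <stdio.h>
#include <sql.h>
#include <sqlext.h>

int main(void) {
  SQLHENV env = SQL_NULL_HENV;
  SQLHDBC dbc = SQL_NULL_HDBC;

  // Environment handle with ODBC 3 behavior, then a connection handle.
  SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &env);
  SQLSetEnvAttr(env, SQL_ATTR_ODBC_VERSION, (SQLPOINTER)SQL_OV_ODBC3, 0);
  SQLAllocHandle(SQL_HANDLE_DBC, env, &dbc);

  // Connect through the DSN; credentials are assumed defaults and may
  // instead be stored in the DSN itself.
  SQLRETURN rc = SQLConnect(dbc, (SQLCHAR *)"MyTDengine", SQL_NTS,
                            (SQLCHAR *)"root", SQL_NTS,
                            (SQLCHAR *)"taosdata", SQL_NTS);
  if (SQL_SUCCEEDED(rc)) {
    printf("DSN MyTDengine is reachable\n");
    SQLDisconnect(dbc);
  } else {
    SQLCHAR     state[6] = {0}, msg[256] = {0};
    SQLINTEGER  native = 0;
    SQLSMALLINT len = 0;
    // The first diagnostic record explains why the connection failed.
    SQLGetDiagRec(SQL_HANDLE_DBC, dbc, 1, state, &native, msg,
                  (SQLSMALLINT)sizeof(msg), &len);
    fprintf(stderr, "connect failed: [%s] %s\n", state, msg);
  }

  SQLFreeHandle(SQL_HANDLE_DBC, dbc);
  SQLFreeHandle(SQL_HANDLE_ENV, env);
  return 0;
}
```

If `SQLConnect` fails here, fix the DSN first (in particular the required [Database] field) before troubleshooting inside Tableau.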
diff --git a/docs/en/10-third-party/05-bi/13-excel.md b/docs/en/10-third-party/05-bi/13-excel.md
index aff4b7adf6..114954e81f 100644
--- a/docs/en/10-third-party/05-bi/13-excel.md
+++ b/docs/en/10-third-party/05-bi/13-excel.md
@@ -10,7 +10,7 @@ toc_max_heading_level: 4
 
 Prepare the following environment:
 
-- TDengine 3.3.5.7 and above version is installed and running normally (both Enterprise and Community versions are available).
+- TDengine version 3.3.5.8 or above is installed and running normally (both Enterprise and Community editions are supported).
 - taosAdapter is running normally, refer to [taosAdapter Reference](../../../tdengine-reference/components/taosadapter/).
 - Install and run Excel. If not installed, please download and install it. For specific instructions, please refer to Microsoft's official documentation.
 - Download the latest Windows operating system X64 client driver from the TDengine official website and install it, refer to [Install ODBC Driver](../../../tdengine-reference/client-libraries/odbc/#Installation).

diff --git a/docs/en/28-releases/01-tdengine.md b/docs/en/28-releases/01-tdengine.md
index 9f4246c7a0..b2bfb30cfa 100644
--- a/docs/en/28-releases/01-tdengine.md
+++ b/docs/en/28-releases/01-tdengine.md
@@ -25,6 +25,10 @@ Download links for TDengine 3.x version installation packages are as follows:
 
 import Release from "/components/ReleaseV3";
 
+## 3.3.5.8
+
+<Release type="tdengine" version="3.3.5.8" />
+
 ## 3.3.5.2

diff --git a/docs/en/28-releases/03-notes/3.3.5.8.md b/docs/en/28-releases/03-notes/3.3.5.8.md
new file mode 100755
index 0000000000..be75272545
--- /dev/null
+++ b/docs/en/28-releases/03-notes/3.3.5.8.md
@@ -0,0 +1,66 @@
+---
+title: TDengine 3.3.5.8 Release Notes
+sidebar_label: 3.3.5.8
+description: Version 3.3.5.8 Notes
+slug: /release-history/release-notes/3.3.5.8
+---
+
+## Features
+ 1. feat: support tmq subscription with ONLY META in JDBC
+ 2. feat: support multiple-line SQL editor in Grafana
+ 3. feat: add support for VARBINARY/GEOMETRY in ODBC
+ 4. feat: support TDengine with ODBC driver in Excel
+ 5. feat: taosX agent uses a specific port range for local connections
+
+## Enhancements
+ 1. enh: websocket handles consumer errors when tmq polls nothing
+ 2. enh: JDBC adds support for unsigned integers
+ 3. enh: expose global.written_concurrent configuration for kafka/mqtt/csv in Explorer
+ 4. enh: support integration with TDgpt in community version
+ 5. enh: support BinaryRowData type in Flink
+ 6. enh: in stmt2 SQL statements, the LIMIT clause supports the use of ? as a parameter placeholder
+ 7. enh: enable compression via websocket in taosX backup
+ 8. enh: ODBC supports SQL_ROWSET_SIZE in SQLSetStmtAttr
+ 9. enh: expose num.of.consumers/writters configurations in Explorer
+ 10. enh: add connector files to the macOS installation package
+ 11. enh: handle errors when poll result is null in Rust connector
+ 12. enh: TSBS supports CSV output format
+ 13. enh: add Classified Connections Counts table in TDinsight
+ 14. enh: use consistent float precision in Explorer and taos shell
+ 15. enh: Flink table supports update/delete
+ 16. enh: taosX agent resumes the connection when the taosX server has been disconnected for a long time
+
+## Fixes
+ 1. fix: Explorer supports signup emails containing a dot `.`
+ 2. fix: flock syscall error on AWS cloud storage in taosAdapter
+ 3. fix: modifying boolean tag values in sub-tables resulted in erroneous metadata from data subscriptions
+ 4. fix: allow spaces in CSV columns in Explorer data-in
+ 5. fix: high CPU usage by the stmtbind thread when the system is idle
+ 6. fix: health state returns to idle when no data is consumed
+ 7. fix: security issues in JDBC sample code
+ 8. fix: upgrade compatibility issue of taosX
+ 9. fix: ODBC crashed when setting SQL_ATTR_TXN_ISOLATION with SQLSetConnectAttr
+ 10. fix: received/processed_messages should be reset when a task reruns
+ 11. fix: when restoring data using taosX, it may crash if the database is not specified
+ 12. fix: when creating a database, the keep_time_offset option supports suffixes h (hours) and d (days) for time values
+ 13. fix: potential deadlock while dropping a stream
+ 14. fix: failed to write data in a dual-replica database when a single dnode is disconnected from the network
+ 15. fix: when querying the information_schema.ins_tables table, a "Sync leader is unreachable" error may be triggered if the Leader of the mnode changes
+ 16. fix: the time-filtering query results involving composite primary keys were incorrect after data compaction
+ 17. fix: when the join condition of the primary key column is not a simple equality condition, it may lead to incorrect JOIN results
+ 18. fix: error caused by cursor.fetchmany with custom length in Python taosws
+ 19. fix: the issue where the "show grants" command returned an incorrect number of columns
+ 20. fix: unexpected backup points before the schedule executes
+ 21. fix: taosX task does not restart after being interrupted
+ 22. fix: JDBC select server_version() caused high memory usage
+ 23. fix: when using the WHERE tbname IN () statement, executing a LAST query may cause taosd to crash if the filtered sub-tables do not belong to the same super table
+ 24. fix: after taosd exits abnormally and is restarted, if the WAL that has not been written to the data file is too large, it may cause an OOM error during startup
+ 25. fix: when using interp interpolation, if the select list contains string constants or string tags, the returned string content may be incomplete. [#29353](https://github.com/taosdata/TDengine/issues/29353)
+ 26. fix: when performing a JOIN query on a super table, using a subquery as the right table may lead to missing results
+ 27. fix: syntax error when using DISTINCT and ORDER BY together. [#29263](https://github.com/taosdata/TDengine/issues/29263)
+ 28. fix: when using the CAST function to convert a floating-point number to a binary and then performing a comparison, the result may be inaccurate due to loss of precision. [#29382](https://github.com/taosdata/TDengine/issues/29382)
+ 29. fix: after upgrading from version 3.3.4 to 3.3.5, the taosd service fails to start properly if the configured charset does not exist in the system
+ 30. fix: websocket api timing field should not be negative
+ 31. fix: duplicate backup points in taosX
+ 32. fix: configuration item s3BucketName was incorrectly set as a global variable, leading to failures when uploading files to S3

diff --git a/docs/zh/08-operation/09-backup.md b/docs/zh/08-operation/09-backup.md
index fbc2e612e1..babd23c2ba 100644
--- a/docs/zh/08-operation/09-backup.md
+++ b/docs/zh/08-operation/09-backup.md
@@ -76,6 +76,17 @@ taosExplorer 服务页面中,进入“系统管理 - 备份”页面,在“
 8. 备份文件大小:备份文件的大小限制。当备份文件大小达到此限制时,会自动创建新的备份文件。
 9. 文件压缩等级:备份文件的压缩等级。支持:最快速度、最佳压缩比、兼具速度和压缩比。
 
+用户可以通过开启 S3 转储,将备份文件上传至 S3 存储服务上。开启 S3 转储,需要填写以下信息:
+
+1. S3 节点:S3 节点的地址。
+2. 访问密钥 ID:访问密钥 ID。
+3. 访问密钥:访问密钥。
+4. 存储桶:存储桶名称。
+5. 区域:存储桶所在的区域。
+6. 对象前缀:备份文件的对象前缀,类似于 S3 上的目录。
+7. 本地备份文件的保留时长:本地备份的保留时间,所有早于`当前时间 - backup_retention_period`的文件都需要上传到 S3。
+8. 
本地备份文件的保留个数:本地备份文件的保留个数,本地只保留最新的`backup_retention_size`个备份文件。 + 创建成功后,备份计划会开始按照配置的参数运行。在“备份计划”下的列表中,可以查看已创建的备份计划。 备份计划支持以下操作: diff --git a/docs/zh/10-third-party/05-bi/12-tableau.md b/docs/zh/10-third-party/05-bi/12-tableau.md index 9fbdc53d45..ab4f86549b 100644 --- a/docs/zh/10-third-party/05-bi/12-tableau.md +++ b/docs/zh/10-third-party/05-bi/12-tableau.md @@ -8,8 +8,8 @@ Tableau 是一款知名的商业智能工具,它支持多种数据源,可方 ## 前置条件 准备以下环境: -- TDengine 3.3.5.4 以上版本集群已部署并正常运行(企业及社区版均可) -- taosAdapter 能够正常运行。详细参考 [taosAdapter 参考手册](../../../reference/components/taosadapter) +- TDengine 3.3.5.8 以上版本集群已部署并正常运行(企业及社区版均可)。 +- taosAdapter 能够正常运行。详细参考 [taosAdapter 参考手册](../../../reference/components/taosadapter)。 - Tableau 桌面版安装并运行(如未安装,请下载并安装 Windows 操作系统 64 位 [Tableau 桌面版](https://www.tableau.com/products/desktop/download) )。安装 Tableau 桌面版请参考 [官方文档](https://www.tableau.com)。 - 从 TDengine 官网下载最新的 Windows 操作系统 X64 客户端驱动程序,并进行安装。详细参考 [安装 ODBC 驱动](../../../reference/connector/odbc/#安装)。 @@ -18,7 +18,11 @@ Tableau 是一款知名的商业智能工具,它支持多种数据源,可方 **第 1 步**,在Windows操作系统的开始菜单中搜索并打开“ODBC数据源(64位)”管理工具并进行配置。详细参考[配置ODBC数据源](../../../reference/connector/odbc/#配置数据源)。 -**第 2 步**,在 Windows 系统环境下启动 Tableau,之后在其连接页面中搜索 “ODBC”,并选择 “其他数据库 (ODBC)”。 +:::tip +需要注意的是,在为 Tableau 配置 ODBC 数据源时,TDengine ODBC 数据源配置页面中的【数据库】配置项为必填项,需选择一个可成功连接的数据库。 +::: + +**第 2 步**,在 Windows 系统环境下启动 Tableau,之后在其连接页面中搜索 “ODBC”,并选择 “其他数据库 (ODBC)”。 对于 Tableau 的使用的ODBC数据源,在其 TDengine ODBC 数据源配置页面的【数据库】的配置项为必填,需要选择可以连接的数据库。 **第 3 步**,点击 `DSN` 单选框,接着选择已配置好的数据源(MyTDengine),然后点击`连接`按钮。待连接成功后,删除字符串附加部分的内容,最后点击`登录`按钮即可。 diff --git a/docs/zh/10-third-party/05-bi/13-excel.md b/docs/zh/10-third-party/05-bi/13-excel.md index 442d0175b4..bfa6b5b832 100644 --- a/docs/zh/10-third-party/05-bi/13-excel.md +++ b/docs/zh/10-third-party/05-bi/13-excel.md @@ -8,7 +8,7 @@ title: 与 Excel 集成 ## 前置条件 准备以下环境: -- TDengine 3.3.5.7 以上版本集群已部署并正常运行(企业及社区版均可)。 +- TDengine 3.3.5.8 以上版本集群已部署并正常运行(企业及社区版均可)。 - taosAdapter 能够正常运行,详细参考 [taosAdapter 参考手册](../../../reference/components/taosadapter)。 - Excel 安装并运行, 如未安装,请下载并安装, 具体操作请参考 Microsoft 官方文档。 - 从 TDengine 官网下载最新的 Windows 操作系统 X64 客户端驱动程序并进行安装,详细参考 [安装 ODBC 驱动](../../../reference/connector/odbc/#安装)。 diff --git a/docs/zh/14-reference/03-taos-sql/02-database.md b/docs/zh/14-reference/03-taos-sql/02-database.md index 35cb99a3dd..3742b7c571 100644 --- a/docs/zh/14-reference/03-taos-sql/02-database.md +++ b/docs/zh/14-reference/03-taos-sql/02-database.md @@ -67,7 +67,7 @@ database_option: { - DURATION:数据文件存储数据的时间跨度。可以使用加单位的表示形式,如 DURATION 100h、DURATION 10d 等,支持 m(分钟)、h(小时)和 d(天)三个单位。不加时间单位时默认单位为天,如 DURATION 50 表示 50 天。 - MAXROWS:文件块中记录的最大条数,默认为 4096 条。 - MINROWS:文件块中记录的最小条数,默认为 100 条。 -- KEEP:表示数据文件保存的天数,缺省值为 3650,取值范围 [1, 365000],且必须大于或等于 3 倍的 DURATION 参数值。数据库会自动删除保存时间超过 KEEP 值的数据从而释放存储空间。KEEP 可以使用加单位的表示形式,如 KEEP 100h、KEEP 10d 等,支持 m(分钟)、h(小时)和 d(天)三个单位。也可以不写单位,如 KEEP 50,此时默认单位为天。企业版支持[多级存储](../../operation/planning/#%E5%A4%9A%E7%BA%A7%E5%AD%98%E5%82%A8)功能, 因此, 可以设置多个保存时间(多个以英文逗号分隔,最多 3 个,满足 keep 0 \<= keep 1 \<= keep 2,如 KEEP 100h,100d,3650d); 社区版不支持多级存储功能(即使配置了多个保存时间, 也不会生效, KEEP 会取最大的保存时间)。了解更多,请点击 [关于主键时间戳](https://docs.taosdata.com/reference/taos-sql/insert/) +- KEEP:表示数据文件保存的天数,缺省值为 3650,取值范围 [1, 365000],且必须大于或等于 3 倍的 DURATION 参数值。数据库会自动删除保存时间超过 KEEP 值的数据从而释放存储空间。KEEP 可以使用加单位的表示形式,如 KEEP 100h、KEEP 10d 等,支持 m(分钟)、h(小时)和 d(天)三个单位。也可以不写单位,如 KEEP 50,此时默认单位为天。企业版支持[多级存储](https://docs.taosdata.com/operation/planning/#%E5%A4%9A%E7%BA%A7%E5%AD%98%E5%82%A8)功能, 因此, 可以设置多个保存时间(多个以英文逗号分隔,最多 3 个,满足 keep 0 \<= keep 1 \<= keep 2,如 KEEP 100h,100d,3650d); 
社区版不支持多级存储功能(即使配置了多个保存时间, 也不会生效, KEEP 会取最大的保存时间)。了解更多,请点击 [关于主键时间戳](https://docs.taosdata.com/reference/taos-sql/insert/) - KEEP_TIME_OFFSET:自 3.2.0.0 版本生效。删除或迁移保存时间超过 KEEP 值的数据的延迟执行时间,默认值为 0 (小时)。在数据文件保存时间超过 KEEP 后,删除或迁移操作不会立即执行,而会额外等待本参数指定的时间间隔,以实现与业务高峰期错开的目的。 - STT_TRIGGER:表示落盘文件触发文件合并的个数。对于少表高频写入场景,此参数建议使用默认配置;而对于多表低频写入场景,此参数建议配置较大的值。 diff --git a/docs/zh/28-releases/01-tdengine.md b/docs/zh/28-releases/01-tdengine.md index 88c07a89f4..435d5819ce 100644 --- a/docs/zh/28-releases/01-tdengine.md +++ b/docs/zh/28-releases/01-tdengine.md @@ -24,6 +24,10 @@ TDengine 3.x 各版本安装包下载链接如下: import Release from "/components/ReleaseV3"; +## 3.3.5.8 + + + ## 3.3.5.2 diff --git a/docs/zh/28-releases/03-notes/3.3.5.8.md b/docs/zh/28-releases/03-notes/3.3.5.8.md new file mode 100755 index 0000000000..07f9cf060b --- /dev/null +++ b/docs/zh/28-releases/03-notes/3.3.5.8.md @@ -0,0 +1,65 @@ +--- +title: 3.3.5.8 版本说明 +sidebar_label: 3.3.5.8 +description: 3.3.5.8 版本说明 +--- + +## 特性 + 1. 特性:JDBC 支持 ONLY META 订阅 + 2. 特性:Grafana 插件 SQL 编辑支持多行 + 3. 特性:ODBC 支持 VARBINARY/GEOMETRY 类型 + 4. 特性:支持 Excel 通过 ODBC 驱动连接 TDengine + 5. 特性:taosX agent 支持指定本地端口 + +## 优化 + 1. 优化:WebSocket 连接订阅消息为空时,支持获取错误 + 2. 优化:JDBC 支持无符号整数 + 3. 优化:MQTT/Kafka/CSV 新增写并发参数 + 4. 优化:开源版本 TDengine 支持与 TDgpt 对接 + 5. 优化:Flink 连接器支持 BinaryRowData 数据传输类型 + 6. 优化:参数绑定 SQL 语句中,LIMIT 子句支持使用 ? 作为参数占位符 + 7. 优化:taosX 备份开启 WebSocket 压缩 + 8. 优化:ODBC SQLSetStmtAttr 支持 SQL_ROWSET_SIZE 属性 + 9. 优化:TMQ 数据同步任务新增 Number Of Writters/Consumers 等参数 + 10. 优化:mac 安装包增加连接器文件 + 11. 优化:Rust 连接器支持订阅结果为空时的错误处理 + 12. 优化:TSBS 支持生成 CSV 文件 + 13. 优化:TDinsight 添加连接分类信息 + 14. 优化:Explorer float 显示精度与 taos shell 一致 + 15. 优化:Flink 连接器 Table 支持更新和删除操作 + 16. 优化:taosX Agent 在 taosX 无法连接时可重试恢复 + +## 修复 + 1. 修复:Explorer 注册邮箱支持包含 "." + 2. 修复:AWS 云存储下 taosAdapter flock 失败 + 3. 修复:修改子表中布尔类型标签的值时,数据订阅的元数据返回结果存在错误 + 4. 修复:Explorer 导入 CSV 时列包含空格时预览将失效 + 5. 修复:解决 stmtbind 线程在系统处于空闲状态时 CPU 占用过高的问题 + 6. 修复:数据源任务不再处理数据时健康状态恢复为 Idle + 7. 修复:JDBC 示例代码安全漏洞 + 8. 修复:taosX 平滑升级 + 9. 修复:ODBC 调用 SQLSetConnectAttr 设置 SQL_ATTR_TXN_ISOLATION 时 core + 10. 修复:received/processed_messages 当前运行指标重启未清零 + 11. 修复:使用 taosX 恢复数据时,若未指定数据库,系统可能崩溃 + 12. 修复:创建数据库时,keep_time_offset 选项支持使用后缀 h和 d 来指定时间值 + 13. 修复:删除流计算时可能的死锁 + 14. 修复:双副本数据库在某个 dnode 断网后写入数据失败的问题 + 15. 修复:查询 information_schema.ins_tables 表时,如果 mnode 的 Leader 发生变更,可能会触发 Sync leader is unreachable 错误 + 16. 修复:数据重新整理后,涉及复合主键的时间过滤查询结果出现错误的问题 + 17. 修复:当主键列的连接条件为非简单等值条件时,可能导致 JOIN 结果出现错误 + 18. 修复:Python WebSocket 连接器 Cusor.fetchmany 接口自定义长度问题 + 19. 修复:Show Grants 命令返回的列数目不正确的问题 + 20. 修复:备份计划未启动时查看备份点列表不符合预期 + 21. 修复:taosX 任务写入中断后未重新启动 + 22. 修复:JDBC select server_version() 结果未释放导致内存持续增长 + 23. 修复:在使用 WHERE tbname IN () 语句时,如果筛选出的子表不属于同一超级表,执行 LAST 查询可能会导致 taosd 崩溃 + 24. 修复:taosd 异常退出并再次启动后,如果未进入数据文件的 WAL 过大,可能导致启动时 oom + 25. 修复:在使用 interp 插值填充时,如果 select list 中包含字符串常量或字符串标签列,返回的字符串内容可能会出现缺失的情况[#29353](https://github.com/taosdata/TDengine/issues/29353) + 26. 修复:在超级表上执行 JOIN 查询时,将子查询用作右表可能会导致结果缺失 + 27. 修复:同时使用 DISTINCT 和 ORDER BY 关键字时出现的语法错误问题[#29263](https://github.com/taosdata/TDengine/issues/29263) + 28. 修复:使用 CAST 函数将浮点数转换为字符串后进行比较时,可能会因精度丢失而导致结果不准确[#29382](https://github.com/taosdata/TDengine/issues/29382) + 29. 修复:在从 3.3.4 版本升级到 3.3.5 版本后,如果配置的字符集在系统中不存在,taosd 服务将无法正常启动 + 30. 修复:websocket 接口 timing 字段有时为负值 + 31. 修复:taosX 备份任务显示备份点重复 + 32. 
修复:配置项 s3BucketName 被误设为全局变量参数,导致文件上传到 S3 失败 + diff --git a/include/common/streamMsg.h b/include/common/streamMsg.h index d410bd17e0..6fc24ccf2e 100644 --- a/include/common/streamMsg.h +++ b/include/common/streamMsg.h @@ -17,7 +17,6 @@ #define TDENGINE_STREAMMSG_H #include "tmsg.h" -//#include "trpc.h" #ifdef __cplusplus extern "C" { @@ -256,6 +255,14 @@ typedef struct { int32_t tEncodeStreamTaskRunReq(SEncoder* pEncoder, const SStreamTaskRunReq* pReq); int32_t tDecodeStreamTaskRunReq(SDecoder* pDecoder, SStreamTaskRunReq* pReq); +typedef struct { + SMsgHead head; + int64_t streamId; +} SStreamTaskStopReq; + +int32_t tEncodeStreamTaskStopReq(SEncoder* pEncoder, const SStreamTaskStopReq* pReq); +int32_t tDecodeStreamTaskStopReq(SDecoder* pDecoder, SStreamTaskStopReq* pReq); + #ifdef __cplusplus } #endif diff --git a/include/common/tmsg.h b/include/common/tmsg.h index a5c369be12..5d4af4cd08 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -1847,6 +1847,7 @@ typedef struct { int32_t statusSeq; int64_t ipWhiteVer; int64_t analVer; + int64_t timestamp; } SStatusReq; int32_t tSerializeSStatusReq(void* buf, int32_t bufLen, SStatusReq* pReq); diff --git a/include/common/tmsgcb.h b/include/common/tmsgcb.h index c934cb6961..cdff5aaba2 100644 --- a/include/common/tmsgcb.h +++ b/include/common/tmsgcb.h @@ -31,6 +31,7 @@ typedef enum { QUERY_QUEUE, FETCH_QUEUE, READ_QUEUE, + STATUS_QUEUE, WRITE_QUEUE, APPLY_QUEUE, SYNC_QUEUE, @@ -38,6 +39,7 @@ typedef enum { STREAM_QUEUE, ARB_QUEUE, STREAM_CTRL_QUEUE, + STREAM_LONG_EXEC_QUEUE, QUEUE_MAX, } EQueueType; diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 29a7e52482..13a26910c1 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -401,6 +401,7 @@ TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TASK_CHECK, "vnode-stream-task-check", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_UNUSED, "vnd-stream-unused", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_GET_STREAM_PROGRESS, "vnd-stream-progress", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_VND_STREAM_ALL_STOP, "vnd-stream-allstop", NULL, NULL) TD_CLOSE_MSG_SEG(TDMT_VND_STREAM_MSG) TD_NEW_MSG_SEG(TDMT_VND_TMQ_MSG) //8 << 8 diff --git a/include/common/ttime.h b/include/common/ttime.h index 47875752d5..77a3041b2b 100644 --- a/include/common/ttime.h +++ b/include/common/ttime.h @@ -82,7 +82,7 @@ int32_t convertStringToTimestamp(int16_t type, char* inputData, int64_t timePrec int32_t getDuration(int64_t val, char unit, int64_t* result, int32_t timePrecision); int32_t taosFormatUtcTime(char* buf, int32_t bufLen, int64_t ts, int32_t precision); - +char* formatTimestampLocal(char* buf, int64_t val, int precision); struct STm { struct tm tm; int64_t fsec; // in NANOSECOND diff --git a/include/dnode/vnode/tqCommon.h b/include/dnode/vnode/tqCommon.h index 4d5e18520c..f5eaacfe1a 100644 --- a/include/dnode/vnode/tqCommon.h +++ b/include/dnode/vnode/tqCommon.h @@ -19,7 +19,7 @@ // message process int32_t tqStreamTaskStartAsync(SStreamMeta* pMeta, SMsgCb* cb, bool restart); int32_t tqStreamStartOneTaskAsync(SStreamMeta* pMeta, SMsgCb* cb, int64_t streamId, int32_t taskId); -int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pMsg, bool restored); +int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pMsg, bool restored, bool isLeader); int32_t tqStreamTaskProcessDispatchReq(SStreamMeta* pMeta, SRpcMsg* pMsg); int32_t tqStreamTaskProcessDispatchRsp(SStreamMeta* pMeta, SRpcMsg* pMsg); int32_t tqStreamTaskProcessRetrieveReq(SStreamMeta* pMeta, SRpcMsg* 
pMsg); @@ -39,6 +39,7 @@ int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLead int32_t tqStartTaskCompleteCallback(SStreamMeta* pMeta); int32_t tqStreamTasksGetTotalNum(SStreamMeta* pMeta); int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, char* msg); +int32_t tqStreamTaskProcessAllTaskStopReq(SStreamMeta* pMeta, SMsgCb* pMsgCb, SRpcMsg* pMsg); int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg); int32_t tqStreamTaskProcessRetrieveTriggerRsp(SStreamMeta* pMeta, SRpcMsg* pMsg); int32_t tqStreamTaskProcessTaskPauseReq(SStreamMeta* pMeta, char* pMsg); diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index f05234b82f..e2bb6eefbf 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -183,7 +183,7 @@ void qCleanExecTaskBlockBuf(qTaskInfo_t tinfo); */ int32_t qAsyncKillTask(qTaskInfo_t tinfo, int32_t rspCode); -int32_t qKillTask(qTaskInfo_t tinfo, int32_t rspCode); +int32_t qKillTask(qTaskInfo_t tinfo, int32_t rspCode, int64_t waitDuration); bool qTaskIsExecuting(qTaskInfo_t qinfo); diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 041d888d33..372322c0b8 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -58,6 +58,7 @@ extern "C" { #define STREAM_EXEC_T_STOP_ALL_TASKS (-5) #define STREAM_EXEC_T_RESUME_TASK (-6) #define STREAM_EXEC_T_ADD_FAILED_TASK (-7) +#define STREAM_EXEC_T_STOP_ONE_TASK (-8) typedef struct SStreamTask SStreamTask; typedef struct SStreamQueue SStreamQueue; @@ -140,6 +141,7 @@ enum { STREAM_QUEUE__SUCESS = 1, STREAM_QUEUE__FAILED, STREAM_QUEUE__PROCESSING, + STREAM_QUEUE__CHKPTFAILED, }; typedef enum EStreamTaskEvent { @@ -498,8 +500,10 @@ typedef struct STaskUpdateInfo { } STaskUpdateInfo; typedef struct SScanWalInfo { - int32_t scanCounter; + int32_t scanSentinel; tmr_h scanTimer; + int64_t lastScanTs; + int32_t tickCounter; } SScanWalInfo; typedef struct SFatalErrInfo { @@ -768,15 +772,19 @@ void streamMetaCleanup(); int32_t streamMetaOpen(const char* path, void* ahandle, FTaskBuild expandFunc, FTaskExpand expandTaskFn, int32_t vgId, int64_t stage, startComplete_fn_t fn, SStreamMeta** pMeta); void streamMetaClose(SStreamMeta* streamMeta); -int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask); // save to stream meta store -int32_t streamMetaRemoveTask(SStreamMeta* pMeta, STaskId* pKey); + +int32_t streamMetaSaveTaskInMeta(SStreamMeta* pMeta, SStreamTask* pTask); // save to stream meta store +int32_t streamMetaRemoveTaskInMeta(SStreamMeta* pMeta, STaskId* pKey); + int32_t streamMetaRegisterTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask, bool* pAdded); int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId); int32_t streamMetaGetNumOfTasks(SStreamMeta* pMeta); + int32_t streamMetaAcquireTaskNoLock(SStreamMeta* pMeta, int64_t streamId, int32_t taskId, SStreamTask** pTask); int32_t streamMetaAcquireTaskUnsafe(SStreamMeta* pMeta, STaskId* pId, SStreamTask** pTask); int32_t streamMetaAcquireTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId, SStreamTask** pTask); void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask); + void streamMetaClear(SStreamMeta* pMeta); void streamMetaInitBackend(SStreamMeta* pMeta); int32_t streamMetaCommit(SStreamMeta* pMeta); @@ -797,6 +805,7 @@ void streamMetaClearSetUpdateTaskListComplete(SStreamMeta* pMeta); bool streamMetaInitUpdateTaskList(SStreamMeta* pMeta, int32_t transId); void 
streamMetaRLock(SStreamMeta* pMeta); +int32_t streamMetaTryRlock(SStreamMeta* pMeta); void streamMetaRUnLock(SStreamMeta* pMeta); void streamMetaWLock(SStreamMeta* pMeta); void streamMetaWUnLock(SStreamMeta* pMeta); @@ -810,6 +819,7 @@ void streamMetaLoadAllTasks(SStreamMeta* pMeta); int32_t streamMetaStartAllTasks(SStreamMeta* pMeta); int32_t streamMetaStopAllTasks(SStreamMeta* pMeta); int32_t streamMetaStartOneTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId); +int32_t streamMetaStopOneTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId); bool streamMetaAllTasksReady(const SStreamMeta* pMeta); int32_t streamTaskSendNegotiateChkptIdMsg(SStreamTask* pTask); int32_t streamTaskCheckIfReqConsenChkptId(SStreamTask* pTask, int64_t ts); diff --git a/include/util/tworker.h b/include/util/tworker.h index a3ba7dba6d..bc0dde1a37 100644 --- a/include/util/tworker.h +++ b/include/util/tworker.h @@ -76,7 +76,7 @@ void tQWorkerFreeQueue(SQWorkerPool *pool, STaosQueue *queue); int32_t tAutoQWorkerInit(SAutoQWorkerPool *pool); void tAutoQWorkerCleanup(SAutoQWorkerPool *pool); -STaosQueue *tAutoQWorkerAllocQueue(SAutoQWorkerPool *pool, void *ahandle, FItem fp); +STaosQueue *tAutoQWorkerAllocQueue(SAutoQWorkerPool *pool, void *ahandle, FItem fp, int32_t minNum); void tAutoQWorkerFreeQueue(SAutoQWorkerPool *pool, STaosQueue *queue); int32_t tWWorkerInit(SWWorkerPool *pool); diff --git a/packaging/docker/Dockerfile b/packaging/docker/Dockerfile index a67724d5a8..2f1f5a1787 100644 --- a/packaging/docker/Dockerfile +++ b/packaging/docker/Dockerfile @@ -8,18 +8,44 @@ ARG cpuType RUN echo ${pkgFile} && echo ${dirName} COPY ${pkgFile} /root/ + ENV TINI_VERSION v0.19.0 -ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-${cpuType} /tini ENV DEBIAN_FRONTEND=noninteractive -WORKDIR /root/ -RUN tar -zxf ${pkgFile} && cd /root/${dirName}/ && /bin/bash install.sh -e no && cd /root && rm /root/${pkgFile} && rm -rf /root/${dirName} && apt-get update && apt-get install -y locales tzdata netcat curl gdb vim tmux less net-tools valgrind && locale-gen en_US.UTF-8 && apt-get clean && rm -rf /var/lib/apt/lists/ && chmod +x /tini + +ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-${cpuType} /tini +RUN chmod +x /tini + +RUN tar -zxf /root/${pkgFile} && \ + cd /root/${dirName}/ && \ + /bin/bash /root/${dirName}/install.sh -e no && \ + cd /root/ && \ + rm /root/${pkgFile} && \ + rm -rf /root/${dirName} && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + locales \ + tzdata \ + netcat \ + curl \ + gdb \ + vim \ + tmux \ + less \ + net-tools \ + valgrind \ + rsync && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* && \ + locale-gen en_US.UTF-8 ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/lib" \ LC_CTYPE=en_US.UTF-8 \ LANG=en_US.UTF-8 \ LC_ALL=en_US.UTF-8 + COPY ./bin/* /usr/bin/ ENTRYPOINT ["/tini", "--", "/usr/bin/entrypoint.sh"] CMD ["taosd"] + VOLUME [ "/var/lib/taos", "/var/log/taos", "/corefile" ] diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index f2fdecd18d..eefba479c1 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -2232,6 +2232,7 @@ static int32_t doConvertJson(SReqResultInfo* pResultInfo) { int32_t blockVersion = *(int32_t*)p; int32_t dataLen = estimateJsonLen(pResultInfo); if (dataLen <= 0) { + tscError("doConvertJson error: estimateJsonLen failed"); return TSDB_CODE_TSC_INTERNAL_ERROR; } diff --git a/source/client/src/clientMain.c b/source/client/src/clientMain.c 
index 136c740974..dae96e32b8 100644
--- a/source/client/src/clientMain.c
+++ b/source/client/src/clientMain.c
@@ -891,6 +891,7 @@ int taos_select_db(TAOS *taos, const char *db) {
 
   if (db == NULL || strlen(db) == 0) {
     releaseTscObj(*(int64_t *)taos);
+    tscError("invalid parameter: %s", db == NULL ? "db is NULL" : "db is empty");
     terrno = TSDB_CODE_TSC_INVALID_INPUT;
     return terrno;
   }

diff --git a/source/client/src/clientMsgHandler.c b/source/client/src/clientMsgHandler.c
index 83366c5c36..9cc6e621d9 100644
--- a/source/client/src/clientMsgHandler.c
+++ b/source/client/src/clientMsgHandler.c
@@ -372,11 +372,13 @@ int32_t processUseDbRsp(void* param, SDataBuf* pMsg, int32_t code) {
 
 int32_t processCreateSTableRsp(void* param, SDataBuf* pMsg, int32_t code) {
   if (pMsg == NULL) {
+    tscError("processCreateSTableRsp: invalid input param, pMsg is NULL");
     return TSDB_CODE_TSC_INVALID_INPUT;
   }
   if (param == NULL) {
     taosMemoryFree(pMsg->pEpSet);
     taosMemoryFree(pMsg->pData);
+    tscError("processCreateSTableRsp: invalid input param, param is NULL");
     return TSDB_CODE_TSC_INVALID_INPUT;
   }

diff --git a/source/common/src/msg/streamMsg.c b/source/common/src/msg/streamMsg.c
index 7e7952eb60..44c0703337 100644
--- a/source/common/src/msg/streamMsg.c
+++ b/source/common/src/msg/streamMsg.c
@@ -866,3 +866,27 @@ int32_t tDecodeStreamTaskRunReq(SDecoder* pDecoder, SStreamTaskRunReq* pReq) {
 _exit:
   return code;
 }
+
+int32_t tEncodeStreamTaskStopReq(SEncoder* pEncoder, const SStreamTaskStopReq* pReq) {
+  int32_t code = 0;
+  int32_t lino;
+
+  TAOS_CHECK_EXIT(tStartEncode(pEncoder));
+  TAOS_CHECK_EXIT(tEncodeI64(pEncoder, pReq->streamId));
+  tEndEncode(pEncoder);
+
+_exit:
+  return code;
+}
+
+int32_t tDecodeStreamTaskStopReq(SDecoder* pDecoder, SStreamTaskStopReq* pReq) {
+  int32_t code = 0;
+  int32_t lino;
+
+  TAOS_CHECK_EXIT(tStartDecode(pDecoder));
+  TAOS_CHECK_EXIT(tDecodeI64(pDecoder, &pReq->streamId));
+  tEndDecode(pDecoder);
+
+_exit:
+  return code;
+}

diff --git a/source/common/src/msg/tmsg.c b/source/common/src/msg/tmsg.c
index b69ca1d707..5bdbc35958 100644
--- a/source/common/src/msg/tmsg.c
+++ b/source/common/src/msg/tmsg.c
@@ -1476,6 +1476,8 @@ int32_t tSerializeSStatusReq(void *buf, int32_t bufLen, SStatusReq *pReq) {
     TAOS_CHECK_EXIT(tEncodeI64(&encoder, pload->syncAppliedIndex));
     TAOS_CHECK_EXIT(tEncodeI64(&encoder, pload->syncCommitIndex));
   }
+
+  TAOS_CHECK_EXIT(tEncodeI64(&encoder, pReq->timestamp));
 
   tEndEncode(&encoder);
 
@@ -1614,6 +1617,10 @@ int32_t tDeserializeSStatusReq(void *buf, int32_t bufLen, SStatusReq *pReq) {
     }
   }
 
+  if (!tDecodeIsEnd(&decoder)) {
+    TAOS_CHECK_EXIT(tDecodeI64(&decoder, &pReq->timestamp));
+  }
+
   tEndDecode(&decoder);
 
 _exit:
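The new `timestamp` field is appended at the very end of the `SStatusReq` payload, and the decoder reads it only when `tDecodeIsEnd` reports unread bytes, so dnodes on older and newer versions stay wire-compatible. A standalone sketch of that append-at-the-tail pattern (plain C with hypothetical `buf*` helpers, not the real `SEncoder`/`SDecoder` API):

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef struct {
  uint8_t data[64];
  size_t  len;  // bytes written
  size_t  pos;  // bytes consumed
} Buf;

static void bufPutI64(Buf *b, int64_t v) {  // no bounds check: sketch only
  memcpy(b->data + b->len, &v, sizeof(v));
  b->len += sizeof(v);
}

static int bufIsEnd(const Buf *b) { return b->pos >= b->len; }

static int64_t bufGetI64(Buf *b) {
  int64_t v;
  memcpy(&v, b->data + b->pos, sizeof(v));
  b->pos += sizeof(v);
  return v;
}

int main(void) {
  Buf v1 = {0};                     // "old" sender: mandatory field only
  bufPutI64(&v1, 42);               // dnodeVer

  Buf v2 = {0};                     // "new" sender: mandatory + trailing field
  bufPutI64(&v2, 42);
  bufPutI64(&v2, 1700000000000LL);  // timestamp, appended last

  Buf msgs[2] = {v1, v2};
  for (int i = 0; i < 2; i++) {     // one decoder handles both layouts
    Buf    *m = &msgs[i];
    int64_t dnodeVer = bufGetI64(m);
    int64_t ts = 0;                 // default when the peer lacks the field
    if (!bufIsEnd(m)) ts = bufGetI64(m);  // mirrors the tDecodeIsEnd guard
    printf("dnodeVer=%lld ts=%lld\n", (long long)dnodeVer, (long long)ts);
  }
  return 0;
}
```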
diff --git a/source/common/src/ttime.c b/source/common/src/ttime.c
index 9746fea034..22ab106bb3 100644
--- a/source/common/src/ttime.c
+++ b/source/common/src/ttime.c
@@ -1000,6 +1000,33 @@ int32_t taosFormatUtcTime(char* buf, int32_t bufLen, int64_t t, int32_t precisio
   TAOS_RETURN(TSDB_CODE_SUCCESS);
 }
 
+char* formatTimestampLocal(char* buf, int64_t val, int precision) {
+  time_t tt;
+  if (precision == TSDB_TIME_PRECISION_MICRO) {
+    tt = (time_t)(val / 1000000);
+  } else if (precision == TSDB_TIME_PRECISION_NANO) {
+    tt = (time_t)(val / 1000000000);
+  } else {
+    tt = (time_t)(val / 1000);
+  }
+
+  struct tm tm;
+  if (taosLocalTime(&tt, &tm, NULL, 0, NULL) == NULL) {
+    return NULL;
+  }
+  size_t pos = taosStrfTime(buf, 32, "%Y-%m-%d %H:%M:%S", &tm);
+
+  if (precision == TSDB_TIME_PRECISION_MICRO) {
+    sprintf(buf + pos, ".%06d", (int)(val % 1000000));
+  } else if (precision == TSDB_TIME_PRECISION_NANO) {
+    sprintf(buf + pos, ".%09d", (int)(val % 1000000000));
+  } else {
+    sprintf(buf + pos, ".%03d", (int)(val % 1000));
+  }
+
+  return buf;
+}
+
 int32_t taosTs2Tm(int64_t ts, int32_t precision, struct STm* tm, timezone_t tz) {
   tm->fsec = ts % TICK_PER_SECOND[precision] * (TICK_PER_SECOND[TSDB_TIME_PRECISION_NANO] / TICK_PER_SECOND[precision]);
   time_t t = ts / TICK_PER_SECOND[precision];
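Note the first branch of the seconds computation originally fell through into the trailing `else`, so microsecond values were divided by 1000; the `else if` chain above fixes that. For reference, the same local-time formatting can be reproduced with standard C alone; the sketch below assumes millisecond precision and POSIX `localtime_r`, where the in-tree version dispatches on the TSDB precision enums and uses `taosLocalTime`/`taosStrfTime`:

```c
#include <stdio.h>
#include <time.h>

// Format a millisecond UNIX timestamp as "YYYY-MM-DD HH:MM:SS.mmm" in the
// local timezone; returns NULL on failure.
static char *formatTsMsLocal(char *buf, size_t cap, long long ms) {
  time_t    secs = (time_t)(ms / 1000);
  struct tm tmv;
  if (localtime_r(&secs, &tmv) == NULL) return NULL;

  size_t pos = strftime(buf, cap, "%Y-%m-%d %H:%M:%S", &tmv);
  if (pos == 0 || pos + 5 > cap) return NULL;  // need ".mmm" plus NUL

  snprintf(buf + pos, cap - pos, ".%03d", (int)(ms % 1000));
  return buf;
}

int main(void) {
  char buf[40];
  if (formatTsMsLocal(buf, sizeof(buf), 1700000000123LL) != NULL) {
    printf("%s\n", buf);  // e.g. 2023-11-14 22:13:20.123 (zone-dependent)
  }
  return 0;
}
```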
diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c b/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c
index 54f086569e..27a6ba7e2b 100644
--- a/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c
+++ b/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c
@@ -25,6 +25,8 @@ extern SConfig *tsCfg;
 
 SMonVloadInfo tsVinfo = {0};
+SMnodeLoad    tsMLoad = {0};
+SDnodeData    tsDnodeData = {0};
 
 static void dmUpdateDnodeCfg(SDnodeMgmt *pMgmt, SDnodeCfg *pCfg) {
   int32_t code = 0;
@@ -167,23 +169,30 @@ static void dmProcessStatusRsp(SDnodeMgmt *pMgmt, SRpcMsg *pRsp) {
 
 void dmSendStatusReq(SDnodeMgmt *pMgmt) {
   int32_t    code = 0;
   SStatusReq req = {0};
+  req.timestamp = taosGetTimestampMs();
+  pMgmt->statusSeq++;
 
-  dDebug("send status req to mnode, statusSeq:%d, begin to mgnt lock", pMgmt->statusSeq);
-  (void)taosThreadRwlockRdlock(&pMgmt->pData->lock);
+  dDebug("send status req to mnode, statusSeq:%d, begin to lock statusInfo", pMgmt->statusSeq);
+  if (taosThreadMutexLock(&pMgmt->pData->statusInfolock) != 0) {
+    dError("failed to lock status info lock");
+    return;
+  }
+
+  dDebug("send status req to mnode, statusSeq:%d, begin to get dnode info", pMgmt->statusSeq);
   req.sver = tsVersion;
-  req.dnodeVer = pMgmt->pData->dnodeVer;
-  req.dnodeId = pMgmt->pData->dnodeId;
-  req.clusterId = pMgmt->pData->clusterId;
+  req.dnodeVer = tsDnodeData.dnodeVer;
+  req.dnodeId = tsDnodeData.dnodeId;
+  req.clusterId = tsDnodeData.clusterId;
   if (req.clusterId == 0) req.dnodeId = 0;
-  req.rebootTime = pMgmt->pData->rebootTime;
-  req.updateTime = pMgmt->pData->updateTime;
+  req.rebootTime = tsDnodeData.rebootTime;
+  req.updateTime = tsDnodeData.updateTime;
   req.numOfCores = tsNumOfCores;
   req.numOfSupportVnodes = tsNumOfSupportVnodes;
   req.numOfDiskCfg = tsDiskCfgNum;
   req.memTotal = tsTotalMemoryKB * 1024;
   req.memAvail = req.memTotal - tsQueueMemoryAllowed - tsApplyMemoryAllowed - 16 * 1024 * 1024;
   tstrncpy(req.dnodeEp, tsLocalEp, TSDB_EP_LEN);
-  tstrncpy(req.machineId, pMgmt->pData->machineId, TSDB_MACHINE_ID_LEN + 1);
+  tstrncpy(req.machineId, tsDnodeData.machineId, TSDB_MACHINE_ID_LEN + 1);
 
   req.clusterCfg.statusInterval = tsStatusInterval;
   req.clusterCfg.checkTime = 0;
@@ -205,29 +214,23 @@ void dmSendStatusReq(SDnodeMgmt *pMgmt) {
   memcpy(req.clusterCfg.timezone, tsTimezoneStr, TD_TIMEZONE_LEN);
   memcpy(req.clusterCfg.locale, tsLocale, TD_LOCALE_LEN);
   memcpy(req.clusterCfg.charset, tsCharset, TD_LOCALE_LEN);
-  (void)taosThreadRwlockUnlock(&pMgmt->pData->lock);
 
   dDebug("send status req to mnode, statusSeq:%d, begin to get vnode loads", pMgmt->statusSeq);
-  if (taosThreadMutexLock(&pMgmt->pData->statusInfolock) != 0) {
-    dError("failed to lock status info lock");
-    return;
-  }
+
   req.pVloads = tsVinfo.pVloads;
   tsVinfo.pVloads = NULL;
+
+  dDebug("send status req to mnode, statusSeq:%d, begin to get mnode loads", pMgmt->statusSeq);
+  req.mload = tsMLoad;
+
   if (taosThreadMutexUnlock(&pMgmt->pData->statusInfolock) != 0) {
     dError("failed to unlock status info lock");
     return;
   }
 
-  dDebug("send status req to mnode, statusSeq:%d, begin to get mnode loads", pMgmt->statusSeq);
-  SMonMloadInfo minfo = {0};
-  (*pMgmt->getMnodeLoadsFp)(&minfo);
-  req.mload = minfo.load;
-
   dDebug("send status req to mnode, statusSeq:%d, begin to get qnode loads", pMgmt->statusSeq);
   (*pMgmt->getQnodeLoadsFp)(&req.qload);
 
-  pMgmt->statusSeq++;
   req.statusSeq = pMgmt->statusSeq;
   req.ipWhiteVer = pMgmt->pData->ipWhiteVer;
   req.analVer = taosAnalGetVersion();
@@ -267,7 +270,7 @@ void dmSendStatusReq(SDnodeMgmt *pMgmt) {
   code = rpcSendRecvWithTimeout(pMgmt->msgCb.statusRpc, &epSet, &rpcMsg, &rpcRsp, &epUpdated,
                                 tsStatusInterval * 5 * 1000);
   if (code != 0) {
-    dError("failed to send status req since %s", tstrerror(code));
+    dError("failed to SendRecv status req with timeout %d ms since %s", tsStatusInterval * 5 * 1000, tstrerror(code));
     return;
   }
 
@@ -275,7 +278,8 @@ void dmSendStatusReq(SDnodeMgmt *pMgmt) {
     dmRotateMnodeEpSet(pMgmt->pData);
     char tbuf[512];
     dmEpSetToStr(tbuf, sizeof(tbuf), &epSet);
-    dError("failed to send status req since %s, epSet:%s, inUse:%d", tstrerror(rpcRsp.code), tbuf, epSet.inUse);
+    dInfo("rotate mnode epSet since SendRecv status req failed: %s, epSet:%s, inUse:%d", tstrerror(rpcRsp.code),
+          tbuf, epSet.inUse);
   } else {
     if (epUpdated == 1) {
       dmSetMnodeEpSet(pMgmt->pData, &epSet);
@@ -403,7 +407,7 @@ void dmSendConfigReq(SDnodeMgmt *pMgmt) {
   code = rpcSendRecvWithTimeout(pMgmt->msgCb.statusRpc, &epSet, &rpcMsg, &rpcRsp, &epUpdated,
                                 tsStatusInterval * 5 * 1000);
   if (code != 0) {
-    dError("failed to send status req since %s", tstrerror(code));
+    dError("failed to SendRecv config req with timeout %d ms since %s", tsStatusInterval * 5 * 1000, tstrerror(code));
     return;
   }
   if (rpcRsp.code != 0) {
@@ -414,14 +418,37 @@ void dmSendConfigReq(SDnodeMgmt *pMgmt) {
 
 void dmUpdateStatusInfo(SDnodeMgmt *pMgmt) {
-  SMonVloadInfo vinfo = {0};
+  dDebug("begin to get dnode info");
+  SDnodeData dnodeData = {0};
+  (void)taosThreadRwlockRdlock(&pMgmt->pData->lock);
+  dnodeData.dnodeVer = pMgmt->pData->dnodeVer;
+  dnodeData.dnodeId = pMgmt->pData->dnodeId;
+  dnodeData.clusterId = pMgmt->pData->clusterId;
+  dnodeData.rebootTime = pMgmt->pData->rebootTime;
+  dnodeData.updateTime = pMgmt->pData->updateTime;
+  tstrncpy(dnodeData.machineId, pMgmt->pData->machineId, TSDB_MACHINE_ID_LEN + 1);
+  (void)taosThreadRwlockUnlock(&pMgmt->pData->lock);
+
   dDebug("begin to get vnode loads");
-  (*pMgmt->getVnodeLoadsFp)(&vinfo);
+  SMonVloadInfo vinfo = {0};
+  (*pMgmt->getVnodeLoadsFp)(&vinfo);  // dmGetVnodeLoads
+
+  dDebug("begin to get mnode loads");
+  SMonMloadInfo minfo = {0};
+  (*pMgmt->getMnodeLoadsFp)(&minfo);  // dmGetMnodeLoads
 
+  dDebug("begin to lock status info");
   if (taosThreadMutexLock(&pMgmt->pData->statusInfolock) != 0) {
     dError("failed to lock status info lock");
     return;
   }
+  tsDnodeData.dnodeVer = dnodeData.dnodeVer;
+  tsDnodeData.dnodeId = dnodeData.dnodeId;
+  tsDnodeData.clusterId = dnodeData.clusterId;
+  tsDnodeData.rebootTime = dnodeData.rebootTime;
+  tsDnodeData.updateTime = dnodeData.updateTime;
+  tstrncpy(tsDnodeData.machineId, dnodeData.machineId, TSDB_MACHINE_ID_LEN + 1);
+
   if (tsVinfo.pVloads == NULL) {
     tsVinfo.pVloads = vinfo.pVloads;
     vinfo.pVloads = NULL;
@@ -429,6 +456,9 @@ void dmUpdateStatusInfo(SDnodeMgmt *pMgmt) {
     taosArrayDestroy(vinfo.pVloads);
     vinfo.pVloads = NULL;
   }
+
+  tsMLoad = minfo.load;
+
   if (taosThreadMutexUnlock(&pMgmt->pData->statusInfolock) != 0) {
     dError("failed to unlock status info lock");
     return;
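With this change, `dmUpdateStatusInfo` gathers the dnode fields and vnode/mnode loads with no lock held and publishes them into `tsDnodeData`/`tsVinfo`/`tsMLoad` under `statusInfolock`, so `dmSendStatusReq` only pays for a short critical section. A minimal sketch of that collect-then-publish pattern (pthreads; struct fields are placeholders):

```c
// Collect expensive state with no lock held, then publish it under a short
// critical section; readers copy it out under the same mutex.
#include <pthread.h>
#include <stdio.h>

typedef struct { long dnodeVer; long clusterId; } Snapshot;

static Snapshot        tsSnap;  // shared, guarded by tsSnapLock
static pthread_mutex_t tsSnapLock = PTHREAD_MUTEX_INITIALIZER;

static void collectSnapshot(Snapshot *out) {
  // Stand-in for the slow part (querying vnode/mnode loads).
  out->dnodeVer = 7;
  out->clusterId = 12345;
}

void updateStatusInfo(void) {
  Snapshot local;
  collectSnapshot(&local);       // expensive work, lock NOT held

  pthread_mutex_lock(&tsSnapLock);
  tsSnap = local;                // cheap struct copy inside the lock
  pthread_mutex_unlock(&tsSnapLock);
}

void sendStatusReq(void) {
  pthread_mutex_lock(&tsSnapLock);
  Snapshot req = tsSnap;         // readers also just copy
  pthread_mutex_unlock(&tsSnapLock);
  printf("status: ver=%ld cluster=%ld\n", req.dnodeVer, req.clusterId);
}

int main(void) { updateStatusInfo(); sendStatusReq(); return 0; }
```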
diff --git a/source/dnode/mgmt/mgmt_mnode/inc/mmInt.h b/source/dnode/mgmt/mgmt_mnode/inc/mmInt.h
index bc2977fe31..2d7aa35c13 100644
--- a/source/dnode/mgmt/mgmt_mnode/inc/mmInt.h
+++ b/source/dnode/mgmt/mgmt_mnode/inc/mmInt.h
@@ -32,6 +32,7 @@ typedef struct SMnodeMgmt {
   SSingleWorker queryWorker;
   SSingleWorker fetchWorker;
   SSingleWorker readWorker;
+  SSingleWorker statusWorker;
   SSingleWorker writeWorker;
   SSingleWorker arbWorker;
   SSingleWorker syncWorker;
@@ -58,6 +59,7 @@ int32_t mmPutMsgToArbQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg);
 int32_t mmPutMsgToSyncQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg);
 int32_t mmPutMsgToSyncRdQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg);
 int32_t mmPutMsgToReadQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg);
+int32_t mmPutMsgToStatusQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg);
 int32_t mmPutMsgToQueryQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg);
 int32_t mmPutMsgToFetchQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg);
 int32_t mmPutMsgToQueue(SMnodeMgmt *pMgmt, EQueueType qtype, SRpcMsg *pRpc);

diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c
index 46b0877476..f9fdbff4bd 100644
--- a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c
+++ b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c
@@ -204,7 +204,7 @@ SArray *mmGetMsgHandles() {
   if (dmSetMgmtHandle(pArray, TDMT_MND_KILL_QUERY, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
   if (dmSetMgmtHandle(pArray, TDMT_MND_KILL_CONN, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
   if (dmSetMgmtHandle(pArray, TDMT_MND_HEARTBEAT, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
-  if (dmSetMgmtHandle(pArray, TDMT_MND_STATUS, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
+  if (dmSetMgmtHandle(pArray, TDMT_MND_STATUS, mmPutMsgToStatusQueue, 0) == NULL) goto _OVER;
   if (dmSetMgmtHandle(pArray, TDMT_MND_CONFIG, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
   if (dmSetMgmtHandle(pArray, TDMT_MND_NOTIFY, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
   if (dmSetMgmtHandle(pArray, TDMT_MND_SYSTABLE_RETRIEVE, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
@@ -258,6 +258,7 @@ SArray *mmGetMsgHandles() {
   if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
   if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_UPDATE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
   if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_RESET_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
+  if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_ALL_STOP_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
   if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_HEARTBEAT, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
   if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_CHKPT_REPORT, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
   if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_REQ_CHKPT, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
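Routing `TDMT_MND_STATUS` to its own single-threaded `mnode-status` worker (see the mmWorker.c changes below) means status reports can no longer queue behind slow reads. A toy sketch of per-message-type queue routing (generic names, not the dnode framework):

```c
// Route one hot message type to a dedicated queue so it cannot sit behind
// slow reads. Queues here are just counters; the point is the routing table.
#include <stdio.h>

typedef enum { READ_QUEUE, STATUS_QUEUE, WRITE_QUEUE } EQueue;
typedef enum { MSG_HEARTBEAT, MSG_STATUS, MSG_KILL_QUERY } EMsg;

static const EQueue routing[] = {
    [MSG_HEARTBEAT]  = READ_QUEUE,
    [MSG_STATUS]     = STATUS_QUEUE,  // was READ_QUEUE before this change
    [MSG_KILL_QUERY] = WRITE_QUEUE,
};

static int queued[3];

static void putToQueue(EMsg m) { queued[routing[m]]++; }

int main(void) {
  putToQueue(MSG_STATUS);
  putToQueue(MSG_HEARTBEAT);
  putToQueue(MSG_STATUS);
  printf("read=%d status=%d write=%d\n",
         queued[READ_QUEUE], queued[STATUS_QUEUE], queued[WRITE_QUEUE]);
  return 0;
}
```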
pWorker = &pMgmt->arbWorker; break; @@ -246,6 +253,18 @@ int32_t mmStartWorker(SMnodeMgmt *pMgmt) { return code; } + SSingleWorkerCfg statusCfg = { + .min = 1, + .max = 1, + .name = "mnode-status", + .fp = (FItem)mmProcessRpcMsg, + .param = pMgmt, + }; + if ((code = tSingleWorkerInit(&pMgmt->statusWorker, &statusCfg)) != 0) { + dError("failed to start mnode-status worker since %s", tstrerror(code)); + return code; + } + SSingleWorkerCfg wCfg = { .min = 1, .max = 1, @@ -304,6 +323,7 @@ void mmStopWorker(SMnodeMgmt *pMgmt) { tSingleWorkerCleanup(&pMgmt->queryWorker); tSingleWorkerCleanup(&pMgmt->fetchWorker); tSingleWorkerCleanup(&pMgmt->readWorker); + tSingleWorkerCleanup(&pMgmt->statusWorker); tSingleWorkerCleanup(&pMgmt->writeWorker); tSingleWorkerCleanup(&pMgmt->arbWorker); tSingleWorkerCleanup(&pMgmt->syncWorker); diff --git a/source/dnode/mgmt/mgmt_qnode/src/qmWorker.c b/source/dnode/mgmt/mgmt_qnode/src/qmWorker.c index ab5b70079a..950da46a40 100644 --- a/source/dnode/mgmt/mgmt_qnode/src/qmWorker.c +++ b/source/dnode/mgmt/mgmt_qnode/src/qmWorker.c @@ -71,6 +71,7 @@ int32_t qmPutRpcMsgToQueue(SQnodeMgmt *pMgmt, EQueueType qtype, SRpcMsg *pRpc) { code = taosWriteQitem(pMgmt->queryWorker.queue, pMsg); return code; case READ_QUEUE: + case STATUS_QUEUE: case FETCH_QUEUE: dTrace("msg:%p, is created and will put into qnode-fetch queue, len:%d", pMsg, pRpc->contLen); code = taosWriteQitem(pMgmt->fetchWorker.queue, pMsg); diff --git a/source/dnode/mgmt/mgmt_snode/src/smHandle.c b/source/dnode/mgmt/mgmt_snode/src/smHandle.c index c2d146462a..024e2e4e99 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smHandle.c +++ b/source/dnode/mgmt/mgmt_snode/src/smHandle.c @@ -96,6 +96,7 @@ SArray *smGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_RESET, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_ALL_STOP, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_GET_STREAM_PROGRESS, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_HEARTBEAT_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_REQ_CHKPT_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/mgmt_vnode/inc/vmInt.h b/source/dnode/mgmt/mgmt_vnode/inc/vmInt.h index 84f5149624..e33730130d 100644 --- a/source/dnode/mgmt/mgmt_vnode/inc/vmInt.h +++ b/source/dnode/mgmt/mgmt_vnode/inc/vmInt.h @@ -32,6 +32,7 @@ typedef struct SVnodeMgmt { const char *name; SQueryAutoQWorkerPool queryPool; SAutoQWorkerPool streamPool; + SAutoQWorkerPool streamLongExecPool; SWWorkerPool streamCtrlPool; SWWorkerPool fetchPool; SSingleWorker mgmtWorker; @@ -39,7 +40,7 @@ typedef struct SVnodeMgmt { SHashObj *runngingHash; SHashObj *closedHash; SHashObj *creatingHash; - TdThreadRwlock lock; + TdThreadRwlock hashLock; TdThreadMutex mutex; SVnodesStat state; STfs *pTfs; @@ -75,6 +76,7 @@ typedef struct { STaosQueue *pQueryQ; STaosQueue *pStreamQ; STaosQueue *pStreamCtrlQ; + STaosQueue *pStreamLongExecQ; STaosQueue *pFetchQ; STaosQueue *pMultiMgmQ; } SVnodeObj; @@ -137,6 +139,8 @@ int32_t vmPutMsgToQueryQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg); int32_t vmPutMsgToFetchQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg); int32_t vmPutMsgToStreamQueue(SVnodeMgmt
*pMgmt, SRpcMsg *pMsg); int32_t vmPutMsgToStreamCtrlQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg); +int32_t vmPutMsgToStreamLongExecQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg); + int32_t vmPutMsgToMergeQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg); int32_t vmPutMsgToMgmtQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg); int32_t vmPutMsgToMultiMgmtQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg); diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmFile.c b/source/dnode/mgmt/mgmt_vnode/src/vmFile.c index dbef048c23..cb14155b1c 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmFile.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmFile.c @@ -20,7 +20,7 @@ #define MAX_CONTENT_LEN 2 * 1024 * 1024 int32_t vmGetAllVnodeListFromHash(SVnodeMgmt *pMgmt, int32_t *numOfVnodes, SVnodeObj ***ppVnodes) { - (void)taosThreadRwlockRdlock(&pMgmt->lock); + (void)taosThreadRwlockRdlock(&pMgmt->hashLock); int32_t num = 0; int32_t size = taosHashGetSize(pMgmt->runngingHash); @@ -28,7 +28,7 @@ int32_t vmGetAllVnodeListFromHash(SVnodeMgmt *pMgmt, int32_t *numOfVnodes, SVnod size += closedSize; SVnodeObj **pVnodes = taosMemoryCalloc(size, sizeof(SVnodeObj *)); if (pVnodes == NULL) { - (void)taosThreadRwlockUnlock(&pMgmt->lock); + (void)taosThreadRwlockUnlock(&pMgmt->hashLock); return terrno; } @@ -60,7 +60,7 @@ int32_t vmGetAllVnodeListFromHash(SVnodeMgmt *pMgmt, int32_t *numOfVnodes, SVnod } } - (void)taosThreadRwlockUnlock(&pMgmt->lock); + (void)taosThreadRwlockUnlock(&pMgmt->hashLock); *numOfVnodes = num; *ppVnodes = pVnodes; @@ -68,7 +68,7 @@ int32_t vmGetAllVnodeListFromHash(SVnodeMgmt *pMgmt, int32_t *numOfVnodes, SVnod } int32_t vmGetAllVnodeListFromHashWithCreating(SVnodeMgmt *pMgmt, int32_t *numOfVnodes, SVnodeObj ***ppVnodes) { - (void)taosThreadRwlockRdlock(&pMgmt->lock); + (void)taosThreadRwlockRdlock(&pMgmt->hashLock); int32_t num = 0; int32_t size = taosHashGetSize(pMgmt->runngingHash); @@ -76,7 +76,7 @@ int32_t vmGetAllVnodeListFromHashWithCreating(SVnodeMgmt *pMgmt, int32_t *numOfV size += creatingSize; SVnodeObj **pVnodes = taosMemoryCalloc(size, sizeof(SVnodeObj *)); if (pVnodes == NULL) { - (void)taosThreadRwlockUnlock(&pMgmt->lock); + (void)taosThreadRwlockUnlock(&pMgmt->hashLock); return terrno; } @@ -107,7 +107,7 @@ int32_t vmGetAllVnodeListFromHashWithCreating(SVnodeMgmt *pMgmt, int32_t *numOfV taosHashCancelIterate(pMgmt->creatingHash, pIter); } } - (void)taosThreadRwlockUnlock(&pMgmt->lock); + (void)taosThreadRwlockUnlock(&pMgmt->hashLock); *numOfVnodes = num; *ppVnodes = pVnodes; @@ -116,13 +116,13 @@ int32_t vmGetAllVnodeListFromHashWithCreating(SVnodeMgmt *pMgmt, int32_t *numOfV } int32_t vmGetVnodeListFromHash(SVnodeMgmt *pMgmt, int32_t *numOfVnodes, SVnodeObj ***ppVnodes) { - (void)taosThreadRwlockRdlock(&pMgmt->lock); + (void)taosThreadRwlockRdlock(&pMgmt->hashLock); int32_t num = 0; int32_t size = taosHashGetSize(pMgmt->runngingHash); SVnodeObj **pVnodes = taosMemoryCalloc(size, sizeof(SVnodeObj *)); if (pVnodes == NULL) { - (void)taosThreadRwlockUnlock(&pMgmt->lock); + (void)taosThreadRwlockUnlock(&pMgmt->hashLock); return terrno; } @@ -140,7 +140,7 @@ int32_t vmGetVnodeListFromHash(SVnodeMgmt *pMgmt, int32_t *numOfVnodes, SVnodeOb } } - (void)taosThreadRwlockUnlock(&pMgmt->lock); + (void)taosThreadRwlockUnlock(&pMgmt->hashLock); *numOfVnodes = num; *ppVnodes = pVnodes; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 234d4f41e1..fc8ff3133a 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -25,7 +25,7 @@ void 
vmGetVnodeLoads(SVnodeMgmt *pMgmt, SMonVloadInfo *pInfo, bool isReset) { tfsUpdateSize(pMgmt->pTfs); - (void)taosThreadRwlockRdlock(&pMgmt->lock); + (void)taosThreadRwlockRdlock(&pMgmt->hashLock); void *pIter = taosHashIterate(pMgmt->runngingHash, NULL); while (pIter) { @@ -46,14 +46,14 @@ void vmGetVnodeLoads(SVnodeMgmt *pMgmt, SMonVloadInfo *pInfo, bool isReset) { pIter = taosHashIterate(pMgmt->runngingHash, pIter); } - (void)taosThreadRwlockUnlock(&pMgmt->lock); + (void)taosThreadRwlockUnlock(&pMgmt->hashLock); } void vmGetVnodeLoadsLite(SVnodeMgmt *pMgmt, SMonVloadInfo *pInfo) { pInfo->pVloads = taosArrayInit(pMgmt->state.totalVnodes, sizeof(SVnodeLoadLite)); if (!pInfo->pVloads) return; - (void)taosThreadRwlockRdlock(&pMgmt->lock); + (void)taosThreadRwlockRdlock(&pMgmt->hashLock); void *pIter = taosHashIterate(pMgmt->runngingHash, NULL); while (pIter) { @@ -74,7 +74,7 @@ void vmGetVnodeLoadsLite(SVnodeMgmt *pMgmt, SMonVloadInfo *pInfo) { pIter = taosHashIterate(pMgmt->runngingHash, pIter); } - (void)taosThreadRwlockUnlock(&pMgmt->lock); + (void)taosThreadRwlockUnlock(&pMgmt->hashLock); } void vmGetMonitorInfo(SVnodeMgmt *pMgmt, SMonVmInfo *pInfo) { @@ -137,7 +137,7 @@ void vmCleanExpriedSamples(SVnodeMgmt *pMgmt) { dError("failed to get vgroup ids"); return; } - (void)taosThreadRwlockRdlock(&pMgmt->lock); + (void)taosThreadRwlockRdlock(&pMgmt->hashLock); for (int i = 0; i < list_size; i++) { int32_t vgroup_id = vgroup_ids[i]; void *vnode = taosHashGet(pMgmt->runngingHash, &vgroup_id, sizeof(int32_t)); @@ -148,7 +148,7 @@ void vmCleanExpriedSamples(SVnodeMgmt *pMgmt) { } } } - (void)taosThreadRwlockUnlock(&pMgmt->lock); + (void)taosThreadRwlockUnlock(&pMgmt->hashLock); if (vgroup_ids) taosMemoryFree(vgroup_ids); if (keys) taosMemoryFree(keys); return; @@ -1008,27 +1008,32 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RUN, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH, vmPutMsgToStreamCtrlQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH_RSP, vmPutMsgToStreamCtrlQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK, vmPutMsgToStreamCtrlQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK_RSP, vmPutMsgToStreamCtrlQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_STOP, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT_READY, vmPutMsgToStreamCtrlQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT_READY_RSP, vmPutMsgToStreamCtrlQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_TRIGGER, vmPutMsgToStreamCtrlQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_TRIGGER_RSP, vmPutMsgToStreamCtrlQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_UPDATE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if 
(dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_RESET, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_ALL_STOP, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_HEARTBEAT_RSP, vmPutMsgToStreamCtrlQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_REQ_CHKPT_RSP, vmPutMsgToStreamCtrlQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_CHKPT_REPORT_RSP, vmPutMsgToStreamCtrlQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY, vmPutMsgToStreamLongExecQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_GET_STREAM_PROGRESS, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_UPDATE_CHKPT, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_CONSEN_CHKPT, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_STOP, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_UPDATE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_RESET, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_REPLICA, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIG, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c index d71e0b02c4..8871cd575f 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c @@ -24,12 +24,12 @@ int32_t vmGetPrimaryDisk(SVnodeMgmt *pMgmt, int32_t vgId) { int32_t diskId = -1; SVnodeObj *pVnode = NULL; - (void)taosThreadRwlockRdlock(&pMgmt->lock); + (void)taosThreadRwlockRdlock(&pMgmt->hashLock); int32_t r = taosHashGetDup(pMgmt->runngingHash, &vgId, sizeof(int32_t), (void *)&pVnode); if (pVnode != NULL) { diskId = pVnode->diskPrimary; } - (void)taosThreadRwlockUnlock(&pMgmt->lock); + (void)taosThreadRwlockUnlock(&pMgmt->hashLock); return diskId; } @@ -62,7 +62,7 @@ static int32_t vmRegisterCreatingState(SVnodeMgmt *pMgmt, int32_t vgId, int32_t pCreatingVnode->vgId = vgId; pCreatingVnode->diskPrimary = diskId; - code = taosThreadRwlockWrlock(&pMgmt->lock); + code = taosThreadRwlockWrlock(&pMgmt->hashLock); if (code != 0) { taosMemoryFree(pCreatingVnode); return code; @@ -75,7 +75,7 @@ static int32_t vmRegisterCreatingState(SVnodeMgmt *pMgmt, int32_t vgId, int32_t taosMemoryFree(pCreatingVnode); } - int32_t r = taosThreadRwlockUnlock(&pMgmt->lock); + int32_t r = taosThreadRwlockUnlock(&pMgmt->hashLock); if (r != 0) { dError("vgId:%d, failed to unlock since %s", vgId, tstrerror(r)); } @@ -86,7 +86,7 @@ static int32_t vmRegisterCreatingState(SVnodeMgmt *pMgmt, int32_t vgId, int32_t static void vmUnRegisterCreatingState(SVnodeMgmt *pMgmt, int32_t vgId) { SVnodeObj *pOld = NULL; - 
(void)taosThreadRwlockWrlock(&pMgmt->lock); + (void)taosThreadRwlockWrlock(&pMgmt->hashLock); int32_t r = taosHashGetDup(pMgmt->creatingHash, &vgId, sizeof(int32_t), (void *)&pOld); if (r != 0) { dError("vgId:%d, failed to get vnode from creating Hash", vgId); @@ -96,7 +96,7 @@ static void vmUnRegisterCreatingState(SVnodeMgmt *pMgmt, int32_t vgId) { if (r != 0) { dError("vgId:%d, failed to remove vnode from creatingHash", vgId); } - (void)taosThreadRwlockUnlock(&pMgmt->lock); + (void)taosThreadRwlockUnlock(&pMgmt->hashLock); if (pOld) { dTrace("vgId:%d, free vnode pOld:%p", vgId, &pOld); @@ -205,7 +205,7 @@ void vmCleanPrimaryDisk(SVnodeMgmt *pMgmt, int32_t vgId) { vmUnRegisterCreatingS SVnodeObj *vmAcquireVnodeImpl(SVnodeMgmt *pMgmt, int32_t vgId, bool strict) { SVnodeObj *pVnode = NULL; - (void)taosThreadRwlockRdlock(&pMgmt->lock); + (void)taosThreadRwlockRdlock(&pMgmt->hashLock); int32_t r = taosHashGetDup(pMgmt->runngingHash, &vgId, sizeof(int32_t), (void *)&pVnode); if (pVnode == NULL || strict && (pVnode->dropped || pVnode->failed)) { terrno = TSDB_CODE_VND_INVALID_VGROUP_ID; @@ -214,7 +214,7 @@ SVnodeObj *vmAcquireVnodeImpl(SVnodeMgmt *pMgmt, int32_t vgId, bool strict) { int32_t refCount = atomic_add_fetch_32(&pVnode->refCount, 1); dTrace("vgId:%d, acquire vnode, vnode:%p, ref:%d", pVnode->vgId, pVnode, refCount); } - (void)taosThreadRwlockUnlock(&pMgmt->lock); + (void)taosThreadRwlockUnlock(&pMgmt->hashLock); return pVnode; } @@ -334,10 +334,10 @@ int32_t vmOpenVnode(SVnodeMgmt *pMgmt, SWrapperCfg *pCfg, SVnode *pImpl) { pVnode->failed = 1; } - (void)taosThreadRwlockWrlock(&pMgmt->lock); + (void)taosThreadRwlockWrlock(&pMgmt->hashLock); int32_t code = vmRegisterRunningState(pMgmt, pVnode); vmUnRegisterClosedState(pMgmt, pVnode); - (void)taosThreadRwlockUnlock(&pMgmt->lock); + (void)taosThreadRwlockUnlock(&pMgmt->hashLock); return code; } @@ -350,15 +350,15 @@ void vmCloseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode, bool commitAndRemoveWal, vnodeProposeCommitOnNeed(pVnode->pImpl, atExit); } - (void)taosThreadRwlockWrlock(&pMgmt->lock); + (void)taosThreadRwlockWrlock(&pMgmt->hashLock); vmUnRegisterRunningState(pMgmt, pVnode->vgId); if (keepClosed) { if (vmRegisterClosedState(pMgmt, pVnode) != 0) { - (void)taosThreadRwlockUnlock(&pMgmt->lock); + (void)taosThreadRwlockUnlock(&pMgmt->hashLock); return; }; } - (void)taosThreadRwlockUnlock(&pMgmt->lock); + (void)taosThreadRwlockUnlock(&pMgmt->hashLock); vmReleaseVnode(pMgmt, pVnode); @@ -398,10 +398,14 @@ void vmCloseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode, bool commitAndRemoveWal, dInfo("vgId:%d, wait for vnode stream queue:%p is empty, %d remains", pVnode->vgId, pVnode->pStreamQ, taosQueueItemSize(pVnode->pStreamQ)); - while (!taosQueueEmpty(pVnode->pStreamQ)) taosMsleep(10); + while (!taosQueueEmpty(pVnode->pStreamQ)) taosMsleep(50); dInfo("vgId:%d, wait for vnode stream ctrl queue:%p is empty", pVnode->vgId, pVnode->pStreamCtrlQ); - while (!taosQueueEmpty(pVnode->pStreamCtrlQ)) taosMsleep(10); + while (!taosQueueEmpty(pVnode->pStreamCtrlQ)) taosMsleep(50); + + dInfo("vgId:%d, wait for vnode stream long-exec queue:%p is empty, %d remains", pVnode->vgId, + pVnode->pStreamLongExecQ, taosQueueItemSize(pVnode->pStreamLongExecQ)); + while (!taosQueueEmpty(pVnode->pStreamLongExecQ)) taosMsleep(50); dInfo("vgId:%d, all vnode queues is empty", pVnode->vgId); @@ -450,14 +454,14 @@ _closed: void vmCloseFailedVnode(SVnodeMgmt *pMgmt, int32_t vgId) { int32_t r = 0; - r = taosThreadRwlockWrlock(&pMgmt->lock); + r = 
taosThreadRwlockWrlock(&pMgmt->hashLock); if (r != 0) { dError("vgId:%d, failed to lock since %s", vgId, tstrerror(r)); } if (r == 0) { vmUnRegisterRunningState(pMgmt, vgId); } - r = taosThreadRwlockUnlock(&pMgmt->lock); + r = taosThreadRwlockUnlock(&pMgmt->hashLock); if (r != 0) { dError("vgId:%d, failed to unlock since %s", vgId, tstrerror(r)); } @@ -792,7 +796,7 @@ static void vmCleanup(SVnodeMgmt *pMgmt) { vmCloseVnodes(pMgmt); vmStopWorker(pMgmt); vnodeCleanup(); - (void)taosThreadRwlockDestroy(&pMgmt->lock); + (void)taosThreadRwlockDestroy(&pMgmt->hashLock); (void)taosThreadMutexDestroy(&pMgmt->mutex); (void)taosThreadMutexDestroy(&pMgmt->fileLock); taosMemoryFree(pMgmt); @@ -880,7 +884,7 @@ static int32_t vmInit(SMgmtInputOpt *pInput, SMgmtOutputOpt *pOutput) { pMgmt->msgCb.qsizeFp = (GetQueueSizeFp)vmGetQueueSize; pMgmt->msgCb.mgmt = pMgmt; - code = taosThreadRwlockInit(&pMgmt->lock, NULL); + code = taosThreadRwlockInit(&pMgmt->hashLock, NULL); if (code != 0) { code = TAOS_SYSTEM_ERROR(errno); goto _OVER; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c index b398bdf242..5acd06bbda 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c @@ -150,7 +150,7 @@ static void vmProcessStreamCtrlQueue(SQueueInfo *pInfo, STaosQall* pQall, int32_ SRpcMsg *pMsg = pItem; const STraceId *trace = &pMsg->info.traceId; - dGTrace("vgId:%d, msg:%p get from vnode-ctrl-stream queue", pVnode->vgId, pMsg); + dGTrace("vgId:%d, msg:%p get from vnode-stream-ctrl queue", pVnode->vgId, pMsg); code = vnodeProcessStreamCtrlMsg(pVnode->pImpl, pMsg, pInfo); if (code != 0) { terrno = code; @@ -165,6 +165,26 @@ static void vmProcessStreamCtrlQueue(SQueueInfo *pInfo, STaosQall* pQall, int32_ } } +static void vmProcessStreamLongExecQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { + SVnodeObj *pVnode = pInfo->ahandle; + const STraceId *trace = &pMsg->info.traceId; + int32_t code = 0; + + dGTrace("vgId:%d, msg:%p get from vnode-stream long-exec queue", pVnode->vgId, pMsg); + + code = vnodeProcessStreamLongExecMsg(pVnode->pImpl, pMsg, pInfo); + if (code != 0) { + terrno = code; + dGError("vgId:%d, msg:%p failed to process stream msg %s since %s", pVnode->vgId, pMsg, TMSG_INFO(pMsg->msgType), + tstrerror(code)); + vmSendRsp(pMsg, code); + } + + dGTrace("vgId:%d, msg:%p is freed, code:0x%x", pVnode->vgId, pMsg, code); + rpcFreeCont(pMsg->pCont); + taosFreeQitem(pMsg); +} + static void vmProcessFetchQueue(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { SVnodeObj *pVnode = pInfo->ahandle; SRpcMsg *pMsg = NULL; @@ -274,9 +294,13 @@ static int32_t vmPutMsgToQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg, EQueueType qtyp code = taosWriteQitem(pVnode->pStreamQ, pMsg); break; case STREAM_CTRL_QUEUE: - dGTrace("vgId:%d, msg:%p put into vnode-ctrl-stream queue", pVnode->vgId, pMsg); + dGTrace("vgId:%d, msg:%p put into vnode-stream-ctrl queue", pVnode->vgId, pMsg); code = taosWriteQitem(pVnode->pStreamCtrlQ, pMsg); break; + case STREAM_LONG_EXEC_QUEUE: + dGTrace("vgId:%d, msg:%p put into vnode-stream-long-exec queue", pVnode->vgId, pMsg); + code = taosWriteQitem(pVnode->pStreamLongExecQ, pMsg); + break; case FETCH_QUEUE: dGTrace("vgId:%d, msg:%p put into vnode-fetch queue", pVnode->vgId, pMsg); code = taosWriteQitem(pVnode->pFetchQ, pMsg); @@ -335,6 +359,8 @@ int32_t vmPutMsgToStreamQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMs int32_t vmPutMsgToStreamCtrlQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return 
vmPutMsgToQueue(pMgmt, pMsg, STREAM_CTRL_QUEUE); } +int32_t vmPutMsgToStreamLongExecQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, STREAM_LONG_EXEC_QUEUE); } + int32_t vmPutMsgToMultiMgmtQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { const STraceId *trace = &pMsg->info.traceId; dGTrace("msg:%p, put into vnode-multi-mgmt queue", pMsg); @@ -409,6 +435,10 @@ int32_t vmGetQueueSize(SVnodeMgmt *pMgmt, int32_t vgId, EQueueType qtype) { break; case STREAM_CTRL_QUEUE: size = taosQueueItemSize(pVnode->pStreamCtrlQ); + break; + case STREAM_LONG_EXEC_QUEUE: + size = taosQueueItemSize(pVnode->pStreamLongExecQ); + break; default: break; } @@ -451,13 +481,16 @@ int32_t vmAllocQueue(SVnodeMgmt *pMgmt, SVnodeObj *pVnode) { } pVnode->pQueryQ = tQueryAutoQWorkerAllocQueue(&pMgmt->queryPool, pVnode, (FItem)vmProcessQueryQueue); - pVnode->pStreamQ = tAutoQWorkerAllocQueue(&pMgmt->streamPool, pVnode, (FItem)vmProcessStreamQueue); pVnode->pFetchQ = tWWorkerAllocQueue(&pMgmt->fetchPool, pVnode, (FItems)vmProcessFetchQueue); + + // init stream msg processing queue family + pVnode->pStreamQ = tAutoQWorkerAllocQueue(&pMgmt->streamPool, pVnode, (FItem)vmProcessStreamQueue, 2); pVnode->pStreamCtrlQ = tWWorkerAllocQueue(&pMgmt->streamCtrlPool, pVnode, (FItems)vmProcessStreamCtrlQueue); + pVnode->pStreamLongExecQ = tAutoQWorkerAllocQueue(&pMgmt->streamLongExecPool, pVnode, (FItem)vmProcessStreamLongExecQueue, 1); if (pVnode->pWriteW.queue == NULL || pVnode->pSyncW.queue == NULL || pVnode->pSyncRdW.queue == NULL || pVnode->pApplyW.queue == NULL || pVnode->pQueryQ == NULL || pVnode->pStreamQ == NULL || pVnode->pFetchQ == NULL - || pVnode->pStreamCtrlQ == NULL) { + || pVnode->pStreamCtrlQ == NULL || pVnode->pStreamLongExecQ == NULL) { return TSDB_CODE_OUT_OF_MEMORY; } @@ -473,6 +506,7 @@ int32_t vmAllocQueue(SVnodeMgmt *pMgmt, SVnodeObj *pVnode) { dInfo("vgId:%d, fetch-queue:%p is alloced, thread:%08" PRId64, pVnode->vgId, pVnode->pFetchQ, taosQueueGetThreadId(pVnode->pFetchQ)); dInfo("vgId:%d, stream-queue:%p is alloced", pVnode->vgId, pVnode->pStreamQ); + dInfo("vgId:%d, stream-long-exec-queue:%p is alloced", pVnode->vgId, pVnode->pStreamLongExecQ); dInfo("vgId:%d, stream-ctrl-queue:%p is alloced, thread:%08" PRId64, pVnode->vgId, pVnode->pStreamCtrlQ, taosQueueGetThreadId(pVnode->pStreamCtrlQ)); return 0; @@ -481,17 +515,22 @@ int32_t vmAllocQueue(SVnodeMgmt *pMgmt, SVnodeObj *pVnode) { void vmFreeQueue(SVnodeMgmt *pMgmt, SVnodeObj *pVnode) { tQueryAutoQWorkerFreeQueue(&pMgmt->queryPool, pVnode->pQueryQ); tAutoQWorkerFreeQueue(&pMgmt->streamPool, pVnode->pStreamQ); + tAutoQWorkerFreeQueue(&pMgmt->streamLongExecPool, pVnode->pStreamLongExecQ); tWWorkerFreeQueue(&pMgmt->streamCtrlPool, pVnode->pStreamCtrlQ); tWWorkerFreeQueue(&pMgmt->fetchPool, pVnode->pFetchQ); pVnode->pQueryQ = NULL; + pVnode->pFetchQ = NULL; + pVnode->pStreamQ = NULL; pVnode->pStreamCtrlQ = NULL; - pVnode->pFetchQ = NULL; + pVnode->pStreamLongExecQ = NULL; + dDebug("vgId:%d, queue is freed", pVnode->vgId); } int32_t vmStartWorker(SVnodeMgmt *pMgmt) { - int32_t code = 0; + int32_t code = 0; + SQueryAutoQWorkerPool *pQPool = &pMgmt->queryPool; pQPool->name = "vnode-query"; pQPool->min = tsNumOfVnodeQueryThreads; @@ -505,8 +544,13 @@ int32_t vmStartWorker(SVnodeMgmt *pMgmt) { pStreamPool->ratio = tsRatioOfVnodeStreamThreads; if ((code = tAutoQWorkerInit(pStreamPool)) != 0) return code; + SAutoQWorkerPool *pLongExecPool = &pMgmt->streamLongExecPool; + pLongExecPool->name = "vnode-stream-long-exec"; + pLongExecPool->ratio = 
tsRatioOfVnodeStreamThreads/3; + if ((code = tAutoQWorkerInit(pLongExecPool)) != 0) return code; + SWWorkerPool *pStreamCtrlPool = &pMgmt->streamCtrlPool; - pStreamCtrlPool->name = "vnode-ctrl-stream"; + pStreamCtrlPool->name = "vnode-stream-ctrl"; pStreamCtrlPool->max = 1; if ((code = tWWorkerInit(pStreamCtrlPool)) != 0) return code; @@ -541,6 +585,7 @@ int32_t vmStartWorker(SVnodeMgmt *pMgmt) { void vmStopWorker(SVnodeMgmt *pMgmt) { tQueryAutoQWorkerCleanup(&pMgmt->queryPool); tAutoQWorkerCleanup(&pMgmt->streamPool); + tAutoQWorkerCleanup(&pMgmt->streamLongExecPool); tWWorkerCleanup(&pMgmt->streamCtrlPool); tWWorkerCleanup(&pMgmt->fetchPool); dDebug("vnode workers are closed"); diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index d694dc67eb..509dac7a53 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ b/source/dnode/mnode/impl/inc/mndStream.h @@ -151,6 +151,8 @@ int32_t mndStreamSetChkptIdAction(SMnode *pMnode, STrans *pTrans, SStreamTask *p int32_t mndStreamSetRestartAction(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); int32_t mndStreamSetCheckpointAction(SMnode *pMnode, STrans *pTrans, SStreamTask *pTask, int64_t checkpointId, int8_t mndTrigger); +int32_t mndStreamSetStopStreamTasksActions(SMnode* pMnode, STrans *pTrans, uint64_t dbUid); + int32_t mndCreateStreamChkptInfoUpdateTrans(SMnode *pMnode, SStreamObj *pStream, SArray *pChkptInfoList); int32_t mndScanCheckpointReportInfo(SRpcMsg *pReq); int32_t mndCreateSetConsensusChkptIdTrans(SMnode *pMnode, SStreamObj *pStream, int32_t taskId, int64_t checkpointId, diff --git a/source/dnode/mnode/impl/src/mndConfig.c b/source/dnode/mnode/impl/src/mndConfig.c index 099fff7aee..1e69ae2b5a 100644 --- a/source/dnode/mnode/impl/src/mndConfig.c +++ b/source/dnode/mnode/impl/src/mndConfig.c @@ -299,6 +299,8 @@ _OVER: } sdbRelease(pMnode->pSdb, vObj); cfgArrayCleanUp(array); + + tFreeSConfigReq(&configReq); return code; } diff --git a/source/dnode/mnode/impl/src/mndDb.c b/source/dnode/mnode/impl/src/mndDb.c index ebf7f86ea6..f4226d78a3 100644 --- a/source/dnode/mnode/impl/src/mndDb.c +++ b/source/dnode/mnode/impl/src/mndDb.c @@ -1714,6 +1714,7 @@ static int32_t mndDropDb(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb) { #endif TAOS_CHECK_GOTO(mndDropSmasByDb(pMnode, pTrans, pDb), NULL, _OVER); TAOS_CHECK_GOTO(mndDropIdxsByDb(pMnode, pTrans, pDb), NULL, _OVER); + TAOS_CHECK_GOTO(mndStreamSetStopStreamTasksActions(pMnode, pTrans, pDb->uid), NULL, _OVER); TAOS_CHECK_GOTO(mndSetDropDbRedoActions(pMnode, pTrans, pDb), NULL, _OVER); TAOS_CHECK_GOTO(mndUserRemoveDb(pMnode, pTrans, pDb->name), NULL, _OVER); diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index 1a74573490..6a3b6e1cd8 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -782,8 +782,12 @@ static int32_t mndProcessStatusReq(SRpcMsg *pReq) { bool needCheck = !online || dnodeChanged || reboot || supportVnodesChanged || analVerChanged || pMnode->ipWhiteVer != statusReq.ipWhiteVer || encryptKeyChanged || enableWhiteListChanged; const STraceId *trace = &pReq->info.traceId; - mGTrace("dnode:%d, status received, accessTimes:%d check:%d online:%d reboot:%d changed:%d statusSeq:%d", pDnode->id, - pDnode->accessTimes, needCheck, online, reboot, dnodeChanged, statusReq.statusSeq); + char timestamp[TD_TIME_STR_LEN] = {0}; + if (mDebugFlag & DEBUG_TRACE) (void)formatTimestampLocal(timestamp, statusReq.timestamp, TSDB_TIME_PRECISION_MILLI); + mGTrace( + 
"dnode:%d, status received, accessTimes:%d check:%d online:%d reboot:%d changed:%d statusSeq:%d " + "timestamp:%s", + pDnode->id, pDnode->accessTimes, needCheck, online, reboot, dnodeChanged, statusReq.statusSeq, timestamp); if (reboot) { tsGrantHBInterval = GRANT_HEART_BEAT_MIN; diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index b241af5adb..09314c9e63 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -1174,6 +1174,7 @@ int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgr } int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad) { + mTrace("mnode get load"); SSyncState state = syncGetState(pMnode->syncMgmt.sync); pLoad->syncState = state.state; pLoad->syncRestore = state.restored; diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 46d22b39c2..7fe5c5fb80 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -116,6 +116,7 @@ int32_t mndInitStream(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_STREAM_DROP_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_VND_STREAM_CHECK_POINT_SOURCE_RSP, mndTransProcessRsp); + mndSetMsgHandle(pMnode, TDMT_VND_STREAM_ALL_STOP_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_BEGIN_CHECKPOINT, mndProcessStreamCheckpoint); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_DROP_ORPHANTASKS, mndProcessDropOrphanTaskReq); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_TASK_RESET, mndProcessResetStatusReq); @@ -2013,15 +2014,16 @@ static int32_t mndProcessResetStreamReq(SRpcMsg *pReq) { return TSDB_CODE_ACTION_IN_PROGRESS; } -static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChangeInfo, bool includeAllNodes) { +static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChangeInfo, bool includeAllNodes, STrans** pUpdateTrans) { SSdb *pSdb = pMnode->pSdb; - SStreamObj *pStream = NULL; void *pIter = NULL; STrans *pTrans = NULL; int32_t code = 0; + *pUpdateTrans = NULL; // conflict check for nodeUpdate trans, here we randomly chose one stream to add into the trans pool while (1) { + SStreamObj *pStream = NULL; pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); if (pIter == NULL) { break; @@ -2038,6 +2040,7 @@ static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChange } while (1) { + SStreamObj *pStream = NULL; pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); if (pIter == NULL) { break; @@ -2058,7 +2061,7 @@ static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChange void *p1 = taosHashGet(pChangeInfo->pDBMap, pStream->targetDb, strlen(pStream->targetDb)); void *p2 = taosHashGet(pChangeInfo->pDBMap, pStream->sourceDb, strlen(pStream->sourceDb)); if (p1 == NULL && p2 == NULL) { - mDebug("stream:0x%" PRIx64 " %s not involved nodeUpdate, ignore", pStream->uid, pStream->name); + mDebug("stream:0x%" PRIx64 " %s not involved in nodeUpdate, ignore", pStream->uid, pStream->name); sdbRelease(pSdb, pStream); continue; } @@ -2093,20 +2096,7 @@ static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChange } // no need to build the trans to handle the vgroup update - if (pTrans == NULL) { - return 0; - } - - code = mndTransPrepare(pMnode, pTrans); - if (code != TSDB_CODE_SUCCESS && code != TSDB_CODE_ACTION_IN_PROGRESS) { - mError("trans:%d, failed to prepare update stream trans since %s", pTrans->id, tstrerror(code)); - 
sdbRelease(pMnode->pSdb, pStream); - mndTransDrop(pTrans); - return code; - } - - sdbRelease(pMnode->pSdb, pStream); - mndTransDrop(pTrans); + *pUpdateTrans = pTrans; return code; } @@ -2188,7 +2178,7 @@ static int32_t refreshNodeListFromExistedStreams(SMnode *pMnode, SArray *pNodeLi taosHashCleanup(pHash); - mDebug("numOfNodes:%d for stream after extract nodeInfo from stream", (int32_t)taosArrayGetSize(pNodeList)); + mDebug("numOfNodes:%d after extracting nodeInfo from all streams", (int32_t)taosArrayGetSize(pNodeList)); return code; } @@ -2212,14 +2202,49 @@ static void addAllDbsIntoHashmap(SHashObj *pDBMap, SSdb *pSdb) { } } +static int32_t doProcessNodeCheckHelp(SArray *pNodeSnapshot, SMnode *pMnode, SVgroupChangeInfo *pChangeInfo, + bool *pUpdateAllVgroups) { + int32_t code = removeExpiredNodeEntryAndTaskInBuf(pNodeSnapshot); + if (code) { + mDebug("failed to remove expired node entry in buf, code:%s", tstrerror(code)); + return code; + } + + code = mndFindChangedNodeInfo(pMnode, execInfo.pNodeList, pNodeSnapshot, pChangeInfo); + if (code) { + mDebug("failed to find changed vnode(s) during vnode(s) check, code:%s", tstrerror(code)); + return code; + } + + { + if (execInfo.role == NODE_ROLE_LEADER && execInfo.switchFromFollower) { + mInfo("rollback all stream due to mnode leader/follower switch by using nodeUpdate trans"); + *pUpdateAllVgroups = true; + execInfo.switchFromFollower = false; // reset the flag + addAllDbsIntoHashmap(pChangeInfo->pDBMap, pMnode->pSdb); + } + } + + if (taosArrayGetSize(pChangeInfo->pUpdateNodeList) > 0 || (*pUpdateAllVgroups)) { + // kill current active checkpoint transaction, since the transaction is vnode wide. + killAllCheckpointTrans(pMnode, pChangeInfo); + } else { + mDebug("no update found in vnode(s) list"); + } + + return code; +} + // this function runs by only one thread, so it is not multi-thread safe static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { - int32_t code = 0; - bool allReady = true; - SArray *pNodeSnapshot = NULL; - SMnode *pMnode = pMsg->info.node; - int64_t ts = taosGetTimestampSec(); - bool updateAllVgroups = false; + int32_t code = 0; + bool allReady = true; + SArray *pNodeSnapshot = NULL; + SMnode *pMnode = pMsg->info.node; + int64_t tsms = taosGetTimestampMs(); + int64_t ts = tsms / 1000; + bool updateAllVgroups = false; + SVgroupChangeInfo changeInfo = {0}; int32_t old = atomic_val_compare_exchange_32(&mndNodeCheckSentinel, 0, 1); if (old != 0) { @@ -2227,7 +2252,7 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { return 0; } - mDebug("start to do node changing check"); + mDebug("start to do node changing check, ts:%" PRId64, tsms); streamMutexLock(&execInfo.lock); int32_t numOfNodes = extractStreamNodeList(pMnode); @@ -2253,58 +2278,60 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { } streamMutexLock(&execInfo.lock); + code = doProcessNodeCheckHelp(pNodeSnapshot, pMnode, &changeInfo, &updateAllVgroups); + streamMutexUnlock(&execInfo.lock); - code = removeExpiredNodeEntryAndTaskInBuf(pNodeSnapshot); if (code) { goto _end; } - SVgroupChangeInfo changeInfo = {0}; - code = mndFindChangedNodeInfo(pMnode, execInfo.pNodeList, pNodeSnapshot, &changeInfo); - if (code) { - goto _end; - } - - { - if (execInfo.role == NODE_ROLE_LEADER && execInfo.switchFromFollower) { - mInfo("rollback all stream due to mnode leader/follower switch by using nodeUpdate trans"); - updateAllVgroups = true; - execInfo.switchFromFollower = false; // reset the flag - addAllDbsIntoHashmap(changeInfo.pDBMap, pMnode->pSdb); - } - } - if
(taosArrayGetSize(changeInfo.pUpdateNodeList) > 0 || updateAllVgroups) { - // kill current active checkpoint transaction, since the transaction is vnode wide. - killAllCheckpointTrans(pMnode, &changeInfo); - code = mndProcessVgroupChange(pMnode, &changeInfo, updateAllVgroups); + mDebug("vnode(s) change detected, build trans to update stream task epsets"); + + STrans *pTrans = NULL; + + streamMutexLock(&execInfo.lock); + code = mndProcessVgroupChange(pMnode, &changeInfo, updateAllVgroups, &pTrans); + streamMutexUnlock(&execInfo.lock); + + // NOTE: sync trans out of lock + if (code == 0 && pTrans != NULL) { + code = mndTransPrepare(pMnode, pTrans); + if (code != TSDB_CODE_SUCCESS && code != TSDB_CODE_ACTION_IN_PROGRESS) { + mError("trans:%d, failed to prepare update stream trans since %s", pTrans->id, tstrerror(code)); + } + + mndTransDrop(pTrans); + } // keep the new vnode snapshot if success if (code == TSDB_CODE_SUCCESS || code == TSDB_CODE_ACTION_IN_PROGRESS) { + streamMutexLock(&execInfo.lock); + code = refreshNodeListFromExistedStreams(pMnode, execInfo.pNodeList); + int32_t num = (int)taosArrayGetSize(execInfo.pNodeList); + if (code == 0) { + execInfo.ts = ts; + mDebug("create trans successfully, update cached node list, numOfNodes:%d", num); + } + + streamMutexUnlock(&execInfo.lock); + if (code) { mError("failed to extract node list from stream, code:%s", tstrerror(code)); goto _end; } - - execInfo.ts = ts; - mDebug("create trans successfully, update cached node list, numOfNodes:%d", - (int)taosArrayGetSize(execInfo.pNodeList)); - } else { - mError("unexpected code during create nodeUpdate trans, code:%s", tstrerror(code)); } - } else { - mDebug("no update found in nodeList"); } mndDestroyVgroupChangeInfo(&changeInfo); _end: - streamMutexUnlock(&execInfo.lock); taosArrayDestroy(pNodeSnapshot); - mDebug("end to do stream task node change checking"); + mDebug("end to do stream task node change checking, elapsed time:%" PRId64 "ms", taosGetTimestampMs() - tsms); atomic_store_32(&mndNodeCheckSentinel, 0); + return 0; } @@ -2749,7 +2776,7 @@ int32_t mndProcessConsensusInTmr(SRpcMsg *pMsg) { int64_t now = taosGetTimestampMs(); bool allReady = true; SArray *pNodeSnapshot = NULL; - int32_t maxAllowedTrans = 50; + int32_t maxAllowedTrans = 20; int32_t numOfTrans = 0; int32_t code = 0; void *pIter = NULL; @@ -2836,6 +2863,7 @@ int32_t mndProcessConsensusInTmr(SRpcMsg *pMsg) { return TSDB_CODE_FAILED; }
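Note on the maxAllowedTrans change above: lowering the cap from 50 to 20 means each pass of mndProcessConsensusInTmr creates at most 20 consensus-checkpoint transactions and leaves the rest for the next timer tick. A minimal sketch of this bounded-work-per-tick pattern, with all names illustrative rather than the actual mnode API:

    #include <stdio.h>

    #define MAX_ALLOWED_TRANS 20 /* mirrors the lowered per-tick budget */

    /* stand-in for mndCreateSetConsensusChkptIdTrans(); returns 0 on success */
    static int createOneTrans(int streamIdx) {
      printf("create consensus-chkpt trans for stream %d\n", streamIdx);
      return 0;
    }

    /* one timer tick: stop once the budget is spent, defer the remainder */
    static void processConsensusTick(int numOfPending) {
      int numOfTrans = 0;
      for (int i = 0; i < numOfPending; ++i) {
        if (numOfTrans >= MAX_ALLOWED_TRANS) {
          printf("budget of %d used, defer %d stream(s) to next tick\n",
                 MAX_ALLOWED_TRANS, numOfPending - i);
          return;
        }
        if (createOneTrans(i) == 0) numOfTrans++;
      }
    }

    int main(void) {
      processConsensusTick(25); /* 20 created now, 5 deferred */
      return 0;
    }

Keeping each tick short bounds the time the timer callback holds the mnode busy; the trade-off is that a large backlog drains over several ticks instead of one.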
+ // todo: check for redundant consensus-checkpoint trans, if this kind of trans repeatedly fails. code = mndCreateSetConsensusChkptIdTrans(pMnode, pStream, pe->req.taskId, chkId, pe->req.startTs); if (code != TSDB_CODE_SUCCESS && code != TSDB_CODE_ACTION_IN_PROGRESS) { mError("failed to create consensus-checkpoint trans, stream:0x%" PRIx64, pStream->uid); diff --git a/source/dnode/mnode/impl/src/mndStreamTransAct.c b/source/dnode/mnode/impl/src/mndStreamTransAct.c index 5ccb626609..f0e4c7559d 100644 --- a/source/dnode/mnode/impl/src/mndStreamTransAct.c +++ b/source/dnode/mnode/impl/src/mndStreamTransAct.c @@ -666,3 +666,74 @@ int32_t mndStreamSetRestartAction(SMnode* pMnode, STrans *pTrans, SStreamObj* pS return 0; } + +static int32_t doSetStopAllTasksAction(SMnode* pMnode, STrans* pTrans, SVgObj* pVgObj) { + void *pBuf = NULL; + int32_t len = 0; + int32_t code = 0; + SEncoder encoder; + + SStreamTaskStopReq req = {.streamId = -1}; + tEncodeSize(tEncodeStreamTaskStopReq, &req, len, code); + if (code < 0) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return terrno; + } + + int32_t tlen = sizeof(SMsgHead) + len; + + pBuf = taosMemoryMalloc(tlen); + if (pBuf == NULL) { + return terrno; + } + + void *abuf = POINTER_SHIFT(pBuf, sizeof(SMsgHead)); + tEncoderInit(&encoder, abuf, tlen); + code = tEncodeStreamTaskStopReq(&encoder, &req); + if (code == -1) { + tEncoderClear(&encoder); + taosMemoryFree(pBuf); + return code; + } + + SMsgHead *pMsgHead = (SMsgHead *)pBuf; + pMsgHead->contLen = htonl(tlen); + pMsgHead->vgId = htonl(pVgObj->vgId); + + tEncoderClear(&encoder); + + SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj); + mndReleaseVgroup(pMnode, pVgObj); + + code = setTransAction(pTrans, pBuf, tlen, TDMT_VND_STREAM_ALL_STOP, &epset, 0, TSDB_CODE_VND_INVALID_VGROUP_ID); + if (code != TSDB_CODE_SUCCESS) { + mError("failed to create stop all streams trans, code:%s", tstrerror(code)); + taosMemoryFree(pBuf); + } + + return code; +} + +int32_t mndStreamSetStopStreamTasksActions(SMnode* pMnode, STrans *pTrans, uint64_t dbUid) { + int32_t code = 0; + SSdb *pSdb = pMnode->pSdb; + void *pIter = NULL; + + while (1) { + SVgObj *pVgroup = NULL; + pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup); + if (pIter == NULL) break; + + if (pVgroup->dbUid == dbUid) { + if ((code = doSetStopAllTasksAction(pMnode, pTrans, pVgroup)) != 0) { + sdbCancelFetch(pSdb, pIter); + sdbRelease(pSdb, pVgroup); + TAOS_RETURN(code); + } + } + + sdbRelease(pSdb, pVgroup); + } + + TAOS_RETURN(code); +} \ No newline at end of file diff --git a/source/dnode/mnode/impl/test/trans/trans2.cpp b/source/dnode/mnode/impl/test/trans/trans2.cpp index b73f07c778..e520fce111 100644 --- a/source/dnode/mnode/impl/test/trans/trans2.cpp +++ b/source/dnode/mnode/impl/test/trans/trans2.cpp @@ -65,7 +65,8 @@ class MndTestTrans2 : public ::testing::Test { msgCb.sendRspFp = sendRsp; msgCb.queueFps[SYNC_QUEUE] = putToQueue; msgCb.queueFps[WRITE_QUEUE] = putToQueue; - msgCb.queueFps[READ_QUEUE] = putToQueue; + msgCb.queueFps[READ_QUEUE] = putToQueue; + msgCb.queueFps[STATUS_QUEUE] = putToQueue; msgCb.mgmt = (SMgmtWrapper *)(&msgCb); // hack tmsgSetDefault(&msgCb); diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index fe1b333108..851bf25665 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -157,9 +157,11 @@ int32_t sndProcessWriteMsg(SSnode *pSnode, SRpcMsg *pMsg, SRpcMsg *pRsp) { case TDMT_STREAM_TASK_DROP: return tqStreamTaskProcessDropReq(pSnode->pMeta, pMsg->pCont, pMsg->contLen); case TDMT_VND_STREAM_TASK_UPDATE: - return tqStreamTaskProcessUpdateReq(pSnode->pMeta,
&pSnode->msgCb, pMsg, true); + return tqStreamTaskProcessUpdateReq(pSnode->pMeta, &pSnode->msgCb, pMsg, true, true); case TDMT_VND_STREAM_TASK_RESET: return tqStreamTaskProcessTaskResetReq(pSnode->pMeta, pMsg->pCont); + case TDMT_VND_STREAM_ALL_STOP: + return tqStreamTaskProcessAllTaskStopReq(pSnode->pMeta, &pSnode->msgCb, pMsg); case TDMT_STREAM_TASK_PAUSE: return tqStreamTaskProcessTaskPauseReq(pSnode->pMeta, pMsg->pCont); case TDMT_STREAM_TASK_RESUME: diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index f0e7af50f3..d224f9a411 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -113,6 +113,7 @@ int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo); int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo); int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo); int32_t vnodeProcessStreamCtrlMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo); +int32_t vnodeProcessStreamLongExecMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo); void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs); void vnodeApplyWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs); void vnodeProposeCommitOnNeed(SVnode *pVnode, bool atExit); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index b13a66da99..635f9946d5 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -242,14 +242,16 @@ void tqClose(STQ*); int tqPushMsg(STQ*, tmsg_t msgType); int tqRegisterPushHandle(STQ* pTq, void* handle, SRpcMsg* pMsg); void tqUnregisterPushHandle(STQ* pTq, void* pHandle); -int tqScanWalAsync(STQ* pTq, bool ckPause); +void tqScanWalAsync(STQ* pTq); int32_t tqStopStreamTasksAsync(STQ* pTq); +int32_t tqStopStreamAllTasksAsync(SStreamMeta* pMeta, SMsgCb* pMsgCb); int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp); int32_t tqProcessTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRetrieveTriggerReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRetrieveTriggerRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessAllTaskStopReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessStreamHbRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessStreamReqCheckpointRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskChkptReportRsp(STQ* pTq, SRpcMsg* pMsg); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 1a33a69f2b..ad3e4d015a 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -1302,7 +1302,7 @@ _checkpoint: } streamMetaWLock(pMeta); - if ((code = streamMetaSaveTask(pMeta, pTask)) != 0) { + if ((code = streamMetaSaveTaskInMeta(pMeta, pTask)) != 0) { streamMetaWUnLock(pMeta); taosHashCancelIterate(pInfoHash, infoHash); TSDB_CHECK_CODE(code, lino, _exit); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 5df4f852c2..7d65673226 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -928,12 +928,12 @@ static void doStartFillhistoryStep2(SStreamTask* pTask, SStreamTask* pStreamTask // now the fill-history task starts to scan data from wal files. 
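For reference, the TDMT_VND_STREAM_ALL_STOP request routed above is built in mndStreamTransAct.c (earlier in this patch) with the usual two-pass layout: compute the encoded size first, allocate SMsgHead plus payload, then encode into the region after the header with the header fields in network byte order. A freestanding approximation of that layout in plain C; the structs and the encoder are stand-ins, only the shape matches the real tEncodeSize/tEncoder flow:

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    typedef struct { int32_t contLen; int32_t vgId; } SMsgHead;       /* stand-in */
    typedef struct { int64_t streamId; } SStreamTaskStopReq;          /* stand-in */

    /* pass 1 (buf == NULL): report the payload size; pass 2: write it */
    static int32_t encodeStopReq(const SStreamTaskStopReq *req, void *buf) {
      if (buf != NULL) memcpy(buf, &req->streamId, sizeof(req->streamId));
      return (int32_t)sizeof(req->streamId);
    }

    static void *buildStopMsg(int32_t vgId, int32_t *pLen) {
      SStreamTaskStopReq req = {.streamId = -1};  /* -1 == stop all streams */
      int32_t len = encodeStopReq(&req, NULL);    /* pass 1: size only */
      int32_t tlen = (int32_t)sizeof(SMsgHead) + len;

      char *pBuf = malloc(tlen);
      if (pBuf == NULL) return NULL;

      SMsgHead *pHead = (SMsgHead *)pBuf;
      pHead->contLen = (int32_t)htonl((uint32_t)tlen); /* network byte order */
      pHead->vgId = (int32_t)htonl((uint32_t)vgId);
      (void)encodeStopReq(&req, pBuf + sizeof(SMsgHead)); /* pass 2 */

      *pLen = tlen;
      return pBuf;
    }

    int main(void) {
      int32_t len = 0;
      void *msg = buildStopMsg(2, &len);
      printf("built %d-byte stop-all msg: %s\n", len, msg ? "ok" : "oom");
      free(msg);
      return 0;
    }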
code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_SCANHIST_DONE); - if (code == TSDB_CODE_SUCCESS) { - code = tqScanWalAsync(pTq, false); - if (code) { - tqError("vgId:%d failed to start scan wal file, code:%s", vgId, tstrerror(code)); - } - } +// if (code == TSDB_CODE_SUCCESS) { +// code = tqScanWalAsync(pTq, false); +// if (code) { +// tqError("vgId:%d failed to start scan wal file, code:%s", vgId, tstrerror(code)); +// } +// } } } @@ -1121,23 +1121,14 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { // extracted submit data from wal files for all tasks if (req.reqType == STREAM_EXEC_T_EXTRACT_WAL_DATA) { return tqScanWal(pTq); - } + } else { + code = tqStreamTaskProcessRunReq(pTq->pStreamMeta, pMsg, vnodeIsRoleLeader(pTq->pVnode)); + if (code) { + tqError("vgId:%d failed to create task run req, code:%s", TD_VID(pTq->pVnode), tstrerror(code)); + } - code = tqStreamTaskProcessRunReq(pTq->pStreamMeta, pMsg, vnodeIsRoleLeader(pTq->pVnode)); - if (code) { - tqError("vgId:%d failed to create task run req, code:%s", TD_VID(pTq->pVnode), tstrerror(code)); return code; } - - // let's continue scan data in the wal files - if (req.reqType >= 0 || req.reqType == STREAM_EXEC_T_RESUME_TASK) { - code = tqScanWalAsync(pTq, false); // it's ok to failed - if (code) { - tqError("vgId:%d failed to start scan wal file, code:%s", pTq->pStreamMeta->vgId, tstrerror(code)); - } - } - - return code; } int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg) { @@ -1381,13 +1372,18 @@ int32_t tqProcessTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg) { } int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) { - return tqStreamTaskProcessUpdateReq(pTq->pStreamMeta, &pTq->pVnode->msgCb, pMsg, pTq->pVnode->restored); + return tqStreamTaskProcessUpdateReq(pTq->pStreamMeta, &pTq->pVnode->msgCb, pMsg, + pTq->pVnode->restored, (pTq->pStreamMeta->role == NODE_ROLE_LEADER)); } int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg) { return tqStreamTaskProcessTaskResetReq(pTq->pStreamMeta, pMsg->pCont); } +int32_t tqProcessAllTaskStopReq(STQ* pTq, SRpcMsg* pMsg) { + return tqStreamTaskProcessAllTaskStopReq(pTq->pStreamMeta, &pTq->pVnode->msgCb, pMsg); +} + int32_t tqProcessTaskRetrieveTriggerReq(STQ* pTq, SRpcMsg* pMsg) { int32_t vgId = TD_VID(pTq->pVnode); diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index 2b2667773a..fc83343c99 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -49,20 +49,6 @@ int32_t tqPushMsg(STQ* pTq, tmsg_t msgType) { } } - streamMetaRLock(pTq->pStreamMeta); - int32_t numOfTasks = streamMetaGetNumOfTasks(pTq->pStreamMeta); - streamMetaRUnLock(pTq->pStreamMeta); - -// tqTrace("vgId:%d handle submit, restore:%d, numOfTasks:%d", TD_VID(pTq->pVnode), pTq->pVnode->restored, numOfTasks); - - // push data for stream processing: - // 1. the vnode has already been restored. - // 2. the vnode should be the leader. - // 3. the stream is not suspended yet. 
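The tqPush.c removal below deletes the last per-submit trigger: tqPushMsg no longer kicks a wal scan on every write. Under the new model a recurring timer polls the wal, so any number of submits landing between two ticks collapses into a single scan. A rough sketch of why that coalescing is safe, with illustrative names only:

    #include <stdbool.h>
    #include <stdio.h>

    /* old path: every submit scheduled a scan; new path: submits implicitly
     * mark the wal dirty and the recurring scan-wal timer does the work */
    static bool walDirty = false;

    static void onSubmit(void) { walDirty = true; }

    static void onScanTimerTick(void) {
      if (!walDirty) return;   /* nothing new since the last tick */
      walDirty = false;
      printf("scan wal once for all stream tasks\n");
    }

    int main(void) {
      onSubmit(); onSubmit(); onSubmit(); /* a burst of writes ... */
      onScanTimerTick();                  /* ... still costs one scan */
      return 0;
    }

The cost is bounded latency (up to one polling interval) in exchange for removing the meta lock traffic that every submit used to pay.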
- if ((!tsDisableStream) && (numOfTasks > 0)) { - code = tqScanWalAsync(pTq, true); - } - return code; } diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index 7e0b118474..c1c3623bde 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -1428,6 +1428,7 @@ int32_t handleResultBlockMsg(SStreamTask* pTask, SSDataBlock* pDataBlock, int32_ code = setDstTableDataUid(pVnode, pTask, pDataBlock, stbFullName, &tbData); if (code != TSDB_CODE_SUCCESS) { tqError("vgId:%d s-task:%s dst-table not exist, stb:%s discard stream results", vgId, id, stbFullName); + tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); return code; } @@ -1439,12 +1440,14 @@ int32_t handleResultBlockMsg(SStreamTask* pTask, SSDataBlock* pDataBlock, int32_ tbData.pCreateTbReq = NULL; } + tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); return code; } void* p = taosArrayPush(submitReq.aSubmitTbData, &tbData); if (p == NULL) { tqDebug("vgId:%d, s-task:%s failed to build submit msg, code:%s, data lost", vgId, id, tstrerror(terrno)); + tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); return terrno; } diff --git a/source/dnode/vnode/src/tq/tqStreamTask.c b/source/dnode/vnode/src/tq/tqStreamTask.c index 9ea84830f1..08ca02e20e 100644 --- a/source/dnode/vnode/src/tq/tqStreamTask.c +++ b/source/dnode/vnode/src/tq/tqStreamTask.c @@ -16,23 +16,19 @@ #include "tq.h" #include "vnd.h" -#define MAX_REPEAT_SCAN_THRESHOLD 3 -#define SCAN_WAL_IDLE_DURATION 500 // idle for 500ms to do next wal scan +#define SCAN_WAL_IDLE_DURATION 250 // idle for 250ms before the next wal scan +#define SCAN_WAL_WAIT_COUNT 2 typedef struct SBuildScanWalMsgParam { int64_t metaId; - int32_t numOfTasks; - int8_t restored; SMsgCb msgCb; } SBuildScanWalMsgParam; -static int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta); +static int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta, int32_t* pNumOfTasks); static int32_t setWalReaderStartOffset(SStreamTask* pTask, int32_t vgId); static bool handleFillhistoryScanComplete(SStreamTask* pTask, int64_t ver); static bool taskReadyForDataFromWal(SStreamTask* pTask); static int32_t doPutDataIntoInputQ(SStreamTask* pTask, int64_t maxVer, int32_t* numOfItems, bool* pSucc); -static int32_t tqScanWalInFuture(STQ* pTq, int32_t numOfTasks, int32_t idleDuration); -static int32_t doScanWalAsync(STQ* pTq, bool ckPause); // extract data blocks(submit/delete) from WAL, and add them into the input queue for all the sources tasks. int32_t tqScanWal(STQ* pTq) { SStreamMeta* pMeta = pTq->pStreamMeta; int32_t vgId = pMeta->vgId; int64_t st = taosGetTimestampMs(); int32_t numOfTasks = 0; + int64_t el = 0; + int32_t code = 0; - tqDebug("vgId:%d continue to check if data in wal are available, scanCounter:%d", vgId, pMeta->scanInfo.scanCounter); - - // check all tasks - int32_t code = doScanWalForAllTasks(pMeta); - if (code) { - tqError("vgId:%d failed to start all tasks, try next time, code:%s", vgId, tstrerror(code)); + int32_t old = atomic_val_compare_exchange_32(&pMeta->scanInfo.scanSentinel, 0, 1); + if (old == 0) { + tqDebug("vgId:%d try to scan wal to extract data", vgId); + } else { + tqDebug("vgId:%d already in wal scan, abort", vgId); return code; } - streamMetaWLock(pMeta); - int32_t times = (--pMeta->scanInfo.scanCounter); - if (times < 0) { - tqError("vgId:%d invalid scan counter:%d, reset to 0", vgId, times); - times = 0;
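tqScanWal now protects itself with two cheap guards instead of the old scanCounter bookkeeping: a compare-and-swap on scanInfo.scanSentinel so only one scan runs at a time, and a timestamp check that skips scans starting within 200ms of the previous one. A compact model of both guards using C11 atomics; the sentinel and timestamp are illustrative stand-ins for the SStreamMeta fields:

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    static atomic_int scanSentinel;   /* 0 = idle, 1 = scan in progress */
    static int64_t    lastScanTs;     /* ms timestamp of the last finished scan */

    static int scanWalOnce(int64_t nowMs) {
      int expected = 0;
      if (!atomic_compare_exchange_strong(&scanSentinel, &expected, 1)) {
        printf("already in wal scan, abort\n");
        return -1;
      }
      if (lastScanTs > 0 && nowMs - lastScanTs < 200) { /* rate limit */
        printf("scanned %lldms ago, skip\n", (long long)(nowMs - lastScanTs));
        atomic_store(&scanSentinel, 0);
        return 0;
      }
      printf("scan wal for all tasks\n");
      lastScanTs = nowMs;             /* record completion time */
      atomic_store(&scanSentinel, 0);
      return 0;
    }

    int main(void) {
      scanWalOnce(1000);  /* runs */
      scanWalOnce(1100);  /* skipped: only 100ms later */
      scanWalOnce(1300);  /* runs again */
      return 0;
    }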
+ // if the previous scan finished less than 200ms ago, skip this scan. + if ((pMeta->scanInfo.lastScanTs > 0) && (st - pMeta->scanInfo.lastScanTs < 200)) { + tqDebug("vgId:%d scan wal less than 200ms, do nothing", vgId); + atomic_store_32(&pMeta->scanInfo.scanSentinel, 0); + return code; } - numOfTasks = taosArrayGetSize(pMeta->pTaskList); - streamMetaWUnLock(pMeta); + // check all tasks + code = doScanWalForAllTasks(pMeta, &numOfTasks); - int64_t el = (taosGetTimestampMs() - st); - tqDebug("vgId:%d scan wal for stream tasks completed, elapsed time:%" PRId64 " ms", vgId, el); + pMeta->scanInfo.lastScanTs = taosGetTimestampMs(); + el = (pMeta->scanInfo.lastScanTs - st); - if (times > 0) { - tqDebug("vgId:%d scan wal for stream tasks for %d times in %dms", vgId, times, SCAN_WAL_IDLE_DURATION); - code = tqScanWalInFuture(pTq, numOfTasks, SCAN_WAL_IDLE_DURATION); - if (code) { - tqError("vgId:%d sched scan wal in %dms failed, ignore this failure", vgId, SCAN_WAL_IDLE_DURATION); - } + if (code) { + tqError("vgId:%d failed to scan wal for all tasks, try next time, elapsed time:%" PRId64 "ms code:%s", vgId, el, + tstrerror(code)); + } else { + tqDebug("vgId:%d scan wal for stream tasks completed, elapsed time:%" PRId64 "ms", vgId, el); } + atomic_store_32(&pMeta->scanInfo.scanSentinel, 0); return code; } -static void doStartScanWal(void* param, void* tmrId) { - int32_t vgId = 0; - int32_t code = 0; +static bool waitEnoughDuration(SStreamMeta* pMeta) { + if ((++pMeta->scanInfo.tickCounter) >= SCAN_WAL_WAIT_COUNT) { + pMeta->scanInfo.tickCounter = 0; + return true; + } + return false; +} + +static void doStartScanWal(void* param, void* tmrId) { + int32_t vgId = 0; + int32_t code = 0; + int32_t numOfTasks = 0; + tmr_h pTimer = NULL; SBuildScanWalMsgParam* pParam = (SBuildScanWalMsgParam*)param; + tqDebug("start to do scan wal in tmr, metaRid:%" PRId64, pParam->metaId); + SStreamMeta* pMeta = taosAcquireRef(streamMetaRefPool, pParam->metaId); if (pMeta == NULL) { tqError("metaRid:%" PRId64 " not valid now, stream meta has been freed", pParam->metaId); @@ -87,10 +96,19 @@ static void doStartScanWal(void* param, void* tmrId) { return; } + vgId = pMeta->vgId; + code = streamTimerGetInstance(&pTimer); + if (code) { + tqFatal("vgId:%d failed to get tmr ctrl during sched scan wal, not scan wal, code:%s", vgId, tstrerror(code)); + (void)taosReleaseRef(streamMetaRefPool, pParam->metaId); + taosMemoryFree(pParam); + return; + } + if (pMeta->closeFlag) { code = taosReleaseRef(streamMetaRefPool, pParam->metaId); if (code == TSDB_CODE_SUCCESS) { - tqDebug("vgId:%d jump out of scan wal timer since closed", vgId); + tqInfo("vgId:%d jump out of scan wal timer since closed", vgId); } else { tqError("vgId:%d failed to release ref for streamMeta, rid:%" PRId64 " code:%s", vgId, pParam->metaId, tstrerror(code)); @@ -100,77 +117,107 @@ static void doStartScanWal(void* param, void* tmrId) { return; } - vgId = pMeta->vgId; + if (pMeta->role != NODE_ROLE_LEADER) { + tqDebug("vgId:%d not leader, role:%d not scan wal anymore", vgId, pMeta->role); - tqDebug("vgId:%d create msg to start wal scan, numOfTasks:%d, vnd restored:%d", vgId, pParam->numOfTasks, - pParam->restored); -#if 0 - // wait for the vnode is freed, and invalid read may occur.
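waitEnoughDuration above turns the 250ms timer into an effective 500ms work cadence: the callback re-arms itself every SCAN_WAL_IDLE_DURATION but only does real work every SCAN_WAL_WAIT_COUNT ticks. The divider in isolation, with hypothetical names:

    #include <stdbool.h>
    #include <stdio.h>

    #define SCAN_WAL_IDLE_DURATION 250 /* timer period in ms */
    #define SCAN_WAL_WAIT_COUNT    2   /* work every 2nd tick => 500ms cadence */

    static int tickCounter = 0;

    static bool waitEnoughDuration(void) {
      if (++tickCounter >= SCAN_WAL_WAIT_COUNT) {
        tickCounter = 0;
        return true;
      }
      return false;
    }

    int main(void) {
      for (int tick = 1; tick <= 6; ++tick) {
        if (waitEnoughDuration()) {
          printf("tick %d (t=%dms): scan\n", tick, tick * SCAN_WAL_IDLE_DURATION);
        } else {
          printf("tick %d (t=%dms): re-arm only\n", tick, tick * SCAN_WAL_IDLE_DURATION);
        }
      }
      return 0;
    }

The finer tick keeps the callback responsive to state changes (leader switch, shutdown) without doubling the scan frequency.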
+ code = taosReleaseRef(streamMetaRefPool, pParam->metaId); + if (code == TSDB_CODE_SUCCESS) { + tqDebug("vgId:%d jump out of scan wal timer since not leader", vgId); + } else { + tqError("vgId:%d failed to release ref for streamMeta, rid:%" PRId64 " code:%s", vgId, pParam->metaId, + tstrerror(code)); + } + + taosMemoryFree(pParam); + return; + } + + if (pMeta->startInfo.startAllTasks) { + tqDebug("vgId:%d in restart procedure, not ready to scan wal", vgId); + goto _end; + } + + if (!waitEnoughDuration(pMeta)) { + streamTmrStart(doStartScanWal, SCAN_WAL_IDLE_DURATION, pParam, pTimer, &pMeta->scanInfo.scanTimer, vgId, + "scan-wal"); + code = taosReleaseRef(streamMetaRefPool, pParam->metaId); + if (code) { + tqError("vgId:%d failed to release ref for streamMeta, rid:%" PRId64 " code:%s", vgId, pParam->metaId, + tstrerror(code)); + } + return; + } + + code = streamMetaTryRlock(pMeta); + if (code == 0) { + numOfTasks = taosArrayGetSize(pMeta->pTaskList); + streamMetaRUnLock(pMeta); + } else { + numOfTasks = 0; + } + + if (numOfTasks == 0) { + goto _end; + } + + tqDebug("vgId:%d create msg to start wal scan, numOfTasks:%d", vgId, numOfTasks); + + #if 0 + // wait for the vnode is freed, and invalid read may occur. taosMsleep(10000); -#endif + #endif code = streamTaskSchedTask(&pParam->msgCb, vgId, 0, 0, STREAM_EXEC_T_EXTRACT_WAL_DATA); if (code) { tqError("vgId:%d failed sched task to scan wal, code:%s", vgId, tstrerror(code)); } +_end: + streamTmrStart(doStartScanWal, SCAN_WAL_IDLE_DURATION, pParam, pTimer, &pMeta->scanInfo.scanTimer, vgId, "scan-wal"); + tqDebug("vgId:%d scan-wal will start in %dms", vgId, SCAN_WAL_IDLE_DURATION*SCAN_WAL_WAIT_COUNT); + code = taosReleaseRef(streamMetaRefPool, pParam->metaId); if (code) { tqError("vgId:%d failed to release ref for streamMeta, rid:%" PRId64 " code:%s", vgId, pParam->metaId, tstrerror(code)); } - - taosMemoryFree(pParam); } -int32_t tqScanWalInFuture(STQ* pTq, int32_t numOfTasks, int32_t idleDuration) { +void tqScanWalAsync(STQ* pTq) { SStreamMeta* pMeta = pTq->pStreamMeta; int32_t code = 0; int32_t vgId = TD_VID(pTq->pVnode); tmr_h pTimer = NULL; SBuildScanWalMsgParam* pParam = NULL; + // 1. the vnode should be the leader. + // 2. the stream isn't disabled + if ((pMeta->role == NODE_ROLE_FOLLOWER) || tsDisableStream) { + tqInfo("vgId:%d follower node or stream disabled, skip wal scan", vgId); + return; + } + pParam = taosMemoryMalloc(sizeof(SBuildScanWalMsgParam)); if (pParam == NULL) { - return terrno; + tqError("vgId:%d failed to alloc scan-wal param, stream tasks not scheduled, code:%s", vgId, tstrerror(terrno)); + return; } pParam->metaId = pMeta->rid; - pParam->numOfTasks = numOfTasks; - pParam->restored = pTq->pVnode->restored; pParam->msgCb = pTq->pVnode->msgCb; code = streamTimerGetInstance(&pTimer); if (code) { - tqError("vgId:%d failed to get tmr ctrl during sched scan wal", vgId); + tqFatal("vgId:%d failed to get tmr ctrl during sched scan wal", vgId); taosMemoryFree(pParam); } else { - streamTmrStart(doStartScanWal, idleDuration, pParam, pTimer, &pMeta->scanInfo.scanTimer, vgId, "scan-wal-fut"); + streamTmrStart(doStartScanWal, SCAN_WAL_IDLE_DURATION, pParam, pTimer, &pMeta->scanInfo.scanTimer, vgId, + "scan-wal"); } - - return code; } -int32_t tqScanWalAsync(STQ* pTq, bool ckPause) { - SStreamMeta* pMeta = pTq->pStreamMeta; - bool alreadyRestored = pTq->pVnode->restored; - int32_t code = 0; - - // do not launch the stream tasks, if it is a follower or not restored vnode.
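With the event-driven entry points gone, tqScanWalAsync has exactly one job left: arm the recurring scan timer, and only on a leader vnode with stream processing enabled. The gate reduces to a precondition check before starting the timer; a sketch with invented types, not the vnode API:

    #include <stdbool.h>
    #include <stdio.h>

    typedef enum { ROLE_FOLLOWER, ROLE_LEADER } ENodeRole;

    static bool timerArmed = false;

    /* illustrative stand-in for tqScanWalAsync(): arm the poller only when
     * this vnode is the leader and stream processing is enabled */
    static void startScanTimer(ENodeRole role, bool streamDisabled) {
      if (role == ROLE_FOLLOWER || streamDisabled) {
        printf("follower or stream disabled, skip wal scan\n");
        return;
      }
      timerArmed = true;
      printf("scan-wal timer armed\n");
    }

    int main(void) {
      startScanTimer(ROLE_FOLLOWER, false); /* no-op */
      startScanTimer(ROLE_LEADER, false);   /* arms the recurring timer */
      return 0;
    }

In this patch the call site is tqUpdateNodeStage (see the tqUtil.c hunk further below), so a follower that is promoted to leader starts polling without any extra trigger, and the timer callback itself bails out if the role changes back.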
- if (!(vnodeIsRoleLeader(pTq->pVnode) && alreadyRestored)) { - return TSDB_CODE_SUCCESS; - } - - streamMetaWLock(pMeta); - code = doScanWalAsync(pTq, ckPause); - streamMetaWUnLock(pMeta); - return code; -} - -int32_t tqStopStreamTasksAsync(STQ* pTq) { - SStreamMeta* pMeta = pTq->pStreamMeta; - int32_t vgId = pMeta->vgId; - return streamTaskSchedTask(&pTq->pVnode->msgCb, vgId, 0, 0, STREAM_EXEC_T_STOP_ALL_TASKS); +int32_t tqStopStreamAllTasksAsync(SStreamMeta* pMeta, SMsgCb* pMsgCb) { + return streamTaskSchedTask(pMsgCb, pMeta->vgId, 0, 0, STREAM_EXEC_T_STOP_ALL_TASKS); } int32_t setWalReaderStartOffset(SStreamTask* pTask, int32_t vgId) { @@ -273,9 +320,13 @@ bool taskReadyForDataFromWal(SStreamTask* pTask) { return false; } - // check if input queue is full or not + // check whether input queue is full or not if (streamQueueIsFull(pTask->inputq.queue)) { - tqTrace("s-task:%s input queue is full, do nothing", pTask->id.idStr); + tqTrace("s-task:%s input queue is full, launch task without scanning wal", pTask->id.idStr); + int32_t code = streamTrySchedExec(pTask); + if (code) { + tqError("s-task:%s failed to start task while inputQ is full", pTask->id.idStr); + } return false; } @@ -347,13 +398,10 @@ int32_t doPutDataIntoInputQ(SStreamTask* pTask, int64_t maxVer, int32_t* numOfIt return code; } -int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta) { +int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta, int32_t* pNumOfTasks) { int32_t vgId = pStreamMeta->vgId; SArray* pTaskList = NULL; - int32_t numOfTasks = taosArrayGetSize(pStreamMeta->pTaskList); - if (numOfTasks == 0) { - return TSDB_CODE_SUCCESS; - } + int32_t numOfTasks = 0; // clone the task list, to avoid the task update during scan wal files streamMetaWLock(pStreamMeta); @@ -364,10 +412,13 @@ int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta) { return terrno; } - tqDebug("vgId:%d start to check wal to extract new submit block for %d tasks", vgId, numOfTasks); - // update the new task number numOfTasks = taosArrayGetSize(pTaskList); + if (pNumOfTasks != NULL) { + *pNumOfTasks = numOfTasks; + } + + tqDebug("vgId:%d start to check wal to extract new submit block for %d tasks", vgId, numOfTasks); for (int32_t i = 0; i < numOfTasks; ++i) { STaskId* pTaskId = taosArrayGet(pTaskList, i); @@ -426,51 +477,9 @@ int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta) { return TSDB_CODE_SUCCESS; } -int32_t doScanWalAsync(STQ* pTq, bool ckPause) { - SStreamMeta* pMeta = pTq->pStreamMeta; - bool alreadyRestored = pTq->pVnode->restored; - int32_t vgId = pMeta->vgId; - int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); - - if (numOfTasks == 0) { - tqDebug("vgId:%d no stream tasks existed to run", vgId); - return 0; - } - - if (pMeta->startInfo.startAllTasks) { - tqTrace("vgId:%d in restart procedure, not scan wal", vgId); - return 0; - } - - pMeta->scanInfo.scanCounter += 1; - if (pMeta->scanInfo.scanCounter > MAX_REPEAT_SCAN_THRESHOLD) { - pMeta->scanInfo.scanCounter = MAX_REPEAT_SCAN_THRESHOLD; - } - - if (pMeta->scanInfo.scanCounter > 1) { - tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->scanInfo.scanCounter); - return 0; - } - - int32_t numOfPauseTasks = pMeta->numOfPausedTasks; - if (ckPause && numOfTasks == numOfPauseTasks) { - tqDebug("vgId:%d ignore all submit, all streams had been paused, reset the walScanCounter", vgId); - - // reset the counter value, since we do not launch the scan wal operation. 
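doScanWalForAllTasks() now reports the task count through an out-parameter and, as before, clones the task id list while holding the meta lock, so concurrent task registration or removal cannot invalidate the iteration. The pattern in isolation (POSIX threads, simplified types):

```c
// Copy the shared list under the lock, then iterate the private copy.
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
  pthread_mutex_t lock;
  int*            ids;
  int             num;
} TaskList;

static int* cloneIds(TaskList* tl, int* num) {
  pthread_mutex_lock(&tl->lock);
  int* copy = malloc(sizeof(int) * tl->num);
  if (copy != NULL) {
    memcpy(copy, tl->ids, sizeof(int) * tl->num);
    *num = tl->num;
  } else {
    *num = 0;  // allocation failed, report an empty snapshot
  }
  pthread_mutex_unlock(&tl->lock);
  return copy;
}

int main(void) {
  int      ids[] = {1, 2, 3};
  TaskList tl = {PTHREAD_MUTEX_INITIALIZER, ids, 3};
  int      n = 0;
  int*     snapshot = cloneIds(&tl, &n);
  for (int i = 0; i < n; ++i) printf("scan wal for task %d\n", snapshot[i]);
  free(snapshot);
  return 0;
}
```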
- pMeta->scanInfo.scanCounter = 0; - return 0; - } - - tqDebug("vgId:%d create msg to start wal scan to launch stream tasks, numOfTasks:%d, vnd restored:%d", vgId, - numOfTasks, alreadyRestored); - - return streamTaskSchedTask(&pTq->pVnode->msgCb, vgId, 0, 0, STREAM_EXEC_T_EXTRACT_WAL_DATA); -} - void streamMetaFreeTQDuringScanWalError(STQ* pTq) { SBuildScanWalMsgParam* p = taosMemoryCalloc(1, sizeof(SBuildScanWalMsgParam)); p->metaId = pTq->pStreamMeta->rid; - p->numOfTasks = 0; doStartScanWal(p, 0); } \ No newline at end of file diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index f92cd6d972..de21f6eb0b 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -47,6 +47,10 @@ END: void tqUpdateNodeStage(STQ* pTq, bool isLeader) { SSyncState state = syncGetState(pTq->pVnode->sync); streamMetaUpdateStageRole(pTq->pStreamMeta, state.term, isLeader); + + if (isLeader) { + tqScanWalAsync(pTq); + } } static int32_t tqInitTaosxRsp(SMqDataRsp* pRsp, STqOffsetVal pOffset) { diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c index fe4b2ae8f1..1afccd3d01 100644 --- a/source/dnode/vnode/src/tqCommon/tqCommon.c +++ b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -147,7 +147,7 @@ int32_t tqStreamStartOneTaskAsync(SStreamMeta* pMeta, SMsgCb* cb, int64_t stream } // this is to process request from transaction, always return true. -int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pMsg, bool restored) { +int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pMsg, bool restored, bool isLeader) { int32_t vgId = pMeta->vgId; char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t len = pMsg->contLen - sizeof(SMsgHead); @@ -268,13 +268,13 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM // stream do update the nodeEp info, write it into stream meta. 
if (updated) { tqDebug("s-task:%s vgId:%d save task after update epset, and stop task", idstr, vgId); - code = streamMetaSaveTask(pMeta, pTask); + code = streamMetaSaveTaskInMeta(pMeta, pTask); if (code) { tqError("s-task:%s vgId:%d failed to save task, code:%s", idstr, vgId, tstrerror(code)); } if (pHTask != NULL) { - code = streamMetaSaveTask(pMeta, pHTask); + code = streamMetaSaveTaskInMeta(pMeta, pHTask); if (code) { tqError("s-task:%s vgId:%d failed to save related history task, code:%s", idstr, vgId, tstrerror(code)); } @@ -306,14 +306,19 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); int32_t updateTasks = taosHashGetSize(pMeta->updateInfo.pTasks); - if (restored) { + if (restored && isLeader) { tqDebug("vgId:%d s-task:0x%x update epset transId:%d, set the restart flag", vgId, req.taskId, req.transId); pMeta->startInfo.tasksWillRestart = 1; } if (updateTasks < numOfTasks) { - tqDebug("vgId:%d closed tasks:%d, unclosed:%d, all tasks will be started when nodeEp update completed", vgId, - updateTasks, (numOfTasks - updateTasks)); + if (isLeader) { + tqDebug("vgId:%d closed tasks:%d, unclosed:%d, all tasks will be started when nodeEp update completed", vgId, + updateTasks, (numOfTasks - updateTasks)); + } else { + tqDebug("vgId:%d closed tasks:%d, unclosed:%d, follower not restart tasks", vgId, updateTasks, + (numOfTasks - updateTasks)); + } } else { if ((code = streamMetaCommit(pMeta)) < 0) { // always return true @@ -324,17 +329,21 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM streamMetaClearSetUpdateTaskListComplete(pMeta); - if (!restored) { - tqDebug("vgId:%d vnode restore not completed, not start all tasks", vgId); - } else { - tqDebug("vgId:%d all %d task(s) nodeEp updated and closed, transId:%d", vgId, numOfTasks, req.transId); + if (isLeader) { + if (!restored) { + tqDebug("vgId:%d vnode restore not completed, not start all tasks", vgId); + } else { + tqDebug("vgId:%d all %d task(s) nodeEp updated and closed, transId:%d", vgId, numOfTasks, req.transId); #if 0 taosMSleep(5000);// for test purpose, to trigger the leader election #endif - code = tqStreamTaskStartAsync(pMeta, cb, true); - if (code) { - tqError("vgId:%d async start all tasks, failed, code:%s", vgId, tstrerror(code)); + code = tqStreamTaskStartAsync(pMeta, cb, true); + if (code) { + tqError("vgId:%d async start all tasks, failed, code:%s", vgId, tstrerror(code)); + } } + } else { + tqDebug("vgId:%d follower nodes not restart tasks", vgId); } } @@ -751,6 +760,8 @@ int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen } streamMetaWUnLock(pMeta); + tqDebug("vgId:%d process drop task:0x%x completed", vgId, pReq->taskId); + return 0; // always return success } @@ -865,6 +876,9 @@ int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLead } else if (type == STREAM_EXEC_T_ADD_FAILED_TASK) { code = streamMetaAddFailedTask(pMeta, req.streamId, req.taskId); return code; + } else if (type == STREAM_EXEC_T_STOP_ONE_TASK) { + code = streamMetaStopOneTask(pMeta, req.streamId, req.taskId); + return code; } else if (type == STREAM_EXEC_T_RESUME_TASK) { // task resume to run after idle for a while SStreamTask* pTask = NULL; code = streamMetaAcquireTask(pMeta, req.streamId, req.taskId, &pTask); @@ -946,10 +960,10 @@ int32_t tqStartTaskCompleteCallback(SStreamMeta* pMeta) { streamMetaWUnLock(pMeta); - if (scanWal && (vgId != SNODE_HANDLE)) { - tqDebug("vgId:%d 
start scan wal for executing tasks", vgId); - code = tqScanWalAsync(pMeta->ahandle, true); - } +// if (scanWal && (vgId != SNODE_HANDLE)) { +// tqDebug("vgId:%d start scan wal for executing tasks", vgId); +// code = tqScanWalAsync(pMeta->ahandle, true); +// } return code; } @@ -991,6 +1005,39 @@ int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, char* pMsg) { return TSDB_CODE_SUCCESS; } +int32_t tqStreamTaskProcessAllTaskStopReq(SStreamMeta* pMeta, SMsgCb* pMsgCb, SRpcMsg* pMsg) { + int32_t code = 0; + int32_t vgId = pMeta->vgId; + char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t len = pMsg->contLen - sizeof(SMsgHead); + SDecoder decoder; + + SStreamTaskStopReq req = {0}; + tDecoderInit(&decoder, (uint8_t*)msg, len); + if ((code = tDecodeStreamTaskStopReq(&decoder, &req)) < 0) { + tqError("vgId:%d failed to decode stop all streams, code:%s", pMeta->vgId, tstrerror(code)); + tDecoderClear(&decoder); + return TSDB_CODE_SUCCESS; + } + + tDecoderClear(&decoder); + + // stop all stream tasks, only invoked when trying to drop db + if (req.streamId <= 0) { + tqDebug("vgId:%d recv msg to stop all tasks in sync before dropping vnode", vgId); + code = streamMetaStopAllTasks(pMeta); + if (code) { + tqError("vgId:%d failed to stop all tasks, code:%s", vgId, tstrerror(code)); + } + + } else { // stop only one stream tasks + + } + + // always return success + return TSDB_CODE_SUCCESS; +} + int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { SRetrieveChkptTriggerReq req = {0}; SStreamTask* pTask = NULL; @@ -1178,7 +1225,7 @@ static int32_t tqProcessTaskResumeImpl(void* handle, SStreamTask* pTask, int64_t pTask->hTaskInfo.operatorOpen = false; code = streamStartScanHistoryAsync(pTask, igUntreated); } else if (level == TASK_LEVEL__SOURCE && (streamQueueGetNumOfItems(pTask->inputq.queue) == 0)) { - code = tqScanWalAsync((STQ*)handle, false); +// code = tqScanWalAsync((STQ*)handle, false); } else { code = streamTrySchedExec(pTask); } diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index abaa61744d..22d9c2657d 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -750,6 +750,12 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg goto _err; } + } break; + case TDMT_VND_STREAM_ALL_STOP: { + if (pVnode->restored && vnodeIsLeader(pVnode) && (code = tqProcessAllTaskStopReq(pVnode->pTq, pMsg)) < 0) { + goto _err; + } + } break; case TDMT_VND_ALTER_CONFIRM: needCommit = pVnode->config.hashChange; @@ -948,8 +954,6 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) return tqProcessTaskRetrieveReq(pVnode->pTq, pMsg); case TDMT_STREAM_RETRIEVE_RSP: return tqProcessTaskRetrieveRsp(pVnode->pTq, pMsg); - case TDMT_VND_STREAM_SCAN_HISTORY: - return tqProcessTaskScanHistory(pVnode->pTq, pMsg); case TDMT_VND_GET_STREAM_PROGRESS: return tqStreamProgressRetrieveReq(pVnode->pTq, pMsg); default: @@ -996,6 +1000,22 @@ int32_t vnodeProcessStreamCtrlMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pIn } } +int32_t vnodeProcessStreamLongExecMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { + vTrace("vgId:%d, msg:%p in stream long exec queue is processing", pVnode->config.vgId, pMsg); + if (!syncIsReadyForRead(pVnode->sync)) { + vnodeRedirectRpcMsg(pVnode, pMsg, terrno); + return 0; + } + + switch (pMsg->msgType) { + case TDMT_VND_STREAM_SCAN_HISTORY: + return tqProcessTaskScanHistory(pVnode->pTq, pMsg); + default: + 
vError("unknown msg type:%d in stream long exec queue", pMsg->msgType); + return TSDB_CODE_APP_ERROR; + } +} + void smaHandleRes(void *pVnode, int64_t smaId, const SArray *data) { int32_t code = tdProcessTSmaInsert(((SVnode *)pVnode)->pSma, smaId, (const char *)data); if (code) { diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 068f4dec3d..a7e8a43fae 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -624,7 +624,7 @@ static void vnodeBecomeFollower(const SSyncFSM *pFsm) { if (pVnode->pTq) { tqUpdateNodeStage(pVnode->pTq, false); - if (tqStopStreamTasksAsync(pVnode->pTq) != 0) { + if (tqStopStreamAllTasksAsync(pVnode->pTq->pStreamMeta, &pVnode->msgCb) != 0) { vError("vgId:%d, failed to stop stream tasks", pVnode->config.vgId); } } diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index ce23a9063f..7f4335282f 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -995,26 +995,43 @@ int32_t qAsyncKillTask(qTaskInfo_t qinfo, int32_t rspCode) { return TSDB_CODE_SUCCESS; } -int32_t qKillTask(qTaskInfo_t tinfo, int32_t rspCode) { +int32_t qKillTask(qTaskInfo_t tinfo, int32_t rspCode, int64_t waitDuration) { + int64_t st = taosGetTimestampMs(); SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; if (pTaskInfo == NULL) { return TSDB_CODE_QRY_INVALID_QHANDLE; } - qDebug("%s sync killed execTask", GET_TASKID(pTaskInfo)); + if (waitDuration > 0) { + qDebug("%s sync killed execTask, and waiting for %.2fs", GET_TASKID(pTaskInfo), waitDuration/1000.0); + } else { + qDebug("%s async killed execTask", GET_TASKID(pTaskInfo)); + } + setTaskKilled(pTaskInfo, TSDB_CODE_TSC_QUERY_KILLED); - while (1) { - taosWLockLatch(&pTaskInfo->lock); - if (qTaskIsExecuting(pTaskInfo)) { // let's wait for 100 ms and try again - taosWUnLockLatch(&pTaskInfo->lock); - taosMsleep(100); - } else { // not running now - pTaskInfo->code = rspCode; - taosWUnLockLatch(&pTaskInfo->lock); - return TSDB_CODE_SUCCESS; + if (waitDuration > 0) { + while (1) { + taosWLockLatch(&pTaskInfo->lock); + if (qTaskIsExecuting(pTaskInfo)) { // let's wait for 100 ms and try again + taosWUnLockLatch(&pTaskInfo->lock); + + taosMsleep(200); + + int64_t d = taosGetTimestampMs() - st; + if (d >= waitDuration && waitDuration >= 0) { + qWarn("%s waiting more than %.2fs, not wait anymore", GET_TASKID(pTaskInfo), waitDuration / 1000.0); + return TSDB_CODE_SUCCESS; + } + } else { // not running now + pTaskInfo->code = rspCode; + taosWUnLockLatch(&pTaskInfo->lock); + return TSDB_CODE_SUCCESS; + } } } + + return TSDB_CODE_SUCCESS; } bool qTaskIsExecuting(qTaskInfo_t qinfo) { diff --git a/source/libs/executor/src/projectoperator.c b/source/libs/executor/src/projectoperator.c index eb448a13f5..b0f78c1aad 100644 --- a/source/libs/executor/src/projectoperator.c +++ b/source/libs/executor/src/projectoperator.c @@ -42,9 +42,7 @@ typedef struct SIndefOperatorInfo { } SIndefOperatorInfo; static int32_t doGenerateSourceData(SOperatorInfo* pOperator); -static SSDataBlock* doProjectOperation1(SOperatorInfo* pOperator); static int32_t doProjectOperation(SOperatorInfo* pOperator, SSDataBlock** pResBlock); -static SSDataBlock* doApplyIndefinitFunction1(SOperatorInfo* pOperator); static int32_t doApplyIndefinitFunction(SOperatorInfo* pOperator, SSDataBlock** pResBlock); static int32_t setRowTsColumnOutputInfo(SqlFunctionCtx* pCtx, int32_t numOfCols, SArray** pResList); static int32_t 
setFunctionResultOutput(SOperatorInfo* pOperator, SOptrBasicInfo* pInfo, SAggSupporter* pSup,
@@ -557,12 +555,6 @@ static void doHandleDataBlock(SOperatorInfo* pOperator, SSDataBlock* pBlock, SOp
   }
 }
 
-SSDataBlock* doApplyIndefinitFunction1(SOperatorInfo* pOperator) {
-  SSDataBlock* pResBlock = NULL;
-  pOperator->pTaskInfo->code = doApplyIndefinitFunction(pOperator, &pResBlock);
-  return pResBlock;
-}
-
 int32_t doApplyIndefinitFunction(SOperatorInfo* pOperator, SSDataBlock** pResBlock) {
   QRY_PARAM_CHECK(pResBlock);
   SIndefOperatorInfo* pIndefInfo = pOperator->info;
diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c
index dc90e07218..ba39489d02 100755
--- a/source/libs/parser/src/parTranslater.c
+++ b/source/libs/parser/src/parTranslater.c
@@ -7836,6 +7836,52 @@ static SNode* createSetOperProject(const char* pTableAlias, SNode* pNode) {
   return (SNode*)pCol;
 }
 
+static bool isUnionOperator(SNode* pNode) {
+  return QUERY_NODE_SET_OPERATOR == nodeType(pNode) && (((SSetOperator*)pNode)->opType == SET_OP_TYPE_UNION ||
+                                                        ((SSetOperator*)pNode)->opType == SET_OP_TYPE_UNION_ALL);
+}
+
+static int32_t pushdownCastForUnion(STranslateContext* pCxt, SNode* pNode, SExprNode* pExpr, int pos) {
+  int32_t code = TSDB_CODE_SUCCESS;
+  if (isUnionOperator(pNode)) {
+    SSetOperator* pSetOperator = (SSetOperator*)pNode;
+    SNodeList*    pLeftProjections = getProjectList(pSetOperator->pLeft);
+    SNodeList*    pRightProjections = getProjectList(pSetOperator->pRight);
+    if (LIST_LENGTH(pLeftProjections) != LIST_LENGTH(pRightProjections)) {
+      return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INCORRECT_NUM_OF_COL);
+    }
+
+    SNode*  pLeft = NULL;
+    SNode*  pRight = NULL;
+    int32_t index = 0;
+    FORBOTH(pLeft, pLeftProjections, pRight, pRightProjections) {
+      ++index;
+      if (index < pos) {
+        continue;
+      }
+      SNode* pRightFunc = NULL;
+      code = createCastFunc(pCxt, pRight, pExpr->resType, &pRightFunc);
+      if (TSDB_CODE_SUCCESS != code || NULL == pRightFunc) {
+        return code;
+      }
+      REPLACE_LIST2_NODE(pRightFunc);
+      code = pushdownCastForUnion(pCxt, pSetOperator->pRight, (SExprNode*)pRightFunc, index);
+      if (TSDB_CODE_SUCCESS != code) return code;
+
+      SNode* pLeftFunc = NULL;
+      code = createCastFunc(pCxt, pLeft, pExpr->resType, &pLeftFunc);
+      if (TSDB_CODE_SUCCESS != code || NULL == pLeftFunc) {
+        return code;
+      }
+      REPLACE_LIST1_NODE(pLeftFunc);
+      code = pushdownCastForUnion(pCxt, pSetOperator->pLeft, (SExprNode*)pLeftFunc, index);
+      if (TSDB_CODE_SUCCESS != code) return code;
+      break;
+    }
+  }
+  return TSDB_CODE_SUCCESS;
+}
+
 static int32_t translateSetOperProject(STranslateContext* pCxt, SSetOperator* pSetOperator) {
   SNodeList* pLeftProjections = getProjectList(pSetOperator->pLeft);
   SNodeList* pRightProjections = getProjectList(pSetOperator->pRight);
@@ -7845,9 +7891,11 @@ static int32_t translateSetOperProject(STranslateContext* pCxt, SSetOperator* pS
 
   SNode* pLeft = NULL;
   SNode* pRight = NULL;
+  int32_t index = 0;
   FORBOTH(pLeft, pLeftProjections, pRight, pRightProjections) {
     SExprNode* pLeftExpr = (SExprNode*)pLeft;
     SExprNode* pRightExpr = (SExprNode*)pRight;
+    ++index;
     int32_t    comp = dataTypeComp(&pLeftExpr->resType, &pRightExpr->resType);
     if (comp > 0) {
       SNode* pRightFunc = NULL;
@@ -7857,6 +7905,8 @@ static int32_t translateSetOperProject(STranslateContext* pCxt, SSetOperator* pS
       }
       REPLACE_LIST2_NODE(pRightFunc);
       pRightExpr = (SExprNode*)pRightFunc;
+      code = pushdownCastForUnion(pCxt, pSetOperator->pRight, pRightExpr, index);
+      if (TSDB_CODE_SUCCESS != code) return code;
     } else if (comp
< 0) { SNode* pLeftFunc = NULL; int32_t code = createCastFunc(pCxt, pLeft, pRightExpr->resType, &pLeftFunc); @@ -7869,6 +7919,8 @@ static int32_t translateSetOperProject(STranslateContext* pCxt, SSetOperator* pS snprintf(pLeftFuncExpr->userAlias, sizeof(pLeftFuncExpr->userAlias), "%s", pLeftExpr->userAlias); pLeft = pLeftFunc; pLeftExpr = pLeftFuncExpr; + code = pushdownCastForUnion(pCxt, pSetOperator->pLeft, pLeftExpr, index); + if (TSDB_CODE_SUCCESS != code ) return code; } snprintf(pRightExpr->aliasName, sizeof(pRightExpr->aliasName), "%s", pLeftExpr->aliasName); SNode* pProj = createSetOperProject(pSetOperator->stmtName, pLeft); diff --git a/source/libs/qcom/src/querymsg.c b/source/libs/qcom/src/querymsg.c index 1a275de13b..ee41909109 100644 --- a/source/libs/qcom/src/querymsg.c +++ b/source/libs/qcom/src/querymsg.c @@ -91,10 +91,8 @@ int32_t queryBuildTableMetaReqMsg(void *input, char **msg, int32_t msgSize, int3 if (NULL == pBuf) { return terrno; } - if(tSerializeSTableInfoReq(pBuf, bufLen, &infoReq) < 0) - { - return TSDB_CODE_TSC_INVALID_INPUT; - } + int32_t ret = tSerializeSTableInfoReq(pBuf, bufLen, &infoReq); + if (ret < 0) return ret; *msg = pBuf; *msgLen = bufLen; @@ -121,10 +119,8 @@ int32_t queryBuildUseDbMsg(void *input, char **msg, int32_t msgSize, int32_t *ms if (NULL == pBuf) { return terrno; } - if(tSerializeSUseDbReq(pBuf, bufLen, &usedbReq) < 0) - { - return TSDB_CODE_TSC_INVALID_INPUT; - } + int32_t ret = tSerializeSUseDbReq(pBuf, bufLen, &usedbReq); + if (ret < 0) return ret; *msg = pBuf; *msgLen = bufLen; @@ -144,10 +140,9 @@ int32_t queryBuildQnodeListMsg(void *input, char **msg, int32_t msgSize, int32_t if (NULL == pBuf) { return terrno; } - if(tSerializeSQnodeListReq(pBuf, bufLen, &qnodeListReq) < 0) - { - return TSDB_CODE_TSC_INVALID_INPUT; - } + + int32_t ret = tSerializeSQnodeListReq(pBuf, bufLen, &qnodeListReq); + if (ret < 0) return ret; *msg = pBuf; *msgLen = bufLen; @@ -167,10 +162,8 @@ int32_t queryBuildDnodeListMsg(void *input, char **msg, int32_t msgSize, int32_t if (NULL == pBuf) { return terrno; } - if(tSerializeSDnodeListReq(pBuf, bufLen, &dnodeListReq) < 0) - { - return TSDB_CODE_TSC_INVALID_INPUT; - } + int32_t ret = tSerializeSDnodeListReq(pBuf, bufLen, &dnodeListReq); + if (ret < 0) return ret; *msg = pBuf; *msgLen = bufLen; @@ -189,10 +182,8 @@ int32_t queryBuildGetSerVerMsg(void *input, char **msg, int32_t msgSize, int32_t if (NULL == pBuf) { return terrno; } - if(tSerializeSServerVerReq(pBuf, bufLen, &req) < 0) - { - return TSDB_CODE_TSC_INVALID_INPUT; - } + int32_t ret = tSerializeSServerVerReq(pBuf, bufLen, &req); + if (ret < 0) return ret; *msg = pBuf; *msgLen = bufLen; @@ -213,10 +204,8 @@ int32_t queryBuildGetDBCfgMsg(void *input, char **msg, int32_t msgSize, int32_t if (NULL == pBuf) { return terrno; } - if(tSerializeSDbCfgReq(pBuf, bufLen, &dbCfgReq) < 0) - { - return TSDB_CODE_TSC_INVALID_INPUT; - } + int32_t ret = tSerializeSDbCfgReq(pBuf, bufLen, &dbCfgReq); + if (ret < 0) return ret; *msg = pBuf; *msgLen = bufLen; @@ -237,10 +226,8 @@ int32_t queryBuildGetIndexMsg(void *input, char **msg, int32_t msgSize, int32_t if (NULL == pBuf) { return terrno; } - if(tSerializeSUserIndexReq(pBuf, bufLen, &indexReq) < 0) - { - return TSDB_CODE_TSC_INVALID_INPUT; - } + int32_t ret = tSerializeSUserIndexReq(pBuf, bufLen, &indexReq); + if (ret < 0) return ret; *msg = pBuf; *msgLen = bufLen; @@ -272,10 +259,10 @@ int32_t queryBuildRetrieveFuncMsg(void *input, char **msg, int32_t msgSize, int3 taosArrayDestroy(funcReq.pFuncNames); return terrno; } - 
if(tSerializeSRetrieveFuncReq(pBuf, bufLen, &funcReq) < 0) - { + int32_t ret = tSerializeSRetrieveFuncReq(pBuf, bufLen, &funcReq); + if (ret < 0) { taosArrayDestroy(funcReq.pFuncNames); - return TSDB_CODE_TSC_INVALID_INPUT; + return ret; } taosArrayDestroy(funcReq.pFuncNames); @@ -299,9 +286,8 @@ int32_t queryBuildGetUserAuthMsg(void *input, char **msg, int32_t msgSize, int32 if (NULL == pBuf) { return terrno; } - if (tSerializeSGetUserAuthReq(pBuf, bufLen, &req) < 0) { - return TSDB_CODE_TSC_INVALID_INPUT; - } + int32_t ret = tSerializeSGetUserAuthReq(pBuf, bufLen, &req); + if (ret < 0) return ret; *msg = pBuf; *msgLen = bufLen; @@ -322,10 +308,8 @@ int32_t queryBuildGetTbIndexMsg(void *input, char **msg, int32_t msgSize, int32_ if (NULL == pBuf) { return terrno; } - if(tSerializeSTableIndexReq(pBuf, bufLen, &indexReq) < 0) - { - return TSDB_CODE_TSC_INVALID_INPUT; - } + int32_t ret = tSerializeSTableIndexReq(pBuf, bufLen, &indexReq); + if (ret < 0) return ret; *msg = pBuf; *msgLen = bufLen; @@ -349,10 +333,8 @@ int32_t queryBuildGetTbCfgMsg(void *input, char **msg, int32_t msgSize, int32_t if (NULL == pBuf) { return terrno; } - if(tSerializeSTableCfgReq(pBuf, bufLen, &cfgReq) < 0) - { - return TSDB_CODE_TSC_INVALID_INPUT; - } + int32_t ret = tSerializeSTableCfgReq(pBuf, bufLen, &cfgReq); + if (ret < 0) return ret; *msg = pBuf; *msgLen = bufLen; @@ -373,10 +355,8 @@ int32_t queryBuildGetViewMetaMsg(void *input, char **msg, int32_t msgSize, int32 if (NULL == pBuf) { return terrno; } - if(tSerializeSViewMetaReq(pBuf, bufLen, &req) < 0) - { - return TSDB_CODE_TSC_INVALID_INPUT; - } + int32_t ret = tSerializeSViewMetaReq(pBuf, bufLen, &req); + if (ret < 0) return ret; *msg = pBuf; *msgLen = bufLen; @@ -398,10 +378,8 @@ int32_t queryBuildGetTableTSMAMsg(void *input, char **msg, int32_t msgSize, int3 if (NULL == pBuf) { return terrno; } - if(tSerializeTableTSMAInfoReq(pBuf, bufLen, &req) < 0) - { - return TSDB_CODE_TSC_INVALID_INPUT; - } + int32_t ret = tSerializeTableTSMAInfoReq(pBuf, bufLen, &req); + if (ret < 0) return ret; *msg = pBuf; *msgLen = bufLen; @@ -424,10 +402,8 @@ int32_t queryBuildGetTSMAMsg(void *input, char **msg, int32_t msgSize, int32_t * { return terrno; } - if(tSerializeTableTSMAInfoReq(pBuf, bufLen, &req) < 0) - { - return TSDB_CODE_TSC_INVALID_INPUT; - } + int32_t ret = tSerializeTableTSMAInfoReq(pBuf, bufLen, &req); + if(ret < 0) return ret; *msg = pBuf; *msgLen = bufLen; @@ -445,10 +421,8 @@ int32_t queryBuildGetStreamProgressMsg(void* input, char** msg, int32_t msgSize, return terrno; } - if(tSerializeStreamProgressReq(pBuf, len, input) < 0) - { - return TSDB_CODE_TSC_INVALID_INPUT; - } + int32_t ret = tSerializeStreamProgressReq(pBuf, len, input); + if (ret < 0) return ret; *msg = pBuf; *msgLen = len; @@ -462,6 +436,7 @@ int32_t queryProcessUseDBRsp(void *output, char *msg, int32_t msgSize) { if (NULL == output || NULL == msg || msgSize <= 0) { code = TSDB_CODE_TSC_INVALID_INPUT; + qError("invalid input param, output:%p, msg:%p, msgSize:%d", output, msg, msgSize); goto PROCESS_USEDB_OVER; } @@ -673,6 +648,7 @@ int32_t queryProcessTableMetaRsp(void *output, char *msg, int32_t msgSize) { STableMetaRsp metaRsp = {0}; if (NULL == output || NULL == msg || msgSize <= 0) { + qError("queryProcessTableMetaRsp: invalid input param, output:%p, msg:%p, msgSize:%d", output, msg, msgSize); code = TSDB_CODE_TSC_INVALID_INPUT; goto PROCESS_META_OVER; } @@ -729,6 +705,7 @@ static int32_t queryProcessTableNameRsp(void *output, char *msg, int32_t msgSize STableMetaRsp metaRsp = {0}; if 
(NULL == output || NULL == msg || msgSize <= 0) { + qError("queryProcessTableNameRsp: invalid input param, output:%p, msg:%p, msgSize:%d", output, msg, msgSize); code = TSDB_CODE_TSC_INVALID_INPUT; goto PROCESS_NAME_OVER; } @@ -785,6 +762,7 @@ int32_t queryProcessQnodeListRsp(void *output, char *msg, int32_t msgSize) { int32_t code = 0; if (NULL == output || NULL == msg || msgSize <= 0) { + qError("queryProcessQnodeListRsp: invalid input param, output:%p, msg:%p, msgSize:%d", output, msg, msgSize); code = TSDB_CODE_TSC_INVALID_INPUT; return code; } @@ -804,6 +782,7 @@ int32_t queryProcessDnodeListRsp(void *output, char *msg, int32_t msgSize) { int32_t code = 0; if (NULL == output || NULL == msg || msgSize <= 0) { + qError("queryProcessDnodeListRsp: invalid input param, output:%p, msg:%p, msgSize:%d", output, msg, msgSize); code = TSDB_CODE_TSC_INVALID_INPUT; return code; } @@ -824,6 +803,7 @@ int32_t queryProcessGetSerVerRsp(void *output, char *msg, int32_t msgSize) { int32_t code = 0; if (NULL == output || NULL == msg || msgSize <= 0) { + qError("queryProcessGetSerVerRsp: invalid input param, output:%p, msg:%p, msgSize:%d", output, msg, msgSize); code = TSDB_CODE_TSC_INVALID_INPUT; return code; } @@ -846,6 +826,7 @@ int32_t queryProcessGetDbCfgRsp(void *output, char *msg, int32_t msgSize) { SDbCfgRsp out = {0}; if (NULL == output || NULL == msg || msgSize <= 0) { + qError("queryProcessGetDbCfgRsp: invalid input param, output:%p, msg:%p, msgSize:%d", output, msg, msgSize); return TSDB_CODE_TSC_INVALID_INPUT; } @@ -863,6 +844,7 @@ int32_t queryProcessGetIndexRsp(void *output, char *msg, int32_t msgSize) { SUserIndexRsp out = {0}; if (NULL == output || NULL == msg || msgSize <= 0) { + qError("queryProcessGetIndexRsp: invalid input param, output:%p, msg:%p, msgSize:%d", output, msg, msgSize); return TSDB_CODE_TSC_INVALID_INPUT; } @@ -880,6 +862,7 @@ int32_t queryProcessRetrieveFuncRsp(void *output, char *msg, int32_t msgSize) { SRetrieveFuncRsp out = {0}; if (NULL == output || NULL == msg || msgSize <= 0) { + qError("queryProcessRetrieveFuncRsp: invalid input param, output:%p, msg:%p, msgSize:%d", output, msg, msgSize); return TSDB_CODE_TSC_INVALID_INPUT; } @@ -904,6 +887,7 @@ int32_t queryProcessRetrieveFuncRsp(void *output, char *msg, int32_t msgSize) { int32_t queryProcessGetUserAuthRsp(void *output, char *msg, int32_t msgSize) { if (NULL == output || NULL == msg || msgSize <= 0) { + qError("queryProcessGetUserAuthRsp: invalid input param, output:%p, msg:%p, msgSize:%d", output, msg, msgSize); return TSDB_CODE_TSC_INVALID_INPUT; } @@ -917,6 +901,7 @@ int32_t queryProcessGetUserAuthRsp(void *output, char *msg, int32_t msgSize) { int32_t queryProcessGetTbIndexRsp(void *output, char *msg, int32_t msgSize) { if (NULL == output || NULL == msg || msgSize <= 0) { + qError("queryProcessGetTbIndexRsp: invalid input param, output:%p, msg:%p, msgSize:%d", output, msg, msgSize); return TSDB_CODE_TSC_INVALID_INPUT; } @@ -931,6 +916,7 @@ int32_t queryProcessGetTbIndexRsp(void *output, char *msg, int32_t msgSize) { int32_t queryProcessGetTbCfgRsp(void *output, char *msg, int32_t msgSize) { if (NULL == output || NULL == msg || msgSize <= 0) { + qError("queryProcessGetTbCfgRsp: invalid input param, output:%p, msg:%p, msgSize:%d", output, msg, msgSize); return TSDB_CODE_TSC_INVALID_INPUT; } @@ -952,6 +938,7 @@ int32_t queryProcessGetTbCfgRsp(void *output, char *msg, int32_t msgSize) { int32_t queryProcessGetViewMetaRsp(void *output, char *msg, int32_t msgSize) { if (NULL == output || NULL == msg || msgSize 
<= 0) { + qError("queryProcessGetViewMetaRsp: invalid input param, output:%p, msg:%p, msgSize:%d", output, msg, msgSize); return TSDB_CODE_TSC_INVALID_INPUT; } @@ -975,6 +962,7 @@ int32_t queryProcessGetViewMetaRsp(void *output, char *msg, int32_t msgSize) { int32_t queryProcessGetTbTSMARsp(void* output, char* msg, int32_t msgSize) { if (NULL == output || NULL == msg || msgSize <= 0) { + qError("queryProcessGetTbTSMARsp: invalid input param, output:%p, msg:%p, msgSize:%d", output, msg, msgSize); return TSDB_CODE_TSC_INVALID_INPUT; } @@ -988,6 +976,7 @@ int32_t queryProcessGetTbTSMARsp(void* output, char* msg, int32_t msgSize) { int32_t queryProcessStreamProgressRsp(void* output, char* msg, int32_t msgSize) { if (!output || !msg || msgSize <= 0) { + qError("queryProcessStreamProgressRsp: invalid input param, output:%p, msg:%p, msgSize:%d", output, msg, msgSize); return TSDB_CODE_TSC_INVALID_INPUT; } diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index 6caa132982..e1affae73d 100644 --- a/source/libs/scheduler/src/scheduler.c +++ b/source/libs/scheduler/src/scheduler.c @@ -148,6 +148,7 @@ int32_t schedulerUpdatePolicy(int32_t policy) { qDebug("schedule policy updated to %d", schMgmt.cfg.schPolicy); break; default: + qError("invalid schedule policy %d", policy); SCH_RET(TSDB_CODE_TSC_INVALID_INPUT); } diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index bc47cd4ce1..dbaddd15a8 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -144,6 +144,8 @@ struct SStreamQueue { STaosQall* qall; void* qItem; int8_t status; + STaosQueue* pChkptQueue; + void* qChkptItem; }; struct SStreamQueueItem { diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index c53e1a19a3..eb8f2c741a 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -734,7 +734,7 @@ int32_t streamTaskUpdateTaskCheckpointInfo(SStreamTask* pTask, bool restored, SV pTask->status.taskStatus = TASK_STATUS__READY; - code = streamMetaSaveTask(pMeta, pTask); + code = streamMetaSaveTaskInMeta(pMeta, pTask); streamMutexUnlock(&pTask->lock); if (code != TSDB_CODE_SUCCESS) { @@ -910,9 +910,12 @@ int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { int64_t startTs = pTask->chkInfo.startTs; int64_t ckId = pTask->chkInfo.pActiveInfo->activeId; const char* id = pTask->id.idStr; - bool dropRelHTask = (streamTaskGetPrevStatus(pTask) == TASK_STATUS__HALT); SStreamMeta* pMeta = pTask->pMeta; + streamMutexLock(&pTask->lock); + bool dropRelHTask = (streamTaskGetPrevStatus(pTask) == TASK_STATUS__HALT); + streamMutexUnlock(&pTask->lock); + // sink task does not need to save the status, and generated the checkpoint if (pTask->info.taskLevel != TASK_LEVEL__SINK) { stDebug("s-task:%s level:%d start gen checkpoint, checkpointId:%" PRId64, id, pTask->info.taskLevel, ckId); @@ -1581,6 +1584,14 @@ int32_t streamTaskSendNegotiateChkptIdMsg(SStreamTask* pTask) { streamFreeTaskState(pTask, p); pTask->pBackend = NULL; } + + streamMetaWLock(pTask->pMeta); + if (pTask->exec.pExecutor != NULL) { + qDestroyTask(pTask->exec.pExecutor); + pTask->exec.pExecutor = NULL; + } + streamMetaWUnLock(pTask->pMeta); + return 0; } diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 0aa9d6fbb4..24ac193937 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -1098,6 +1098,8 @@ 
static void chkptReadyMsgSendMonitorFn(void* param, void* tmrId) {
   pActiveInfo = pTask->chkInfo.pActiveInfo;
   pTmrInfo = &pActiveInfo->chkptReadyMsgTmr;
 
+  stDebug("s-task:%s acquire task, refId:%" PRId64, id, taskRefId);
+
   // check the status every 100ms
   if (streamTaskShouldStop(pTask)) {
     streamCleanBeforeQuitTmr(pTmrInfo, param);
diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c
index d44323d572..077557c1e6 100644
--- a/source/libs/stream/src/streamExec.c
+++ b/source/libs/stream/src/streamExec.c
@@ -698,6 +698,8 @@ static int32_t doHandleChkptBlock(SStreamTask* pTask) {
   streamMutexLock(&pTask->lock);
   SStreamTaskState pState = streamTaskGetStatus(pTask);
+  streamMutexUnlock(&pTask->lock);
+
   if (pState.state == TASK_STATUS__CK) {  // todo other thread may change the status
     stDebug("s-task:%s checkpoint block received, set status:%s", id, pState.name);
     code = streamTaskBuildCheckpoint(pTask);  // ignore this error msg, and continue
   }
@@ -715,7 +717,7 @@ static int32_t doHandleChkptBlock(SStreamTask* pTask) {
     }
   }
 
-  streamMutexUnlock(&pTask->lock);
+//  streamMutexUnlock(&pTask->lock);
 
   return code;
 }
@@ -878,7 +880,7 @@ static int32_t doStreamExecTask(SStreamTask* pTask) {
   }
 
   double el = (taosGetTimestampMs() - st) / 1000.0;
-  if (el > 5.0) {  // elapsed more than 5 sec, not occupy the CPU anymore
+  if (el > 2.0) {  // elapsed more than 2 sec, do not occupy the CPU anymore
     stDebug("s-task:%s occupy more than 5.0s, release the exec threads and idle for 500ms", id);
     streamTaskSetIdleInfo(pTask, 500);
     return code;
   }
@@ -913,8 +915,35 @@ bool streamTaskReadyToRun(const SStreamTask* pTask, char** pStatus) {
   }
 }
 
+static bool shouldNotCont(SStreamTask* pTask) {
+  int32_t       level = pTask->info.taskLevel;
+  SStreamQueue* pQueue = pTask->inputq.queue;
+  ETaskStatus   status = streamTaskGetStatus(pTask).state;
+
+  // 1. task should jump out
+  bool quit = (status == TASK_STATUS__STOP) || (status == TASK_STATUS__PAUSE) || (status == TASK_STATUS__DROPPING);
+
+  // 2. during checkpoint, if the source task's checkpoint queue is empty, do not read from the ordinary queue
+  bool emptyCkQueue = (taosQueueItemSize(pQueue->pChkptQueue) == 0);
+
+  // 3. no data in ordinary queue
+  bool emptyBlockQueue = (streamQueueGetNumOfItems(pQueue) == 0);
+
+  if (quit) {
+    return true;
+  } else {
+    if (status == TASK_STATUS__CK && level == TASK_LEVEL__SOURCE) {
+      // in checkpoint procedure, we only check whether the controller queue is empty or not
+      return emptyCkQueue;
+    } else {  // otherwise, do not continue if the block queue is empty
+ return emptyBlockQueue && emptyCkQueue; + } + } +} + int32_t streamResumeTask(SStreamTask* pTask) { const char* id = pTask->id.idStr; + int32_t level = pTask->info.taskLevel; int32_t code = 0; if (pTask->status.schedStatus != TASK_SCHED_STATUS__ACTIVE) { @@ -927,11 +956,10 @@ int32_t streamResumeTask(SStreamTask* pTask) { if (code) { stError("s-task:%s failed to exec stream task, code:%s, continue", id, tstrerror(code)); } - // check if continue + streamMutexLock(&pTask->lock); - int32_t numOfItems = streamQueueGetNumOfItems(pTask->inputq.queue); - if ((numOfItems == 0) || streamTaskShouldStop(pTask) || streamTaskShouldPause(pTask)) { + if (shouldNotCont(pTask)) { atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); streamTaskClearSchedIdleInfo(pTask); streamMutexUnlock(&pTask->lock); diff --git a/source/libs/stream/src/streamHb.c b/source/libs/stream/src/streamHb.c index 7c157bb05e..ca5b6630fd 100644 --- a/source/libs/stream/src/streamHb.c +++ b/source/libs/stream/src/streamHb.c @@ -327,7 +327,7 @@ void streamMetaHbToMnode(void* param, void* tmrId) { pMeta->pHbInfo->hbStart = 0; code = taosReleaseRef(streamMetaRefPool, rid); if (code == TSDB_CODE_SUCCESS) { - stDebug("vgId:%d jump out of meta timer", vgId); + stInfo("vgId:%d jump out of meta timer since closed", vgId); } else { stError("vgId:%d jump out of meta timer, failed to release the meta rid:%" PRId64, vgId, rid); } @@ -341,7 +341,7 @@ void streamMetaHbToMnode(void* param, void* tmrId) { if (code == TSDB_CODE_SUCCESS) { stInfo("vgId:%d role:%d not leader not send hb to mnode", vgId, role); } else { - stError("vgId:%d role:%d not leader not send hb to mnodefailed to release the meta rid:%" PRId64, vgId, role, rid); + stError("vgId:%d role:%d not leader not send hb to mnode, failed to release meta rid:%" PRId64, vgId, role, rid); } // taosMemoryFree(param); return; @@ -363,13 +363,24 @@ void streamMetaHbToMnode(void* param, void* tmrId) { pMeta->pHbInfo->hbStart = taosGetTimestampMs(); } - streamMetaRLock(pMeta); - code = streamMetaSendHbHelper(pMeta); - if (code) { - stError("vgId:%d failed to send hmMsg to mnode, try again in 5s, code:%s", pMeta->vgId, tstrerror(code)); + // NOTE: stream task in restart procedure. not generate the hb now, try to acquire the lock may cause stuck this timer. 
+  int32_t count = 30;
+  bool    send = false;
+  while ((--count) >= 0) {
+    int32_t ret = streamMetaTryRlock(pMeta);
+    if (ret != 0) {
+      taosMsleep(10);
+    } else {
+      send = true;
+      code = streamMetaSendHbHelper(pMeta);
+      streamMetaRUnLock(pMeta);
+      break;
+    }
   }
-  streamMetaRUnLock(pMeta);
 
+  if (!send) {
+    stError("vgId:%d failed to send hbMsg to mnode since the meta lock was unavailable, retry in 5s", pMeta->vgId);
+  }
 
   streamTmrStart(streamMetaHbToMnode, META_HB_CHECK_INTERVAL, param, streamTimer, &pMeta->pHbInfo->hbTmr, pMeta->vgId,
                  "meta-hb-tmr");
@@ -413,7 +424,7 @@ void destroyMetaHbInfo(SMetaHbInfo* pInfo) {
 
 void streamMetaWaitForHbTmrQuit(SStreamMeta* pMeta) {
   // wait for the stream meta hb function stopping
   if (pMeta->role == NODE_ROLE_LEADER) {
-    taosMsleep(2 * META_HB_CHECK_INTERVAL);
+    taosMsleep(3 * META_HB_CHECK_INTERVAL);
     stDebug("vgId:%d wait for meta to stop timer", pMeta->vgId);
   }
 }
diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c
index 7482c6229b..605cf3fe21 100644
--- a/source/libs/stream/src/streamMeta.c
+++ b/source/libs/stream/src/streamMeta.c
@@ -428,7 +428,10 @@ int32_t streamMetaOpen(const char* path, void* ahandle, FTaskBuild buildTaskFn,
   pMeta->pTaskList = taosArrayInit(4, sizeof(SStreamTaskId));
   TSDB_CHECK_NULL(pMeta->pTaskList, code, lino, _err, terrno);
 
-  pMeta->scanInfo.scanCounter = 0;
+  pMeta->scanInfo.scanSentinel = 0;
+  pMeta->scanInfo.lastScanTs = 0;
+  pMeta->scanInfo.tickCounter = 0;
+
   pMeta->vgId = vgId;
   pMeta->ahandle = ahandle;
   pMeta->buildTaskFn = buildTaskFn;
@@ -633,7 +636,7 @@ void streamMetaCloseImpl(void* arg) {
 }
 
 // todo let's check the status for each task
-int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) {
+int32_t streamMetaSaveTaskInMeta(SStreamMeta* pMeta, SStreamTask* pTask) {
   int32_t vgId = pTask->pMeta->vgId;
   void*   buf = NULL;
   int32_t len;
@@ -683,7 +686,7 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) {
   return code;
 }
 
-int32_t streamMetaRemoveTask(SStreamMeta* pMeta, STaskId* pTaskId) {
+int32_t streamMetaRemoveTaskInMeta(SStreamMeta* pMeta, STaskId* pTaskId) {
   int64_t key[2] = {pTaskId->streamId, pTaskId->taskId};
   int32_t code = tdbTbDelete(pMeta->pTaskDb, key, STREAM_TASK_KEY_LEN, pMeta->txn);
   if (code != 0) {
@@ -706,7 +709,7 @@ int32_t streamMetaRegisterTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTa
 
   void* p = taosHashGet(pMeta->pTasksMap, &id, sizeof(id));
   if (p != NULL) {
-    stDebug("s-task:%" PRIx64 " already exist in meta, no need to register", id.taskId);
+    stDebug("s-task:0x%" PRIx64 " already exists in meta, no need to register", id.taskId);
     tFreeStreamTask(pTask);
     return code;
   }
@@ -736,7 +739,7 @@ int32_t streamMetaRegisterTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTa
     return code;
   }
 
-  if ((code = streamMetaSaveTask(pMeta, pTask)) != 0) {
+  if ((code = streamMetaSaveTaskInMeta(pMeta, pTask)) != 0) {
     int32_t unused = taosHashRemove(pMeta->pTasksMap, &id, sizeof(id));
     void*   pUnused = taosArrayPop(pMeta->pTaskList);
 
@@ -886,6 +889,7 @@ static void doRemoveIdFromList(SArray* pTaskList, int32_t num, SStreamTaskId* id
 
 static int32_t streamTaskSendTransSuccessMsg(SStreamTask* pTask, void* param) {
   int32_t code = 0;
+
   if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
     code = streamTaskSendCheckpointSourceRsp(pTask);
     if (code) {
@@ -896,7 +900,7 @@ static int32_t streamTaskSendTransSuccessMsg(SStreamTask* pTask, void* param) {
 
   // let's kill the query procedure within stream, to end it ASAP.
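The heartbeat timer now polls the meta lock with streamMetaTryRlock() instead of blocking, so a long-held write lock (for example during a task restart) cannot stall the timer thread. The same bounded try-lock loop expressed directly with POSIX rwlocks (retry count and backoff are illustrative):

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

// Poll the read lock up to `attempts` times with a short sleep; give up
// rather than block, and let the next timer round retry.
static bool tryReadLockedSend(pthread_rwlock_t* lock, int attempts) {
  while (attempts-- > 0) {
    if (pthread_rwlock_tryrdlock(lock) == 0) {
      // ... build and send the heartbeat while holding the read lock ...
      pthread_rwlock_unlock(lock);
      return true;
    }
    usleep(10 * 1000);  // 10 ms backoff, as in the loop above
  }
  return false;  // lock stayed unavailable for the whole window
}

int main(void) {
  pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
  printf("sent: %d\n", tryReadLockedSend(&lock, 30));
  return 0;
}
```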
if (pTask->info.taskLevel != TASK_LEVEL__SINK && pTask->exec.pExecutor != NULL) { - code = qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS); + code = qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS, -1); if (code != TSDB_CODE_SUCCESS) { stError("s-task:%s failed to kill task related query handle, code:%s", pTask->id.idStr, tstrerror(code)); } @@ -933,7 +937,7 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t code = taosHashRemove(pMeta->pTasksMap, &id, sizeof(id)); doRemoveIdFromList(pMeta->pTaskList, (int32_t)taosArrayGetSize(pMeta->pTaskList), &pTask->id); - code = streamMetaRemoveTask(pMeta, &id); + code = streamMetaRemoveTaskInMeta(pMeta, &id); if (code) { stError("vgId:%d failed to remove task:0x%" PRIx64 ", code:%s", pMeta->vgId, id.taskId, tstrerror(code)); } @@ -964,6 +968,32 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t return 0; } +int32_t streamMetaStopOneTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId) { + SStreamTask* pTask = NULL; + int32_t code = 0; + int32_t vgId = pMeta->vgId; + int32_t numOfTasks = 0; + + streamMetaWLock(pMeta); + +// code = streamMetaUnregisterTask(pMeta, streamId, taskId); +// numOfTasks = streamMetaGetNumOfTasks(pMeta); +// if (code) { +// stError("vgId:%d failed to drop task:0x%x, code:%s", vgId, taskId, tstrerror(code)); +// } +// +// code = streamMetaCommit(pMeta); +// if (code) { +// stError("vgId:%d failed to commit after drop task:0x%x, code:%s", vgId, taskId, tstrerror(code)); +// } else { +// stDebug("s-task:0x%"PRIx64"-0x%x vgId:%d dropped, remain tasks:%d", streamId, taskId, pMeta->vgId, numOfTasks); +// } + + streamMetaWUnLock(pMeta); + + return code; +} + int32_t streamMetaBegin(SStreamMeta* pMeta) { streamMetaWLock(pMeta); int32_t code = tdbBegin(pMeta->db, &pMeta->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, @@ -1187,7 +1217,7 @@ void streamMetaLoadAllTasks(SStreamMeta* pMeta) { if (taosArrayGetSize(pRecycleList) > 0) { for (int32_t i = 0; i < taosArrayGetSize(pRecycleList); ++i) { STaskId* pId = taosArrayGet(pRecycleList, i); - code = streamMetaRemoveTask(pMeta, pId); + code = streamMetaRemoveTaskInMeta(pMeta, pId); if (code) { stError("s-task:0x%" PRIx64 " failed to remove task, code:%s", pId->taskId, tstrerror(code)); } @@ -1215,8 +1245,8 @@ void streamMetaNotifyClose(SStreamMeta* pMeta) { vgId, (pMeta->role == NODE_ROLE_LEADER), startTs, sendCount); // wait for the stream meta hb function stopping - streamMetaWaitForHbTmrQuit(pMeta); pMeta->closeFlag = true; + streamMetaWaitForHbTmrQuit(pMeta); stDebug("vgId:%d start to check all tasks for closing", vgId); int64_t st = taosGetTimestampMs(); @@ -1255,6 +1285,12 @@ void streamMetaNotifyClose(SStreamMeta* pMeta) { double el = (taosGetTimestampMs() - st) / 1000.0; stDebug("vgId:%d stop all %d task(s) completed, elapsed time:%.2f Sec.", pMeta->vgId, numOfTasks, el); + + if (pMeta->scanInfo.scanTimer != NULL) { + streamTmrStop(pMeta->scanInfo.scanTimer); + pMeta->scanInfo.scanTimer = NULL; + } + streamMetaRUnLock(pMeta); } @@ -1322,7 +1358,7 @@ void streamMetaUpdateStageRole(SStreamMeta* pMeta, int64_t stage, bool isLeader) // mark the sign to send msg before close all tasks // 1. for leader vnode, always send msg before closing - // 2. for follower vnode, if it's is changed from leader, also sending msg before closing. + // 2. for follower vnode, if it's changed from leader, also sending msg before closing. 
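qKillTask() gains a waitDuration so callers can choose between fire-and-forget (-1, as in the call above) and a bounded wait (5000 or 10000 ms elsewhere in this patch) for the executor to leave its run loop. A minimal sketch of a bounded-wait kill (atomics stand in for the task lock; poll interval and deadline are illustrative):

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static int64_t nowMs(void) {
  struct timespec ts;
  clock_gettime(CLOCK_MONOTONIC, &ts);
  return (int64_t)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

typedef struct {
  atomic_bool killed;
  atomic_bool executing;
} Task;

// waitMs <= 0: set the kill flag and return immediately.
// waitMs  > 0: wait at most waitMs for the task to stop executing.
static void killTask(Task* t, int64_t waitMs) {
  atomic_store(&t->killed, true);
  if (waitMs <= 0) return;

  int64_t st = nowMs();
  while (atomic_load(&t->executing)) {
    usleep(100 * 1000);  // 100 ms poll
    if (nowMs() - st >= waitMs) {
      printf("deadline reached, not waiting any longer\n");
      return;
    }
  }
}

int main(void) {
  Task t;
  atomic_init(&t.killed, false);
  atomic_init(&t.executing, false);
  killTask(&t, 5000);  // returns immediately: the task is not executing
  return 0;
}
```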
if (pMeta->role == NODE_ROLE_LEADER) { pMeta->sendMsgBeforeClosing = true; } @@ -1332,11 +1368,11 @@ void streamMetaUpdateStageRole(SStreamMeta* pMeta, int64_t stage, bool isLeader) if (isLeader) { stInfo("vgId:%d update meta stage:%" PRId64 ", prev:%" PRId64 " leader:%d, start to send Hb, rid:%" PRId64, - pMeta->vgId, prevStage, stage, isLeader, pMeta->rid); + pMeta->vgId, stage, prevStage, isLeader, pMeta->rid); streamMetaStartHb(pMeta); } else { stInfo("vgId:%d update meta stage:%" PRId64 " prev:%" PRId64 " leader:%d sendMsg beforeClosing:%d", pMeta->vgId, - prevStage, stage, isLeader, pMeta->sendMsgBeforeClosing); + stage, prevStage, isLeader, pMeta->sendMsgBeforeClosing); } } diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index 401aa7530d..7c7ef83d6d 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -32,11 +32,12 @@ typedef struct SQueueReader { static bool streamTaskExtractAvailableToken(STokenBucket* pBucket, const char* id); static void streamTaskPutbackToken(STokenBucket* pBucket); static void streamTaskConsumeQuota(STokenBucket* pBucket, int32_t bytes); +static void streamQueueNextItemInSourceQ(SStreamQueue* pQueue, SStreamQueueItem** pItem, ETaskStatus status, const char* id); static void streamQueueCleanup(SStreamQueue* pQueue) { SStreamQueueItem* qItem = NULL; while (1) { - streamQueueNextItem(pQueue, &qItem); + streamQueueNextItemInSourceQ(pQueue, &qItem, TASK_STATUS__READY, ""); if (qItem == NULL) { break; } @@ -47,7 +48,9 @@ static void streamQueueCleanup(SStreamQueue* pQueue) { int32_t streamQueueOpen(int64_t cap, SStreamQueue** pQ) { *pQ = NULL; + int32_t code = 0; + int32_t lino = 0; SStreamQueue* pQueue = taosMemoryCalloc(1, sizeof(SStreamQueue)); if (pQueue == NULL) { @@ -55,24 +58,26 @@ int32_t streamQueueOpen(int64_t cap, SStreamQueue** pQ) { } code = taosOpenQueue(&pQueue->pQueue); - if (code) { - taosMemoryFreeClear(pQueue); - return code; - } + TSDB_CHECK_CODE(code, lino, _error); code = taosAllocateQall(&pQueue->qall); - if (code) { - taosCloseQueue(pQueue->pQueue); - taosMemoryFree(pQueue); - return code; - } + TSDB_CHECK_CODE(code, lino, _error); + + code = taosOpenQueue(&pQueue->pChkptQueue); + TSDB_CHECK_CODE(code, lino, _error); pQueue->status = STREAM_QUEUE__SUCESS; + taosSetQueueCapacity(pQueue->pQueue, cap); taosSetQueueMemoryCapacity(pQueue->pQueue, cap * 1024); *pQ = pQueue; return code; + +_error: + streamQueueClose(pQueue, 0); + stError("failed to open stream queue at line:%d, code:%s", lino, tstrerror(code)); + return code; } void streamQueueClose(SStreamQueue* pQueue, int32_t taskId) { @@ -82,6 +87,11 @@ void streamQueueClose(SStreamQueue* pQueue, int32_t taskId) { taosFreeQall(pQueue->qall); taosCloseQueue(pQueue->pQueue); + pQueue->pQueue = NULL; + + taosCloseQueue(pQueue->pChkptQueue); + pQueue->pChkptQueue = NULL; + taosMemoryFree(pQueue); } @@ -94,6 +104,7 @@ void streamQueueNextItem(SStreamQueue* pQueue, SStreamQueueItem** pItem) { } else { pQueue->qItem = NULL; (void) taosGetQitem(pQueue->qall, &pQueue->qItem); + if (pQueue->qItem == NULL) { (void) taosReadAllQitems(pQueue->pQueue, pQueue->qall); (void) taosGetQitem(pQueue->qall, &pQueue->qItem); @@ -103,6 +114,46 @@ void streamQueueNextItem(SStreamQueue* pQueue, SStreamQueueItem** pItem) { } } +void streamQueueNextItemInSourceQ(SStreamQueue* pQueue, SStreamQueueItem** pItem, ETaskStatus status, const char* id) { + *pItem = NULL; + int8_t flag = atomic_exchange_8(&pQueue->status, STREAM_QUEUE__PROCESSING); 
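streamQueueOpen() above now acquires several resources (the data queue, the qall iterator, and the new checkpoint queue) and funnels every failure through one _error label, which tears down whatever was already created. The idiom in isolation (simplified resources; not the taosOpenQueue API):

```c
#include <stdio.h>
#include <stdlib.h>

typedef struct {
  int* dataQ;
  int* chkptQ;
} Queue;

static int queueOpen(Queue** out) {
  *out = NULL;
  Queue* q = calloc(1, sizeof(Queue));
  if (q == NULL) return -1;

  q->dataQ = malloc(sizeof(int));
  if (q->dataQ == NULL) goto _error;

  q->chkptQ = malloc(sizeof(int));  // second queue for checkpoint messages
  if (q->chkptQ == NULL) goto _error;

  *out = q;
  return 0;

_error:
  free(q->dataQ);   // free(NULL) is a no-op, so partial init is safe
  free(q->chkptQ);
  free(q);
  return -1;
}

int main(void) {
  Queue* q = NULL;
  if (queueOpen(&q) == 0) {
    printf("queue opened with data + checkpoint sub-queues\n");
    free(q->dataQ);
    free(q->chkptQ);
    free(q);
  }
  return 0;
}
```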
+ + if (flag == STREAM_QUEUE__CHKPTFAILED) { + *pItem = pQueue->qChkptItem; + return; + } + + if (flag == STREAM_QUEUE__FAILED) { + *pItem = pQueue->qItem; + return; + } + + pQueue->qChkptItem = NULL; + taosReadQitem(pQueue->pChkptQueue, (void**)&pQueue->qChkptItem); + if (pQueue->qChkptItem != NULL) { + stDebug("s-task:%s read data from checkpoint queue, status:%d", id, status); + *pItem = pQueue->qChkptItem; + return; + } + + // if in checkpoint status, not read data from ordinary input q. + if (status == TASK_STATUS__CK) { + stDebug("s-task:%s in checkpoint status, not read data in block queue, status:%d", id, status); + return; + } + + // let's try the ordinary input q + pQueue->qItem = NULL; + int32_t num = taosGetQitem(pQueue->qall, &pQueue->qItem); + + if (pQueue->qItem == NULL) { + num = taosReadAllQitems(pQueue->pQueue, pQueue->qall); + num = taosGetQitem(pQueue->qall, &pQueue->qItem); + } + + *pItem = streamQueueCurItem(pQueue); +} + void streamQueueProcessSuccess(SStreamQueue* queue) { if (atomic_load_8(&queue->status) != STREAM_QUEUE__PROCESSING) { stError("invalid queue status:%d, expect:%d", atomic_load_8(&queue->status), STREAM_QUEUE__PROCESSING); @@ -110,6 +161,7 @@ void streamQueueProcessSuccess(SStreamQueue* queue) { } queue->qItem = NULL; + queue->qChkptItem = NULL; atomic_store_8(&queue->status, STREAM_QUEUE__SUCESS); } @@ -121,6 +173,14 @@ void streamQueueProcessFail(SStreamQueue* queue) { atomic_store_8(&queue->status, STREAM_QUEUE__FAILED); } +void streamQueueGetSourceChkptFailed(SStreamQueue* pQueue) { + if (atomic_load_8(&pQueue->status) != STREAM_QUEUE__PROCESSING) { + stError("invalid queue status:%d, expect:%d", atomic_load_8(&pQueue->status), STREAM_QUEUE__PROCESSING); + return; + } + atomic_store_8(&pQueue->status, STREAM_QUEUE__CHKPTFAILED); +} + bool streamQueueIsFull(const SStreamQueue* pQueue) { int32_t numOfItems = streamQueueGetNumOfItems(pQueue); if (numOfItems >= STREAM_TASK_QUEUE_CAPACITY) { @@ -175,8 +235,9 @@ const char* streamQueueItemGetTypeStr(int32_t type) { EExtractDataCode streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks, int32_t* blockSize) { - const char* id = pTask->id.idStr; - int32_t taskLevel = pTask->info.taskLevel; + const char* id = pTask->id.idStr; + int32_t taskLevel = pTask->info.taskLevel; + SStreamQueue* pQueue = pTask->inputq.queue; *pInput = NULL; *numOfBlocks = 0; @@ -189,13 +250,19 @@ EExtractDataCode streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueIte } while (1) { - if (streamTaskShouldPause(pTask) || streamTaskShouldStop(pTask)) { - stDebug("s-task:%s task should pause, extract input blocks:%d", id, *numOfBlocks); + ETaskStatus status = streamTaskGetStatus(pTask).state; + if (status == TASK_STATUS__PAUSE || status == TASK_STATUS__STOP) { + stDebug("s-task:%s task should pause/stop, extract input blocks:%d", id, *numOfBlocks); return EXEC_CONTINUE; } SStreamQueueItem* qItem = NULL; - streamQueueNextItem(pTask->inputq.queue, (SStreamQueueItem**)&qItem); + if (taskLevel == TASK_LEVEL__SOURCE) { + streamQueueNextItemInSourceQ(pQueue, &qItem, status, id); + } else { + streamQueueNextItem(pQueue, &qItem); + } + if (qItem == NULL) { // restore the token to bucket if (*numOfBlocks > 0) { @@ -225,14 +292,19 @@ EExtractDataCode streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueIte *numOfBlocks = 1; *pInput = qItem; return EXEC_CONTINUE; - } else { // previous existed blocks needs to be handle, before handle the checkpoint msg block + } else { // previous existed 
blocks need to be handled before handling the checkpoint msg block
       stDebug("s-task:%s %s msg extracted, handle previous blocks, numOfBlocks:%d", id, p, *numOfBlocks);
       *blockSize = streamQueueItemGetSize(*pInput);
       if (taskLevel == TASK_LEVEL__SINK) {
         streamTaskConsumeQuota(pTask->outputInfo.pTokenBucket, *blockSize);
       }
 
-      streamQueueProcessFail(pTask->inputq.queue);
+      if ((type == STREAM_INPUT__CHECKPOINT_TRIGGER || type == STREAM_INPUT__CHECKPOINT) &&
+          (taskLevel == TASK_LEVEL__SOURCE)) {
+        streamQueueGetSourceChkptFailed(pQueue);
+      } else {
+        streamQueueProcessFail(pQueue);
+      }
       return EXEC_CONTINUE;
     }
   } else {
@@ -252,7 +324,7 @@ EExtractDataCode streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueIte
         streamTaskConsumeQuota(pTask->outputInfo.pTokenBucket, *blockSize);
       }
 
-      streamQueueProcessFail(pTask->inputq.queue);
+      streamQueueProcessFail(pQueue);
       return EXEC_CONTINUE;
     }
 
@@ -260,7 +332,7 @@ EExtractDataCode streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueIte
     }
 
     *numOfBlocks += 1;
-    streamQueueProcessSuccess(pTask->inputq.queue);
+    streamQueueProcessSuccess(pQueue);
 
     if (*numOfBlocks >= MAX_STREAM_EXEC_BATCH_NUM) {
       stDebug("s-task:%s batch size limit:%d reached, start to process blocks", id, MAX_STREAM_EXEC_BATCH_NUM);
@@ -279,6 +351,7 @@ EExtractDataCode streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueIte
 int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) {
   int8_t      type = pItem->type;
   STaosQueue* pQueue = pTask->inputq.queue->pQueue;
+  int32_t     level = pTask->info.taskLevel;
   int32_t     total = streamQueueGetNumOfItems(pTask->inputq.queue) + 1;
 
   if (type == STREAM_INPUT__DATA_SUBMIT) {
@@ -326,15 +399,32 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem)
     stDebug("s-task:%s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, total, size);
   } else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__CHECKPOINT_TRIGGER ||
              type == STREAM_INPUT__TRANS_STATE || type == STREAM_INPUT__DATA_RETRIEVE) {
-    int32_t code = taosWriteQitem(pQueue, pItem);
-    if (code != TSDB_CODE_SUCCESS) {
-      streamFreeQitem(pItem);
-      return code;
-    }
-    double size = SIZE_IN_MiB(taosQueueMemorySize(pQueue));
-    stDebug("s-task:%s level:%d %s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr,
-            pTask->info.taskLevel, streamQueueItemGetTypeStr(type), total, size);
+    int32_t code = 0;
+    if ((type == STREAM_INPUT__CHECKPOINT_TRIGGER || type == STREAM_INPUT__CHECKPOINT) && (level == TASK_LEVEL__SOURCE)) {
+      STaosQueue* pChkptQ = pTask->inputq.queue->pChkptQueue;
+      code = taosWriteQitem(pChkptQ, pItem);
+      if (code != TSDB_CODE_SUCCESS) {
+        streamFreeQitem(pItem);
+        return code;
+      }
+
+      double  size = SIZE_IN_MiB(taosQueueMemorySize(pChkptQ));
+      int32_t num = taosQueueItemSize(pChkptQ);
+
+      stDebug("s-task:%s level:%d %s checkpoint enqueue ctrl queue, total in queue:%d, size:%.2fMiB, data queue:%d",
+              pTask->id.idStr, pTask->info.taskLevel, streamQueueItemGetTypeStr(type), num, size, (total - 1));
+    } else {
+      code = taosWriteQitem(pQueue, pItem);
+      if (code != TSDB_CODE_SUCCESS) {
+        streamFreeQitem(pItem);
+        return code;
+      }
+
+      double size = SIZE_IN_MiB(taosQueueMemorySize(pQueue));
+      stDebug("s-task:%s level:%d %s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr,
+              pTask->info.taskLevel, streamQueueItemGetTypeStr(type), total, size);
+    }
   } else if (type == STREAM_INPUT__GET_RES) {  // use the default memory limit, refactor later.
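The enqueue routing above is the write side of the two-queue discipline: checkpoint control blocks from a source task go to the dedicated pChkptQueue so a backlog of data blocks cannot delay checkpoint handling, while everything else still uses the ordinary input queue. A toy version of that routing plus the matching drain decision (cf. shouldNotCont in streamExec.c); the types here are hypothetical, not the STaosQueue API:

```c
#include <stdbool.h>
#include <stdio.h>

typedef enum { MSG_DATA, MSG_CHECKPOINT, MSG_TRIGGER } MsgType;

typedef struct {
  int dataItems;
  int chkptItems;
} InputQueues;

static void putItem(InputQueues* q, MsgType type, bool sourceTask) {
  if ((type == MSG_CHECKPOINT || type == MSG_TRIGGER) && sourceTask) {
    q->chkptItems++;  // control path: never stuck behind data blocks
  } else {
    q->dataItems++;   // ordinary data path
  }
}

// Keep running while there is anything the task is allowed to consume.
static bool shouldContinue(const InputQueues* q, bool checkpointing) {
  if (checkpointing) return q->chkptItems > 0;  // only drain the control queue
  return q->dataItems > 0 || q->chkptItems > 0;
}

int main(void) {
  InputQueues q = {0, 0};
  putItem(&q, MSG_DATA, true);
  putItem(&q, MSG_TRIGGER, true);
  printf("data:%d chkpt:%d continue(ck):%d\n", q.dataItems, q.chkptItems,
         shouldContinue(&q, true));  // data:1 chkpt:1 continue(ck):1
  return 0;
}
```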
int32_t code = taosWriteQitem(pQueue, pItem); diff --git a/source/libs/stream/src/streamStartHistory.c b/source/libs/stream/src/streamStartHistory.c index 54a8929123..f8b1b5ecbc 100644 --- a/source/libs/stream/src/streamStartHistory.c +++ b/source/libs/stream/src/streamStartHistory.c @@ -76,7 +76,7 @@ int32_t streamStartScanHistoryAsync(SStreamTask* pTask, int8_t igUntreated) { memcpy(serializedReq, &req, len); SRpcMsg rpcMsg = {.contLen = len, .pCont = serializedReq, .msgType = TDMT_VND_STREAM_SCAN_HISTORY}; - return tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &rpcMsg); + return tmsgPutToQueue(pTask->pMsgCb, STREAM_LONG_EXEC_QUEUE, &rpcMsg); } void streamExecScanHistoryInFuture(SStreamTask* pTask, int32_t idleDuration) { diff --git a/source/libs/stream/src/streamStartTask.c b/source/libs/stream/src/streamStartTask.c index c40d5ef928..60c1694dda 100644 --- a/source/libs/stream/src/streamStartTask.c +++ b/source/libs/stream/src/streamStartTask.c @@ -451,7 +451,6 @@ int32_t streamMetaStopAllTasks(SStreamMeta* pMeta) { continue; } - int64_t refId = pTask->id.refId; int32_t ret = streamTaskStop(pTask); if (ret) { stError("s-task:0x%x failed to stop task, code:%s", pTaskId->taskId, tstrerror(ret)); diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 7209b6434f..378aaa27d0 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -710,7 +710,7 @@ int32_t streamTaskStop(SStreamTask* pTask) { } if (pTask->info.taskLevel != TASK_LEVEL__SINK && pTask->exec.pExecutor != NULL) { - code = qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS); + code = qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS, 5000); if (code != TSDB_CODE_SUCCESS) { stError("s-task:%s failed to kill task related query handle, code:%s", id, tstrerror(code)); } @@ -869,7 +869,7 @@ int32_t streamTaskClearHTaskAttr(SStreamTask* pTask, int32_t resetRelHalt) { pStreamTask->status.taskStatus = TASK_STATUS__READY; } - code = streamMetaSaveTask(pMeta, pStreamTask); + code = streamMetaSaveTaskInMeta(pMeta, pStreamTask); streamMutexUnlock(&(pStreamTask->lock)); streamMetaReleaseTask(pMeta, pStreamTask); @@ -1034,7 +1034,7 @@ static int32_t taskPauseCallback(SStreamTask* pTask, void* param) { // in case of fill-history task, stop the tsdb file scan operation. 
if (pTask->info.fillHistory == 1) { void* pExecutor = pTask->exec.pExecutor; - code = qKillTask(pExecutor, TSDB_CODE_SUCCESS); + code = qKillTask(pExecutor, TSDB_CODE_SUCCESS, 10000); } stDebug("vgId:%d s-task:%s set pause flag and pause task", pMeta->vgId, pTask->id.idStr); @@ -1296,6 +1296,8 @@ const char* streamTaskGetExecType(int32_t type) { return "resume-task-from-idle"; case STREAM_EXEC_T_ADD_FAILED_TASK: return "record-start-failed-task"; + case STREAM_EXEC_T_STOP_ONE_TASK: + return "stop-one-task"; case 0: return "exec-all-tasks"; default: diff --git a/source/libs/stream/src/streamUtil.c b/source/libs/stream/src/streamUtil.c index 4c481e6041..11e291c876 100644 --- a/source/libs/stream/src/streamUtil.c +++ b/source/libs/stream/src/streamUtil.c @@ -54,6 +54,15 @@ void streamMetaRUnLock(SStreamMeta* pMeta) { } } +int32_t streamMetaTryRlock(SStreamMeta* pMeta) { + int32_t code = taosThreadRwlockTryRdlock(&pMeta->lock); + if (code) { + stError("vgId:%d try meta-rlock failed, code:%s", pMeta->vgId, tstrerror(code)); + } + + return code; +} + void streamMetaWLock(SStreamMeta* pMeta) { // stTrace("vgId:%d meta-wlock", pMeta->vgId); int32_t code = taosThreadRwlockWrlock(&pMeta->lock); diff --git a/source/libs/sync/inc/syncPipeline.h b/source/libs/sync/inc/syncPipeline.h index eeb24d2f16..147f8a67ae 100644 --- a/source/libs/sync/inc/syncPipeline.h +++ b/source/libs/sync/inc/syncPipeline.h @@ -40,6 +40,7 @@ typedef struct SSyncLogReplMgr { int32_t retryBackoff; int32_t peerId; int32_t sendCount; + TdThreadMutex mutex; } SSyncLogReplMgr; typedef struct SSyncLogBufEntry { diff --git a/source/libs/sync/inc/syncRaftStore.h b/source/libs/sync/inc/syncRaftStore.h index 38a8ed234b..f45dccffd8 100644 --- a/source/libs/sync/inc/syncRaftStore.h +++ b/source/libs/sync/inc/syncRaftStore.h @@ -35,6 +35,7 @@ void raftStoreClearVote(SSyncNode *pNode); void raftStoreNextTerm(SSyncNode *pNode); void raftStoreSetTerm(SSyncNode *pNode, SyncTerm term); SyncTerm raftStoreGetTerm(SSyncNode *pNode); +SyncTerm raftStoreTryGetTerm(SSyncNode *pNode); #ifdef __cplusplus } diff --git a/source/libs/sync/inc/syncUtil.h b/source/libs/sync/inc/syncUtil.h index 7c4f9b2781..e82c36e8e6 100644 --- a/source/libs/sync/inc/syncUtil.h +++ b/source/libs/sync/inc/syncUtil.h @@ -34,7 +34,7 @@ extern "C" { #define sGTrace(param, ...) do { if (sDebugFlag & DEBUG_TRACE) { char buf[40] = {0}; TRACE_TO_STR(trace, buf); sTrace(param ", QID:%s", __VA_ARGS__, buf);}} while(0) #define sGFatal(param, ...) do { if (sDebugFlag & DEBUG_FATAL) { char buf[40] = {0}; TRACE_TO_STR(trace, buf); sFatal(param ", QID:%s", __VA_ARGS__, buf);}} while(0) -#define sGError(param, ...) do { if (sDebugFlag & DEBUG_ERROR) { char buf[40] = {0}; TRACE_TO_STR(trace, buf);sError(param ", QID:%s", __VA_ARGS__, buf);}} while(0) +#define sGError(param, ...) do { if (sDebugFlag & DEBUG_ERROR) { char buf[40] = {0}; TRACE_TO_STR(trace, buf); sError(param ", QID:%s", __VA_ARGS__, buf);}} while(0) #define sGWarn(param, ...) do { if (sDebugFlag & DEBUG_WARN) { char buf[40] = {0}; TRACE_TO_STR(trace, buf); sWarn(param ", QID:%s", __VA_ARGS__, buf);}} while(0) #define sGInfo(param, ...) do { if (sDebugFlag & DEBUG_INFO) { char buf[40] = {0}; TRACE_TO_STR(trace, buf); sInfo(param ", QID:%s", __VA_ARGS__, buf);}} while(0) #define sGDebug(param, ...) do { if (sDebugFlag & DEBUG_DEBUG) { char buf[40] = {0}; TRACE_TO_STR(trace, buf); sDebug(param ", QID:%s", __VA_ARGS__, buf);}} while(0) @@ -46,12 +46,19 @@ extern "C" { #define sLDebug(...) 
if (sDebugFlag & DEBUG_DEBUG) { taosPrintLongString("SYN DEBUG ", DEBUG_DEBUG, sDebugFlag, __VA_ARGS__); } #define sLTrace(...) if (sDebugFlag & DEBUG_TRACE) { taosPrintLongString("SYN TRACE ", DEBUG_TRACE, sDebugFlag, __VA_ARGS__); } -#define sNFatal(pNode, ...) if (sDebugFlag & DEBUG_FATAL) { syncPrintNodeLog("SYN FATAL ", DEBUG_FATAL, 255, true, pNode, __VA_ARGS__); } -#define sNError(pNode, ...) if (sDebugFlag & DEBUG_ERROR) { syncPrintNodeLog("SYN ERROR ", DEBUG_ERROR, 255, true, pNode, __VA_ARGS__); } -#define sNWarn(pNode, ...) if (sDebugFlag & DEBUG_WARN) { syncPrintNodeLog("SYN WARN ", DEBUG_WARN, 255, true, pNode, __VA_ARGS__); } -#define sNInfo(pNode, ...) if (sDebugFlag & DEBUG_INFO) { syncPrintNodeLog("SYN INFO ", DEBUG_INFO, 255, true, pNode, __VA_ARGS__); } -#define sNDebug(pNode, ...) if (sDebugFlag & DEBUG_DEBUG) { syncPrintNodeLog("SYN DEBUG ", DEBUG_DEBUG, sDebugFlag, false, pNode, __VA_ARGS__); } -#define sNTrace(pNode, ...) if (sDebugFlag & DEBUG_TRACE) { syncPrintNodeLog("SYN TRACE ", DEBUG_TRACE, sDebugFlag, false, pNode, __VA_ARGS__); } +#define sNFatal(pNode, ...) if (sDebugFlag & DEBUG_FATAL) { syncPrintNodeLog("SYN FATAL ", DEBUG_FATAL, 255, true, pNode, __VA_ARGS__); } +#define sNError(pNode, ...) if (sDebugFlag & DEBUG_ERROR) { syncPrintNodeLog("SYN ERROR ", DEBUG_ERROR, 255, true, pNode, __VA_ARGS__); } +#define sNWarn(pNode, ...) if (sDebugFlag & DEBUG_WARN) { syncPrintNodeLog("SYN WARN ", DEBUG_WARN, 255, true, pNode, __VA_ARGS__); } +#define sNInfo(pNode, ...) if (sDebugFlag & DEBUG_INFO) { syncPrintNodeLog("SYN INFO ", DEBUG_INFO, 255, true, pNode, __VA_ARGS__); } +#define sNDebug(pNode, ...) if (sDebugFlag & DEBUG_DEBUG) { syncPrintNodeLog("SYN DEBUG ", DEBUG_DEBUG, sDebugFlag, true, pNode, __VA_ARGS__); } +#define sNTrace(pNode, ...) if (sDebugFlag & DEBUG_TRACE) { syncPrintNodeLog("SYN TRACE ", DEBUG_TRACE, sDebugFlag, true, pNode, __VA_ARGS__); } + +#define sHFatal(pNode, ...) if (sDebugFlag & DEBUG_FATAL) { syncPrintHbLog("SYN FATAL ", DEBUG_FATAL, 255, true, pNode, __VA_ARGS__); } +#define sHError(pNode, ...) if (sDebugFlag & DEBUG_ERROR) { syncPrintHbLog("SYN ERROR ", DEBUG_ERROR, 255, true, pNode, __VA_ARGS__); } +#define sHWarn(pNode, ...) if (sDebugFlag & DEBUG_WARN) { syncPrintHbLog("SYN WARN ", DEBUG_WARN, 255, true, pNode, __VA_ARGS__); } +#define sHInfo(pNode, ...) if (sDebugFlag & DEBUG_INFO) { syncPrintHbLog("SYN INFO ", DEBUG_INFO, 255, true, pNode, __VA_ARGS__); } +#define sHDebug(pNode, ...) if (sDebugFlag & DEBUG_DEBUG) { syncPrintHbLog("SYN DEBUG ", DEBUG_DEBUG, sDebugFlag, true, pNode, __VA_ARGS__); } +#define sHTrace(pNode, ...) if (sDebugFlag & DEBUG_TRACE) { syncPrintHbLog("SYN TRACE ", DEBUG_TRACE, sDebugFlag, true, pNode, __VA_ARGS__); } #define sSFatal(pSender, ...) if (sDebugFlag & DEBUG_FATAL) { syncPrintSnapshotSenderLog("SYN FATAL ", DEBUG_FATAL, 255, pSender, __VA_ARGS__); } #define sSError(pSender, ...) 
if (sDebugFlag & DEBUG_ERROR) { syncPrintSnapshotSenderLog("SYN ERROR ", DEBUG_ERROR, 255, pSender, __VA_ARGS__); } @@ -87,6 +94,8 @@ void syncUtilGenerateArbToken(int32_t nodeId, int32_t groupId, char* buf); void syncPrintNodeLog(const char* flags, ELogLevel level, int32_t dflag, bool formatTime, SSyncNode* pNode, const char* format, ...); +void syncPrintHbLog(const char* flags, ELogLevel level, int32_t dflag, bool formatTime, SSyncNode* pNode, + const char* format, ...); void syncPrintSnapshotSenderLog(const char* flags, ELogLevel level, int32_t dflag, SSyncSnapshotSender* pSender, const char* format, ...); void syncPrintSnapshotReceiverLog(const char* flags, ELogLevel level, int32_t dflag, SSyncSnapshotReceiver* pReceiver, diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index 18252db9ee..8d81a03344 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -1140,26 +1140,28 @@ int32_t syncLogReplRecover(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEn int32_t syncLogReplProcessHeartbeatReply(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncHeartbeatReply* pMsg) { - SSyncLogBuffer* pBuf = pNode->pLogBuf; - (void)taosThreadMutexLock(&pBuf->mutex); + (void)taosThreadMutexLock(&pMgr->mutex); if (pMsg->startTime != 0 && pMsg->startTime != pMgr->peerStartTime) { sInfo("vgId:%d, reset sync log repl in heartbeat. peer:%" PRIx64 ", start time:%" PRId64 ", old:%" PRId64 "", pNode->vgId, pMsg->srcId.addr, pMsg->startTime, pMgr->peerStartTime); syncLogReplReset(pMgr); pMgr->peerStartTime = pMsg->startTime; } - (void)taosThreadMutexUnlock(&pBuf->mutex); + (void)taosThreadMutexUnlock(&pMgr->mutex); return 0; } int32_t syncLogReplProcessReply(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEntriesReply* pMsg) { SSyncLogBuffer* pBuf = pNode->pLogBuf; - (void)taosThreadMutexLock(&pBuf->mutex); + (void)taosThreadMutexLock(&pMgr->mutex); if (pMsg->startTime != pMgr->peerStartTime) { sInfo("vgId:%d, reset sync log repl in appendlog reply.
peer:%" PRIx64 ", start time:%" PRId64 ", old:%" PRId64, pNode->vgId, pMsg->srcId.addr, pMsg->startTime, pMgr->peerStartTime); syncLogReplReset(pMgr); pMgr->peerStartTime = pMsg->startTime; } + (void)taosThreadMutexUnlock(&pMgr->mutex); + + (void)taosThreadMutexLock(&pBuf->mutex); int32_t code = 0; if (pMgr->restored) { @@ -1324,6 +1327,12 @@ SSyncLogReplMgr* syncLogReplCreate() { return NULL; } + int32_t code = taosThreadMutexInit(&pMgr->mutex, NULL); + if (code) { + terrno = code; + return NULL; + } + return pMgr; } @@ -1331,6 +1340,7 @@ void syncLogReplDestroy(SSyncLogReplMgr* pMgr) { if (pMgr == NULL) { return; } + (void)taosThreadMutexDestroy(&pMgr->mutex); taosMemoryFree(pMgr); return; } diff --git a/source/libs/sync/src/syncRaftStore.c b/source/libs/sync/src/syncRaftStore.c index c61be4356c..f2f0bf35c2 100644 --- a/source/libs/sync/src/syncRaftStore.c +++ b/source/libs/sync/src/syncRaftStore.c @@ -213,3 +213,13 @@ SyncTerm raftStoreGetTerm(SSyncNode *pNode) { (void)taosThreadMutexUnlock(&pNode->raftStore.mutex); return term; } + +SyncTerm raftStoreTryGetTerm(SSyncNode *pNode) { + SyncTerm term = 0; + if (taosThreadMutexTryLock(&pNode->raftStore.mutex) == 0) { + term = pNode->raftStore.currentTerm; + (void)taosThreadMutexUnlock(&pNode->raftStore.mutex); + } + + return term; +} diff --git a/source/libs/sync/src/syncUtil.c b/source/libs/sync/src/syncUtil.c index 38fd660cfd..b20f0d8aeb 100644 --- a/source/libs/sync/src/syncUtil.c +++ b/source/libs/sync/src/syncUtil.c @@ -116,7 +116,7 @@ static void syncPrintTime(bool formatTime, int32_t* len, int64_t tsMs, int32_t i if (formatTime) { char pBuf[TD_TIME_STR_LEN] = {0}; if (tsMs > 0) { - if (taosFormatUtcTime(pBuf, TD_TIME_STR_LEN, tsMs, TSDB_TIME_PRECISION_MILLI) != 0) { + if (formatTimestampLocal(pBuf, tsMs, TSDB_TIME_PRECISION_MILLI) == NULL) { pBuf[0] = '\0'; } } @@ -215,7 +215,7 @@ void syncPrintNodeLog(const char* flags, ELogLevel level, int32_t dflag, bool fo SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0}; if (pNode->pFsm != NULL && pNode->pFsm->FpGetSnapshotInfo != NULL) { - (void)pNode->pFsm->FpGetSnapshotInfo(pNode->pFsm, &snapshot); + (void)pNode->pFsm->FpGetSnapshotInfo(pNode->pFsm, &snapshot); // vnodeSyncGetSnapshotInfo } SyncIndex logLastIndex = SYNC_INDEX_INVALID; @@ -254,12 +254,12 @@ void syncPrintNodeLog(const char* flags, ELogLevel level, int32_t dflag, bool fo int32_t aqItems = 0; if (pNode != NULL && pNode->pFsm != NULL && pNode->pFsm->FpApplyQueueItems != NULL) { - aqItems = pNode->pFsm->FpApplyQueueItems(pNode->pFsm); + aqItems = pNode->pFsm->FpApplyQueueItems(pNode->pFsm); // vnodeApplyQueueItems } // restore error code terrno = errCode; - SyncIndex appliedIndex = pNode->pFsm->FpAppliedIndexCb(pNode->pFsm); + SyncIndex appliedIndex = pNode->pFsm->FpAppliedIndexCb(pNode->pFsm); // vnodeSyncAppliedIndex if (pNode != NULL) { taosPrintLog( @@ -270,15 +270,71 @@ void syncPrintNodeLog(const char* flags, ELogLevel level, int32_t dflag, bool fo ", elect-times:%d, as-leader-times:%d, as-assigned-leader-times:%d, cfg-ch-times:%d, hb-slow:%d, hbr-slow:%d, " "aq-items:%d, snaping:%" PRId64 ", replicas:%d, last-cfg:%" PRId64 ", chging:%d, restore:%d, quorum:%d, elect-lc-timer:%" PRId64 ", hb:%" PRId64 - ", buffer:%s, repl-mgrs:%s, members:%s, send hb:%s, recv hb:%s, recv hb-reply:%s, arb-token:%s, msg[sent:%d, recv:%d, slow-recev:%d]", + ", buffer:%s, repl-mgrs:%s, members:%s, send hb:%s, recv hb:%s, recv hb-reply:%s, arb-token:%s, msg[sent:%d, " + "recv:%d, slow-recv:%d]", pNode->vgId, eventLog, 
syncStr(pNode->state), currentTerm, pNode->commitIndex, pNode->assignedCommitIndex, appliedIndex, logBeginIndex, logLastIndex, pNode->minMatchIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm, pNode->electNum, pNode->becomeLeaderNum, pNode->becomeAssignedLeaderNum, pNode->configChangeNum, pNode->hbSlowNum, pNode->hbrSlowNum, aqItems, pNode->snapshottingIndex, pNode->replicaNum, pNode->raftCfg.lastConfigIndex, pNode->changing, pNode->restoreFinish, syncNodeDynamicQuorum(pNode), pNode->electTimerLogicClock, pNode->heartbeatTimerLogicClockUser, bufferStatesStr, - replMgrStatesStr, cfgStr, sentHbTimeStr, hbTimeStr, hbrTimeStr, pNode->arbToken, pNode->sendCount, pNode->recvCount, - pNode->slowCount); + replMgrStatesStr, cfgStr, sentHbTimeStr, hbTimeStr, hbrTimeStr, pNode->arbToken, pNode->sendCount, + pNode->recvCount, pNode->slowCount); + } +} + +void syncPrintHbLog(const char* flags, ELogLevel level, int32_t dflag, bool formatTime, SSyncNode* pNode, + const char* format, ...) { + if (pNode == NULL || pNode->pLogStore == NULL) return; + int64_t currentTerm = raftStoreTryGetTerm(pNode); + + // save error code, otherwise it will be overwritten + int32_t errCode = terrno; + + int32_t cacheHit = pNode->pLogStore->cacheHit; + int32_t cacheMiss = pNode->pLogStore->cacheMiss; + + char cfgStr[1024] = ""; + syncCfg2SimpleStr(&pNode->raftCfg.cfg, cfgStr, sizeof(cfgStr)); + + char replMgrStatesStr[1024] = ""; + syncLogReplStates2Str(pNode, replMgrStatesStr, sizeof(replMgrStatesStr)); + + char bufferStatesStr[256] = ""; + syncLogBufferStates2Str(pNode, bufferStatesStr, sizeof(bufferStatesStr)); + + char hbrTimeStr[256] = ""; + syncHearbeatReplyTime2Str(pNode, hbrTimeStr, sizeof(hbrTimeStr), formatTime); + + char hbTimeStr[256] = ""; + syncHearbeatTime2Str(pNode, hbTimeStr, sizeof(hbTimeStr), formatTime); + + char sentHbTimeStr[512] = ""; + syncSentHearbeatTime2Str(pNode, sentHbTimeStr, sizeof(sentHbTimeStr), formatTime); + + char eventLog[512]; // {0}; + va_list argpointer; + va_start(argpointer, format); + int32_t writeLen = vsnprintf(eventLog, sizeof(eventLog), format, argpointer); + va_end(argpointer); + + terrno = errCode; + + if (pNode != NULL) { + taosPrintLog( + flags, level, dflag, + "vgId:%d, %s, sync:%s, term:%" PRIu64 ", commit-index:%" PRId64 ", assigned-index:%" PRId64 ", min:%" PRId64 + ", elect-times:%d, as-leader-times:%d, as-assigned-leader-times:%d, cfg-ch-times:%d, hb-slow:%d, hbr-slow:%d, " + ", snaping:%" PRId64 ", replicas:%d, last-cfg:%" PRId64 + ", chging:%d, restore:%d, quorum:%d, elect-lc-timer:%" PRId64 ", hb:%" PRId64 + ", buffer:%s, repl-mgrs:%s, members:%s, send hb:%s, recv hb:%s, recv hb-reply:%s, arb-token:%s, msg[sent:%d, " + "recv:%d, slow-recv:%d]", + pNode->vgId, eventLog, syncStr(pNode->state), currentTerm, pNode->commitIndex, pNode->assignedCommitIndex, + pNode->minMatchIndex, pNode->electNum, pNode->becomeLeaderNum, pNode->becomeAssignedLeaderNum, + pNode->configChangeNum, pNode->hbSlowNum, pNode->hbrSlowNum, pNode->snapshottingIndex, pNode->replicaNum, + pNode->raftCfg.lastConfigIndex, pNode->changing, pNode->restoreFinish, syncNodeDynamicQuorum(pNode), + pNode->electTimerLogicClock, pNode->heartbeatTimerLogicClockUser, bufferStatesStr, replMgrStatesStr, cfgStr, + sentHbTimeStr, hbTimeStr, hbrTimeStr, pNode->arbToken, pNode->sendCount, pNode->recvCount, pNode->slowCount); } } @@ -411,17 +467,24 @@ void syncLogRecvAppendEntriesReply(SSyncNode* pSyncNode, const SyncAppendEntries void syncLogSendHeartbeat(SSyncNode* pSyncNode, const SyncHeartbeat* pMsg, bool 
printX, int64_t timerElapsed, int64_t execTime) { - if (printX) { - sNTrace(pSyncNode, - "send sync-heartbeat to dnode:%d {term:%" PRId64 ", commit-index:%" PRId64 ", min-match:%" PRId64 - ", ts:%" PRId64 "}, x", - DID(&pMsg->destId), pMsg->term, pMsg->commitIndex, pMsg->minMatchIndex, pMsg->timeStamp); - } else { - sNTrace(pSyncNode, - "send sync-heartbeat to dnode:%d {term:%" PRId64 ", commit-index:%" PRId64 ", min-match:%" PRId64 - ", ts:%" PRId64 "}, timer-elapsed:%" PRId64 ", next-exec:%" PRId64, - DID(&pMsg->destId), pMsg->term, pMsg->commitIndex, pMsg->minMatchIndex, pMsg->timeStamp, timerElapsed, - execTime); + if (sDebugFlag & DEBUG_TRACE) { + char pBuf[TD_TIME_STR_LEN] = {0}; + if (pMsg->timeStamp > 0) { + if (formatTimestampLocal(pBuf, pMsg->timeStamp, TSDB_TIME_PRECISION_MILLI) == NULL) { + pBuf[0] = '\0'; + } + } + if (printX) { + sHTrace(pSyncNode, + "send sync-heartbeat to dnode:%d {term:%" PRId64 ", commit-index:%" PRId64 ", min-match:%" PRId64 + ", ts:%s}, x", + DID(&pMsg->destId), pMsg->term, pMsg->commitIndex, pMsg->minMatchIndex, pBuf); + } else { + sHTrace(pSyncNode, + "send sync-heartbeat to dnode:%d {term:%" PRId64 ", commit-index:%" PRId64 ", min-match:%" PRId64 + ", ts:%s}, timer-elapsed:%" PRId64 ", next-exec:%" PRId64, + DID(&pMsg->destId), pMsg->term, pMsg->commitIndex, pMsg->minMatchIndex, pBuf, timerElapsed, execTime); + } } } @@ -429,20 +492,36 @@ void syncLogRecvHeartbeat(SSyncNode* pSyncNode, const SyncHeartbeat* pMsg, int64 if (timeDiff > SYNC_HEARTBEAT_SLOW_MS) { pSyncNode->hbSlowNum++; - sNTrace(pSyncNode, - "recv sync-heartbeat from dnode:%d slow {term:%" PRId64 ", commit-index:%" PRId64 ", min-match:%" PRId64 - ", ts:%" PRId64 "}, QID:%s, net elapsed:%" PRId64, - DID(&pMsg->srcId), pMsg->term, pMsg->commitIndex, pMsg->minMatchIndex, pMsg->timeStamp, s, timeDiff); - } + char pBuf[TD_TIME_STR_LEN] = {0}; + if (pMsg->timeStamp > 0) { + if (formatTimestampLocal(pBuf, pMsg->timeStamp, TSDB_TIME_PRECISION_MILLI) == NULL) { + pBuf[0] = '\0'; + } + } - sNTrace(pSyncNode, - "recv sync-heartbeat from dnode:%d {term:%" PRId64 ", commit-index:%" PRId64 ", min-match:%" PRId64 - ", ts:%" PRId64 "}, QID:%s, net elapsed:%" PRId64, - DID(&pMsg->srcId), pMsg->term, pMsg->commitIndex, pMsg->minMatchIndex, pMsg->timeStamp, s, timeDiff); + sHError(pSyncNode, + "recv sync-heartbeat from dnode:%d slow(%d ms) {term:%" PRId64 ", commit-index:%" PRId64 + ", min-match:%" PRId64 ", ts:%s}, QID:%s, net elapsed:%" PRId64 "ms", + DID(&pMsg->srcId), SYNC_HEARTBEAT_SLOW_MS, pMsg->term, pMsg->commitIndex, pMsg->minMatchIndex, pBuf, s, + timeDiff); + } else { + if (sDebugFlag & DEBUG_TRACE) { + char pBuf[TD_TIME_STR_LEN] = {0}; + if (pMsg->timeStamp > 0) { + if (formatTimestampLocal(pBuf, pMsg->timeStamp, TSDB_TIME_PRECISION_MILLI) == NULL) { + pBuf[0] = '\0'; + } + } + sHTrace(pSyncNode, + "recv sync-heartbeat from dnode:%d {term:%" PRId64 ", commit-index:%" PRId64 ", min-match:%" PRId64 + ", ts:%s}, QID:%s, net elapsed:%" PRId64 "ms", + DID(&pMsg->srcId), pMsg->term, pMsg->commitIndex, pMsg->minMatchIndex, pBuf, s, timeDiff); + } + } } void syncLogSendHeartbeatReply(SSyncNode* pSyncNode, const SyncHeartbeatReply* pMsg, const char* s) { - sNTrace(pSyncNode, "send sync-heartbeat-reply from dnode:%d {term:%" PRId64 ", ts:%" PRId64 "}, %s", + sHTrace(pSyncNode, "send sync-heartbeat-reply to dnode:%d {term:%" PRId64 ", ts:%" PRId64 "}, %s", DID(&pMsg->destId), pMsg->term, pMsg->timeStamp, s); } @@ -450,14 +529,29 @@ void syncLogRecvHeartbeatReply(SSyncNode* pSyncNode, const
SyncHeartbeatReply* p if (timeDiff > SYNC_HEARTBEAT_REPLY_SLOW_MS) { pSyncNode->hbrSlowNum++; - sNTrace(pSyncNode, - "recv sync-heartbeat-reply from dnode:%d slow {term:%" PRId64 ", ts:%" PRId64 "}, %s, net elapsed:%" PRId64, - DID(&pMsg->srcId), pMsg->term, pMsg->timeStamp, s, timeDiff); - } + char pBuf[TD_TIME_STR_LEN] = {0}; + if (pMsg->timeStamp > 0) { + if (formatTimestampLocal(pBuf, pMsg->timeStamp, TSDB_TIME_PRECISION_MILLI) == NULL) { + pBuf[0] = '\0'; + } + } - sNTrace(pSyncNode, - "recv sync-heartbeat-reply from dnode:%d {term:%" PRId64 ", ts:%" PRId64 "}, %s, net elapsed:%" PRId64, - DID(&pMsg->srcId), pMsg->term, pMsg->timeStamp, s, timeDiff); + sHError(pSyncNode, + "recv sync-heartbeat-reply from dnode:%d slow(%d ms) {term:%" PRId64 ", ts:%s}, %s, net elapsed:%" PRId64, + DID(&pMsg->srcId), SYNC_HEARTBEAT_REPLY_SLOW_MS, pMsg->term, pBuf, s, timeDiff); + } else { + if (sDebugFlag & DEBUG_TRACE) { + char pBuf[TD_TIME_STR_LEN] = {0}; + if (pMsg->timeStamp > 0) { + if (formatTimestampLocal(pBuf, pMsg->timeStamp, TSDB_TIME_PRECISION_MILLI) == NULL) { + pBuf[0] = '\0'; + } + } + sHTrace(pSyncNode, + "recv sync-heartbeat-reply from dnode:%d {term:%" PRId64 ", ts:%s}, %s, net elapsed:%" PRId64, + DID(&pMsg->srcId), pMsg->term, pBuf, s, timeDiff); + } + } } void syncLogSendSyncSnapshotSend(SSyncNode* pSyncNode, const SyncSnapshotSend* pMsg, const char* s) { diff --git a/source/libs/transport/src/tmsgcb.c b/source/libs/transport/src/tmsgcb.c index e87011f097..0632e21d20 100644 --- a/source/libs/transport/src/tmsgcb.c +++ b/source/libs/transport/src/tmsgcb.c @@ -37,7 +37,7 @@ int32_t tmsgPutToQueue(const SMsgCb* msgcb, EQueueType qtype, SRpcMsg* pMsg) { } int32_t tmsgGetQueueSize(const SMsgCb* msgcb, int32_t vgId, EQueueType qtype) { - return (*msgcb->qsizeFp)(msgcb->mgmt, vgId, qtype); + return (*msgcb->qsizeFp)(msgcb->mgmt, vgId, qtype); // vmGetQueueSize } int32_t tmsgSendReq(const SEpSet* epSet, SRpcMsg* pMsg) { diff --git a/source/util/src/tworker.c b/source/util/src/tworker.c index dbd8cb159e..469f98fcf0 100644 --- a/source/util/src/tworker.c +++ b/source/util/src/tworker.c @@ -256,7 +256,7 @@ static void *tAutoQWorkerThreadFp(SQueueWorker *worker) { return NULL; } -STaosQueue *tAutoQWorkerAllocQueue(SAutoQWorkerPool *pool, void *ahandle, FItem fp) { +STaosQueue *tAutoQWorkerAllocQueue(SAutoQWorkerPool *pool, void *ahandle, FItem fp, int32_t minNum) { int32_t code; STaosQueue *queue; @@ -280,7 +280,10 @@ STaosQueue *tAutoQWorkerAllocQueue(SAutoQWorkerPool *pool, void *ahandle, FItem int32_t queueNum = taosGetQueueNumber(pool->qset); int32_t curWorkerNum = taosArrayGetSize(pool->workers); int32_t dstWorkerNum = ceilf(queueNum * pool->ratio); - if (dstWorkerNum < 2) dstWorkerNum = 2; + + if (dstWorkerNum < minNum) { + dstWorkerNum = minNum; + } // spawn a thread to process queue while (curWorkerNum < dstWorkerNum) { diff --git a/tests/army/query/queryBugs.py b/tests/army/query/queryBugs.py index cd61b8c620..e6b20addb9 100644 --- a/tests/army/query/queryBugs.py +++ b/tests/army/query/queryBugs.py @@ -325,12 +325,97 @@ class TDTestCase(TBase): tdSql.query("select * from t1 where ts > '2025-01-01 00:00:00';") tdSql.checkRows(0) + def FIX_TS_6058(self): + tdSql.execute("create database iot_60j_production_eqp;") + tdSql.execute("create table iot_60j_production_eqp.realtime_data_collections (device_time TIMESTAMP, item_value VARCHAR(64), \ + upload_time TIMESTAMP) tags(bu_id VARCHAR(64), district_id VARCHAR(64), factory_id VARCHAR(64), production_line_id VARCHAR(64), \
+ production_processes_id VARCHAR(64), work_center_id VARCHAR(64), station_id VARCHAR(64), device_name VARCHAR(64), item_name VARCHAR(64));") + + sub1 = " SELECT '实际速度' as name, 0 as rank, '当月' as cycle,\ + CASE \ + WHEN COUNT(item_value) = 0 THEN NULL\ + ELSE AVG(CAST(item_value AS double))\ + END AS item_value\ + FROM iot_60j_production_eqp.realtime_data_collections\ + WHERE device_time >= TO_TIMESTAMP(CONCAT(substring(TO_CHAR(today ,'YYYY-MM-dd'), 1,7), '-01 00:00:00'), 'YYYY-mm-dd')\ + AND item_name = 'Premixer_SpindleMotor_ActualSpeed' " + + sub2 = " SELECT '实际速度' as name, 3 as rank, TO_CHAR(TODAY(),'YYYY-MM-dd') as cycle,\ + CASE \ + WHEN COUNT(item_value) = 0 THEN NULL\ + ELSE AVG(CAST(item_value AS double))\ + END AS item_value\ + FROM iot_60j_production_eqp.realtime_data_collections\ + WHERE device_time >= TODAY()-1d and device_time <= now()\ + AND item_name = 'Premixer_SpindleMotor_ActualSpeed' " + + sub3 = " SELECT '设定速度' as name, 1 as rank, CAST(CONCAT('WEEK-',CAST(WEEKOFYEAR(TODAY()-1w) as VARCHAR)) as VARCHAR) as cycle,\ + CASE \ + WHEN COUNT(item_value) = 0 THEN NULL\ + ELSE AVG(CAST(item_value AS double))\ + END AS item_value\ + FROM iot_60j_production_eqp.realtime_data_collections\ + where \ + item_name = 'Premixer_SpindleMotor_SettingSpeed'\ + AND (\ + (WEEKDAY(now) = 0 AND device_time >= today()-8d and device_time <= today()-1d) OR\ + (WEEKDAY(now) = 1 AND device_time >= today()-9d and device_time <= today()-2d) OR\ + (WEEKDAY(now) = 2 AND device_time >= today()-10d and device_time <= today()-3d) OR\ + (WEEKDAY(now) = 3 AND device_time >= today()-11d and device_time <= today()-4d) OR\ + (WEEKDAY(now) = 4 AND device_time >= today()-12d and device_time <= today()-5d) OR\ + (WEEKDAY(now) = 5 AND device_time >= today()-13d and device_time <= today()-6d) OR\ + (WEEKDAY(now) = 6 AND device_time >= today()-14d and device_time <= today()-7d)\ + ) " + + sub4 = " SELECT '设定速度2' as name, 1 as rank, CAST(CONCAT('WEEK-',CAST(WEEKOFYEAR(TODAY()-1w) as VARCHAR)) as VARCHAR(5000)) as cycle,\ + CASE \ + WHEN COUNT(item_value) = 0 THEN NULL\ + ELSE AVG(CAST(item_value AS double))\ + END AS item_value\ + FROM iot_60j_production_eqp.realtime_data_collections\ + where \ + item_name = 'Premixer_SpindleMotor_SettingSpeed'\ + AND (\ + (WEEKDAY(now) = 0 AND device_time >= today()-8d and device_time <= today()-1d) OR\ + (WEEKDAY(now) = 1 AND device_time >= today()-9d and device_time <= today()-2d) OR\ + (WEEKDAY(now) = 2 AND device_time >= today()-10d and device_time <= today()-3d) OR\ + (WEEKDAY(now) = 3 AND device_time >= today()-11d and device_time <= today()-4d) OR\ + (WEEKDAY(now) = 4 AND device_time >= today()-12d and device_time <= today()-5d) OR\ + (WEEKDAY(now) = 5 AND device_time >= today()-13d and device_time <= today()-6d) OR\ + (WEEKDAY(now) = 6 AND device_time >= today()-14d and device_time <= today()-7d)\ + ) " + for uniontype in ["union", "union all"]: + repeatLines = 1 + if uniontype == "union": + repeatLines = 0 + for i in range(1, 10): + tdLog.debug(f"test: realtime_data_collections {i} times...") + tdSql.query(f"select name,cycle,item_value from ( {sub1} {uniontype} {sub2} {uniontype} {sub3}) order by rank,name,cycle;", queryTimes = 1) + tdSql.checkRows(3) + tdSql.query(f"select name,cycle,item_value from ( {sub1} {uniontype} {sub2} {uniontype} {sub4}) order by rank,name,cycle;", queryTimes = 1) + tdSql.checkRows(3) + tdSql.query(f"select name,cycle,item_value from ( {sub3} {uniontype} {sub2} {uniontype} {sub1}) order by rank,name,cycle;", queryTimes = 1) + tdSql.checkRows(3) +
tdSql.query(f"select name,cycle,item_value from ( {sub3} {uniontype} {sub2} {uniontype} {sub1}) order by rank,name,cycle;", queryTimes = 1) + tdSql.checkRows(3) + tdSql.query(f"select name,cycle,item_value from ( {sub2} {uniontype} {sub4} {uniontype} {sub1}) order by rank,name,cycle;", queryTimes = 1) + tdSql.checkRows(3) + tdSql.query(f"select name,cycle,item_value from ( {sub3} {uniontype} {sub2} {uniontype} {sub1} {uniontype} {sub4}) order by rank,name,cycle;", queryTimes = 1) + tdSql.checkRows(4) + tdSql.query(f"select name,cycle,item_value from ( {sub2} {uniontype} {sub3} {uniontype} {sub1} {uniontype} {sub4}) order by rank,name,cycle;", queryTimes = 1) + tdSql.checkRows(4) + tdSql.query(f"select name,cycle,item_value from ( {sub3} {uniontype} {sub4} {uniontype} {sub1} {uniontype} {sub2}) order by rank,name,cycle;", queryTimes = 1) + tdSql.checkRows(4) + tdSql.query(f"select name,cycle,item_value from ( {sub3} {uniontype} {sub4} {uniontype} {sub1} {uniontype} {sub2} {uniontype} {sub4}) order by rank,name,cycle;", queryTimes = 1) + tdSql.checkRows(4 + repeatLines) + tdSql.query(f"select name,cycle,item_value from ( {sub3} {uniontype} {sub2} {uniontype} {sub1} {uniontype} {sub2} {uniontype} {sub4}) order by rank,name,cycle;", queryTimes = 1) + tdSql.checkRows(4 + repeatLines) + # run def run(self): tdLog.debug(f"start to excute {__file__}") self.ts5946() - # TD BUGS self.FIX_TD_30686() self.FIX_TD_31684() @@ -340,6 +425,7 @@ class TDTestCase(TBase): self.FIX_TS_5143() self.FIX_TS_5239() self.FIX_TS_5984() + self.FIX_TS_6058() tdLog.success(f"{__file__} successfully executed") diff --git a/tests/ci/func.txt b/tests/ci/func.txt index 45d4fb1c11..ab8269c8e1 100644 --- a/tests/ci/func.txt +++ b/tests/ci/func.txt @@ -79,7 +79,7 @@ (void)streamMetaAddFailedTask (void)streamMetaAddTaskLaunchResult (void)streamMetaCommit -(void)streamMetaRemoveTask +(void)streamMetaRemoveTaskInMeta (void)streamMetaSendHbHelper (void)streamMetaStartAllTasks (void)streamMetaStartOneTask @@ -409,7 +409,7 @@ (void)tqProcessTaskConsenChkptIdReq (void)tqProcessTaskResetReq (void)tqScanWalAsync -(void)tqStopStreamTasksAsync +(void)tqStopStreamAllTasksAsync (void)tqUpdateTbUidList (void)transAcquireExHandle (void)transAsyncSend diff --git a/tests/script/tsim/stream/tag.sim b/tests/script/tsim/stream/tag.sim index f293f4ac05..9f4c62e747 100644 --- a/tests/script/tsim/stream/tag.sim +++ b/tests/script/tsim/stream/tag.sim @@ -26,7 +26,7 @@ sql insert into t1 values(1648791223000,0,1,1,1.0); sql insert into t1 values(1648791223001,9,2,2,1.1); sql insert into t1 values(1648791223009,0,3,3,1.0); -sleep 300 +sleep 1000 sql select * from streamt; if $data01 != 3 then diff --git a/tests/system-test/6-cluster/5dnode2mnode.py b/tests/system-test/6-cluster/5dnode2mnode.py index aa9c3fc053..82e27e6dc4 100644 --- a/tests/system-test/6-cluster/5dnode2mnode.py +++ b/tests/system-test/6-cluster/5dnode2mnode.py @@ -48,6 +48,7 @@ class TDTestCase: tdSql.checkData(4,1,'%s:6430'%self.host) tdSql.checkData(0,4,'ready') tdSql.checkData(4,4,'ready') + time.sleep(1) tdSql.query("select * from information_schema.ins_mnodes;") tdSql.checkData(0,1,'%s:6030'%self.host) tdSql.checkData(0,2,'leader') diff --git a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_createDb_replica1.py b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_createDb_replica1.py index fb00fc0846..ab4d2ef990 100644 --- a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_createDb_replica1.py +++ b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_createDb_replica1.py @@ -42,6 +42,7 @@
class TDTestCase: return buildPath def check_setup_cluster_status(self): + time.sleep(1) tdSql.query("select * from information_schema.ins_mnodes") for mnode in tdSql.queryResult: name = mnode[1]
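Two editorial notes on the test changes above. First, the FIX_TS_6058 loop toggles between UNION and UNION ALL, and repeatLines encodes the only behavioral difference the checkRows(4 + repeatLines) assertions rely on: UNION deduplicates identical result rows while UNION ALL keeps them, so repeating one subquery adds an extra row only under UNION ALL. A self-contained sketch of that expectation (expectedRows is illustrative, not part of the test framework):

```c
#include <assert.h>
#include <string.h>

/* UNION removes duplicate rows, UNION ALL keeps them: a query that repeats
 * one of its subqueries verbatim yields an extra row only under UNION ALL. */
static int expectedRows(int distinctSubqueries, const char *unionType) {
  int repeatLines = (strcmp(unionType, "union") == 0) ? 0 : 1;
  return distinctSubqueries + repeatLines;
}

int main(void) {
  assert(expectedRows(4, "union") == 4);     /* duplicate collapses */
  assert(expectedRows(4, "union all") == 5); /* duplicate survives */
  return 0;
}
```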
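Second, tAutoQWorkerAllocQueue (tworker.c, earlier in the diff) now takes a minNum floor in place of the hard-coded minimum of 2 when sizing the worker pool. A compact sketch of the sizing arithmetic; dstWorkerNum below is a free-standing illustration, not the library function:

```c
#include <math.h>
#include <stdio.h>

/* Worker-count rule after the tworker.c change: scale with the number of
 * queues by `ratio`, but never drop below the caller-supplied floor. */
static int dstWorkerNum(int queueNum, float ratio, int minNum) {
  int n = (int)ceilf(queueNum * ratio);
  return (n < minNum) ? minNum : n;
}

int main(void) {
  /* 3 queues at ratio 0.5 gives ceilf(1.5) = 2 workers; a floor of 4 wins. */
  printf("%d\n", dstWorkerNum(3, 0.5f, 4)); /* prints 4 */
  return 0;
}
```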