Merge branch '3.0' into doc/analysis

Haojun Liao 2025-03-22 11:17:29 +08:00
commit e0aa0590bd
3835 changed files with 806023 additions and 69391 deletions


@ -1,49 +0,0 @@
version: 1.0.{build}
image:
- Visual Studio 2015
- macos
environment:
matrix:
- ARCH: amd64
- ARCH: x86
matrix:
exclude:
- image: macos
ARCH: x86
for:
-
matrix:
only:
- image: Visual Studio 2015
clone_folder: c:\dev\TDengine
clone_depth: 1
init:
- call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %ARCH%
before_build:
- cd c:\dev\TDengine
- md build
build_script:
- cd build
- cmake -G "NMake Makefiles" .. -DBUILD_JDBC=false
- nmake install
-
matrix:
only:
- image: macos
clone_depth: 1
build_script:
- mkdir debug
- cd debug
- cmake .. > /dev/null
- make > /dev/null
notifications:
- provider: Email
to:
- sangshuduo@gmail.com
on_build_success: true
on_build_failure: true
on_build_status_changed: true


@ -1,13 +0,0 @@
# Use the latest 2.1 version of CircleCI pipeline process engine. See: https://circleci.com/docs/2.0/configuration-reference
version: 2.1
# Use a package of configuration called an orb.
orbs:
# Declare a dependency on the welcome-orb
welcome: circleci/welcome-orb@0.4.1
# Orchestrate or schedule a set of jobs
workflows:
# Name the workflow "welcome"
welcome:
# Run the welcome/run job in its own container
jobs:
- welcome/run


@ -1,266 +0,0 @@
---
kind: pipeline
name: test_amd64
platform:
os: linux
arch: amd64
steps:
- name: build
image: gcc
commands:
- apt-get update
- apt-get install -y cmake build-essential
- mkdir debug
- cd debug
- cmake ..
- make -j4
trigger:
event:
- pull_request
when:
branch:
- develop
- master
- 2.0
- 3.0
---
kind: pipeline
name: test_arm64_bionic
platform:
os: linux
arch: arm64
steps:
- name: build
image: arm64v8/ubuntu:bionic
commands:
- apt-get update
- apt-get install -y cmake build-essential
- mkdir debug
- cd debug
- cmake .. -DCPUTYPE=aarch64 > /dev/null
- make -j4
trigger:
event:
- pull_request
when:
branch:
- develop
- master
- 2.0
- 3.0
---
kind: pipeline
name: test_arm64_focal
platform:
os: linux
arch: arm64
steps:
- name: build
image: arm64v8/ubuntu:focal
commands:
- echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections
- apt-get update
- apt-get install -y -qq cmake build-essential
- mkdir debug
- cd debug
- cmake .. -DCPUTYPE=aarch64 > /dev/null
- make -j4
trigger:
event:
- pull_request
when:
branch:
- develop
- master
- 2.0
- 3.0
---
kind: pipeline
name: test_arm64_centos7
platform:
os: linux
arch: arm64
steps:
- name: build
image: arm64v8/centos:7
commands:
- yum install -y gcc gcc-c++ make cmake git
- mkdir debug
- cd debug
- cmake .. -DCPUTYPE=aarch64 > /dev/null
- make -j4
trigger:
event:
- pull_request
when:
branch:
- develop
- master
- 2.0
- 3.0
---
kind: pipeline
name: test_arm64_centos8
platform:
os: linux
arch: arm64
steps:
- name: build
image: arm64v8/centos:8
commands:
- dnf install -y gcc gcc-c++ make cmake epel-release git libarchive
- mkdir debug
- cd debug
- cmake .. -DCPUTYPE=aarch64 > /dev/null
- make -j4
trigger:
event:
- pull_request
when:
branch:
- develop
- master
- 2.0
- 3.0
---
kind: pipeline
name: test_arm_bionic
platform:
os: linux
arch: arm
steps:
- name: build
image: arm32v7/ubuntu:bionic
commands:
- apt-get update
- apt-get install -y cmake build-essential
- mkdir debug
- cd debug
- cmake .. -DCPUTYPE=aarch32 > /dev/null
- make -j4
trigger:
event:
- pull_request
when:
branch:
- develop
- master
- 2.0
- 3.0
---
kind: pipeline
name: build_trusty
platform:
os: linux
arch: amd64
steps:
- name: build
image: ubuntu:trusty
commands:
- apt-get update
- apt-get install -y gcc cmake3 build-essential git binutils-2.26
- mkdir debug
- cd debug
- cmake ..
- make -j4
trigger:
event:
- pull_request
when:
branch:
- develop
- master
- 2.0
- 3.0
---
kind: pipeline
name: build_xenial
platform:
os: linux
arch: amd64
steps:
- name: build
image: ubuntu:xenial
commands:
- apt-get update
- apt-get install -y gcc cmake build-essential
- mkdir debug
- cd debug
- cmake ..
- make -j4
trigger:
event:
- pull_request
when:
branch:
- develop
- master
- 2.0
- 3.0
---
kind: pipeline
name: build_bionic
platform:
os: linux
arch: amd64
steps:
- name: build
image: ubuntu:bionic
commands:
- apt-get update
- apt-get install -y gcc cmake build-essential
- mkdir debug
- cd debug
- cmake ..
- make -j4
trigger:
event:
- pull_request
when:
branch:
- develop
- master
- 2.0
- 3.0
---
kind: pipeline
name: build_centos7
platform:
os: linux
arch: amd64
steps:
- name: build
image: ansible/centos7-ansible
commands:
- yum install -y gcc gcc-c++ make cmake
- mkdir debug
- cd debug
- cmake ..
- make -j4
trigger:
event:
- pull_request
when:
branch:
- develop
- master
- 2.0
- 3.0

.github/CODEOWNERS

@ -0,0 +1,26 @@
# reference
# https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners
# merge team
# @guanshengliang Shengliang Guan
# @zitsen Linhe Huo
# @wingwing2005 Ya Qiang Li
# @feici02 WANG Xu
# @hzcheng Hongze Cheng
# @dapan1121 Pan Wei
# @sheyanjie-qq She Yanjie
# @pigzhou ZacharyZhou
* @taosdata/merge
/.github/ @feici02
/cmake/ @guanshengliang
/contrib/ @guanshengliang
/deps/ @guanshengliang
/docs/ @guanshengliang @zitsen
/examples/ @guanshengliang @zitsen
/include/ @guanshengliang @hzcheng @dapan1121
/packaging/ @feici02
/source/ @guanshengliang @hzcheng @dapan1121
/tests/ @guanshengliang @zitsen
/tools/ @guanshengliang @zitsen
/utils/ @guanshengliang

.github/pull_request_template.md

@ -0,0 +1,11 @@
# Description
Please briefly describe the code changes in this pull request.
# Checklist
Please check the items in the checklist if applicable.
- [ ] Is the user manual updated?
- [ ] Are the test cases passed and automated?
- [ ] Is there no significant decrease in test coverage?


@ -0,0 +1,25 @@
name: Cancel Workflow on Merge
on:
pull_request:
types: [closed]
jobs:
cancel-workflow:
runs-on: ubuntu-latest
steps:
- name: Cancel Workflow if Merged or Closed
if: ${{ github.event.pull_request.merged || github.event.pull_request.state == 'closed' }}
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
echo "PR has been merged or closed, cancelling workflow..."
gh auth status
gh run list \
--repo ${{ github.repository }} \
--branch ${{ github.event.pull_request.head.ref }} \
--workflow "TDengine Test" \
--status in_progress \
--status queued \
--json databaseId --jq '.[].databaseId' | \
xargs -I {} gh run cancel --repo ${{ github.repository }} {}
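The cancellation step above can be dry-run from a workstation before trusting it in CI. A minimal sketch, assuming an authenticated `gh` CLI; the branch name is a placeholder, and only listing is performed, nothing is cancelled:

```bash
# List the in-progress "TDengine Test" runs that the workflow step above would cancel
# (repeat with --status queued for queued runs; the branch name is a placeholder).
gh run list \
  --repo taosdata/TDengine \
  --branch my-feature-branch \
  --workflow "TDengine Test" \
  --status in_progress \
  --json databaseId,displayTitle,status
```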


@ -1,14 +1,17 @@
name: TaosKeeper CI
name: taosKeeper Build
on:
push:
pull_request:
branches:
- 'main'
- '3.0'
paths:
- tools/keeper/**
- 'tools/keeper/**'
jobs:
build:
runs-on: ubuntu-latest
name: Run unit tests
name: Build and test on ubuntu-latest
steps:
- name: Checkout the repository

.github/workflows/tdengine-build.yml

@ -0,0 +1,121 @@
name: TDengine Build
on:
pull_request:
branches:
- 'main'
- '3.0'
- '3.1'
- '3.3.6'
- 'enh/cmake-TD-33848'
paths-ignore:
- 'docs/**'
- 'packaging/**'
- 'tests/**'
- '**/*.md'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
build:
name: Run on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os:
- ubuntu-20.04
- ubuntu-22.04
- ubuntu-24.04
- macos-13
- macos-14
- macos-15
steps:
- name: Checkout the repository
uses: actions/checkout@v4
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: 1.18
- name: Install dependencies on Linux
if: runner.os == 'Linux'
run: |
sudo apt update -y
sudo apt install -y \
build-essential \
cmake \
gawk \
libgeos-dev \
libjansson-dev \
liblzma-dev \
libsnappy-dev \
libssl-dev \
libz-dev \
pkg-config \
zlib1g
- name: Install dependencies on macOS
if: runner.os == 'macOS'
run: |
brew update
brew install \
argp-standalone \
gawk \
gflags \
geos \
jansson \
openssl \
pkg-config \
snappy \
zlib
- name: Build and install TDengine
run: |
mkdir debug && cd debug
cmake .. -DBUILD_TOOLS=true \
-DBUILD_KEEPER=true \
-DBUILD_HTTP=false \
-DBUILD_TEST=true \
-DWEBSOCKET=true \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_DEPENDENCY_TESTS=false
make -j 4
sudo make install
which taosd
which taosadapter
which taoskeeper
- name: Statistics ldd
run: |
find ${{ github.workspace }}/debug/build/lib -type f -name "*.so" -print0 | xargs -0 ldd || true
find ${{ github.workspace }}/debug/build/bin -type f -print0 | xargs -0 ldd || true
- name: Statistics size
run: |
find ${{ github.workspace }}/debug/build/lib -type f -print0 | xargs -0 ls -lhrS
find ${{ github.workspace }}/debug/build/bin -type f -print0 | xargs -0 ls -lhrS
- name: Start taosd
run: |
cp /etc/taos/taos.cfg ./
sudo echo "supportVnodes 256" >> taos.cfg
nohup sudo taosd -c taos.cfg &
- name: Start taosadapter
run: nohup sudo taosadapter &
- name: Run tests with taosBenchmark
run: |
taosBenchmark -t 10 -n 10 -y
taos -s "select count(*) from test.meters"
- name: Clean up
if: always()
run: |
if pgrep taosd; then sudo pkill taosd; fi
if pgrep taosadapter; then sudo pkill taosadapter; fi
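The services started by this workflow can also be smoke-tested over taosAdapter's REST interface. A hedged check, assuming the default REST port 6041 and the default root/taosdata credentials, neither of which is changed by the workflow above:

```bash
# Query the running server through taosAdapter's REST endpoint (defaults assumed).
curl -s -L -u root:taosdata -d "select server_version()" http://localhost:6041/rest/sql
# The same data path the workflow exercises with the native CLI:
taos -s "select count(*) from test.meters"
```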


@ -0,0 +1,47 @@
name: TDengine Doc Build
on:
pull_request:
branches:
- 'main'
- '3.0'
paths:
- 'docs/**'
env:
DOC_WKC: '/root/doc_ci_work'
ZH_DOC_REPO: 'docs.taosdata.com'
EN_DOC_REPO: 'docs.tdengine.com'
TD_REPO: 'TDengine'
TOOLS_REPO: 'taos-tools'
jobs:
build-doc:
runs-on:
group: CI
labels: [self-hosted, doc-build]
steps:
- name: Get the latest document contents
run: |
set -e
cd ${{ env.DOC_WKC }}/${{ env.TD_REPO }}
git reset --hard
git clean -f
git remote prune origin
git fetch
git checkout ${{ github.event.pull_request.base.ref }}
git pull >/dev/null
git fetch origin +refs/pull/${{ github.event.pull_request.number }}/merge
git checkout -qf FETCH_HEAD
- name: Build the chinese document
run: |
cd ${{ env.DOC_WKC }}/${{ env.ZH_DOC_REPO }}
yarn ass local
yarn build
- name: Build the english document
run: |
cd ${{ env.DOC_WKC }}/${{ env.EN_DOC_REPO }}
yarn ass local
yarn build

.github/workflows/tdengine-test.yml

@ -0,0 +1,68 @@
name: TDengine Test
on:
pull_request:
branches:
- 'main'
- '3.0'
- '3.1'
paths-ignore:
- 'packaging/**'
- 'docs/**'
- 'tools/tdgpt/**'
- 'source/libs/executor/src/forecastoperator.c'
- 'source/libs/executor/src/anomalywindowoperator.c'
- 'source/dnode/mnode/impl/src/mndAnode.c'
- 'include/common/tanalytics.h'
- 'source/common/src/tanalytics.c'
- 'tests/parallel/tdgpt_cases.task'
- 'tests/script/tsim/analytics'
- '**/*.md'
workflow_dispatch:
inputs:
specified_source_branch:
description: 'Enter the source branch name of TDengine'
required: true
type: string
specified_target_branch:
description: 'Enter the target branch name of TDengine'
required: true
type: string
specified_pr_number:
description: 'Enter the PR number of TDengine'
required: true
type: string
concurrency:
group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event_name == 'pull_request' && github.event.pull_request.base.ref || inputs.specified_target_branch }}-${{ github.event_name == 'pull_request' && github.event.pull_request.number || inputs.specified_pr_number }}-TDengine
cancel-in-progress: true
env:
WKC: '/var/lib/jenkins/workspace/TDinternal/community'
jobs:
run-tests-on-linux:
uses: taosdata/.github/.github/workflows/run-tests-on-linux.yml@main
with:
tdinternal: false
specified_source_branch: ${{ github.event_name == 'pull_request' && 'unavailable' || inputs.specified_source_branch }}
specified_target_branch: ${{ github.event_name == 'pull_request' && 'unavailable' || inputs.specified_target_branch }}
specified_pr_number: ${{ github.event_name == 'pull_request' && 'unavailable' || inputs.specified_pr_number }}
run-tests-on-mac:
uses: taosdata/.github/.github/workflows/run-tests-on-macos.yml@main
with:
tdinternal: false
specified_source_branch: ${{ github.event_name == 'pull_request' && 'unavailable' || inputs.specified_source_branch }}
specified_target_branch: ${{ github.event_name == 'pull_request' && 'unavailable' || inputs.specified_target_branch }}
specified_pr_number: ${{ github.event_name == 'pull_request' && 'unavailable' || inputs.specified_pr_number }}
run-tests-on-windows:
uses: taosdata/.github/.github/workflows/run-tests-on-windows.yml@main
with:
tdinternal: false
specified_source_branch: ${{ github.event_name == 'pull_request' && 'unavailable' || inputs.specified_source_branch }}
specified_target_branch: ${{ github.event_name == 'pull_request' && 'unavailable' || inputs.specified_target_branch }}
specified_pr_number: ${{ github.event_name == 'pull_request' && 'unavailable' || inputs.specified_pr_number }}
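Besides pull requests, the `workflow_dispatch` trigger above can be driven from the command line. A sketch, assuming an authenticated `gh` CLI; the ref, branch names, and PR number are placeholders:

```bash
# Manually dispatch "TDengine Test" with the three inputs declared above.
gh workflow run tdengine-test.yml \
  --repo taosdata/TDengine \
  --ref 3.0 \
  -f specified_source_branch=fix/example-branch \
  -f specified_target_branch=3.0 \
  -f specified_pr_number=12345
```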

.github/workflows/tdgpt-test.yml

@ -0,0 +1,203 @@
# Run unit-test and system-test cases for TDgpt when TDgpt code is changed.
name: TDgpt Test
on:
pull_request:
branches:
- 'main'
- '3.0'
- '3.3.6'
paths:
- 'tools/tdgpt/**'
- 'source/libs/executor/src/forecastoperator.c'
- 'source/libs/executor/src/anomalywindowoperator.c'
- 'source/dnode/mnode/impl/src/mndAnode.c'
- 'include/common/tanalytics.h'
- 'source/common/src/tanalytics.c'
- 'tests/parallel/tdgpt_cases.task'
- 'tests/script/tsim/analytics'
jobs:
unit-test:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.10"]
defaults:
run:
working-directory: ${{ github.workspace }}/tools/tdgpt
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest pylint
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Checking the code with pylint
run: |
pylint $(git ls-files '*.py') --exit-zero
- name: Checking the code with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Run test cases with pytest
run: |
pytest
function-test:
runs-on:
group: CI
labels: [self-hosted, Linux, X64, testing]
env:
CONTAINER_NAME: 'taosd-test'
WKDIR: '/var/lib/jenkins/workspace'
WK: '/var/lib/jenkins/workspace/TDinternal'
WKC: '/var/lib/jenkins/workspace/TDinternal/community'
SOURCE_BRANCH: ${{ github.event.pull_request.head.ref }}
TARGET_BRANCH: ${{ github.event.pull_request.base.ref }}
PR_NUMBER: ${{ github.event.pull_request.number }}
steps:
- name: Output the environment information
run: |
echo "::group::Environment Info"
date
hostname
env
echo "Runner: ${{ runner.name }}"
echo "Workspace: ${{ env.WKDIR }}"
git --version
echo "${{ env.WKDIR }}/restore.sh -p PR-${{ env.PR_NUMBER }} -n ${{ github.run_number }} -c ${{ env.CONTAINER_NAME }}"
echo "::endgroup::"
- name: Prepare repositories
run: |
set -euo pipefail
prepare_environment() {
cd "$1"
git reset --hard
git clean -f
git remote prune origin
git fetch
git checkout "$2"
}
prepare_environment "${{ env.WK }}" "${{ env.TARGET_BRANCH }}"
prepare_environment "${{ env.WKC }}" "${{ env.TARGET_BRANCH }}"
- name: Get latest codes and logs
run: |
cd ${{ env.WKC }}
git remote prune origin
git pull >/dev/null
git log -5
echo "`date "+%Y%m%d-%H%M%S"` TDengineTest/${{ env.PR_NUMBER }}:${{ github.run_number }}:${{ env.TARGET_BRANCH }}" >>${{ env.WKDIR }}/jenkins.log
echo "CHANGE_BRANCH:${{ env.SOURCE_BRANCH }}" >>${{ env.WKDIR }}/jenkins.log
echo "community log: `git log -5`" >>${{ env.WKDIR }}/jenkins.log
git fetch origin +refs/pull/${{ env.PR_NUMBER }}/merge
git checkout -qf FETCH_HEAD
git log -5
echo "community log merged: `git log -5`" >>${{ env.WKDIR }}/jenkins.log
cd ${{ env.WK }}
git pull >/dev/null
git log -5
echo "TDinternal log: `git log -5`" >>${{ env.WKDIR }}/jenkins.log
- name: Update submodule
run: |
cd ${{ env.WKC }}
git submodule update --init --recursive
- name: Detect non-doc files changed
run: |
mkdir -p ${{ env.WKDIR }}/tmp/${{ env.PR_NUMBER }}_${{ github.run_number }}
cd ${{ env.WKC }}
changed_files_non_doc=$(git --no-pager diff --name-only \
FETCH_HEAD \
$(git merge-base FETCH_HEAD ${{ env.TARGET_BRANCH }}) | \
grep -v "^docs/en/" | \
grep -v "^docs/zh/" | \
grep -v ".md$" | \
tr '\n' ' ' || : \
)
echo $changed_files_non_doc > \
${{ env.WKDIR }}/tmp/${{ env.PR_NUMBER }}_${{ github.run_number }}/docs_changed.txt
- name: Check assert testing
run: |
cd ${{ env.WKC }}/tests/parallel_test
./run_check_assert_container.sh -d ${{ env.WKDIR }}
- name: Check void function testing
run: |
cd ${{ env.WKC }}/tests/parallel_test
./run_check_void_container.sh -d ${{ env.WKDIR }}
- name: Build docker container
run: |
date
rm -rf ${{ env.WKC }}/debug
cd ${{ env.WKC }}/tests/parallel_test
time ./container_build.sh -w ${{ env.WKDIR }} -e
- name: Get parameters for testing
id: get_param
run: |
log_server_file="/home/log_server.json"
timeout_cmd=""
extra_param=""
if [ -f "$log_server_file" ]; then
log_server_enabled=$(jq '.enabled' "$log_server_file")
timeout_param=$(jq '.timeout' "$log_server_file")
if [ "$timeout_param" != "null" ] && [ "$timeout_param" != "0" ]; then
timeout_cmd="timeout $timeout_param"
fi
if [ "$log_server_enabled" == "1" ]; then
log_server=$(jq '.server' "$log_server_file" | sed 's/\\\"//g')
if [ "$log_server" != "null" ] && [ "$log_server" != "" ]; then
extra_param="-w $log_server"
fi
fi
fi
echo "timeout_cmd=$timeout_cmd" >> $GITHUB_OUTPUT
echo "extra_param=$extra_param" >> $GITHUB_OUTPUT
- name: Run function returns with a null pointer scan testing
run: |
cd ${{ env.WKC }}/tests/parallel_test
./run_scan_container.sh \
-d ${{ env.WKDIR }} \
-b ${{ env.PR_NUMBER }}_${{ github.run_number }} \
-f ${{ env.WKDIR }}/tmp/${{ env.PR_NUMBER }}_${{ github.run_number }}/docs_changed.txt \
${{ steps.get_param.outputs.extra_param }}
- name: Run tdgpt test cases
run: |
cd ${{ env.WKC }}/tests/parallel_test
export DEFAULT_RETRY_TIME=2
date
timeout 600 time ./run.sh -e \
-m /home/m.json \
-t tdgpt_cases.task \
-b "${{ env.PR_NUMBER }}_${{ github.run_number }}" \
-l ${{ env.WKDIR }}/log \
-o 300 ${{ steps.get_param.outputs.extra_param }}
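The "Get parameters for testing" step reads an optional `/home/log_server.json` on the self-hosted runner. Its schema is not part of this commit; a hypothetical file consistent with the jq lookups in that step would look like this:

```bash
# Hypothetical log_server.json (all values are placeholders; the real file lives on the runner).
cat > /tmp/log_server.json <<'EOF'
{
  "enabled": 1,
  "timeout": 3600,
  "server": "192.168.1.10"
}
EOF
jq '.enabled, .timeout, .server' /tmp/log_server.json
```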


@ -0,0 +1,43 @@
# Scheduled updates for the TDgpt service.
name: TDgpt Update Service
on:
schedule:
- cron: '30 00 * * *'
env:
WKC: "/root/TDengine"
jobs:
update-service:
runs-on:
group: CI
labels: [self-hosted, Linux, X64, tdgpt-anode-service]
steps:
- name: Update TDengine codes
run: |
set -euo pipefail
cd ${{ env.WKC }}
git checkout 3.0
- name: Package the TDGpt Anode Service
run: |
set -euo pipefail
cd ${{ env.WKC }}/tools/tdgpt/script && ./release.sh
- name: Reinstall and restart the TDGpt Anode Service
run: |
set -euo pipefail
cd ${{ env.WKC }}/tools/tdgpt/release
if [[ -f "TDengine-enterprise-anode-1.0.1.tar.gz" ]]; then
tar -xzf TDengine-enterprise-anode-1.0.1.tar.gz
cd TDengine-enterprise-anode-1.0.1
./install.sh
fi
systemctl restart taosanoded
- name: Clean up
if: always()
run: |
if [[ -d ${{ env.WKC }}/tools/tdgpt/release/TDengine-enterprise-anode-1.0.1 ]]; then rm -rf ${{ env.WKC }}/tools/tdgpt/release/TDengine-enterprise-anode-1.0.1; fi
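A hedged post-deployment check for the service restarted above, assuming a systemd-managed runner; the unit name comes from the workflow, the time window is arbitrary:

```bash
# Confirm the TDgpt anode service survived the reinstall and inspect recent logs.
systemctl is-active taosanoded
journalctl -u taosanoded --since "10 minutes ago" --no-pager | tail -n 20
```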

.gitignore

@ -51,7 +51,6 @@ pysim/
tests/script/api/batchprepare
taosadapter
taosadapter-debug
tools/taos-tools/*
tools/taosws-rs/*
tools/taosadapter/*
tools/upx*
@ -60,7 +59,6 @@ tools/upx*
html/
/.vs
/CMakeFiles/3.10.2
/CMakeCache.txt
/Makefile
/*.cmake
/src/cq/test/CMakeFiles/cqtest.dir/*.cmake
@ -99,6 +97,7 @@ tests/examples/JDBC/JDBCDemo/.classpath
tests/examples/JDBC/JDBCDemo/.project
tests/examples/JDBC/JDBCDemo/.settings/
source/libs/parser/inc/sql.*
source/os/src/timezone/
tests/script/tmqResult.txt
tests/system-test/case_to_run.txt
tests/develop-test/case_to_run.txt
@ -132,14 +131,12 @@ tools/THANKS
tools/NEWS
tools/COPYING
tools/BUGS
tools/taos-tools
tools/taosws-rs
tags
.clangd
*CMakeCache*
*CMakeFiles*
.history/
*.txt
*.tcl
*.pc
contrib/geos
@ -155,6 +152,9 @@ pcre2_grep_test.sh
pcre2_chartables.c
geos-config
config.h
!contrib/xml2-cmake
!contrib/xml2-cmake/linux_x86_64/include/config.h
!contrib/xml2-cmake/CMakeLists.txt
pcre2.h
zconf.h
version.h
@ -162,3 +162,12 @@ geos_c.h
source/libs/parser/src/sql.c
include/common/ttokenauto.h
!packaging/smokeTest/pytest_require.txt
tdengine-test-dir/
localtime.c
private.h
strftime.c
tzdir.h
tzfile.h
coverage.info
taos
taosd

.lgtm.yml

@ -1,402 +0,0 @@
##########################################################################################
# Customize file classifications. #
# Results from files under any classifier will be excluded from LGTM #
# statistics. #
##########################################################################################
##########################################################################################
# Use the `path_classifiers` block to define changes to the default classification of #
# files. #
##########################################################################################
path_classifiers:
# docs:
# Identify the top-level file called `generate_javadoc.py` as documentation-related.
test:
# Override LGTM's default classification of test files by excluding all files.
- exclude: /
# Classify all files in the top-level directories tests/ and testsuites/ as test code.
- tests
# - testsuites
# Classify all files with suffix `.test` as test code.
# Note: use only forward slash / as a path separator.
# Use ** to indicate an arbitrary parent path.
# Use * to indicate any sequence of characters excluding /.
# Always enclose the expression in double quotes if it includes *.
# - "**/*.test"
# Refine the classifications above by excluding files in test/util/.
# - exclude: test/util
# The default behavior is to tag all files created during the
# build as `generated`. Results are hidden for generated code. You can tag
# further files as being generated by adding them to the `generated` section.
generated:
# Exclude all `*.c` files under the `ui/` directory from classification as
# generated code.
# - exclude: ui/**/*.c
# By default, all files not checked into the repository are considered to be
# 'generated'.
# The default behavior is to tag library code as `library`. Results are hidden
# for library code. You can tag further files as being library code by adding them
# to the `library` section.
library:
- exclude: deps/
# The default behavior is to tag template files as `template`. Results are hidden
# for template files. You can tag further files as being template files by adding
# them to the `template` section.
template:
#- exclude: path/to/template/code/**/*.c
# Define your own category, for example: 'some_custom_category'.
some_custom_category:
# Classify all files in the top-level directory tools/ (or the top-level file
# called tools).
# - tools
#########################################################################################
# Use the `queries` block to change the default display of query results. #
#########################################################################################
# queries:
# Start by hiding the results of all queries.
# - exclude: "*"
# Then include all queries tagged 'security' and 'correctness', and with a severity of
# 'error'.
# - include:
# tags:
# - "security"
# - "correctness"
# severity: "error"
# Specifically hide the results of two queries.
# - exclude: cpp/use-of-goto
# - exclude: java/equals-on-unrelated-types
# Refine by including the `java/command-line-injection` query.
# - include: java/command-line-injection
#########################################################################################
# Define changes to the default code extraction process. #
# Each block configures the extraction of a single language, and modifies actions in a #
# named step. Every named step includes automatic default actions, #
# except for the 'prepare' step. The steps are performed in the following sequence: #
# prepare #
# after_prepare #
# configure (C/C++ only) #
# python_setup (Python only) #
# before_index #
# index #
##########################################################################################
#########################################################################################
# Environment variables available to the steps: #
#########################################################################################
# LGTM_SRC
# The root of the source tree.
# LGTM_WORKSPACE
# An existing (initially empty) folder outside the source tree.
# Used for temporary download and setup commands.
#########################################################################################
# Use the extraction block to define changes to the default code extraction process #
# for one or more languages. The settings for each language are defined in a child #
# block, with one or more steps. #
#########################################################################################
extraction:
# Define settings for C/C++ analysis
#####################################
cpp:
# The `prepare` step exists for customization on LGTM.com only.
prepare:
# # The `packages` section is valid for LGTM.com only. It names Ubuntu packages to
# # be installed.
packages:
- cmake
# Add an `after-prepare` step if you need to run commands after the prepare step.
# Each command should be listed on a separate line.
# This step is useful for C/C++ analysis where you want to prepare the environment
# for the `configure` step without changing the default behavior for that step.
# after_prepare:
#- export GNU_MAKE=make
#- export GIT=true
# The `configure` step generates build configuration files which the `index` step
# then uses to build the codebase.
configure:
command:
- mkdir build
- cd build
- cmake ..
# - ./prepare_deps
# Optional step. You should add a `before_index` step if you need to run commands
# before the `index` step.
# before_index:
# - export BOOST_DIR=$LGTM_SRC/boost
# - export GTEST_DIR=$LGTM_SRC/googletest
# - export HUNSPELL_DIR=$LGTM_SRC/hunspell
# - export CRYPTOPP_DIR=$LGTM_SRC/cryptopp
# The `index` step builds the code and extracts information during the build
# process.
index:
# Override the autobuild process by specifying a list of custom build commands
# to use instead.
build_command:
- cd build
- make
# - $GNU_MAKE -j2 -s
# Specify that all project or solution files should be used for extraction.
# Default: false.
# all_solutions: true
# Specify a list of one or more project or solution files for extraction.
# Default: LGTM chooses the file closest to the root of the repository (this may
# fail if there are multiple candidates).
# solution:
# - myProject.sln
# Specify MSBuild settings
# msbuild:
# Specify a list of additional arguments to MSBuild. Default: empty.
# arguments: /p:Platform=x64 /p:Configuration=Release
# Specify the MSBuild configuration to use, for example, debug or release.
# Default: read from the solution file or files.
# configuration:
# Specify the platform to target, for example: x86, x64, or Any CPU.
# Default: read from the solution file or files.
# platform:
# Specify the MSBuild target. Default: rebuild.
# target:
# Specify whether or not to perform a NuGet restore for extraction. Default: true.
# nuget_restore: false
# Specify a version of Microsoft Visual Studio to use for MSBuild or any custom
# build commands (build_command). For example:
# 10 for Visual Studio 2010
# 12 for Visual Studio 2012
# 14 for Visual Studio 2015
# 15 for Visual Studio 2017
# Default: read from project files.
# vstools_version: 10
# Define settings for C# analysis
##################################
# csharp:
# The `prepare` step exists for customization on LGTM.com only.
# prepare:
# packages:
# - example_package
# Add an `after-prepare` step if you need to run commands after the `prepare` step.
# Each command should be listed on a separate line.
# after_prepare:
# - export PATH=$LGTM_WORKSPACE/tools:$PATH
# The `index` step builds the code and extracts information during the build
# process.
#index:
# Specify that all project or solution files should be used for extraction.
# Default: false.
# all_solutions: true
# Specify a list of one or more project or solution files for extraction.
# Default: LGTM chooses the file closest to the root of the repository (this may
# fail if there are multiple candidates).
# solution:
# - myProject.sln
# Override the autobuild process by specifying a list of custom build commands
# to use instead.
# build_command:
# - ./example-compile-all.sh
# By default, LGTM analyzes the code by building it. You can override this,
# and tell LGTM not to build the code. Beware that this can lead
# to less accurate results.
# buildless: true
# Specify .NET Core settings.
# dotnet:
# Specify additional arguments to `dotnet build`.
# Default: empty.
# arguments: "example_arg"
# Specify the version of .NET Core SDK to use.
# Default: The version installed on the build machine.
# version: 2.1
# Specify MSBuild settings.
# msbuild:
# Specify a list of additional arguments to MSBuild. Default: empty.
# arguments: /P:WarningLevel=2
# Specify the MSBuild configuration to use, for example, debug or release.
# Default: read from the solution file or files.
# configuration: release
# Specify the platform to target, for example: x86, x64, or Any CPU.
# Default: read from the solution file or files.
# platform: x86
# Specify the MSBuild target. Default: rebuild.
# target: notest
# Specify whether or not to perform a NuGet restore for extraction. Default: true.
# nuget_restore: false
# Specify a version of Microsoft Visual Studio to use for MSBuild or any custom
# build commands (build_command). For example:
# 10 for Visual Studio 2010
# 12 for Visual Studio 2012
# 14 for Visual Studio 2015
# 15 for Visual Studio 2017
# Default: read from project files
# vstools_version: 10
# Specify additional options for the extractor,
# for example --fast to perform a faster extraction that produces a smaller
# database.
# extractor: "--fast"
# Define settings for Go analysis
##################################
# go:
# The `prepare` step exists for customization on LGTM.com only.
# prepare:
# packages:
# - example_package
# Add an `after-prepare` step if you need to run commands after the `prepare` step.
# Each command should be listed on a separate line.
# after_prepare:
# - export PATH=$LGTM_WORKSPACE/tools:$PATH
# The `index` step builds the code and extracts information during the build
# process.
# index:
# Override the autobuild process by specifying a list of custom build commands
# to use instead.
# build_command:
# - ./compile-all.sh
# Define settings for Java analysis
####################################
# java:
# The `prepare` step exists for customization on LGTM.com only.
# prepare:
# packages:
# - example_package
# Add an `after-prepare` step if you need to run commands after the prepare step.
# Each command should be listed on a separate line.
# after_prepare:
# - export PATH=$LGTM_WORKSPACE/tools:$PATH
# The `index` step extracts information from the files in the codebase.
# index:
# Specify Gradle settings.
# gradle:
# Specify the required Gradle version.
# Default: determined automatically.
# version: 4.4
# Override the autobuild process by specifying a list of custom build commands
# to use instead.
# build_command: ./compile-all.sh
# Specify the Java version required to build the project.
# java_version: 11
# Specify whether to extract Java .properties files
# Default: false
# properties_files: true
# Specify Maven settings.
# maven:
# Specify the path (absolute or relative) of a Maven settings file to use.
# Default: Maven uses a settings file in the default location, if it exists.
# settings_file: /opt/share/settings.xml
# Specify the path of a Maven toolchains file.
# Default: Maven uses a toolchains file in the default location, if it exists.
# toolchains_file: /opt/share/toolchains.xml
# Specify the required Maven version.
# Default: the Maven version is determined automatically, where feasible.
# version: 3.5.2
# Specify how XML files should be extracted:
# all = extract all XML files.
# default = only extract XML files named `AndroidManifest.xml`, `pom.xml`, and `web.xml`.
# disabled = do not extract any XML files.
# xml_mode: all
# Define settings for JavaScript analysis
##########################################
# javascript:
# The `prepare` step exists for customization on LGTM.com only.
# prepare:
# packages:
# - example_package
# Add an `after-prepare` step if you need to run commands after the prepare step.
# Each command should be listed on a separate line.
# after_prepare:
# - export PATH=$LGTM_WORKSPACE/tools:$PATH
# The `index` step extracts information from the files in the codebase.
# index:
# Specify a list of files and folders to extract.
# Default: The project root directory.
# include:
# - src/js
# Specify a list of files and folders to exclude from extraction.
# exclude:
# - thirdparty/lib
# You can add additional file types for LGTM to extract, by mapping file
# extensions (including the leading dot) to file types. The usual
# include/exclude patterns apply, so, for example, `.jsm` files under
# `thirdparty/lib` will not be extracted.
# filetypes:
# ".jsm": "js"
# ".tmpl": "html"
# Specify a list of glob patterns to include/exclude files from extraction; this
# is applied on top of the include/exclude paths from above; patterns are
# processed in the same way as for path classifiers above.
# Default: include all files with known extensions (such as .js, .ts and .html),
# but exclude files ending in `-min.js` or `.min.js` and folders named `node_modules`
# or `bower_components`
# filters:
# exclude any *.ts files anywhere.
# - exclude: "**/*.ts"
# but include *.ts files under src/js/typescript.
# - include: "src/js/typescript/**/*.ts"
# Specify how TypeScript files should be extracted:
# none = exclude all TypeScript files.
# basic = extract syntactic information from TypeScript files.
# full = extract syntactic and type information from TypeScript files.
# Default: full.
# typescript: basic
# By default, LGTM doesn't extract any XML files. You can override this by
# using the `xml_mode` property and setting it to `all`.
# xml_mode: all
# Define settings for Python analysis
######################################
# python:
# # The `prepare` step exists for customization on LGTM.com only.
# # prepare:
# # # The `packages` section is valid for LGTM.com only. It names packages to
# # # be installed.
# # packages: libpng-dev
# # This step is useful for Python analysis where you want to prepare the
# # environment for the `python_setup` step without changing the default behavior
# # for that step.
# after_prepare:
# - export PATH=$LGTM_WORKSPACE/tools:$PATH
# # This sets up the Python interpreter and virtual environment, ready for the
# # `index` step to extract the codebase.
# python_setup:
# # Specify packages that should NOT be installed despite being mentioned in the
# # requirements.txt file.
# # Default: no package marked for exclusion.
# exclude_requirements:
# - pywin32
# # Specify a list of pip packages to install.
# # If any of these packages cannot be installed, the extraction will fail.
# requirements:
# - Pillow
# # Specify a list of requirements text files to use to set up the environment,
# # or false for none. Default: any requirements.txt, test-requirements.txt,
# # and similarly named files identified in the codebase are used.
# requirements_files:
# - required-packages.txt
# # Specify a setup.py file to use to set up the environment, or false for none.
# # Default: any setup.py files identified in the codebase are used in preference
# # to any requirements text files.
# setup_py: new-setup.py
# # Override the version of the Python interpreter used for setup and extraction
# # Default: Python 3.
# version: 2
# # Optional step. You should add a `before_index` step if you need to run commands
# # before the `index` step.
# before_index:
# - antlr4 -Dlanguage=Python3 Grammar.g4
# # The `index` step extracts information from the files in the codebase.
# index:
# # Specify a list of files and folders to exclude from extraction.
# # Default: Git submodules and Subversion externals.
# exclude:
# - legacy-implementation
# - thirdparty/libs
# filters:
# - exclude: "**/documentation/examples/snippets/*.py"
# - include: "**/documentation/examples/test_application/*"
# include:
# - example/to/include


@ -11,36 +11,29 @@ if(NOT DEFINED TD_SOURCE_DIR)
endif()
SET(TD_COMMUNITY_DIR ${PROJECT_SOURCE_DIR})
set(TD_SUPPORT_DIR "${TD_SOURCE_DIR}/cmake")
set(TD_CONTRIB_DIR "${TD_SOURCE_DIR}/contrib")
include(${TD_SUPPORT_DIR}/cmake.platform)
include(${TD_SUPPORT_DIR}/cmake.define)
include(${TD_SUPPORT_DIR}/cmake.options)
include(${TD_SUPPORT_DIR}/cmake.define)
include(${TD_SUPPORT_DIR}/cmake.version)
# contrib
add_subdirectory(contrib)
include(${TD_SUPPORT_DIR}/cmake.install)
enable_testing()
set_property(GLOBAL PROPERTY GLOBAL_DEPENDS_NO_CYCLES OFF)
add_subdirectory(contrib)
# api
add_library(api INTERFACE)
target_include_directories(api INTERFACE "include/client")
# src
if(${BUILD_TEST})
include(CTest)
enable_testing()
add_subdirectory(examples/c)
endif(${BUILD_TEST})
add_library(api INTERFACE)
target_include_directories(api INTERFACE "include/client")
add_subdirectory(source)
add_subdirectory(tools)
add_subdirectory(utils)
add_subdirectory(examples/c)
add_subdirectory(tests)
include(${TD_SUPPORT_DIR}/cmake.install)
# docs
add_subdirectory(docs/doxgen)
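For context on the reordered build files, CTest and `examples/c` are now gated on `BUILD_TEST`. A minimal out-of-source configure exercising that path; the option names come from this repository, while the Linux host and job count are assumptions:

```bash
# Configure, build, and run the gated tests from a clean tree.
mkdir -p debug && cd debug
cmake .. -DBUILD_TEST=true -DBUILD_TOOLS=true
make -j4
ctest --output-on-failure   # only populated when BUILD_TEST enables CTest above
```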


@ -75,4 +75,4 @@ available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.ht
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq
https://www.contributor-covenant.org/faq


@ -5,7 +5,11 @@ node {
}
file_zh_changed = ''
file_en_changed = ''
file_no_doc_changed = ''
file_no_doc_changed = '1'
file_only_tdgpt_change_except = '1'
tdgpt_file = "forecastoperator.c|anomalywindowoperator.c|tanalytics.h|tanalytics.c|tdgpt_cases.task|analytics"
def abortPreviousBuilds() {
def currentJobName = env.JOB_NAME
def currentBuildNumber = env.BUILD_NUMBER.toInteger()
@ -69,13 +73,23 @@ def check_docs(){
file_no_doc_changed = sh (
script: '''
cd ${WKC}
git --no-pager diff --name-only FETCH_HEAD `git merge-base FETCH_HEAD ${CHANGE_TARGET}`|grep -v "^docs/en/"|grep -v "^docs/zh/"|grep -v "*.md" || :
git --no-pager diff --name-only FETCH_HEAD `git merge-base FETCH_HEAD ${CHANGE_TARGET}`|grep -v "^docs/en/"|grep -v "^docs/zh/"|grep -v ".md$" || :
''',
returnStdout: true
).trim()
file_only_tdgpt_change_except = sh (
script: '''
cd ${WKC}
git --no-pager diff --name-only FETCH_HEAD `git merge-base FETCH_HEAD ${CHANGE_TARGET}`|grep -v "^docs/en/"|grep -v "^docs/zh/"|grep -v ".md$" | grep -Ev "forecastoperator.c|anomalywindowoperator.c|tanalytics.h|tanalytics.c|tdgpt_cases.task|analytics" ||:
''',
returnStdout: true
).trim()
echo "file_zh_changed: ${file_zh_changed}"
echo "file_en_changed: ${file_en_changed}"
echo "file_no_doc_changed: ${file_no_doc_changed}"
echo "file_only_tdgpt_change_except: ${file_only_tdgpt_change_except}"
}
}
@ -98,16 +112,6 @@ def build_pre_docs(){
git fetch origin +refs/pull/${CHANGE_ID}/merge
git checkout -qf FETCH_HEAD
'''
sh '''
cd ${DOC_WKC}/${tools_repo}
git reset --hard
git clean -f
git fetch
git remote prune origin
git checkout ''' + env.CHANGE_TARGET + '''
git pull >/dev/null
'''
}
}
@ -354,9 +358,10 @@ def pre_test_build_win() {
'''
bat '''
cd %WIN_COMMUNITY_ROOT%/tests/ci
pip3 install taospy==2.7.16
pip3 install taos-ws-py==0.3.3
pip3 install taospy==2.7.21
pip3 install taos-ws-py==0.3.8
xcopy /e/y/i/f %WIN_INTERNAL_ROOT%\\debug\\build\\lib\\taos.dll C:\\Windows\\System32
xcopy /e/y/i/f %WIN_INTERNAL_ROOT%\\debug\\build\\lib\\taosnative.dll C:\\Windows\\System32
'''
return 1
}
@ -375,7 +380,9 @@ def run_win_test() {
bat '''
echo "windows test ..."
xcopy /e/y/i/f %WIN_INTERNAL_ROOT%\\debug\\build\\lib\\taos.dll C:\\Windows\\System32
xcopy /e/y/i/f %WIN_INTERNAL_ROOT%\\debug\\build\\lib\\taosnative.dll C:\\Windows\\System32
ls -l C:\\Windows\\System32\\taos.dll
ls -l C:\\Windows\\System32\\taosnative.dll
time /t
cd %WIN_SYSTEM_TEST_ROOT%
echo "testing ..."
@ -385,7 +392,7 @@ def run_win_test() {
}
pipeline {
agent none
agent any
options { skipDefaultCheckout() }
environment{
WKDIR = '/var/lib/jenkins/workspace'
@ -451,8 +458,8 @@ pipeline {
stage('run test') {
when {
allOf {
not { expression { file_no_doc_changed == '' }}
expression {
file_no_doc_changed != '' && env.CHANGE_TARGET != 'docs-cloud'
}
}
parallel {
@ -463,19 +470,27 @@ pipeline {
WIN_COMMUNITY_ROOT="C:\\workspace\\${env.EXECUTOR_NUMBER}\\TDinternal\\community"
WIN_SYSTEM_TEST_ROOT="C:\\workspace\\${env.EXECUTOR_NUMBER}\\TDinternal\\community\\tests\\system-test"
}
when {
beforeAgent true
expression { file_only_tdgpt_change_except != '' }
}
steps {
catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
timeout(time: 126, unit: 'MINUTES'){
pre_test_win()
pre_test_build_win()
run_win_ctest()
run_win_test()
pre_test_win()
pre_test_build_win()
run_win_ctest()
run_win_test()
}
}
}
}
stage('mac test') {
agent{label " Mac_catalina "}
when {
beforeAgent true
expression { file_only_tdgpt_change_except != '' }
}
steps {
catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
timeout(time: 60, unit: 'MINUTES'){
@ -551,28 +566,25 @@ pipeline {
cd ${WKC}/tests/parallel_test
./run_scan_container.sh -d ${WKDIR} -b ${BRANCH_NAME}_${BUILD_ID} -f ${WKDIR}/tmp/${BRANCH_NAME}_${BUILD_ID}/docs_changed.txt ''' + extra_param + '''
'''
sh '''
cd ${WKC}/tests/parallel_test
export DEFAULT_RETRY_TIME=2
date
''' + timeout_cmd + ''' time ./run.sh -e -m /home/m.json -t cases.task -b ${BRANCH_NAME}_${BUILD_ID} -l ${WKDIR}/log -o 1200 ''' + extra_param + '''
'''
if ( file_no_doc_changed =~ /forecastoperator.c|anomalywindowoperator.c|tanalytics.h|tanalytics.c|tdgpt_cases.task|analytics/ ) {
sh '''
cd ${WKC}/tests/parallel_test
export DEFAULT_RETRY_TIME=2
date
timeout 600 time ./run.sh -e -m /home/m.json -t tdgpt_cases.task -b ${BRANCH_NAME}_${BUILD_ID} -l ${WKDIR}/log -o 300 ''' + extra_param + '''
'''
}
if ( file_only_tdgpt_change_except != '' ) {
sh '''
cd ${WKC}/tests/parallel_test
export DEFAULT_RETRY_TIME=2
date
''' + timeout_cmd + ''' time ./run.sh -e -m /home/m.json -t cases.task -b ${BRANCH_NAME}_${BUILD_ID} -l ${WKDIR}/log -o 1200 ''' + extra_param + '''
'''
}
}
}
}
/*catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
timeout(time: 15, unit: 'MINUTES'){
script {
sh '''
echo "packaging ..."
date
rm -rf ${WKC}/release/*
cd ${WKC}/packaging
./release.sh -v cluster -n 3.0.0.100 -s static
'''
}
}
}*/
}
}
}
@ -656,4 +668,4 @@ pipeline {
)
}
}
}
}
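The new `file_only_tdgpt_change_except` variable is what gates the Windows and Mac stages. A hedged local reproduction of that check, meant to be run in a checkout whose `FETCH_HEAD` points at the PR merge commit; `CHANGE_TARGET` is a placeholder:

```bash
# Reproduce the tdgpt-only gating used by the Jenkinsfile.
CHANGE_TARGET=3.0
non_doc=$(git --no-pager diff --name-only FETCH_HEAD "$(git merge-base FETCH_HEAD $CHANGE_TARGET)" \
  | grep -v "^docs/en/" | grep -v "^docs/zh/" | grep -v ".md$" || :)
non_tdgpt=$(printf '%s\n' "$non_doc" \
  | grep -Ev "forecastoperator.c|anomalywindowoperator.c|tanalytics.h|tanalytics.c|tdgpt_cases.task|analytics" || :)
if [ -n "$non_tdgpt" ]; then
  echo "non-tdgpt changes present: full test stages run"
else
  echo "tdgpt-only change: Windows and Mac stages are skipped"
fi
```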


@ -1,6 +1,5 @@
<p>
<p align="center">
<a href="https://tdengine.com" target="_blank">
<a href="https://www.taosdata.com" target="_blank">
<img
src="docs/assets/tdengine.svg"
alt="TDengine"
@ -8,22 +7,45 @@
/>
</a>
</p>
<p>
[![Build Status](https://travis-ci.org/taosdata/TDengine.svg?branch=master)](https://travis-ci.org/taosdata/TDengine)
[![Build status](https://ci.appveyor.com/api/projects/status/kf3pwh2or5afsgl9/branch/master?svg=true)](https://ci.appveyor.com/project/sangshuduo/tdengine-2n8ge/branch/master)
[![Coverage Status](https://coveralls.io/repos/github/taosdata/TDengine/badge.svg?branch=3.0)](https://coveralls.io/github/taosdata/TDengine?branch=3.0)
[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/4201/badge)](https://bestpractices.coreinfrastructure.org/projects/4201)
简体中文 | [English](README.md) | [TDengine 云服务](https://cloud.taosdata.com/?utm_medium=cn&utm_source=github) | 很多职位正在热招中,请看 [这里](https://www.taosdata.com/careers/)
简体中文 | [English](README.md) | [TDengine 云服务](https://cloud.taosdata.com/?utm_medium=cn&utm_source=github) | 很多职位正在热招中,请看[这里](https://www.taosdata.com/cn/careers/)
# 目录
# TDengine 简介
1. [TDengine 简介](#1-tdengine-简介)
1. [文档](#2-文档)
1. [必备工具](#3-必备工具)
- [3.1 Linux 预备](#31-Linux系统)
- [3.2 macOS 预备](#32-macOS系统)
- [3.3 Windows 预备](#3.3-Windows系统)
- [3.4 克隆仓库](#34-克隆仓库)
1. [构建](#4-构建)
- [4.1 Linux 系统上构建](#41-Linux系统上构建)
- [4.2 macOS 系统上构建](#42-macOS系统上构建)
- [4.3 Windows 系统上构建](#43-Windows系统上构建)
1. [打包](#5-打包)
1. [安装](#6-安装)
- [6.1 Linux 系统上安装](#61-Linux系统上安装)
- [6.2 macOS 系统上安装](#62-macOS系统上安装)
- [6.3 Windows 系统上安装](#63-Windows系统上安装)
1. [快速运行](#7-快速运行)
- [7.1 Linux 系统上运行](#71-Linux系统上运行)
- [7.2 macOS 系统上运行](#72-macOS系统上运行)
- [7.3 Windows 系统上运行](#73-Windows系统上运行)
1. [测试](#8-测试)
1. [版本发布](#9-版本发布)
1. [工作流](#10-工作流)
1. [覆盖率](#11-覆盖率)
1. [成为社区贡献者](#12-成为社区贡献者)
# 1. 简介
TDengine 是一款开源、高性能、云原生的时序数据库 (Time-Series Database, TSDB)。TDengine 能被广泛运用于物联网、工业互联网、车联网、IT 运维、金融等领域。除核心的时序数据库功能外TDengine 还提供缓存、数据订阅、流式计算等功能是一极简的时序数据处理平台最大程度的减小系统设计的复杂度降低研发和运营成本。与其他时序数据库相比TDengine 的主要优势如下:
- **高性能**通过创新的存储引擎设计无论是数据写入还是查询TDengine 的性能比通用数据库快 10 倍以上也远超其他时序数据库存储空间不及通用数据库的1/10。
- **高性能**通过创新的存储引擎设计无论是数据写入还是查询TDengine 的性能比通用数据库快 10 倍以上,也远超其他时序数据库,存储空间不及通用数据库的 1/10。
- **云原生**通过原生分布式的设计充分利用云平台的优势TDengine 提供了水平扩展能力具备弹性、韧性和可观测性支持k8s部署可运行在公有云、私有云和混合云上。
- **云原生**通过原生分布式的设计充分利用云平台的优势TDengine 提供了水平扩展能力,具备弹性、韧性和可观测性,支持 k8s 部署,可运行在公有云、私有云和混合云上。
- **极简时序数据平台**TDengine 内建消息队列、缓存、流式计算等功能,应用无需再集成 Kafka/Redis/HBase/Spark 等软件,大幅降低系统的复杂度,降低应用开发和运营成本。
@ -31,325 +53,347 @@ TDengine 是一款开源、高性能、云原生的时序数据库 (Time-Series
- **简单易用**无任何依赖安装、集群几秒搞定提供REST以及各种语言连接器与众多第三方工具无缝集成提供命令行程序便于管理和即席查询提供各种运维工具。
- **核心开源**TDengine 的核心代码包括集群功能全部开源截止到2022年8月1日全球超过 135.9k 个运行实例GitHub Star 18.7kFork 4.4k,社区活跃。
- **核心开源**TDengine 的核心代码包括集群功能全部开源,截止到 2022 8 1 日,全球超过 135.9k 个运行实例GitHub Star 18.7kFork 4.4k,社区活跃。
# 文档
了解TDengine高级功能的完整列表请 [点击](https://tdengine.com/tdengine/)。体验 TDengine 最简单的方式是通过 [TDengine云平台](https://cloud.tdengine.com)。
关于完整的使用手册,系统架构和更多细节,请参考 [TDengine 文档](https://docs.taosdata.com) 或者 [TDengine Documentation](https://docs.tdengine.com)。
# 2. 文档
# 构建
关于完整的使用手册,系统架构和更多细节,请参考 [TDengine](https://www.taosdata.com/) 或者 [TDengine 官方文档](https://docs.taosdata.com)。
TDengine 目前可以在 Linux、 Windows、macOS 等平台上安装和运行。任何 OS 的应用也可以选择 taosAdapter 的 RESTful 接口连接服务端 taosd。CPU 支持 X64/ARM64后续会支持 MIPS64、Alpha64、ARM32、RISC-V 等 CPU 架构。目前不支持使用交叉编译器构建。
用户可根据需求选择通过源码、[容器](https://docs.taosdata.com/get-started/docker/)、[安装包](https://docs.taosdata.com/get-started/package/)或[Kubernetes](https://docs.taosdata.com/deployment/k8s/)来安装。本快速指南仅适用于通过源码安装。
用户可根据需求选择通过 [容器](https://docs.taosdata.com/get-started/docker/)、[安装包](https://docs.taosdata.com/get-started/package/)、[Kubernetes](https://docs.taosdata.com/deployment/k8s/) 来安装或直接使用无需安装部署的 [云服务](https://cloud.taosdata.com/)。本快速指南是面向想自己编译、打包、测试的开发者的。
TDengine 还提供一组辅助工具软件 taosTools目前它包含 taosBenchmark曾命名为 taosdemo和 taosdump 两个软件。默认 TDengine 编译不包含 taosTools, 您可以在编译 TDengine 时使用`cmake .. -DBUILD_TOOLS=true` 来同时编译 taosTools
如果想编译或测试 TDengine 连接器,请访问以下仓库:[JDBC连接器](https://github.com/taosdata/taos-connector-jdbc)、[Go连接器](https://github.com/taosdata/driver-go)、[Python连接器](https://github.com/taosdata/taos-connector-python)、[Node.js连接器](https://github.com/taosdata/taos-connector-node)、[C#连接器](https://github.com/taosdata/taos-connector-dotnet)、[Rust连接器](https://github.com/taosdata/taos-connector-rust)。
为了构建TDengine, 请使用 [CMake](https://cmake.org/) 3.13.0 或者更高版本。
# 3. 前置条件
## 安装工具
TDengine 目前可以在 Linux、 Windows、macOS 等平台上安装和运行。任何 OS 的应用也可以选择 taosAdapter 的 RESTful 接口连接服务端 taosd。CPU 支持 X64、ARM64后续会支持 MIPS64、Alpha64、ARM32、RISC-V 等 CPU 架构。目前不支持使用交叉编译器构建。
### Ubuntu 18.04 及以上版本 & Debian
如果你想要编译 taosAdapter 或者 taosKeeper需要安装 Go 1.18 及以上版本。
## 3.1 Linux 系统
<details>
<summary>安装 Linux 必备工具</summary>
### Ubuntu 18.04、20.04、22.04
```bash
sudo apt-get install -y gcc cmake build-essential git libssl-dev libgflags2.2 libgflags-dev
sudo apt-get update
sudo apt-get install -y gcc cmake build-essential git libjansson-dev \
libsnappy-dev liblzma-dev zlib1g-dev pkg-config
```
#### 为 taos-tools 安装编译需要的软件
为了在 Ubuntu/Debian 系统上编译 [taos-tools](https://github.com/taosdata/taos-tools) 需要安装如下软件:
### CentOS 8
```bash
sudo apt install build-essential libjansson-dev libsnappy-dev liblzma-dev libz-dev zlib1g pkg-config
```
### CentOS 7.9
```bash
sudo yum install epel-release
sudo yum update
sudo yum install -y gcc gcc-c++ make cmake3 gflags git openssl-devel
sudo ln -sf /usr/bin/cmake3 /usr/bin/cmake
yum install -y epel-release gcc gcc-c++ make cmake git perl dnf-plugins-core
yum config-manager --set-enabled powertools
yum install -y zlib-static xz-devel snappy-devel jansson-devel pkgconfig libatomic-static libstdc++-static
```
### CentOS 8/Fedora/Rocky Linux
</details>
## 3.2 macOS 系统
<details>
<summary>安装 macOS 必备工具</summary>
根据提示安装依赖工具 [brew](https://brew.sh/)
```bash
sudo dnf install -y gcc gcc-c++ gflags make cmake epel-release git openssl-devel
```
#### 在 CentOS 上构建 taosTools 安装依赖软件
#### CentOS 7.9
```
sudo yum install -y zlib-devel zlib-static xz-devel snappy-devel jansson jansson-devel pkgconfig libatomic libatomic-static libstdc++-static openssl-devel
```
#### CentOS 8/Fedora/Rocky Linux
```
sudo yum install -y epel-release
sudo yum install -y dnf-plugins-core
sudo yum config-manager --set-enabled powertools
sudo yum install -y zlib-devel zlib-static xz-devel snappy-devel jansson jansson-devel pkgconfig libatomic libatomic-static libstdc++-static openssl-devel
```
注意:由于 snappy 缺乏 pkg-config 支持(参考 [链接](https://github.com/google/snappy/pull/86)),会导致 cmake 提示无法发现 libsnappy实际上工作正常。
若 powertools 安装失败,可以尝试改用:
```
sudo yum config-manager --set-enabled powertools
```
#### CentOS + devtoolset
除上述编译依赖包,需要执行以下命令:
```
sudo yum install centos-release-scl
sudo yum install devtoolset-9 devtoolset-9-libatomic-devel
scl enable devtoolset-9 -- bash
```
### macOS
```
brew install argp-standalone gflags pkgconfig
```
### 设置 golang 开发环境
</details>
TDengine 包含数个使用 Go 语言开发的组件比如taosAdapter, 请参考 golang.org 官方文档设置 go 开发环境。
## 3.3 Windows 系统
请使用 1.20 及以上版本。对于中国用户,我们建议使用代理来加速软件包下载。
<details>
```
go env -w GO111MODULE=on
go env -w GOPROXY=https://goproxy.cn,direct
```
<summary>安装 Windows 必备工具</summary>
缺省是不会构建 taosAdapter, 但您可以使用以下命令选择构建 taosAdapter 作为 RESTful 接口的服务
进行中。
```
cmake .. -DBUILD_HTTP=false
```
</details>
### 设置 rust 开发环境
## 3.4 克隆仓库
TDengine 包含数个使用 Rust 语言开发的组件. 请参考 rust-lang.org 官方文档设置 rust 开发环境。
## 获取源码
首先,你需要从 GitHub 克隆源码:
通过如下命令将 TDengine 仓库克隆到指定计算机:
```bash
git clone https://github.com/taosdata/TDengine.git
cd TDengine
```
如果使用 https 协议下载比较慢,可以通过修改 ~/.gitconfig 文件添加以下两行设置使用 ssh 协议下载。需要首先上传 ssh 密钥到 GitHub详细方法请参考 GitHub 官方文档。
```
[url "git@github.com:"]
insteadOf = https://github.com/
```
## 特别说明
# 4. 构建
[JDBC 连接器](https://github.com/taosdata/taos-connector-jdbc) [Go 连接器](https://github.com/taosdata/driver-go)[Python 连接器](https://github.com/taosdata/taos-connector-python)[Node.js 连接器](https://github.com/taosdata/taos-connector-node)[C# 连接器](https://github.com/taosdata/taos-connector-dotnet) [Rust 连接器](https://github.com/taosdata/taos-connector-rust) 和 [Grafana 插件](https://github.com/taosdata/grafanaplugin)已移到独立仓库
TDengine 还提供一组辅助工具软件 taosTools目前它包含 taosBenchmark曾命名为 taosdemo和 taosdump 两个软件。默认 TDengine 编译不包含 taosTools您可以在编译 TDengine 时使用 `cmake .. -DBUILD_TOOLS=true` 来同时编译 taosTools。
为了构建 TDengine请使用 [CMake](https://cmake.org/) 3.13.0 或者更高版本。
## 构建 TDengine
## 4.1 Linux 系统上构建
### Linux 系统
<details>
可以运行代码仓库中的 `build.sh` 脚本编译出 TDengine 和 taosTools包含 taosBenchmark 和 taosdump
<summary>Linux 系统上构建步骤</summary>
可以通过以下命令使用脚本 `build.sh` 编译 TDengine 和 taosTools包括 taosBenchmark 和 taosdump。
```bash
./build.sh
```
这个脚本等价于执行如下命令
也可以通过以下命令进行构建
```bash
mkdir debug
cd debug
mkdir debug && cd debug
cmake .. -DBUILD_TOOLS=true -DBUILD_CONTRIB=true
make
```
您也可以选择使用 jemalloc 作为内存分配器,替代默认的 glibc
如果你想要编译 taosAdapter需要添加 `-DBUILD_HTTP=false` 选项。
如果你想要编译 taosKeeper需要添加 `-DBUILD_KEEPER=true` 选项。
可以使用 Jemalloc 作为内存分配器,而不是使用 glibc
```bash
apt install autoconf
cmake .. -DJEMALLOC_ENABLED=true
cmake .. -DJEMALLOC_ENABLED=ON
```
在 X86-64、X86、arm64 平台上TDengine 生成脚本可以自动检测机器架构。也可以手动配置 CPUTYPE 参数来指定 CPU 类型,如 aarch64 等。
aarch64
TDengine 构建脚本可以自动检测 x86、x86-64、arm64 平台上主机的体系结构。
您也可以通过 CPUTYPE 选项手动指定架构:
```bash
cmake .. -DCPUTYPE=aarch64 && cmake --build .
```
### Windows 系统
</details>
如果你使用的是 Visual Studio 2013 版本:
## 4.2 macOS 系统上构建
打开 cmd.exe执行 vcvarsall.bat 时,为 64 位操作系统指定“x86_amd64”为 32 位操作系统指定“x86”。
<details>
```bash
mkdir debug && cd debug
"C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\vcvarsall.bat" < x86_amd64 | x86 >
cmake .. -G "NMake Makefiles"
nmake
```
<summary>macOS 系统上构建步骤</summary>
如果你使用的是 Visual Studio 2019 或 2017 版本:
请安装 XCode 命令行工具和 cmake。使用 XCode 11.4+ 在 Catalina 和 Big Sur 上完成验证。
打开 cmd.exe执行 vcvarsall.bat 时,为 64 位操作系统指定“x64”为 32 位操作系统指定“x86”。
```bash
mkdir debug && cd debug
"c:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" < x64 | x86 >
cmake .. -G "NMake Makefiles"
nmake
```
你也可以从开始菜单中找到"Visual Studio < 2019 | 2017 >"菜单项,根据你的系统选择"x64 Native Tools Command Prompt for VS < 2019 | 2017 >"或"x86 Native Tools Command Prompt for VS < 2019 | 2017 >",打开命令行窗口,执行:
```bash
mkdir debug && cd debug
cmake .. -G "NMake Makefiles"
nmake
```
### macOS 系统
安装 XCode 命令行工具和 cmake. 在 Catalina 和 Big Sur 操作系统上,需要安装 XCode 11.4+ 版本。
```bash
```shell
mkdir debug && cd debug
cmake .. && cmake --build .
```
# 安装
如果你想要编译 taosAdapter需要添加 `-DBUILD_HTTP=false` 选项。
## Linux 系统
如果你想要编译 taosKeeper需要添加 `-DBUILD_KEEPER=true` 选项。
生成完成后,安装 TDengine
</details>
## 4.3 Windows 系统上构建
<details>
<summary>Windows 系统上构建步骤</summary>
如果您使用的是 Visual Studio 2013请执行 “cmd.exe” 打开命令窗口执行如下命令。
执行 vcvarsall.bat 时64 位的 Windows 请指定 “amd64”32 位的 Windows 请指定 “x86”。
```cmd
mkdir debug && cd debug
"C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\vcvarsall.bat" < amd64 | x86 >
cmake .. -G "NMake Makefiles"
nmake
```
如果您使用 Visual Studio 2019 或 2017
请执行 “cmd.exe” 打开命令窗口执行如下命令。
执行 vcvarsall.bat 时64 位的 Windows 请指定 “x64”32 位的 Windows 请指定 “x86”。
```cmd
mkdir debug && cd debug
"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" < x64 | x86 >
cmake .. -G "NMake Makefiles"
nmake
```
或者,您可以通过点击 Windows 开始菜单打开命令窗口 -> `Visual Studio < 2019 | 2017 >` 文件夹 -> `x64 原生工具命令提示符 VS < 2019 | 2017 >``x86 原生工具命令提示符 < 2019 | 2017 >` 取决于你的 Windows 是什么架构,然后执行命令如下:
```cmd
mkdir debug && cd debug
cmake .. -G "NMake Makefiles"
nmake
```
</details>
# 5. 打包
由于一些组件依赖关系TDengine 社区安装程序不能仅由该存储库创建。我们仍在努力改进。
# 6. 安装
## 6.1 Linux 系统上安装
<details>
<summary>Linux 系统上安装详细步骤</summary>
构建成功后TDengine 可以通过以下命令进行安装:
```bash
sudo make install
```
从源代码安装还将为 TDengine 配置服务管理。用户也可以使用 [TDengine安装包](https://docs.taosdata.com/get-started/package/)进行安装。
</details>
## 6.2 macOS 系统上安装
<details>
<summary>macOS 系统上安装详细步骤</summary>
构建成功后TDengine可以通过以下命令进行安装
```bash
sudo make install
```
用户可以在[文件目录结构](https://docs.taosdata.com/reference/directory/)中了解更多在操作系统中生成的目录或文件。
</details>
从源代码安装也会为 TDengine 配置服务管理 ,用户也可以选择[从安装包中安装](https://docs.taosdata.com/get-started/package/)。
## 6.3 Windows 系统上安装
安装成功后,在终端中启动 TDengine 服务:
<details>
```bash
sudo systemctl start taosd
```
<summary>Windows 系统上安装详细步骤</summary>
用户可以使用 TDengine CLI 来连接 TDengine 服务,在终端中,输入:
```bash
taos
```
如果 TDengine CLI 连接服务成功,将会打印出欢迎消息和版本信息。如果失败,则会打印出错误消息。
## Windows 系统
生成完成后,安装 TDengine
构建成功后TDengine 可以通过以下命令进行安装:
```cmd
nmake install
```
## macOS 系统
</details>
生成完成后,安装 TDengine
# 7. 快速运行
## 7.1 Linux 系统上运行
<details>
<summary>Linux 系统上运行详细步骤</summary>
在Linux 系统上安装 TDengine 完成后,在终端运行如下命令启动服务:
```bash
sudo make install
sudo systemctl start taosd
```
用户可以在[文件目录结构](https://docs.taosdata.com/reference/directory/)中了解更多在操作系统中生成的目录或文件。
从源代码安装也会为 TDengine 配置服务管理 ,用户也可以选择[从安装包中安装](https://docs.taosdata.com/get-started/package/)。
安装成功后,可以在应用程序中双击 TDengine 图标启动服务,或者在终端中启动 TDengine 服务:
```bash
sudo launchctl start com.tdengine.taosd
```
用户可以使用 TDengine CLI 来连接 TDengine 服务,在终端中,输入:
然后用户可以通过如下命令使用 TDengine 命令行连接 TDengine 服务:
```bash
taos
```
如果 TDengine CLI 连接服务成功,将会打印出欢迎消息和版本信息。如果失败,则会打印出错误消息。
如果 TDengine 命令行连接服务器成功,系统将打印欢迎信息和版本信息。否则,将显示连接错误信息。
## 快速运行
如果不希望以服务方式运行 TDengine也可以在终端中直接运行它。也即在生成完成后执行以下命令在 Windows 下,生成的可执行文件会带有 .exe 后缀,例如会名为 taosd.exe
如果您不想将 TDengine 作为服务运行,您可以在当前终端中运行它。例如,要在构建完成后快速启动 TDengine 服务器,在终端中运行以下命令:(以 Linux 为例Windows 上的命令为 `taosd.exe`
```bash
./build/bin/taosd -c test/cfg
```
在另一个终端,使用 TDengine CLI 连接服务器:
在另一个终端上,使用 TDengine 命令行连接服务器:
```bash
./build/bin/taos -c test/cfg
```
"-c test/cfg"指定系统配置文件所在目录。
选项 `-c test/cfg` 指定系统配置文件的目录。
# 体验 TDengine
</details>
在 TDengine 终端中,用户可以通过 SQL 命令来创建/删除数据库、表等,并进行插入查询操作。
## 7.2 macOS 系统上运行
```sql
CREATE DATABASE demo;
USE demo;
CREATE TABLE t (ts TIMESTAMP, speed INT);
INSERT INTO t VALUES('2019-07-15 00:00:00', 10);
INSERT INTO t VALUES('2019-07-15 01:00:00', 20);
SELECT * FROM t;
ts | speed |
===================================
19-07-15 00:00:00.000| 10|
19-07-15 01:00:00.000| 20|
Query OK, 2 row(s) in set (0.001700s)
<details>
<summary>macOS 系统上运行详细步骤</summary>
在 macOS 上安装完成后启动服务,双击 `/applications/TDengine` 启动程序,或者在终端中执行如下命令:
```bash
sudo launchctl start com.tdengine.taosd
```
然后在终端中使用如下命令通过 TDengine 命令行连接 TDengine 服务器:
```bash
taos
```
如果 TDengine 命令行连接服务器成功,系统将打印欢迎信息和版本信息。否则,将显示错误信息。
</details>
## 7.3 Windows 系统上运行
<details>
<summary>Windows 系统上运行详细步骤</summary>
您可以使用以下命令在 Windows 平台上启动 TDengine 服务器:
```cmd
.\build\bin\taosd.exe -c test\cfg
```
在另一个终端上,使用 TDengine 命令行连接服务器:
```cmd
.\build\bin\taos.exe -c test\cfg
```
选项 `-c test/cfg` 指定系统配置文件的目录。
</details>
# 8. 测试
有关如何在 TDengine 上运行不同类型的测试,请参考 [TDengine 测试](./tests/README-CN.md)。
# 9. 版本发布
TDengine 发布版本的完整列表,请参考 [版本列表](https://github.com/taosdata/TDengine/releases)。
# 10. 工作流
TDengine 构建检查工作流可参考 [GitHub Action](https://github.com/taosdata/TDengine/actions/workflows/taosd-ci-build.yml),更多的工作流正在创建中,将很快可用。
# 11. 覆盖率
最新的 TDengine 测试覆盖率报告可参考 [coveralls.io](https://coveralls.io/github/taosdata/TDengine)。
<details>
<summary>如何在本地运行测试覆盖率报告?</summary>
在本地创建测试覆盖率报告HTML 格式),请运行以下命令:
```bash
cd tests
bash setup-lcov.sh -v 1.16 && ./run_local_coverage.sh -b main -c task
# on main branch and run cases in longtimeruning_cases.task
# for more infomation about options please refer to ./run_local_coverage.sh -h
```
> **注意**
> 请注意,-b 和 -i 选项将使用 -DCOVER=true 选项重新编译 TDengine这可能需要花费一些时间。
</details>
# 应用开发
## 官方连接器
TDengine 提供了丰富的应用程序开发接口,其中包括 C/C++、Java、Python、Go、Node.js、C#、RESTful 等,便于用户快速开发应用:
- [Java](https://docs.taosdata.com/connector/java/)
- [C/C++](https://docs.taosdata.com/connector/cpp/)
- [Python](https://docs.taosdata.com/connector/python/)
- [Go](https://docs.taosdata.com/connector/go/)
- [Node.js](https://docs.taosdata.com/connector/node/)
- [Rust](https://docs.taosdata.com/connector/rust/)
- [C#](https://docs.taosdata.com/connector/csharp/)
- [RESTful API](https://docs.taosdata.com/connector/rest-api/)
# 12. 成为社区贡献者
点击 [这里](https://www.taosdata.com/contributor),了解如何成为 TDengine 的贡献者。
# 加入技术交流群
TDengine 官方社群「物联网大数据群」对外开放,欢迎您加入讨论。搜索微信号 "tdengine",加小 T 为好友,即可入群。

409
README.md
View File

@ -1,4 +1,3 @@
<p>
<p align="center">
<a href="https://tdengine.com" target="_blank">
<img
@ -8,11 +7,13 @@
/>
</a>
</p>
<p>
[![Build Status](https://cloud.drone.io/api/badges/taosdata/TDengine/status.svg?ref=refs/heads/master)](https://cloud.drone.io/taosdata/TDengine)
[![Build status](https://ci.appveyor.com/api/projects/status/kf3pwh2or5afsgl9/branch/master?svg=true)](https://ci.appveyor.com/project/sangshuduo/tdengine-2n8ge/branch/master)
[![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/taosdata/tdengine/tdengine-test.yml)](https://github.com/taosdata/TDengine/actions/workflows/tdengine-test.yml)
[![Coverage Status](https://coveralls.io/repos/github/taosdata/TDengine/badge.svg?branch=3.0)](https://coveralls.io/github/taosdata/TDengine?branch=3.0)
[![GitHub commit activity](https://img.shields.io/github/commit-activity/m/taosdata/tdengine)](https://github.com/feici02/TDengine/commits/main/)
<br />
[![GitHub Release](https://img.shields.io/github/v/release/taosdata/tdengine)](https://github.com/taosdata/TDengine/releases)
[![GitHub License](https://img.shields.io/github/license/taosdata/tdengine)](https://github.com/taosdata/TDengine/blob/main/LICENSE)
[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/4201/badge)](https://bestpractices.coreinfrastructure.org/projects/4201)
<br />
[![Twitter Follow](https://img.shields.io/twitter/follow/tdenginedb?label=TDengine&style=social)](https://twitter.com/tdenginedb)
@ -21,9 +22,37 @@
[![LinkedIn](https://img.shields.io/badge/Follow_LinkedIn--white?logo=linkedin&style=social)](https://www.linkedin.com/company/tdengine)
[![StackOverflow](https://img.shields.io/badge/Ask_StackOverflow--white?logo=stackoverflow&style=social&logoColor=orange)](https://stackoverflow.com/questions/tagged/tdengine)
English | [简体中文](README-CN.md) | [TDengine Cloud](https://cloud.tdengine.com) | [Learn more about TSDB](https://tdengine.com/tsdb/)
English | [简体中文](README-CN.md) | [TDengine Cloud](https://cloud.tdengine.com) | [Learn more about TSDB](https://tdengine.com/time-series-database/)
# What is TDengine
# Table of Contents
1. [Introduction](#1-introduction)
1. [Documentation](#2-documentation)
1. [Prerequisites](#3-prerequisites)
- [3.1 Prerequisites On Linux](#31-prerequisites-on-linux)
- [3.2 Prerequisites On macOS](#32-prerequisites-on-macos)
- [3.3 Prerequisites On Windows](#33-prerequisites-on-windows)
- [3.4 Clone the repo](#34-clone-the-repo)
1. [Building](#4-building)
- [4.1 Build on Linux](#41-build-on-linux)
- [4.2 Build on macOS](#42-build-on-macos)
- [4.3 Build On Windows](#43-build-on-windows)
1. [Packaging](#5-packaging)
1. [Installation](#6-installation)
- [6.1 Install on Linux](#61-install-on-linux)
- [6.2 Install on macOS](#62-install-on-macos)
- [6.3 Install on Windows](#63-install-on-windows)
1. [Running](#7-running)
- [7.1 Run TDengine on Linux](#71-run-tdengine-on-linux)
- [7.2 Run TDengine on macOS](#72-run-tdengine-on-macos)
- [7.3 Run TDengine on Windows](#73-run-tdengine-on-windows)
1. [Testing](#8-testing)
1. [Releasing](#9-releasing)
1. [Workflow](#10-workflow)
1. [Coverage](#11-coverage)
1. [Contributing](#12-contributing)
# 1. Introduction
TDengine is an open source, high-performance, cloud native [time-series database](https://tdengine.com/tsdb/) optimized for Internet of Things (IoT), Connected Cars, and Industrial IoT. It enables efficient, real-time data ingestion, processing, and monitoring of TB and even PB scale data per day, generated by billions of sensors and data collectors. TDengine differentiates itself from other time-series databases with the following advantages:
@ -33,144 +62,101 @@ TDengine is an open source, high-performance, cloud native [time-series database
- **[Cloud Native](https://tdengine.com/tdengine/cloud-native-time-series-database/)**: Through native distributed design, sharding and partitioning, separation of compute and storage, RAFT, support for kubernetes deployment and full observability, TDengine is a cloud native Time-Series Database and can be deployed on public, private or hybrid clouds.
- **[Ease of Use](https://tdengine.com/tdengine/easy-time-series-data-platform/)**: For administrators, TDengine significantly reduces the effort to deploy and maintain. For developers, it provides a simple interface, simplified solution and seamless integrations for third party tools. For data users, it gives easy data access.
- **[Easy Data Analytics](https://tdengine.com/tdengine/time-series-data-analytics-made-easy/)**: Through super tables, storage and compute separation, data partitioning by time interval, pre-computation and other means, TDengine makes it easy to explore, format, and get access to data in a highly efficient way.
- **[Open Source](https://tdengine.com/tdengine/open-source-time-series-database/)**: TDengines core modules, including cluster feature, are all available under open source licenses. It has gathered 19.9k stars on GitHub. There is an active developer community, and over 139k running instances worldwide.
For a full list of TDengine competitive advantages, please [check here](https://tdengine.com/tdengine/). The easiest way to experience TDengine is through [TDengine Cloud](https://cloud.tdengine.com).
# Documentation
For the latest TDengine component TDgpt, please refer to [TDgpt README](./tools/tdgpt/README.md) for details.
# 2. Documentation
For user manual, system design and architecture, please refer to [TDengine Documentation](https://docs.tdengine.com) ([TDengine 文档](https://docs.taosdata.com))
# Building
You can choose to install TDengine via [container](https://docs.tdengine.com/get-started/deploy-in-docker/), [installation package](https://docs.tdengine.com/get-started/deploy-from-package/), [Kubernetes](https://docs.tdengine.com/operations-and-maintenance/deploy-your-cluster/#kubernetes-deployment) or try [fully managed service](https://cloud.tdengine.com/) without installation. This quick guide is for developers who want to contribute, build, release and test TDengine by themselves.
At the moment, TDengine server supports running on Linux/Windows/macOS systems. Any application can also choose the RESTful interface provided by taosAdapter to connect the taosd service. TDengine supports X64/ARM64 CPU, and it will support MIPS64, Alpha64, ARM32, RISC-V and other CPU architectures in the future. Right now we don't support build with cross-compiling environment.
For contributing/building/testing TDengine Connectors, please check the following repositories: [JDBC Connector](https://github.com/taosdata/taos-connector-jdbc), [Go Connector](https://github.com/taosdata/driver-go), [Python Connector](https://github.com/taosdata/taos-connector-python), [Node.js Connector](https://github.com/taosdata/taos-connector-node), [C# Connector](https://github.com/taosdata/taos-connector-dotnet), [Rust Connector](https://github.com/taosdata/taos-connector-rust).
You can choose to install through source code, [container](https://docs.tdengine.com/get-started/docker/), [installation package](https://docs.tdengine.com/get-started/package/) or [Kubernetes](https://docs.tdengine.com/deployment/k8s/). This quick guide only applies to installing from source.
# 3. Prerequisites
TDengine provide a few useful tools such as taosBenchmark (was named taosdemo) and taosdump. They were part of TDengine. By default, TDengine compiling does not include taosTools. You can use `cmake .. -DBUILD_TOOLS=true` to make them be compiled with TDengine.
At the moment, TDengine server supports running on Linux/Windows/MacOS systems. Any application can also choose the RESTful interface provided by taosAdapter to connect the taosd service. TDengine supports X64/ARM64 CPU, and it will support MIPS64, Alpha64, ARM32, RISC-V and other CPU architectures in the future. Right now we don't support build with cross-compiling environment.
To build TDengine, use [CMake](https://cmake.org/) 3.13.0 or higher versions in the project directory.
If you want to compile taosAdapter or taosKeeper, you need to install Go 1.18 or above.
## Install build tools
## 3.1 Prerequisites on Linux
### Ubuntu 18.04 and above or Debian
<details>
<summary>Install required tools on Linux</summary>
### For Ubuntu 18.04、20.04、22.04
```bash
sudo apt-get install -y gcc cmake build-essential git libssl-dev libgflags2.2 libgflags-dev
sudo apt-get update
sudo apt-get install -y gcc cmake build-essential git libjansson-dev \
libsnappy-dev liblzma-dev zlib1g-dev pkg-config
```
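As a quick sanity check, you can verify that the toolchain on your machine meets the version requirements mentioned in this guide (GCC 9.3.1+, CMake 3.13.0+, and Go 1.18+ if you plan to build taosAdapter or taosKeeper). This is only an illustrative sketch, not part of the official steps:
```bash
# Print tool versions; compare them against the minimums listed in this README
gcc --version | head -n 1
cmake --version | head -n 1
git --version
go version   # only needed for taosAdapter / taosKeeper
```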
#### Install build dependencies for taosTools
To build the [taosTools](https://github.com/taosdata/taos-tools) on Ubuntu/Debian, the following packages need to be installed.
### For CentOS 8
```bash
sudo apt install build-essential libjansson-dev libsnappy-dev liblzma-dev libz-dev zlib1g pkg-config
```
### CentOS 7.9
```bash
sudo yum install epel-release
sudo yum update
sudo yum install -y gcc gcc-c++ make cmake3 gflags git openssl-devel
sudo ln -sf /usr/bin/cmake3 /usr/bin/cmake
yum install -y epel-release gcc gcc-c++ make cmake git perl dnf-plugins-core
yum config-manager --set-enabled powertools
yum install -y zlib-static xz-devel snappy-devel jansson-devel pkgconfig libatomic-static libstdc++-static
```
### CentOS 8/Fedora/Rocky Linux
</details>
## 3.2 Prerequisites on macOS
<details>
<summary>Install required tools on macOS</summary>
Please install the dependencies with [brew](https://brew.sh/).
```bash
sudo dnf install -y gcc gcc-c++ make cmake epel-release gflags git openssl-devel
```
#### Install build dependencies for taosTools on CentOS
#### CentOS 7.9
```
sudo yum install -y zlib-devel zlib-static xz-devel snappy-devel jansson jansson-devel pkgconfig libatomic libatomic-static libstdc++-static openssl-devel
```
#### CentOS 8/Fedora/Rocky Linux
```
sudo yum install -y epel-release
sudo yum install -y dnf-plugins-core
sudo yum config-manager --set-enabled powertools
sudo yum install -y zlib-devel zlib-static xz-devel snappy-devel jansson jansson-devel pkgconfig libatomic libatomic-static libstdc++-static openssl-devel
```
Note: Since snappy lacks pkg-config support (refer to [link](https://github.com/google/snappy/pull/86)), it leads to a cmake prompt that libsnappy is not found. But snappy still works well.
If the PowerTools installation fails, you can try to use:
```
sudo yum config-manager --set-enabled powertools
```
#### For CentOS + devtoolset
Besides above dependencies, please run following commands:
```
sudo yum install centos-release-scl
sudo yum install devtoolset-9 devtoolset-9-libatomic-devel
scl enable devtoolset-9 -- bash
```
### macOS
```
brew install argp-standalone gflags pkgconfig
```
### Setup golang environment
</details>
TDengine includes a few components like taosAdapter developed by Go language. Please refer to golang.org official documentation for golang environment setup.
## 3.3 Prerequisites on Windows
Please use version 1.20+. For the user in China, we recommend using a proxy to accelerate package downloading.
<details>
```
go env -w GO111MODULE=on
go env -w GOPROXY=https://goproxy.cn,direct
```
<summary>Install required tools on Windows</summary>
The default will not build taosAdapter, but you can use the following command to build taosAdapter as the service for RESTful interface.
Work in Progress.
```
cmake .. -DBUILD_HTTP=false
```
</details>
### Setup rust environment
## 3.4 Clone the repo
TDengine includes a few components developed by Rust language. Please refer to rust-lang.org official documentation for rust environment setup.
## Get the source codes
First of all, you may clone the source codes from github:
Clone the repository to the target machine:
```bash
git clone https://github.com/taosdata/TDengine.git
cd TDengine
```
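If you only need to build the source rather than develop it, a shallow clone can reduce download time. This is an optional sketch using standard git options, not a required step:
```bash
# Clone only the latest revision of the default branch
git clone --depth 1 https://github.com/taosdata/TDengine.git
cd TDengine
```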
You can modify the file ~/.gitconfig to use ssh protocol instead of https for better download speed. You will need to upload ssh public key to GitHub first. Please refer to GitHub official documentation for detail.
</details>
```
[url "git@github.com:"]
insteadOf = https://github.com/
```
# 4. Building
## Special Note
TDengine provide a few useful tools such as taosBenchmark (was named taosdemo) and taosdump. They were part of TDengine. By default, TDengine compiling does not include taosTools. You can use `cmake .. -DBUILD_TOOLS=true` to make them be compiled with TDengine.
[JDBC Connector](https://github.com/taosdata/taos-connector-jdbc) [Go Connector](https://github.com/taosdata/driver-go)[Python Connector](https://github.com/taosdata/taos-connector-python)[Node.js Connector](https://github.com/taosdata/taos-connector-node)[C# Connector](https://github.com/taosdata/taos-connector-dotnet) [Rust Connector](https://github.com/taosdata/taos-connector-rust) and [Grafana plugin](https://github.com/taosdata/grafanaplugin) has been moved to standalone repository.
TDengine requires [GCC](https://gcc.gnu.org/) 9.3.1 or higher and [CMake](https://cmake.org/) 3.13.0 or higher for building.
## Build TDengine
## 4.1 Build on Linux
### On Linux platform
<details>
<summary>Detailed steps to build on Linux</summary>
You can run the bash script `build.sh` to build both TDengine and taosTools including taosBenchmark and taosdump as below:
@ -181,29 +167,54 @@ You can run the bash script `build.sh` to build both TDengine and taosTools incl
It equals to execute following commands:
```bash
mkdir debug
cd debug
mkdir debug && cd debug
cmake .. -DBUILD_TOOLS=true -DBUILD_CONTRIB=true
make
```
If you want to compile taosAdapter, you need to add the `-DBUILD_HTTP=false` option.
If you want to compile taosKeeper, you need to add the `-DBUILD_KEEPER=true` option.
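For example, a single configure-and-build invocation that combines the options mentioned above might look like the following. This is an illustrative sketch; only flags already named in this README are used:
```bash
mkdir -p debug && cd debug
# Build TDengine together with taosTools, taosAdapter and taosKeeper
cmake .. -DBUILD_TOOLS=true -DBUILD_HTTP=false -DBUILD_KEEPER=true
make -j"$(nproc)"
```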
You can use Jemalloc as memory allocator instead of glibc:
```bash
apt install autoconf
cmake .. -DJEMALLOC_ENABLED=ON
```
TDengine build script can auto-detect the host machine's architecture on x86, x86-64, arm64 platform.
You can also specify architecture manually by CPUTYPE option if the detection result is not correct:
```bash
cmake .. -DCPUTYPE=aarch64 && cmake --build .
```
### On Windows platform
</details>
## 4.2 Build on macOS
<details>
<summary>Detailed steps to build on macOS</summary>
Please install XCode command line tools and cmake. Verified with XCode 11.4+ on Catalina and Big Sur.
```shell
mkdir debug && cd debug
cmake .. && cmake --build .
```
If you want to compile taosAdapter, you need to add the `-DBUILD_HTTP=false` option.
If you want to compile taosKeeper, you need to add the `-DBUILD_KEEPER=true` option.
</details>
## 4.3 Build on Windows
<details>
<summary>Detailed steps to build on Windows</summary>
If you use the Visual Studio 2013, please open a command window by executing "cmd.exe".
Please specify "amd64" for 64 bits Windows or specify "x86" for 32 bits Windows when you execute vcvarsall.bat.
@ -234,31 +245,67 @@ mkdir debug && cd debug
cmake .. -G "NMake Makefiles"
nmake
```
</details>
### On macOS platform
# 5. Packaging
Please install XCode command line tools and cmake. Verified with XCode 11.4+ on Catalina and Big Sur.
The TDengine community installer can NOT be created by this repository only, due to some component dependencies. We are still working on this improvement.
```shell
mkdir debug && cd debug
cmake .. && cmake --build .
```
# 6. Installation
# Installing
## 6.1 Install on Linux
## On Linux platform
<details>
<summary>Detailed steps to install on Linux</summary>
After building successfully, TDengine can be installed by:
```bash
sudo make install
```
Users can find more information about directories installed on the system in the [directory and files](https://docs.tdengine.com/reference/directory/) section.
Installing from source code will also configure service management for TDengine. Users can also choose to [install from packages](https://docs.tdengine.com/get-started/deploy-from-package/) for it.
Installing from source code will also configure service management for TDengine.Users can also choose to [install from packages](https://docs.tdengine.com/get-started/package/) for it.
</details>
## 6.2 Install on macOS
<details>
<summary>Detailed steps to install on macOS</summary>
After building successfully, TDengine can be installed by:
```bash
sudo make install
```
</details>
## 6.3 Install on Windows
<details>
<summary>Detailed steps to install on windows</summary>
After building successfully, TDengine can be installed by:
```cmd
nmake install
```
</details>
# 7. Running
## 7.1 Run TDengine on Linux
<details>
<summary>Detailed steps to run on Linux</summary>
To start the service after installation on linux, in a terminal, use:
```bash
sudo systemctl start taosd
@ -272,28 +319,29 @@ taos
If TDengine CLI connects the server successfully, welcome messages and version info are printed. Otherwise, an error message is shown.
If you don't want to run TDengine as a service, you can run it in current shell. For example, to quickly start a TDengine server after building, run the command below in terminal: (We take Linux as an example, command on Windows will be `taosd.exe`)
```bash
./build/bin/taosd -c test/cfg
```
In another terminal, use the TDengine CLI to connect the server:
```bash
./build/bin/taos -c test/cfg
```
Option `-c test/cfg` specifies the system configuration file directory.
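As an optional sanity check after the server is up, you can run a single statement through the CLI and exit. This sketch assumes the TDengine CLI supports the `-s` option for executing a command non-interactively:
```bash
# Run one statement against the locally started server and print the result
./build/bin/taos -c test/cfg -s "SHOW DATABASES;"
```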
</details>
## 7.2 Run TDengine on macOS
<details>
<summary>Detailed steps to run on macOS</summary>
To start the service after installation on macOS, double-click the /applications/TDengine to start the program, or in a terminal, use:
```bash
sudo launchctl start com.tdengine.taosd
@ -307,64 +355,63 @@ taos
If TDengine CLI connects the server successfully, welcome messages and version info are printed. Otherwise, an error message is shown.
</details>
## 7.3 Run TDengine on Windows
<details>
<summary>Detailed steps to run on windows</summary>
You can start TDengine server on Windows platform with below commands:
```cmd
.\build\bin\taosd.exe -c test\cfg
```
In another terminal, use the TDengine CLI to connect the server:
```cmd
.\build\bin\taos.exe -c test\cfg
```
option "-c test/cfg" specifies the system configuration file directory.
It is easy to run SQL commands from TDengine CLI which is the same as other SQL databases:
```sql
CREATE DATABASE demo;
USE demo;
CREATE TABLE t (ts TIMESTAMP, speed INT);
INSERT INTO t VALUES('2019-07-15 00:00:00', 10);
INSERT INTO t VALUES('2019-07-15 01:00:00', 20);
SELECT * FROM t;
ts | speed |
===================================
19-07-15 00:00:00.000| 10|
19-07-15 01:00:00.000| 20|
Query OK, 2 row(s) in set (0.001700s)
```
</details>
# 8. Testing
For how to run different types of tests on TDengine, please see [Testing TDengine](./tests/README.md).
# 9. Releasing
For the complete list of TDengine Releases, please see [Releases](https://github.com/taosdata/TDengine/releases).
# 10. Workflow
TDengine build check workflow can be found in this [Github Action](https://github.com/taosdata/TDengine/actions/workflows/taosd-ci-build.yml). More workflows will be available soon.
# 11. Coverage
Latest TDengine test coverage report can be found on [coveralls.io](https://coveralls.io/github/taosdata/TDengine)
<details>
<summary>How to run the coverage report locally?</summary>
To create the test coverage report (in HTML format) locally, please run following commands:
```bash
cd tests
bash setup-lcov.sh -v 1.16 && ./run_local_coverage.sh -b main -c task
# on main branch and run cases in longtimeruning_cases.task
# for more infomation about options please refer to ./run_local_coverage.sh -h
```
> **NOTE:**
> Please note that the -b and -i options will recompile TDengine with the -DCOVER=true option, which may take some time.
# Developing with TDengine
</details>
## Official Connectors
# 12. Contributing
TDengine provides abundant developing tools for users to develop on TDengine. Follow the links below to find your desired connectors and relevant documentation.
- [Java](https://docs.tdengine.com/reference/connector/java/)
- [C/C++](https://docs.tdengine.com/reference/connector/cpp/)
- [Python](https://docs.tdengine.com/reference/connector/python/)
- [Go](https://docs.tdengine.com/reference/connector/go/)
- [Node.js](https://docs.tdengine.com/reference/connector/node/)
- [Rust](https://docs.tdengine.com/reference/connector/rust/)
- [C#](https://docs.tdengine.com/reference/connector/csharp/)
- [RESTful API](https://docs.tdengine.com/reference/rest-api/)
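As a minimal illustration of the RESTful interface listed above (an illustrative sketch, not taken from this README; it assumes taosAdapter is running locally on its default port 6041 with the default `root`/`taosdata` credentials):
```bash
# Submit a SQL statement over HTTP and get the result as JSON
curl -u root:taosdata -d "SHOW DATABASES;" http://localhost:6041/rest/sql
```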
# Contribute to TDengine
Please follow the [contribution guidelines](CONTRIBUTING.md) to contribute to the project.
# Join the TDengine Community
For more information about TDengine, you can follow us on social media and join our Discord server:
- [Discord](https://discord.com/invite/VZdSuUg4pS)
- [Twitter](https://twitter.com/TDengineDB)
- [LinkedIn](https://www.linkedin.com/company/tdengine/)
- [YouTube](https://www.youtube.com/@tdengine)
Please follow the [contribution guidelines](CONTRIBUTING.md) to contribute to TDengine.

View File

@ -2,7 +2,7 @@
# addr2line
ExternalProject_Add(addr2line
GIT_REPOSITORY https://github.com/davea42/libdwarf-addr2line.git
GIT_TAG master
GIT_TAG main
SOURCE_DIR "${TD_CONTRIB_DIR}/addr2line"
BINARY_DIR "${TD_CONTRIB_DIR}/addr2line"
CONFIGURE_COMMAND ""

View File

@ -2,6 +2,7 @@
ExternalProject_Add(azure
URL https://github.com/Azure/azure-sdk-for-cpp/archive/refs/tags/azure-storage-blobs_12.13.0-beta.1.tar.gz
URL_HASH SHA256=3eca486fd60e3522d0a633025ecd652a71515b1e944799b2e8ee31fd590305a9
DEPENDS xml2
DOWNLOAD_NO_PROGRESS 1
DOWNLOAD_DIR "${TD_CONTRIB_DIR}/deps-download"
SOURCE_DIR "${TD_CONTRIB_DIR}/azure-sdk-for-cpp-azure-storage-blobs_12.13.0-beta.1"

View File

@ -97,11 +97,10 @@ ELSE()
SET(TD_TAOS_TOOLS TRUE)
ENDIF()
IF(${TD_WINDOWS})
SET(TAOS_LIB taos_static)
ELSE()
SET(TAOS_LIB taos)
ENDIF()
SET(TAOS_LIB taos)
SET(TAOS_LIB_STATIC taos_static)
SET(TAOS_NATIVE_LIB taosnative)
SET(TAOS_NATIVE_LIB_STATIC taosnative_static)
# build TSZ by default
IF("${TSZ_ENABLED}" MATCHES "false")
@ -113,9 +112,6 @@ ELSE()
set(VAR_TSZ "TSZ" CACHE INTERNAL "global variant tsz")
ENDIF()
# force set all platform to JEMALLOC_ENABLED = false
SET(JEMALLOC_ENABLED OFF)
IF(TD_WINDOWS)
MESSAGE("${Yellow} set compiler flag for Windows! ${ColourReset}")
@ -128,7 +124,7 @@ IF(TD_WINDOWS)
SET(COMMON_FLAGS "/w /D_WIN32 /DWIN32 /Zi /MTd")
ENDIF()
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /MANIFEST:NO")
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /MANIFEST:NO /FORCE:MULTIPLE")
# IF (MSVC AND (MSVC_VERSION GREATER_EQUAL 1900))
# SET(COMMON_FLAGS "${COMMON_FLAGS} /Wv:18")
@ -255,3 +251,17 @@ ELSE()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-reserved-user-defined-literal -g3 -Wno-literal-suffix -Werror=return-type -fPIC -gdwarf-2 -Wformat=2 -Wno-format-nonliteral -Wno-format-truncation -Wno-format-y2k")
ENDIF()
ENDIF()
IF(TD_LINUX_64)
IF(${JEMALLOC_ENABLED})
MESSAGE(STATUS "JEMALLOC Enabled")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-error=attributes")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=attributes")
SET(LINK_JEMALLOC "-L${CMAKE_BINARY_DIR}/build/lib -ljemalloc")
ADD_DEFINITIONS(-DTD_JEMALLOC_ENABLED -I${CMAKE_BINARY_DIR}/build/include -L${CMAKE_BINARY_DIR}/build/lib -Wl,-rpath,${CMAKE_BINARY_DIR}/build/lib)
ELSE()
MESSAGE(STATUS "JEMALLOC Disabled")
SET(LINK_JEMALLOC "")
ENDIF()
ENDIF()

View File

@ -9,61 +9,61 @@ option(
)
IF(${TD_WINDOWS})
IF(NOT TD_ASTRA)
MESSAGE("build pthread Win32")
option(
BUILD_PTHREAD
"If build pthread on Windows"
ON
)
MESSAGE("build pthread Win32")
option(
BUILD_PTHREAD
"If build pthread on Windows"
ON
)
MESSAGE("build gnu regex for Windows")
option(
BUILD_GNUREGEX
"If build gnu regex on Windows"
ON
)
MESSAGE("build gnu regex for Windows")
option(
BUILD_GNUREGEX
"If build gnu regex on Windows"
ON
)
MESSAGE("build iconv Win32")
option(
BUILD_WITH_ICONV
"If build iconv on Windows"
ON
)
MESSAGE("build iconv Win32")
option(
BUILD_WITH_ICONV
"If build iconv on Windows"
ON
)
MESSAGE("build msvcregex Win32")
option(
BUILD_MSVCREGEX
"If build msvcregex on Windows"
ON
)
MESSAGE("build msvcregex Win32")
option(
BUILD_MSVCREGEX
"If build msvcregex on Windows"
ON
)
MESSAGE("build wcwidth Win32")
option(
BUILD_WCWIDTH
"If build wcwidth on Windows"
ON
)
MESSAGE("build wcwidth Win32")
option(
BUILD_WCWIDTH
"If build wcwidth on Windows"
ON
)
MESSAGE("build wingetopt Win32")
option(
BUILD_WINGETOPT
"If build wingetopt on Windows"
ON
)
MESSAGE("build wingetopt Win32")
option(
BUILD_WINGETOPT
"If build wingetopt on Windows"
ON
)
option(
TDENGINE_3
"TDengine 3.x for taos-tools"
ON
)
option(
BUILD_CRASHDUMP
"If build crashdump on Windows"
ON
)
option(
TDENGINE_3
"TDengine 3.x for taos-tools"
ON
)
option(
BUILD_CRASHDUMP
"If build crashdump on Windows"
ON
)
ENDIF ()
ELSEIF (TD_DARWIN_64)
IF(${BUILD_TEST})
add_definitions(-DCOMPILER_SUPPORTS_CXX13)
@ -71,58 +71,102 @@ ELSEIF (TD_DARWIN_64)
ENDIF ()
option(
BUILD_GEOS
"If build with geos"
BUILD_WITH_LEMON
"If build with lemon"
ON
)
option(
BUILD_WITH_UDF
"If build with UDF"
ON
)
IF(NOT TD_ASTRA)
option(
BUILD_GEOS
"If build with geos"
ON
)
option(
BUILD_SHARED_LIBS
""
OFF
)
option(
RUST_BINDINGS
"If build with rust-bindings"
ON
)
option(
BUILD_PCRE2
"If build with pcre2"
ON
)
option(
BUILD_SHARED_LIBS
""
OFF
option(
JEMALLOC_ENABLED
"If build with jemalloc"
OFF
)
option(
BUILD_SANITIZER
"If build sanitizer"
OFF
)
option(
BUILD_ADDR2LINE
"If build addr2line"
OFF
)
option(
BUILD_WITH_LEVELDB
"If build with leveldb"
OFF
)
option(
RUST_BINDINGS
"If build with rust-bindings"
ON
option(
BUILD_WITH_ROCKSDB
"If build with rocksdb"
ON
)
option(
BUILD_PCRE2
"If build with pcre2"
ON
)
option(
BUILD_WITH_LZ4
"If build with lz4"
ON
)
ELSE ()
option(
JEMALLOC_ENABLED
"If build with jemalloc"
OFF
option(
BUILD_WITH_LZMA2
"If build with lzma2"
ON
)
option(
BUILD_SANITIZER
"If build sanitizer"
OFF
)
ENDIF ()
option(
BUILD_ADDR2LINE
"If build addr2line"
OFF
)
ADD_DEFINITIONS(-DUSE_AUDIT)
ADD_DEFINITIONS(-DUSE_GEOS)
ADD_DEFINITIONS(-DUSE_UDF)
ADD_DEFINITIONS(-DUSE_STREAM)
ADD_DEFINITIONS(-DUSE_PRCE2)
ADD_DEFINITIONS(-DUSE_RSMA)
ADD_DEFINITIONS(-DUSE_TSMA)
ADD_DEFINITIONS(-DUSE_TQ)
ADD_DEFINITIONS(-DUSE_TOPIC)
ADD_DEFINITIONS(-DUSE_MONITOR)
ADD_DEFINITIONS(-DUSE_REPORT)
option(
BUILD_WITH_LEVELDB
"If build with leveldb"
OFF
)
option(
BUILD_WITH_ROCKSDB
"If build with rocksdb"
ON
)
IF(${TD_ASTRA_RPC})
ADD_DEFINITIONS(-DTD_ASTRA_RPC)
ENDIF()
IF(${TD_LINUX})
@ -150,6 +194,12 @@ option(
ON
)
option(
BUILD_WITH_LZMA2
"If build with lzma2"
ON
)
ENDIF ()
IF(NOT TD_ENTERPRISE)
@ -166,6 +216,10 @@ IF(${BUILD_WITH_ANALYSIS})
set(BUILD_WITH_S3 ON)
ENDIF()
IF(${TD_LINUX})
set(BUILD_WITH_ANALYSIS ON)
ENDIF()
IF(${BUILD_S3})
IF(${BUILD_WITH_S3})
@ -187,6 +241,14 @@ option(BUILD_WITH_COS "If build with cos" OFF)
ENDIF ()
IF(${TAOSD_INTEGRATED})
add_definitions(-DTAOSD_INTEGRATED)
ENDIF()
IF(${TD_AS_LIB})
add_definitions(-DTD_AS_LIB)
ENDIF()
option(
BUILD_WITH_SQLITE
"If build with sqlite"
@ -205,13 +267,14 @@ option(
off
)
option(
BUILD_WITH_NURAFT
"If build with NuRaft"
OFF
)
IF(NOT TD_ASTRA)
option(
BUILD_WITH_UV
"If build with libuv"
@ -245,6 +308,7 @@ option(
"If use invertedIndex"
ON
)
ENDIF ()
option(
BUILD_RELEASE

View File

@ -2,7 +2,7 @@
IF (DEFINED VERNUMBER)
SET(TD_VER_NUMBER ${VERNUMBER})
ELSE ()
SET(TD_VER_NUMBER "3.3.4.3.alpha")
SET(TD_VER_NUMBER "3.3.5.8.alpha")
ENDIF ()
IF (DEFINED VERCOMPATIBLE)
@ -15,6 +15,18 @@ IF (TD_PRODUCT_NAME)
ADD_DEFINITIONS(-DTD_PRODUCT_NAME="${TD_PRODUCT_NAME}")
ENDIF ()
IF (CUS_NAME)
ADD_DEFINITIONS(-DCUS_NAME="${CUS_NAME}")
ENDIF ()
IF (CUS_PROMPT)
ADD_DEFINITIONS(-DCUS_PROMPT="${CUS_PROMPT}")
ENDIF ()
IF (CUS_EMAIL)
ADD_DEFINITIONS(-DCUS_EMAIL="${CUS_EMAIL}")
ENDIF ()
find_program(HAVE_GIT NAMES git)
IF (DEFINED GITINFO)

View File

@ -12,7 +12,7 @@ ExternalProject_Add(curl2
BUILD_IN_SOURCE TRUE
BUILD_ALWAYS 1
UPDATE_COMMAND ""
CONFIGURE_COMMAND ./configure --prefix=$ENV{HOME}/.cos-local.2 --with-ssl=$ENV{HOME}/.cos-local.2 --enable-shared=no --disable-ldap --disable-ldaps --without-brotli --without-zstd --without-libidn2 --without-nghttp2 --without-libpsl #--enable-debug
CONFIGURE_COMMAND ${CONTRIB_CONFIG_ENV} ./configure --prefix=$ENV{HOME}/.cos-local.2 --with-ssl=$ENV{HOME}/.cos-local.2 --enable-websockets --enable-shared=no --disable-ldap --disable-ldaps --without-brotli --without-zstd --without-libidn2 --without-nghttp2 --without-libpsl --without-librtmp #--enable-debug
BUILD_COMMAND make -j
INSTALL_COMMAND make install
TEST_COMMAND ""

View File

@ -2,7 +2,7 @@
# libuv
ExternalProject_Add(libuv
GIT_REPOSITORY https://github.com/libuv/libuv.git
GIT_TAG v1.48.0
GIT_TAG v1.49.2
SOURCE_DIR "${TD_CONTRIB_DIR}/libuv"
BINARY_DIR "${TD_CONTRIB_DIR}/libuv"
CONFIGURE_COMMAND ""

View File

@ -1,7 +1,6 @@
# xz
if (${TD_LINUX})
if (${BUILD_WITH_LZMA2})
ExternalProject_Add(lzma2
GIT_REPOSITORY https://github.com/conor42/fast-lzma2.git
SOURCE_DIR "${TD_CONTRIB_DIR}/lzma2"

View File

@ -6,9 +6,9 @@ ExternalProject_Add(openssl
DOWNLOAD_DIR "${TD_CONTRIB_DIR}/deps-download"
SOURCE_DIR "${TD_CONTRIB_DIR}/openssl"
BUILD_IN_SOURCE TRUE
#BUILD_ALWAYS 1
#UPDATE_COMMAND ""
CONFIGURE_COMMAND ./Configure --prefix=$ENV{HOME}/.cos-local.2 no-shared
BUILD_ALWAYS 1
UPDATE_COMMAND ""
CONFIGURE_COMMAND ${CONTRIB_CONFIG_ENV} ./Configure --prefix=$ENV{HOME}/.cos-local.2 no-shared
BUILD_COMMAND make -j
INSTALL_COMMAND make install_sw -j
TEST_COMMAND ""

View File

@ -2,7 +2,7 @@
# taosadapter
ExternalProject_Add(taosadapter
GIT_REPOSITORY https://github.com/taosdata/taosadapter.git
GIT_TAG main
GIT_TAG 3.0
SOURCE_DIR "${TD_SOURCE_DIR}/tools/taosadapter"
BINARY_DIR ""
#BUILD_IN_SOURCE TRUE

View File

@ -1,13 +0,0 @@
# taos-tools
ExternalProject_Add(taos-tools
GIT_REPOSITORY https://github.com/taosdata/taos-tools.git
GIT_TAG main
SOURCE_DIR "${TD_SOURCE_DIR}/tools/taos-tools"
BINARY_DIR ""
#BUILD_IN_SOURCE TRUE
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)

View File

@ -2,7 +2,7 @@
# taosws-rs
ExternalProject_Add(taosws-rs
GIT_REPOSITORY https://github.com/taosdata/taos-connector-rust.git
GIT_TAG main
GIT_TAG 3.0
SOURCE_DIR "${TD_SOURCE_DIR}/tools/taosws-rs"
BINARY_DIR ""
#BUILD_IN_SOURCE TRUE

View File

@ -0,0 +1,15 @@
# timezone
ExternalProject_Add(tz
GIT_REPOSITORY https://github.com/eggert/tz.git
GIT_TAG main
SOURCE_DIR "${TD_CONTRIB_DIR}/tz"
BINARY_DIR ""
CONFIGURE_COMMAND ""
#BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
GIT_SHALLOW true
GIT_PROGRESS true
BUILD_COMMAND ""
)

View File

@ -1,19 +1,16 @@
# xml2
ExternalProject_Add(xml2
URL https://download.gnome.org/sources/libxml2/2.11/libxml2-2.11.5.tar.xz
URL_HASH SHA256=3727b078c360ec69fa869de14bd6f75d7ee8d36987b071e6928d4720a28df3a6
#https://github.com/GNOME/libxml2/archive/refs/tags/v2.11.5.tar.gz
#GIT_REPOSITORY https://github.com/GNOME/libxml2
#GIT_TAG v2.11.5
URL https://github.com/GNOME/libxml2/archive/refs/tags/v2.10.4.tar.gz
URL_HASH SHA256=6f6fb27f91bb65f9d7196e3c616901b3e18a7dea31ccc2ae857940b125faa780
DOWNLOAD_NO_PROGRESS 1
DOWNLOAD_DIR "${TD_CONTRIB_DIR}/deps-download"
SOURCE_DIR "${TD_CONTRIB_DIR}/xml2"
SOURCE_DIR "${TD_CONTRIB_DIR}/libxml2"
#BINARY_DIR ""
BUILD_IN_SOURCE TRUE
CONFIGURE_COMMAND ./configure --prefix=$ENV{HOME}/.cos-local.2 --enable-shared=no --enable-static=yes --without-python --without-lzma
BUILD_COMMAND make -j
INSTALL_COMMAND make install && ln -sf $ENV{HOME}/.cos-local.2/include/libxml2/libxml $ENV{HOME}/.cos-local.2/include/libxml
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
GIT_SHALLOW true
)

View File

@ -17,7 +17,6 @@ elseif(${BUILD_WITH_COS})
file(MAKE_DIRECTORY $ENV{HOME}/.cos-local.1/)
cat("${TD_SUPPORT_DIR}/mxml_CMakeLists.txt.in" ${CONTRIB_TMP_FILE3})
cat("${TD_SUPPORT_DIR}/apr_CMakeLists.txt.in" ${CONTRIB_TMP_FILE3})
cat("${TD_SUPPORT_DIR}/curl_CMakeLists.txt.in" ${CONTRIB_TMP_FILE3})
endif(${BUILD_WITH_COS})
configure_file(${CONTRIB_TMP_FILE3} "${TD_CONTRIB_DIR}/deps-download/CMakeLists.txt")
@ -43,11 +42,6 @@ endif()
set(CONTRIB_TMP_FILE "${CMAKE_BINARY_DIR}/deps_tmp_CMakeLists.txt.in")
configure_file("${TD_SUPPORT_DIR}/deps_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
# taos-tools
if(${BUILD_TOOLS})
cat("${TD_SUPPORT_DIR}/taostools_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
endif()
# taosws-rs
if(${WEBSOCKET})
cat("${TD_SUPPORT_DIR}/taosws_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
@ -98,7 +92,9 @@ if(${BUILD_TEST})
endif(${BUILD_TEST})
# lz4
cat("${TD_SUPPORT_DIR}/lz4_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
if(${BUILD_WITH_LZ4})
cat("${TD_SUPPORT_DIR}/lz4_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
endif(${BUILD_WITH_LZ4})
# zlib
cat("${TD_SUPPORT_DIR}/zlib_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
@ -106,6 +102,10 @@ cat("${TD_SUPPORT_DIR}/zlib_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
# cJson
cat("${TD_SUPPORT_DIR}/cjson_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
if(NOT ${TD_WINDOWS})
cat("${TD_SUPPORT_DIR}/tz_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
endif(NOT ${TD_WINDOWS})
# xz
# cat("${TD_SUPPORT_DIR}/xz_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
@ -142,11 +142,16 @@ if(${BUILD_WITH_SQLITE})
cat("${TD_SUPPORT_DIR}/sqlite_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
endif(${BUILD_WITH_SQLITE})
# libcurl
if(NOT ${TD_WINDOWS})
file(MAKE_DIRECTORY $ENV{HOME}/.cos-local.2/)
cat("${TD_SUPPORT_DIR}/ssl_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
cat("${TD_SUPPORT_DIR}/curl_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
endif(NOT ${TD_WINDOWS})
# s3
if(${BUILD_WITH_S3})
cat("${TD_SUPPORT_DIR}/ssl_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
cat("${TD_SUPPORT_DIR}/xml2_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
cat("${TD_SUPPORT_DIR}/curl_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
cat("${TD_SUPPORT_DIR}/libs3_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
cat("${TD_SUPPORT_DIR}/azure_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
add_definitions(-DUSE_S3)
@ -156,7 +161,6 @@ elseif(${BUILD_WITH_COS})
# cat("${TD_SUPPORT_DIR}/mxml_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
# cat("${TD_SUPPORT_DIR}/apr_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
# cat("${TD_SUPPORT_DIR}/apr-util_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
# cat("${TD_SUPPORT_DIR}/curl_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
cat("${TD_SUPPORT_DIR}/cos_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
add_definitions(-DUSE_COS)
endif()
@ -184,23 +188,43 @@ if(${BUILD_PCRE2})
cat("${TD_SUPPORT_DIR}/pcre2_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
endif()
find_program(C_COMPILER_LEMON NAMES gcc)
if(C_COMPILER_LEMON)
message(STATUS "LEMON C compiler: ${C_COMPILER_LEMON}")
else()
set(C_COMPILER_LEMON ${CMAKE_C_COMPILER})
message(STATUS "LEMON C compiler: ${C_COMPILER_LEMON}")
# lemon
if(${BUILD_WITH_LEMON})
if(${TD_ACORE})
set(C_COMPILER_LEMON ${CMAKE_C_COMPILER})
else()
find_program(C_COMPILER_LEMON NAMES gcc)
endif()
if(C_COMPILER_LEMON)
message(STATUS "LEMON C compiler: ${C_COMPILER_LEMON}")
else()
set(C_COMPILER_LEMON ${CMAKE_C_COMPILER})
message(STATUS "LEMON C compiler: ${C_COMPILER_LEMON}")
endif()
cat("${TD_SUPPORT_DIR}/lemon_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
endif()
# lemon
cat("${TD_SUPPORT_DIR}/lemon_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
# Force specify CC=cc on MacOS. Because the default CC setting in the generated Makefile has issues finding standard library headers
IF(${TD_DARWIN})
SET(CONTRIB_CONFIG_ENV "CC=cc")
ENDIF()
# download dependencies
configure_file(${CONTRIB_TMP_FILE} "${TD_CONTRIB_DIR}/deps-download/CMakeLists.txt")
execute_process(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
WORKING_DIRECTORY "${TD_CONTRIB_DIR}/deps-download")
WORKING_DIRECTORY "${TD_CONTRIB_DIR}/deps-download"
RESULT_VARIABLE result)
IF(NOT result EQUAL "0")
message(FATAL_ERROR "CMake step for dowloading dependencies failed: ${result}")
ENDIF()
execute_process(COMMAND "${CMAKE_COMMAND}" --build .
WORKING_DIRECTORY "${TD_CONTRIB_DIR}/deps-download")
WORKING_DIRECTORY "${TD_CONTRIB_DIR}/deps-download"
RESULT_VARIABLE result)
IF(NOT result EQUAL "0")
message(FATAL_ERROR "CMake step for building dependencies failed: ${result}")
ENDIF()
# ================================================================================================
# Build
@ -257,11 +281,13 @@ unset(CMAKE_PROJECT_INCLUDE_BEFORE)
# endif()
# lz4
add_subdirectory(lz4/build/cmake EXCLUDE_FROM_ALL)
target_include_directories(
lz4_static
PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/lz4/lib
)
if(${BUILD_WITH_LZ4})
add_subdirectory(lz4/build/cmake EXCLUDE_FROM_ALL)
target_include_directories(
lz4_static
PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/lz4/lib
)
endif(${BUILD_WITH_LZ4})
# zlib
set(CMAKE_PROJECT_INCLUDE_BEFORE "${TD_SUPPORT_DIR}/EnableCMP0048.txt.in")
@ -648,9 +674,43 @@ if(${BUILD_PCRE2})
endif(${BUILD_PCRE2})
if(${TD_LINUX} AND ${BUILD_WITH_S3})
add_subdirectory(azure-cmake EXCLUDE_FROM_ALL)
set(ORIG_CMAKE_C_FLAGS ${CMAKE_C_FLAGS})
string(REPLACE " -Werror " " " CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
add_subdirectory(xml2-cmake)
set(CMAKE_C_FLAGS ${ORIG_CMAKE_C_FLAGS})
add_subdirectory(azure-cmake)
endif()
IF(TD_LINUX)
SET(TZ_OUTPUT_PATH /usr/share/zoneinfo)
ELSEIF(TD_DARWIN)
SET(TZ_OUTPUT_PATH /var/db/timezone/zoneinfo)
ENDIF()
if(NOT ${TD_WINDOWS})
MESSAGE(STATUS "timezone file path: " ${TZ_OUTPUT_PATH})
execute_process(
COMMAND make TZDIR=${TZ_OUTPUT_PATH}/ clean tzdir.h
WORKING_DIRECTORY "${TD_CONTRIB_DIR}/tz"
)
set(TZ_SRC_DIR "${TD_SOURCE_DIR}/source/os/src/timezone")
file(REMOVE_RECURSE ${TZ_SRC_DIR})
file(MAKE_DIRECTORY ${TZ_SRC_DIR})
file(COPY ${TD_CONTRIB_DIR}/tz/private.h ${TD_CONTRIB_DIR}/tz/tzdir.h ${TD_CONTRIB_DIR}/tz/tzfile.h
${TD_CONTRIB_DIR}/tz/localtime.c ${TD_CONTRIB_DIR}/tz/strftime.c
DESTINATION ${TZ_SRC_DIR})
endif(NOT ${TD_WINDOWS})
#if(NOT ${TD_WINDOWS})
# execute_process(
# COMMAND make CFLAGS+=-fPIC CFLAGS+=-g TZDIR=${TZ_OUTPUT_PATH} clean libtz.a
# WORKING_DIRECTORY "${TD_CONTRIB_DIR}/tz"
# )
#endif(NOT ${TD_WINDOWS})
# ================================================================================================
# Build test
# ================================================================================================

View File

@ -36,10 +36,6 @@ target_include_directories(
)
find_library(CURL_LIBRARY curl $ENV{HOME}/.cos-local.2/lib NO_DEFAULT_PATH)
find_library(XML2_LIBRARY xml2 $ENV{HOME}/.cos-local.2/lib NO_DEFAULT_PATH)
# find_library(CURL_LIBRARY curl)
# find_library(XML2_LIBRARY xml2)
find_library(SSL_LIBRARY ssl $ENV{HOME}/.cos-local.2/lib64 $ENV{HOME}/.cos-local.2/lib NO_DEFAULT_PATH)
find_library(CRYPTO_LIBRARY crypto $ENV{HOME}/.cos-local.2/lib64 $ENV{HOME}/.cos-local.2/lib NO_DEFAULT_PATH)
@ -50,9 +46,8 @@ target_link_libraries(
PRIVATE ${CURL_LIBRARY}
PRIVATE ${SSL_LIBRARY}
PRIVATE ${CRYPTO_LIBRARY}
PRIVATE ${XML2_LIBRARY}
# PRIVATE xml2
PRIVATE _libxml2
PRIVATE zlib
# PRIVATE ${CoreFoundation_Library}

View File

@ -20,14 +20,9 @@ if(${BUILD_WITH_SQLITE})
add_subdirectory(sqlite)
endif(${BUILD_WITH_SQLITE})
if(${BUILD_WITH_CRAFT})
add_subdirectory(craft)
endif(${BUILD_WITH_CRAFT})
# if(${BUILD_S3})
# add_subdirectory(azure)
# endif()
if(${BUILD_WITH_TRAFT})
# add_subdirectory(traft)
endif(${BUILD_WITH_TRAFT})
add_subdirectory(azure)
add_subdirectory(tdev)
add_subdirectory(lz4)

View File

@ -0,0 +1,58 @@
set(LIBXML2_SOURCE_DIR "${TD_CONTRIB_DIR}/libxml2")
set(SRCS
"${LIBXML2_SOURCE_DIR}/SAX.c"
"${LIBXML2_SOURCE_DIR}/entities.c"
"${LIBXML2_SOURCE_DIR}/encoding.c"
"${LIBXML2_SOURCE_DIR}/error.c"
"${LIBXML2_SOURCE_DIR}/parserInternals.c"
"${LIBXML2_SOURCE_DIR}/parser.c"
"${LIBXML2_SOURCE_DIR}/tree.c"
"${LIBXML2_SOURCE_DIR}/hash.c"
"${LIBXML2_SOURCE_DIR}/list.c"
"${LIBXML2_SOURCE_DIR}/xmlIO.c"
"${LIBXML2_SOURCE_DIR}/xmlmemory.c"
"${LIBXML2_SOURCE_DIR}/uri.c"
"${LIBXML2_SOURCE_DIR}/valid.c"
"${LIBXML2_SOURCE_DIR}/xlink.c"
"${LIBXML2_SOURCE_DIR}/HTMLparser.c"
"${LIBXML2_SOURCE_DIR}/HTMLtree.c"
"${LIBXML2_SOURCE_DIR}/debugXML.c"
"${LIBXML2_SOURCE_DIR}/xpath.c"
"${LIBXML2_SOURCE_DIR}/xpointer.c"
"${LIBXML2_SOURCE_DIR}/xinclude.c"
"${LIBXML2_SOURCE_DIR}/nanohttp.c"
"${LIBXML2_SOURCE_DIR}/nanoftp.c"
"${LIBXML2_SOURCE_DIR}/catalog.c"
"${LIBXML2_SOURCE_DIR}/globals.c"
"${LIBXML2_SOURCE_DIR}/threads.c"
"${LIBXML2_SOURCE_DIR}/c14n.c"
"${LIBXML2_SOURCE_DIR}/xmlstring.c"
"${LIBXML2_SOURCE_DIR}/buf.c"
"${LIBXML2_SOURCE_DIR}/xmlregexp.c"
"${LIBXML2_SOURCE_DIR}/xmlschemas.c"
"${LIBXML2_SOURCE_DIR}/xmlschemastypes.c"
"${LIBXML2_SOURCE_DIR}/xmlunicode.c"
"${LIBXML2_SOURCE_DIR}/triostr.c"
"${LIBXML2_SOURCE_DIR}/xmlreader.c"
"${LIBXML2_SOURCE_DIR}/relaxng.c"
"${LIBXML2_SOURCE_DIR}/dict.c"
"${LIBXML2_SOURCE_DIR}/SAX2.c"
"${LIBXML2_SOURCE_DIR}/xmlwriter.c"
"${LIBXML2_SOURCE_DIR}/legacy.c"
"${LIBXML2_SOURCE_DIR}/chvalid.c"
"${LIBXML2_SOURCE_DIR}/pattern.c"
"${LIBXML2_SOURCE_DIR}/xmlsave.c"
"${LIBXML2_SOURCE_DIR}/xmlmodule.c"
"${LIBXML2_SOURCE_DIR}/schematron.c"
"${LIBXML2_SOURCE_DIR}/xzlib.c"
)
add_library(_libxml2 ${SRCS})
#target_link_libraries(_libxml2 PRIVATE td_contrib::zlib)
target_link_libraries(_libxml2 PRIVATE zlib)
target_include_directories(_libxml2 BEFORE PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/linux_x86_64/include")
target_include_directories(_libxml2 BEFORE PUBLIC "${LIBXML2_SOURCE_DIR}/include")
add_library(td_contrib::libxml2 ALIAS _libxml2)

View File

@ -0,0 +1,285 @@
/* config.h. Generated from config.h.in by configure. */
/* config.h.in. Generated from configure.ac by autoheader. */
/* Type cast for the gethostbyname() argument */
#define GETHOSTBYNAME_ARG_CAST /**/
/* Define to 1 if you have the <arpa/inet.h> header file. */
#define HAVE_ARPA_INET_H 1
/* Define to 1 if you have the <arpa/nameser.h> header file. */
#define HAVE_ARPA_NAMESER_H 1
/* Whether struct sockaddr::__ss_family exists */
/* #undef HAVE_BROKEN_SS_FAMILY */
/* Define to 1 if you have the <ctype.h> header file. */
#define HAVE_CTYPE_H 1
/* Define to 1 if you have the <dirent.h> header file. */
#define HAVE_DIRENT_H 1
/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H 1
/* Have dlopen based dso */
#define HAVE_DLOPEN /**/
/* Define to 1 if you have the <dl.h> header file. */
/* #undef HAVE_DL_H */
/* Define to 1 if you have the <errno.h> header file. */
#define HAVE_ERRNO_H 1
/* Define to 1 if you have the <fcntl.h> header file. */
#define HAVE_FCNTL_H 1
/* Define to 1 if you have the <float.h> header file. */
#define HAVE_FLOAT_H 1
/* Define to 1 if you have the `fprintf' function. */
#define HAVE_FPRINTF 1
/* Define to 1 if you have the `ftime' function. */
#define HAVE_FTIME 1
/* Define if getaddrinfo is there */
#define HAVE_GETADDRINFO /**/
/* Define to 1 if you have the `gettimeofday' function. */
#define HAVE_GETTIMEOFDAY 1
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
/* Define to 1 if you have the `isascii' function. */
#define HAVE_ISASCII 1
/* Define if isinf is there */
#define HAVE_ISINF /**/
/* Define if isnan is there */
#define HAVE_ISNAN /**/
/* Define if history library is there (-lhistory) */
/* #undef HAVE_LIBHISTORY */
/* Define if pthread library is there (-lpthread) */
#define HAVE_LIBPTHREAD /**/
/* Define if readline library is there (-lreadline) */
/* #undef HAVE_LIBREADLINE */
/* Define to 1 if you have the <limits.h> header file. */
#define HAVE_LIMITS_H 1
/* Define to 1 if you have the `localtime' function. */
#define HAVE_LOCALTIME 1
/* Define to 1 if you have the <lzma.h> header file. */
/* #undef HAVE_LZMA_H */
/* Define to 1 if you have the <malloc.h> header file. */
#define HAVE_MALLOC_H 1
/* Define to 1 if you have the <math.h> header file. */
#define HAVE_MATH_H 1
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define to 1 if you have the `mmap' function. */
#define HAVE_MMAP 1
/* Define to 1 if you have the `munmap' function. */
#define HAVE_MUNMAP 1
/* mmap() is no good without munmap() */
#if defined(HAVE_MMAP) && !defined(HAVE_MUNMAP)
# undef /**/ HAVE_MMAP
#endif
/* Define to 1 if you have the <ndir.h> header file, and it defines `DIR'. */
/* #undef HAVE_NDIR_H */
/* Define to 1 if you have the <netdb.h> header file. */
#define HAVE_NETDB_H 1
/* Define to 1 if you have the <netinet/in.h> header file. */
#define HAVE_NETINET_IN_H 1
/* Define to 1 if you have the <poll.h> header file. */
#define HAVE_POLL_H 1
/* Define to 1 if you have the `printf' function. */
#define HAVE_PRINTF 1
/* Define if <pthread.h> is there */
#define HAVE_PTHREAD_H /**/
/* Define to 1 if you have the `putenv' function. */
#define HAVE_PUTENV 1
/* Define to 1 if you have the `rand' function. */
#define HAVE_RAND 1
/* Define to 1 if you have the `rand_r' function. */
#define HAVE_RAND_R 1
/* Define to 1 if you have the <resolv.h> header file. */
#define HAVE_RESOLV_H 1
/* Have shl_load based dso */
/* #undef HAVE_SHLLOAD */
/* Define to 1 if you have the `signal' function. */
#define HAVE_SIGNAL 1
/* Define to 1 if you have the <signal.h> header file. */
#define HAVE_SIGNAL_H 1
/* Define to 1 if you have the `snprintf' function. */
#define HAVE_SNPRINTF 1
/* Define to 1 if you have the `sprintf' function. */
#define HAVE_SPRINTF 1
/* Define to 1 if you have the `srand' function. */
#define HAVE_SRAND 1
/* Define to 1 if you have the `sscanf' function. */
#define HAVE_SSCANF 1
/* Define to 1 if you have the `stat' function. */
#define HAVE_STAT 1
/* Define to 1 if you have the <stdarg.h> header file. */
#define HAVE_STDARG_H 1
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define to 1 if you have the `strftime' function. */
#define HAVE_STRFTIME 1
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define to 1 if you have the <sys/dir.h> header file, and it defines `DIR'.
*/
/* #undef HAVE_SYS_DIR_H */
/* Define to 1 if you have the <sys/mman.h> header file. */
#define HAVE_SYS_MMAN_H 1
/* Define to 1 if you have the <sys/ndir.h> header file, and it defines `DIR'.
*/
/* #undef HAVE_SYS_NDIR_H */
/* Define to 1 if you have the <sys/select.h> header file. */
#define HAVE_SYS_SELECT_H 1
/* Define to 1 if you have the <sys/socket.h> header file. */
#define HAVE_SYS_SOCKET_H 1
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/timeb.h> header file. */
#define HAVE_SYS_TIMEB_H 1
/* Define to 1 if you have the <sys/time.h> header file. */
#define HAVE_SYS_TIME_H 1
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if you have the `time' function. */
#define HAVE_TIME 1
/* Define to 1 if you have the <time.h> header file. */
#define HAVE_TIME_H 1
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* Whether va_copy() is available */
#define HAVE_VA_COPY 1
/* Define to 1 if you have the `vfprintf' function. */
#define HAVE_VFPRINTF 1
/* Define to 1 if you have the `vsnprintf' function. */
#define HAVE_VSNPRINTF 1
/* Define to 1 if you have the `vsprintf' function. */
#define HAVE_VSPRINTF 1
/* Define to 1 if you have the <zlib.h> header file. */
/* #undef HAVE_ZLIB_H */
/* Whether __va_copy() is available */
/* #undef HAVE___VA_COPY */
/* Define as const if the declaration of iconv() needs const. */
#define ICONV_CONST
/* Define to the sub-directory where libtool stores uninstalled libraries. */
#define LT_OBJDIR ".libs/"
/* Name of package */
#define PACKAGE "libxml2"
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT ""
/* Define to the full name of this package. */
#define PACKAGE_NAME ""
/* Define to the full name and version of this package. */
#define PACKAGE_STRING ""
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME ""
/* Define to the home page for this package. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION ""
/* Type cast for the send() function 2nd arg */
#define SEND_ARG2_CAST /**/
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Support for IPv6 */
#define SUPPORT_IP6 /**/
/* Define if va_list is an array type */
#define VA_LIST_IS_ARRAY 1
/* Version number of package */
#define VERSION "2.9.8"
/* Determine what socket length (socklen_t) data type is */
#define XML_SOCKLEN_T socklen_t
/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
#define below would cause a syntax error. */
/* #undef _UINT32_T */
/* ss_family is not defined here, use __ss_family instead */
/* #undef ss_family */
/* Define to the type of an unsigned integer type of width exactly 32 bits if
such a type exists and the standard includes do not define it. */
/* #undef uint32_t */

View File

@ -0,0 +1,501 @@
/*
* Summary: compile-time version information
* Description: compile-time version information for the XML library
*
* Copy: See Copyright for the status of this software.
*
* Author: Daniel Veillard
*/
#ifndef __XML_VERSION_H__
#define __XML_VERSION_H__
#include <libxml/xmlexports.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* use those to be sure nothing nasty will happen if
* your library and includes mismatch
*/
#ifndef LIBXML2_COMPILING_MSCCDEF
XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
#endif /* LIBXML2_COMPILING_MSCCDEF */
/**
* LIBXML_DOTTED_VERSION:
*
* the version string like "1.2.3"
*/
#define LIBXML_DOTTED_VERSION "2.10.3"
/**
* LIBXML_VERSION:
*
* the version number: 1.2.3 value is 10203
*/
#define LIBXML_VERSION 21003
/**
* LIBXML_VERSION_STRING:
*
* the version number string, 1.2.3 value is "10203"
*/
#define LIBXML_VERSION_STRING "21003"
/**
* LIBXML_VERSION_EXTRA:
*
* extra version information, used to show a git commit description
*/
#define LIBXML_VERSION_EXTRA ""
/**
* LIBXML_TEST_VERSION:
*
* Macro to check that the libxml version in use is compatible with
* the version the software has been compiled against
*/
#define LIBXML_TEST_VERSION xmlCheckVersion(21003);
#ifndef VMS
#if 0
/**
* WITH_TRIO:
*
* defined if the trio support need to be configured in
*/
#define WITH_TRIO
#else
/**
* WITHOUT_TRIO:
*
* defined if the trio support should not be configured in
*/
#define WITHOUT_TRIO
#endif
#else /* VMS */
/**
* WITH_TRIO:
*
* defined if the trio support need to be configured in
*/
#define WITH_TRIO 1
#endif /* VMS */
/**
* LIBXML_THREAD_ENABLED:
*
* Whether the thread support is configured in
*/
#if 1
#define LIBXML_THREAD_ENABLED
#endif
/**
* LIBXML_THREAD_ALLOC_ENABLED:
*
* Whether the allocation hooks are per-thread
*/
#if 0
#define LIBXML_THREAD_ALLOC_ENABLED
#endif
/**
* LIBXML_TREE_ENABLED:
*
* Whether the DOM like tree manipulation API support is configured in
*/
#if 1
#define LIBXML_TREE_ENABLED
#endif
/**
* LIBXML_OUTPUT_ENABLED:
*
* Whether the serialization/saving support is configured in
*/
#if 1
#define LIBXML_OUTPUT_ENABLED
#endif
/**
* LIBXML_PUSH_ENABLED:
*
* Whether the push parsing interfaces are configured in
*/
#if 1
#define LIBXML_PUSH_ENABLED
#endif
/**
* LIBXML_READER_ENABLED:
*
* Whether the xmlReader parsing interface is configured in
*/
#if 1
#define LIBXML_READER_ENABLED
#endif
/**
* LIBXML_PATTERN_ENABLED:
*
* Whether the xmlPattern node selection interface is configured in
*/
#if 1
#define LIBXML_PATTERN_ENABLED
#endif
/**
* LIBXML_WRITER_ENABLED:
*
* Whether the xmlWriter saving interface is configured in
*/
#if 1
#define LIBXML_WRITER_ENABLED
#endif
/**
* LIBXML_SAX1_ENABLED:
*
* Whether the older SAX1 interface is configured in
*/
#if 1
#define LIBXML_SAX1_ENABLED
#endif
/**
* LIBXML_FTP_ENABLED:
*
* Whether the FTP support is configured in
*/
#if 0
#define LIBXML_FTP_ENABLED
#endif
/**
* LIBXML_HTTP_ENABLED:
*
* Whether the HTTP support is configured in
*/
#if 1
#define LIBXML_HTTP_ENABLED
#endif
/**
* LIBXML_VALID_ENABLED:
*
* Whether the DTD validation support is configured in
*/
#if 1
#define LIBXML_VALID_ENABLED
#endif
/**
* LIBXML_HTML_ENABLED:
*
* Whether the HTML support is configured in
*/
#if 1
#define LIBXML_HTML_ENABLED
#endif
/**
* LIBXML_LEGACY_ENABLED:
*
* Whether the deprecated APIs are compiled in for compatibility
*/
#if 0
#define LIBXML_LEGACY_ENABLED
#endif
/**
* LIBXML_C14N_ENABLED:
*
* Whether the Canonicalization support is configured in
*/
#if 1
#define LIBXML_C14N_ENABLED
#endif
/**
* LIBXML_CATALOG_ENABLED:
*
* Whether the Catalog support is configured in
*/
#if 1
#define LIBXML_CATALOG_ENABLED
#endif
/**
* LIBXML_XPATH_ENABLED:
*
* Whether XPath is configured in
*/
#if 1
#define LIBXML_XPATH_ENABLED
#endif
/**
* LIBXML_XPTR_ENABLED:
*
* Whether XPointer is configured in
*/
#if 1
#define LIBXML_XPTR_ENABLED
#endif
/**
* LIBXML_XPTR_LOCS_ENABLED:
*
* Whether support for XPointer locations is configured in
*/
#if 0
#define LIBXML_XPTR_LOCS_ENABLED
#endif
/**
* LIBXML_XINCLUDE_ENABLED:
*
* Whether XInclude is configured in
*/
#if 1
#define LIBXML_XINCLUDE_ENABLED
#endif
/**
* LIBXML_ICONV_ENABLED:
*
* Whether iconv support is available
*/
#if 0
#define LIBXML_ICONV_ENABLED
#endif
/**
* LIBXML_ICU_ENABLED:
*
* Whether icu support is available
*/
#if 0
#define LIBXML_ICU_ENABLED
#endif
/**
* LIBXML_ISO8859X_ENABLED:
*
* Whether ISO-8859-* support is made available in case iconv is not
*/
#if 1
#define LIBXML_ISO8859X_ENABLED
#endif
/**
* LIBXML_DEBUG_ENABLED:
*
* Whether Debugging module is configured in
*/
#if 1
#define LIBXML_DEBUG_ENABLED
#endif
/**
* DEBUG_MEMORY_LOCATION:
*
* Whether the memory debugging is configured in
*/
#if 0
#define DEBUG_MEMORY_LOCATION
#endif
/**
* LIBXML_DEBUG_RUNTIME:
*
* Whether the runtime debugging is configured in
*/
#if 0
#define LIBXML_DEBUG_RUNTIME
#endif
/**
* LIBXML_UNICODE_ENABLED:
*
* Whether the Unicode related interfaces are compiled in
*/
#if 1
#define LIBXML_UNICODE_ENABLED
#endif
/**
* LIBXML_REGEXP_ENABLED:
*
* Whether the regular expressions interfaces are compiled in
*/
#if 1
#define LIBXML_REGEXP_ENABLED
#endif
/**
* LIBXML_AUTOMATA_ENABLED:
*
* Whether the automata interfaces are compiled in
*/
#if 1
#define LIBXML_AUTOMATA_ENABLED
#endif
/**
* LIBXML_EXPR_ENABLED:
*
* Whether the formal expressions interfaces are compiled in
*
* This code is unused and disabled unconditionally for now.
*/
#if 0
#define LIBXML_EXPR_ENABLED
#endif
/**
* LIBXML_SCHEMAS_ENABLED:
*
* Whether the Schemas validation interfaces are compiled in
*/
#if 1
#define LIBXML_SCHEMAS_ENABLED
#endif
/**
* LIBXML_SCHEMATRON_ENABLED:
*
* Whether the Schematron validation interfaces are compiled in
*/
#if 1
#define LIBXML_SCHEMATRON_ENABLED
#endif
/**
* LIBXML_MODULES_ENABLED:
*
* Whether the module interfaces are compiled in
*/
#if 1
#define LIBXML_MODULES_ENABLED
/**
* LIBXML_MODULE_EXTENSION:
*
* the string suffix used by dynamic modules (usually shared libraries)
*/
#define LIBXML_MODULE_EXTENSION ".so"
#endif
/**
* LIBXML_ZLIB_ENABLED:
*
* Whether the Zlib support is compiled in
*/
#if 1
#define LIBXML_ZLIB_ENABLED
#endif
/**
* LIBXML_LZMA_ENABLED:
*
* Whether the Lzma support is compiled in
*/
#if 0
#define LIBXML_LZMA_ENABLED
#endif
#ifdef __GNUC__
/**
* ATTRIBUTE_UNUSED:
*
* Macro used to signal to GCC unused function parameters
*/
#ifndef ATTRIBUTE_UNUSED
# if ((__GNUC__ > 2) || ((__GNUC__ == 2) && (__GNUC_MINOR__ >= 7)))
# define ATTRIBUTE_UNUSED __attribute__((unused))
# else
# define ATTRIBUTE_UNUSED
# endif
#endif
/**
* LIBXML_ATTR_ALLOC_SIZE:
*
* Macro used to indicate to GCC this is an allocator function
*/
#ifndef LIBXML_ATTR_ALLOC_SIZE
# if (!defined(__clang__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 3))))
# define LIBXML_ATTR_ALLOC_SIZE(x) __attribute__((alloc_size(x)))
# else
# define LIBXML_ATTR_ALLOC_SIZE(x)
# endif
#else
# define LIBXML_ATTR_ALLOC_SIZE(x)
#endif
/**
* LIBXML_ATTR_FORMAT:
*
 * Macro used to indicate to GCC that the parameters are printf-like
*/
#ifndef LIBXML_ATTR_FORMAT
# if ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)))
# define LIBXML_ATTR_FORMAT(fmt,args) __attribute__((__format__(__printf__,fmt,args)))
# else
# define LIBXML_ATTR_FORMAT(fmt,args)
# endif
#else
# define LIBXML_ATTR_FORMAT(fmt,args)
#endif
#ifndef XML_DEPRECATED
# ifdef IN_LIBXML
# define XML_DEPRECATED
# else
/* Available since at least GCC 3.1 */
# define XML_DEPRECATED __attribute__((deprecated))
# endif
#endif
#else /* ! __GNUC__ */
/**
* ATTRIBUTE_UNUSED:
*
* Macro used to signal to GCC unused function parameters
*/
#define ATTRIBUTE_UNUSED
/**
* LIBXML_ATTR_ALLOC_SIZE:
*
* Macro used to indicate to GCC this is an allocator function
*/
#define LIBXML_ATTR_ALLOC_SIZE(x)
/**
* LIBXML_ATTR_FORMAT:
*
 * Macro used to indicate to GCC that the parameters are printf-like
*/
#define LIBXML_ATTR_FORMAT(fmt,args)
/**
* XML_DEPRECATED:
*
* Macro used to indicate that a function, variable, type or struct member
* is deprecated.
*/
#ifndef XML_DEPRECATED
#define XML_DEPRECATED
#endif
#endif /* __GNUC__ */
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,32 +1,19 @@
---
title: TDengine Documentation
sidebar_label: Documentation Home
description: This website contains the user manuals for TDengine, an open-source, cloud-native time-series database optimized for IoT, Connected Cars, and Industrial IoT.
slug: /
---
TDengine is an [open-source](https://tdengine.com/tdengine/open-source-time-series-database/), [cloud-native](https://tdengine.com/tdengine/cloud-native-time-series-database/) [time-series database](https://tdengine.com/tsdb/) optimized for the Internet of Things (IoT), Connected Cars, and Industrial IoT. It enables efficient, real-time data ingestion, processing, and monitoring of TB and even PB scale data per day, generated by billions of sensors and data collectors. This document is the TDengine user manual. It introduces the basic as well as novel concepts in TDengine and discusses installation, features, SQL, APIs, operation, maintenance, kernel design, and other topics in detail. It is written mainly for architects, developers, and system administrators.
TDengine™ is a time-series database purpose-built for Industry 4.0 and Industrial IoT. It enables real-time ingestion, storage, analysis, and distribution of petabytes of data per day, generated by billions of sensors and data collectors. TDengine's mission is to make time-series data accessible, valuable, and affordable for everyone — from independent developers and startups to industry stalwarts and multinationals.
To get an overview of TDengine, such as a feature list, benchmarks, and competitive advantages, please browse through the [Introduction](./intro) section.
This website contains the user documentation for TDengine:
TDengine greatly improves the efficiency of data ingestion, querying, and storage by exploiting the characteristics of time series data, introducing the novel concepts of "one table for one data collection point" and "super table", and designing an innovative storage engine. To understand the new concepts in TDengine and make full use of the features and capabilities of TDengine, please read [Concepts](./concept) thoroughly.
- If you are new to time-series data, you can get a quick understanding of the field from ["What Is a Time-Series Database?"](https://tdengine.com/what-is-a-time-series-database/) and [other articles](https://tdengine.com/time-series-database/) on our official website.
- If you would like to install TDengine and experience its features for yourself, see the [Get Started](get-started/) section for instructions.
- System architects are advised to review the [Basic Features](basic-features/) and [Advanced Features](advanced-features/) sections to decide whether TDengine's capabilities can meet their needs, as well as [Inside TDengine](inside-tdengine/) for a more in-depth look at TDengine's design.
- Software developers can consult the [Developer's Guide](developer-guide/) for information about creating applications that interoperate with TDengine and writing user-defined functions that run within TDengine.
- Database administrators will find valuable information in [Operations and Maintenance](operations-and-maintenance/) and [TDengine Reference](tdengine-reference/) to assist in managing, maintaining, and monitoring their TDengine deployments.
If you are a developer, please read the [Developer Guide](./develop) carefully. This section introduces the database connection, data modeling, data ingestion, query, continuous query, cache, data subscription, user-defined functions, and other functionality in detail. Sample code is provided for a variety of programming languages. In most cases, you can just copy and paste the sample code, and make a few changes to accommodate your application, and it will work.
We live in the era of big data, and scaling up can no longer meet the growing needs of businesses. Any modern data system must be able to scale out, and clustering has become an indispensable feature of big data systems. The TDengine team not only developed the cluster feature but also decided to open-source this important capability. To learn how to deploy, manage, and maintain a TDengine cluster, please refer to [Cluster Deployment](./operation/deployment).
TDengine uses ubiquitous SQL as its query language, which greatly reduces learning costs and migration costs. In addition to the standard SQL, TDengine has extensions to better support time series data analysis. These extensions include functions such as roll-up, interpolation, and time-weighted average, among many others. The [SQL Reference](./reference/taos-sql) chapter describes the SQL syntax in detail and lists the various supported commands and functions.
If you are a system administrator who cares about installation, upgrade, fault tolerance, disaster recovery, data import, data export, system configuration, how to monitor whether TDengine is running healthily, and how to improve system performance, please refer to and thoroughly read the [Administration](./operation) section.
If you want to know more about TDengine tools and the REST API, please see the [Reference](./reference) chapter.
For information about connecting to TDengine with different programming languages, see [Client Libraries](./reference/connectors).
If you are very interested in the internal design of TDengine, please read the chapter [Inside TDengine](./tdinternal), which introduces the cluster design, data partitioning, sharding, writing, and reading processes in detail. If you want to study TDengine code or even contribute code, please read this chapter carefully.
For a more general introduction to time-series databases, please read through [a series of articles](https://tdengine.com/tsdb/). To learn more about TDengine's competitive advantages, please read through [a series of blogs](https://tdengine.com/tdengine/).
TDengine is an open-source database, and we would love for you to be a part of TDengine. If you find any errors in the documentation or see parts where more clarity or elaboration is needed, please click "Edit this page" at the bottom of each page to edit it directly.
TDengine, including this documentation, is an open-source project, and we welcome contributions from the community. If you find any errors or unclear descriptions, click **Edit this page** at the bottom of the page to submit your corrections. To view the source code, visit our [GitHub repository](https://github.com/taosdata/tdengine).
Together, we make a difference!

View File

@ -1,182 +0,0 @@
---
title: Concepts
description: This document describes the basic concepts of TDengine, including the supertable.
---
In order to explain the basic concepts and provide some sample code, the TDengine documentation uses smart meters as a typical time series use case. We assume the following: 1. Each smart meter collects three metrics, i.e. current, voltage, and phase; 2. There are multiple smart meters; 3. Each meter has static attributes like location and group ID. Based on this, the collected data will look similar to the following table:
<div className="center-table">
<table>
<thead>
<tr>
<th rowSpan="2">Device ID</th>
<th rowSpan="2">Timestamp</th>
<th colSpan="3">Collected Metrics</th>
<th colSpan="2">Tags</th>
</tr>
<tr>
<th>current</th>
<th>voltage</th>
<th>phase</th>
<th>location</th>
<th>groupid</th>
</tr>
</thead>
<tbody>
<tr>
<td>d1001</td>
<td>1538548685000</td>
<td>10.3</td>
<td>219</td>
<td>0.31</td>
<td>California.SanFrancisco</td>
<td>2</td>
</tr>
<tr>
<td>d1002</td>
<td>1538548684000</td>
<td>10.2</td>
<td>220</td>
<td>0.23</td>
<td>California.SanFrancisco</td>
<td>3</td>
</tr>
<tr>
<td>d1003</td>
<td>1538548686500</td>
<td>11.5</td>
<td>221</td>
<td>0.35</td>
<td>California.LosAngeles</td>
<td>3</td>
</tr>
<tr>
<td>d1004</td>
<td>1538548685500</td>
<td>13.4</td>
<td>223</td>
<td>0.29</td>
<td>California.LosAngeles</td>
<td>2</td>
</tr>
<tr>
<td>d1001</td>
<td>1538548695000</td>
<td>12.6</td>
<td>218</td>
<td>0.33</td>
<td>California.SanFrancisco</td>
<td>2</td>
</tr>
<tr>
<td>d1004</td>
<td>1538548696600</td>
<td>11.8</td>
<td>221</td>
<td>0.28</td>
<td>California.LosAngeles</td>
<td>2</td>
</tr>
<tr>
<td>d1002</td>
<td>1538548696650</td>
<td>10.3</td>
<td>218</td>
<td>0.25</td>
<td>California.SanFrancisco</td>
<td>3</td>
</tr>
<tr>
<td>d1001</td>
<td>1538548696800</td>
<td>12.3</td>
<td>221</td>
<td>0.31</td>
<td>California.SanFrancisco</td>
<td>2</td>
</tr>
</tbody>
</table>
<a href="#model_table1">Table 1: Smart meter example data</a>
</div>
Each row contains the device ID, timestamp, collected metrics (`current`, `voltage`, `phase` as above), and static tags (`location` and `groupid` in Table 1) associated with the devices. Each smart meter generates a row (measurement) in a pre-defined time interval or triggered by an external event. The device produces a sequence of measurements with associated timestamps.
## Metric
A metric is a physical quantity collected by sensors, equipment, or other data collection devices, such as current, voltage, temperature, pressure, or GPS position. Metrics change over time, and their data type can be integer, float, Boolean, or string. As time goes by, the amount of stored metric data increases. In the smart meters example, current, voltage, and phase are the metrics.
## Label/Tag
A label or tag refers to a static property of a sensor, equipment, or other data collection device that does not change with time, such as the device model, color, or fixed location. Tags can be of any data type. Although tags are static, TDengine allows users to add, delete, or update tag values at any time. Unlike collected metric data, the amount of stored tag data does not grow over time. In the meters example, `location` and `groupid` are the tags.
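For example, if a device is relocated, its `location` tag can be changed in place. The following is a minimal sketch; it assumes a subtable named `d1001` with a `location` tag, as created later in this document:

```sql
-- Update a static tag value in place; the collected metric data of d1001 is not affected.
ALTER TABLE d1001 SET TAG location = 'California.SanJose';
```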
## Data Collection Point
Data Collection Point (DCP) refers to hardware or software that collects metrics based on preset time periods or triggered by events. A data collection point can collect one or multiple metrics, but these metrics are collected at the same time and have the same timestamp. For some complex equipment, there are often multiple data collection points, and the sampling rate of each collection point may be different, and fully independent. For example, for a car, there could be a data collection point to collect GPS position metrics, a data collection point to collect engine status metrics, and a data collection point to collect the environment metrics inside the car. So in this example the car would have three data collection points. In the smart meters example, d1001, d1002, d1003, and d1004 are the data collection points.
## Table
Since time-series data is most likely to be structured, TDengine adopts the traditional relational database model to process it, offering a short learning curve. You need to create a database, create tables, then insert data points and execute queries to explore the data.
To make full use of time-series data characteristics, TDengine adopts a strategy of "**One Table for One Data Collection Point**". TDengine requires the user to create a table for each data collection point (DCP) to store collected time-series data. For example, if there are over 10 million smart meters, it means 10 million tables should be created. For the table above, 4 tables should be created for devices d1001, d1002, d1003, and d1004 to store the data collected. This design has several benefits:
1. Since the metric data from different DCPs is fully independent, each table has a single, unique data source and therefore only one writer. Data points can thus be written in a lock-free manner, greatly improving write speed.
2. The metric data generated by a DCP is ordered by timestamp, so the write operation can be implemented as a simple append, which further improves data writing speed.
3. The metric data from a DCP is stored contiguously, block by block. Reading data for a period of time therefore requires far fewer random reads, improving read and query performance by orders of magnitude.
4. Inside a data block for a DCP, columnar storage is used, and different compression algorithms are applied to different data types. The values of a single metric usually change only gradually over a time range, so storing each metric in its own column allows for a higher compression rate.
If the metric data of multiple DCPs were instead written into a single table, as in the traditional approach, uncontrollable network delays would mean that the arrival order of data from different DCPs at the server could not be guaranteed, write operations would have to be protected by locks, and the metric data of one DCP could not be guaranteed to be stored contiguously. **One table per data collection point ensures the best possible insert and query performance for a single data collection point.**
TDengine suggests using DCP ID as the table name (like d1001 in the above table). Each DCP may collect one or multiple metrics (like the `current`, `voltage`, `phase` as above). Each metric has a corresponding column in the table. The data type for a column can be int, float, string and others. In addition, the first column in the table must be a timestamp. TDengine uses the timestamp as the index, and won't build the index on any metrics stored. Column wise storage is used.
Complex devices, such as connected cars, may have multiple DCPs. In this case, multiple tables are created for a single device, one table per DCP.
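The following is a minimal sketch of this strategy for meter d1001 from Table 1, assuming a database has already been created and selected; in practice, TDengine recommends the subtable approach described in the next sections:

```sql
-- One table per data collection point: the first column must be a timestamp,
-- which TDengine uses as the index. No index is built on the metric columns.
CREATE TABLE d1001 (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT);

-- Each new measurement from d1001 is simply appended in timestamp order.
INSERT INTO d1001 VALUES ('2018-10-03 14:38:05.000', 10.3, 219, 0.31);
```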
## Super Table (STable)
The design of one table per data collection point requires a huge number of tables, which are difficult to manage. Furthermore, applications often need to perform aggregation operations across DCPs, and such aggregations can become complicated. To support efficient aggregation over multiple tables, TDengine introduces the STable (Super Table) concept.
STable is a template for a type of data collection point. A STable contains a set of data collection points (tables) that have the same schema or data structure, but with different static attributes (tags). To describe a STable, in addition to defining the table structure of the metrics, it is also necessary to define the schema of its tags. The data type of tags can be int, float, string, and there can be multiple tags, which can be added, deleted, or modified afterward. If the whole system has N different types of data collection points, N STables need to be established.
In the design of TDengine, **a table is used to represent a specific data collection point, and STable is used to represent a set of data collection points of the same type**. In the smart meters example, we can create a super table named `meters`.
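For the smart meters example, such a supertable could be defined as in the sketch below; the column and tag types are illustrative assumptions based on Table 1:

```sql
-- A supertable defines both the metric schema and the tag schema for one type of DCP.
-- It is only a template: data is written to its subtables, never to the supertable itself.
CREATE STABLE meters (
    ts      TIMESTAMP,     -- the first column must be the timestamp
    current FLOAT,
    voltage INT,
    phase   FLOAT
) TAGS (
    location VARCHAR(64),  -- static tags; each subtable supplies its own values
    groupid  INT
);
```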
## Subtable
When creating a table for a specific data collection point, the user can use a STable as a template and specify the tag values of this specific DCP to create it. **A table created by using a STable as the template is called a subtable** in TDengine. The differences between a regular table and a subtable are:
1. A subtable is a table; all SQL commands that can be applied to a regular table can also be applied to a subtable.
2. A subtable is a table with extensions: it has static tags (labels), and these tags can be added, deleted, and updated after it is created. A regular table does not have tags.
3. A subtable belongs to exactly one STable, while a STable may have many subtables. Regular tables do not belong to any STable.
4. A regular table cannot be converted into a subtable, and vice versa.
The relationship between a STable and the subtables created based on this STable is as follows:
1. A STable contains multiple subtables with the same metric schema but with different tag values.
2. The schema of metrics or labels cannot be adjusted through subtables; it can only be changed via the STable. Changes to the schema of a STable take effect immediately for all associated subtables.
3. STable defines only one template and does not store any data or label information by itself. Therefore, data cannot be written to a STable, only to subtables.
Queries can be executed on both a table (subtable) and a STable. For a query on a STable, TDengine treats the data in all its subtables as a single data set. It first finds the subtables that meet the tag filter conditions, then scans the time-series data of only those subtables to perform the aggregation. This reduces the number of data sets that must be scanned, which in turn greatly improves the performance of data aggregation across multiple DCPs. In essence, querying a supertable is a very efficient aggregate query over multiple DCPs of the same type.
In TDengine, it is recommended to use a subtable instead of a regular table for a DCP. In the smart meters example, we can create subtables like d1001, d1002, d1003, and d1004 under super table `meters`.
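Continuing the sketches above, the statements below create d1001 as a subtable of `meters`, write one row to it, and run an aggregate query on the supertable with a tag filter (if you created d1001 as a plain table in the earlier sketch, drop it first, since table names must be unique):

```sql
-- Create a subtable from the supertable template by supplying only its tag values.
CREATE TABLE d1001 USING meters TAGS ('California.SanFrancisco', 2);

-- Data is written to the subtable, never to the supertable itself.
INSERT INTO d1001 VALUES ('2018-10-03 14:38:05.000', 10.3, 219, 0.31);

-- A query on the supertable aggregates over all subtables whose tags match the filter.
SELECT AVG(current), MAX(voltage) FROM meters WHERE location = 'California.SanFrancisco';
```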
To better understand the data model using metrics, tags, super table and subtable, please refer to the diagram below which demonstrates the data model of the smart meters example.
<figure>
![Meters Data Model Diagram](./supertable.webp)
<center><figcaption>Figure 1. Meters Data Model Diagram</figcaption></center>
</figure>
## Database
A database is a collection of tables. TDengine allows a running instance to have multiple databases, and each database can be configured with different storage policies. The [characteristics of time-series data](https://tdengine.com/tsdb/characteristics-of-time-series-data/) from different data collection points may differ, including collection frequency, retention policy, and others, and these characteristics determine how you create and configure a database. For example, the number of days to keep data, the number of replicas, the data block size, whether updates are allowed, and other configurable parameters are determined by the characteristics of your data and your business requirements. For TDengine to work with maximum efficiency in various scenarios, it is recommended that STables with different data characteristics be created in different databases.
In a database, there can be one or more STables, but a STable belongs to only one database. All tables owned by a STable are stored in only one database.
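As a rough sketch of per-database configuration, the statement below creates a database with a retention period, data file duration, and replica count; the option values are illustrative assumptions, not recommendations:

```sql
-- Keep data for 365 days, store it in 10-day data files, and maintain 3 replicas.
CREATE DATABASE power KEEP 365 DURATION 10 REPLICA 3;
```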
## FQDN & End Point
FQDN (Fully Qualified Domain Name) is the full domain name of a specific computer or host on the Internet. FQDN consists of two parts: hostname and domain name. For example, the FQDN of a mail server might be mail.tdengine.com. The hostname is mail, and the host is located in the domain name tdengine.com. DNS (Domain Name System) is responsible for translating FQDN into IP. For systems without DNS, it can be solved by configuring the hosts file.
Each node of a TDengine cluster is uniquely identified by an End Point, which consists of an FQDN and a Port, such as h1.tdengine.com:6030. In this way, when the IP changes, we can still use the FQDN to dynamically find the node without changing any configuration of the cluster. In addition, FQDN is used to facilitate unified access to the same cluster from the Intranet and the Internet.
TDengine does not recommend using an IP address to access the cluster. FQDN is recommended for cluster management.
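Once a cluster is running, you can verify the end point (FQDN and port) of each node from the TDengine CLI; a minimal sketch:

```sql
-- List the dnodes in the cluster; the output includes each node's end point (FQDN:port).
SHOW DNODES;
```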

View File

@ -1,132 +1,76 @@
---
title: Introduction
description: This document introduces the major features, competitive advantages, typical use cases, and benchmarks of TDengine.
toc_max_heading_level: 2
sidebar_label: Introduction
title: Introduction to TDengine
slug: /introduction
---
TDengine is a big data platform designed and optimized for IoT (Internet of Things) and the Industrial Internet. It can safely and effectively converge, store, process, and distribute the high volumes of data (TB or even PB) generated every day by large numbers of devices and data acquisition units, monitor and alert on business operation status in real time, and provide real-time business insight. The core component of TDengine is TDengine OSS, a high-performance, open-source, cloud-native, and simplified time-series database.
import Image from '@theme/IdealImage';
import imgEcosystem from './assets/introduction-01.png';
This section introduces the major features, competitive advantages, typical use-cases and benchmarks to help you get a high level overview of TDengine.
TDengine is a time-series database designed to help traditional industries overcome the challenges of Industry 4.0 and Industrial IoT. It enables real-time ingestion, storage, analysis, and distribution of petabytes of data per day, generated by billions of sensors and data collectors. By making big data accessible and affordable, TDengine helps everyone — from independent developers and startups to industry stalwarts and multinationals — unlock the true value of their data.
## Major Features of TDengine OSS
## TDengine Offerings
The major features are listed below:
- [TDengine OSS](https://tdengine.com/oss/) is an open-source, cloud-native time-series database. Its source code is licensed under the AGPL and publicly available on GitHub. TDengine OSS serves as the code base for our paid offerings and provides the same core functionality. Unlike some open-core products, TDengine OSS is a full-featured solution that includes the necessary components for production use, including clustering.
- [TDengine Enterprise](https://tdengine.com/enterprise/) is a high-performance big data platform designed for Industry 4.0 and the Industrial IoT. Built on the open-source TDengine OSS, it delivers an enterprise-grade feature set tailored to the needs of traditional industries.
- [TDengine Cloud](https://cloud.tdengine.com) delivers all features of TDengine Enterprise as a fully managed service that can run on Amazon Web Services (AWS), Microsoft Azure, and Google Cloud Platform (GCP).
1. Insert data
- Supports [using SQL to insert](../develop/insert-data/sql-writing).
- Supports [schemaless writing](../reference/schemaless/) just like NoSQL databases. It also supports standard protocols like [InfluxDB Line](../develop/insert-data/influxdb-line), [OpenTSDB Telnet](../develop/insert-data/opentsdb-telnet), [OpenTSDB JSON ](../develop/insert-data/opentsdb-json) among others.
- Supports seamless integration with third-party tools like [Telegraf](../third-party/telegraf/), [Prometheus](../third-party/prometheus/), [collectd](../third-party/collectd/), [StatsD](../third-party/statsd/), [TCollector](../third-party/tcollector/), [EMQX](../third-party/emq-broker), [HiveMQ](../third-party/hive-mq-broker), and [Icinga2](../third-party/icinga2/); these tools can write data into TDengine through simple configuration and without a single line of code.
2. Query data
- Supports standard [SQL](../reference/taos-sql/), including nested query.
- Supports [time series specific functions](../reference/taos-sql/function/#time-series-extensions) and [time series specific queries](../reference/taos-sql/distinguished), like downsampling, interpolation, cumulated sum, time weighted average, state window, session window and many others.
- Supports [User Defined Functions (UDF)](../reference/taos-sql/udf).
3. [Caching](../develop/cache/): TDengine always saves the last data point in cache, so Redis is not needed for time-series data processing.
4. [Stream Processing](../develop/stream/): Not only are continuous queries supported, but TDengine also supports event-driven stream processing, so Flink or Spark is not needed for time-series data processing.
5. [Data Subscription](../develop/tmq/): Applications can subscribe to a table or a set of tables. The API is the same as Kafka's, but you can specify filter conditions.
6. Visualization
- Supports seamless integration with [Grafana](../third-party/grafana/).
- Supports seamless integration with [Google Data Studio](../third-party/google-data-studio/).
7. Cluster
- Supports [cluster](../operation/deployment/) with the capability of increasing processing power by adding more nodes.
- Supports [deployment on Kubernetes](../operation/deployment).
- Supports high availability via data replication.
8. Administration
- Provides [monitoring](../operation/monitor) on running instances of TDengine.
- Provides many ways to [import](../operation/import) and [export](../operation/export) data.
9. Tools
- Provides an interactive [Command Line Interface (CLI)](../reference/components/taos-shell) for management, maintenance and ad-hoc queries.
- Provides a tool [taosBenchmark](../reference/components/taosbenchmark/) for testing the performance of TDengine.
10. Programming
- Provides [client libraries](../reference/connectors/) for [C/C++](../reference/connectors/cpp), [Java](../reference/connectors/java), [Python](../reference/connectors/python), [Go](../reference/connectors/go), [Rust](../reference/connectors/rust), [Node.js](../reference/connectors/node) and other programming languages.
- Provides a [REST API](../reference/connectors/rest-api).
## What Makes TDengine Different
For more details on features, please read through the entire documentation.
TDengine differentiates itself from typical time-series databases with the following four core competencies:
## Competitive Advantages
1. **High Performance at Any Scale:** With its distributed scalable architecture that grows together with your business, TDengine can store and process massive datasets up to 10.6x faster than other TSDBs — all while providing the split-second latency that your real-time visualization and reporting apps demand.
2. **Efficient Data Storage:** With its unique design and data model, TDengine provides the most cost-effective solution for storing your operations data, including tiered storage, S3, and 10:1 data compression, ensuring that you can get valuable business insights from your data without breaking the bank.
3. **Data Consolidation Across Sites:** With built-in connectors for a wide variety of industrial sources — MQTT, Kafka, OPC, PI System, and more — TDengine delivers zero-code data ingestion and extract, transform, and load (ETL) in a centralized platform that acts as a single source of truth for your business.
4. **Comprehensive Solution for Industrial Data:** With out-of-the-box data subscription, caching, and stream processing, TDengine is more than just a time-series database — it includes all key components needed for industrial data storage and processing built into a single product and accessible through familiar SQL statements.
By making full use of [characteristics of time series data](https://tdengine.com/characteristics-of-time-series-data/), TDengine differentiates itself from other time series databases with the following advantages.
## What TDengine Delivers
- **[High-Performance](https://tdengine.com/high-performance/)**: TDengine is the only time-series database that solves the high-cardinality issue, supporting billions of data collection points while outperforming other time-series databases in data ingestion, querying, and data compression.
With its innovative "one table per device" design, unique supertable concept, and highly optimized storage engine, TDengine is purpose-built to meet the unique needs of ingesting, querying, and storing massive time-series datasets. In its role at the core of the industrial data architecture, it provides the following functionality:
- **[Simplified Solution](https://tdengine.com/comprehensive-industrial-data-solution/)**: Through built-in caching, stream processing and data subscription features, TDengine provides a simplified solution for time-series data processing. It reduces system design complexity and operation costs significantly.
1. [Data Ingestion](../basic-features/data-ingestion/): You can write data into TDengine with standard SQL or in schemaless mode over the InfluxDB Line Protocol, OpenTSDB Telnet Protocol, and OpenTSDB JSON Protocol. TDengine also seamlessly integrates with data collectors like Telegraf and Prometheus.
2. [Data Querying](../basic-features/data-querying): In addition to standard SQL query syntax, TDengine includes time-series extensions such as downsampling and windowing and functions such as cumulative sum and time-weighted average to better meet the needs of time-series data processing. TDengine also supports user-defined functions (UDF), which can be written in C or Python.
3. [Read Caching](../advanced-features/caching/): TDengine uses a time-driven first-in, first-out (FIFO) cache management strategy, keeping the most recent data in the cache. This makes it easy and fast to access the real-time status of any metric without the need for other caching tools like Redis, simplifying system architecture and reducing operational costs.
4. [Stream Processing](../advanced-features/stream-processing/): TDengine's built-in stream processing engine provides the capability to process data streams in real-time as they are written, supporting not only continuous queries but also event-driven stream processing. This lightweight but optimized solution can return results in milliseconds even during high-throughput data ingestion.
5. [Data Subscription](../advanced-features/data-subscription): TDengine includes data subscription out of the box, eliminating the need to deploy other complex products to provide this critical feature. You can define topics in SQL, subscribing to a query, supertable, or database, and use a Kafka-like API to consume these topics in your applications, as shown in the sketch after this list.
6. [Visualization](../third-party-tools/visualization/) and [BI](../third-party-tools/analytics/): Through its REST API and standard JDBC and ODBC interfaces, TDengine seamlessly integrates with leading platforms like Grafana, Power BI, and Seeq.
7. [Clustering](../operations-and-maintenance/deploy-your-cluster/): TDengine supports clustered deployment so that you can add nodes to scale your system and increase processing capacity. At the same time, it provides high availability through multi-replica technology and supports Kubernetes deployment. It also offers various operational tools to facilitate system administrators in managing and maintaining robust cluster operations.
8. Data Migration: TDengine provides various convenient data import and export functions, including script file import/export, data file import/export, and the [taosdump](../tdengine-reference/tools/taosdump/) tool.
9. [Client Libraries](../tdengine-reference/client-libraries/): TDengine offers client libraries for a variety of different programming languages, including Java, Python, and C/C++, so that you can build custom applications in your favorite language. Sample code that you can copy and paste into your apps is also provided to make the development process even easier.
10. O&M Tools: You can use the interactive [command-line interface (CLI)](../tdengine-reference/tools/tdengine-cli/) for managing clusters, checking system status, and performing ad hoc queries. The stress-testing tool [taosBenchmark](../tdengine-reference/tools/taosbenchmark/) is a quick way to generate sample data and test the performance of TDengine. And TDengine's GUI component [taosExplorer](../tdengine-reference/components/taosexplorer/) simplifies the operations and management process.
11. [Data Security](https://tdengine.com/security/): With TDengine Enterprise, you can implement fine-grained access controls with rich user and permissions management features. IP whitelisting helps you control which accounts can access your cluster from which servers, and audit logs record sensitive operations. In TDengine Enterprise, you can also configure encryption in transit on the server level and encryption at rest on the database level, which is transparent to operations and has minimal impact on performance.
12. [Zero-Code Data Connectors](https://tdengine.com/data-sources/): TDengine Enterprise includes zero-code connectors for industrial data protocols like MQTT and OPC, traditional data historians like AVEVA PI System and Wonderware Historian, relational databases like Oracle Database and SQL Server, and other time-series databases like InfluxDB and OpenTSDB. With these connectors, you can synchronize or migrate diverse time-series datasets to TDengine in the GUI without touching a line of code.
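As a sketch of the data subscription feature mentioned in item 5 above, a topic can be defined in SQL and then consumed through the Kafka-like subscription API; the topic and table names below are assumptions based on the smart meters example used throughout this documentation:

```sql
-- Define a topic over a query on the meters supertable; subscribers receive
-- the selected columns of newly written rows.
CREATE TOPIC topic_meters AS SELECT ts, current, voltage, phase FROM meters;
```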
- **[Cloud Native](https://tdengine.com/cloud-native/)**: Through native distributed design, sharding and partitioning, separation of compute and storage, RAFT, support for Kubernetes deployment and full observability, TDengine is a cloud native Time-series Database and can be deployed on public, private or hybrid clouds.
## How TDengine Benefits You
- **[Ease of Use](https://tdengine.com/easy-to-use/)**: For administrators, TDengine significantly reduces the effort to deploy and maintain. For developers, it provides a simple interface, simplified solution and seamless integrations for third party tools. For data users, it gives easy data access.
With its high performance, standard SQL support, and component integration, TDengine can reduce your total cost of data operations:
- **[Easy Data Analytics](https://tdengine.com/simplifying-time-series-analysis-for-data-scientists/)**: Through super tables, storage and compute separation, data partitioning by time interval, pre-computation and other means, TDengine makes it easy to explore, format, and get access to data in a highly efficient way.
1. **Industry-leading performance:** TDengine significantly outperforms other time-series databases with up to 16 times faster ingestion and over 100 times higher query performance than InfluxDB or TimescaleDB while requiring fewer storage resources. Because TDengine ingests data faster, stores data more efficiently, and responds to queries more quickly, it uses fewer CPU and storage resources and adds less to your bills.
2. **Easy to use with no learning costs:** TDengine is easier to use than other time-series database solutions and does not require specialized training. This is because TDengine supports standard SQL, is easy to integrate with third-party tools, and comes with client libraries for various programming languages, including sample code.
3. **Simplified, fully integrated solution:** By including stream processing, caching, and data subscription as built-in components at no extra cost, TDengine eliminates the need to deploy third-party products just to process time-series data. Its components are simple, easy to use, and purpose-built to process time-series data.
- **[Open Source](https://tdengine.com/open-source/)**: TDengine's core modules, including cluster feature, are all available under open source licenses. It has gathered over 22k stars on GitHub. There is an active developer community, and over 400k running instances worldwide.
## TDengine Ecosystem
With TDengine, the total cost of ownership of your time-series data platform can be greatly reduced.
1. With its superior performance, the computing and storage resources are reduced significantly.
2. With SQL support, it can be seamlessly integrated with many third party tools, and learning costs/migration costs are reduced significantly.
3. With its simplified solution and nearly zero management, the operation and maintenance costs are reduced significantly.
## Technical Ecosystem
This is how TDengine would be situated, in a typical time-series data processing platform:
With its open ecosystem, TDengine allows you the freedom to construct the data stack that is best for your business. Its support for standard SQL, zero-code connectors for a wide range of industrial protocols and data solutions, and seamless integration with visualization, analytics, and business intelligence (BI) applications make it easy to fit TDengine into your infrastructure.
<figure>
![TDengine Database Technical Ecosystem ](eco_system.webp)
<center><figcaption>Figure 1. TDengine Technical Ecosystem</figcaption></center>
<Image img={imgEcosystem} alt="TDengine ecosystem"/>
<figcaption>Figure 1. TDengine ecosystem</figcaption>
</figure>
On the left-hand side, there are data collection agents like OPC-UA, MQTT, Telegraf and Kafka. On the right-hand side, visualization/BI tools, HMI, Python/R, and IoT Apps can be connected. TDengine itself provides an interactive command-line interface and a web interface for management and maintenance.
As shown in the figure, TDengine acts as the central source of truth in an industrial data ecosystem, ingesting data from a variety of sources and sharing that data with business applications and stakeholders.
## Typical Use Cases
## Application Scenarios
As a high-performance, scalable time-series database with SQL support, TDengine's typical use cases include but are not limited to IoT, Industrial Internet, connected vehicles, IT operations and maintenance, energy, financial markets, and other fields. TDengine is a purpose-built database optimized for the characteristics of time-series data. As such, it is not suited to processing data from web crawlers, social media, e-commerce, ERP, CRM, and so on. More generally, TDengine is not a suitable storage engine for non-time-series data. This section provides a more detailed analysis of the applicable scenarios.
TDengine is the only time-series database purpose-built for industrial scenarios and is fully capable of storing and processing the massive, high-frequency datasets generated by a range of industries, especially the following:
### Characteristics and Requirements of Data Sources
- [Renewable energy](https://tdengine.com/renewable-energy/)
- [Manufacturing](https://tdengine.com/manufacturing/)
- [Connected cars](https://tdengine.com/connected-cars/)
| **Data Source Characteristics and Requirements** | **Not Applicable** | **Might Be Applicable** | **Very Applicable** | **Description** |
| ------------------------------------------------ | ------------------ | ----------------------- | ------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| A massive amount of total data | | | √ | TDengine provides excellent scale-out functions in terms of capacity, and has a storage structure with matching high compression ratio to achieve the best storage efficiency in the industry. |
| Data input velocity is extremely high | | | √ | TDengine's performance is much higher than that of other similar products. It can continuously process larger amounts of input data in the same hardware environment, and provides a performance evaluation tool that can easily run in the user environment. |
| A huge number of data sources | | | √ | TDengine is optimized specifically for a huge number of data sources. It is especially suitable for efficiently ingesting, writing and querying data from billions of data sources. |
TDengine can also form the core component of a data stack to enable the following industrial applications:
### System Architecture Requirements
- [Predictive maintenance](https://tdengine.com/predictive-maintenance/)
- [Vibration analysis](https://tdengine.com/high-frequency-data/)
- [Condition monitoring](https://tdengine.com/condition-monitoring)
| **System Architecture Requirements** | **Not Applicable** | **Might Be Applicable** | **Very Applicable** | **Description** |
| ----------------------------------------- | ------------------ | ----------------------- | ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| A simple and reliable system architecture | | | √ | TDengine's system architecture is very simple and reliable, with its own message queue, cache, stream computing, monitoring and other functions. There is no need to integrate any additional third-party products. |
| Fault-tolerance and high-reliability | | | √ | TDengine has cluster functions to automatically provide high-reliability and high-availability functions such as fault tolerance and disaster recovery. |
| Standardization support | | | √ | TDengine supports standard SQL and provides SQL extensions for time-series data analysis. |
### System Function Requirements
| **System Function Requirements** | **Not Applicable** | **Might Be Applicable** | **Very Applicable** | **Description** |
| -------------------------------------------- | ------------------ | ----------------------- | ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Complete data processing algorithms built-in | | √ | | While TDengine implements various general data processing algorithms, industry specific algorithms and special types of processing will need to be implemented at the application level. |
| A large number of crosstab queries | | √ | | This type of processing is better handled by general purpose relational database systems but TDengine can work in concert with relational database systems to provide more complete solutions. |
### System Performance Requirements
| **System Performance Requirements** | **Not Applicable** | **Might Be Applicable** | **Very Applicable** | **Description** |
| ------------------------------------------------- | ------------------ | ----------------------- | ------------------- | --------------------------------------------------------------------------------------------------------------------------- |
| Very large total processing capacity | | | √ | TDengine's cluster functions can easily improve processing capacity via multi-server coordination. |
| Extremely high-speed data processing | | | √ | TDengine's storage and data processing are optimized for IoT, and can process data many times faster than similar products. |
| Extremely fast processing of high resolution data | | | √ | TDengine has achieved the same or better performance than other relational and NoSQL data processing systems. |
### System Maintenance Requirements
| **System Maintenance Requirements** | **Not Applicable** | **Might Be Applicable** | **Very Applicable** | **Description** |
| --------------------------------------- | ------------------ | ----------------------- | ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| Native high-reliability | | | √ | TDengine has a very robust, reliable and easily configurable system architecture to simplify routine operation. Human errors and accidents are eliminated to the greatest extent, with a streamlined experience for operators. |
| Minimize learning and maintenance costs | | | √ | In addition to being easily configurable, standard SQL support and the TDengine CLI for ad hoc queries makes maintenance simpler, allows reuse and reduces learning costs. |
| Abundant talent supply | √ | | | Given the above, and given the extensive training and professional services provided by TDengine, it is easy to migrate from existing solutions or create a new and lasting solution based on TDengine. |
## Comparison with other databases
- [TDengine vs. InfluxDB](https://tdengine.com/tsdb-comparison-influxdb-vs-tdengine/)
- [TDengine vs. TimescaleDB](https://tdengine.com/tsdb-comparison-timescaledb-vs-tdengine/)
## Products
For information about our paid offerings, see:
- [TDengine Enterprise](https://tdengine.com/enterprise/)
- [TDengine Cloud](https://cloud.tdengine.com)

View File

@ -1,137 +1,126 @@
---
title: Quick Install on Docker
sidebar_label: Docker
description: This document describes how to install TDengine in a Docker container and perform queries and inserts.
sidebar_label: Deploy in Docker
title: Get Started with TDengine Using Docker
description: Quickly experience TDengine's efficient insertion and querying using Docker
slug: /get-started/deploy-in-docker
---
This document describes how to install TDengine in a Docker container and perform queries and inserts.
You can install TDengine in a Docker container and perform some basic tests to verify its performance.
- The easiest way to explore TDengine is through [TDengine Cloud](https://cloud.tdengine.com).
- To get started with TDengine in a non-containerized environment, see [Quick Install from Package](../../get-started/package).
- If you want to view the source code, build TDengine yourself, or contribute to the project, see the [TDengine GitHub repository](https://github.com/taosdata/TDengine).
To install TDengine on your local machine instead of in a container, see [Get Started with TDengine Using an Installation Package](../deploy-from-package/).
## Run TDengine
## Before You Begin
If Docker is already installed on your computer, pull the latest TDengine Docker container image:
- Install Docker. For more information, see the [Docker website](https://www.docker.com/).
- Ensure that the network ports required by TDengine are not currently in use. For more information, see [Network Port Requirements](../../operations-and-maintenance/system-requirements/#network-port-requirements).
```shell
docker pull tdengine/tdengine:latest
```
## Procedure
Or the container image of specific version:
1. Pull the latest TDengine image:
```shell
docker pull tdengine/tdengine:3.0.1.4
```
```bash
docker pull tdengine/tdengine:latest
```
And then run the following command:
:::note
You can also pull a specific version of the image. For example:
```shell
docker run -d -p 6030:6030 -p 6041:6041 -p 6043-6060:6043-6060 -p 6043-6060:6043-6060/udp tdengine/tdengine
```
```bash
docker pull tdengine/tdengine:3.3.0.0
```
Note that TDengine Server 3.0 uses TCP port 6030. Port 6041 is used by taosAdapter for the REST API service. Ports 6043 through 6049 are used by taosAdapter for other connections. You can open these ports as needed.
:::
If you need to persist data to a specific directory on your local machine, please run the following command:
```shell
docker run -d -v ~/data/taos/dnode/data:/var/lib/taos \
-v ~/data/taos/dnode/log:/var/log/taos \
-p 6030:6030 -p 6041:6041 -p 6043-6060:6043-6060 -p 6043-6060:6043-6060/udp tdengine/tdengine
```
:::note
2. Start a container with the following command:
- /var/lib/taos: TDengine's default data file directory. The location can be changed via the [configuration file]. You can also change ~/data/taos/dnode/data to any empty local data directory.
- /var/log/taos: TDengine's default log file directory. The location can be changed via the [configuration file]. You can change ~/data/taos/dnode/log to any empty local log directory.
```bash
docker run -d -p 6030:6030 -p 6041:6041 -p 6043-6060:6043-6060 -p 6043-6060:6043-6060/udp tdengine/tdengine
```
:::
To persist data to your local machine, use the following command:
```bash
docker run -d -v <local-data-directory>:/var/lib/taos -v <local-log-directory>:/var/log/taos -p 6030:6030 -p 6041:6041 -p 6043-6060:6043-6060 -p 6043-6060:6043-6060/udp tdengine/tdengine
```
Run the following command to ensure that your container is running:
3. Verify that the container is running properly:
```shell
docker ps
```
```bash
docker ps
```
Enter the container and open the `bash` shell:
4. Enter the container and open a shell:
```shell
docker exec -it <container name> bash
```
```bash
docker exec -it <container-name> bash
```
You can now access TDengine or run other Linux commands.
You can now work with TDengine inside your container. For example, you can run the `taos` command to open the TDengine command-line interface.
Note: For information about installing docker, see the [official documentation](https://docs.docker.com/get-docker/).
## What to Do Next
## TDengine Command Line Interface
### Test Data Ingestion
On the container, run the following command to open the TDengine CLI:
Your TDengine installation includes taosBenchmark, a tool specifically designed to test TDengine's performance. taosBenchmark can simulate data generated by many devices with a wide range of configuration options so that you can perform tests on sample data similar to your real-world use cases. For more information about taosBenchmark, see [taosBenchmark](../../tdengine-reference/tools/taosbenchmark/).
```
$ taos
Perform the following steps to use taosBenchmark to test TDengine's ingestion performance in your container:
taos>
1. In a shell inside your container, run taosBenchmark with the default settings:
```
```bash
taosBenchmark -y
```
## TDengine Graphical User Interface
taosBenchmark automatically creates the `test` database and the `meters` supertable inside that database. This supertable contains 10,000 subtables, named `d0` to `d9999`, with each subtable containing 10,000 records. Each record includes the following four metrics:
Starting with TDengine 3.3.0.0, a new component called `taos-explorer` is included in the TDengine Docker image. You can use it to manage the databases, super tables, child tables, and data in your TDengine system. Some features are only available in TDengine Enterprise Edition; please contact the TDengine sales team if you need them.
- `ts` (timestamp), ranging from `2017-07-14 10:40:00 000` to `2017-07-14 10:40:09 999`
- `current`
- `voltage`
- `phase`
To use taos-explorer in the container, you need to access the host port mapped from container port 6060. Assuming the hostname is abc.com and the mapped host port is 6060, access `http://abc.com:6060`. taos-explorer uses port 6060 by default in the container. The default username and password for logging in to the TDengine Database Management System are root and taosdata.
Each subtable also has the following two tags:
## Test data insert performance
- `groupId`, ranging from `1` to `10`
- `location`, indicating a city and state such as `California.Campbell` or `California.Cupertino`
After your TDengine Server is running normally, you can run the taosBenchmark utility to test its performance:
When the ingestion process is finished, taosBenchmark outputs the time taken to ingest the specified sample data. From this, you can estimate how TDengine would perform on your system in a production environment.
Start TDengine service and execute `taosBenchmark` (formerly named `taosdemo`) in a terminal.
### Test Data Querying
```bash
taosBenchmark
```
After inserting data with taosBenchmark as described above, you can use the TDengine CLI to test TDengine's query performance in your container:
This command creates the `meters` supertable in the `test` database. In the `meters` supertable, it then creates 10,000 subtables named `d0` to `d9999`. Each table has 10,000 rows and each row has four columns: `ts`, `current`, `voltage`, and `phase`. The timestamps of the data in these columns range from 2017-07-14 10:40:00 000 to 2017-07-14 10:40:09 999. Each table is randomly assigned a `groupId` tag from 1 to 10 and a `location` tag of either `California.Campbell`, `California.Cupertino`, `California.LosAngeles`, `California.MountainView`, `California.PaloAlto`, `California.SanDiego`, `California.SanFrancisco`, `California.SanJose`, `California.SantaClara` or `California.Sunnyvale`.
1. Start the TDengine CLI:
The `taosBenchmark` command creates a deployment with 100 million data points that you can use for testing purposes. The time required to create the deployment depends on your hardware. On most modern servers, the deployment is created in ten to twenty seconds.
```bash
taos
```
You can customize the test deployment that taosBenchmark creates by specifying command-line parameters. For information about command-line parameters, run the `taosBenchmark --help` command. For more information about taosBenchmark, see [taosBenchmark](../../reference/components/taosbenchmark).
2. Query the total number of records in the `meters` supertable:
## Test data query performance
```sql
SELECT COUNT(*) FROM test.meters;
```
After using `taosBenchmark` to create your test deployment, you can run queries in the TDengine CLI to test its performance:
3. Query the average, maximum, and minimum values of 100 million records:
From the TDengine CLI (taos) query the number of rows in the `meters` supertable:
```sql
SELECT AVG(current), MAX(voltage), MIN(phase) FROM test.meters;
```
```sql
SELECT COUNT(*) FROM test.meters;
```
4. Query the total number of records where the value of the `location` tag is `California.SanFrancisco`:
Query the average, maximum, and minimum values of all 100 million rows of data:
```sql
SELECT COUNT(*) FROM test.meters WHERE location = "California.SanFrancisco";
```
```sql
SELECT AVG(current), MAX(voltage), MIN(phase) FROM test.meters;
```
5. Query the average, maximum, and minimum values of all records where the value of the `groupId` tag is `10`:
Query the number of rows whose `location` tag is `California.SanFrancisco`:
```sql
SELECT AVG(current), MAX(voltage), MIN(phase) FROM test.meters WHERE groupId = 10;
```
```sql
SELECT COUNT(*) FROM test.meters WHERE location = "California.SanFrancisco";
```
6. Calculate the average, maximum, and minimum values for the `d1001` table every 10 seconds:
Query the average, maximum, and minimum values of all rows whose `groupId` tag is `10`:
```sql
SELECT AVG(current), MAX(voltage), MIN(phase) FROM test.meters WHERE groupId = 10;
```
Query the average, maximum, and minimum values for table `d10` in 10 second intervals:
```sql
SELECT FIRST(ts), AVG(current), MAX(voltage), MIN(phase) FROM test.d10 INTERVAL(10s);
```
In the query above, you select the first timestamp (`ts`) in each interval. Alternatively, you can select `_wstart`, which returns the start of each time window. For more information about windowed queries, see [Time-Series Extensions](../../reference/taos-sql/distinguished/).
## Additional Information
For more information about deploying TDengine in a Docker environment, see [Deploying TDengine with Docker](../../operation/deployment/#docker).
```sql
SELECT _wstart, AVG(current), MAX(voltage), MIN(phase) FROM test.d1001 INTERVAL(10s);
```

View File

@ -1,326 +1,247 @@
---
title: Quick Install from Package
sidebar_label: Package
description: This document describes how to install TDengine on Linux, Windows, and macOS and perform queries and inserts.
sidebar_label: Deploy from Package
title: Get Started with TDengine Using an Installation Package
description: Quick experience with TDengine using the installation package
slug: /get-started/deploy-from-package
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
import PkgListV3 from "/components/PkgListV3";
This document describes how to install TDengine on Linux/Windows/macOS and perform queries and inserts.
You can install TDengine on a local machine and perform some basic tests to verify its performance. The TDengine OSS server can be installed on Linux and macOS, and the TDengine OSS client can be installed on Linux, macOS, and Windows.
- The easiest way to explore TDengine is through [TDengine Cloud](https://cloud.tdengine.com).
- To get started with TDengine on Docker, see [Quick Install on Docker](../../get-started/docker).
- If you want to view the source code, build TDengine yourself, or contribute to the project, see the [TDengine GitHub repository](https://github.com/taosdata/TDengine).
To install TDengine in a Docker container instead of on your machine, see [Get Started with TDengine in Docker](../deploy-in-docker/).
The full package of TDengine includes the TDengine Server (`taosd`), TDengine Client (`taosc`), taosAdapter for connecting with third-party systems and providing a RESTful interface, a command-line interface (CLI, taos), and some tools. Note that taosAdapter supports Linux only. In addition to client libraries for multiple languages, TDengine also provides a [REST API](../../reference/connectors/rest-api) through [taosAdapter](../../reference/components/taosadapter).
## Before You Begin
The standard server installation package includes `taos`, `taosd`, `taosAdapter`, `taosBenchmark`, and sample code. You can also download the Lite package that includes only `taosd` and the C/C++ client library.
- Verify that your machine meets the minimum system requirements for TDengine. For more information, see [Supported Platforms](../../tdengine-reference/supported-platforms/) and [System Requirements](../../operations-and-maintenance/system-requirements/).
- **(Windows only)** Verify that the latest version of the Microsoft Visual C++ Redistributable is installed on your machine. To download the redistributable package, see [Microsoft Visual C++ Redistributable latest supported downloads](https://learn.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist?view=msvc-170).
TDengine OSS is released as Deb and RPM packages. The Deb package can be installed on Debian, Ubuntu, and derivative systems. The RPM package can be installed on CentOS, RHEL, SUSE, and derivative systems. A .tar.gz package is also provided for enterprise customers, and you can install TDengine via `apt-get` as well. The .tar.gz package includes `taosdump` and the TDinsight installation script. If you want to use these utilities with the Deb or RPM package, download and install taosTools separately. TDengine can also be installed on x64 Windows and x64/M1 macOS.
## Procedure
## Operating environment requirements
On Linux, the minimum operating environment requirements are as follows:
The TDengine OSS installation package is provided for Linux users in .deb, .rpm, and .tar.gz format and can also be installed via APT from our repository. Installation packages are also provided for macOS (client and server) and Windows (client only).
Linux kernel version - 3.10.0-1160.83.1.el7.x86_64;
1. Select the appropriate package for your machine and follow the steps to install TDengine.
glibc version - 2.17;
<Tabs>
<TabItem label=".deb" value="debinst">
If you compile and install from cloned source code, the following additional requirements must also be met:
1. Download the .deb installation package:
<PkgListV3 type={6}/>
2. Run the following command to install TDengine:
cmake version - 3.26.4 or above;
```bash
sudo dpkg -i TDengine-server-<version>-Linux-x64.deb
```
gcc version - 9.3.1 or above;
Replace `<version>` with the version of the package that you downloaded.
## Installation
</TabItem>
**Note**
<TabItem label=".rpm" value="rpminst">
Since TDengine 3.0.6.0, a standalone taosTools package is no longer provided for download. However, all tools included in the taosTools package can be found in the TDengine-server package.
1. Download the .rpm installation package:
<PkgListV3 type={5}/>
2. Run the following command to install TDengine:
<Tabs>
<TabItem label=".deb" value="debinst">
```bash
sudo rpm -ivh TDengine-server-<version>-Linux-x64.rpm
```
1. Download the Deb installation package.
<PkgListV3 type={6}/>
2. In the directory where the package is located, use `dpkg` to install the package:
Replace `<version>` with the version of the package that you downloaded.
> Replace `<version>` with the version of the package that you downloaded.
</TabItem>
```bash
sudo dpkg -i TDengine-server-<version>-Linux-x64.deb
```
<TabItem label=".tar.gz" value="tarinst">
</TabItem>
1. Download the desired .tar.gz package from the following list:
<PkgListV3 type={0}/>
2. Run the following command to decompress the package:
<TabItem label=".rpm" value="rpminst">
```bash
tar -zxvf TDengine-server-<version>-Linux-x64.tar.gz
```
1. Download the .rpm installation package.
<PkgListV3 type={5}/>
2. In the directory where the package is located, use rpm to install the package:
Replace `<version>` with the version of the package that you downloaded.
3. In the directory where you decompressed the package, run the following command to install TDengine:
> Replace `<version>` with the version of the package that you downloaded.
```bash
sudo ./install.sh
```
```bash
sudo rpm -ivh TDengine-server-<version>-Linux-x64.rpm
```
:::note
</TabItem>
The `install.sh` script requires you to enter configuration information in the terminal. For a non-interactive installation, run `./install.sh -e no`. You can run `./install.sh -h` for detailed information about all parameters.
<TabItem label=".tar.gz" value="tarinst">
:::
1. Download the .tar.gz installation package.
<PkgListV3 type={0}/>
2. In the directory where the package is located, use `tar` to decompress the package:
</TabItem>
> Replace `<version>` with the version of the package that you downloaded.
<TabItem label="APT" value="apt-get">
```bash
tar -zxvf TDengine-server-<version>-Linux-x64.tar.gz
```
1. Configure the package repository:
In the directory to which the package was decompressed, run `install.sh`:
```bash
wget -qO - http://repos.taosdata.com/tdengine.key | sudo apt-key add -
echo "deb [arch=amd64] http://repos.taosdata.com/tdengine-stable stable main" | sudo tee /etc/apt/sources.list.d/tdengine-stable.list
```
```bash
sudo ./install.sh
```
2. Update the list of available packages and install TDengine.
:::info
You are prompted to enter configuration information while install.sh runs. To disable interactive mode, run `./install.sh -e no`. Run `./install.sh -h` to display all parameters with detailed explanations.
:::
```bash
sudo apt-get update
apt-cache policy tdengine
sudo apt-get install tdengine
```
</TabItem>
</TabItem>
<TabItem value="apt-get" label="apt-get">
You can use `apt-get` to install TDengine from the official package repository.
<TabItem label="Windows" value="windows">
**Configure the package repository**
:::note
```bash
wget -qO - http://repos.taosdata.com/tdengine.key | sudo apt-key add -
echo "deb [arch=amd64] http://repos.taosdata.com/tdengine-stable stable main" | sudo tee /etc/apt/sources.list.d/tdengine-stable.list
```
This procedure installs the TDengine OSS client on Windows. The TDengine OSS server does not support Windows.
You can install beta versions by configuring the following repository:
:::
```bash
wget -qO - http://repos.taosdata.com/tdengine.key | sudo apt-key add -
echo "deb [arch=amd64] http://repos.taosdata.com/tdengine-beta beta main" | sudo tee /etc/apt/sources.list.d/tdengine-beta.list
```
1. Download the Windows installation package:
<PkgListV3 type={3}/>
2. Run the installation package to install TDengine.
**Install TDengine with `apt-get`**
</TabItem>
```bash
sudo apt-get update
apt-cache policy tdengine
sudo apt-get install tdengine
```
<TabItem label="macOS" value="macos">
:::tip
This installation method is supported only for Debian and Ubuntu.
:::
</TabItem>
<TabItem label="Windows" value="windows">
1. Download the desired installation package from the following list:
<PkgListV3 type={7}/>
2. Run the installation package to install TDengine.
**Note**
- TDengine only supports Windows Server 2016/2019 and Windows 10/11 on the Windows platform.
- Since TDengine 3.1.0.0, only the client package is provided for Windows. If you need to run the TDengine server on Windows, please contact the TDengine sales team to upgrade to TDengine Enterprise.
- To run on Windows, the Microsoft Visual C++ Runtime library is required. If the Microsoft Visual C++ Runtime Library is missing on your platform, you can download and install it from [VC Runtime Library](https://learn.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist?view=msvc-170).
:::note
Follow the steps below:
If the installation is blocked, right-click on the package and choose **Open**.
1. Download the Windows installation package.
<PkgListV3 type={3}/>
2. Run the downloaded package to install TDengine.
Note: From version 3.0.1.7, only the TDengine client package can be downloaded for the Windows platform. If you want to run TDengine servers on Windows, please contact our sales team to upgrade to TDengine Enterprise.
:::
</TabItem>
</Tabs>
</TabItem>
<TabItem label="macOS" value="macos">
2. When installing the first node, you do not need to enter anything at the `Enter FQDN:` prompt. Only when installing the second or subsequent nodes do you need to enter the FQDN of any available node in the existing cluster so that the new node joins that cluster. Alternatively, you can configure it in the new node's configuration file before starting.
1. Download the macOS installation package.
<PkgListV3 type={7}/>
2. Run the downloaded package to install TDengine. If the installation is blocked, you can right-click or ctrl-click on the installation package and select `Open`.
3. Select your operating system and follow the steps to start TDengine services.
</TabItem>
</Tabs>
<Tabs>
<TabItem label="Linux" value="linux">
:::info
For information about other TDengine releases, see [Release History](../../releases/tdengine).
:::
Run the following command to start all TDengine services:
:::note
On the first node in your TDengine cluster, leave the `Enter FQDN:` prompt blank and press **Enter**. On subsequent nodes, you can enter the endpoint of the first dnode in the cluster. You can also configure this setting after you have finished installing TDengine.
```bash
sudo start-all.sh
```
:::
Alternatively, you can manage specific TDengine services through systemd:
## Quick Launch
```bash
sudo systemctl start taosd
sudo systemctl start taosadapter
sudo systemctl start taoskeeper
sudo systemctl start taos-explorer
```
<Tabs>
<TabItem label="Linux" value="linux">
:::note
After the installation is complete, run the following command to start the TDengine service:
If your machine does not support systemd, you can manually run the TDengine services located in the `/usr/local/taos/bin` directory.
```bash
systemctl start taosd
systemctl start taosadapter
systemctl start taoskeeper
systemctl start taos-explorer
```
:::
Or you can run a script to start all of the above services together:
</TabItem>
```bash
start-all.sh
```
<TabItem label="macOS" value="macos">
You can also use systemctl to stop or restart a specific service, or check its status, as shown below using `taosd` as an example:
Run the following command to start all TDengine services:
```bash
systemctl start taosd
systemctl stop taosd
systemctl restart taosd
systemctl status taosd
```
```bash
sudo start-all.sh
```
:::info
Alternatively, you can manage specific TDengine services with the `launchctl` command:
- The `systemctl` command requires _root_ privileges. If you are not logged in as the _root_ user, use the `sudo` command.
- The `systemctl stop taosd` command does not instantly stop TDengine Server. The server is stopped only after all data in memory is flushed to disk. The time required depends on the cache size.
- If your system does not include `systemd`, you can run `/usr/local/taos/bin/taosd` to start TDengine manually.
```bash
sudo launchctl start com.tdengine.taosd
sudo launchctl start com.tdengine.taosadapter
sudo launchctl start com.tdengine.taoskeeper
sudo launchctl start com.tdengine.taos-explorer
```
:::
</TabItem>
</Tabs>
</TabItem>
You can now work with TDengine on your local machine. For example, you can run the `taos` command to open the TDengine command-line interface.
<TabItem label="Windows" value="windows">
## What to Do Next
After the installation is complete, run `sc start taosd` or run `C:\TDengine\taosd.exe` with administrator privileges to start TDengine Server. Run `sc start taosadapter` or run `C:\TDengine\taosadapter.exe` with administrator privileges to start taosAdapter, which provides the HTTP/REST service.
### Test Data Ingestion
</TabItem>
Your TDengine installation includes taosBenchmark, a tool specifically designed to test TDengine's performance. taosBenchmark can simulate data generated by many devices with a wide range of configuration options so that you can perform tests on sample data similar to your real-world use cases. For more information about taosBenchmark, see [taosBenchmark](../../tdengine-reference/tools/taosbenchmark/).
<TabItem label="macOS" value="macos">
Perform the following steps to use taosBenchmark to test TDengine's ingestion performance on your machine:
After the installation is complete, double-click /Applications/TDengine to start the program, or run the `sudo launchctl start` commands below to start the TDengine services.
1. Run taosBenchmark with the default settings:
```bash
sudo launchctl start com.tdengine.taosd
sudo launchctl start com.tdengine.taosadapter
sudo launchctl start com.tdengine.taoskeeper
sudo launchctl start com.tdengine.taos-explorer
```
```bash
taosBenchmark -y
```
Or you can run a script to start all of the above services together:
```bash
start-all.sh
```
taosBenchmark automatically creates the `test` database and the `meters` supertable inside that database. This supertable contains 10,000 subtables, named `d0` to `d9999`, with each subtable containing 10,000 records. Each record includes the following four metrics:
The following `launchctl` commands can help you manage TDengine services, using the `taosd` service as an example:
- `ts` (timestamp), ranging from `2017-07-14 10:40:00 000` to `2017-07-14 10:40:09 999`
- `current`
- `voltage`
- `phase`
```bash
sudo launchctl start com.tdengine.taosd
sudo launchctl stop com.tdengine.taosd
sudo launchctl list | grep taosd
sudo launchctl print system/com.tdengine.taosd
```
Each subtable also has the following two tags:
:::info
- Use `sudo` when running `launchctl` to manage _com.tdengine.taosd_; administrator privileges are required for service management to enhance security.
- Troubleshooting:
  - The first column returned by `launchctl list | grep taosd` is the PID of the program. If it is `-`, the TDengine service is not running.
  - If the service behaves abnormally, check the `launchd.log` file in the system log or the taosd logs in the `/var/log/taos` directory for more information.
- `groupId`, ranging from `1` to `10`
- `location`, indicating a city and state such as `California.Campbell` or `California.Cupertino`
:::
When the ingestion process is finished, taosBenchmark outputs the time taken to ingest the specified sample data. From this, you can estimate how TDengine would perform on your system in a production environment.
### Test Data Querying
</TabItem>
</Tabs>
After inserting data with taosBenchmark as described above, you can use the TDengine CLI to test TDengine's query performance on your machine:
1. Start the TDengine CLI:
## TDengine Command Line Interface
```bash
taos
```
You can use the TDengine CLI to monitor your TDengine deployment and execute ad hoc queries. To open the CLI, run `taos` (Linux/macOS) or `taos.exe` (Windows) in a terminal. The TDengine CLI prompt looks like this:
2. Query the total number of records in the `meters` supertable:
```cmd
taos>
```
```sql
SELECT COUNT(*) FROM test.meters;
```
Using the TDengine CLI, you can create and delete databases and tables and run all types of queries. Each SQL command must end with a semicolon (;). For example:
3. Query the average, maximum, and minimum values of 100 million records:
```sql
CREATE DATABASE demo;
USE demo;
CREATE TABLE t (ts TIMESTAMP, speed INT);
INSERT INTO t VALUES ('2019-07-15 00:00:00', 10);
INSERT INTO t VALUES ('2019-07-15 01:00:00', 20);
SELECT * FROM t;

ts | speed |
========================================
2019-07-15 00:00:00.000 | 10 |
2019-07-15 01:00:00.000 | 20 |
Query OK, 2 row(s) in set (0.003128s)
```
```sql
SELECT AVG(current), MAX(voltage), MIN(phase) FROM test.meters;
```
4. Query the total number of records where the value of the `location` tag is `California.SanFrancisco`:
```sql
SELECT COUNT(*) FROM test.meters WHERE location = "California.SanFrancisco";
```
You can also monitor the deployment status, add and remove user accounts, and manage running instances. You can run the TDengine CLI on either the server or the client machine. For more information, see [TDengine CLI](../../reference/components/taos-shell/).
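For example, the following statements are an illustrative sketch of such administrative tasks (exact options may vary by version; the user name and password are hypothetical):

```sql
-- Show the dnodes in the cluster and the existing databases.
SHOW DNODES;
SHOW DATABASES;

-- Create a user account and remove it again.
CREATE USER test_user PASS 'Secret!234';
DROP USER test_user;
```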
5. Query the average, maximum, and minimum values of all records where the value of the `groupId` tag is `10`:
## TDengine Graphical User Interface
```sql
SELECT AVG(current), MAX(voltage), MIN(phase) FROM test.meters WHERE groupId = 10;
```
From TDengine 3.3.0.0, a new component called `taos-explorer` is included in the TDengine Docker image. You can use it to manage the databases, supertables, subtables, and data in your TDengine system. Some features are only available in TDengine Enterprise Edition; please contact the TDengine sales team if you need these features.
6. Calculate the average, maximum, and minimum values for the `d1001` table every 10 seconds:
To use taos-explorer in the container, you need to access the host port mapped from container port 6060. Assuming the host name is abc.com and the port used on the host is 6060, you need to access `http://abc.com:6060`. taos-explorer uses port 6060 by default in the container. When you use it for the first time, you need to register with your enterprise email; you can then log in with your username and password.
## Test data insert performance
After your TDengine Server is running normally, you can run the taosBenchmark utility to test its performance:
Start the TDengine service and execute `taosBenchmark` (formerly named `taosdemo`) in a terminal.
```bash
taosBenchmark
```
This command creates the `meters` supertable in the `test` database. In the `meters` supertable, it then creates 10,000 subtables named `d0` to `d9999`. Each table has 10,000 rows and each row has four columns: `ts`, `current`, `voltage`, and `phase`. The timestamps of the data in these columns range from 2017-07-14 10:40:00 000 to 2017-07-14 10:40:09 999. Each table is randomly assigned a `groupId` tag from 1 to 10 and a `location` tag of either `California.Campbell`, `California.Cupertino`, `California.LosAngeles`, `California.MountainView`, `California.PaloAlto`, `California.SanDiego`, `California.SanFrancisco`, `California.SanJose`, `California.SantaClara` or `California.Sunnyvale`.
The `taosBenchmark` command creates a deployment with 100 million data points that you can use for testing purposes. The time required to create the deployment depends on your hardware. On most modern servers, the deployment is created in ten to twenty seconds.
You can customize the test deployment that taosBenchmark creates by specifying command-line parameters. For information about command-line parameters, run the `taosBenchmark --help` command. For more information about taosBenchmark, see [taosBenchmark](../../reference/components/taosbenchmark).
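For example, the following invocation is a sketch of a smaller test run; it assumes the `-t`, `-n`, and `-y` options control the number of subtables, the number of rows per subtable, and the confirmation prompt, as in recent taosBenchmark releases:

```bash
# Create 1,000 subtables with 1,000 rows each, skipping the confirmation prompt.
taosBenchmark -t 1000 -n 1000 -y
```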
## Test data query performance
After using `taosBenchmark` to create your test deployment, you can run queries in the TDengine CLI to test its performance:
From the TDengine CLI (taos) query the number of rows in the `meters` supertable:
```sql
SELECT COUNT(*) FROM test.meters;
```
Query the average, maximum, and minimum values of all 100 million rows of data:
```sql
SELECT AVG(current), MAX(voltage), MIN(phase) FROM test.meters;
```
Query the number of rows whose `location` tag is `California.SanFrancisco`:
```sql
SELECT COUNT(*) FROM test.meters WHERE location = "California.SanFrancisco";
```
Query the average, maximum, and minimum values of all rows whose `groupId` tag is `10`:
```sql
SELECT AVG(current), MAX(voltage), MIN(phase) FROM test.meters WHERE groupId = 10;
```
Query the average, maximum, and minimum values for table `d10` in 10 second intervals:
```sql
SELECT FIRST(ts), AVG(current), MAX(voltage), MIN(phase) FROM test.d10 INTERVAL(10s);
```
In the query above, you select the first timestamp (`ts`) in each interval. Alternatively, you can select `_wstart`, which returns the start of each time window. For more information about windowed queries, see [Time-Series Extensions](../../reference/taos-sql/distinguished/).
```sql
SELECT _wstart, AVG(current), MAX(voltage), MIN(phase) FROM test.d1001 INTERVAL(10s);
```

View File

@ -0,0 +1,42 @@
---
sidebar_label: Use TDengine Cloud
title: Get Started with TDengine Cloud
slug: /get-started/use-tdengine-cloud
---
TDengine Cloud is a fully managed cloud service for industrial big data. It delivers all features of TDengine Enterprise as a cloud-native solution in Amazon Web Services, Microsoft Azure, or Google Cloud Platform.
You can register for a TDengine Cloud account for free and automatically obtain a one-month free trial to test TDengine Cloud for yourself.
## Procedure
1. Register for a TDengine Cloud account.
1. In a web browser, open the [TDengine Cloud](https://cloud.tdengine.com) website.
2. In the **Sign up** section, enter your name and company email address.
3. Click **Get Confirmation Code**. A confirmation email is sent to your email address.
4. Copy the 6-digit confirmation code from the email and paste it into the **Confirmation Code** field.
5. Click **Sign in TDengine Cloud**.
6. On the page displayed, enter your name, company, country of residence, and phone number.
7. Specify a password and click **Continue**.
2. Determine whether you want to use any public databases and click **Next**.
The TDengine DB Mart includes several public databases that you can use for testing purposes. To enable access to a public database in your account, select the toggle. You can modify these settings after the account creation process is finished.
3. Create an organization.
1. Enter a name for your organization in TDengine Cloud. This name must be unique.
2. Specify whether to enable single sign-on (SSO).
- Select **Public** to use GitHub, Microsoft, or Google SSO.
- Select **Azure AD** to use Microsoft Entra ID. Enter the Azure domain, client ID, and client secret as prompted.
3. Click **Next**.
4. Create your first instance.
1. Select a cloud and region from the drop-down lists.
2. Enter a name for your instance.
3. Specify whether to enable high availability.
4. Specify whether to create a sample database.
5. Click **Select Plan** and select your desired price plan.
6. Click **Create**.
Your instance is created according to your specifications and you can begin to use TDengine Cloud. For more information, see the [TDengine Cloud documentation](/cloud).

View File

@ -1,26 +0,0 @@
You can use `apt-get` to install TDengine from the official package repository.
**Configure the package repository**
```
wget -qO - http://repos.taosdata.com/tdengine.key | sudo apt-key add -
echo "deb [arch=amd64] http://repos.taosdata.com/tdengine-stable stable main" | sudo tee /etc/apt/sources.list.d/tdengine-stable.list
```
You can install beta versions by configuring the following package repository:
```
echo "deb [arch=amd64] http://repos.taosdata.com/tdengine-beta beta main" | sudo tee /etc/apt/sources.list.d/tdengine-beta.list
```
**Install TDengine with `apt-get`**
```
sudo apt-get update
apt-cache policy tdengine
sudo apt-get install tdengine
```
:::tip
This installation method is supported only for Debian and Ubuntu.
:::

View File

@ -1,17 +0,0 @@
import PkgList from "/components/PkgList";
TDengine is easy to download and install.
The standard server installation package includes `taos`, `taosd`, `taosAdapter`, `taosBenchmark`, and sample code. You can also download a lite package that includes only `taosd` and the C/C++ client library.
You can download the TDengine installation package in .rpm, .deb, or .tar.gz format. The .tar.gz package includes `taosdump` and the TDinsight installation script. If you want to use these utilities with the .deb or .rpm package, download and install taosTools separately.
Between official releases, beta versions may be released that contain new features. Do not use beta versions for production or testing environments. Select the installation package appropriate for your system.
<PkgList type={0}/>
For information about installing TDengine, see [Install and Uninstall](../operation/pkg-install).
For information about TDengine releases, see [All Downloads](https://tdengine.com/all-downloads)
and [Release Notes](https://github.com/taosdata/TDengine/releases).

View File

@ -1,7 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="-0.5 -1 32 32" width="50" height="50">
<g fill="#5865f2">
<path
d="M26.0015 6.9529C24.0021 6.03845 21.8787 5.37198 19.6623 5C19.3833 5.48048 19.0733 6.13144 18.8563 6.64292C16.4989 6.30193 14.1585 6.30193 11.8336 6.64292C11.6166 6.13144 11.2911 5.48048 11.0276 5C8.79575 5.37198 6.67235 6.03845 4.6869 6.9529C0.672601 12.8736 -0.41235 18.6548 0.130124 24.3585C2.79599 26.2959 5.36889 27.4739 7.89682 28.2489C8.51679 27.4119 9.07477 26.5129 9.55525 25.5675C8.64079 25.2265 7.77283 24.808 6.93587 24.312C7.15286 24.1571 7.36986 23.9866 7.57135 23.8161C12.6241 26.1255 18.0969 26.1255 23.0876 23.8161C23.3046 23.9866 23.5061 24.1571 23.7231 24.312C22.8861 24.808 22.0182 25.2265 21.1037 25.5675C21.5842 26.5129 22.1422 27.4119 22.7621 28.2489C25.2885 27.4739 27.8769 26.2959 30.5288 24.3585C31.1952 17.7559 29.4733 12.0212 26.0015 6.9529ZM10.2527 20.8402C8.73376 20.8402 7.49382 19.4608 7.49382 17.7714C7.49382 16.082 8.70276 14.7025 10.2527 14.7025C11.7871 14.7025 13.0425 16.082 13.0115 17.7714C13.0115 19.4608 11.7871 20.8402 10.2527 20.8402ZM20.4373 20.8402C18.9183 20.8402 17.6768 19.4608 17.6768 17.7714C17.6768 16.082 18.8873 14.7025 20.4373 14.7025C21.9717 14.7025 23.2271 16.082 23.1961 17.7714C23.1961 19.4608 21.9872 20.8402 20.4373 20.8402Z"
></path>
</g>
</svg>


View File

@ -1,6 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="-1 -2 18 18" width="50" height="50">
<path
fill="#000"
d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0016 8c0-4.42-3.58-8-8-8z"
></path>
</svg>


View File

@ -1,43 +1,10 @@
---
title: Get Started
description: This document describes how to install TDengine on various platforms.
slug: /get-started
---
import GitHubSVG from './github.svg'
import DiscordSVG from './discord.svg'
import TwitterSVG from './twitter.svg'
import YouTubeSVG from './youtube.svg'
import LinkedInSVG from './linkedin.svg'
import StackOverflowSVG from './stackoverflow.svg'
This section describes how to set up a TDengine environment quickly using Docker or installation packages and experience its capabilities.
You can install and run TDengine on Linux/Windows/macOS machines as well as Docker containers. You can also deploy TDengine as a managed service with TDengine Cloud.
The full package of TDengine includes the TDengine Server (`taosd`), TDengine Client (`taosc`), taosAdapter for connecting with third-party systems and providing a RESTful interface, a command-line interface, and some tools. In addition to client libraries for multiple languages, TDengine also provides a [RESTful interface](../reference/connectors/rest-api) through [taosAdapter](../reference/components/taosadapter).
```mdx-code-block
import DocCardList from '@theme/DocCardList';
import {useCurrentSidebarCategory} from '@docusaurus/theme-common';
<DocCardList items={useCurrentSidebarCategory().items}/>
```
## Join TDengine Community
<table width="100%">
<tr align="center" style={{border:0}}>
<td width="16%" style={{border:0}}><a href="https://github.com/taosdata/TDengine" target="_blank"><GitHubSVG /></a></td>
<td width="16%" style={{border:0}}><a href="https://discord.com/invite/VZdSuUg4pS" target="_blank"><DiscordSVG /></a></td>
<td width="16%" style={{border:0}}><a href="https://twitter.com/TDengineDB" target="_blank"><TwitterSVG /></a></td>
<td width="16%" style={{border:0}}><a href="https://www.youtube.com/@tdengine" target="_blank"><YouTubeSVG /></a></td>
<td width="16%" style={{border:0}}><a href="https://www.linkedin.com/company/tdengine" target="_blank"><LinkedInSVG /></a></td>
<td width="16%" style={{border:0}}><a href="https://stackoverflow.com/questions/tagged/tdengine" target="_blank"><StackOverflowSVG /></a></td>
</tr>
<tr align="center" style={{border:0,backgroundColor:'transparent'}}>
<td width="16%" style={{border:0,padding:0}}><a href="https://github.com/taosdata/TDengine" target="_blank">Star GitHub</a></td>
<td width="16%" style={{border:0,padding:0}}><a href="https://discord.com/invite/VZdSuUg4pS" target="_blank">Join Discord</a></td>
<td width="16%" style={{border:0,padding:0}}><a href="https://twitter.com/TDengineDB" target="_blank">Follow Twitter</a></td>
<td width="16%" style={{border:0,padding:0}}><a href="https://www.youtube.com/@tdengine" target="_blank">Subscribe YouTube</a></td>
<td width="16%" style={{border:0,padding:0}}><a href="https://www.linkedin.com/company/tdengine" target="_blank">Follow LinkedIn</a></td>
<td width="16%" style={{border:0,padding:0}}><a href="https://stackoverflow.com/questions/tagged/tdengine" target="_blank">Ask StackOverflow</a></td>
</tr>
</table>
- To deploy TDengine in a container, see [Get Started with TDengine Using Docker](deploy-in-docker/).
- To install TDengine on a local server, see [Get Started with TDengine Using an Installation Package](deploy-from-package/).
- To use TDengine as a fully managed cloud service instead of deploying on your own, see [Get Started with TDengine Cloud](use-tdengine-cloud/).

View File

@ -1,6 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 -2 24 24" width="50" height="50">
<path
fill="rgb(10, 102, 194)"
d="M20.5 2h-17A1.5 1.5 0 002 3.5v17A1.5 1.5 0 003.5 22h17a1.5 1.5 0 001.5-1.5v-17A1.5 1.5 0 0020.5 2zM8 19H5v-9h3zM6.5 8.25A1.75 1.75 0 118.3 6.5a1.78 1.78 0 01-1.8 1.75zM19 19h-3v-4.74c0-1.42-.6-1.93-1.38-1.93A1.74 1.74 0 0013 14.19a.66.66 0 000 .14V19h-3v-9h2.9v1.3a3.11 3.11 0 012.7-1.4c1.55 0 3.36.86 3.36 3.66z"
></path>
</svg>


View File

@ -1,7 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="-8 0 48 48" width="50" height="50">
<path d="M26 41v-9h4v13H0V32h4v9h22z" fill="#BCBBBB" />
<path
d="M23 34l.8-3-16.1-3.3L7 31l16 3zM9.2 23.2l15 7 1.4-3-15-7-1.4 3zm4.2-7.4L26 26.4l2.1-2.5-12.7-10.6-2.1 2.5zM21.5 8l-2.7 2 9.9 13.3 2.7-2L21.5 8zM7 38h16v-3H7v3z"
fill="#F48024"
/>
</svg>


View File

@ -1,7 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 -2 24 24" width="50" height="50">
<g fill="rgb(29, 155, 240)">
<path
d="M23.643 4.937c-.835.37-1.732.62-2.675.733.962-.576 1.7-1.49 2.048-2.578-.9.534-1.897.922-2.958 1.13-.85-.904-2.06-1.47-3.4-1.47-2.572 0-4.658 2.086-4.658 4.66 0 .364.042.718.12 1.06-3.873-.195-7.304-2.05-9.602-4.868-.4.69-.63 1.49-.63 2.342 0 1.616.823 3.043 2.072 3.878-.764-.025-1.482-.234-2.11-.583v.06c0 2.257 1.605 4.14 3.737 4.568-.392.106-.803.162-1.227.162-.3 0-.593-.028-.877-.082.593 1.85 2.313 3.198 4.352 3.234-1.595 1.25-3.604 1.995-5.786 1.995-.376 0-.747-.022-1.112-.065 2.062 1.323 4.51 2.093 7.14 2.093 8.57 0 13.255-7.098 13.255-13.254 0-.2-.005-.402-.014-.602.91-.658 1.7-1.477 2.323-2.41z"
></path>
</g>
</svg>


View File

@ -1,11 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="-2 -8 32 32" width="50" height="50">
<g>
<g>
<path
d="M27.9727 3.12324C27.6435 1.89323 26.6768 0.926623 25.4468 0.597366C23.2197 2.24288e-07 14.285 0 14.285 0C14.285 0 5.35042 2.24288e-07 3.12323 0.597366C1.89323 0.926623 0.926623 1.89323 0.597366 3.12324C2.24288e-07 5.35042 0 10 0 10C0 10 2.24288e-07 14.6496 0.597366 16.8768C0.926623 18.1068 1.89323 19.0734 3.12323 19.4026C5.35042 20 14.285 20 14.285 20C14.285 20 23.2197 20 25.4468 19.4026C26.6768 19.0734 27.6435 18.1068 27.9727 16.8768C28.5701 14.6496 28.5701 10 28.5701 10C28.5701 10 28.5677 5.35042 27.9727 3.12324Z"
fill="#FF0000"
></path>
<path d="M11.4253 14.2854L18.8477 10.0004L11.4253 5.71533V14.2854Z" fill="white"></path>
</g>
</g>
</svg>


View File

@ -0,0 +1,228 @@
---
sidebar_label: Data Model
title: The TDengine Data Model
slug: /basic-features/data-model
---
import Image from '@theme/IdealImage';
import dataModel from '../assets/data-model-01.png';
To clearly explain the concepts of time-series data and facilitate the writing of example programs, the TDengine documentation uses smart meters as an example. These example smart meters can collect three metrics: current, voltage, and phase. In addition, each smart meter also has two static attributes: location and group ID. The data collected by these smart meters is shown in the table below.
|Device ID| Timestamp | Current | Voltage | Phase | Location | Group ID |
|:-------:|:---------:|:-------:|:-------:|:-----:|:--------:|:--------:|
|d1001 |1538548685000 | 10.3 | 219 | 0.31 | California.SanFrancisco |2|
|d1002 | 1538548684000 | 10.2 | 220 | 0.23 | California.SanFrancisco |3|
|d1003 | 1538548686500 | 11.5 | 221 | 0.35 | California.LosAngeles | 3 |
|d1004 | 1538548685500 | 13.4 | 223 | 0.29 | California.LosAngeles | 2 |
|d1001 | 1538548695000 | 12.6 | 218 | 0.33 | California.SanFrancisco |2|
|d1004 | 1538548696600 | 11.8 | 221 | 0.28 | California.LosAngeles | 2 |
|d1002 | 1538548696650 | 10.3 | 218 | 0.25 | California.SanFrancisco | 3 |
|d1001 | 1538548696800 | 12.3 | 221 | 0.31 | California.SanFrancisco | 2 |
These smart meters collect data based on external trigger events or at preset intervals, ensuring that the data is continuous and time-ordered, thus forming a continuously updated data stream.
## Basic Concepts
### Metric
A metric refers to a physical quantity, such as current, voltage, or temperature, obtained from a sensor, device, or other data collection point. Since these physical quantities change over time, the types of data collected are diverse, including integers, floating-point numbers, and strings. As time passes, the stored data will continue to grow. For example, in smart meters, current, voltage, and phase are typical metrics collected.
### Tag
A tag refers to a static attribute associated with a sensor, device, or other data collection point. These are attributes that do not change over time, such as device model, color, or location. The data type of tags can be any type. Although tags themselves are static, in practical applications, you may need to modify, delete, or add tags. Unlike quantities collected, the amount of tag data stored remains relatively stable over time and does not show a significant growth trend. In the example of smart meters, location and group ID are typical tags.
### Data Collection Point
A data collection point (DCP) refers to a hardware or software device responsible for collecting metrics at a certain preset time period or when triggered by specific events. A data collection point can collect one or more quantities at the same time, but these quantities are obtained at the same moment and have the same timestamp. Complex structured devices typically include multiple data collection points, each with different collection cycles, and they operate independently without interference. For example, a car might have a dedicated data collection point for collecting location information, some for monitoring engine status, and others focused on monitoring the interior environment. Thus, a car could contain three different types of data collection points. In the example of smart meters, identifiers such as d1001, d1002, and d1003 represent different data collection points.
### Table
Given that the time-series data collected from DCPs is usually structured, TDengine uses the traditional relational database model to manage data. At the same time, to fully utilize the characteristics of time-series data, TDengine adopts a "one table per device" design, requiring a separate table for each data collection point. For example, if there are millions of smart meters, a corresponding number of tables need to be created in TDengine. In the example data of smart meters, the smart meter with device ID d1001 corresponds to a table in TDengine, and all the time-series data collected by this meter is stored in this table. This design approach retains the usability of relational databases while fully utilizing the unique advantages of time-series data:
1. Since the data generation process at different data collection points is completely independent, and each data collection point has a unique data source, there is only one writer per table. This allows for lock-free data writing, significantly increasing the write speed.
2. For a data collection point, the data it generates is in chronological order, so the write operation can be implemented in an append-only manner, further greatly enhancing the data writing speed.
3. The data from a data collection point is stored continuously in blocks. Thus, reading data from a specific time period can significantly reduce random read operations, dramatically improving the speed of data reading and querying.
4. Within a data block, columnar storage is used, and different compression algorithms can be applied to different data types to improve the compression ratio. Moreover, since the rate of data collection changes is usually slow, the compression ratio will be higher.
If the traditional method of writing data from multiple data collection points into a single table is used, due to uncontrollable network latency, the sequence of data arrival at the server from different data collection points cannot be guaranteed, and the write operation needs to be protected by locks. Moreover, it is difficult to ensure that the data from one data collection point is stored continuously together. Using the method of one data collection point per table can ensure to the greatest extent that the performance of insertion and querying for a single data collection point is optimal, and the data compression ratio is the highest.
In TDengine, the name of the data collection point (e.g., d1001) is usually used as the table name, and each data collection point can have multiple metrics (such as current, voltage, phase, etc.), each corresponding to a column in a table. The data type of the metrics can be integer, floating-point, string, etc.
Additionally, the first column of the table must be a timestamp. For each metric, TDengine will use the first column timestamp to build an index and use columnar storage. For complex devices, such as cars, which have multiple data collection points, multiple tables need to be created for one car.
### Supertable
Although the "one table per device" design helps to manage each collection point specifically, as the number of devices increases, the number of tables also increases dramatically, posing challenges for database management and data analysis. When performing aggregation operations across data collection points, users need to deal with a large number of tables, making the work exceptionally cumbersome.
To solve this problem, TDengine introduces the supertable. A supertable is a data structure that can aggregate certain types of data collection points together into a logically unified table. These data collection points have the same table structure, but their static properties (such as tags) may differ. When creating a supertable, in addition to defining the metrics, it is also necessary to define the tags of the supertable. A supertable must contain at least one timestamp column, one or more metric columns, and one or more tag columns. Moreover, the tags of the supertable can be flexibly added, modified, or deleted.
In TDengine, a table represents a specific data collection point, while a supertable represents a collection of data collection points with the same attributes. Taking smart meters as an example, we can create a supertable for this type of meter, which includes all the common properties and metrics of smart meters. This design not only simplifies table management but also facilitates aggregation operations across data collection points, thereby improving the efficiency of data processing.
### Subtable
A subtable is a logical abstraction of a data collection point and is a specific table belonging to a supertable. You can use the definition of the supertable as a template and create subtables by specifying the tag values of the subtables. Thus, tables generated through the supertable are referred to as subtables. The relationship between the supertable and subtables is mainly reflected in the following aspects.
- A supertable contains multiple subtables, which have the same table structure but different tag values.
- The table structure of subtables cannot be directly modified, but the columns and tags of the supertable can be modified, and the modifications take effect immediately for all subtables.
- A supertable defines a template and does not store any data or tag information itself.
In TDengine, query operations can be performed on both subtables and supertables. For queries on supertables, TDengine treats the data from all subtables as a whole: it first filters out the tables that meet the query conditions through tags, then queries the time-series data on those subtables separately, and finally merges the query results from each subtable. Essentially, by supporting queries on supertables, TDengine achieves efficient aggregation across multiple similar data collection points.
To better understand the relationship between metrics, tags, supertables, and subtables, consider the smart meters example and refer to the following diagram.
<figure>
<Image img={dataModel} alt="Data Model Diagram"/>
<figcaption>Figure 1. The TDengine data model</figcaption>
</figure>
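As a concrete illustration of querying a supertable (a minimal sketch based on the smart meters model used throughout this document), the following statement filters subtables by tag and aggregates their data:

```sql
-- Aggregate over all subtables of the meters supertable whose location tag
-- matches the filter; TDengine merges the per-subtable results.
SELECT AVG(current), MAX(voltage), MIN(phase)
FROM meters
WHERE location = 'California.SanFrancisco';
```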
### Database
A database in TDengine is used to manage a collection of tables. TDengine allows a running instance to contain multiple databases, and each database can be configured with different storage strategies. Since different types of data collection points usually have different data characteristics, such as data collection frequency, data retention period, number of replicas, data block size, etc., it is recommended to create supertables with different data characteristics in different databases.
A database can contain one or more supertables, but each supertable belongs to only one database. All subtables owned by a supertable are also stored in that database. This design helps to achieve more fine-grained data management and optimization, ensuring that TDengine can provide the best processing performance based on different data characteristics.
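For example, the following sketch (database names and parameter values are hypothetical) keeps high-frequency data in a short-retention database and low-frequency data in a long-retention database:

```sql
-- Short retention for high-frequency metrics: keep 30 days, 10-day data files.
CREATE DATABASE metrics_short KEEP 30 DURATION 10;

-- Long retention for slowly changing metrics: keep about 10 years, 100-day data files.
CREATE DATABASE metrics_long KEEP 3650 DURATION 100;
```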
### Timestamps
Timestamps play a crucial role in time-series data processing, especially when applications need to access the database from multiple time zones, making the issue more complex. Before delving into how TDengine handles timestamps and time zones, let's first introduce a few basic concepts.
- Local date and time: Refers to the local time of a specific region, usually expressed as a string in the format yyyy-MM-dd hh:mm:ss.SSS. This representation of time does not include any time zone information, such as "2021-07-21 12:00:00.000".
- Time zone: Standard time in different geographical regions on Earth. Coordinated Universal Time (UTC), or Greenwich Mean Time, is the international time standard; other time zones are usually expressed as an offset from UTC, such as "UTC+8".
- UTC timestamp: The number of milliseconds since the UNIX epoch (UTC time January 1, 1970, at 0:00). For example, "1700000000000" corresponds to the date and time "2023-11-14 22:13:20 (UTC+0)".

In TDengine, time-series data is actually saved as UTC timestamps. When writing data, TDengine handles timestamps in the following two ways.
- RFC-3339 format: When using this format, TDengine can correctly parse time strings with time zone information into UTC timestamps. For example, "2018-10-03T14:38:05.000+08:00" will be converted into a UTC timestamp.
- Non-RFC-3339 format: If the time string does not contain time zone information, TDengine will use the time zone setting of the application to automatically convert the time into a UTC timestamp.
When querying data, the TDengine client will automatically convert the saved UTC timestamps into local time according to the current time zone setting of the application, ensuring that users in different time zones can see the correct time information.
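As a sketch of these two cases (assuming a table with the same schema as the subtable d1001 created later in this document):

```sql
-- RFC-3339 style string with an explicit UTC+8 offset: parsed directly into a UTC timestamp.
INSERT INTO d1001 VALUES ('2018-10-03T14:38:05.000+08:00', 10.3, 219, 0.31);

-- String without time zone information: interpreted using the client application's time zone setting.
INSERT INTO d1001 VALUES ('2018-10-03 14:38:05.000', 10.3, 219, 0.31);
```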
## Data Modeling
This section uses smart meters as an example to briefly introduce how to use SQL to create databases, supertables, and basic table operations in TDengine.
### Creating a Database
The SQL to create a database for storing meter data is as follows:
```sql
CREATE DATABASE power PRECISION 'ms' KEEP 3650 DURATION 10 BUFFER 16;
```
This SQL will create a database named `power`, with the following parameters explained:
- `PRECISION 'ms'`: This database uses millisecond (ms) precision timestamps for its time-series data
- `KEEP 3650`: The data in this database will be retained for 3650 days, and data older than 3650 days will be automatically deleted
- `DURATION 10`: Data for every 10 days is stored in one data file
- `BUFFER 16`: Writing uses a memory pool of size 16MB.
After creating the power database, you can execute the USE statement to switch databases.
```sql
use power;
```
This SQL switches the current database to `power`, indicating that subsequent insertions, queries, and other operations will be performed in the `power` database.
### Creating a Supertable
The SQL to create a supertable named `meters` is as follows:
```sql
CREATE STABLE meters (
ts timestamp,
current float,
voltage int,
phase float
) TAGS (
location varchar(64),
group_id int
);
```
In TDengine, the SQL statement to create a supertable is similar to that in relational databases. For example, in the SQL above, `CREATE STABLE` is the keyword, indicating the creation of a supertable; then, `meters` is the name of the supertable; in the parentheses following the table name, the columns of the supertable are defined (column names, data types, etc.), with the following rules:
1. The first column must be a timestamp column. For example: `ts timestamp` indicates that the timestamp column name is `ts`, and its data type is `timestamp`;
2. Starting from the second column are the measurement columns. The data types of measurements can be integer, float, string, and so on. For example, `current float` indicates that the measurement column `current` has the data type `float`;
Finally, TAGS is a keyword, indicating tags, and in the parentheses following TAGS, the tags of the supertable are defined (tag names, data types, etc.).
1. The data type of tags can be integer, float, string, and so on. For example, `location varchar(64)` indicates that the tag `location` has the data type `varchar(64)`;
2. The names of tags cannot be the same as the names of measurement columns.
### Creating a Table
The SQL to create a subtable `d1001` using the supertable is as follows:
```sql
CREATE TABLE d1001
USING meters (
location,
group_id
) TAGS (
"California.SanFrancisco",
2
);
```
In the SQL above, `CREATE TABLE` is a keyword indicating the creation of a table; `d1001` is the name of the subtable; `USING` is a keyword indicating the use of a supertable as a template; `meters` is the name of the supertable; in the parentheses following the supertable name, `location`, `group_id` are the names of the tag columns of the supertable; `TAGS` is a keyword, and the values of the tag columns for the subtable are specified in the following parentheses. `"California.SanFrancisco"` and `2` indicate that the location of subtable `d1001` is `California.SanFrancisco`, and the group ID is `2`.
When performing write or query operations on a supertable, users can use the pseudocolumn `tbname` to specify or output the name of the corresponding subtable.
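For example (a sketch using the meters supertable defined above), the `tbname` pseudocolumn can be used to return per-subtable aggregates:

```sql
-- Output the average current per device by grouping on the subtable name.
SELECT tbname, AVG(current) FROM meters GROUP BY tbname;
```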
### Automatic Table Creation
In TDengine, to simplify user operations and ensure smooth data entry, even if a subtable does not exist, users can use the automatic table creation SQL with the `using` keyword to write data. This mechanism allows the system to automatically create the subtable when it encounters a non-existent subtable, and then perform the data writing operation. If the subtable already exists, the system will write the data directly without any additional steps.
The SQL for writing data while automatically creating tables is as follows:
```sql
INSERT INTO d1002
USING meters
TAGS (
"California.SanFrancisco",
2
) VALUES (
NOW,
10.2,
219,
0.32
);
```
In the SQL above, `INSERT INTO d1002` indicates writing data into the subtable `d1002`; `USING meters` indicates using the supertable `meters` as a template; `TAGS ("California.SanFrancisco", 2)` indicates the tag values for subtable `d1002` are `California.SanFrancisco` and `2`; `VALUES (NOW, 10.2, 219, 0.32)` indicates inserting a record into subtable `d1002` with values NOW (current timestamp), 10.2 (current), 219 (voltage), 0.32 (phase). When TDengine executes this SQL, if subtable `d1002` already exists, it writes the data directly; if subtable `d1002` does not exist, it first automatically creates the subtable, then writes the data.
### Creating Basic Tables
In TDengine, apart from subtables with tags, there are also basic tables without any tags. These tables are similar to tables in traditional relational databases, and users can create them using SQL.
The differences between basic tables and subtables are:
1. Tag Extensibility: Subtables add static tags on top of basic tables, allowing them to carry more metadata. Additionally, the tags of subtables are mutable, and users can add, delete, or modify tags as needed.
2. Table Ownership: Subtables always belong to a supertable and are part of it. Basic tables, however, exist independently and do not belong to any supertable.
3. Conversion Restrictions: In TDengine, basic tables cannot be directly converted into subtables, and likewise, subtables cannot be converted into basic tables. These two types of tables determine their structure and properties at creation and cannot be changed later.
In summary, basic tables provide functionality similar to traditional relational database tables, while subtables introduce a tagging mechanism, offering richer descriptions and more flexible management for time-series data. Users can choose to create basic tables or subtables based on actual needs.
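As an illustration of tag mutability (a sketch; the subtable and tag names follow the earlier examples), a tag value of an existing subtable can be changed in place without touching its metric columns:

```sql
-- Update the location tag of subtable d1001; the stored time-series data is unchanged.
ALTER TABLE d1001 SET TAG location = 'California.SanJose';
```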
The SQL for creating a basic table without any tags is as follows:
```sql
CREATE TABLE d1003(
ts timestamp,
current float,
voltage int,
phase float,
location varchar(64),
group_id int
);
```
The SQL above indicates the creation of the basic table `d1003`, with a structure including columns `ts`, `current`, `voltage`, `phase`, `location`, `group_id`, totaling 6 columns. This data model is completely consistent with relational databases.
Using basic tables as the data model means that static tag data (such as location and group_id) will be repeatedly stored in each row of the table. This approach not only increases storage space consumption but also significantly lowers query performance compared to using a supertable data model, as it cannot directly utilize tag data for filtering.
### Multi-Column Model vs. Single-Column Model
TDengine supports flexible data model designs, including multi-column and single-column models. The multi-column model allows multiple physical quantities collected simultaneously from the same data collection point with the same timestamp to be stored in different columns of the same supertable. However, in some extreme cases, a single-column model might be used, where each collected physical quantity is established in a separate table. For example, for the three physical quantities of current, voltage, and phase, three separate supertables might be established.
Although TDengine recommends using the multi-column model because it generally offers better writing and storage efficiency, the single-column model might be more suitable in certain specific scenarios. For example, if the types of quantities collected at a data collection point frequently change, using a multi-column model would require frequent modifications to the supertable's structural definition, increasing the complexity of the application. In such cases, using a single-column model can simplify the design and management of the application, as it allows independent management and expansion of each physical quantity's supertable.
Overall, TDengine offers flexible data model options, allowing users to choose the most suitable model based on actual needs and scenarios to optimize performance and manage complexity.
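For example, a single-column design for the smart meter scenario could look like the following sketch (the supertable names are hypothetical), with one supertable per metric instead of a single multi-column meters supertable:

```sql
-- One supertable per collected metric; each carries the same tags.
CREATE STABLE meters_current (ts TIMESTAMP, val FLOAT) TAGS (location VARCHAR(64), group_id INT);
CREATE STABLE meters_voltage (ts TIMESTAMP, val INT)   TAGS (location VARCHAR(64), group_id INT);
CREATE STABLE meters_phase   (ts TIMESTAMP, val FLOAT) TAGS (location VARCHAR(64), group_id INT);
```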

View File

@ -0,0 +1,138 @@
---
sidebar_label: Data Ingestion
title: Data Ingestion
slug: /basic-features/data-ingestion
---
This chapter uses the data model of smart meters as an example to introduce how to write, update, and delete time-series data in TDengine using SQL.
## Writing
In TDengine, you can write time-series data using the SQL insert statement.
### Writing One Record at a Time
Assume that the smart meter with device ID d1001 collected data on October 3, 2018, at 14:38:05: current 10.3 A, voltage 219 V, phase 0.31. We have already created a subtable d1001 belonging to the supertable meters in TDengine's power database.
1. You can write time-series data into the subtable d1001 using the following INSERT statement.
```sql
insert into d1001 (ts, current, voltage, phase) values ( "2018-10-03 14:38:05", 10.3, 219, 0.31)
```
The above SQL writes `2018-10-03 14:38:05`, `10.3`, `219`, `0.31` into the columns `ts`, `current`, `voltage`, `phase` of the subtable `d1001`.
2. When the `VALUES` part of the `INSERT` statement includes all columns of the table, the list of fields before `VALUES` can be omitted, as shown in the following SQL statement, which has the same effect as the previous INSERT statement specifying columns.
```sql
insert into d1001 values("2018-10-03 14:38:05", 10.3, 219, 0.31)
```
3. For the table's timestamp column (the first column), you can also directly supply a numeric timestamp expressed in the database's time precision.
```sql
INSERT INTO d1001 VALUES (1538548685000, 10.3, 219, 0.31);
```
The effects of the above three SQL statements are exactly the same.
### Writing Multiple Records at Once
Assume that the smart meter with device ID d1001 collects data every 10s and reports data every 30s, i.e., it needs to write 3 records every 30s. Users can write multiple records in one insert statement. The following SQL writes a total of 3 records.
```sql
insert into d1001 values
( "2018-10-03 14:38:05", 10.2, 220, 0.23),
( "2018-10-03 14:38:15", 12.6, 218, 0.33),
( "2018-10-03 14:38:25", 12.3, 221, 0.31)
```
The above SQL writes a total of three records.
### Writing to Multiple Tables at Once
Assume that the smart meters with device IDs d1001, d1002, and d1003 all need to write 3 records every 30 seconds. For such cases, TDengine supports writing multiple records to multiple tables at once.
```sql
INSERT INTO d1001 VALUES
("2018-10-03 14:38:05", 10.2, 220, 0.23),
("2018-10-03 14:38:15", 12.6, 218, 0.33),
("2018-10-03 14:38:25", 12.3, 221, 0.31)
d1002 VALUES
("2018-10-03 14:38:04", 10.2, 220, 0.23),
("2018-10-03 14:38:14", 10.3, 218, 0.25),
("2018-10-03 14:38:24", 10.1, 220, 0.22)
d1003 VALUES
("2018-10-03 14:38:06", 11.5, 221, 0.35),
("2018-10-03 14:38:16", 10.4, 220, 0.36),
("2018-10-03 14:38:26", 10.3, 220, 0.33)
;
```
The above SQL writes a total of nine records.
### Specifying Columns for Writing
You can write data to specific columns of a table by specifying columns. Columns not appearing in the SQL will be automatically filled with NULL values. Note that the timestamp column must be present, and its value cannot be NULL. The following SQL writes one record to the subtable d1004. This record only includes voltage and phase, with the current value being NULL.
```sql
insert into d1004 (ts, voltage, phase) values("2018-10-04 14:38:06", 223, 0.29)
```
### Automatic Table Creation on Insert
Users can perform inserts using the `using` keyword for automatic table creation. If the subtable does not exist, it triggers automatic table creation before data insertion; if the subtable already exists, it directly inserts the data. An insert statement with automatic table creation can also specify only some tag columns for insertion, leaving the unspecified tag columns as NULL values. The following SQL inserts a record. If the subtable d1005 does not exist, it first creates the table automatically with the tag `group_id` value as NULL, then inserts the data.
```sql
insert into d1005
using meters (location)
tags ( "beijing.chaoyang")
values ( "2018-10-04 14:38:07", 10.15, 217, 0.33)
```
The insert statement with automatic table creation also supports inserting data into multiple tables in one statement. The following SQL uses an automatic table creation insert statement to insert 9 records.
```sql
INSERT INTO d1001 USING meters TAGS ("California.SanFrancisco", 2) VALUES
("2018-10-03 14:38:05", 10.2, 220, 0.23),
("2018-10-03 14:38:15", 12.6, 218, 0.33),
("2018-10-03 14:38:25", 12.3, 221, 0.31)
d1002 USING meters TAGS ("California.SanFrancisco", 3) VALUES
("2018-10-03 14:38:04", 10.2, 220, 0.23),
("2018-10-03 14:38:14", 10.3, 218, 0.25),
("2018-10-03 14:38:24", 10.1, 220, 0.22)
d1003 USING meters TAGS ("California.LosAngeles", 2) VALUES
("2018-10-03 14:38:06", 11.5, 221, 0.35),
("2018-10-03 14:38:16", 10.4, 220, 0.36),
("2018-10-03 14:38:26", 10.3, 220, 0.33)
;
```
### Inserting Through Supertables
TDengine also supports direct data insertion into supertables. It is important to note that a supertable is a template and does not store data itself; the data is stored in the corresponding subtables. The following SQL inserts a record into the subtable d1001 by specifying the tbname column.
```sql
insert into meters (tbname, ts, current, voltage, phase, location, group_id)
values( "d1001, "2018-10-03 14:38:05", 10.2, 220, 0.23, "California.SanFrancisco", 2)
```
### Zero-Code Insertion
To facilitate easy data insertion for users, TDengine has seamlessly integrated with many well-known third-party tools, including Telegraf, Prometheus, EMQX, StatsD, collectd, and HiveMQ. Users only need to perform simple configurations on these tools to easily import data into TDengine. Additionally, TDengine Enterprise offers a variety of connectors, such as MQTT, OPC, AVEVA PI System, Wonderware, Kafka, MySQL, Oracle, etc. By configuring the corresponding connection information on the TDengine side, users can efficiently write data from different data sources into TDengine without writing any code.
## Update
Time-series data can be updated by inserting a record with a duplicate timestamp; the newly inserted data will replace the old values. The following SQL, by specifying columns, inserts 1 row of data into the subtable `d1001`; if a record with the timestamp `2018-10-03 14:38:05` already exists in subtable `d1001`, the new `current` value of 22 will replace the old value.
```sql
INSERT INTO d1001 (ts, current) VALUES ("2018-10-03 14:38:05", 22);
```
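To confirm that the update took effect, you can query the record back by its timestamp; the following check is a simple sketch using the same subtable and timestamp.
```sql
SELECT ts, current, voltage, phase FROM d1001 WHERE ts = "2018-10-03 14:38:05";
```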
## Delete
To facilitate the cleanup of abnormal data caused by equipment failures and other reasons, TDengine supports deleting time-series data based on timestamps. The following SQL deletes all data in the supertable `meters` with timestamps earlier than `2021-10-01 10:40:00.100`. Data deletion is irreversible, so use it with caution. To ensure that the data being deleted is indeed what you want to delete, it is recommended to first use a select statement with the deletion condition in the where clause to view the data to be deleted, and confirm it is correct before executing delete.
```sql
delete from meters where ts < '2021-10-01 10:40:00.100' ;
```
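For example, before running the delete above, a query such as the following (a sketch using the same condition) can be used to review how many rows would be affected.
```sql
SELECT count(*) FROM meters WHERE ts < '2021-10-01 10:40:00.100';
```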


@ -0,0 +1,723 @@
---
sidebar_label: Data Querying
title: Data Querying
slug: /basic-features/data-querying
---
import Image from '@theme/IdealImage';
import windowModel from '../assets/data-querying-01.png';
import slidingWindow from '../assets/data-querying-02.png';
import sessionWindow from '../assets/data-querying-03.png';
import eventWindow from '../assets/data-querying-04.png';
Compared to many other time-series and real-time databases, a unique advantage of TDengine since its first release is its support for standard SQL queries. This feature significantly reduces the learning curve for users. This chapter will use the data model of smart meters as an example to demonstrate how to use SQL queries in TDengine to handle time-series data. For further details and features of SQL syntax, it is recommended to refer to the official TDengine documentation. By studying this chapter, you will be able to master TDengine's SQL querying techniques and efficiently operate and analyze time-series data.
## Basic Query
To better introduce TDengine data querying, use the following taosBenchmark command to generate the time-series data needed for this chapter.
```shell
taosBenchmark --start-timestamp=1600000000000 --tables=100 --records=10000000 --time-step=10000
```
With the above command, the taosBenchmark tool generates a test database in TDengine containing a total of 1 billion time-series records. The timestamps of the time-series data start from `1600000000000` (2020-09-13T20:26:40+08:00); the data covers `100` devices (subtables), each with `10000000` records, collected at a rate of one record every 10 seconds.
In TDengine, users can specify conditions through the WHERE statement to query time-series data. Taking the data of smart meters as an example:
```sql
SELECT * FROM meters
WHERE voltage > 230
ORDER BY ts DESC
LIMIT 5;
```
The above SQL queries records from the supertable `meters` where the `voltage` is greater than 230V, sorted in descending order by time, and only outputs the first 5 rows. The query results are as follows:
```text
ts | current | voltage | phase | groupid | location |
===================================================================================================
2023-11-15 06:13:10.000 | 14.0601978 | 232 | 146.5000000 | 10 | California.Sunnyvale |
2023-11-15 06:13:10.000 | 14.0601978 | 232 | 146.5000000 | 1 | California.LosAngles |
2023-11-15 06:13:10.000 | 14.0601978 | 232 | 146.5000000 | 10 | California.Sunnyvale |
2023-11-15 06:13:10.000 | 14.0601978 | 232 | 146.5000000 | 5 | California.Cupertino |
2023-11-15 06:13:10.000 | 14.0601978 | 232 | 146.5000000 | 4 | California.SanFrancisco |
Query OK, 5 row(s) in set (0.145403s)
```
## Aggregate Query
TDengine supports aggregate queries through the GROUP BY clause. When an SQL statement includes a GROUP BY clause, the SELECT list can only contain the following expressions:
1. Constants
2. Aggregate functions
3. Expressions identical to those after GROUP BY
4. Expressions containing the above expressions
The GROUP BY clause is used to group data and return a summary row for each group. In the GROUP BY clause, any column from tables or views can be used as the basis for grouping, and these columns do not need to appear in the select list. Additionally, users can directly perform aggregate queries on supertables without the need to create subtables beforehand. Taking the data model of smart meters as an example, the SQL using the GROUP BY clause is as follows:
```sql
SELECT groupid, avg(voltage)
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
AND ts < "2023-01-01T00:00:00+08:00"
GROUP BY groupid;
```
The SQL above queries the supertable `meters` for data where the timestamp is greater than or equal to `2022-01-01T00:00:00+08:00` and less than `2023-01-01T00:00:00+08:00`, grouped by `groupid`, to calculate the average voltage for each group. The query results are as follows:
```text
groupid | avg(voltage) |
======================================
8 | 243.961981544901079 |
5 | 243.961981544901079 |
1 | 243.961981544901079 |
7 | 243.961981544901079 |
9 | 243.961981544901079 |
6 | 243.961981544901079 |
4 | 243.961981544901079 |
10 | 243.961981544901079 |
2 | 243.961981544901079 |
3 | 243.961981544901079 |
Query OK, 10 row(s) in set (0.042446s)
```
**Note**: The GROUP BY clause does not guarantee that the results are returned in any particular order when aggregating data. To obtain an ordered result set, use the ORDER BY clause to sort the results. This lets you adjust the order of the output as needed to meet specific business or reporting requirements.
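For example, the aggregate query above can be made to return its groups in a deterministic order by appending an ORDER BY clause; this is a minimal sketch based on the same query.
```sql
SELECT groupid, avg(voltage)
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
  AND ts < "2023-01-01T00:00:00+08:00"
GROUP BY groupid
ORDER BY groupid;
```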
TDengine provides a variety of built-in aggregation functions, as shown in the table below; a usage sketch follows the table.
| Aggregation Function | Description |
|:----------------------:|:--------------------------------------------------------------:|
|APERCENTILE | Calculates the approximate percentile of a specified column in a table/supertable, similar to the PERCENTILE function, but returns an approximate result. |
|AVG | Calculates the average value of a specified field |
|COUNT | Counts the number of records for a specified field |
|ELAPSED| The elapsed function expresses the continuous duration within a statistical period, and when used with the twa function, it can calculate the area under the statistical curve. When specifying a window with the INTERVAL clause, it calculates the time range covered by data in each window within the given time range; if there is no INTERVAL clause, it returns the time range covered by data for the entire given time range. Note that ELAPSED returns not the absolute value of the time range, but the number of units obtained by dividing the absolute value by time_unit.|
|LEASTSQUARES | Calculates the fitted line equation for a column in the table. start_val is the initial value of the independent variable, and step_val is the step value of the independent variable. |
|SPREAD | Calculates the difference between the maximum and minimum values of a column in the table.|
|STDDEV | Calculates the standard deviation of a column in the table. |
|SUM | Calculates the sum of a column in a table/supertable. |
|HYPERLOGLOG | Uses the hyperloglog algorithm to return the cardinality of a column. This algorithm significantly reduces memory usage in large data volumes, producing an estimated cardinality with a standard error of 0.81%. The algorithm is not very accurate with smaller data volumes, and the method `select count(data) from (select unique(col) as data from table)` can be used instead. |
|HISTOGRAM | Calculates the distribution of data according to user-specified intervals. |
|PERCENTILE | Calculates the percentile of a column's values in the table.|
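As a quick sketch of how several of these functions can be combined in a single statement (using the smart-meter test data generated above), consider:
```sql
-- Record count, average voltage, voltage range, and current standard deviation
-- over one day of data from the supertable meters
SELECT count(*), avg(voltage), spread(voltage), stddev(current)
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
  AND ts < "2022-01-02T00:00:00+08:00";
```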
## Data Partitioning Query
TDengine supports the PARTITION BY clause. When you need to partition data by certain dimensions and then perform a series of calculations within the partitioned data space, you can use the PARTITION BY clause to query, with the syntax as follows:
```sql
PARTITION BY part_list
```
`part_list` can be any scalar expression, including columns, constants, scalar functions, and their combinations.
TDengine processes the data partitioning clause as follows:
1. The data partitioning clause is placed after the WHERE clause;
2. The data partitioning clause divides the table data by the specified dimensions, and each partitioned shard undergoes specified calculations. The calculations are defined by subsequent clauses (window clause, GROUP BY clause, or SELECT clause);
3. The data partitioning clause can be used together with a window partitioning clause (or GROUP BY clause), in which case the subsequent clauses apply to each partitioned shard.
The SQL for data partitioning is as follows:
```sql
SELECT location, avg(voltage)
FROM meters
PARTITION BY location;
```
The example SQL above queries the supertable `meters`, grouping the data by the label `location`, and calculates the average voltage for each group. The query results are as follows:
```text
location | avg(voltage) |
======================================================
California.SantaClara | 243.962050000000005 |
California.SanFrancisco | 243.962050000000005 |
California.SanJose | 243.962050000000005 |
California.LosAngles | 243.962050000000005 |
California.SanDiego | 243.962050000000005 |
California.Sunnyvale | 243.962050000000005 |
California.PaloAlto | 243.962050000000005 |
California.Cupertino | 243.962050000000005 |
California.MountainView | 243.962050000000005 |
California.Campbell | 243.962050000000005 |
Query OK, 10 row(s) in set (2.415961s)
```
## Window Partition Query
In TDengine, you can use the window clause to perform aggregation queries by time window partitioning, which is particularly suitable for scenarios requiring analysis of large amounts of time-series data, such as smart meters collecting data every 10 seconds but needing to query the average voltage every 1 minute.
The window clause allows you to partition the queried data set by windows and aggregate the data within each window. The logic of window partitioning is shown in the following image:
<figure>
<Image img={windowModel} alt="Windowing description"/>
<figcaption>Figure 1. Windowing logic</figcaption>
</figure>
- Time Window: Data is divided based on time intervals, supporting sliding and tumbling time windows, suitable for data aggregation over fixed time periods.
- Status Window: Windows are divided based on changes in device status values, with data of the same status value grouped into one window, which closes when the status value changes.
- Session Window: Sessions are divided based on the differences in record timestamps, with records having a timestamp interval less than the predefined value belonging to the same session.
- Event Window: Windows are dynamically divided based on the start and end conditions of events, opening when the start condition is met and closing when the end condition is met.
- Count Window: Windows are divided based on the number of data rows, with each window consisting of a specified number of rows for aggregation calculations.
The syntax for the window clause is as follows:
```sql
window_clause: {
SESSION(ts_col, tol_val)
| STATE_WINDOW(col)
| INTERVAL(interval_val [, interval_offset]) [SLIDING (sliding_val)] [FILL(fill_mod_and_val)]
| EVENT_WINDOW START WITH start_trigger_condition END WITH end_trigger_condition
}
```
**Note** When using the window clause, the following rules should be observed:
1. The window clause is located after the data partitioning clause and cannot be used together with the GROUP BY clause.
2. The window clause partitions the data by windows and performs calculations on the expressions in the SELECT list for each window. The expressions in the SELECT list can only include: constants; the pseudocolumns `_wstart`, `_wend`, and `_wduration`; and aggregate functions (including selection functions and time-series specific functions that can determine the number of output rows by parameters).
3. WHERE statements can specify the start and end times of the query and other filtering conditions.
### Timestamp Pseudocolumns
In the window aggregation query results, if the SQL does not specify the timestamp column in the output query results, the final results will not automatically include the time column information of the window. However, if you need to output the time window information corresponding to the aggregated query results in the results, you can use the timestamp-related pseudocolumns in the select clause, such as the start time of the time window (`_wstart`), the end time of the time window (`_wend`), the duration of the time window (`_wduration`), and the pseudocolumns related to the overall query window, such as the start time of the query window (`_qstart`) and the end time of the query window (`_qend`). Note that both the start and end times of the time window are closed intervals, and the duration of the time window is the value under the current time resolution of the data. For example, if the current database's time precision is milliseconds (ms), then 500 in the results represents the duration of the current time window is 500ms.
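For example, the following sketch (based on the same smart-meter data) outputs the window start time, end time, and duration alongside the aggregate value of each 1-minute window.
```sql
SELECT _wstart, _wend, _wduration, avg(voltage)
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
  AND ts < "2022-01-01T00:05:00+08:00"
INTERVAL(1m);
```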
### Time Windows
Time windows can be divided into: sliding time windows and tumbling time windows. The syntax for the time window clause is as follows:
```sql
INTERVAL(interval_val [, interval_offset])
[SLIDING (sliding_val)]
[FILL(fill_mod_and_val)]
```
The time window clause includes 3 sub-clauses:
- INTERVAL clause: used to generate windows of equal time periods, where interval_val specifies the size of each time window, and interval_offset specifies its starting offset. By default, windows begin at Unix time 0 (1970-01-01 00:00:00 UTC). If interval_offset is specified, the windows start from "Unix time 0 + interval_offset";
- SLIDING clause: used to specify the time the window slides forward;
- FILL: used to specify the filling mode of data in case of missing data in the window interval.
For time windows, both interval_val and sliding_val represent time periods, and syntactically support three ways. For example:
1. INTERVAL(1s, 500a) SLIDING(1s), with time units, where the time units are represented by single characters, respectively: a (milliseconds), b (nanoseconds), d (days), h (hours), m (minutes), n (months), s (seconds), u (microseconds), w (weeks), y (years);
2. INTERVAL(1000, 500) SLIDING(1000), without time units, will use the time precision of the query database as the default time unit, and when there are multiple databases, the one with higher precision will be used by default;
3. INTERVAL('1s', '500a') SLIDING('1s'), with time units in string form, where the string cannot contain any spaces or other characters.
Example SQL is as follows:
```sql
SELECT tbname, _wstart, _wend, avg(voltage)
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
AND ts < "2022-01-01T00:05:00+08:00"
PARTITION BY tbname
INTERVAL(1m, 5s)
SLIMIT 2;
```
The above SQL queries the supertable `meters` for data with timestamps greater than or equal to `2022-01-01T00:00:00+08:00` and less than `2022-01-01T00:05:00+08:00`; data is first partitioned by subtable name `tbname`, then partitioned by a 1-minute time window, with each time window offset by 5 seconds; finally, only the data from the first 2 partitions is taken as the result. The query results are as follows:
```text
tbname | _wstart | _wend | avg(voltage) |
======================================================================================
d2 | 2021-12-31 23:59:05.000 | 2022-01-01 00:00:05.000 | 253.000000000000000 |
d2 | 2022-01-01 00:00:05.000 | 2022-01-01 00:01:05.000 | 244.166666666666657 |
d2 | 2022-01-01 00:01:05.000 | 2022-01-01 00:02:05.000 | 241.833333333333343 |
d2 | 2022-01-01 00:02:05.000 | 2022-01-01 00:03:05.000 | 243.166666666666657 |
d2 | 2022-01-01 00:03:05.000 | 2022-01-01 00:04:05.000 | 240.833333333333343 |
d2 | 2022-01-01 00:04:05.000 | 2022-01-01 00:05:05.000 | 244.800000000000011 |
d26 | 2021-12-31 23:59:05.000 | 2022-01-01 00:00:05.000 | 253.000000000000000 |
d26 | 2022-01-01 00:00:05.000 | 2022-01-01 00:01:05.000 | 244.166666666666657 |
d26 | 2022-01-01 00:01:05.000 | 2022-01-01 00:02:05.000 | 241.833333333333343 |
d26 | 2022-01-01 00:02:05.000 | 2022-01-01 00:03:05.000 | 243.166666666666657 |
d26 | 2022-01-01 00:03:05.000 | 2022-01-01 00:04:05.000 | 240.833333333333343 |
d26 | 2022-01-01 00:04:05.000 | 2022-01-01 00:05:05.000 | 244.800000000000011 |
Query OK, 12 row(s) in set (0.021265s)
```
#### Sliding Window
Each query is executed over a time window, and the time window slides forward as time progresses. When defining a continuous query, you need to specify both the size of the time window and the forward sliding time. As shown in the figure below, [t0s, t0e], [t1s, t1e], [t2s, t2e] are the time window ranges of three consecutive queries, and the forward movement of the window is indicated by the sliding time. Query filtering, aggregation, and other operations are performed independently for each time window.
<figure>
<Image img={slidingWindow} alt="Sliding window logic"/>
<figcaption>Figure 2. Sliding window logic</figcaption>
</figure>
**Note**
1. The INTERVAL and SLIDING clauses need to be used in conjunction with aggregation or selection functions; therefore, the following SQL statement is illegal:
```sql
SELECT * FROM temp_tb_1 INTERVAL(1m);
```
2. The forward sliding time of SLIDING cannot exceed the time range of a window, therefore, the following SQL statement is also illegal:
```sql
SELECT COUNT(*) FROM temp_tb_1 INTERVAL(1m) SLIDING(2m);
```
**Points to note when using time windows**
1. The window width of the aggregation period is specified by the keyword INTERVAL, with a minimum interval of 10 milliseconds (10a); it also supports an offset (the offset must be less than the interval), which is the offset of the time window division relative to Unix time 0 (1970-01-01 00:00:00 UTC). The SLIDING clause is used to specify the forward increment of the aggregation period, i.e., the duration by which each window slides forward.
2. When using the INTERVAL statement, unless in very special circumstances, it is required to configure the timezone parameter in the taos.cfg configuration file of both client and server to the same value, to avoid frequent cross-time zone conversions by time handling functions, which could lead to severe performance impacts.
3. The returned results have a strictly monotonically increasing time sequence.
Example:
```sql
SELECT tbname, _wstart, avg(voltage)
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
AND ts < "2022-01-01T00:05:00+08:00"
PARTITION BY tbname
INTERVAL(1m) SLIDING(30s)
SLIMIT 1;
```
The above SQL queries the supertable `meters` for data with timestamps greater than or equal to `2022-01-01T00:00:00+08:00` and less than `2022-01-01T00:05:00+08:00`. Data is first partitioned by subtable name `tbname`, then divided into 1-minute time windows, with the time windows sliding every 30 seconds; finally, only the data from the first partition is taken as the result. The query results are as follows:
```text
tbname | _wstart | avg(voltage) |
=============================================================
d2 | 2021-12-31 23:59:30.000 | 248.333333333333343 |
d2 | 2022-01-01 00:00:00.000 | 246.000000000000000 |
d2 | 2022-01-01 00:00:30.000 | 244.666666666666657 |
d2 | 2022-01-01 00:01:00.000 | 240.833333333333343 |
d2 | 2022-01-01 00:01:30.000 | 239.500000000000000 |
d2 | 2022-01-01 00:02:00.000 | 243.833333333333343 |
d2 | 2022-01-01 00:02:30.000 | 243.833333333333343 |
d2 | 2022-01-01 00:03:00.000 | 241.333333333333343 |
d2 | 2022-01-01 00:03:30.000 | 241.666666666666657 |
d2 | 2022-01-01 00:04:00.000 | 244.166666666666657 |
d2 | 2022-01-01 00:04:30.000 | 244.666666666666657 |
Query OK, 11 row(s) in set (0.013153s)
```
#### Tumbling Window
When SLIDING is equal to INTERVAL, the sliding window becomes a tumbling window. The difference between the two is that with a sliding window, because interval_val and sliding_val differ, adjacent time windows overlap, whereas with a tumbling window there is no overlap. Essentially, a tumbling window divides time strictly by interval_val, so INTERVAL(1m) and INTERVAL(1m) SLIDING(1m) are equivalent.
Example:
```sql
SELECT tbname, _wstart, _wend, avg(voltage)
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
AND ts < "2022-01-01T00:05:00+08:00"
PARTITION BY tbname
INTERVAL(1m) SLIDING(1m)
SLIMIT 1;
```
The above SQL queries the supertable `meters` for data with timestamps greater than or equal to `2022-01-01T00:00:00+08:00` and less than `2022-01-01T00:05:00+08:00`. The data is first partitioned by the subtable name `tbname`, then divided into 1-minute time windows, with each time window also being 1 minute long; finally, only the data from the first partition is taken as the result. The query results are as follows:
```text
tbname | _wstart | _wend | avg(voltage) |
======================================================================================
d2 | 2022-01-01 00:00:00.000 | 2022-01-01 00:01:00.000 | 246.000000000000000 |
d2 | 2022-01-01 00:01:00.000 | 2022-01-01 00:02:00.000 | 240.833333333333343 |
d2 | 2022-01-01 00:02:00.000 | 2022-01-01 00:03:00.000 | 243.833333333333343 |
d2 | 2022-01-01 00:03:00.000 | 2022-01-01 00:04:00.000 | 241.333333333333343 |
d2 | 2022-01-01 00:04:00.000 | 2022-01-01 00:05:00.000 | 244.166666666666657 |
Query OK, 5 row(s) in set (0.016812s)
```
#### FILL Clause
The FILL clause is used to specify the fill mode when data is missing in a window interval. The fill modes include the following:
1. No fill: NONE (default fill mode).
2. VALUE fill: Fixed value fill, where the fill value must be specified. For example: FILL(VALUE, 1.23). Note that the actual fill value is determined by the type of the corresponding column; for example, with FILL(VALUE, 1.23) on an INT column, the fill value is 1.
3. PREV fill: Fill with the previous non-NULL value. For example: FILL(PREV).
4. NULL fill: Fill with NULL. For example: FILL(NULL).
5. LINEAR fill: Perform linear interpolation based on the nearest non-NULL values before and after. For example: FILL(LINEAR).
6. NEXT fill: Fill with the next non-NULL value. For example: FILL(NEXT).
Among these fill modes, except for the NONE mode which does not fill by default, other modes will be ignored if there is no data in the entire query time range, resulting in no fill data and an empty query result. This behavior is reasonable under some modes (PREV, NEXT, LINEAR) because no data means no fill value can be generated.
For other modes (NULL, VALUE), theoretically, fill values can be generated. Whether to output fill values depends on the application's requirements. To meet the needs of applications that require forced filling of data or NULL, and to maintain compatibility with existing fill modes, TDengine also supports two new fill modes:
1. NULL_F: Force fill with NULL values
2. VALUE_F: Force fill with VALUE
The differences between NULL, NULL_F, VALUE, and VALUE_F for different scenarios are as follows:
1. INTERVAL clause: NULL_F, VALUE_F are forced fill modes; NULL, VALUE are non-forced modes. In this mode, their semantics match their names.
2. Stream computing's INTERVAL clause: NULL_F and NULL behave the same, both are non-forced modes; VALUE_F and VALUE behave the same, both are non-forced modes. That is, there is no forced mode in stream computing's INTERVAL.
3. INTERP clause: NULL and NULL_F behave the same, both are forced modes; VALUE and VALUE_F behave the same, both are forced modes. That is, there is no non-forced mode in INTERP.
**Note**
1. Using the FILL statement may generate a large amount of filled output, be sure to specify the time range for the query.
2. For each query, the system can return no more than 10 million results with interpolation.
3. In time dimension aggregation, the returned results have a strictly monotonic increasing time sequence.
4. If the query target is a supertable, the aggregate function will apply to the data of all tables under the supertable that meet the value filtering conditions. If the query does not use a PARTITION BY statement, the results are returned in a strictly monotonic increasing time sequence; if the query uses a PARTITION BY statement for grouping, the results within each PARTITION are strictly monotonic increasing in time sequence.
Example:
```sql
SELECT tbname, _wstart, _wend, avg(voltage)
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
AND ts < "2022-01-01T00:05:00+08:00"
PARTITION BY tbname
INTERVAL(1m) FILL(prev)
SLIMIT 2;
```
The above SQL queries the supertable `meters` for data with timestamps greater than or equal to `2022-01-01T00:00:00+08:00` and less than `2022-01-01T00:05:00+08:00`; data is first partitioned by subtable name `tbname`, then by each 1-minute time window. If data is missing within a window, it is filled with the previous non-NULL value; finally, only the data from the first 2 partitions is taken as the result. The query results are as follows:
```text
tbname | _wstart | _wend | avg(voltage) |
=======================================================================================
d2 | 2022-01-01 00:00:00.000 | 2022-01-01 00:01:00.000 | 246.000000000000000 |
d2 | 2022-01-01 00:01:00.000 | 2022-01-01 00:02:00.000 | 240.833333333333343 |
d2 | 2022-01-01 00:02:00.000 | 2022-01-01 00:03:00.000 | 243.833333333333343 |
d2 | 2022-01-01 00:03:00.000 | 2022-01-01 00:04:00.000 | 241.333333333333343 |
d2 | 2022-01-01 00:04:00.000 | 2022-01-01 00:05:00.000 | 244.166666666666657 |
d26 | 2022-01-01 00:00:00.000 | 2022-01-01 00:01:00.000 | 246.000000000000000 |
d26 | 2022-01-01 00:01:00.000 | 2022-01-01 00:02:00.000 | 240.833333333333343 |
d26 | 2022-01-01 00:02:00.000 | 2022-01-01 00:03:00.000 | 243.833333333333343 |
d26 | 2022-01-01 00:03:00.000 | 2022-01-01 00:04:00.000 | 241.333333333333343 |
d26 | 2022-01-01 00:04:00.000 | 2022-01-01 00:05:00.000 | 244.166666666666657 |
Query OK, 10 row(s) in set (0.022866s)
```
### State Window
Use integers (boolean values) or strings to identify the state of the device when the record is generated. Records with the same state value belong to the same state window, and the window closes when the value changes. TDengine also supports using CASE expressions on state values, which can express that the start of a state is triggered by meeting a certain condition, and the end of the state is triggered by meeting another condition. For example, with smart meters, if the voltage is within the normal range of 225V to 235V, you can monitor the voltage to determine if the circuit is normal.
```sql
SELECT tbname, _wstart, _wend,_wduration, CASE WHEN voltage >= 225 and voltage <= 235 THEN 1 ELSE 0 END status
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
AND ts < "2022-01-01T00:05:00+08:00"
PARTITION BY tbname
STATE_WINDOW(
CASE WHEN voltage >= 225 and voltage <= 235 THEN 1 ELSE 0 END
)
SLIMIT 2;
```
The above SQL queries data from the supertable `meters`, where the timestamp is greater than or equal to `2022-01-01T00:00:00+08:00` and less than `2022-01-01T00:05:00+08:00`. Data is first partitioned by the subtable name `tbname`. It then divides into status windows based on whether the voltage is within the normal range. Finally, it retrieves data from the first 2 partitions as the result. The query results are as follows: (Since the data is randomly generated, the number of data entries in the result set may vary)
```text
tbname | _wstart | _wend | _wduration | status |
===============================================================================================
d2 | 2022-01-01 00:00:00.000 | 2022-01-01 00:01:20.000 | 80000 | 0 |
d2 | 2022-01-01 00:01:30.000 | 2022-01-01 00:01:30.000 | 0 | 1 |
d2 | 2022-01-01 00:01:40.000 | 2022-01-01 00:01:40.000 | 0 | 0 |
d2 | 2022-01-01 00:01:50.000 | 2022-01-01 00:01:50.000 | 0 | 1 |
d2 | 2022-01-01 00:02:00.000 | 2022-01-01 00:02:20.000 | 20000 | 0 |
d2 | 2022-01-01 00:02:30.000 | 2022-01-01 00:02:30.000 | 0 | 1 |
d2 | 2022-01-01 00:02:40.000 | 2022-01-01 00:03:00.000 | 20000 | 0 |
d2 | 2022-01-01 00:03:10.000 | 2022-01-01 00:03:10.000 | 0 | 1 |
d2 | 2022-01-01 00:03:20.000 | 2022-01-01 00:03:40.000 | 20000 | 0 |
d2 | 2022-01-01 00:03:50.000 | 2022-01-01 00:03:50.000 | 0 | 1 |
d2 | 2022-01-01 00:04:00.000 | 2022-01-01 00:04:50.000 | 50000 | 0 |
d26 | 2022-01-01 00:00:00.000 | 2022-01-01 00:01:20.000 | 80000 | 0 |
d26 | 2022-01-01 00:01:30.000 | 2022-01-01 00:01:30.000 | 0 | 1 |
d26 | 2022-01-01 00:01:40.000 | 2022-01-01 00:01:40.000 | 0 | 0 |
d26 | 2022-01-01 00:01:50.000 | 2022-01-01 00:01:50.000 | 0 | 1 |
d26 | 2022-01-01 00:02:00.000 | 2022-01-01 00:02:20.000 | 20000 | 0 |
d26 | 2022-01-01 00:02:30.000 | 2022-01-01 00:02:30.000 | 0 | 1 |
d26 | 2022-01-01 00:02:40.000 | 2022-01-01 00:03:00.000 | 20000 | 0 |
d26 | 2022-01-01 00:03:10.000 | 2022-01-01 00:03:10.000 | 0 | 1 |
d26 | 2022-01-01 00:03:20.000 | 2022-01-01 00:03:40.000 | 20000 | 0 |
d26 | 2022-01-01 00:03:50.000 | 2022-01-01 00:03:50.000 | 0 | 1 |
d26 | 2022-01-01 00:04:00.000 | 2022-01-01 00:04:50.000 | 50000 | 0 |
Query OK, 22 row(s) in set (0.153403s)
```
### Session Window
The session window determines whether records belong to the same session based on the value of the timestamp primary key. As shown in the figure below, if the interval between consecutive timestamps is set to be less than or equal to 12 seconds, the following 6 records form 2 session windows: [2019-04-28 14:22:10, 2019-04-28 14:22:30] and [2019-04-28 14:23:10, 2019-04-28 14:23:30]. This is because the interval between 2019-04-28 14:22:30 and 2019-04-28 14:23:10 is 40 seconds, which exceeds the continuous interval (12 seconds).
<figure>
<Image img={sessionWindow} alt="Session window example"/>
<figcaption>Figure 3. Session window example</figcaption>
</figure>
Within the tol_val time interval, records are considered to belong to the same window; if the time between two consecutive records exceeds tol_val, a new window is automatically started.
```sql
SELECT COUNT(*), FIRST(ts) FROM temp_tb_1 SESSION(ts, tol_val);
```
Example:
```sql
SELECT tbname, _wstart, _wend, _wduration, count(*)
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
AND ts < "2022-01-01T00:10:00+08:00"
PARTITION BY tbname
SESSION(ts, 10m)
SLIMIT 10;
```
The above SQL queries the supertable meters for data with timestamps greater than or equal to 2022-01-01T00:00:00+08:00 and less than 2022-01-01T00:10:00+08:00; data is first partitioned by the subtable name tbname, then split according to a 10-minute session window; finally, data from the first 10 partitions is returned, showing subtable name, window start time, window end time, window duration, and the number of records within the window. The query results are as follows:
```text
tbname | _wstart | _wend | _wduration | count(*) |
===============================================================================================
d2 | 2022-01-01 00:00:00.000 | 2022-01-01 00:09:50.000 | 590000 | 60 |
d26 | 2022-01-01 00:00:00.000 | 2022-01-01 00:09:50.000 | 590000 | 60 |
d52 | 2022-01-01 00:00:00.000 | 2022-01-01 00:09:50.000 | 590000 | 60 |
d64 | 2022-01-01 00:00:00.000 | 2022-01-01 00:09:50.000 | 590000 | 60 |
d76 | 2022-01-01 00:00:00.000 | 2022-01-01 00:09:50.000 | 590000 | 60 |
d28 | 2022-01-01 00:00:00.000 | 2022-01-01 00:09:50.000 | 590000 | 60 |
d4 | 2022-01-01 00:00:00.000 | 2022-01-01 00:09:50.000 | 590000 | 60 |
d88 | 2022-01-01 00:00:00.000 | 2022-01-01 00:09:50.000 | 590000 | 60 |
d77 | 2022-01-01 00:00:00.000 | 2022-01-01 00:09:50.000 | 590000 | 60 |
d54 | 2022-01-01 00:00:00.000 | 2022-01-01 00:09:50.000 | 590000 | 60 |
Query OK, 10 row(s) in set (0.043489s)
```
### Event Window
Event windows are defined by start and end conditions. The window starts when the `start_trigger_condition` is met and closes when the `end_trigger_condition` is satisfied. Both `start_trigger_condition` and `end_trigger_condition` can be any condition expression supported by TDengine and can include different columns.
An event window can contain only one data point. That is, when a single data point meets both the `start_trigger_condition` and `end_trigger_condition` and is not currently within a window, it alone constitutes a window.
If an event window cannot be closed, it does not form a window and will not be output. That is, if data meets the `start_trigger_condition` and the window opens, but subsequent data does not meet the `end_trigger_condition`, the window cannot be closed. This data does not form a window and will not be output.
If event window queries are performed directly on a supertable, TDengine will aggregate the data of the supertable into a single timeline and then perform the event window calculation. If you need to perform event window queries on the result set of a subquery, the result set of the subquery needs to meet the requirements of outputting along a timeline and can output a valid timestamp column.
Consider the following SQL statement, the event window segmentation is illustrated in the diagram below.
```sql
select _wstart, _wend, count(*) from t event_window start with c1 > 0 end with c2 < 10
```
<figure>
<Image img={eventWindow} alt="Event window example"/>
<figcaption>Figure 4. Event window example</figcaption>
</figure>
Example SQL:
```sql
SELECT tbname, _wstart, _wend, _wduration, count(*)
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
AND ts < "2022-01-01T00:10:00+08:00"
PARTITION BY tbname
EVENT_WINDOW START WITH voltage >= 225 END WITH voltage < 235
LIMIT 5;
```
The above SQL queries the supertable meters for data with timestamps greater than or equal to 2022-01-01T00:00:00+08:00 and less than 2022-01-01T00:10:00+08:00; data is first partitioned by subtable name tbname, then segmented according to the event window conditions: voltage greater than or equal to 225V and less than 235V; finally, the first 5 rows of data from each partition are taken as the result, returning the subtable name, window start time, window end time, window duration, and the number of data points in the window. The query results are as follows:
```text
tbname | _wstart | _wend | _wduration | count(*) |
==============================================================================================
d0 | 2022-01-01 00:00:00.000 | 2022-01-01 00:01:30.000 | 90000 | 10 |
d0 | 2022-01-01 00:01:40.000 | 2022-01-01 00:02:30.000 | 50000 | 6 |
d0 | 2022-01-01 00:02:40.000 | 2022-01-01 00:03:10.000 | 30000 | 4 |
d0 | 2022-01-01 00:03:20.000 | 2022-01-01 00:07:10.000 | 230000 | 24 |
d0 | 2022-01-01 00:07:20.000 | 2022-01-01 00:07:50.000 | 30000 | 4 |
d1 | 2022-01-01 00:00:00.000 | 2022-01-01 00:01:30.000 | 90000 | 10 |
d1 | 2022-01-01 00:01:40.000 | 2022-01-01 00:02:30.000 | 50000 | 6 |
d1 | 2022-01-01 00:02:40.000 | 2022-01-01 00:03:10.000 | 30000 | 4 |
d1 | 2022-01-01 00:03:20.000 | 2022-01-01 00:07:10.000 | 230000 | 24 |
……
Query OK, 500 row(s) in set (0.328557s)
```
### Count Window
Count window is a method of dividing windows based on a fixed number of data rows. By default, the count window first sorts the data by timestamp, then divides the data into multiple windows based on the value of count_val, and finally performs aggregation calculations.
count_val represents the maximum number of data rows in each count window. When the total number of data rows is not divisible by count_val, the number of rows in the last window will be less than count_val.
sliding_val is a constant that specifies the number of rows by which the window slides forward, similar to SLIDING for INTERVAL. By adjusting sliding_val, you can control the degree of overlap between windows and thus analyze the data at a finer granularity (see the sketch after the example below).
For example, using the data model of a smart meter, the query SQL is as follows.
```sql
select _wstart, _wend, count(*)
from meters
where ts >= "2022-01-01T00:00:00+08:00" and ts < "2022-01-01T00:30:00+08:00"
count_window(1000);
```
The above SQL query returns the data from the supertable meters where the timestamp is greater than or equal to 2022-01-01T00:00:00+08:00 and less than 2022-01-01T00:30:00+08:00. It groups every 1000 data rows into a window and returns the start time, end time, and record count of each window. The query results are as follows:
```text
_wstart | _wend | count(*) |
=====================================================================
2022-01-01 00:00:00.000 | 2022-01-01 00:01:30.000 | 1000 |
2022-01-01 00:01:40.000 | 2022-01-01 00:03:10.000 | 1000 |
2022-01-01 00:03:20.000 | 2022-01-01 00:04:50.000 | 1000 |
2022-01-01 00:05:00.000 | 2022-01-01 00:06:30.000 | 1000 |
2022-01-01 00:06:40.000 | 2022-01-01 00:08:10.000 | 1000 |
2022-01-01 00:08:20.000 | 2022-01-01 00:09:50.000 | 1000 |
2022-01-01 00:10:00.000 | 2022-01-01 00:11:30.000 | 1000 |
2022-01-01 00:11:40.000 | 2022-01-01 00:13:10.000 | 1000 |
2022-01-01 00:13:20.000 | 2022-01-01 00:14:50.000 | 1000 |
2022-01-01 00:15:00.000 | 2022-01-01 00:16:30.000 | 1000 |
Query OK, 10 row(s) in set (0.062794s)
```
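A sketch of the sliding behavior described above: supplying a second argument to `count_window` (assumed here to be the sliding row count) makes consecutive windows overlap, e.g. each window still covers 1000 rows but starts 500 rows after the previous one.
```sql
select _wstart, _wend, count(*)
from meters
where ts >= "2022-01-01T00:00:00+08:00" and ts < "2022-01-01T00:30:00+08:00"
count_window(1000, 500);
```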
## Time-Series Extensions
Time-series extensions are a set of functions specially designed by TDengine for time-series data query scenarios. In general-purpose databases, similar functionality usually requires complex query statements and is less efficient. To reduce user costs and simplify the query process, TDengine provides these capabilities as built-in functions, achieving efficient and easy-to-use time-series data processing. The time-series specific functions are listed below; a usage sketch follows the table.
| Function | Description |
|:--------------:|:------------------------------------------------------------------------:|
|CSUM | Cumulative sum, ignoring NULL values. |
|DERIVATIVE | Calculates the rate of change per unit of a column in the table. The unit time interval can be specified by the time_interval parameter, with a minimum of 1 second (1s); the ignore_negative parameter can be 0 or 1, with 1 meaning negative values are ignored. |
|DIFF | Calculates the difference between the value of a column and the corresponding value of the previous row. ignore_negative can be 0 or 1, default is 0, not ignoring negative values. When ignore_negative is 1, it means negative values are ignored.|
|IRATE | Calculates the instantaneous growth rate using the last two samples in the time interval; if these two values are decreasing, only the last value is used for calculation, not the difference between them. |
|MAVG | Calculates the moving average of consecutive k values. If the number of input rows is less than k, no result is output. The valid input range for parameter k is 1 ≤ k ≤ 1000.|
|STATECOUNT | Returns the number of consecutive records that meet a certain condition, appending the result as a new column at the end of each row. The condition is calculated based on the parameter, adding 1 if true, resetting to -1 if false, and skipping if the data is NULL. |
|STATEDURATION | Returns the duration of consecutive records that meet a certain condition, appending the result as a new column at the end of each row. The condition is calculated based on the parameter, adding the time length between two records if true (the time length of the first record meeting the condition is counted as 0), resetting to -1 if false, and skipping if the data is NULL.|
|TWA | Time Weighted Average function. Calculates the time-weighted average of a column over a period of time. |
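For instance, the following sketch applies `DIFF` to a single subtable to compute the row-to-row change in current; the timestamp column is selected alongside the function so each difference can be located in time (the column alias is illustrative).
```sql
SELECT ts, diff(current) AS current_delta
FROM d1001
WHERE ts >= "2022-01-01T00:00:00+08:00"
  AND ts < "2022-01-01T00:05:00+08:00";
```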
## Nested Queries
Nested queries, also known as subqueries, refer to a structure in SQL where the result of an inner query can be used as the input for an outer query. TDengine supports non-correlated subqueries within the from clause. Non-correlated means that the subquery does not use parameters from the parent query. After the from clause in a select query, an independent select statement can be included, which is enclosed in parentheses. By using nested queries, you can reference the result of another query within a single query, thus enabling more complex data processing and analysis. For example, consider the following SQL for smart meters:
```sql
SELECT max(voltage),*
FROM (
SELECT tbname,last_row(ts),voltage,current,phase,groupid,location
FROM meters
PARTITION BY tbname
)
GROUP BY groupid;
```
The above SQL performs an inner query on the supertable meters, grouping by subtable name, and querying the latest data for each subtable; the outer query takes the result of the inner query as input and aggregates by groupid, querying the maximum voltage for each group.
TDengine's nested queries follow these rules:
1. The result of the inner query serves as a "virtual table" for the outer query to use, and it is recommended to alias this virtual table for easy reference in the outer query (see the sketch after this list).
2. The outer query supports direct referencing of columns or pseudocolumns from the inner query by column name or alias.
3. Both inner and outer queries support regular joins between tables/supertables. The result of the inner query can also participate in joins with data subtables.
4. The features supported by the inner query are consistent with those of non-nested queries. The ORDER BY clause in the inner query generally has no meaning and is recommended to be avoided to prevent unnecessary resource consumption.
5. Compared to non-nested queries, the outer query has the following limitations in supported features:
    - If the result data of the inner query does not provide timestamps, then functions implicitly dependent on timestamps will not work properly in the outer query. Examples include: INTERP, DERIVATIVE, IRATE, LAST_ROW, FIRST, LAST, TWA, STATEDURATION, TAIL, UNIQUE.
    - If the result data of the inner query is not ordered by timestamp, then functions dependent on data being ordered by time will not work properly in the outer query. Examples include: LEASTSQUARES, ELAPSED, INTERP, DERIVATIVE, IRATE, TWA, DIFF, STATECOUNT, STATEDURATION, CSUM, MAVG, TAIL, UNIQUE.
    - Functions that require two passes of scanning will not work properly in the outer query. Such functions include: PERCENTILE.
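As a minimal sketch of rules 1 and 2 above (table and column names taken from the smart-meter example), the inner query below is aliased as `t` and the outer query references its columns through that alias.
```sql
SELECT t.groupid, max(t.voltage)
FROM (
    SELECT groupid, voltage FROM meters WHERE voltage > 230
) t
GROUP BY t.groupid;
```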
## UNION Clause
TDengine supports the UNION operator. That is, if multiple SELECT clauses return result sets with the exact same structure (column names, types, number, and order), these result sets can be combined using the UNION clause.
Example:
```sql
(SELECT tbname,* FROM d1 limit 1)
UNION ALL
(SELECT tbname,* FROM d11 limit 2)
UNION ALL
(SELECT tbname,* FROM d21 limit 3);
```
The above SQL queries 1 record from subtable d1, 2 records from subtable d11, and 3 records from subtable d21, and combines the results. The returned results are as follows:
```text
tbname | ts | current | voltage | phase |
====================================================================================
d11 | 2020-09-13 20:26:40.000 | 11.5680809 | 247 | 146.5000000 |
d11 | 2020-09-13 20:26:50.000 | 14.2392311 | 234 | 148.0000000 |
d1 | 2020-09-13 20:26:40.000 | 11.5680809 | 247 | 146.5000000 |
d21 | 2020-09-13 20:26:40.000 | 11.5680809 | 247 | 146.5000000 |
d21 | 2020-09-13 20:26:50.000 | 14.2392311 | 234 | 148.0000000 |
d21 | 2020-09-13 20:27:00.000 | 10.0999422 | 251 | 146.0000000 |
Query OK, 6 row(s) in set (0.006438s)
```
In the same SQL statement, a maximum of 100 UNION clauses are supported.
## Association Query
### Join Concept
1. Driving Table
In association queries, the role of the driving table depends on the type of join used: in the Left Join series, the left table acts as the driving table; in the Right Join series, the right table acts as the driving table.
2. Join Condition
In TDengine, the join condition refers to the condition specified for table association. For all association queries (except ASOF Join and Window Join), a join condition must be specified, usually appearing after `on`. In ASOF Join, conditions appearing after `where` can also be considered as join conditions, while Window Join specifies join conditions through `window_offset`.
Except for ASOF Join, all Join types supported by TDengine must explicitly specify join conditions. ASOF Join, because it defines implicit join conditions by default, does not need to explicitly specify join conditions if the default conditions meet the requirements.
For types of joins other than ASOF Join and Window Join, the join conditions can include not only the primary join condition but also any number of other join conditions. There must be an `and` relationship between the primary join condition and other join conditions, but there is no such restriction among other join conditions. Other join conditions can include any logical operation combination of primary key columns, tag columns, ordinary columns, constants, and their scalar functions or operations.
For example, with smart meters, the following SQL statements all contain valid join conditions.
```sql
select a.* from meters a left join meters b on a.ts = b.ts and a.ts > '2023-10-18 10:00:00.000';
select a.* from meters a left join meters b on a.ts = b.ts and (a.ts > '2023-10-18 10:00:00.000' or a.ts < '2023-10-17 10:00:00.000');
select a.* from meters a left join meters b on timetruncate(a.ts, 1s) = timetruncate(b.ts, 1s) and (a.ts + 1s > '2023-10-18 10:00:00.000' or a.groupId > 0);
select a.* from meters a left asof join meters b on timetruncate(a.ts, 1s) < timetruncate(b.ts, 1s) and a.groupId = b.groupId;
```
3. Primary Join Condition
As a time-series database, all association queries in TDengine revolve around the primary key column. Therefore, for all association queries except ASOF Join and Window Join, an equality join condition on the primary key column must be included. The first appearing primary key column equality join condition in the join conditions will be considered as the primary join condition. The primary join condition of ASOF Join can include non-equality conditions, while the primary join condition of Window Join is specified through `window_offset`.
Except for Window Join, TDengine supports performing `timetruncate` function operations in the primary join condition, such as `on timetruncate(a.ts, 1s) = timetruncate(b.ts, 1s)`. Apart from this, other functions and scalar operations are currently not supported.
4. Grouping Condition
The time-series database features of ASOF Join and Window Join support grouping the input data of the association query first, and then performing association operations for each group. Grouping only applies to the input of the association query, and the output results will not contain grouping information. Equality conditions appearing after `on` in ASOF Join and Window Join (except for the primary join condition of ASOF Join) will be treated as grouping conditions.
5. Primary Key Timeline
As a time-series database, TDengine requires each table (subtable) to have a primary key timestamp column, which serves as the primary key timeline of the table for many time-related operations. In the results of a subquery or a Join operation, it is also necessary to clearly identify which column is considered the primary key timeline for subsequent time-related operations. In a subquery, the first ordered primary key column (or an operation on it) or pseudo primary key column (`_wstart`, `_wend`) appearing in the query results is considered the primary key timeline of the output table. The selection of the primary key timeline in Join output results follows these rules:
- In the Left Join and Right Join series, the primary key column of the driving table (subquery) will be used as the primary key timeline for subsequent queries. Additionally, in the Window Join window, since both tables are ordered, any table's primary key column can be used as the primary key timeline within the window, with a preference for the primary key column of the local table.
- Inner Join can use the primary key column of any table as the primary key timeline, and when there are grouping conditions similar to tag column equality conditions and they are in an `and` relationship with the primary join condition, a primary key timeline cannot be produced.
- Full Join, because it cannot produce any valid primary key time-series, does not have a primary key timeline, which also means that operations related to the timeline cannot be performed in Full Join.
### Syntax Explanation
In the following content, we will introduce the Left Join and Right Join series in a unified and parallel manner. Therefore, in the subsequent introduction of the Outer, Semi, Anti-Semi, ASOF, Window series, etc., we have adopted the expression "Left/Right" to cover both Left Join and Right Join related knowledge simultaneously. The "/" symbol here specifically refers to Left Join before the slash, and Right Join after the slash. By using this expression, we can more clearly demonstrate the characteristics and usage of these two types of Join operations.
For example, when we mention "left / right table", for Left Join, it specifically refers to the left table, and for Right Join, it specifically refers to the right table. Similarly, when we mention "right / left table", for Left Join, it specifically refers to the right table, and for Right Join, it specifically refers to the left table.
### Join Features
The table below lists the types of Joins supported in TDengine and their definitions.
| Join Type | Definition |
|:------------------------:|:--------------------------------------------------------:|
|Inner Join | Inner join, only data that meets the join conditions in both the left and right tables are returned, can be seen as the intersection of data that meets the join conditions in both tables |
|Left/Right Outer Join | Left / Right (outer) join, includes both the set of data that meets the join conditions in both tables and the set of data in the left / right table that does not meet the join conditions |
|Left/Right Semi Join | Left / Right semi join, usually expresses the meaning of in, exists, i.e., for any data in the left / right table, it returns the left / right table row data only if there is any data in the right / left table that meets the join conditions |
|Left/Right Anti-Semi Join | Left / Right anti join, the logic is exactly opposite to that of the left / right semi join, usually expresses the meaning of not in, not exists, i.e., for any data in the left / right table, it returns the left / right table row data only if there is no data in the right / left table that meets the join conditions |
|Left/Right ASOF Join | Left / Right approximate match join, unlike other traditional join operations that require exact matches, ASOF Join allows for approximate matching in a specified matching mode, i.e., matching by the closest primary key timestamp |
|Left/Right Window Join | Left / Right window join, constructs windows based on the primary key timestamp of each row in the left / right table and the window boundaries and performs window joining, supports projection, scalar, and aggregation operations within the window |
|Full Outer Join | Full (outer) join, includes both the set of data that meets the join conditions in both tables and the set of data in both tables that does not meet the join conditions |
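To make the ASOF Join row above concrete, the following sketch (assuming the subtables d1001 and d1002 from earlier sections, and relying on the default implicit primary join condition of ASOF Join) pairs each row of d1001 with the closest matching row of d1002 by primary key timestamp.
```sql
select a.ts, a.current, b.voltage
from d1001 a left asof join d1002 b;
```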
### Constraints and Limitations
1. Input Timeline Limitation
Currently, all Join operations in TDengine require the input data to contain a valid primary key timeline. For all table queries, this requirement is usually met. However, for subqueries, it is necessary to ensure that the output data contains a valid primary key timeline.
2. Join Condition Limitations
The limitations on join conditions include the following.
- Except for ASOF Join and Window Join, all other join operations must include a primary join condition on the primary key column.
- Only `and` operations are supported between the primary join condition and other conditions.
- When used in the primary join condition, the primary key column only supports the `timetruncate` function; other functions and scalar operations are not supported. There are no such restrictions when the primary key column appears in other conditions.
3. Grouping Condition Limitations
The limitations on grouping conditions include the following.
- Only equality conditions on tag columns and on ordinary columns other than the primary key column are supported.
- Scalar operations are not supported.
- Multiple grouping conditions are supported, and only `and` operations are supported between conditions.
4. Query Result Order Limitations
The limitations on the order of query results include the following.
- In scenarios involving basic tables, subtables, or subqueries with no grouping conditions and no sorting, the query results are output in the order of the primary key column of the driving table.
- In supertable queries, Full Join, or scenarios with grouping conditions but no sorting, the output order of the query results is not fixed. Therefore, when a specific order is required, an explicit sorting operation must be performed. Some functions that depend on the timeline may not be executable due to the lack of a valid timeline output.

docs/en/05-basic/index.md Normal file
View File

@ -0,0 +1,13 @@
---
title: Basic Features
slug: /basic-features
---
This chapter mainly introduces the data model of TDengine as well as its write and query functions.
```mdx-code-block
import DocCardList from '@theme/DocCardList';
import {useCurrentSidebarCategory} from '@docusaurus/theme-common';
<DocCardList items={useCurrentSidebarCategory().items}/>
```

View File

@ -0,0 +1,143 @@
---
title: Data Subscription
slug: /advanced-features/data-subscription
---
To meet the needs of applications to obtain data written to TDengine in real-time, or to process data in the order of event arrival, TDengine provides data subscription and consumption interfaces similar to those of message queue products. In many scenarios, by adopting TDengine's time-series big data platform, there is no need to integrate additional message queue products, thus simplifying application design and reducing maintenance costs.
Similar to Kafka, users need to define topics in TDengine. However, a topic in TDengine can be a database, a supertable, or based on existing supertables, subtables, or basic tables with specific query conditions, i.e., a query statement. Users can use SQL to filter by tags, table names, columns, expressions, etc., and perform scalar function and UDF computations (excluding data aggregation). Compared to other message queue tools, this is the biggest advantage of TDengine's data subscription feature. It offers greater flexibility; the granularity of the data is determined by the SQL defining the topic, and the filtering and preprocessing of data are automatically handled by TDengine, reducing the amount of data transmitted and simplifying application complexity.
After subscribing to a topic, consumers can receive the latest data in real-time. Multiple consumers can form a consumption group to share consumption progress, enabling multi-threaded, distributed data consumption to increase consumption speed. Consumers in different consumption groups do not share consumption progress even if they consume the same topic. A consumer can subscribe to multiple topics. If the topic corresponds to a supertable or database, the data may be distributed across multiple different nodes or data shards. When there are multiple consumers in a consumption group, consumption efficiency can be improved. TDengine's message queue provides an ACK (Acknowledgment) mechanism to ensure at least once consumption in complex environments such as crashes and restarts.
To implement the above functions, TDengine automatically creates indexes for Write-Ahead Logging (WAL) files to support fast random access and provides flexible and configurable file switching and retention mechanisms. Users can specify the retention time and size of WAL files according to their needs. Through these methods, WAL is transformed into a persistent storage engine that retains the order of event arrival. For queries created in the form of topics, TDengine reads data from WAL. During consumption, TDengine reads data directly from WAL based on the current consumption progress, performs filtering, transformation, and other operations using a unified query engine, and then pushes the data to consumers.
Starting from version 3.2.0.0, data subscription supports vnode migration and splitting. Because data subscription depends on WAL files, and the WAL is not synchronized during vnode migration and splitting, any WAL data that was not consumed before the migration or split cannot be consumed afterwards. Therefore, please ensure that all data has been consumed before performing vnode migration or splitting; otherwise, data loss may occur during consumption.
## Topics
TDengine uses SQL to create three types of topics, which are introduced below.
### Query Topic
Subscribe to the results of an SQL query, essentially a continuous query, returning only the latest values each time, with the following creation syntax:
```sql
CREATE TOPIC [IF NOT EXISTS] topic_name as subquery
```
This SQL subscribes through a SELECT statement (either SELECT *, or a specific query such as SELECT ts, c1, optionally with condition filtering and scalar function computations; aggregate functions and time window aggregation are not supported). Note that:
1. Once this type of TOPIC is created, the structure of the subscribed data is fixed.
2. Columns or tags that are subscribed to or used for calculations cannot be deleted (ALTER table DROP) or modified (ALTER table MODIFY).
3. If table structure changes occur, newly added columns will not appear in the results.
4. For select *, it subscribes to all columns at the time of creation (data columns for subtables and basic tables, data columns plus tag columns for supertables).
Suppose you need to subscribe to data where the voltage value in all smart meters is greater than 200, and only return the timestamp, current, and voltage (not phase), then you can create the topic power_topic with the following SQL.
```sql
CREATE TOPIC power_topic AS SELECT ts, current, voltage FROM power.meters WHERE voltage > 200;
```
### Supertable Topic
Subscribe to all data in a supertable, with the following syntax:
```sql
CREATE TOPIC [IF NOT EXISTS] topic_name [with meta] AS STABLE stb_name [where_condition]
```
The difference from subscribing using `SELECT * from stbName` is:
1. It does not restrict user table structure changes, i.e., both structure changes and new data after changes can be subscribed to.
2. It returns unstructured data, and the structure of the returned data will change with the structure of the supertable.
3. The with meta parameter is optional; when specified, it also returns the statements for creating supertables, subtables, etc., mainly used for supertable migration in taosX.
4. The where_condition parameter is optional; when selected, it will be used to filter subtables that meet the conditions, subscribing to these subtables. The where condition cannot include ordinary columns, only tags or tbname, and functions can be used to filter tags, but not aggregate functions, as subtable tag values cannot be aggregated. It can also be a constant expression, such as 2 > 1 (subscribe to all subtables), or false (subscribe to 0 subtables).
5. Returned data does not include tags.
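For instance, a hedged example assuming the `power.meters` supertable with a `groupId` tag from the smart-meter examples: the following creates a supertable topic that also carries metadata and only covers subtables whose `groupId` equals 1.
```sql
CREATE TOPIC IF NOT EXISTS meters_topic WITH META AS STABLE power.meters WHERE groupId = 1;
```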
### Database Topics
Subscribe to all data in a database, with the syntax as follows:
```sql
CREATE TOPIC [IF NOT EXISTS] topic_name [with meta] AS DATABASE db_name;
```
This statement creates a subscription that includes all table data in the database:
1. The `with meta` parameter is optional. When specified, it will return the creation, deletion, and modification statements of the metadata of all supertables, subtables, and basic tables in the database, mainly used for database migration in taosX.
2. Subscriptions to supertables and databases are advanced subscription modes and are prone to errors. If you really need to use them, please consult technical support personnel.
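For reference, a hedged example of the syntax (the database name is illustrative), subscribing to every table in the `power` database including metadata changes:
```sql
CREATE TOPIC IF NOT EXISTS power_db_topic WITH META AS DATABASE power;
```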
## Delete Topic
If you no longer need to subscribe to the data, you can delete the topic. If the topic is currently subscribed to by a consumer, it can be forcibly deleted using the FORCE syntax; after a forced deletion, the subscribed consumers will receive errors when consuming data (the FORCE syntax is supported since version 3.3.6.0).
```sql
DROP TOPIC [IF EXISTS] [FORCE] topic_name;
```
## View Topics
```sql
SHOW TOPICS;
```
The above SQL will display information about all topics under the current database.
## Consumers
### Creating Consumers
Consumers can only be created through the TDengine client driver or APIs provided by connectors. For details, refer to the development guide or reference manual.
### View Consumers
```sql
SHOW CONSUMERS;
```
Displays information about all consumers in the current database, including the consumer's status, creation time, etc.
### Delete Consumer Group
When a consumer is created, it is assigned to a consumer group. Consumers cannot be deleted explicitly, but the consumer group can be deleted. If consumers in the group are currently consuming, the FORCE syntax can be used to force deletion; after a forced deletion, the subscribed consumers will receive errors when consuming data (the FORCE syntax is supported since version 3.3.6.0).
```sql
DROP CONSUMER GROUP [IF EXISTS] [FORCE] cgroup_name ON topic_name;
```
## Data Subscription
### View Subscription Information
```sql
SHOW SUBSCRIPTIONS;
```
Displays consumption information of the topic on different vgroups, useful for viewing consumption progress.
### Subscribe to Data
TDengine provides comprehensive and rich data subscription APIs, aimed at meeting data subscription needs under different programming languages and frameworks. These interfaces include but are not limited to creating consumers, subscribing to topics, unsubscribing, obtaining real-time data, submitting consumption progress, and getting and setting consumption progress. Currently, TDengine supports a variety of mainstream programming languages, including C, Java, Go, Rust, Python, and C#, enabling developers to easily use TDengine's data subscription features in various application scenarios.
It is worth mentioning that TDengine's data subscription APIs are highly consistent with the popular Kafka subscription APIs in the industry, making it easy for developers to get started and leverage their existing knowledge and experience. To facilitate user understanding and reference, TDengine's official documentation provides detailed descriptions and example codes of various APIs, which can be accessed in the connectors section of the TDengine official website. Through these APIs, developers can efficiently implement real-time data subscription and processing to meet data handling needs in various complex scenarios.
### Replay Feature
TDengine's data subscription feature supports a replay function, allowing users to replay the data stream in the actual order of data writing. This feature is based on TDengine's efficient WAL mechanism, ensuring data consistency and reliability.
To use the data subscription's replay feature, users can specify the time range in the query statement to precisely control the start and end times of the replay. This allows users to easily replay data within a specific time period, whether for troubleshooting, data analysis, or other purposes.
If the following 3 data entries were written, then during replay, the first entry is returned first, followed by the second entry after 5 seconds, and the third entry 3 seconds after obtaining the second entry.
```text
2023/09/22 00:00:00.000
2023/09/22 00:00:05.000
2023/09/22 00:00:08.000
```
When using the data subscription's replay feature, note the following:
- Enable the replay function by setting the consumer parameter `enable.replay` to true.
- The replay function of data subscription only supports data playback for query subscriptions; supertable and database subscriptions do not support playback.
- Replay does not support progress saving.
- Because data playback itself requires processing time, there is a precision error of several tens of milliseconds in playback.
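Putting these pieces together, a hedged sketch (the topic name and time range are illustrative): create a query topic restricted to the time range of the three rows above, then subscribe to it with the consumer parameter `enable.replay` set to true; the consumer will receive the rows with the original 5-second and 3-second gaps between them.
```sql
CREATE TOPIC IF NOT EXISTS replay_topic AS
SELECT ts, current, voltage
FROM power.meters
WHERE ts >= '2023-09-22 00:00:00.000' AND ts <= '2023-09-22 00:00:08.000';
```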

View File

@ -0,0 +1,100 @@
---
title: Caching
slug: /advanced-features/caching
---
In the big data applications of the Internet of Things (IoT) and the Industrial Internet of Things (IIoT), the value of real-time data often far exceeds that of historical data. Enterprises not only need data processing systems to have efficient real-time writing capabilities but also need to quickly obtain the latest status of devices or perform real-time calculations and analyses on the latest data. Whether it's monitoring the status of industrial equipment, tracking vehicle locations in the Internet of Vehicles, or real-time readings of smart meters, current values are indispensable core data in business operations. These data are directly related to production safety, operational efficiency, and user experience.
For example, in industrial production, the current operating status of production line equipment is crucial. Operators need to monitor key indicators such as temperature, pressure, and speed in real-time. If there is an anomaly in the equipment, these data must be presented immediately so that process parameters can be quickly adjusted to avoid downtime or greater losses. In the field of the Internet of Vehicles, taking DiDi as an example, the real-time location data of vehicles is key to optimizing dispatch strategies and improving operational efficiency on the DiDi platform, ensuring that each passenger gets on the vehicle quickly and enjoys a higher quality travel experience.
At the same time, dashboard systems and smart meters, as windows for on-site operations and user ends, also need real-time data support. Whether it's factory managers obtaining real-time production indicators through dashboards or household users checking the usage of smart water and electricity meters at any time, real-time data not only affects operational and decision-making efficiency but also directly relates to user satisfaction with the service.
## Limitations of Traditional Caching Solutions
To meet these high-frequency real-time query needs, many enterprises choose to integrate caching technologies like Redis into their big data platforms, enhancing query performance by adding a caching layer between the database and applications. However, this approach also brings several problems:
- Increased system complexity: Additional deployment and maintenance of the cache cluster are required, raising higher demands on system architecture.
- Rising operational costs: Additional hardware resources are needed to support the cache, increasing maintenance and management expenses.
- Consistency issues: Data synchronization between the cache and the database requires additional mechanisms to ensure consistency, otherwise data inconsistencies may occur.
## TDengine's Solution: Built-in Read Cache
To address these issues, TDengine has designed and implemented a read cache mechanism specifically for high-frequency real-time query scenarios in IoT and IIoT. This mechanism automatically caches the last record of each table in memory, thus meeting users' real-time query needs for current values without introducing third-party caching technologies.
TDengine uses a time-driven cache management strategy, prioritizing the storage of the latest data in the cache, allowing for quick results without needing to access the hard disk. When the cache capacity reaches the set limit, the system will batch-write the earliest data to the disk, enhancing query efficiency and effectively reducing the disk's write load, thereby extending the hardware's lifespan.
Users can customize the cache mode by setting the `cachemodel` parameter, including caching the latest row of data, the most recent non-NULL value of each column, or caching both rows and columns. This flexible design is particularly important in IoT scenarios, making real-time queries of device status more efficient and accurate.
This built-in read cache mechanism significantly reduces query latency, avoids the complexity and operational costs of introducing external systems like Redis, and reduces the pressure of frequent queries on the storage system, greatly enhancing the overall throughput of the system. It ensures stable and efficient operation even in high-concurrency scenarios. Through read caching, TDengine provides a more lightweight real-time data processing solution, not only optimizing query performance but also reducing overall operational costs, providing strong technical support for IoT and IIoT users.
## TDengine's Read Cache Configuration
When creating a database, users can choose whether to enable the caching mechanism to store the latest data of each subtable in that database. This caching mechanism is controlled by the database creation parameter `cachemodel`. The parameter `cachemodel` has the following 4 options:
- none: no caching
- last_row: caches the most recent row of data from the subtable, significantly improving the performance of the `last_row` function
- last_value: caches the most recent non-NULL value of each column of the subtable, significantly improving the performance of the `last` function when no special clauses (such as WHERE, ORDER BY, GROUP BY, INTERVAL) are involved
- both: caches both the most recent row and the most recent column values, equivalent to enabling the `last_row` and `last_value` behaviors simultaneously
When using database read caching, the `cachesize` parameter can be used to configure the memory size for each vnode.
- cachesize: represents the memory size used to cache the most recent data of subtables in each vnode. The default is 1, the range is [1, 65536], in MB. It should be configured reasonably according to the machine memory.
For specific database creation, related parameters, and operation instructions, please refer to [Creating a Database](../../tdengine-reference/sql-manual/manage-databases/)
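For example, the following hedged sketch creates a database with both row and column caching enabled and 16 MB of read cache per vnode (the database name and cache size are illustrative):
```sql
CREATE DATABASE power_cache CACHEMODEL 'both' CACHESIZE 16;
```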
## Caching Practices for Real-Time Data Queries
This section takes smart electric meters as an example to look in detail at how LAST caching improves the performance of real-time data queries. First, use the taosBenchmark tool to generate the time-series data of smart electric meters needed for this chapter.
```shell
# taosBenchmark -d power -Q --start-timestamp=1600000000000 --tables=10000 --records=10000 --time-step=10000 -y
```
With the above command, the taosBenchmark tool creates a test database for electric meters named `power`, generating a total of 100 million time-series data entries. The timestamps start from `1600000000000 (2020-09-13T20:26:40+08:00)`; the supertable `meters` contains 10,000 devices (subtables), each device has 10,000 data entries, and the data collection interval is 10 seconds per entry.
To query the latest current and timestamp data of any electric meter, execute the following SQL:
```sql
taos> select last(ts,current) from meters;
last(ts) | last(current) |
=================================================
2020-09-15 00:13:10.000 | 1.1294620 |
Query OK, 1 row(s) in set (0.353815s)
taos> select last_row(ts,current) from meters;
last_row(ts) | last_row(current) |
=================================================
2020-09-15 00:13:10.000 | 1.1294620 |
Query OK, 1 row(s) in set (0.344070s)
```
If you want to use caching to query the latest timestamp data of any electric meter, execute the following SQL and check if the database cache is effective.
```sql
taos> alter database power cachemodel 'both' ;
Query OK, 0 row(s) affected (0.046092s)
taos> show create database power\G;
*************************** 1.row ***************************
Database: power
Create Database: CREATE DATABASE `power` BUFFER 256 CACHESIZE 1 CACHEMODEL 'both' COMP 2 DURATION 14400m WAL_FSYNC_P...
Query OK, 1 row(s) in set (0.000282s)
```
Query the latest real-time data of the electric meter again; the first query will perform cache computation, significantly reducing the latency of subsequent queries.
```sql
taos> select last(ts,current) from meters;
last(ts) | last(current) |
=================================================
2020-09-15 00:13:10.000 | 1.1294620 |
Query OK, 1 row(s) in set (0.044021s)
taos> select last_row(ts,current) from meters;
last_row(ts) | last_row(current) |
=================================================
2020-09-15 00:13:10.000 | 1.1294620 |
Query OK, 1 row(s) in set (0.046682s)
```
As can be seen, the query latency has been reduced from 353/344ms to 44ms, an improvement of approximately 8 times.

View File

@ -0,0 +1,299 @@
---
title: Stream Processing
slug: /advanced-features/stream-processing
---
import Image from '@theme/IdealImage';
import watermarkImg from '../assets/stream-processing-01.png';
In the processing of time-series data, it is often necessary to clean and preprocess the raw data before using a time-series database for long-term storage. Moreover, it is common to use the original time-series data to generate new time-series data through calculations. In traditional time-series data solutions, it is often necessary to deploy systems like Kafka, Flink, etc., for stream processing. However, the complexity of stream processing systems brings high development and operational costs.
TDengine's stream computing engine provides the capability to process data streams in real-time as they are written. It uses SQL to define real-time stream transformations. Once data is written into the stream's source table, it is automatically processed in the defined manner and pushed to the destination table according to the defined trigger mode. It offers a lightweight solution that replaces complex stream processing systems and can provide millisecond-level computational result latency under high-throughput data writing scenarios.
Stream computing can include data filtering, scalar function computations (including UDFs), and window aggregation (supporting sliding windows, session windows, and state windows). It can use supertables, subtables, and basic tables as source tables, writing into destination supertables. When creating a stream, the destination supertable is automatically created, and newly inserted data is processed and written into it as defined by the stream. Using the `partition by` clause, partitions can be divided by table name or tags, and different partitions will be written into different subtables of the destination supertable.
TDengine's stream computing can support aggregation of supertables distributed across multiple nodes and can handle out-of-order data writing. It provides a watermark mechanism to measure the degree of tolerance for data disorder and offers an `ignore expired` configuration option to decide the handling strategy for out-of-order data — either discard or recalculate.
Below is a detailed introduction to the specific methods used in stream computing.
## Creating Stream Computing
The syntax is as follows:
```sql
CREATE STREAM [IF NOT EXISTS] stream_name [stream_options] INTO stb_name
[(field1_name, ...)] [TAGS (column_definition [, column_definition] ...)]
SUBTABLE(expression) AS subquery
stream_options: {
TRIGGER [AT_ONCE | WINDOW_CLOSE | MAX_DELAY time | FORCE_WINDOW_CLOSE | CONTINUOUS_WINDOW_CLOSE [recalculate rec_time_val] ]
WATERMARK time
IGNORE EXPIRED [0|1]
DELETE_MARK time
FILL_HISTORY [0|1] [ASYNC]
IGNORE UPDATE [0|1]
}
column_definition:
col_name col_type [COMMENT 'string_value']
```
The subquery is a subset of the regular query syntax.
```sql
subquery: SELECT select_list
from_clause
[WHERE condition]
[PARTITION BY tag_list]
[window_clause]
window_clause: {
SESSION(ts_col, tol_val)
| STATE_WINDOW(col)
| INTERVAL(interval_val [, interval_offset]) [SLIDING (sliding_val)]
| EVENT_WINDOW START WITH start_trigger_condition END WITH end_trigger_condition
| COUNT_WINDOW(count_val[, sliding_val])
}
```
The subquery supports session windows, state windows, time windows, event windows, and count windows. When used with supertables, state windows, event windows, and count windows must be used together with `partition by tbname`.
1. SESSION is a session window, where tol_val is the maximum range of the time interval. All data within the tol_val time interval belong to the same window. If the time interval between two consecutive data points exceeds tol_val, the next window automatically starts.
2. STATE_WINDOW is a state window. The col is used to identify the state value. Values with the same state value belong to the same state window. When the value of col changes, the current window ends and the next window is automatically opened.
3. INTERVAL is a time window, which can be further divided into sliding time windows and tumbling time windows. The INTERVAL clause specifies the time span of the window, and the SLIDING clause specifies the step by which the window slides forward. When interval_val equals sliding_val, the time window is a tumbling window; otherwise, it is a sliding window. Note: sliding_val must be less than or equal to interval_val.
4. EVENT_WINDOW is an event window, defined by start and end conditions. The window starts when the start_trigger_condition is met and closes when the end_trigger_condition is met. start_trigger_condition and end_trigger_condition can be any condition expressions supported by TDengine and can include different columns.
5. COUNT_WINDOW is a counting window, divided by a fixed number of data rows. count_val is a constant, a positive integer, and must be at least 2 and less than 2147483648. count_val represents the maximum number of data rows in each COUNT_WINDOW. If the total number of data rows cannot be evenly divided by count_val, the last window will have fewer rows than count_val. sliding_val is a constant, representing the number of rows the window slides, similar to the SLIDING in INTERVAL.
The definition of a window is exactly the same as in the time-series data window query, for details refer to the TDengine window functions section.
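To make the window options above concrete, here is a hedged sketch of an event-window stream and a count-window stream over the smart-meter data (the target table names and thresholds are illustrative); as noted above, both window types require `partition by tbname` when used on a supertable:
```sql
-- Event window: one window per episode during which voltage stays above 220
CREATE STREAM high_voltage_s INTO high_voltage AS
SELECT _wstart, _wend, MAX(voltage) AS max_voltage
FROM power.meters PARTITION BY tbname
EVENT_WINDOW START WITH voltage > 220 END WITH voltage <= 220;

-- Count window: aggregate every 100 rows per subtable
CREATE STREAM batch_avg_s INTO batch_avg AS
SELECT _wstart, AVG(current) AS avg_current
FROM power.meters PARTITION BY tbname
COUNT_WINDOW(100);
```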
The following SQL creates a stream computation. After execution, TDengine automatically creates a supertable named avg_vol. This stream uses a 1-minute time window sliding forward every 30 seconds to calculate the average voltage of the smart meters and writes the results computed from the meters data into avg_vol. Data from different partitions is written into separate subtables.
```sql
CREATE STREAM avg_vol_s INTO avg_vol AS
SELECT _wstart, count(*), avg(voltage) FROM power.meters PARTITION BY tbname INTERVAL(1m) SLIDING(30s);
```
The explanations of the relevant parameters involved in this section are as follows.
- stb_name is the name of the supertable where the computation results are saved. If this supertable does not exist, it will be created automatically; if it already exists, the column schema information will be checked. See the section Writing to an Existing Supertable below.
- The tags clause defines the rules for creating tags in the stream computation. Through the tags field, custom tag values can be generated for each partition's corresponding subtable.
## Rules and Strategies for Stream Computation
### Partitioning in Stream Computation
In TDengine, we can use the partition by clause combined with tbname, tag columns, ordinary columns, or expressions to perform multi-partition computations on a stream. Each partition has its own timeline and time window, and they will aggregate data separately and write the results into different subtables of the destination table. If the partition by clause is not used, all data will be written into the same subtable by default.
Specifically, partition by + tbname is a very practical operation, which means performing stream computation for each subtable. The advantage of this is that it allows for customized processing based on the characteristics of each subtable, thereby improving computational efficiency.
When creating a stream, if the SUBTABLE clause is not used, the supertable created by the stream computation will contain a unique tag column groupId. Each partition is assigned a unique groupId, and the corresponding subtable name is derived using the MD5 algorithm. TDengine automatically creates these subtables to store the computation results of each partition. This mechanism makes data management more flexible and efficient and also facilitates subsequent data querying and analysis.
If the statement for creating the stream contains a SUBTABLE clause, users can generate custom table names for each partition's corresponding subtable. An example follows.
```sql
CREATE STREAM avg_vol_s INTO avg_vol SUBTABLE(CONCAT('new-', tname)) AS SELECT _wstart, count(*), avg(voltage) FROM meters PARTITION BY tbname tname INTERVAL(1m);
```
In the PARTITION clause, an alias tname is defined for tbname, and aliases from the PARTITION clause can be used in expressions in the SUBTABLE clause. In the example above, a newly created subtable is named according to the rule `new-` + original subtable name + `_` + supertable name + `_` + groupId.
**Note**: If the length of the subtable name exceeds the limit of TDengine, it will be truncated. If the subtable name to be generated already exists in another supertable, since TDengine's subtable names are unique, the creation of the corresponding new subtable and the writing of data will fail.
### Stream Computation Processing Historical Data
Under normal circumstances, stream computation tasks will not process data that was written to the source table before the stream was created. This is because the trigger for stream computation is based on newly written data, not existing data. However, if we need to process these existing historical data, we can set the fill_history option to 1 when creating the stream.
By enabling the fill_history option, the created stream computation task will be capable of processing data written before, during, and after the creation of the stream. This means that data written either before or after the creation of the stream will be included in the scope of stream computation, thus ensuring data integrity and consistency. This setting provides users with greater flexibility, allowing them to flexibly handle historical and new data according to actual needs.
Tips:
- When fill_history is enabled, creating a stream requires finding the boundary point of the historical data. If there is a lot of historical data, creating the stream may take a long time. In this case, you can use fill_history 1 async (supported since version 3.3.6.0) so that stream creation is processed in the background; the CREATE STREAM statement returns immediately without blocking subsequent operations. async only takes effect together with fill_history 1; creating a stream with fill_history 0 is very fast and does not require asynchronous processing.
- SHOW STREAMS can be used to view the progress of background stream creation (ready status indicates success, init status indicates that stream creation is in progress, failed status indicates that the stream creation has failed; the message column shows the reason for the failure. If stream creation fails, the stream can be deleted and recreated).
- In addition, do not create multiple streams asynchronously at the same time, as transaction conflicts may cause subsequent streams to fail.
For example, create a stream to count the number of data entries generated by all smart meters every 10s, and also calculate historical data. SQL as follows:
```sql
create stream if not exists count_history_s fill_history 1 into count_history as select count(*) from power.meters interval(10s)
```
Combined with the fill_history 1 option, it is possible to process data only within a specific historical time range, such as data after a historical moment (January 30, 2020).
```sql
create stream if not exists count_history_s fill_history 1 into count_history as select count(*) from power.meters where ts > '2020-01-30' interval(10s)
```
For instance, to process data within a specific time period, the end time can be a future date.
```sql
create stream if not exists count_history_s fill_history 1 into count_history as select count(*) from power.meters where ts > '2020-01-30' and ts < '2023-01-01' interval(10s)
```
If the stream task has completely expired and you no longer want it to monitor or process data, you can manually delete it, and the computed data will still be retained.
### Trigger Modes for Stream Computing
When creating a stream, you can specify the trigger mode of stream computing through the TRIGGER command. For non-window computations, the trigger is real-time; for window computations, there are currently 4 trigger modes, with WINDOW_CLOSE as the default.
1. AT_ONCE: Triggered immediately upon writing.
2. WINDOW_CLOSE: Triggered when the window closes (the closing of the window is determined by the event time, can be used in conjunction with watermark).
3. MAX_DELAY time: If the window closes, computation is triggered. If the window has not closed, and the duration since it has not closed exceeds the time specified by max delay, computation is triggered.
4. FORCE_WINDOW_CLOSE: Based on the current time of the operating system, only the results of the currently closed window are calculated and pushed out. The window is only calculated once at the moment of closure, and will not be recalculated subsequently. This mode currently only supports INTERVAL windows (does support sliding); In this mode, FILL_HISTORY is automatically set to 0, IGNORE EXPIRED is automatically set to 1 and IGNORE UPDATE is automatically set to 1; FILL only supports PREV, NULL, NONE, VALUE.
- This mode can be used to implement continuous queries, for example, creating a stream that every 1 second queries the number of data entries in the window of the past 10 seconds. SQL as follows:
```sql
create stream if not exists continuous_query_s trigger force_window_close into continuous_query as select count(*) from power.meters interval(10s) sliding(1s)
```
5. CONTINUOUS_WINDOW_CLOSE: Results are output when the window is closed. Modifying or deleting data does not immediately trigger a recalculation. Instead, periodic recalculations are performed every rec_time_val duration. If rec_time_val is not specified, the recalculation period is 60 minutes. If the recalculation time exceeds rec_time_val, the next recalculation will be automatically initiated after the current one is completed. Currently, this mode only supports INTERVAL windows. If the FILL clause is used, relevant information of the adapter needs to be configured, including adapterFqdn, adapterPort, and adapterToken. The adapterToken is a string obtained by Base64-encoding `{username}:{password}`. For example, after encoding `root:taosdata`, the result is `cm9vdDp0YW9zZGF0YQ==`.
The closing of a window is determined by the event time, for example when the event stream is interrupted or continuously delayed; in such cases, the event time cannot be updated, which may lead to stale computation results.
Therefore, stream computing provides the MAX_DELAY trigger mode, which combines event time with processing time: in MAX_DELAY mode, computation is triggered immediately when the window closes, and the delay can be specified with the following units: a (milliseconds), s (seconds), m (minutes), h (hours), d (days), w (weeks). Additionally, when data is written, if the time since computation was last triggered exceeds the time specified by MAX_DELAY, computation is triggered immediately.
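A hedged example (names are illustrative): the stream below pushes the average current for each 1-minute window no later than 5 seconds after data for that window arrives, even if the window has not yet closed.
```sql
CREATE STREAM avg_current_s TRIGGER MAX_DELAY 5s INTO avg_current AS
SELECT _wstart, AVG(current) AS avg_current
FROM power.meters PARTITION BY tbname INTERVAL(1m);
```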
### Window Closure in Stream Computing
The core of stream computing lies in using the event time (i.e., the timestamp primary key in the written record) as the basis for calculating the window closure time, rather than relying on the TDengine server's time. Using event time as the basis effectively avoids issues caused by discrepancies between client and server times and can properly address challenges such as out-of-order data writing.
To further control the tolerance level for out-of-order data, stream computing introduces the watermark mechanism. When creating a stream, users can specify the value of watermark through the stream_option parameter, which defines the upper bound of tolerance for out-of-order data, defaulting to 0.
Assuming T = Latest event time - watermark, each time new data is written, the system updates the window closure time based on this formula. Specifically, the system closes all open windows whose end time is less than T. If the trigger mode is set to window_close or max_delay, the aggregated results of the window are pushed. The diagram below illustrates the window closure process in stream computing.
<figure>
<Image img={watermarkImg} alt="Window closure in stream processing"/>
<figcaption>Figure 1. Window closure diagram</figcaption>
</figure>
In the diagram above, the vertical axis represents moments, and the dots on the horizontal axis represent the data received. The related process is described as follows.
1. At moment T1, the 7th data point arrives, and based on T = latest event time - watermark, the calculated time falls within the second window, so the second window does not close.
2. At moment T2, the 6th and 8th data points arrive late at TDengine. Since the latest event time has not changed, T also remains unchanged, and the second window, into which the out-of-order data falls, has not yet been closed, so the data can be correctly processed.
3. At moment T3, the 10th data point arrives, T moves forward beyond the closure time of the second window, the window is closed, and the out-of-order data has been correctly processed.
In window_close or max_delay modes, window closure directly affects the push results. In at_once mode, window closure only relates to memory usage.
### Expired Data Handling Strategy
For windows that have closed, data that falls into such windows again is marked as expired data. TDengine offers two ways to handle expired data, specified by the IGNORE EXPIRED option.
1. Recalculate, i.e., IGNORE EXPIRED 0: Re-find all data corresponding to the window from the TSDB and recalculate to get the latest result.
2. Directly discard, i.e., IGNORE EXPIRED 1: Default configuration, ignore expired data.
Regardless of the mode, the watermark should be properly set to obtain correct results (direct discard mode) or avoid frequent re-triggering of recalculations that lead to performance overhead (recalculation mode).
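For example, a hedged sketch (names are illustrative): the following stream tolerates up to 15 seconds of disorder via the watermark and simply discards anything that arrives later than that.
```sql
CREATE STREAM tolerant_avg_s TRIGGER WINDOW_CLOSE WATERMARK 15s IGNORE EXPIRED 1 INTO tolerant_avg AS
SELECT _wstart, AVG(voltage) AS avg_voltage
FROM power.meters PARTITION BY tbname INTERVAL(30s);
```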
### Data Update Handling Strategy
TDengine offers two ways to handle modified data, specified by the IGNORE UPDATE option.
1. Check whether the data has been modified, i.e., IGNORE UPDATE 0: Default configuration, if modified, recalculate the corresponding window.
2. Do not check whether the data has been modified, calculate all as incremental data, i.e., IGNORE UPDATE 1.
## Other Strategies for Stream Computing
### Writing to an Existing Supertable
When the results of stream computing need to be written into an existing supertable, ensure that the columns of `stb_name` correspond correctly to the subquery output results. If the position and number of the `stb_name` columns exactly match the subquery output, there is no need to explicitly specify the correspondence; if the data types do not match, the system automatically converts the subquery output to the corresponding `stb_name` column types.
For already existing supertables, the system will check the schema information of the columns to ensure they match the subquery output results. Here are some key points:
1. Check if the schema information of the columns matches; if not, automatically perform type conversion. Currently, an error is reported only if the data length exceeds 4096 bytes; otherwise, type conversion can be performed.
2. Check if the number of columns is the same; if different, explicitly specify the correspondence between the supertable and the subquery columns, otherwise, an error is reported. If the same, you can specify the correspondence or not; if not specified, they correspond by position order.
**Note** Although stream computing can write results to an existing supertable, two streams cannot write their result data to the same (super) table. This avoids data conflicts and inconsistencies and ensures data integrity and accuracy. In practice, set the column correspondence according to the actual needs and data structure to achieve efficient and accurate data processing.
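A hedged sketch of explicitly mapping the subquery output to the columns of an existing supertable (the supertable `voltage_summary` with columns `ts` and `avg_v` is assumed to exist already and is purely illustrative):
```sql
CREATE STREAM voltage_summary_s INTO voltage_summary (ts, avg_v) AS
SELECT _wstart, AVG(voltage)
FROM power.meters PARTITION BY tbname INTERVAL(1m);
```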
### Customizing Tags for Target Tables
Users can generate custom tag values for each partition's subtable, as shown in the stream creation statement below:
```sql
CREATE STREAM output_tag trigger at_once INTO output_tag_s TAGS(alias_tag varchar(100)) as select _wstart, count(*) from power.meters partition by concat("tag-", tbname) as alias_tag interval(10s);
```
In the PARTITION clause, an alias `alias_tag` is defined for `concat("tag-", tbname)`, corresponding to the custom tag name of the supertable `output_tag_s`. In the example above, the tag value of each subtable newly created by the stream is the prefix 'tag-' concatenated with the original table name. The following checks are performed on the tag information:
1. Check if the schema information of the tag matches; if not, automatically perform data type conversion. Currently, an error is reported only if the data length exceeds 4096 bytes; otherwise, type conversion can be performed.
2. Check if the number of tags is the same; if different, explicitly specify the correspondence between the supertable and the subquery tags, otherwise, an error is reported. If the same, you can specify the correspondence or not; if not specified, they correspond by position order.
### Cleaning Up Intermediate States of Stream Computing
```sql
DELETE_MARK time
```
DELETE_MARK is used to delete cached window states, i.e., deleting the intermediate results of stream computing. Cached window states are mainly used for window result updates caused by expired data. If not set, the default value is 10 years.
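For instance, a hedged sketch (names are illustrative) of a stream that only keeps its cached window states for 7 days:
```sql
CREATE STREAM weekly_count_s TRIGGER WINDOW_CLOSE DELETE_MARK 7d INTO weekly_count AS
SELECT _wstart, COUNT(*) FROM power.meters PARTITION BY tbname INTERVAL(10s);
```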
## Specific Operations of Stream Computing
### Deleting Stream Computing
Only deletes the stream computing task; data written by stream computing will not be deleted, SQL as follows:
```sql
DROP STREAM [IF EXISTS] stream_name;
```
### Displaying Stream Computing
View the SQL of stream computing tasks as follows:
```sql
SHOW STREAMS;
```
To display more detailed information, you can use:
```sql
SELECT * from information_schema.`ins_streams`;
```
### Pausing Stream Computing Tasks
The SQL to pause stream computing tasks is as follows:
```sql
PAUSE STREAM [IF EXISTS] stream_name;
```
If IF EXISTS is not specified, an error is reported when the stream does not exist; if IF EXISTS is specified, success is returned when the stream does not exist. In either case, if the stream exists, the stream computation is paused.
### Resuming Stream Computing Tasks
The SQL to resume stream computing tasks is as follows. If IGNORE UNTREATED is specified, it ignores the data written during the pause period of the stream computing task when resuming.
```sql
RESUME STREAM [IF EXISTS] [IGNORE UNTREATED] stream_name;
```
If IF EXISTS is not specified, an error is reported when the stream does not exist; if IF EXISTS is specified, success is returned when the stream does not exist. In either case, if the stream exists, the stream computation is resumed.
### Stream Computing Upgrade Fault Recovery
After upgrading TDengine, if the stream computing is not compatible, you need to delete the stream computing and then recreate it. The steps are as follows:
1. Modify taos.cfg, add `disableStream 1`
2. Restart taosd. If the startup fails, rename the stream directory so that taosd does not try to load the stream computing state during startup. Avoid deleting it, to prevent risks caused by misoperation. The folders that need to be renamed: `$dataDir/vnode/vnode*/tq/stream`, where `$dataDir` is the directory where TDengine stores data. In the `$dataDir/vnode/` directory there are multiple directories such as vnode1, vnode2 ... vnode*; in each of them, rename the tq/stream directory to tq/stream.bk
3. Start taos
```sql
drop stream xxxx; ---- xxx refers to the stream name
flush database stream_source_db; ---- The database where the supertable for stream computing data reading is located
flush database stream_dest_db; ---- The database where the supertable for stream computing data writing is located
```
Example:
```sql
create stream streams1 into test1.streamst as select _wstart, count(a) c1 from test.st interval(1s) ;
drop stream streams1;
flush database test;
flush database test1;
```
4. Close taosd
5. Modify taos.cfg, remove `disableStream 1`, or change `disableStream` to 0
6. Start taosd

View File

@ -0,0 +1,57 @@
---
title: EdgeCloud Orchestration
slug: /advanced-features/edge-cloud-orchestration
---
import Image from '@theme/IdealImage';
import edgeCloud from '../assets/edge-cloud-orchestration-01.png';
## Why Edge-Cloud Collaboration is Needed
In industrial Internet scenarios, edge devices are used only to handle local data, and decision-makers cannot form a global understanding of the entire system based solely on information collected by edge devices. In practical applications, edge devices need to report data to cloud computing platforms (public or private clouds), where data aggregation and information integration are carried out, providing decision-makers with a global insight into the entire dataset. This edge-cloud collaboration architecture has gradually become an important pillar supporting the development of the industrial Internet.
Edge devices mainly monitor and alert on specific data on the production line, such as real-time production data in a particular workshop, and then synchronize this edge-side production data to the big data platform in the cloud.
On the edge side, there is a high requirement for real-time performance, but the data volume may not be large, typically ranging from a few thousand to tens of thousands of monitoring points in a workshop. On the central side, computing resources are generally abundant, capable of aggregating data from the edge side for analysis and computation.
To achieve this operation, the requirements for the database or data storage layer are to ensure that data can be reported step by step and selectively. In some scenarios, where the overall data volume is very large, selective reporting is necessary. For example, raw records collected every second on the edge side, when reported to the central side, are downsampled to once a minute, which greatly reduces the data volume but still retains key information for long-term data analysis and prediction.
In the traditional industrial data collection process, data was collected from programmable logic controllers (PLCs) and then fed into industrial real-time (historian) databases to support business applications. These systems are not easy to scale horizontally and are heavily dependent on the Windows ecosystem, which is relatively closed.
## TDengine's Edge-Cloud Collaboration Solution
TDengine Enterprise is committed to providing powerful edge-cloud collaboration capabilities, with the following notable features:
- Efficient data synchronization: Supports synchronization efficiency of millions of data per second, ensuring fast and stable data transmission between the edge side and the cloud.
- Multi-data source integration: Compatible with various external data sources, such as AVEVA PI System, OPC-UA, OPC-DA, MQTT, etc., to achieve broad data access and integration.
- Flexible configuration of synchronization rules: Provides configurable synchronization rules, allowing users to customize the strategy and method of data synchronization according to actual needs.
- Offline continuation and re-subscription: Supports offline continuation and re-subscription functions, ensuring the continuity and integrity of data synchronization in the event of unstable or interrupted networks.
- Historical data migration: Supports the migration of historical data, facilitating users to seamlessly migrate historical data to a new system when upgrading or replacing systems.
TDengine's data subscription feature offers great flexibility to subscribers, allowing users to configure subscription objects as needed. Users can subscribe to a database, a supertable, or even a query statement with filtering conditions. This enables users to implement selective data synchronization, syncing truly relevant data (including offline and out-of-order data) from one cluster to another to meet the data needs of various complex scenarios.
The following diagram illustrates the implementation of an edge-cloud collaboration architecture in TDengine Enterprise using a specific production workshop example. In the production workshop, real-time data generated by equipment is stored in TDengine deployed on the edge side. The TDengine deployed in the branch factory subscribes to the data from the TDengine in the production workshop. To better meet business needs, data analysts set some subscription rules, such as data downsampling or syncing only data exceeding a specified threshold. Similarly, the TDengine deployed on the corporate side then subscribes to data from various branch factories, achieving corporate-level data aggregation, ready for further analysis and processing.
<figure>
<Image img={edgeCloud} alt="Edge-cloud orchestration diagram"/>
<figcaption>Edge-cloud orchestration diagram</figcaption>
</figure>
This implementation approach has the following advantages:
- No coding required, just simple configuration on the edge side and cloud.
- Greatly improved automation of cross-regional data synchronization, reducing error rates.
- No need for data caching, reducing batch sending, avoiding traffic peak congestion bandwidth.
- Data synchronization through subscription, with configurable rules, simple, flexible, and highly real-time.
- Both edge and cloud use TDengine, completely unifying the data model, reducing data governance difficulty.
Manufacturing enterprises often face a pain point in data synchronization. Many enterprises currently use offline methods to synchronize data, but TDengine Enterprise achieves real-time data synchronization with configurable rules. This method can avoid the resource waste and bandwidth congestion risks caused by regular large data transfers.
## Advantages of Edge-Cloud Collaboration
The IT and OT (Operational Technology) construction conditions of traditional industries vary, and compared to the internet industry, most enterprises are significantly behind in digital investment. Many enterprises still use outdated systems to process data, which are often independent of each other, forming so-called data silos.
In this context, to inject new vitality into traditional industries with AI, the primary task is to integrate systems scattered in various corners and their collected data, breaking the limitations of data silos. However, this process is full of challenges, as it involves multiple systems and a plethora of industrial Internet protocols, and data aggregation is not a simple merging task. It requires cleaning, processing, and handling data from different sources to integrate it into a unified platform.
When all data is aggregated into one system, the efficiency of accessing and processing data is significantly improved. Enterprises can respond more quickly to real-time data, solve problems more effectively, and achieve efficient collaboration among internal and external staff, enhancing overall operational efficiency.
Additionally, after data aggregation, advanced third-party AI analysis tools can be utilized for improved anomaly detection, real-time alerts, and provide more accurate predictions for production capacity, cost, and equipment maintenance. This will enable decision-makers to better grasp the overall macro situation, provide strong support for the development of the enterprise, and help traditional industries achieve digital transformation and intelligent upgrades.

View File

@ -0,0 +1,56 @@
---
title: TDengine 2.x
slug: /advanced-features/data-connectors/tdengine-2
---
import Image from '@theme/IdealImage';
import imgStep1 from '../../assets/tdengine-2-01.png';
import imgStep2 from '../../assets/tdengine-2-02.png';
import imgStep3 from '../../assets/tdengine-2-03.png';
import imgStep4 from '../../assets/tdengine-2-04.png';
This section describes how to create a data migration task through the Explorer interface to migrate data from the old version of TDengine2 to the current cluster.
## Feature Overview
taosX migrates data by querying the source cluster and writing the results to the target database. Specifically, taosX uses the data of a subtable over a period of time as the basic unit of query, and writes the data to be migrated to the target database in batches.
taosX supports three migration modes:
1. **history** mode. This refers to migrating data within a specified time range. If no time range is specified, it migrates all data up to the time the task was created. The task stops once migration is complete.
2. **realtime** mode. It synchronizes data from the time the task is created onwards. The task will continue to run unless manually stopped.
3. **both** mode. It first executes in history mode, then in realtime mode.
Under each migration mode, you can specify whether to migrate the table structure. If "always" is selected, the table structure is synchronized to the target database before the data is migrated. This process may take longer if there are many subtables. If it is certain that the target database already has the same table structure as the source database, it is recommended to choose "none" to save time.
The task saves progress information to the disk during operation, so if the task is paused and then restarted, or if it automatically recovers from an anomaly, the task will not start over from the beginning.
For more options, it is recommended to read the description of each form field on the task creation page in detail.
## Specific Steps
First, click on the "Data Writing" menu on the left, then click the "Add Data Source" button on the right.
<figure>
<Image img={imgStep1} alt="Add data source"/>
<figcaption>Figure 1. Add a data source</figcaption>
</figure>
Then enter the task name, such as "migrate-test", and finally select the type "TDengine2". At this point, the form switches to a form dedicated to migrating data from TDengine2, containing a large number of options, each with detailed explanations, as shown in the images below.
<figure>
<Image img={imgStep2} alt="Add data source"/>
<figcaption>Figure 2. Add a data source</figcaption>
</figure>
<figure>
<Image img={imgStep3} alt="Add data source"/>
<figcaption>Figure 3. Add a data source</figcaption>
</figure>
<figure>
<Image img={imgStep4} alt="Add data source"/>
<figcaption>Figure 4. Add a data source</figcaption>
</figure>
After clicking the "Submit" button to submit the task, return to the "Data Source" task list page to monitor the status of the task.

View File

@ -0,0 +1,110 @@
---
title: TDengine 3.x
slug: /advanced-features/data-connectors/tdengine-3
---
import Image from '@theme/IdealImage';
import imgStep1 from '../../assets/tdengine-3-01.png';
import imgStep2 from '../../assets/tdengine-3-02.png';
import imgStep3 from '../../assets/tdengine-3-03.png';
import imgStep4 from '../../assets/tdengine-3-04.png';
import imgStep5 from '../../assets/tdengine-3-05.png';
import imgStep6 from '../../assets/tdengine-3-06.png';
import imgStep7 from '../../assets/tdengine-3-07.png';
import imgStep8 from '../../assets/tdengine-3-08.png';
import imgStep9 from '../../assets/tdengine-3-09.png';
This document describes how to use Explorer to subscribe to data from another cluster to this cluster.
## Preparation
Create the required Topic in the source cluster, which can subscribe to the entire database, supertable, or subtable. In this example, we demonstrate subscribing to a database named test.
### Step One: Enter the "Data Subscription" page
Open the Explorer interface of the source cluster, click the "Data Subscription" menu on the left, then click "Add New Topic".
<figure>
<Image img={imgStep1} alt=""/>
</figure>
### Step Two: Add a New Topic
Enter the topic name and select the database to subscribe to.
<figure>
<Image img={imgStep2} alt=""/>
</figure>
### Step Three: Copy the Topic's DSN
Click the "Create" button, return to the topic list and copy the **DSN** of the topic for later use.
<figure>
<Image img={imgStep3} alt=""/>
</figure>
## Create Subscription Task
### Step One: Enter the "Add Data Source" page
1. Click the "Data Writing" menu on the left
2. Click "Add Data Source"
<figure>
<Image img={imgStep4} alt=""/>
</figure>
### Step Two: Enter Data Source Information
1. Enter the task name
2. Select the task type "TDengine3"
3. Select the target database
4. Paste the DSN copied in the preparation step into the **Topic DSN** field. For example: tmq+ws://root:taosdata@localhost:6041/topic
5. After completing the above steps, click the "Connectivity Check" button to test connectivity with the source
<figure>
<Image img={imgStep5} alt=""/>
</figure>
### Step Three: Fill in Subscription Settings and Submit Task
1. Choose the subscription start position. You can start from the earliest or the latest data; the default is earliest.
2. Set the timeout period. Supports units ms (milliseconds), s (seconds), m (minutes), h (hours), d (days), M (months), y (years)
3. Set the subscription group ID. The subscription group ID is an arbitrary string used to identify a subscription group, with a maximum length of 192. If not specified, a randomly generated group ID will be used.
4. Set the client ID. The client ID is an arbitrary string used to identify the client, with a maximum length of 192.
5. Synchronize data that has been written to disk. If enabled, it can synchronize data that has been written to the TSDB time-series data storage file (i.e., not in WAL). If disabled, only data that has not yet been written to disk (i.e., saved in WAL) will be synchronized.
6. Synchronize table deletion operations. If enabled, table deletion operations will be synchronized to the target database.
7. Synchronize data deletion operations. If enabled, data deletion operations will be synchronized to the target database.
8. Compression. Enable WebSocket compression support to reduce network bandwidth usage.
9. Click the "Submit" button to submit the task
<figure>
<Image img={imgStep6} alt=""/>
</figure>
## Monitor Task Execution
After submitting the task, return to the data source page to view the task status. The task will first be added to the execution queue and will start running shortly.
<figure>
<Image img={imgStep7} alt=""/>
</figure>
Click the "View" button to monitor the dynamic statistical information of the task.
<figure>
<Image img={imgStep8} alt=""/>
</figure>
You can also click the left collapse button to expand the task's activity information. If the task runs abnormally, detailed explanations can be seen here.
<figure>
<Image img={imgStep9} alt=""/>
</figure>
## Advanced Usage
1. The FROM DSN supports multiple topics, with topic names separated by commas. For example: `tmq+ws://root:taosdata@localhost:6041/topic1,topic2,topic3`
2. In the FROM DSN, you can also use database names, supertable names, or subtable names instead of topic names, for example: `tmq+ws://root:taosdata@localhost:6041/db1,db2,db3`. In this case there is no need to create a topic in advance; taosX automatically recognizes that database names are used and creates the corresponding subscription topics in the source cluster.
3. The FROM DSN supports the `group.id` parameter to explicitly specify the group ID used for the subscription. If not specified, a randomly generated group ID is used.
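As a hypothetical illustration (the host, credentials, topic names, and group name are placeholders, and passing `group.id` as a query-string option is an assumption about the DSN syntax rather than something documented here), a DSN that subscribes to two topics with an explicit group ID might look like this:

```text
tmq+ws://root:taosdata@localhost:6041/topic1,topic2?group.id=taosx-sub-01
```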

View File

@ -0,0 +1,201 @@
---
title: AVEVA PI System
sidebar_label: PI System
slug: /advanced-features/data-connectors/pi-system
---
import Image from '@theme/IdealImage';
import imgStep1 from '../../assets/pi-system-01.png';
import imgStep2 from '../../assets/pi-system-02.png';
import imgStep3 from '../../assets/pi-system-03.png';
import imgStep4 from '../../assets/pi-system-04.png';
This section describes how to create data migration tasks through the Explorer interface, migrating data from the PI system to the current TDengine cluster.
## Feature Overview
The PI system is a software product suite used for data collection, retrieval, analysis, transmission, and visualization, serving as the infrastructure for enterprise-level systems managing real-time data and events. taosX can extract real-time or historical data from the PI system using the PI connector plugin.
From the perspective of data timeliness, PI data source tasks are divided into two categories: **real-time tasks** and **backfill tasks**. In the task type dropdown list, these two categories correspond to the names: **PI** and **PI backfill**.
From the data model perspective, PI data source tasks are divided into **single-column model** tasks and **multi-column model** tasks:
1. **Single-column model** tasks map one PI Point to one table in TDengine
2. **Multi-column model** tasks map one PI AF element to one table
Regarding the type of connected data source, PI data source tasks are further divided into **Archive Server** data sources and **AF Server** data sources. For **Archive Server** data sources, only the **single-column model** can be used. For **AF Server** data sources, both **single-column model** and **multi-column model** can be chosen.
Users configure the data mapping rules from PI to TDengine through a CSV file, referred to as the **model configuration file**:
1. For tasks using the AF Server single-column model, taosX automatically identifies which attributes of the element are referencing PI Point data, mapping one PI Point attribute to one table.
2. For tasks using the AF Server multi-column model, one element corresponds to one table. taosX by default maps PI Point attributes to TDengine Metric columns and other attributes to TDengine tag columns.
## Creating Tasks
### Add Data Source
In the data writing page, click the **+Add Data Source** button to enter the add data source page.
<figure>
<Image img={imgStep1} alt=""/>
</figure>
### Basic Configuration
Enter the task name in **Name**, such as "test";
Select **PI** or **PI backfill** from the **Type** dropdown list.
If the taosX service is running on or can directly connect to the server where the PI system is located (dependent on PI AF SDK), **Proxy** is not necessary; otherwise, configure **Proxy**: select the specified proxy from the dropdown, or click the **+Create New Proxy** button on the right to create a new proxy and follow the prompts to configure the proxy. That is, taosX or its proxy needs to be deployed on a host that can directly connect to the PI system.
Select a target database from the **Target Database** dropdown list, or click the **+Create Database** button on the right to create a new database.
<figure>
<Image img={imgStep2} alt=""/>
</figure>
### Connection Configuration
The PI connector supports two connection methods:
1. **PI Data Archive Only**: Does not use AF mode. In this mode, directly fill in the **PI Service Name** (server address, usually using the hostname).
<figure>
<Image img={imgStep3} alt=""/>
</figure>
2. **PI Data Archive and Asset Framework (AF) Server**: Uses AF SDK. In addition to configuring the service name, this mode also requires configuring the PI system (AF Server) name (hostname) and AF database name.
<figure>
<Image img={imgStep4} alt=""/>
</figure>
Click the **Connectivity Check** button to verify if the data source is available.
### Data Model Configuration
This part has two tabs, corresponding to the configuration of the single-column model and the multi-column model. If this is your first configuration, whether you choose a single-column model or a multi-column model, be sure to click the "Download Default Configuration" button. This action will trigger the generation of the default **model configuration file** and also download the **model configuration file** to your local machine, which you can view or edit. After editing, you can also upload it again to overwrite the default configuration.
If you want to synchronize all points or all template elements, then the default configuration is sufficient. If you want to filter specific naming patterns of points or element templates, you need to fill in the filter conditions before clicking "Download Default Configuration".
#### Multi-column Model Configuration File
Below is an example of a multi-column model configuration file. This configuration file includes configurations for two supertables: one is the metertemplate table, which receives data from elements of the MeterTemplate template; the other is the farm table, which receives data from elements of the Farm template.
```csv
SuperTable,metertemplate
SubTable,${element_name}_${element_id}
Template,MeterTemplate
Filter,
ts,KEY,TIMESTAMP,$ts
voltage,COLUMN,DOUBLE,$voltage
voltage_status,COLUMN,INT,$voltage_status
current,COLUMN,DOUBLE,$current
current_status,COLUMN,INT,$current_status
element_id,tag,VARCHAR(100),$element_id
element_name,tag,VARCHAR(100),$element_name
path,tag,VARCHAR(100),$path
categories,tag,VARCHAR(100),$categories
SuperTable,farm
SubTable,${element_name}_${element_id}
Template,Farm
Filter,
ts,KEY,TIMESTAMP,$ts
wind_speed,COLUMN,FLOAT,$wind_speed
wind_speed_status,COLUMN,INT,$wind_speed_status
power_production,COLUMN,FLOAT,$power_production
power_production_status,COLUMN,INT,$power_production_status
lost_power,COLUMN,FLOAT,$lost_power
lost_power_status,COLUMN,INT,$lost_power_status
farm_lifetime_production__weekly_,COLUMN,FLOAT,$farm_lifetime_production__weekly_
farm_lifetime_production__weekly__status,COLUMN,INT,$farm_lifetime_production__weekly__status
farm_lifetime_production__hourly_,COLUMN,FLOAT,$farm_lifetime_production__hourly_
farm_lifetime_production__hourly__status,COLUMN,INT,$farm_lifetime_production__hourly__status
element_id,tag,VARCHAR(100),$element_id
element_name,tag,VARCHAR(100),$element_name
path,tag,VARCHAR(100),$path
categories,tag,VARCHAR(100),$categories
```
The multi-column model configuration file consists of one or more supertable definitions. Each supertable configuration includes:
1. Correspondence between supertables and templates
2. Correspondence between attributes and TDengine Metric columns
3. Correspondence between attributes and TDengine tag columns
4. Source data filtering conditions
5. For each column, whether it is a Metrics column or a tag column, a mapping rule can be configured, see [Zero-code third-party data access](../) "Data extraction, filtering, and transformation" section
#### Single-column model configuration file
Below is an example of a single-column model configuration file.
```csv
SuperTable,volt_float32
SubTable,${point_name}
Filter,
ts,KEY,TIMESTAMP,$ts
value,COLUMN,FLOAT,$value
status,COLUMN,INT,$status
path,tag,VARCHAR(200),$path
point_name,tag,VARCHAR(100),$point_name
ptclassname,tag,VARCHAR(100),$ptclassname
sourcetag,tag,VARCHAR(100),$sourcetag
tag,tag,VARCHAR(100),$tag
descriptor,tag,VARCHAR(100),$descriptor
exdesc,tag,VARCHAR(100),$exdesc
engunits,tag,VARCHAR(100),$engunits
pointsource,tag,VARCHAR(100),$pointsource
step,tag,VARCHAR(100),$step
future,tag,VARCHAR(100),$future
element_paths,tag,VARCHAR(512),`$element_paths.replace("\\", ".")`
SuperTable,milliampere_float32
SubTable,${point_name}
Filter,
ts,KEY,TIMESTAMP,$ts
value,COLUMN,FLOAT,$value
status,COLUMN,INT,$status
path,tag,VARCHAR(200),$path
point_name,tag,VARCHAR(100),$point_name
ptclassname,tag,VARCHAR(100),$ptclassname
sourcetag,tag,VARCHAR(100),$sourcetag
tag,tag,VARCHAR(100),$tag
descriptor,tag,VARCHAR(100),$descriptor
exdesc,tag,VARCHAR(100),$exdesc
engunits,tag,VARCHAR(100),$engunits
pointsource,tag,VARCHAR(100),$pointsource
step,tag,VARCHAR(100),$step
future,tag,VARCHAR(100),$future
element_paths,tag,VARCHAR(512),`$element_paths.replace("\\", ".")`
Meter_1000004_Voltage,POINT,volt_float32
Meter_1000004_Current,POINT,milliampere_float32
Meter_1000001_Voltage,POINT,volt_float32
Meter_1000001_Current,POINT,milliampere_float32
Meter_1000474_Voltage,POINT,volt_float32
Meter_1000474_Current,POINT,milliampere_float32
```
The single-column model configuration file is divided into two parts. The first part, like the multi-column model configuration file, consists of several supertable definitions. The second part is the point list, which configures the mapping between points and supertables. The default configuration maps points with the same UOM and data type to the same supertable.
### Backfill Configuration
1. For PI tasks, you can configure the "restart compensation time." If the task is unexpectedly interrupted, configuring this parameter when restarting is very useful as it allows taosX to automatically backfill data for a period.
2. For PI backfill tasks, you must configure the start and end times of the backfill.
### Advanced Options
The advanced options vary for different types of tasks. Common advanced options include:
1. Connector log level
2. Batch size for connector queries and data sending
3. Maximum delay for a single read
For **real-time tasks of the multi-column model**, there are also the following switch options:
1. Whether to synchronize newly added elements. If enabled, the PI connector will listen for newly added elements under the template and automatically synchronize the data of the newly added elements without needing to restart the task.
2. Whether to synchronize changes in static attributes. If enabled, the PI connector will synchronize all changes in static attributes (non-PI Point attributes). That is, if a static attribute value of an element in the PI AF Server is modified, the corresponding tag value in the TDengine table will also be modified.
3. Whether to synchronize the deletion of elements. If enabled, the PI connector will listen for events of element deletions under the configured template and synchronize the deletion of the corresponding subtable in TDengine.
4. Whether to synchronize the deletion of historical data. If enabled, for the time-series data of an element, if data at a certain time is deleted in PI, the corresponding column data at that time in TDengine will be set to null.
5. Whether to synchronize the modification of historical data. If enabled, for the time-series data of an element, if historical data is modified in PI, the corresponding data at that time in TDengine will also be updated.

View File

@ -0,0 +1,253 @@
---
title: OPC UA
slug: /advanced-features/data-connectors/opc-ua
---
import Image from '@theme/IdealImage';
import imgStep1 from '../../assets/opc-ua-01.png';
import imgStep2 from '../../assets/opc-ua-02.png';
import imgStep3 from '../../assets/opc-ua-03.png';
import imgStep4 from '../../assets/opc-ua-04.png';
import imgStep5 from '../../assets/opc-ua-05.png';
import imgStep6 from '../../assets/opc-ua-06.png';
import imgStep7 from '../../assets/opc-ua-07.png';
import imgStep8 from '../../assets/opc-ua-08.png';
import imgStep9 from '../../assets/opc-ua-09.png';
This section describes how to create data migration tasks through the Explorer interface to synchronize data from an OPC-UA server to the current TDengine cluster.
## Overview
OPC is one of the interoperability standards for securely and reliably exchanging data in the field of industrial automation and other industries.
OPC-UA is the next-generation standard of the classic OPC specifications, a platform-independent, service-oriented architecture specification that integrates all the functionalities of the existing OPC Classic specifications, providing a path to a more secure and scalable solution.
TDengine can efficiently read data from OPC-UA servers and write it to TDengine, enabling real-time data ingestion.
## Creating a Task
### 1. Add a Data Source
On the data writing page, click the **+ Add Data Source** button to enter the add data source page.
<figure>
<Image img={imgStep1} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in **Name**, for example, for environmental temperature and humidity monitoring, name it **environment-monitoring**.
Select **OPC-UA** from the **Type** dropdown list.
**Proxy** is optional, you can select a specific proxy from the dropdown list, or click the **+ Create New Proxy** button on the right.
Select a target database from the **Target Database** dropdown list, or click the **+ Create Database** button on the right.
<figure>
<Image img={imgStep2} alt=""/>
</figure>
### 3. Configure Connection Information
In the **Connection Configuration** area, fill in the **OPC-UA Service Address**, for example: `127.0.0.1:5000`, and configure the data transmission security mode, with three security modes available:
1. None: Communication data is transmitted in plaintext.
2. Sign: Communication data is verified using a digital signature to protect data integrity.
3. SignAndEncrypt: Communication data is verified using a digital signature and encrypted using encryption algorithms to ensure data integrity, authenticity, and confidentiality.
If you choose Sign or SignAndEncrypt as the security mode, you must select a valid security policy. Security policies define how to implement the encryption and verification mechanisms in the security mode, including the encryption algorithms used, key lengths, digital certificates, etc. Available security policies include:
1. None: Only selectable when the security mode is None.
2. Basic128Rsa15: Uses RSA algorithm and 128-bit key length to sign or encrypt communication data.
3. Basic256: Uses AES algorithm and 256-bit key length to sign or encrypt communication data.
4. Basic256Sha256: Uses the AES algorithm with a 256-bit key length and the SHA-256 algorithm for digital signatures.
5. Aes128Sha256RsaOaep: Uses the AES-128 algorithm to encrypt and decrypt communication data, the SHA-256 algorithm for digital signatures, and the RSA algorithm with OAEP padding to encrypt and decrypt the symmetric communication keys.
6. Aes256Sha256RsaPss: Uses the AES-256 algorithm to encrypt and decrypt communication data, the SHA-256 algorithm for digital signatures, and the RSA algorithm with PSS padding to encrypt and decrypt the symmetric communication keys.
<figure>
<Image img={imgStep3} alt=""/>
</figure>
### 4. Choose Authentication Method
As shown below, switch tabs to choose different authentication methods, with the following options available:
1. Anonymous
2. Username
3. Certificate Access: Can be the same as the security communication certificate, or a different certificate.
<figure>
<Image img={imgStep4} alt=""/>
</figure>
After configuring the connection properties and authentication method, click the **Connectivity Check** button to check whether the data source is available. If a security communication certificate or authentication certificate is used, the certificate must be trusted by the OPC UA server; otherwise, the check will still fail.
### 5. Configure Points Set
**Points Set** can choose to use a CSV file template or **Select All Points**.
#### 5.1. Upload CSV Configuration File
You can download the blank CSV template, fill in the point information according to the template, and then upload the CSV configuration file to configure the points; alternatively, you can download the data points that match the configured filter conditions, in the format specified by the CSV template.
CSV files have the following rules:
1. File Encoding
The encoding format of the CSV file uploaded by the user must be one of the following:
(1) UTF-8 with BOM
(2) UTF-8 (i.e., UTF-8 without BOM)
2. Header Configuration Rules
The header is the first line of the CSV file, with the following rules:
(1) The header of the CSV can configure the following columns:
| Number | Column Name | Description | Required | Default Behavior |
|--------|-------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -------- |-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 1 | point_id | The id of the data point on the OPC UA server | Yes | None |
| 2 | stable | The corresponding supertable for the data point in TDengine | Yes | None |
| 3 | tbname | The corresponding subtable for the data point in TDengine | Yes | None |
| 4 | enable | Whether to collect data from this point | No | Use the unified default value `1` for enable |
| 5 | value_col | The column name in TDengine corresponding to the collected value of the data point | No | Use the unified default value `val` as the value_col |
| 6 | value_transform | The transformation function executed in taosX for the collected value of the data point | No | Do not transform the collected value uniformly |
| 7 | type | The data type of the collected value of the data point | No | Use the original type of the collected value as the data type in TDengine |
| 8 | quality_col | The column name in TDengine corresponding to the quality of the collected value | No | Do not add a quality column in TDengine uniformly |
| 9 | ts_col | The column in TDengine corresponding to the original timestamp of the data point | No | Among ts_col, request_ts_col, and received_ts_col, when two or more of these columns are configured, the leftmost one is used as the primary key in TDengine. |
| 10 | request_ts_col | The column in TDengine corresponding to the timestamp when the data point value was requested | No | Same as above |
| 11 | received_ts_col | The column in TDengine corresponding to the timestamp when the data point value was received | No | Same as above |
| 12 | ts_transform | The transformation function executed in taosX for the original timestamp of the data point | No | Do not transform the original timestamp of the data point |
| 13 | request_ts_transform | The transformation function executed in taosX for the request timestamp of the data point | No | Do not transform the request timestamp of the data point |
| 14 | received_ts_transform | The transformation function executed in taosX for the received timestamp of the data point | No | Do not transform the received timestamp of the data point |
| 15 | tag::VARCHAR(200)::name | The Tag column corresponding to the data point in TDengine. Here `tag` is a reserved keyword indicating that this column is a tag; `VARCHAR(200)` indicates the type of tag; `name` is the actual name of the tag. | No | If 1 or more tag columns are configured, use the configured tag columns; if no tag columns are configured and stable exists in TDengine, use the tags of the stable in TDengine; if no tag columns are configured and stable does not exist in TDengine, automatically add the following 2 tag columns: tag::VARCHAR(256)::point_id and tag::VARCHAR(256)::point_name |
(2) In the CSV Header, there cannot be duplicate columns;
(3) In the CSV Header, columns like `tag::VARCHAR(200)::name` can be configured multiple times, corresponding to multiple Tags in TDengine, but the names of the Tags cannot be repeated.
(4) In the CSV Header, the order of the columns does not affect the CSV file validation rules;
(5) In the CSV Header, columns that are not listed in the table above can be configured, such as: sequence number, these columns will be automatically ignored.
3. Row Configuration Rules
Each Row in the CSV file configures an OPC data point. The rules for Rows are as follows:
(1) Correspondence with columns in the Header
| Number | Column in Header | Type of Value | Value Range | Mandatory | Default Value |
|--------|-------------------------| ------------- |----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| --------- |---------------------------------------|
| 1 | point_id | String | Strings like `ns=3;i=1005`, must meet the OPC UA ID specification, i.e., include ns and id parts | Yes | |
| 2 | enable | int | 0: Do not collect this point, and delete the corresponding subtable in TDengine before the OPC DataIn task starts; 1: Collect this point, do not delete the subtable before the OPC DataIn task starts. | No | 1 |
| 3 | stable | String | Any string that meets the TDengine supertable naming convention; if the special character `.` exists, it is replaced with an underscore. If `{type}` exists: when type in the CSV file is not empty, it is replaced with the value of type; when type is empty, it is replaced with the original type of the collected value | Yes | |
| 4 | tbname | String | Any string that meets the TDengine subtable naming convention; for OPC UA: if `{ns}` exists, it is replaced with the ns from point_id; if `{id}` exists, it is replaced with the id from point_id. For OPC DA: if `{tag_name}` exists, it is replaced with tag_name | Yes | |
| 5 | value_col | String | Column name that meets TDengine naming convention | No | val |
| 6 | value_transform | String | Expressions supported by the Rhai engine, for example: `(val + 10) / 1000 * 2.0`, `log(val) + 10`, etc. | No | None |
| 7 | type | String | Supported types include: b/bool/i8/tinyint/i16/smallint/i32/int/i64/bigint/u8/tinyint unsigned/u16/smallint unsigned/u32/int unsigned/u64/bigint unsigned/f32/float/f64/double/timestamp/timestamp(ms)/timestamp(us)/timestamp(ns)/json | No | Original type of the data point value |
| 8 | quality_col | String | Column name that meets TDengine naming convention | No | None |
| 9 | ts_col | String | Column name that meets TDengine naming convention | No | ts |
| 10 | request_ts_col | String | Column name that meets TDengine naming convention | No | qts |
| 11 | received_ts_col | String | Column name that meets TDengine naming convention | No | rts |
| 12 | ts_transform | String | Supports the +, -, *, /, % operators, for example: ts / 1000 * 1000 sets the last 3 digits of a millisecond timestamp to 0; ts + 8 * 3600 * 1000 adds 8 hours to a millisecond-precision timestamp; ts - 8 * 3600 * 1000 subtracts 8 hours from a millisecond-precision timestamp | No | None |
| 13 | request_ts_transform | String | Supports the +, -, *, /, % operators, for example: qts / 1000 * 1000 sets the last 3 digits of a millisecond timestamp to 0; qts + 8 * 3600 * 1000 adds 8 hours to a millisecond-precision timestamp; qts - 8 * 3600 * 1000 subtracts 8 hours from a millisecond-precision timestamp | No | None |
| 14 | received_ts_transform | String | Supports the +, -, *, /, % operators, for example: rts / 1000 * 1000 sets the last 3 digits of a millisecond timestamp to 0; rts + 8 * 3600 * 1000 adds 8 hours to a millisecond-precision timestamp; rts - 8 * 3600 * 1000 subtracts 8 hours from a millisecond-precision timestamp | No | None |
| 15 | tag::VARCHAR(200)::name | String | The value inside a tag, when the tag type is VARCHAR, can be in Chinese | No | NULL |
(2) `point_id` is unique throughout the DataIn task, meaning: in an OPC DataIn task, a data point can only be written to one subtable in TDengine. If you need to write a data point to multiple subtables, you need to create multiple OPC DataIn tasks;
(3) When `point_id` is different but `tbname` is the same, `value_col` must be different. This configuration allows data from multiple data points of different types to be written to different columns in the same subtable. This method corresponds to the "OPC data into TDengine wide table" usage scenario.
4. Other Rules
(1) If the number of columns in Header and Row are inconsistent, the validation fails, and the user is prompted with the line number that does not meet the requirements;
(2) Header is on the first line and cannot be empty;
(3) There must be at least one data point;
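To make the rules above concrete, here is a minimal, hypothetical points CSV (the point IDs, supertable name, transform expression, and tag values are illustrative placeholders; a real file should start from the downloaded template):

```csv
point_id,stable,tbname,enable,value_col,type,ts_col,value_transform,tag::VARCHAR(100)::location
ns=3;i=1005,volt_float32,meter_{id},1,val,f32,ts,(val + 10) / 1000 * 2.0,workshop_a
ns=3;i=1006,volt_float32,meter_{id},1,val,f32,ts,(val + 10) / 1000 * 2.0,workshop_b
```

Here `{id}` in tbname is replaced with the id part of point_id, so the two points would be written to subtables such as `meter_1005` and `meter_1006`.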
#### 5.2. Selecting Data Points
Data points can be filtered by configuring **Root Node ID**, **Namespace**, **Regular Matching**, etc.
Configure **Supertable Name**, **Table Name** to specify the supertable and subtable where the data will be written.
Configure **Primary Key Column**, choose `origin_ts` to use the original timestamp of the OPC data point as the primary key in TDengine; choose `request_ts` to use the data's request timestamp as the primary key in TDengine; choose `received_ts` to use the data's reception timestamp as the primary key in TDengine. Configure **Primary Key Alias** to specify the name of the TDengine timestamp column.
<figure>
<Image img={imgStep5} alt=""/>
</figure>
### 6. Collection Configuration
In the collection configuration, configure the current task's collection mode, collection interval, collection timeout, etc.
<figure>
<Image img={imgStep6} alt=""/>
</figure>
As shown in the image above:
- **Collection Mode**: Can use `subscribe` or `observe` mode.
- `subscribe`: Subscription mode, reports data changes and writes to TDengine.
- `observe`: According to the `collection interval`, polls the latest value of the data point and writes to TDengine.
- **Collection Interval**: Default is 10 seconds. This is the interval for collecting data points; starting from the end of the last collection, the latest value of each data point is polled and written to TDengine. Only configurable in `observe` **Collection Mode**.
- **Collection Timeout**: If the data from the OPC server is not returned within the set time when reading data points, the read fails, default is 10 seconds. Only configurable in `observe` **Collection Mode**.
When using **Selecting Data Points** in the **Data Point Set**, the collection configuration can also set the **Data Point Update Mode** and **Data Point Update Interval** to enable dynamic data point updates. **Dynamic Data Point Update** means that, while the task is running, if the OPC server adds or deletes data points, the data points that match the filter conditions are automatically added to the current task without needing to restart the OPC task.
- Data Point Update Mode: Can choose `None`, `Append`, `Update`.
- None: Do not enable dynamic data point updates;
- Append: Enable dynamic data point updates, but only append;
- Update: Enable dynamic data point updates, append or delete;
- Data Point Update Interval: Effective when "Data Point Update Mode" is `Append` and `Update`. Unit: seconds, default value is 600, minimum value: 60, maximum value: 2147483647.
### 7. Advanced Options
<figure>
<Image img={imgStep7} alt=""/>
</figure>
As shown in the image above, configure advanced options for more detailed optimization of performance, logs, etc.
**Log Level** defaults to `info`, with options `error`, `warn`, `info`, `debug`, `trace`.
In **Maximum Write Concurrency**, set the maximum concurrency limit for writing to taosX. Default value: 0, meaning auto, automatically configures concurrency.
In **Batch Size**, set the batch size for each write, i.e., the maximum number of messages sent at one time.
In **Batch Delay**, set the maximum delay for a single send (in seconds); when the timeout expires, any pending data is sent immediately even if the **Batch Size** is not reached.
In **Save Raw Data**, choose whether to save raw data. Default value: No.
When saving raw data, the following 2 parameters are effective.
In **Maximum Retention Days**, set the maximum retention days for raw data.
In **Raw Data Storage Directory**, set the path for saving raw data. If using Agent, the storage path refers to the path on the server where the Agent is located, otherwise it is on the taosX server. The path can use placeholders `$DATA_DIR` and `:id` as part of the path.
- On Linux platform, `$DATA_DIR` is /var/lib/taos/taosx, by default the storage path is `/var/lib/taos/taosx/tasks/<task_id>/rawdata`.
- On Windows platform, `$DATA_DIR` is C:\TDengine\data\taosx, by default the storage path is `C:\TDengine\data\taosx\tasks\<task_id>\rawdata`.
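As an illustrative sketch only (assuming that `:id` expands to the task ID, which would match the default paths above but is not spelled out here), a configured storage directory could look like:

```text
$DATA_DIR/tasks/:id/rawdata
```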
### 8. Completion
Click the **Submit** button to complete the creation of the OPC UA to TDengine data synchronization task. Return to the **Data Source List** page to view the status of the task execution.
## Add Data Points
During the task execution, click **Edit**, then click the **Add Data Points** button to append data points to the CSV file.
<figure>
<Image img={imgStep8} alt=""/>
</figure>
In the pop-up form, fill in the information for the data points.
<figure>
<Image img={imgStep9} alt=""/>
</figure>
Click the **Confirm** button to complete the addition of the data points.

View File

@ -0,0 +1,225 @@
---
title: OPC DA
slug: /advanced-features/data-connectors/opc-da
---
import Image from '@theme/IdealImage';
import imgStep1 from '../../assets/opc-da-01.png';
import imgStep2 from '../../assets/opc-da-02.png';
import imgStep3 from '../../assets/opc-da-03.png';
import imgStep4 from '../../assets/opc-da-04.png';
import imgStep5 from '../../assets/opc-da-05.png';
import imgStep6 from '../../assets/opc-da-06.png';
import imgStep7 from '../../assets/opc-da-07.png';
import imgStep8 from '../../assets/opc-da-08.png';
This section describes how to create data migration tasks through the Explorer interface, synchronizing data from an OPC-DA server to the current TDengine cluster.
## Overview
OPC is one of the interoperability standards for secure and reliable data exchange in the field of industrial automation and other industries.
OPC DA (Data Access) is a classic COM-based specification, only applicable to Windows. Although OPC DA is not the latest and most efficient data communication specification, it is widely used. This is mainly because some old equipment only supports OPC DA.
TDengine can efficiently read data from OPC-DA servers and write it to TDengine, achieving real-time data storage.
## Creating a Task
### 1. Add a Data Source
On the data writing page, click the **+Add Data Source** button to enter the add data source page.
<figure>
<Image img={imgStep1} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in **Name**, for example, for environmental temperature and humidity monitoring, name it **environment-monitoring**.
Select **OPC-DA** from the **Type** dropdown list.
If the taosX service is running on the same server as OPC-DA, **Proxy** is not necessary; otherwise, configure **Proxy**: select a specified proxy from the dropdown, or click the **+Create New Proxy** button on the right to create a new proxy and follow the prompts to configure the proxy.
Select a target database from the **Target Database** dropdown list, or click the **+Create Database** button on the right to create a new database.
<figure>
<Image img={imgStep2} alt=""/>
</figure>
### 3. Configure Connection Information
Fill in the **OPC-DA Service Address** in the **Connection Configuration** area, for example: `127.0.0.1/Matrikon.OPC.Simulation.1`, and configure the authentication method.
Click the **Connectivity Check** button to check if the data source is available.
<figure>
<Image img={imgStep3} alt=""/>
</figure>
### 4. Configure Points Set
**Points Set** can choose to use a CSV file template or **Select All Points**.
#### 4.1. Upload CSV Configuration File
You can download the blank CSV template, fill in the point information according to the template, and then upload the CSV configuration file to configure the points; alternatively, you can download the data points that match the configured filter conditions, in the format specified by the CSV template.
CSV files have the following rules:
1. File Encoding
The encoding format of the CSV file uploaded by the user must be one of the following:
(1) UTF-8 with BOM
(2) UTF-8 (i.e., UTF-8 without BOM)
2. Header Configuration Rules
The header is the first line of the CSV file, with the following rules:
(1) The header of the CSV can configure the following columns:
| No. | Column Name | Description | Required | Default Behavior |
|-----|-------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| 1 | tag_name | The id of the data point on the OPC DA server | Yes | None |
| 2 | stable | The supertable in TDengine corresponding to the data point | Yes | None |
| 3 | tbname | The subtable in TDengine corresponding to the data point | Yes | None |
| 4 | enable | Whether to collect data from this point | No | Use a unified default value `1` for enable |
| 5 | value_col | The column name in TDengine corresponding to the collected value of the data point | No | Use a unified default value `val` as the value_col |
| 6 | value_transform | The transform function executed in taosX for the collected value of the data point | No | Do not perform a transform on the collected value |
| 7 | type | The data type of the collected value of the data point | No | Use the original type of the collected value as the data type in TDengine |
| 8 | quality_col | The column name in TDengine corresponding to the quality of the collected value | No | Do not add a quality column in TDengine |
| 9 | ts_col | The timestamp column in TDengine corresponding to the original timestamp of the data point | No | Among ts_col, request_ts_col, and received_ts_col, when two or more of these columns are configured, the leftmost one is used as the primary key in TDengine. |
| 10 | request_ts_col | The timestamp column in TDengine corresponding to the timestamp when the data point value was requested | No | Same as above |
| 11 | received_ts_col | The timestamp column in TDengine corresponding to the timestamp when the data point value was received | No | Same as above |
| 12 | ts_transform | The transform function executed in taosX for the original timestamp of the data point | No | Do not perform a transform on the original timestamp of the data point |
| 13 | request_ts_transform | The transform function executed in taosX for the request timestamp of the data point | No | Do not perform a transform on the request timestamp of the data point |
| 14 | received_ts_transform | The transform function executed in taosX for the received timestamp of the data point | No | Do not perform a transform on the received timestamp of the data point |
| 15 | tag::VARCHAR(200)::name | The Tag column in TDengine corresponding to the data point. Here `tag` is a reserved keyword, indicating that this column is a tag column; `VARCHAR(200)` indicates the type of this tag, which can also be another legal type; `name` is the actual name of this tag. | No | If one or more tag columns are configured, use the configured tag columns; if no tag columns are configured and the stable exists in TDengine, use the tags of the stable in TDengine; if no tag columns are configured and the stable does not exist in TDengine, automatically add the following two tag columns by default: tag::VARCHAR(256)::point_id and tag::VARCHAR(256)::point_name |
(2) In the CSV Header, there cannot be duplicate columns;
(3) In the CSV Header, columns like `tag::VARCHAR(200)::name` can be configured multiple times, corresponding to multiple Tags in TDengine, but the names of the Tags cannot be duplicated.
(4) In the CSV Header, the order of columns does not affect the CSV file validation rules;
(5) In the CSV Header, columns that are not listed in the table above, such as: serial number, will be automatically ignored.
3. Row Configuration Rules
Each Row in the CSV file configures an OPC data point. The rules for Rows are as follows:
(1) Correspondence with columns in the Header
| Number | Column in Header | Type of Value | Range of Values | Mandatory | Default Value |
|--------|-------------------------| ------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------------------------ |
| 1 | tag_name | String | Strings like `root.parent.temperature`, must meet the OPC DA ID specification | Yes | |
| 2 | enable | int | 0: Do not collect this point, and delete the corresponding subtable in TDengine before the OPC DataIn task starts; 1: Collect this point, do not delete the subtable before the OPC DataIn task starts. | No | 1 |
| 3 | stable | String | Any string that meets the TDengine supertable naming convention; if there are special characters `.`, replace with underscore. If `{type}` exists: if type in CSV file is not empty, replace with the value of type; if empty, replace with the original type of the collected value | Yes | |
| 4 | tbname | String | Any string that meets the TDengine subtable naming convention; for OPC UA: if `{ns}` exists, replace with ns from point_id; if `{id}` exists, replace with id from point_id; for OPC DA: if `{tag_name}` exists, replace with tag_name | Yes | |
| 5 | value_col | String | Column name that meets TDengine naming convention | No | val |
| 6 | value_transform | String | Computation expressions supported by Rhai engine, such as: `(val + 10) / 1000 * 2.0`, `log(val) + 10`, etc.; | No | None |
| 7 | type | String | Supported types include: b/bool/i8/tinyint/i16/smallint/i32/int/i64/bigint/u8/tinyint unsigned/u16/smallint unsigned/u32/int unsigned/u64/bigint unsigned/f32/float/f64/double/timestamp/timestamp(ms)/timestamp(us)/timestamp(ns)/json | No | Original type of data point value |
| 8 | quality_col | String | Column name that meets TDengine naming convention | No | None |
| 9 | ts_col | String | Column name that meets TDengine naming convention | No | ts |
| 10 | request_ts_col | String | Column name that meets TDengine naming convention | No | qts |
| 11 | received_ts_col | String | Column name that meets TDengine naming convention | No | rts |
| 12 | ts_transform | String | Supports the +, -, *, /, % operators, for example: ts / 1000 * 1000 sets the last 3 digits of a millisecond timestamp to 0; ts + 8 * 3600 * 1000 adds 8 hours to a millisecond-precision timestamp; ts - 8 * 3600 * 1000 subtracts 8 hours from a millisecond-precision timestamp | No | None |
| 13 | request_ts_transform | String | Same as ts_transform, applied to the request timestamp | No | None |
| 14 | received_ts_transform | String | Same as ts_transform, applied to the received timestamp | No | None |
| 15 | tag::VARCHAR(200)::name | String | The value in tag, when the tag type is VARCHAR, it can be in Chinese | No | NULL |
(2) `tag_name` is unique throughout the DataIn task, that is: in an OPC DataIn task, a data point can only be written to one subtable in TDengine. If you need to write a data point to multiple subtables, you need to create multiple OPC DataIn tasks;
(3) When `tag_name` is different but `tbname` is the same, `value_col` must be different. This configuration allows data from multiple data points of different types to be written to different columns in the same subtable. This corresponds to the "OPC data into TDengine wide table" scenario.
4. Other Rules
(1) If the number of columns in Header and Row are not consistent, validation fails, and the user is prompted with the line number that does not meet the requirements;
(2) Header is on the first line and cannot be empty;
(3) There must be at least one data point row;
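As a hypothetical illustration (tag names, supertable names, and tag values are placeholders), a minimal OPC DA points CSV following the rules above might look like this:

```csv
tag_name,stable,tbname,enable,value_col,type,ts_col,tag::VARCHAR(100)::unit
root.meter1.voltage,opc_volt,meter1_voltage,1,val,f32,ts,V
root.meter1.current,opc_current,meter1_current,1,val,f32,ts,mA
```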
#### 4.2. Selecting Data Points
Data points can be filtered by configuring the **Root Node ID** and **Regular Expression**.
Configure **Supertable Name** and **Table Name** to specify the supertable and subtable where the data will be written.
Configure **Primary Key Column**, choosing `origin_ts` to use the original timestamp of the OPC data point as the primary key in TDengine, `request_ts` to use the timestamp when the data was requested as the primary key, or `received_ts` to use the timestamp when the data was received as the primary key. Configure **Primary Key Alias** to specify the name of the TDengine timestamp column.
<figure>
<Image img={imgStep4} alt=""/>
</figure>
### 5. Collection Configuration
In the collection configuration, set the current task's collection interval, connection timeout, and collection timeout.
<figure>
<Image img={imgStep5} alt=""/>
</figure>
As shown in the image:
- **Connection Timeout**: Configures the timeout for connecting to the OPC server, default is 10 seconds.
- **Collection Timeout**: If data is not returned from the OPC server within the set time during data point reading, the read fails, default is 10 seconds.
- **Collection Interval**: Default is 10 seconds, the interval for data point collection, starting from the end of the last data collection, polling to read the latest value of the data point and write it into TDengine.
When using **Select Data Points** in the **Data Point Set**, the collection configuration can configure **Data Point Update Mode** and **Data Point Update Interval** to enable dynamic data point updates. **Dynamic Data Point Update** means that during the task operation, if OPC Server adds or deletes data points, the matching data points will automatically be added to the current task without needing to restart the OPC task.
- Data Point Update Mode: Can choose `None`, `Append`, `Update`.
- None: Do not enable dynamic data point updates;
- Append: Enable dynamic data point updates, but only append;
- Update: Enable dynamic data point updates, append or delete;
- Data Point Update Interval: Effective when "Data Point Update Mode" is `Append` and `Update`. Unit: seconds, default value is 600, minimum value: 60, maximum value: 2147483647.
### 6. Advanced Options
<figure>
<Image img={imgStep6} alt=""/>
</figure>
As shown above, configure advanced options for more detailed optimization of performance, logs, etc.
**Log Level** defaults to `info`, with options `error`, `warn`, `info`, `debug`, `trace`.
In **Maximum Write Concurrency**, set the limit for the maximum number of concurrent writes to taosX. Default value: 0, meaning auto, automatically configures concurrency.
In **Batch Size**, set the batch size for each write, that is, the maximum number of messages sent at once.
In **Batch Delay**, set the maximum delay for a single send (in seconds). When the timeout ends, as long as there is data, it is sent immediately even if it does not meet the **Batch Size**.
In **Save Raw Data**, choose whether to save raw data. Default value: no.
When saving raw data, the following 2 parameters are effective.
In **Maximum Retention Days**, set the maximum retention days for raw data.
In **Raw Data Storage Directory**, set the path for saving raw data. If using Agent, the storage path refers to the path on the server where Agent is located, otherwise it is on the taosX server. The path can include placeholders `$DATA_DIR` and `:id` as part of the path.
- On Linux platform, `$DATA_DIR` is /var/lib/taos/taosx, by default the storage path is `/var/lib/taos/taosx/tasks/<task_id>/rawdata`.
- On Windows platform, `$DATA_DIR` is C:\TDengine\data\taosx, by default the storage path is `C:\TDengine\data\taosx\tasks\<task_id>\rawdata`.
### 7. Completion
Click the **Submit** button to complete the creation of the OPC DA to TDengine data synchronization task, return to the **Data Source List** page to view the task execution status.
## Add Data Points
During the task execution, click **Edit**, then click the **Add Data Points** button to append data points to the CSV file.
<figure>
<Image img={imgStep7} alt=""/>
</figure>
In the pop-up form, fill in the information for the data points.
<figure>
<Image img={imgStep8} alt=""/>
</figure>
Click the **Confirm** button to complete the addition of data points.

View File

@ -0,0 +1,210 @@
---
title: MQTT
slug: /advanced-features/data-connectors/mqtt
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/mqtt-01.png';
import imgStep02 from '../../assets/mqtt-02.png';
import imgStep03 from '../../assets/mqtt-03.png';
import imgStep04 from '../../assets/mqtt-04.png';
import imgStep05 from '../../assets/mqtt-05.png';
import imgStep06 from '../../assets/mqtt-06.png';
import imgStep07 from '../../assets/mqtt-07.png';
import imgStep08 from '../../assets/mqtt-08.png';
import imgStep09 from '../../assets/mqtt-09.png';
import imgStep10 from '../../assets/mqtt-10.png';
import imgStep11 from '../../assets/mqtt-11.png';
import imgStep12 from '../../assets/mqtt-12.png';
import imgStep13 from '../../assets/mqtt-13.png';
import imgStep14 from '../../assets/mqtt-14.png';
This section describes how to create data migration tasks through the Explorer interface, migrating data from MQTT to the current TDengine cluster.
## Overview
MQTT stands for Message Queuing Telemetry Transport. It is a lightweight messaging protocol that is easy to implement and use.
TDengine can subscribe to data from an MQTT broker via an MQTT connector and write it into TDengine, enabling real-time data streaming.
## Creating a Task
### 1. Add a Data Source
On the data writing page, click the **+Add Data Source** button to enter the add data source page.
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in **Name**, such as: "test_mqtt";
Select **MQTT** from the **Type** dropdown list.
**Broker** is optional, you can select a specific broker from the dropdown list or click the **+Create New Broker** button on the right.
Select a target database from the **Target Database** dropdown list, or click the **+Create Database** button on the right.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure Connection and Authentication Information
Enter the MQTT broker's address in **MQTT Address**, for example: `192.168.1.42`
Enter the MQTT broker's port in **MQTT Port**, for example: `1883`
Enter the MQTT broker's username in **User**.
Enter the MQTT broker's password in **Password**.
<figure>
<Image img={imgStep03} alt=""/>
</figure>
### 4. Configure SSL Certificate
If the MQTT broker uses an SSL certificate, upload the certificate file in **SSL Certificate**.
<figure>
<Image img={imgStep04} alt=""/>
</figure>
### 5. Configure Collection Information
Fill in the collection task related configuration parameters in the **Collection Configuration** area.
Select the MQTT protocol version from the **MQTT Protocol** dropdown list. There are three options: `3.1`, `3.1.1`, `5.0`. The default value is 3.1.
Enter the client identifier in **Client ID**, after which a client id with the prefix `taosx` will be generated (for example, if the identifier entered is `foo`, the generated client id will be `taosxfoo`). If the switch at the end is turned on, the current task's task id will be concatenated after `taosx` and before the entered identifier (the generated client id will look like `taosx100foo`). All client ids connecting to the same MQTT address must be unique.
Enter the keep alive interval in **Keep Alive**. The keep alive interval is the time negotiated between the client and the broker for checking whether the client is active: if the broker does not receive any message from the client within this interval, it assumes the client has disconnected and closes the connection.
In **Clean Session**, choose whether to clear the session. The default value is true.
In the **Topics Qos Config**, fill in the topic name and QoS to subscribe. Use the following format: `{topic_name}::{qos}` (e.g., `my_topic::0`). MQTT protocol 5.0 supports shared subscriptions, allowing multiple clients to subscribe to the same topic for load balancing. Use the following format: `$share/{group_name}/{topic_name}::{qos}`, where `$share` is a fixed prefix indicating the enablement of shared subscription, and `group_name` is the client group name, similar to Kafka's consumer group.
In the **Topic Analysis**, fill in the MQTT topic parsing rules. The format is the same as the MQTT Topic, parsing each level of the MQTT Topic into corresponding variable names, with `_` indicating that the current level is ignored during parsing. For example: if the MQTT Topic `a/+/c` corresponds to the parsing rule `v1/v2/_`, it means assigning the first level `a` to variable `v1`, the value of the second level (where the wildcard `+` represents any value) to variable `v2`, and ignoring the value of the third level `c`, which will not be assigned to any variable. In the `payload parsing` below, the variables obtained from Topic parsing can also participate in various transformations and calculations.
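As a hypothetical worked example (the topic levels and variable names are placeholders), a message arriving on a three-level topic could be parsed as follows:

```text
MQTT Topic:    factory/line1/temperature
Parsing rule:  site/line/_
Result:        site = "factory", line = "line1"; the third level is ignored
```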
In the **Compression**, configure the message body compression algorithm. After receiving the message, taosX uses the corresponding compression algorithm to decompress the message body and obtain the original data. Options include none (no compression), gzip, snappy, lz4, and zstd, with the default being none.
In the **Char Encoding**, configure the message body encoding format. After receiving the message, taosX uses the corresponding encoding format to decode the message body and obtain the original data. Options include UTF_8, GBK, GB18030, and BIG5, with the default being UTF_8.
Click the **Check Connection** button to check if the data source is available.
<figure>
<Image img={imgStep05} alt=""/>
</figure>
### 6. Configure MQTT Payload Parsing
Fill in the Payload parsing related configuration parameters in the **MQTT Payload Parsing** area.
taosX can use a JSON extractor to parse data and allows users to specify the data model in the database, including specifying table names and supertable names, setting ordinary columns and tag columns, etc.
#### 6.1 Parsing
There are three methods to obtain sample data:
Click the **Retrieve from Server** button to get sample data from MQTT.
Click the **File Upload** button to upload a CSV file and obtain sample data.
Fill in the example data from the MQTT message body in **Message Body**.
JSON data supports JSONObject or JSONArray, and the json parser can parse the following data:
```json
{"id": 1, "message": "hello-word"}
{"id": 2, "message": "hello-word"}
```
or
```json
[{"id": 1, "message": "hello-word"},{"id": 2, "message": "hello-word"}]
```
The analysis results are as follows:
<figure>
<Image img={imgStep06} alt=""/>
</figure>
Click the **magnifying glass icon** to view the preview of the analysis results.
<figure>
<Image img={imgStep07} alt=""/>
</figure>
#### 6.2 Field Splitting
In **Extract or Split from Column**, fill in the fields to extract or split from the message body. For example: split the `message` field into `message_0` and `message_1` by selecting the split extractor, setting the separator to `-`, and the number to 2.
<figure>
<Image img={imgStep08} alt=""/>
</figure>
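With the sample message body shown earlier, the split rule above would yield fields along these lines (an illustrative sketch, not actual tool output):

```json
{"id": 1, "message": "hello-word", "message_0": "hello", "message_1": "word"}
```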
Click **Delete** to remove the current extraction rule.
Click **Add** to add more extraction rules.
Click the **magnifying glass icon** to view the preview of the extraction/split results.
<figure>
<Image img={imgStep09} alt=""/>
</figure>
#### 6.3 Data Filtering
In **Filter**, fill in the filtering conditions, for example: write `id != 1`, then only data with id not equal to 1 will be written to TDengine.
<figure>
<Image img={imgStep10} alt=""/>
</figure>
Click **Delete** to remove the current filtering rule.
Click the **magnifying glass icon** to view the preview of the filtering results.
<figure>
<Image img={imgStep11} alt=""/>
</figure>
#### 6.4 Table Mapping
In the **Target Supertable** dropdown, select a target supertable, or click the **Create Supertable** button on the right.
In **Mapping**, fill in the subtable name in the target supertable, for example: `t_{id}`. Fill in the mapping rules according to the requirements, where mapping supports setting default values.
<figure>
<Image img={imgStep12} alt=""/>
</figure>
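For instance, with the sample data above and the subtable name template `t_{id}`, the rows would map to subtables as sketched below (illustrative only):

```text
{"id": 1, "message": "hello-word"}  ->  subtable t_1
{"id": 2, "message": "hello-word"}  ->  subtable t_2
```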
Click **Preview** to view the mapping results.
<figure>
<Image img={imgStep13} alt=""/>
</figure>
### 7. Advanced Options
In the **Log Level** dropdown, select a log level. There are five options: `TRACE`, `DEBUG`, `INFO`, `WARN`, `ERROR`. The default is INFO.
When **saving raw data**, the following two parameters are effective.
Set the maximum retention days for raw data in **Maximum Retention Days**.
Set the storage path for raw data in **Raw Data Storage Directory**.
<figure>
<Image img={imgStep14} alt=""/>
</figure>
### 8. Completion
Click the **Submit** button to complete the creation of the MQTT to TDengine data synchronization task, return to the **Data Source List** page to view the status of the task execution.

View File

@ -0,0 +1,263 @@
---
title: Apache Kafka
sidebar_label: Kafka
slug: /advanced-features/data-connectors/kafka
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/kafka-01.png';
import imgStep02 from '../../assets/kafka-02.png';
import imgStep03 from '../../assets/kafka-03.png';
import imgStep04 from '../../assets/kafka-04.png';
import imgStep05 from '../../assets/kafka-05.png';
import imgStep06 from '../../assets/kafka-06.png';
import imgStep07 from '../../assets/kafka-07.png';
import imgStep08 from '../../assets/kafka-08.png';
import imgStep09 from '../../assets/kafka-09.png';
import imgStep10 from '../../assets/kafka-10.png';
import imgStep11 from '../../assets/kafka-11.png';
import imgStep12 from '../../assets/kafka-12.png';
import imgStep13 from '../../assets/kafka-13.png';
import imgStep14 from '../../assets/kafka-14.png';
import imgStep15 from '../../assets/kafka-15.png';
import imgStep16 from '../../assets/kafka-16.png';
import imgStep17 from '../../assets/kafka-17.png';
import imgStep18 from '../../assets/kafka-18.png';
This section describes how to create data migration tasks through the Explorer interface, migrating data from Kafka to the current TDengine cluster.
## Feature Overview
Apache Kafka is an open-source distributed streaming system used for stream processing, real-time data pipelines, and large-scale data integration.
TDengine can efficiently read data from Kafka and write it into TDengine, enabling historical data migration or real-time data streaming.
## Creating a Task
### 1. Add a Data Source
On the data writing page, click the **+Add Data Source** button to enter the add data source page.
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in **Name**, such as: "test_kafka";
Select **Kafka** from the **Type** dropdown list.
**Proxy** is optional; if needed, you can select a specific proxy from the dropdown, or click **+Create New Proxy** on the right.
Select a target database from the **Target Database** dropdown list, or click the **+Create Database** button on the right.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure Connection Information
Enter the **bootstrap-server** address, for example: `192.168.1.92`.
Enter the **Service Port**, for example: `9092`.
When there are multiple broker addresses, click the **+Add Broker** button at the bottom right of the connection settings to add more bootstrap-server and service port pairs.
<figure>
<Image img={imgStep03} alt=""/>
</figure>
### 4. Configure SASL Authentication Mechanism
If the server has enabled SASL authentication, you need to enable SASL here and configure the relevant content. Currently, three authentication mechanisms are supported: PLAIN/SCRAM-SHA-256/GSSAPI. Please choose according to the actual situation.
#### 4.1. PLAIN Authentication
Select the `PLAIN` authentication mechanism and enter the username and password:
<figure>
<Image img={imgStep04} alt=""/>
</figure>
#### 4.2. SCRAM (SCRAM-SHA-256) Authentication
Select the `SCRAM-SHA-256` authentication mechanism and enter the username and password:
<figure>
<Image img={imgStep05} alt=""/>
</figure>
#### 4.3. GSSAPI Authentication
Select `GSSAPI`, which will use the [RDkafka client](https://github.com/confluentinc/librdkafka) to invoke GSSAPI and apply the Kerberos authentication mechanism:
<figure>
<Image img={imgStep06} alt=""/>
</figure>
The required information includes:
- Kerberos service name, usually `kafka`;
- Kerberos authentication principal, i.e., the authentication username, such as `kafkaclient`;
- Kerberos initialization command (optional, generally not required);
- Kerberos keytab, which you need to provide and upload as a file.
All of the above information must be provided by the Kafka service administrator.
In addition, the [Kerberos](https://web.mit.edu/kerberos/) authentication service needs to be configured on the server. Use `apt install krb5-user` on Ubuntu; on CentOS, use `yum install krb5-workstation`.
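For reference, the client packages mentioned above can be installed as follows (assuming root or sudo privileges):
```shell
# Ubuntu / Debian
sudo apt install krb5-user

# CentOS / RHEL
sudo yum install krb5-workstation
```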
After configuration, you can use the [kcat](https://github.com/edenhill/kcat) tool to verify Kafka topic consumption:
```shell
kcat <topic> \
-b <kafka-server:port> \
-G kcat \
-X security.protocol=SASL_PLAINTEXT \
-X sasl.mechanism=GSSAPI \
-X sasl.kerberos.keytab=</path/to/kafkaclient.keytab> \
-X sasl.kerberos.principal=<kafkaclient> \
-X sasl.kerberos.service.name=kafka
```
If the error "Server xxxx not found in kerberos database" occurs, you need to configure the domain name corresponding to the Kafka node and enable reverse DNS resolution (`rdns = true`) in the Kerberos client configuration file `/etc/krb5.conf`.
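As a minimal sketch, the relevant setting in `/etc/krb5.conf` might look like the following; the realm name is a placeholder and the rest of the file is omitted:
```ini
[libdefaults]
    # placeholder realm, replace with the realm provided by the Kafka service administrator
    default_realm = EXAMPLE.COM
    # enable reverse DNS resolution as described above
    rdns = true
```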
### 5. Configure SSL Certificate
If the server has enabled SSL encryption authentication, SSL needs to be enabled here and related content configured.
<figure>
<Image img={imgStep07} alt=""/>
</figure>
### 6. Configure Collection Information
Fill in the configuration parameters related to the collection task in the **Collection Configuration** area.
Enter the timeout duration in **Timeout**. If no data is consumed from Kafka within the timeout, the data collection task will exit. The default value is 0 ms; when the timeout is set to 0, the task waits indefinitely until data becomes available or an error occurs.
Enter the Topic name to be consumed in **Topic**. Multiple Topics can be configured, separated by commas. For example: `tp1,tp2`.
Enter the client identifier in **Client ID**. After entering, a client ID with the prefix `taosx` will be generated (for example, if the identifier entered is `foo`, the generated client ID will be `taosxfoo`). If the switch at the end is turned on, the current task's task ID will be concatenated after `taosx` and before the entered identifier (the generated client ID will look like `taosx100foo`). Note that when using multiple taosX subscriptions for the same Topic to achieve load balancing, a consistent client ID must be entered to achieve the balancing effect.
Enter the consumer group identifier in **Consumer Group ID**. After entering, a consumer group ID with the prefix `taosx` will be generated (for example, if the identifier entered is `foo`, the generated consumer group ID will be `taosxfoo`). If the switch at the end is turned on, the current task's task ID will be concatenated after `taosx` and before the entered identifier (the generated consumer group ID will look like `taosx100foo`).
In the **Offset** dropdown, select from which Offset to start consuming data. There are three options: `Earliest`, `Latest`, `ByTime(ms)`. The default is Earliest.
- Earliest: Requests the earliest offset.
- Latest: Requests the latest offset.
- ByTime(ms): Requests offsets starting from a specified timestamp in milliseconds.
Set the maximum duration to wait for insufficient data when fetching messages in **Maximum Duration to Fetch Data** (in milliseconds); the default value is 100 ms.
Click the **Connectivity Check** button to check if the data source is available.
<figure>
<Image img={imgStep08} alt=""/>
</figure>
### 7. Configure Payload Parsing
Fill in the configuration parameters related to Payload parsing in the **Payload Parsing** area.
#### 7.1 Parsing
There are three methods to obtain sample data:
- Click the **Retrieve from Server** button to get sample data from Kafka.
- Click the **File Upload** button to upload a CSV file and obtain sample data.
- Enter sample data from the Kafka message body in **Message Body**.
JSON data supports JSONObject or JSONArray, and the following data can be parsed using a JSON parser:
```json
{"id": 1, "message": "hello-word"}
{"id": 2, "message": "hello-word"}
```
or
```json
[{"id": 1, "message": "hello-word"},{"id": 2, "message": "hello-word"}]
```
The parsing results are shown as follows:
<figure>
<Image img={imgStep09} alt=""/>
</figure>
Click the **magnifying glass icon** to view the preview parsing results.
<figure>
<Image img={imgStep10} alt=""/>
</figure>
#### 7.2 Field Splitting
In **Extract or Split from Columns**, fill in the fields to extract or split from the message body, for example: split the `message` field into `message_0` and `message_1`, select the split extractor, fill in the separator as `-`, and the number as `2`.
Click **Add** to add more extraction rules.
Click **Delete** to delete the current extraction rule.
<figure>
<Image img={imgStep11} alt=""/>
</figure>
Click the **magnifying glass icon** to view the preview extraction/splitting results.
<figure>
<Image img={imgStep12} alt=""/>
</figure>
#### 7.3 Data Filtering
In **Filter**, fill in the filtering conditions, for example: enter `id != 1`, then only data with id not equal to 1 will be written to TDengine.
Click **Add** to add more filtering rules.
Click **Delete** to delete the current filtering rule.
<figure>
<Image img={imgStep13} alt=""/>
</figure>
Click the **magnifying glass icon** to view the preview filtering results.
<figure>
<Image img={imgStep14} alt=""/>
</figure>
#### 7.4 Table Mapping
In the **Target Supertable** dropdown, select a target supertable, or click the **Create Supertable** button on the right.
In the **Mapping** section, fill in the name of the subtable in the target supertable, for example: `t_{id}`. Fill in the mapping rules as required, where mapping supports setting default values.
<figure>
<Image img={imgStep15} alt=""/>
</figure>
Click **Preview** to view the results of the mapping.
<figure>
<Image img={imgStep16} alt=""/>
</figure>
### 8. Configure Advanced Options
The **Advanced Options** area is collapsed by default, click the `>` on the right to expand it, as shown below:
<figure>
<Image img={imgStep17} alt=""/>
</figure>
<figure>
<Image img={imgStep18} alt=""/>
</figure>
### 9. Completion of Creation
Click the **Submit** button to complete the creation of the Kafka to TDengine data synchronization task. Return to the **Data Source List** page to view the status of the task execution.
---
title: InfluxDB
slug: /advanced-features/data-connectors/influxdb
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/influxdb-01.png';
import imgStep02 from '../../assets/influxdb-02.png';
import imgStep03 from '../../assets/influxdb-03.png';
import imgStep04 from '../../assets/influxdb-04.png';
import imgStep05 from '../../assets/influxdb-05.png';
import imgStep06 from '../../assets/influxdb-06.png';
import imgStep07 from '../../assets/influxdb-07.png';
import imgStep08 from '../../assets/influxdb-08.png';
import imgStep09 from '../../assets/influxdb-09.png';
import imgStep10 from '../../assets/influxdb-10.png';
This section describes how to create a data migration task through the Explorer interface to migrate data from InfluxDB to the current TDengine cluster.
## Feature Overview
InfluxDB is a popular open-source time-series database optimized for handling large volumes of time-series data. TDengine can efficiently read data from InfluxDB through the InfluxDB connector and write it into TDengine, enabling historical data migration or real-time data synchronization.
The task saves progress information to the disk during operation, so if the task is paused and restarted, or if it automatically recovers from an anomaly, it will not start over. For more options, it is recommended to read the explanations of each form field on the task creation page in detail.
## Creating a Task
### 1. Add a Data Source
Click the **+ Add Data Source** button in the upper left corner of the data writing page to enter the add data source page, as shown below:
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in the **Name** field, for example *`test_influxdb_01`*.
Select *`InfluxDB`* from the **Type** dropdown menu, as shown below (the fields on the page will change after selection).
**Proxy** is optional. If needed, you can select a specific proxy from the dropdown menu, or click the **+ Create New Proxy** button on the right.
**Target Database** is required. Since InfluxDB can store data with time precision of seconds, milliseconds, microseconds, and nanoseconds, you need to select a *`nanosecond precision database`* here, or click the **+ Create Database** button on the right.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure Connection Information
Fill in the *`connection information for the source InfluxDB database`* in the **Connection Configuration** area, as shown below:
<figure>
<Image img={imgStep03} alt=""/>
</figure>
### 4. Configure Authentication Information
In the **Authentication** area, there are two tabs, *`1.x version`* and *`2.x version`*, because the authentication parameters and APIs differ significantly between InfluxDB versions. Please choose according to your actual situation:
*`1.x version`*
**Version** Select the version of the source InfluxDB database from the dropdown menu.
**User** Enter the user of the source InfluxDB database, who must have read permissions in that organization.
**Password** Enter the login password for the above user in the source InfluxDB database.
<figure>
<Image img={imgStep04} alt=""/>
</figure>
*`2.x version`*
**Version** Select the version of the source InfluxDB database from the dropdown menu.
**Organization ID** Enter the organization ID of the source InfluxDB database, which is a string of hexadecimal characters, not the organization name, and can be obtained from the InfluxDB console's Organization->About page.
**Token** Enter the access token for the source InfluxDB database, which must have read permissions in that organization.
**Add Database Retention Policy** This is a *`Yes/No`* toggle. InfluxQL requires a combination of database and retention policy (DBRP) to query data. The cloud version of InfluxDB and some 2.x versions require manually adding this mapping. Turn on this switch, and the connector can automatically add it when executing tasks.
<figure>
<Image img={imgStep05} alt=""/>
</figure>
Below the **Authentication** area, there is a **Connectivity Check** button. Users can click this button to check if the information filled in above can normally access the data of the source InfluxDB database. The check results are shown below:
**Failed**
<figure>
<Image img={imgStep06} alt=""/>
</figure>
**Successful**
<figure>
<Image img={imgStep07} alt=""/>
</figure>
### 5. Configure Task Information
**Bucket** is a named space in the InfluxDB database for storing data. Each task needs to specify a bucket. Users need to first click the **Get Schema** button on the right to obtain the data structure information of the current source InfluxDB database, and then select from the dropdown menu as shown below:
<figure>
<Image img={imgStep08} alt=""/>
</figure>
**Measurements** are optional. Users can select one or more Measurements to synchronize from the dropdown menu. If none are specified, all will be synchronized.
**Start Time** refers to the start time of the data in the source InfluxDB database. The timezone for the start time uses the timezone selected in explorer, and this field is required.
**End Time** refers to the end time of the data in the source InfluxDB database. If no end time is specified, synchronization of the latest data will continue; if an end time is specified, synchronization will only continue up to this end time. The timezone for the end time uses the timezone selected in explorer, and this field is optional.
**Time Range per Read (minutes)** is the maximum time range for the connector to read data from the source InfluxDB database in a single read. This is a very important parameter, and users need to decide based on server performance and data storage density. If the range is too small, the execution speed of the synchronization task will be very slow; if the range is too large, it may cause the InfluxDB database system to fail due to high memory usage.
**Delay (seconds)** is an integer between 1 and 30. To eliminate the impact of out-of-order data, TDengine always waits for the duration specified here before reading data.
### 6. Configure Advanced Options
The **Advanced Options** area is collapsed by default. Click the `>` on the right to expand it, as shown below:
<figure>
<Image img={imgStep09} alt=""/>
</figure>
<figure>
<Image img={imgStep10} alt=""/>
</figure>
### 7. Completion of Creation
Click the **Submit** button to complete the creation of the data synchronization task from InfluxDB to TDengine. Return to the **Data Source List** page to view the status of the task execution.
---
title: OpenTSDB
slug: /advanced-features/data-connectors/opentsdb
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/opentsdb-01.png';
import imgStep02 from '../../assets/opentsdb-02.png';
import imgStep03 from '../../assets/opentsdb-03.png';
import imgStep04 from '../../assets/opentsdb-04.png';
import imgStep05 from '../../assets/opentsdb-05.png';
import imgStep06 from '../../assets/opentsdb-06.png';
import imgStep07 from '../../assets/opentsdb-07.png';
import imgStep08 from '../../assets/opentsdb-08.png';
This section describes how to create a data migration task through the Explorer interface to migrate data from OpenTSDB to the current TDengine cluster.
## Overview
OpenTSDB is a real-time monitoring information collection and display platform built on the HBase system. TDengine can efficiently read data from OpenTSDB through the OpenTSDB connector and write it into TDengine, achieving historical data migration or real-time data synchronization.
During the operation, the task will save progress information to the disk, so if the task is paused and restarted, or automatically recovers from an anomaly, it will not start over. For more options, it is recommended to read the explanations of each form field on the task creation page in detail.
## Creating a Task
### 1. Add a Data Source
Click the **+ Add Data Source** button in the upper left corner of the data writing page to enter the add data source page, as shown below:
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in the **Name** field, for example *`test_opentsdb_01`*.
Select *`OpenTSDB`* from the **Type** dropdown menu, as shown below (the fields on the page will change after selection).
**Proxy** is optional. If needed, you can select a specific proxy from the dropdown menu, or click the **+ Create New Proxy** button on the right.
**Target Database** is required. Since OpenTSDB stores data with millisecond precision, you need to select a *`millisecond precision database`*, or click the **+ Create Database** button on the right.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure Connection Information
Fill in the *`connection information for the source OpenTSDB database`* in the **Connection Configuration** area, as shown below:
<figure>
<Image img={imgStep03} alt=""/>
</figure>
Below the **Connection Configuration** area, there is a **Connectivity Check** button. Users can click this button to check whether the information filled in above can normally access the data from the source OpenTSDB database. The check results are shown below:
**Failed**
<figure>
<Image img={imgStep04} alt=""/>
</figure>
**Successful**
<figure>
<Image img={imgStep05} alt=""/>
</figure>
### 4. Configure Task Information
**Metrics** are the physical quantities in which data is stored in the OpenTSDB database. Users can specify multiple metrics to synchronize, or synchronize all data in the database if none are specified. If users specify metrics, they need to first click the **Get Metrics** button on the right to obtain all the metric information from the current source OpenTSDB database, and then select from the dropdown menu, as shown below:
<figure>
<Image img={imgStep06} alt=""/>
</figure>
**Start Time** refers to the start time of the data in the source OpenTSDB database, using the timezone selected in explorer, and this field is required.
**End Time** refers to the end time of the data in the source OpenTSDB database. If no end time is specified, the synchronization of the latest data will continue; if an end time is specified, synchronization will only continue up to this end time, using the timezone selected in explorer, and this field is optional.
**Time Range per Read (minutes)** is the maximum time range for the connector to read data from the source OpenTSDB database in a single operation. This is a very important parameter, and users need to decide based on server performance and data storage density. If the range is too small, the execution speed of the synchronization task will be very slow; if the range is too large, it may cause the OpenTSDB database system to fail due to excessive memory usage.
**Delay (seconds)** is an integer ranging from 1 to 30. To eliminate the impact of out-of-order data, TDengine always waits for the duration specified here before reading the data.
### 5. Configure Advanced Options
The **Advanced Options** area is collapsed by default. Click the `>` on the right to expand it, as shown in the following images:
<figure>
<Image img={imgStep07} alt=""/>
</figure>
<figure>
<Image img={imgStep08} alt=""/>
</figure>
### 6. Completion of Creation
Click the **Submit** button to complete the creation of the OpenTSDB to TDengine data synchronization task. Return to the **Data Source List** page to view the status of the task.
---
title: CSV File
slug: /advanced-features/data-connectors/csv-file
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/csv-file-01.png';
import imgStep02 from '../../assets/csv-file-02.png';
import imgStep03 from '../../assets/csv-file-03.png';
import imgStep04 from '../../assets/csv-file-04.png';
import imgStep05 from '../../assets/csv-file-05.png';
import imgStep06 from '../../assets/csv-file-06.png';
import imgStep07 from '../../assets/csv-file-07.png';
import imgStep10 from '../../assets/csv-file-10.png';
import imgStep11 from '../../assets/csv-file-11.png';
This section describes how to create data migration tasks through the Explorer interface, migrating data from CSV to the current TDengine cluster.
## Feature Overview
Import data from one or more CSV files into TDengine.
## Create Task
### 1. Add Data Source
On the data writing page, click the **+Add Data Source** button to enter the add data source page.
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in **Name**, such as: "test_csv";
Select **CSV** from the **Type** dropdown list.
Select a target database from the **Target Database** dropdown list, or click the **+Create Database** button on the right.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure CSV Options
In the **Include Header** area, choose whether the file includes a header; if enabled, the first line is treated as column information.
In the **Ignore First N Rows** area, fill in N to skip the first N rows of the CSV file.
In the **Field Separator** area, select the separator between CSV fields; the default is ",".
In the **Field Enclosure** area, select the character used to enclose field content when a field contains the separator or newline characters, ensuring the entire field is recognized correctly; the default is "\"".
In the **Comment Prefix** area, select the comment prefix; if a line in the CSV file starts with the character specified here, that line is ignored. The default is "#".
A small sample file that uses these defaults is shown after the figure below.
<figure>
<Image img={imgStep03} alt=""/>
</figure>
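For illustration, a CSV file matching these defaults (header included, comma separator, double-quote enclosure, `#` comment prefix) might look like the following; the column names are hypothetical:
```csv
ts,groupid,message
# lines starting with the comment prefix are ignored
2024-03-14T08:00:00.000,1,"hello-word"
2024-03-14T08:00:01.000,2,"hello-word"
```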
### 4. Configure Parsing CSV File
Upload a local CSV file, for example: test-json.csv. This example CSV file will be used later to configure extraction and filtering conditions.
#### 4.1 Parsing
Click **Select File**, choose test-json.csv, then click **Parse** to preview the recognized columns.
<figure>
<Image img={imgStep04} alt=""/>
</figure>
**Preview Parsing Results**
<figure>
<Image img={imgStep05} alt=""/>
</figure>
#### 4.2 Field Splitting
In **Extract or Split from Column**, fill in the fields to extract or split from the message body, for example: split the `message` field into `text_0` and `text_1`, select the split extractor, fill in the separator as `-`, and the number as `2`.
Click **Delete** to remove the current extraction rule.
Click **Add** to add more extraction rules.
<figure>
<Image img={imgStep06} alt=""/>
</figure>
Click the **Magnifying Glass Icon** to preview the extraction or splitting results.
<figure>
<Image img={imgStep07} alt=""/>
</figure>
<!-- In **Filter**, fill in the filtering conditions, for example: fill in `id != 1`, then only data with id not equal to 1 will be written into TDengine.
Click **Delete** to remove the current filtering rule.
![csv-08.png](./csv-08.png)
Click the **Magnifying Glass Icon** to view the preview filtering results.
![csv-09.png](./csv-09.png) -->
#### 4.3 Table Mapping
Select a target supertable from the **Target Supertable** dropdown list, or click the **Create Supertable** button on the right.
In **Mapping**, fill in the subtable name of the target supertable, for example: `t_${groupid}`.
<figure>
<Image img={imgStep10} alt=""/>
</figure>
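Continuing the hypothetical sample file above, the `t_${groupid}` template derives one subtable name per distinct `groupid` value, for example:
```text
groupid = 1  ->  subtable t_1
groupid = 2  ->  subtable t_2
```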
Click **Preview** to preview the mapping results.
<figure>
<Image img={imgStep11} alt=""/>
</figure>
### 5. Completion
Click the **Submit** button to complete the creation of the CSV to TDengine data synchronization task. Return to the **Data Source List** page to view the status of the task execution.
---
title: AVEVA Historian
slug: /advanced-features/data-connectors/aveva-historian
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/aveva-historian-01.png';
import imgStep02 from '../../assets/aveva-historian-02.png';
import imgStep03 from '../../assets/aveva-historian-03.png';
import imgStep04 from '../../assets/aveva-historian-04.png';
import imgStep05 from '../../assets/aveva-historian-05.png';
import imgStep06 from '../../assets/aveva-historian-06.png';
import imgStep07 from '../../assets/aveva-historian-07.png';
import imgStep08 from '../../assets/aveva-historian-08.png';
This section describes how to create data migration/data synchronization tasks through the Explorer interface, migrating/synchronizing data from AVEVA Historian to the current TDengine cluster.
## Feature Overview
AVEVA Historian is an industrial big data analytics software, formerly known as Wonderware. It captures and stores high-fidelity industrial big data, unleashing constrained potential to improve operations.
TDengine can efficiently read data from AVEVA Historian and write it into TDengine, enabling historical data migration or real-time data synchronization.
## Creating Tasks
### 1. Add a Data Source
On the data writing page, click the **+Add Data Source** button to enter the add data source page.
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in **Name**, such as: "test_avevaHistorian";
Select **AVEVA Historian** from the **Type** dropdown list.
**Proxy** is optional. If needed, you can select a specific proxy from the dropdown, or click the **+Create New Proxy** button on the right.
Select a target database from the **Target Database** dropdown list, or click the **+Create Database** button on the right.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure Connection Information
In the **Connection Configuration** area, fill in the **Server Address** and **Server Port**.
In the **Authentication** area, fill in the **Username** and **Password**.
Click the **Connectivity Check** button to check if the data source is available.
<figure>
<Image img={imgStep03} alt=""/>
</figure>
### 4. Configure Collection Information
Fill in the collection task related configuration parameters in the **Collection Configuration** area.
#### 4.1. Migrate Data
If you want to perform data migration, configure the following parameters:
Select **migrate** from the **Collection Mode** dropdown list.
In **Tags**, fill in the list of tags to migrate, separated by commas (,).
In **Tag Group Size**, fill in the size of the tag group.
In **Task Start Time**, fill in the start time of the data migration task.
In **Task End Time**, fill in the end time of the data migration task.
In **Query Time Window**, fill in a time interval; the data migration task will divide the data into time windows of this size.
<figure>
<Image img={imgStep04} alt=""/>
</figure>
#### 4.2. Synchronize Data from the History Table
If you want to synchronize data from the **Runtime.dbo.History** table to TDengine, configure the following parameters:
Select **synchronize** from the **Collection Mode** dropdown list.
In **Table**, select **Runtime.dbo.History**.
In **Tags**, fill in the list of tags to migrate, separated by commas (,).
In **Tag Group Size**, fill in the size of the tag group.
In **Task Start Time**, fill in the start time of the data migration task.
In **Query Time Window**, fill in a time interval; the historical data part will be divided into time windows of this size.
In **Real-time Synchronization Interval**, fill in a time interval; the real-time data part will poll for data at this interval.
In **Disorder Time Upper Limit**, fill in a time interval; during real-time synchronization, data written to the source database later than this limit may be lost.
<figure>
<Image img={imgStep05} alt=""/>
</figure>
#### 4.3. Synchronize Data from the Live Table
If you want to synchronize data from the **Runtime.dbo.Live** table to TDengine, configure the following parameters:
Select **synchronize** from the **Collection Mode** dropdown list.
In **Table**, select **Runtime.dbo.Live**.
In **Tags**, fill in the list of tags to migrate, separated by commas (,).
In **Real-time Synchronization Interval**, fill in a time interval, the real-time data part will poll data according to this interval.
<figure>
<Image img={imgStep06} alt=""/>
</figure>
### 5. Configure Data Mapping
Fill in the data mapping related configuration parameters in the **Data Mapping** area.
Click the **Retrieve from Server** button to fetch sample data from the AVEVA Historian server.
In **Extract or Split from Column**, fill in the fields to extract or split from the message body, for example: split the `vValue` field into `vValue_0` and `vValue_1`, select the split extractor, fill in the separator as `,`, and number as 2.
In **Filter**, fill in the filtering conditions, for example: enter `Value > 0`, then only data where Value is greater than 0 will be written to TDengine.
In **Mapping**, select the supertable in TDengine to which you want to map, as well as the columns to map to the supertable.
Click **Preview** to view the results of the mapping.
<figure>
<Image img={imgStep07} alt=""/>
</figure>
### 6. Configure Advanced Options
Fill in the related configuration parameters in the **Advanced Options** area.
Set the maximum read concurrency in **Maximum Read Concurrency**. The default value is 0, which means auto: the concurrency is configured automatically.
Set the batch size for each write in **Batch Size**, that is, the maximum number of messages sent at once.
In **Save Raw Data**, choose whether to save the raw data. The default value is No.
When saving raw data is enabled, the following two parameters take effect.
Set the maximum retention days for raw data in **Maximum Retention Days**.
Set the storage path for raw data in **Raw Data Storage Directory**.
<figure>
<Image img={imgStep08} alt=""/>
</figure>
### 7. Completion of Creation
Click the **Submit** button to complete the creation of the task. After submitting the task, return to the **Data Writing** page to view the status of the task.
---
title: MySQL
slug: /advanced-features/data-connectors/mysql
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/mysql-01.png';
import imgStep02 from '../../assets/mysql-02.png';
import imgStep03 from '../../assets/mysql-03.png';
import imgStep04 from '../../assets/mysql-04.png';
import imgStep05 from '../../assets/mysql-05.png';
import imgStep06 from '../../assets/mysql-06.png';
import imgStep07 from '../../assets/mysql-07.png';
import imgStep08 from '../../assets/mysql-08.png';
This section describes how to create data migration tasks through the Explorer interface, migrating data from MySQL to the current TDengine cluster.
## Overview
MySQL is one of the most popular relational databases. Many systems have used or are using MySQL databases to store data reported by IoT and industrial internet devices. However, as the number of devices in the access systems grows and the demand for real-time data feedback from users increases, MySQL can no longer meet business needs. Starting from TDengine Enterprise Edition 3.3.0.0, TDengine can efficiently read data from MySQL and write it into TDengine, achieving historical data migration or real-time data synchronization, and solving the technical pain points faced by businesses.
## Creating a Task
### 1. Add a Data Source
Click the **+ Add Data Source** button in the top left corner of the data writing page to enter the Add Data Source page, as shown below:
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in the **Name** field, for example *`test_mysql_01`*.
Select *`MySQL`* from the **Type** dropdown menu, as shown below (the fields on the page will change after selection).
**Proxy** is optional. If needed, you can select a specific proxy from the dropdown menu, or click the **+ Create New Proxy** button on the right to create a new proxy.
**Target Database** is required. You can click the **+ Create Database** button on the right to create a new database.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure Connection Information
Fill in the *`connection information for the source MySQL database`* in the **Connection Configuration** area, as shown below:
<figure>
<Image img={imgStep03} alt=""/>
</figure>
### 4. Configure Authentication Information
**User** Enter the user of the source MySQL database, who must have read permissions in the organization.
**Password** Enter the login password for the user mentioned above in the source MySQL database.
<figure>
<Image img={imgStep04} alt=""/>
</figure>
### 5. Configure Connection Options
**Character Set** Set the character set for the connection. The default character set is utf8mb4, which is supported since MySQL 5.5.3; if connecting to an older version, it is recommended to change to utf8.
Options include utf8, utf8mb4, utf16, utf32, gbk, big5, latin1, ascii.
**SSL Mode** Set whether to negotiate a secure SSL TCP/IP connection with the server or the priority of negotiation. The default value is PREFERRED. Options include DISABLED, PREFERRED, REQUIRED.
<figure>
<Image img={imgStep05} alt=""/>
</figure>
Then click the **Check Connectivity** button to verify whether the information filled in above can be used to fetch data from the source MySQL database.
### 6. Configure SQL Query
**Subtable Field** is used to split subtables. It is a `select distinct` SQL statement that queries distinct combinations of the specified fields, usually corresponding to the tags in transform:
> This configuration is mainly intended to solve the problem of out-of-order data during migration. It must be used together with **SQL Template**, otherwise it cannot achieve the expected effect. Usage example:
>
> 1. Fill in the subtable field statement `select distinct col_name1, col_name2 from table`, which means using the fields col_name1 and col_name2 in the source table to split the subtables of the target supertable
> 2. Add subtable field placeholders in the **SQL Template**, for example, the `${col_name1} and ${col_name2}` part in `select * from table where ts >= ${start} and ts < ${end} and ${col_name1} and ${col_name2}`
> 3. Configure `col_name1` and `col_name2` two tag mappings in **transform**
**SQL Template** is the SQL statement template used for querying. The SQL statement must include time range conditions, and the start and end times must appear in pairs. The time range defined in the SQL statement template consists of a column representing time in the source database and the placeholders defined below. A complete example is sketched after the notes below.
> SQL uses different placeholders to represent different time format requirements, specifically the following placeholder formats:
>
> 1. `${start}`, `${end}`: Represents RFC3339 format timestamps, e.g.: 2024-03-14T08:00:00+0800
> 2. `${start_no_tz}`, `${end_no_tz}`: Represents RFC3339 strings without timezone: 2024-03-14T08:00:00
> 3. `${start_date}`, `${end_date}`: Represents date only, e.g.: 2024-03-14
>
> To solve the problem of data migration disorder, it is advisable to add sorting conditions in the query statement, such as `order by ts asc`.
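Putting the pieces together, a complete configuration might look like the following sketch; the table name `meters`, its columns, and the subtable-splitting fields `location` and `groupid` are all hypothetical:
```sql
-- Hypothetical subtable field statement:
--   select distinct location, groupid from meters
-- Matching SQL template; the placeholders are filled in by the task at runtime:
select ts, current, voltage, phase, location, groupid
from meters
where ts >= ${start} and ts < ${end}
  and ${location} and ${groupid}
order by ts asc
```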
**Start Time** The start time for migrating data, this field is required.
**End Time** The end time for migrating data, which can be left blank. If set, the migration task will stop automatically after reaching the end time; if left blank, it will continuously synchronize real-time data and the task will not stop automatically.
**Query Interval** The time interval for querying data in segments, default is 1 day. To avoid querying a large amount of data at once, a data synchronization sub-task will use the query interval to segment the data retrieval.
**Delay Duration** In real-time data synchronization scenarios, to avoid losing data due to delayed writes, each synchronization task will read data from before the delay duration.
<figure>
<Image img={imgStep06} alt=""/>
</figure>
### 7. Configure Data Mapping
In the **Data Mapping** area, fill in the configuration parameters related to data mapping.
Click the **Retrieve from Server** button to fetch sample data from the MySQL server.
In **Extract or Split from Column**, fill in the fields to extract or split from the message body, for example: split the `vValue` field into `vValue_0` and `vValue_1`, select the split extractor, fill in the separator `,`, and number `2`.
In **Filter**, fill in the filtering conditions, for example: write `Value > 0`, then only data where Value is greater than 0 will be written to TDengine.
In **Mapping**, select the supertable in TDengine to map to, and the columns to map to the supertable.
Click **Preview** to view the results of the mapping.
<figure>
<Image img={imgStep07} alt=""/>
</figure>
### 8. Configure Advanced Options
The **Advanced Options** area is collapsed by default, click the `>` on the right to expand it, as shown below:
**Maximum Read Concurrency** The limit on the number of data source connections or reading threads. Modify this parameter when the default does not meet your needs or when adjusting resource usage.
**Batch Size** The maximum number of messages or rows sent at once. The default is 10000.
<figure>
<Image img={imgStep08} alt=""/>
</figure>
### 9. Completion
Click the **Submit** button to complete the creation of the data synchronization task from MySQL to TDengine, and return to the **Data Source List** page to view the task execution status.
---
title: PostgreSQL
slug: /advanced-features/data-connectors/postgresql
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/postgresql-01.png';
import imgStep02 from '../../assets/postgresql-02.png';
import imgStep03 from '../../assets/postgresql-03.png';
import imgStep04 from '../../assets/postgresql-04.png';
import imgStep05 from '../../assets/postgresql-05.png';
import imgStep06 from '../../assets/postgresql-06.png';
import imgStep07 from '../../assets/postgresql-07.png';
import imgStep08 from '../../assets/postgresql-08.png';
This section describes how to create a data migration task through the Explorer interface to migrate data from PostgreSQL to the current TDengine cluster.
## Feature Overview
PostgreSQL is a very powerful, open-source client/server relational database management system with many features found in large commercial RDBMSs, including transactions, subselects, triggers, views, foreign key referential integrity, and sophisticated locking capabilities.
TDengine can efficiently read data from PostgreSQL and write it to TDengine, enabling historical data migration or real-time data synchronization.
## Creating a Task
### 1. Add a Data Source
Click the **+ Add Data Source** button in the upper left corner of the data writing page to enter the add data source page, as shown below:
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in the **Name** field, for example *`test_postgres_01`*.
Select *`PostgreSQL`* from the **Type** dropdown menu, as shown below (the fields on the page will change after selection).
**Proxy** is optional. If needed, you can select a specific proxy from the dropdown menu or click the **+ Create New Proxy** button on the right to create a new proxy.
**Target Database** is required. You can click the **+ Create Database** button on the right to create a new database.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure Connection Information
Fill in the *`connection information for the source PostgreSQL database`* in the **Connection Configuration** area, as shown below:
<figure>
<Image img={imgStep03} alt=""/>
</figure>
### 4. Configure Authentication Information
**User** Enter the user of the source PostgreSQL database, who must have read permissions in the organization.
**Password** Enter the login password for the user mentioned above in the source PostgreSQL database.
<figure>
<Image img={imgStep04} alt=""/>
</figure>
### 5. Configure Connection Options
**Application Name** Set the application name to identify the connected application.
**SSL Mode** Set whether to negotiate a secure SSL TCP/IP connection with the server or the priority of such negotiation. The default value is PREFER. Options include DISABLE, ALLOW, PREFER, REQUIRE.
<figure>
<Image img={imgStep05} alt=""/>
</figure>
Then click the **Check Connectivity** button to verify whether the information filled in above can be used to fetch data from the source PostgreSQL database.
### 6. Configure SQL Query
**Subtable Field** Used to split subtables. It is a `select distinct` SQL statement querying distinct combinations of the specified fields, usually corresponding to the tags in transform:
> This configuration is mainly intended to solve the problem of out-of-order data during migration. It must be used in conjunction with **SQL Template**, otherwise it cannot achieve the expected effect. Usage example:
>
> 1. Fill in the subtable field statement `select distinct col_name1, col_name2 from table`, which means using the fields col_name1 and col_name2 in the source table to split the subtables of the target supertable
> 2. Add subtable field placeholders in the **SQL Template**, for example, the `${col_name1} and ${col_name2}` part in `select * from table where ts >= ${start} and ts < ${end} and ${col_name1} and ${col_name2}`
> 3. Configure `col_name1` and `col_name2` two tag mappings in **transform**
**SQL Template** The SQL statement template used for querying. The SQL statement must include time range conditions, and the start and end times must appear in pairs. The time range defined in the SQL statement template consists of a column representing time in the source database and the placeholders defined below.
> Different placeholders represent different time format requirements in SQL, specifically including the following placeholder formats:
>
> 1. `${start}`, `${end}`: Represents RFC3339 format timestamps, e.g.: 2024-03-14T08:00:00+0800
> 2. `${start_no_tz}`, `${end_no_tz}`: Represents RFC3339 strings without timezone: 2024-03-14T08:00:00
> 3. `${start_date}`, `${end_date}`: Represents date only, e.g.: 2024-03-14
>
> To solve the problem of data migration disorder, sorting conditions should be added to the query statement, such as `order by ts asc`.
**Start Time** The start time for migrating data, this field is required.
**End Time** The end time for migrating data, which can be left blank. If set, the migration task will stop automatically after reaching the end time; if left blank, it will continuously synchronize real-time data and the task will not stop automatically.
**Query Interval** The time interval for querying data in segments, default is 1 day. To avoid querying a large amount of data at once, a data synchronization subtask will use the query interval to segment the data retrieval.
**Delay Duration** In real-time data synchronization scenarios, to avoid losing data due to delayed writes, each synchronization task will read data from before the delay duration.
<figure>
<Image img={imgStep06} alt=""/>
</figure>
### 7. Configure Data Mapping
In the **Data Mapping** area, fill in the configuration parameters related to data mapping.
Click the **Retrieve from Server** button to fetch sample data from the PostgreSQL server.
In **Extract or Split from Column**, fill in the fields to extract or split from the message body, for example: split the `vValue` field into `vValue_0` and `vValue_1`, select the split extractor, fill in the separator `,`, and number `2`.
In **Filter**, fill in the filtering conditions, for example: write `Value > 0`, then only data where Value is greater than 0 will be written to TDengine.
In **Mapping**, select the supertable in TDengine to map to, and the columns to map to the supertable.
Click **Preview** to view the results of the mapping.
<figure>
<Image img={imgStep07} alt=""/>
</figure>
### 8. Configure Advanced Options
The **Advanced Options** area is collapsed by default, click the `>` on the right to expand it, as shown below:
**Maximum Read Concurrency** Limit on the number of data source connections or read threads. Modify this parameter when the default parameters do not meet the needs or when adjusting resource usage.
**Batch Size** The maximum number of messages or rows sent at once. The default is 10000.
<figure>
<Image img={imgStep08} alt=""/>
</figure>
### 9. Completion
Click the **Submit** button to complete the creation of the data synchronization task from PostgreSQL to TDengine. Return to the **Data Source List** page to view the status of the task execution.
---
title: Oracle Database
slug: /advanced-features/data-connectors/oracle-database
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/oracle-database-01.png';
import imgStep02 from '../../assets/oracle-database-02.png';
import imgStep03 from '../../assets/oracle-database-03.png';
import imgStep04 from '../../assets/oracle-database-04.png';
import imgStep05 from '../../assets/oracle-database-05.png';
import imgStep06 from '../../assets/oracle-database-06.png';
import imgStep07 from '../../assets/oracle-database-07.png';
This section describes how to create data migration tasks through the Explorer interface, migrating data from Oracle to the current TDengine cluster.
## Feature Overview
The Oracle database system is a popular relational database management system worldwide, known for its good portability, ease of use, and strong functionality, suitable for various large, medium, and small computer environments. It is an efficient, reliable, and high-throughput database solution.
TDengine can efficiently read data from Oracle and write it to TDengine, enabling historical data migration or real-time data synchronization.
## Creating a Task
### 1. Add a Data Source
Click the **+ Add Data Source** button in the upper left corner of the data writing page to enter the Add Data Source page, as shown below:
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in the **Name** field, for example, *`test_oracle_01`*.
Select *`Oracle`* from the **Type** dropdown menu, as shown below (the fields on the page will change after selection).
**Agent** is optional. If needed, you can select a specific agent from the dropdown menu or click the **+ Create New Agent** button on the right to create a new agent.
**Target Database** is required. You can click the **+ Create Database** button on the right to create a new database.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure Connection Information
Fill in the *`connection information for the source Oracle database`* in the **Connection Configuration** area, as shown below:
<figure>
<Image img={imgStep03} alt=""/>
</figure>
### 4. Configure Authentication Information
**User** Enter the user of the source Oracle database, who must have read permissions in the organization.
**Password** Enter the login password for the user mentioned above in the source Oracle database.
<figure>
<Image img={imgStep04} alt=""/>
</figure>
Then click the **Check Connectivity** button to verify whether the information filled in above can be used to access data from the source Oracle database.
### 5. Configure SQL Query
**Subtable Field** is used to split subtables. It is a `select distinct` SQL statement querying distinct combinations of the specified fields, usually corresponding to the tags in transform:
> This configuration is mainly intended to solve the problem of out-of-order data during migration. It must be used in conjunction with **SQL Template**, otherwise it cannot achieve the expected effect. Usage example:
>
> 1. Fill in the subtable field statement `select distinct col_name1, col_name2 from table`, which means using the fields col_name1 and col_name2 in the source table to split the subtable of the target supertable
> 2. Add subtable field placeholders in the **SQL Template**, for example, the `${col_name1} and ${col_name2}` part in `select * from table where ts >= ${start} and ts < ${end} and ${col_name1} and ${col_name2}`
> 3. Configure the mappings of `col_name1` and `col_name2` as two tags in **transform**
**SQL Template** is the SQL statement template used for querying, which must include time range conditions, and the start and end times must appear in pairs. The time range defined in the SQL statement template consists of a column representing time in the source database and the placeholders defined below.
> Different placeholders represent different time format requirements in SQL, specifically including the following placeholder formats:
>
> 1. `${start}`, `${end}`: Represents RFC3339 format timestamps, e.g., 2024-03-14T08:00:00+0800
> 2. `${start_no_tz}`, `${end_no_tz}`: Represents RFC3339 strings without timezone: 2024-03-14T08:00:00
> 3. `${start_date}`, `${end_date}`: Represents date only, but since there is no pure date type in Oracle, it will include zero hour, zero minute, and zero second, e.g., 2024-03-14 00:00:00, so be careful when using `date <= ${end_date}` as it does not include data of the day 2024-03-14
>
> To solve the problem of data migration disorder, a sorting condition should be added to the query statement, such as `order by ts asc`.
**Start Time** The start time for migrating data, this field is required.
**End Time** The end time for migrating data, which can be left blank. If set, the migration task will stop automatically after reaching the end time; if left blank, it will continuously synchronize real-time data and the task will not stop automatically.
**Query Interval** The time interval for querying data in segments, default is 1 day. To avoid querying too much data at once, a data synchronization sub-task will use the query interval to segment the data retrieval.
**Delay Duration** In real-time data synchronization scenarios, to avoid losing data due to delayed writes, each synchronization task will read data from before the delay duration.
<figure>
<Image img={imgStep05} alt=""/>
</figure>
### 6. Configure Data Mapping
In the **Data Mapping** area, fill in the configuration parameters related to data mapping.
Click the **Retrieve from Server** button to get sample data from the Oracle server.
In **Extract or Split from Column**, fill in the fields to extract or split from the message body, for example: split the `vValue` field into `vValue_0` and `vValue_1`, choose the split extractor, fill in the separator `,`, and number `2`.
In **Filter**, fill in the filtering conditions, for example: write `Value > 0`, then only data where Value is greater than 0 will be written to TDengine.
In **Mapping**, select the supertable in TDengine to map to, and the columns to map to the supertable.
Click **Preview** to view the results of the mapping.
<figure>
<Image img={imgStep06} alt=""/>
</figure>
### 7. Configure Advanced Options
The **Advanced Options** area is collapsed by default, click the `>` on the right to expand it, as shown below:
**Maximum Read Concurrency** Limit on the number of data source connections or reading threads. Modify this parameter when the default does not meet your needs or when adjusting resource usage.
**Batch Size** The maximum number of messages or rows sent at once. The default is 10000.
<figure>
<Image img={imgStep07} alt=""/>
</figure>
### 8. Completion
Click the **Submit** button to complete the creation of the data synchronization task from Oracle to TDengine. Return to the **Data Source List** page to view the status of the task execution.
---
title: Microsoft SQL Server
sidebar_label: SQL Server
slug: /advanced-features/data-connectors/sql-server
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/sql-server-01.png';
import imgStep02 from '../../assets/sql-server-02.png';
import imgStep03 from '../../assets/sql-server-03.png';
import imgStep04 from '../../assets/sql-server-04.png';
import imgStep05 from '../../assets/sql-server-05.png';
import imgStep06 from '../../assets/sql-server-06.png';
import imgStep07 from '../../assets/sql-server-07.png';
import imgStep08 from '../../assets/sql-server-08.png';
This section describes how to create data migration tasks through the Explorer interface, migrating data from Microsoft SQL Server to the current TDengine cluster.
## Feature Overview
Microsoft SQL Server is one of the most popular relational databases. Many systems have used or are using Microsoft SQL Server to store data reported by IoT and industrial internet devices. However, as the number of devices in the access systems grows and the demand for real-time data feedback from users increases, Microsoft SQL Server can no longer meet business needs. Starting from TDengine Enterprise Edition 3.3.2.0, TDengine can efficiently read data from Microsoft SQL Server and write it into TDengine, achieving historical data migration or real-time data synchronization, and solving technical pain points faced by businesses.
## Creating a Task
### 1. Add a Data Source
Click the **+ Add Data Source** button in the upper left corner of the data writing page to enter the Add Data Source page, as shown below:
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in the **Name** field, for example *`test_mssql_01`*.
Select *`Microsoft SQL Server`* from the **Type** dropdown menu, as shown below (the fields on the page will change after selection).
**Agent** is optional. If needed, you can select a specific agent from the dropdown menu, or click the **+ Create New Agent** button on the right to create a new agent.
**Target Database** is required. You can click the **+ Create Database** button on the right to create a new database.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure Connection Information
Fill in the *`connection information for the source Microsoft SQL Server database`* in the **Connection Configuration** area, as shown below:
<figure>
<Image img={imgStep03} alt=""/>
</figure>
### 4. Configure Authentication Information
**User** Enter the user of the source Microsoft SQL Server database, who must have read permissions in the organization.
**Password** Enter the login password for the user mentioned above in the source Microsoft SQL Server database.
<figure>
<Image img={imgStep04} alt=""/>
</figure>
### 5. Configure Connection Options
**Instance Name** Set the Microsoft SQL Server instance name (defined in SQL Browser; only available on the Windows platform; if specified, the port will be replaced with the value returned by SQL Browser).
**Application Name** Set the application name to identify the connecting application.
**Encryption** Set whether to use an encrypted connection. The default value is Off. Options include Off, On, NotSupported, Required.
**Trust Certificate** Set whether to trust the server certificate. If enabled, the server certificate will not be verified and will be accepted as is (if trust is enabled, the `Trust Certificate CA` field below will be hidden).
**Trust Certificate CA** Set whether to trust the server's certificate CA. If a CA file is uploaded, the server certificate will be verified based on the provided CA certificate in addition to the system trust store.
<figure>
<Image img={imgStep05} alt=""/>
</figure>
Then click the **Check Connectivity** button to verify whether the information filled in above can be used to retrieve data from the source Microsoft SQL Server database.
### 6. Configure SQL Query
**Subtable Field** is used to split subtables. It is a `select distinct` SQL statement that queries unique combinations of the specified fields, usually corresponding to the tags in transform:
> This configuration is mainly intended to solve the problem of out-of-order data during migration and needs to be used in conjunction with **SQL Template**; otherwise, it cannot achieve the expected effect. Usage examples are as follows:
>
> 1. Fill in the subtable field statement `select distinct col_name1, col_name2 from table`, which means using the fields col_name1 and col_name2 in the source table to split the subtables of the target supertable
> 2. Add subtable field placeholders in the **SQL Template**, for example, the `${col_name1} and ${col_name2}` part in `select * from table where ts >= ${start} and ts < ${end} and ${col_name1} and ${col_name2}`
> 3. Configure the mappings of `col_name1` and `col_name2` as two tags in **transform**
**SQL Template** is the SQL statement template used for querying. It must include a time range condition, and the start time and end time must appear in pairs. The time range defined in the SQL template consists of a column representing time in the source database and the placeholders defined below; a complete template example follows the notes below.
> SQL uses different placeholders to represent different time format requirements, specifically the following placeholder formats:
>
> 1. `${start}`, `${end}`: Represents RFC3339 format timestamp, e.g.: 2024-03-14T08:00:00+0800
> 2. `${start_no_tz}`, `${end_no_tz}`: Represents an RFC3339 string without timezone: 2024-03-14T08:00:00
> 3. `${start_date}`, `${end_date}`: Represents date only, e.g.: 2024-03-14
>
> Note: Only `datetime2` and `datetimeoffset` support querying with start/end; `datetime` and `smalldatetime` can only be queried with start_no_tz/end_no_tz; `timestamp` cannot be used as a query condition.
>
> To solve the problem of data migration disorder, it is advisable to add a sorting condition in the query statement, such as `order by ts asc`.
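Putting the above together, a minimal template sketch might look as follows (the table name `dbo.meters`, the time column `ts`, and the subtable fields `col_name1`/`col_name2` are illustrative assumptions):
```sql
-- Hypothetical example; table and column names are placeholders for illustration.
-- ts is assumed to be of type datetime2 so that ${start}/${end} can be used.
-- ${start}/${end} are replaced with the boundaries of each query interval,
-- and ${col_name1}/${col_name2} with the subtable-field conditions.
SELECT * FROM dbo.meters
WHERE ts >= ${start} AND ts < ${end}
  AND ${col_name1} AND ${col_name2}
ORDER BY ts ASC
```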
**Start Time** The start time of the data migration, this field is mandatory.
**End Time** The end time of the data migration, which can be left blank. If set, the migration task will stop automatically after reaching the end time; if left blank, it will continuously synchronize real-time data, and the task will not stop automatically.
**Query Interval** The time interval for segmenting data queries, default is 1 day. To avoid querying too much data at once, a data synchronization subtask will use the query interval to segment the data.
**Delay Duration** In real-time data synchronization scenarios, to avoid losing data due to delayed writing, each synchronization task will read data from before the delay duration.
<figure>
<Image img={imgStep06} alt=""/>
</figure>
### 7. Configure Data Mapping
Fill in the related configuration parameters in the **Data Mapping** area.
Click the **Retrieve from Server** button to get sample data from the Microsoft SQL Server.
In **Extract or Split from Column**, fill in the fields to extract or split from the message body, for example: split the `vValue` field into `vValue_0` and `vValue_1`, select the split extractor, fill in the separator as `,`, and number as 2.
In **Filter**, fill in the filter conditions, for example: write `Value > 0`, then only data where Value is greater than 0 will be written to TDengine.
In **Mapping**, select the supertable in TDengine to which you want to map, and the columns to map to the supertable.
Click **Preview** to view the results of the mapping.
<figure>
<Image img={imgStep07} alt=""/>
</figure>
### 8. Configure Advanced Options
The **Advanced Options** area is collapsed by default, click the `>` on the right to expand it, as shown below:
**Maximum Read Concurrency** Limit on the number of data source connections or reading threads. Modify this parameter when the default value does not meet your needs or when you need to adjust resource usage.
**Batch Size** The maximum number of messages or rows sent at once. The default is 10000.
<figure>
<Image img={imgStep08} alt=""/>
</figure>
### 9. Completion
Click the **Submit** button to complete the creation of the data synchronization task from Microsoft SQL Server to TDengine, and return to the **Data Source List** page to view the status of the task execution.


@ -0,0 +1,164 @@
---
title: MongoDB
slug: /advanced-features/data-connectors/mongodb
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/mongodb-01.png';
import imgStep02 from '../../assets/mongodb-02.png';
import imgStep03 from '../../assets/mongodb-03.png';
import imgStep04 from '../../assets/mongodb-04.png';
import imgStep05 from '../../assets/mongodb-05.png';
import imgStep06 from '../../assets/mongodb-06.png';
import imgStep07 from '../../assets/mongodb-07.png';
import imgStep08 from '../../assets/mongodb-08.png';
This section describes how to create data migration tasks through the Explorer interface, migrating data from MongoDB to the current TDengine cluster.
## Feature Overview
MongoDB is a product that lies between relational and non-relational databases, widely used in content management systems, mobile applications, and the Internet of Things, among other fields. Starting from TDengine Enterprise Edition 3.3.3.0, TDengine can efficiently read data from MongoDB and write it into TDengine, achieving historical data migration or real-time data synchronization, and addressing technical pain points faced by businesses.
## Creating a Task
### 1. Add a Data Source
Click the **+ Add Data Source** button in the top right corner of the data writing page to enter the Add Data Source page, as shown below:
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in the **Name** field, for example `test_mongodb_01`.
Select `MongoDB` from the **Type** dropdown menu, as shown below (the fields on the page will change after selection).
**Proxy** is optional. If needed, you can select a specific proxy from the dropdown menu, or click the **+ Create New Proxy** button on the right to create a new proxy.
**Target Database** is mandatory. You can select a specific database from the dropdown menu, or click the **+ Create Database** button on the right to create a new database.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure Connection Information
Fill in the *connection information for the source MongoDB database* in the **Connection Configuration** area, as shown below:
<figure>
<Image img={imgStep03} alt=""/>
</figure>
### 4. Configure Authentication Information
**User** Enter the user of the source MongoDB database, who must have read permissions in the MongoDB system.
**Password** Enter the login password for the user mentioned above in the source MongoDB database.
**Authentication Database** The database in MongoDB where user information is stored, default is admin.
<figure>
<Image img={imgStep04} alt=""/>
</figure>
### 5. Configure Connection Options
**Application Name** Set the application name to identify the connected application.
**SSL Certificate** Set whether to use an encrypted connection, which is off by default. If enabled, you need to upload the following two files:
&emsp; 1. **CA File** Upload the SSL encryption certificate authority file.
&emsp; 2. **Certificate File** Upload the SSL encryption certificate file.
<figure>
<Image img={imgStep05} alt=""/>
</figure>
Then click the **Check Connectivity** button to verify whether the information entered above can be used to retrieve data from the source MongoDB database.
### 6. Configure Data Query
**Database** The source database in MongoDB, which can be dynamically configured using placeholders, such as `database_${Y}`. See the table below for a list of available placeholders.
**Collection** The collection in MongoDB, which can be dynamically configured using placeholders, such as `collection_${md}`. See the table below for a list of available placeholders.
|Placeholder|Description|Example Data|
| :-----: | :------------: |:--------:|
|Y|Complete Gregorian year, zero-padded 4-digit integer|2024|
|y|Gregorian year divided by 100, zero-padded 2-digit integer|24|
|M|Integer month (1 - 12)|1|
|m|Integer month (01 - 12)|01|
|B|Full English spelling of the month|January|
|b|Abbreviation of the month in English (3 letters)|Jan|
|D|Numeric representation of the date (1 - 31)|1|
|d|Numeric representation of the date (01 - 31)|01|
|J|Day of the year (1 - 366)|1|
|j|Day of the year (001 - 366)|001|
|F|Equivalent to `${Y}-${m}-${d}`|2024-01-01|
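For example, assuming the task is processing data dated 2024-01-01, dynamically named databases and collections would resolve as follows:
```text
database_${Y}    ->  database_2024
collection_${F}  ->  collection_2024-01-01
```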
**Subtable Fields** Fields used to split subtables, usually corresponding to tags in transform, separated by commas, such as col_name1,col_name2.
This configuration is mainly intended to solve the problem of out-of-order data during migration and needs to be used in conjunction with **Query Template**; otherwise, it cannot achieve the expected effect. Usage examples are as follows:
1. Configure two subtable fields `col_name1,col_name2`
2. Add subtable field placeholders in the **Query Template**, such as the `${col_name1}, ${col_name2}` part in `{"ddate":{"$gte":${start_datetime},"$lt":${end_datetime}}, ${col_name1}, ${col_name2}}`
3. Configure `col_name1` and `col_name2` two tag mappings in **transform**
**Query Template** is the JSON-format query statement template used to query data. It must include a time range condition, and the start and end times must appear in pairs. The time range defined in the template consists of a time-representing column from the source database and the placeholders defined below; a complete example follows the placeholder descriptions below.
Different placeholders represent different time format requirements, specifically the following placeholder formats:
1. `${start_datetime}`, `${end_datetime}`: Corresponds to filtering by the backend datetime type field, e.g., `{"ddate":{"$gte":${start_datetime},"$lt":${end_datetime}}}` will be converted to `{"ddate":{"$gte":{"$date":"2024-06-01T00:00:00+00:00"},"$lt":{"$date":"2024-07-01T00:00:00+00:00"}}}`
2. `${start_timestamp}`, `${end_timestamp}`: Corresponds to filtering by the backend timestamp type field, e.g., `{"ttime":{"$gte":${start_timestamp},"$lt":${end_timestamp}}}` will be converted to `{"ttime":{"$gte":{"$timestamp":{"t":123,"i":456}},"$lt":{"$timestamp":{"t":123,"i":456}}}}`
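As a concrete sketch (assuming the time field is `ddate` of datetime type and the two subtable fields `col_name1`/`col_name2` from the example above), the full query template could be entered as:
```json
{"ddate": {"$gte": ${start_datetime}, "$lt": ${end_datetime}}, ${col_name1}, ${col_name2}}
```
At run time the time placeholders are expanded as described above; for example, `${start_datetime}` becomes `{"$date":"2024-06-01T00:00:00+00:00"}` for a sub-task whose interval starts at 2024-06-01.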
**Query Sorting** The sorting conditions applied when executing the query, in JSON format; they must comply with MongoDB's sorting syntax. Usage examples are as follows:
1. `{"createtime":1}`: MongoDB query results are returned in ascending order by createtime.
2. `{"createdate":1, "createtime":1}`: MongoDB query results are returned in ascending order by createdate and createtime.
**Start Time** The start time for migrating data, this field is mandatory.
**End Time** The end time for migrating data, can be left blank. If set, the migration task will stop automatically after reaching the end time; if left blank, it will continuously synchronize real-time data, and the task will not stop automatically.
**Query Interval** The time interval for segmenting data queries, default is 1 day. To avoid querying too much data at once, a data synchronization subtask will use the query interval to segment the data.
**Delay Duration** In real-time data synchronization scenarios, to avoid losing data due to delayed writes, each synchronization task will read data from before the delay duration.
<figure>
<Image img={imgStep06} alt=""/>
</figure>
### 7. Configure Data Mapping
Fill in the data mapping related configuration parameters in the **Payload Transformation** area.
Click the **Retrieve from Server** button to fetch sample data from the MongoDB server.
In **Parsing**, choose from JSON/Regex/UDT to parse the original message body, and click the **Preview** button on the right to view the parsing results after configuration.
In **Extract or Split from Column**, fill in the fields to extract or split from the message body, for example: split the `vValue` field into `vValue_0` and `vValue_1`, select the split extractor, fill in the separator as `,`, number as 2, and click the **Preview** button on the right to view the transformation results after configuration.
In **Filter**, fill in the filtering conditions, for example: write `Value > 0`, then only data where Value is greater than 0 will be written to TDengine, and click the **Preview** button on the right to view the filtering results after configuration.
In **Mapping**, select the supertable in TDengine to which the data will be mapped, as well as the columns to map to the supertable, and click the **Preview** button on the right to view the mapping results after configuration.
<figure>
<Image img={imgStep07} alt=""/>
</figure>
### 8. Configure Advanced Options
The **Advanced Options** area is collapsed by default, click the `>` on the right to expand it, as shown below:
**Maximum Read Concurrency** Limit on the number of data source connections or reading threads. Modify this parameter when the default value does not meet your needs or when you need to adjust resource usage.
**Batch Size** The maximum number of messages or rows sent at once. Default is 10000.
<figure>
<Image img={imgStep08} alt=""/>
</figure>
### 9. Completion
Click the **Submit** button to complete the creation of the data synchronization task from MongoDB to TDengine, and return to the **Data Source List** page to view the task execution status.


@ -0,0 +1,324 @@
---
title: Data Connectors
slug: /advanced-features/data-connectors
---
import Image from '@theme/IdealImage';
import imgZeroCode from '../../assets/data-connectors-01.png';
import imgSampleData from '../../assets/data-connectors-02.png';
import imgJsonParsing from '../../assets/data-connectors-03.png';
import imgRegexParsing from '../../assets/data-connectors-04.png';
import imgResults from '../../assets/data-connectors-05.png';
import imgSplit from '../../assets/data-connectors-06.png';
## Overview
TDengine Enterprise is equipped with a powerful visual data management tool—taosExplorer. With taosExplorer, users can easily submit tasks to TDengine through simple configurations in the browser, achieving seamless data import from various data sources into TDengine with zero coding. During the import process, TDengine automatically extracts, filters, and transforms the data to ensure the quality of the imported data. Through this zero-code data source integration method, TDengine has successfully transformed into an outstanding platform for aggregating time-series big data. Users do not need to deploy additional ETL tools, thereby greatly simplifying the overall architecture design and improving data processing efficiency.
The diagram below shows the system architecture of the zero-code integration platform.
<figure>
<Image img={imgZeroCode} alt="Zero-code access platform"/>
<figcaption>Figure 1. Zero-code access platform</figcaption>
</figure>
## Supported Data Sources
The data sources currently supported by TDengine are as follows:
| Data Source | Supported Version | Description |
| --- | --- | --- |
| Aveva PI System | PI AF Server Version 2.10.9.593 or above | An industrial data management and analytics platform, formerly known as OSIsoft PI System, capable of real-time collection, integration, analysis, and visualization of industrial data, helping enterprises achieve intelligent decision-making and refined management |
| Aveva Historian | AVEVA Historian 2020 RS SP1 | Industrial big data analytics software, formerly known as Wonderware Historian, designed for industrial environments to store, manage, and analyze real-time and historical data from various industrial devices and sensors |
| OPC DA | Matrikon OPC version: 1.7.2.7433 | Abbreviation for Open Platform Communications, an open, standardized communication protocol for data exchange between automation devices from different manufacturers. Initially developed by Microsoft, it was aimed at addressing interoperability issues in the industrial control field; the OPC protocol was first released in 1996, then known as OPC DA (Data Access), mainly for real-time data collection and control. |
| OPC UA | KeepWare KEPServerEx 6.5 | In 2006, the OPC Foundation released the OPC UA (Unified Architecture) standard, a service-oriented, object-oriented protocol with higher flexibility and scalability, which has become the mainstream version of the OPC protocol |
| MQTT | emqx: 3.0.0 to 5.7.1<br/> hivemq: 4.0.0 to 4.31.0<br/> mosquitto: 1.4.4 to 2.0.18 | Abbreviation for Message Queuing Telemetry Transport, a lightweight communication protocol based on the publish/subscribe pattern, designed for low overhead, low bandwidth usage instant messaging, widely applicable in IoT, small devices, mobile applications, and other fields. |
| Kafka | 2.11 ~ 3.8.0 | An open-source stream processing platform developed by the Apache Software Foundation, primarily used for processing real-time data and providing a unified, high-throughput, low-latency messaging system. It features high speed, scalability, persistence, and a distributed design, enabling it to handle hundreds of thousands of read/write operations per second, support thousands of clients, while maintaining data reliability and availability. |
| InfluxDB | 1.7, 1.8, 2.0-2.7 | A popular open-source time-series database optimized for handling large volumes of time-series data.|
| OpenTSDB | 2.4.1 | A distributed, scalable time-series database based on HBase. It is primarily used for storing, indexing, and providing access to metric data collected from large-scale clusters (including network devices, operating systems, applications, etc.), making this data more accessible and graphically presentable. |
| MySQL | 5.6,5.7,8.0+ | One of the most popular relational database management systems, known for its small size, fast speed, low overall ownership cost, and particularly its open-source nature, making it the choice for website database development for both medium-sized and large websites. |
| Oracle | 11G/12c/19c | Oracle Database System is one of the world's popular relational database management systems, known for its good portability, ease of use, powerful features, suitable for various large, medium, and small computer environments. It is an efficient, reliable, and high-throughput database solution. |
| PostgreSQL | v15.0+ | PostgreSQL is a very powerful open-source client/server relational database management system, with many features found in large commercial RDBMS, including transactions, sub-selects, triggers, views, foreign key referential integrity, and complex locking capabilities.|
| SQL Server | 2012/2022 | Microsoft SQL Server is a relational database management system developed by Microsoft, known for its ease of use, good scalability, and high integration with related software. |
| MongoDB | 3.6+ | MongoDB is a product between relational and non-relational databases, widely used in content management systems, mobile applications, and the Internet of Things, among many other fields. |
| CSV | - | Abbreviation for Comma Separated Values, a plain text file format separated by commas, commonly used in spreadsheet or database software. |
| TDengine 2.x | 2.4 or 2.6+ | Older version of TDengine that is no longer maintained; upgrading to the latest 3.0 version is recommended. |
| TDengine 3.x | Source version+ | Use TMQ for subscribing to specified databases or supertables from TDengine. |
## Data Extraction, Filtering, and Transformation
Since there can be multiple data sources, each data source may have different physical units, naming conventions, and time zones. To address this issue, TDengine has built-in ETL capabilities that can parse and extract the required data from the data packets of data sources, and perform filtering and transformation to ensure the quality of the data written and provide a unified namespace. The specific functions are as follows:
1. Parsing: Use JSON Path or regular expressions to parse fields from the original message.
2. Extracting or splitting from columns: Use split or regular expressions to extract multiple fields from an original field.
3. Filtering: Messages are only written to TDengine if the expression's value is true.
4. Transformation: Establish conversion and mapping relationships between parsed fields and TDengine supertable fields.
Below is a detailed explanation of the data transformation rules.
### Parsing
Only unstructured data sources need this step. Currently, MQTT and Kafka data sources use the rules provided in this step to parse unstructured data to preliminarily obtain structured data, i.e., row and column data that can be described by fields. In the explorer, you need to provide sample data and parsing rules to preview the parsed structured data presented in a table.
#### Sample Data
<figure>
<Image img={imgSampleData} alt="Sample data"/>
<figcaption>Figure 2. Sample data</figcaption>
</figure>
As shown in the image, the textarea input box contains the sample data, which can be obtained in three ways:
1. Directly enter the sample data in the textarea;
2. Click the button on the right "Retrieve from Server" to get the sample data from the configured server and append it to the sample data textarea;
3. Upload a file, appending the file content to the sample data textarea.
Each piece of sample data ends with a carriage return.
#### Parsing<a name="parse"></a>
Parsing is the process of parsing unstructured strings into structured data. The message body's parsing rules currently support JSON, Regex, and UDT.
##### JSON Parsing
JSON parsing supports JSONObject and JSONArray. From the following JSON sample data, the fields `groupid`, `voltage`, `current`, `ts`, `inuse`, and `location` can be automatically parsed:
```json
{"groupid": 170001, "voltage": "221V", "current": 12.3, "ts": "2023-12-18T22:12:00", "inuse": true, "location": "beijing.chaoyang.datun"}
{"groupid": 170001, "voltage": "220V", "current": 12.2, "ts": "2023-12-18T22:12:02", "inuse": true, "location": "beijing.chaoyang.datun"}
{"groupid": 170001, "voltage": "216V", "current": 12.5, "ts": "2023-12-18T22:12:04", "inuse": false, "location": "beijing.chaoyang.datun"}
```
Or
```json
[{"groupid": 170001, "voltage": "221V", "current": 12.3, "ts": "2023-12-18T22:12:00", "inuse": true, "location": "beijing.chaoyang.datun"},
{"groupid": 170001, "voltage": "220V", "current": 12.2, "ts": "2023-12-18T22:12:02", "inuse": true, "location": "beijing.chaoyang.datun"},
{"groupid": 170001, "voltage": "216V", "current": 12.5, "ts": "2023-12-18T22:12:04", "inuse": false, "location": "beijing.chaoyang.datun"}]
```
Subsequent examples will only explain using JSONObject.
From the following nested JSON data, the fields `groupid`, `data_voltage`, `data_current`, `ts`, `inuse`, `location_0_province`, `location_0_city`, and `location_0_street` can be automatically parsed; you can also choose which fields to parse and set aliases for the parsed fields.
```json
{"groupid": 170001, "data": { "voltage": "221V", "current": 12.3 }, "ts": "2023-12-18T22:12:00", "inuse": true, "location": [{"province": "beijing", "city":"chaoyang", "street": "datun"}]}
```
<figure>
<Image img={imgJsonParsing} alt="JSON parsing"/>
<figcaption>Figure 3. JSON parsing</figcaption>
</figure>
##### Regex Regular Expressions<a name="regex"></a>
You can use **named capture groups** in regular expressions to extract multiple fields from any string (text) field. As shown in the figure, extract fields such as access IP, timestamp, and accessed URL from nginx logs.
```regex
(?<ip>\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b)\s-\s-\s\[(?<ts>\d{2}/\w{3}/\d{4}:\d{2}:\d{2}:\d{2}\s\+\d{4})\]\s"(?<method>[A-Z]+)\s(?<url>[^\s"]+).*(?<status>\d{3})\s(?<length>\d+)
```
<figure>
<Image img={imgRegexParsing} alt="Regex parsing"/>
<figcaption>Figure 4. Regex parsing</figcaption>
</figure>
##### UDT Custom Parsing Scripts
Custom parsing scripts are written in rhai syntax (refer to `https://rhai.rs/book/`); currently, scripts only support raw data in JSON format.
**Input**: In the script, you can use the parameter `data`, which is the Object Map resulting from parsing the raw JSON data;
**Output**: The output must be an array.
For example, consider a device that reports the three phase-voltage values in a single field, where each value needs to be written to its own subtable. Such data needs to be parsed:
```json
{
"ts": "2024-06-27 18:00:00",
"voltage": "220.1,220.3,221.1",
"dev_id": "8208891"
}
```
Then you can use the following script to extract the three voltage values.
```rhai
let v3 = data["voltage"].split(",");
[
#{"ts": data["ts"], "val": v3[0], "dev_id": data["dev_id"]},
#{"ts": data["ts"], "val": v3[1], "dev_id": data["dev_id"]},
#{"ts": data["ts"], "val": v3[2], "dev_id": data["dev_id"]}
]
```
The final parsing result is shown below:
<figure>
<Image img={imgResults} alt="Parsed results"/>
<figcaption>Figure 5. Parsed results</figcaption>
</figure>
### Extraction or Splitting
The parsed data may still not meet the data requirements of the target table. For example, the original data collected by a smart meter is as follows (in json format):
```json
{"groupid": 170001, "voltage": "221V", "current": 12.3, "ts": "2023-12-18T22:12:00", "inuse": true, "location": "beijing.chaoyang.datun"}
{"groupid": 170001, "voltage": "220V", "current": 12.2, "ts": "2023-12-18T22:12:02", "inuse": true, "location": "beijing.chaoyang.datun"}
{"groupid": 170001, "voltage": "216V", "current": 12.5, "ts": "2023-12-18T22:12:04", "inuse": false, "location": "beijing.chaoyang.datun"}
```
With the JSON rule, the voltage is parsed as a string with a unit, but it is desirable to store voltage and current as int values for statistical analysis, so the voltage needs to be split further; in addition, the timestamp should be split into separate date and time fields for storage.
As shown in the figure below, you can use the split rule on the source field `ts` to split it into date and time, and use regex to extract the voltage value and unit from the field `voltage`. The split rule needs to set **delimiter** and **number of splits**, and the naming rule for the split fields is `{original field name}_{sequence number}`. The Regex rule is the same as in the parsing process, using **named capture groups** to name the extracted fields.
### Filtering<a name="filter"></a>
The filtering feature sets filtering conditions; only data rows that meet the conditions will be written to the target table. The result of the filter condition expression must be of boolean type. Before writing filter conditions, you must determine the type of each parsed field; based on those types, judgment functions and comparison operators (`>`, `>=`, `<=`, `<`, `==`, `!=`) can be used.
#### Field Types and Conversion
Only by clearly parsing the type of each field can you use the correct syntax for data filtering.
Fields parsed using the json rule are automatically set to types based on their attribute values:
1. bool type: `"inuse": true`
2. int type: `"voltage": 220`
3. float type: `"current" : 12.2`
4. String type: `"location": "MX001"`
Data parsed using regex rules are all string types.
Data extracted or split using split and regex are string types.
If the extracted data type is not the expected type, data type conversion can be performed. A common data type conversion is converting a string to a numeric type. Supported conversion functions are as follows:
|Function|From type|To type|e.g.|
|:----|:----|:----|:----|
| parse_int | string | int | parse_int("56") // Results in integer 56 |
| parse_float | string | float | parse_float("12.3") // Results in float 12.3 |
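For example, if the numeric part of `voltage` was extracted as a string such as `"220"`, a filter sketch that converts it before comparison could look like this (assuming conversion functions can be combined with comparison operators inside a filter expression):
> parse_int(voltage) > 200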
#### Conditional Expressions
Different data types have their own ways of writing conditional expressions.
##### BOOL type
You can use the variable itself or the `!` operator. For example, for the field `"inuse": true`, you can write the following expressions:
> 1. inuse
> 2. !inuse
##### Numeric types (int/float)
Numeric types support comparison operators `==`, `!=`, `>`, `>=`, `<`, `<=`.
##### String type
Use comparison operators to compare strings.
String functions
|Function|Description|e.g.|
|:----|:----|:----|
| is_empty | returns true if the string is empty | s.is_empty() |
| contains | checks if a certain character or sub-string occurs in the string | s.contains("substring") |
| starts_with | returns true if the string starts with a certain string | s.starts_with("prefix") |
| ends_with | returns true if the string ends with a certain string | s.ends_with("suffix") |
| len | returns the number of characters (not number of bytes) in the string, must be used with comparison operator | s.len == 5 to check if the string length is 5; len as a property returns int, different from the first four functions which directly return bool. |
##### Compound Expressions
Multiple conditional expressions can be combined using logical operators (&&, ||, !).
For example, the following expression represents fetching data from smart meters installed in Beijing with a voltage value greater than 200.
> location.starts_with("beijing") && voltage > 200
### Mapping
Mapping maps the **source fields** obtained from parsing, extraction, or splitting to the **target table fields**. A source field can be mapped directly, or it can be mapped to the target table after some rule-based calculation.
#### Selecting the target supertable
After selecting the target supertable, all tags and columns of the supertable will be loaded.
The source field is automatically mapped to the tag and column of the target supertable using the mapping rule based on the name.
For example, the following parsed, extracted, or split preview data:
#### Mapping Rules <a name="expression"></a>
The supported mapping rules are shown in the following table:
|rule|description|
|:----|:----|
| mapping | Direct mapping, need to select the mapping source field.|
| value | Constant, can enter string constants or numeric constants, the entered constant value is directly stored.|
| generator | Generator; currently only the timestamp generator is supported, which writes the current time when the data is stored.|
| join | String connector, can specify connecting characters to concatenate selected multiple source fields.|
| format | **String formatting tool**: fill in a formatting string. For example, if there are three source fields year, month, and day, and you wish to store them in the yyyy-MM-dd date format, you can provide the formatting string `${year}-${month}-${day}`. Here `${}` acts as a placeholder; the placeholder can be a source field or a string-function expression applied to a field.|
| sum | Select multiple numeric fields for addition calculation.|
| expr | **Numeric operation expression**, can perform more complex function processing and mathematical operations on numeric fields.|
##### Supported string processing functions in `format`
|Function|description|e.g.|
|:----|:----|:----|
| pad(len, pad_chars) | pads the string with a character or a string to at least a specified length | "1.2".pad(5, '0') // Result is "1.200" |
|trim|trims the string of whitespace at the beginning and end|" abc ee ".trim() // Result is "abc ee"|
|sub_string(start_pos, len)|extracts a sub-string, two parameters:<br />1. start position, counting from end if < 0<br />2. (optional) number of characters to extract, none if ≤ 0, to end if omitted|"012345678".sub_string(5) // "5678"<br />"012345678".sub_string(5, 2) // "56"<br />"012345678".sub_string(-2) // "78"|
|replace(substring, replacement)|replaces a sub-string with another|"012345678".replace("012", "abc") // "abc345678"|
##### Mathematical expressions in `expr`
Basic mathematical operations support addition `+`, subtraction `-`, multiplication `*`, and division `/`.
For example, if the data source collects temperature values in Celsius and the target database stores values in Fahrenheit, then the collected temperature data needs to be converted.
If the source field is `temperature`, then use the expression `temperature * 1.8 + 32`.
Mathematical expressions also support the use of mathematical functions, as shown in the table below:
|Function|description|e.g.|
|:----|:----|:----|
|sin, cos, tan, sinh, cosh|Trigonometry|a.sin() |
|asin, acos, atan, asinh, acosh|arc-trigonometry|a.asin()|
|sqrt|Square root|a.sqrt() // 4.sqrt() == 2|
|exp|Exponential|a.exp()|
|ln, log|Logarithmic|a.ln() // e.ln() == 1<br />a.log() // 10.log() == 1|
|floor, ceiling, round, int, fraction|rounding|a.floor() // (4.2).floor() == 4<br />a.ceiling() // (4.2).ceiling() == 5<br />a.round() // (4.2).round() == 4<br />a.int() // (4.2).int() == 4<br />a.fraction() // (4.2).fraction() == 0.2|
#### Subtable name mapping
Subtable names are strings and can be defined using the string formatting `format` expression in the mapping rules.
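For example, assuming the target supertable has a tag `groupid` mapped from the source data, a format expression such as the one below would generate subtable names like `d_170001` for the sample data shown earlier:
```text
d_${groupid}
```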
## Creating a Task
Below, using an MQTT data source as an example, we explain how to create an MQTT-type task that consumes data from an MQTT broker and writes it into TDengine.
1. After logging into taosExplorer, click on "Data Writing" on the left navigation bar to enter the task list page.
2. On the task list page, click "+ Add Data Source" to enter the task creation page.
3. After entering the task name, select the type as MQTT, then you can create a new proxy or select an already created proxy.
4. Enter the IP address and port number of the MQTT broker, for example: 192.168.1.100:1883
5. Configure authentication and SSL encryption:
- If the MQTT broker has enabled user authentication, enter the username and password of the MQTT broker in the authentication section;
- If the MQTT broker has enabled SSL encryption, you can turn on the SSL certificate switch on the page and upload the CA's certificate, as well as the client's certificate and private key files;
6. In the "Collection Configuration" section, you can select the version of the MQTT protocol, currently supporting 3.1, 3.1.1, 5.0; when configuring the Client ID, be aware that if multiple tasks are created for the same MQTT broker, the Client IDs should be different to avoid conflicts, which could cause the tasks to not run properly; when configuring the topic and QoS, use the format `<topic name>::<QoS>`, where the QoS values range from 0, 1, 2, representing at most once, at least once, exactly once; after configuring the above information, you can click the "Check Connectivity" button to check the configurations, if the connectivity check fails, please modify according to the specific error tips returned on the page;
7. During the process of syncing data from the MQTT broker, taosX also supports extracting, filtering, and mapping operations on the fields in the message body. In the text box under "Payload Transformation", you can directly input a sample of the message body, or import it by uploading a file, and in the future, it will also support retrieving sample messages directly from the configured server;
8. For extracting fields from the message body, currently, two methods are supported: JSON and regular expressions. For simple key/value formatted JSON data, you can directly click the extract button to display the parsed field names; for complex JSON data, you can use JSON Path to extract the fields of interest; when using regular expressions to extract fields, ensure the correctness of the regular expressions;
9. After the fields in the message body are parsed, you can set filtering rules based on the parsed field names, and only data that meets the filtering rules will be written into TDengine, otherwise, the message will be ignored; for example, you can configure a filtering rule as voltage > 200, meaning only data with a voltage greater than 200V will be synced to TDengine;
10. Finally, after configuring the mapping rules between the fields in the message body and the fields in the supertable, you can submit the task; in addition to basic mapping, here you can also convert the values of the fields in the message, for example, you can use the expression (expr) to calculate the power from the original message body's voltage and current before writing it into TDengine;
11. After submitting the task, it will automatically return to the task list page, if the submission is successful, the status of the task will switch to "Running", if the submission fails, you can check the activity log of the task to find the error reason;
12. For tasks that are running, clicking the metrics view button allows you to view the detailed running metrics of the task. The popup window is divided into 2 tabs, displaying the cumulative metrics across the task's multiple runs and the metrics of the current run; these metrics refresh automatically every 2 seconds.
## Task Management
On the task list page, you can also start, stop, view, delete, copy, and other operations on tasks. You can also view the running status of each task, including the number of records written, traffic, etc.
```mdx-code-block
import DocCardList from '@theme/DocCardList';
import {useCurrentSidebarCategory} from '@docusaurus/theme-common';
<DocCardList items={useCurrentSidebarCategory().items}/>
```


@ -0,0 +1,13 @@
---
title: Advanced Features
slug: /advanced-features
---
This chapter mainly introduces the advanced features of TDengine, such as data subscription, caching, stream computing, edge-cloud collaboration, and data access.
```mdx-code-block
import DocCardList from '@theme/DocCardList';
import {useCurrentSidebarCategory} from '@docusaurus/theme-common';
<DocCardList items={useCurrentSidebarCategory().items}/>
```


@ -0,0 +1,766 @@
---
title: Connecting to TDengine
slug: /developer-guide/connecting-to-tdengine
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
import Image from '@theme/IdealImage';
import imgConnect from '../assets/connecting-to-tdengine-01.png';
import ConnJava from "../assets/resources/_connect_java.mdx";
import ConnGo from "../assets/resources/_connect_go.mdx";
import ConnRust from "../assets/resources/_connect_rust.mdx";
import ConnNode from "../assets/resources/_connect_node.mdx";
import ConnPythonNative from "../assets/resources/_connect_python.mdx";
import ConnCSNative from "../assets/resources/_connect_cs.mdx";
import ConnC from "../assets/resources/_connect_c.mdx";
import InstallOnLinux from "../assets/resources/_linux_install.mdx";
import InstallOnWindows from "../assets/resources/_windows_install.mdx";
import InstallOnMacOS from "../assets/resources/_macos_install.mdx";
import VerifyLinux from "../assets/resources/_verify_linux.mdx";
import VerifyMacOS from "../assets/resources/_verify_macos.mdx";
import VerifyWindows from "../assets/resources/_verify_windows.mdx";
TDengine provides a rich set of application development interfaces. To facilitate users in quickly developing their applications, TDengine supports connectors for multiple programming languages. The official connectors include support for C/C++, Java, Python, Go, Node.js, C#, Rust, Lua (community contribution), and PHP (community contribution). These connectors support connecting to the TDengine cluster using the native interface (taosc) and REST interface (not supported in some languages yet). Community developers have also contributed several unofficial connectors, such as ADO.NET connector, Lua connector, and PHP connector. Additionally, TDengine can directly call the REST API provided by taosadapter for data writing and querying operations.
## Connection Methods
TDengine provides three methods for establishing connections:
1. Direct connection between the client driver taosc and the server program taosd, referred to as "native connection" in the text below.
2. Connection to taosd through the REST API provided by the taosAdapter component, referred to as "REST connection" in the text below.
3. Connection to taosd through the WebSocket API provided by the taosAdapter component, referred to as "WebSocket connection" in the text below.
<figure>
<Image img={imgConnect} alt="Connecting to TDengine"/>
<figcaption>Figure 1. Connecting to TDengine</figcaption>
</figure>
Regardless of the method used to establish the connection, the connectors provide the same or similar API to operate the database and can execute SQL statements. The initialization of the connection slightly differs, but users will not feel any difference in usage.
For various connection methods and language connector support, please refer to: [Connector Features](../../tdengine-reference/client-libraries/)
Key differences include:
1. Using native connection requires ensuring that the client driver taosc and the server's TDengine version are compatible.
2. Using REST connection does not require installing the client driver taosc, offering the advantage of cross-platform ease of use, but it lacks features like data subscription and binary data types. Additionally, compared to native and WebSocket connections, the performance of REST connections is the lowest. REST interfaces are stateless. When using REST connections, it is necessary to specify the database names of tables and supertables in SQL.
3. Using WebSocket connection also does not require installing the client driver taosc.
4. Connecting to cloud service instances must use REST connection or WebSocket connection.
**WebSocket connection is recommended**
## Installing the Client Driver taosc
If you choose a native connection and your application is not running on the same server as TDengine, you need to install the client driver first; otherwise, you can skip this step. To avoid incompatibility between the client driver and the server, please use consistent versions.
### Installation Steps
<Tabs defaultValue="linux" groupId="os">
<TabItem value="linux" label="Linux">
<InstallOnLinux />
</TabItem>
<TabItem value="windows" label="Windows">
<InstallOnWindows />
</TabItem>
<TabItem value="macos" label="macOS">
<InstallOnMacOS />
</TabItem>
</Tabs>
### Installation Verification
After completing the above installation and configuration, and confirming that the TDengine service has started running normally, you can log in using the TDengine command-line program `taos` included in the installation package.
<Tabs defaultValue="linux" groupId="os">
<TabItem value="linux" label="Linux">
<VerifyLinux />
</TabItem>
<TabItem value="windows" label="Windows">
<VerifyWindows />
</TabItem>
<TabItem value="macos" label="macOS">
<VerifyMacOS />
</TabItem>
</Tabs>
## Installing Connectors
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
If you are using Maven to manage your project, simply add the following dependency to your pom.xml.
```xml
<dependency>
<groupId>com.taosdata.jdbc</groupId>
<artifactId>taos-jdbcdriver</artifactId>
<version>3.5.2</version>
</dependency>
```
</TabItem>
<TabItem label="Python" value="python">
- **Pre-installation Preparation**
- Install Python. Recent versions of the taospy package require Python 3.6.2+. Earlier versions of the taospy package require Python 3.7+. The taos-ws-py package requires Python 3.7+. If Python is not already installed on your system, refer to [Python BeginnersGuide](https://wiki.python.org/moin/BeginnersGuide/Download) for installation.
- Install [pip](https://pypi.org/project/pip/). In most cases, the Python installation package comes with the pip tool; if not, refer to the [pip documentation](https://pip.pypa.io/en/stable/installation/) for installation.
- If using a native connection, you also need to [install the client driver](../connecting-to-tdengine/). The client software package includes the TDengine client dynamic link library (libtaos.so or taos.dll) and TDengine CLI.
- **Using pip to Install**
- Uninstall old versions
If you have previously installed old versions of the Python connector, please uninstall them first.
```shell
pip3 uninstall taos taospy
pip3 uninstall taos taos-ws-py
```
- Install `taospy`
- Latest version
```shell
pip3 install taospy
```
- Install a specific version
```shell
pip3 install taospy==2.3.0
```
- Install from GitHub
```shell
pip3 install git+https://github.com/taosdata/taos-connector-python.git
```
Note: This package is for native connection
- Install `taos-ws-py`
```bash
pip3 install taos-ws-py
```
Note: This package is for WebSocket connection
- Install both `taospy` and `taos-ws-py`
```bash
pip3 install taospy[ws]
```
- **Installation Verification**
<Tabs defaultValue="rest">
<TabItem value="native" label="Native Connection">
For native connections, it is necessary to verify that both the client driver and the Python connector itself are correctly installed. If the `taos` module can be successfully imported, then the client driver and Python connector are correctly installed. You can enter in the Python interactive Shell:
```python
import taos
```
</TabItem>
<TabItem value="rest" label="REST Connection">
For REST connections, you only need to verify if the `taosrest` module can be successfully imported. You can enter in the Python interactive Shell:
```python
import taosrest
```
</TabItem>
<TabItem value="ws" label="WebSocket Connection">
For WebSocket connections, you only need to verify if the `taosws` module can be successfully imported. You can enter in the Python interactive Shell:
```python
import taosws
```
</TabItem>
</Tabs>
</TabItem>
<TabItem label="Go" value="go">
Edit `go.mod` to add the `driver-go` dependency.
```go-mod title=go.mod
module goexample
go 1.17
require github.com/taosdata/driver-go/v3 latest
```
:::note
driver-go uses cgo to wrap the taosc API. cgo requires GCC to compile C source code. Therefore, make sure GCC is installed on your system.
:::
</TabItem>
<TabItem label="Rust" value="rust">
Edit `Cargo.toml` to add the `taos` dependency.
```toml title=Cargo.toml
[dependencies]
taos = { version = "*"}
```
:::info
The Rust connector distinguishes different connection methods through different features. It supports both native and WebSocket connections by default. If only a WebSocket connection is needed, set the `ws` feature:
```toml
taos = { version = "*", default-features = false, features = ["ws"] }
```
:::
</TabItem>
<TabItem label="Node.js" value="node">
- **Pre-installation Preparation**
- Install the Node.js development environment, using version 14 or above. Download link: [https://nodejs.org/en/download/](https://nodejs.org/en/download/)
- **Installation**
- Use npm to install the Node.js connector
```shell
npm install @tdengine/websocket
```
Note: Node.js currently only supports WebSocket connections
- **Installation Verification**
- Create a verification directory, for example: `~/tdengine-test`, download the [nodejsChecker.js source code](https://github.com/taosdata/TDengine/tree/main/docs/examples/node/websocketexample/nodejsChecker.js) from GitHub to local.
- Execute the following commands in the command line.
```bash
npm init -y
npm install @tdengine/websocket
node nodejsChecker.js
```
- After performing the above steps, the command line will output the results of nodejsChecker.js connecting to the TDengine instance and performing simple insertion and query operations.
</TabItem>
<TabItem label="C#" value="csharp">
Edit the project configuration file to add a reference to [TDengine.Connector](https://www.nuget.org/packages/TDengine.Connector/):
```xml title=csharp.csproj
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<StartupObject>TDengineExample.AsyncQueryExample</StartupObject>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="TDengine.Connector" Version="3.1.0" />
</ItemGroup>
</Project>
```
You can also add it via the dotnet command:
```shell
dotnet add package TDengine.Connector
```
:::note
The following example code is based on dotnet6.0. If you are using another version, you may need to make appropriate adjustments.
:::
</TabItem>
<TabItem label="C" value="c">
If you have already installed the TDengine server software or the TDengine client driver taosc, then the C connector is already installed and no additional action is required.
</TabItem>
<TabItem label="REST API" value="rest">
To access TDengine using the REST API method, no drivers or connectors need to be installed.
</TabItem>
</Tabs>
## Establishing Connection
Before proceeding with this step, please ensure that there is a running TDengine that can be accessed, and that the server's FQDN is configured correctly. The following example code assumes that TDengine is installed on the local machine, and that the FQDN (default localhost) and serverPort (default 6030) are using the default configuration.
### Connection Parameters
There are many configuration options for connecting, so before establishing a connection, let's first introduce the parameters used by the connectors of each language to establish a connection.
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
The parameters for establishing a connection with the Java connector are URL and Properties.
The JDBC URL format for TDengine is: `jdbc:[TAOS|TAOS-WS|TAOS-RS]://[host_name]:[port]/[database_name]?[user={user}|&password={password}|&charset={charset}|&cfgdir={config_dir}|&locale={locale}|&timezone={timezone}|&batchfetch={batchfetch}]`
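For example, a WebSocket connection to a local taosAdapter using the default account might use a URL like the following (host, port, and the `power` database are assumptions for this sketch):
```text
jdbc:TAOS-WS://localhost:6041/power?user=root&password=taosdata
```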
For detailed explanations of URL and Properties parameters and how to use them, see [URL specifications](../../tdengine-reference/client-libraries/java/)
</TabItem>
<TabItem label="Python" value="python">
The Python connector uses the `connect()` method to establish a connection. The specific connection parameters are:
- url: URL of the `taosAdapter` REST service. The default is port `6041` on `localhost`.
- user: TDengine username. The default is `root`.
- password: TDengine user password. The default is `taosdata`.
- timeout: HTTP request timeout in seconds. The default is `socket._GLOBAL_DEFAULT_TIMEOUT`. Generally, no configuration is needed.
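A minimal sketch using these parameters with the REST connection (assuming a local taosAdapter and the default account):
```python
import taosrest

# Connect to a local taosAdapter REST endpoint with the default account;
# url/user/password/timeout correspond to the parameters listed above.
conn = taosrest.connect(url="http://localhost:6041",
                        user="root",
                        password="taosdata",
                        timeout=30)
```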
</TabItem>
<TabItem label="Go" value="go">
The data source name has a generic format, similar to [PEAR DB](http://pear.php.net/manual/en/package.database.db.intro-dsn.php), but without the type prefix (brackets indicate optional):
```text
[username[:password]@][protocol[(address)]]/[dbname][?param1=value1&...&paramN=valueN]
```
Complete DSN format:
```text
username:password@protocol(address)/dbname?param=value
```
Supported DSN parameters are as follows:
Native connection:
- `cfg` specifies the taos.cfg directory
- `cgoThread` specifies the number of cgo operations that can be executed concurrently, default is the number of system cores
- `cgoAsyncHandlerPoolSize` specifies the size of the async function handler, default is 10000
REST connection:
- `disableCompression` whether to accept compressed data, default is true which means not accepting compressed data, set to false if data transmission uses gzip compression.
- `readBufferSize` the size of the buffer for reading data, default is 4K (4096), this value can be increased appropriately when the query result data volume is large.
- `token` the token used when connecting to cloud services.
- `skipVerify` whether to skip certificate verification, default is false which means not skipping certificate verification, set to true if connecting to an insecure service.
WebSocket connection:
- `enableCompression` whether to send compressed data, default is false which means not sending compressed data, set to true if data transmission uses compression.
- `readTimeout` the timeout for reading data, default is 5m.
- `writeTimeout` the timeout for writing data, default is 10s.
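For illustration, DSNs for the three connection types might look like the following (host names and the `test` database are assumptions; the protocol tokens correspond to the driver's registered connection types):
```text
root:taosdata@tcp(localhost:6030)/test     # native connection
root:taosdata@http(localhost:6041)/test    # REST connection
root:taosdata@ws(localhost:6041)/test      # WebSocket connection
```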
</TabItem>
<TabItem label="Rust" value="rust">
Rust connector uses DSN to create connections, the basic structure of the DSN description string is as follows:
```text
<driver>[+<protocol>]://[[<username>:<password>@]<host>:<port>][/<database>][?<p1>=<v1>[&<p2>=<v2>]]
|------|------------|---|-----------|-----------|------|------|------------|-----------------------|
|driver| protocol | | username | password | host | port | database | params |
```
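For example, following the structure above, a WebSocket DSN for a local taosAdapter with the default account might be (the `test` database is an assumption):
```text
taos+ws://root:taosdata@localhost:6041/test
```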
For detailed explanation of DSN and how to use it, see [Connection Features](../../tdengine-reference/client-libraries/rust/)
</TabItem>
<TabItem label="Node.js" value="node">
Node.js connector uses DSN to create connections, the basic structure of the DSN description string is as follows:
```text
[+<protocol>]://[[<username>:<password>@]<host>:<port>][/<database>][?<p1>=<v1>[&<p2>=<v2>]]
|------------|---|-----------|-----------|------|------|------------|-----------------------|
| protocol | | username | password | host | port | database | params |
```
- **protocol**: Establish a connection using the websocket protocol. For example, `ws://localhost:6041`
- **username/password**: Username and password for the database.
- **host/port**: Host address and port number. For example, `localhost:6041`
- **database**: Database name.
- **params**: Other parameters. For example, token.
- Complete DSN example:
```js
ws://root:taosdata@localhost:6041
```
</TabItem>
<TabItem label="C#" value="csharp">
ConnectionStringBuilder uses a key-value pair method to set connection parameters, where key is the parameter name and value is the parameter value, separated by a semicolon `;`.
For example:
```csharp
"protocol=WebSocket;host=127.0.0.1;port=6041;useSSL=false"
```
Supported parameters are as follows:
- `host`: The address of the TDengine instance.
- `port`: The port of the TDengine instance.
- `username`: Username for the connection.
- `password`: Password for the connection.
- `protocol`: Connection protocol, options are Native or WebSocket, default is Native.
- `db`: Database to connect to.
- `timezone`: Time zone, default is the local time zone.
- `connTimeout`: Connection timeout, default is 1 minute.
Additional parameters supported for WebSocket connections:
- `readTimeout`: Read timeout, default is 5 minutes.
- `writeTimeout`: Send timeout, default is 10 seconds.
- `token`: Token for connecting to TDengine cloud.
- `useSSL`: Whether to use SSL connection, default is false.
- `enableCompression`: Whether to enable WebSocket compression, default is false.
- `autoReconnect`: Whether to automatically reconnect, default is false.
- `reconnectRetryCount`: Number of retries for reconnection, default is 3.
- `reconnectIntervalMs`: Reconnection interval in milliseconds, default is 2000.
</TabItem>
<TabItem label="C" value="c">
**WebSocket Connection**
For C/C++ language connectors, the WebSocket connection uses the `ws_connect()` function to establish a connection with the TDengine database. Its parameter is a DSN description string, structured as follows:
```text
<driver>[+<protocol>]://[[<username>:<password>@]<host>:<port>][/<database>][?<p1>=<v1>[&<p2>=<v2>]]
|------|------------|---|-----------|-----------|------|------|------------|-----------------------|
|driver| protocol | | username | password | host | port | database | params |
```
For detailed explanation of DSN and how to use it, see [Connection Features](../../tdengine-reference/client-libraries/cpp/#dsn)
**Native Connection**
For C/C++ language connectors, the native connection method uses the `taos_connect()` function to establish a connection with the TDengine database. Detailed parameters are as follows:
- `host`: Hostname or IP address of the database server to connect to. If it is a local database, `"localhost"` can be used.
- `user`: Username for logging into the database.
- `passwd`: Password corresponding to the username.
- `db`: Default database name when connecting. If no database is specified, pass `NULL` or an empty string.
- `port`: Port number the database server listens on. The default port number is `6030`.
The `taos_connect_auth()` function is also provided for establishing a connection with the TDengine database using an MD5 encrypted password. This function is similar to `taos_connect`, but differs in the handling of the password, as `taos_connect_auth` requires the MD5 encrypted string of the password.
</TabItem>
<TabItem label="REST API" value="rest">
When accessing TDengine via REST API, the application directly establishes an HTTP connection with taosAdapter, and it is recommended to use a connection pool to manage connections.
For specific parameters using the REST API, refer to: [HTTP request format](../../tdengine-reference/client-libraries/rest-api/)
</TabItem>
</Tabs>
### WebSocket Connection
Below are code examples for establishing WebSocket connections in various language connectors. It demonstrates how to connect to the TDengine database using WebSocket and set some parameters for the connection. The whole process mainly involves establishing the database connection and handling exceptions.
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WSConnectExample.java:main}}
```
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/connect_websocket_examples.py:connect}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/connect/wsexample/main.go}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/restexample/examples/connect.rs}}
```
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/sql_example.js:createConnect}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wsConnect/Program.cs:main}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c-ws/connect_example.c}}
```
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
### Native Connection
Below are code examples for establishing a native connection with various language connectors. They demonstrate how to connect to the TDengine database using the native connection method and set connection parameters. The process mainly involves establishing the database connection and handling exceptions.
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/JNIConnectExample.java:main}}
```
</TabItem>
<TabItem label="Python" value="python">
<ConnPythonNative />
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/connect/cgoexample/main.go}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/nativeexample/examples/connect.rs}}
```
</TabItem>
<TabItem label="Node.js" value="node">
Not supported
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/connect/Program.cs:main}}
```
</TabItem>
<TabItem label="C" value="c">
<ConnC />
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
### REST Connection
Below are code examples for establishing a REST connection with various language connectors. They demonstrate how to connect to the TDengine database using the REST connection method. The process mainly involves establishing the database connection and handling exceptions.
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/RESTConnectExample.java:main}}
```
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/connect_rest_example.py:connect}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/connect/restexample/main.go}}
```
</TabItem>
<TabItem label="Rust" value="rust">
Not supported
</TabItem>
<TabItem label="Node.js" value="node">
Not supported
</TabItem>
<TabItem label="C#" value="csharp">
Not supported
</TabItem>
<TabItem label="C" value="c">
Not supported
</TabItem>
<TabItem label="REST API" value="rest">
Access TDengine via the REST API, in which case the application establishes the HTTP connection on its own.
</TabItem>
</Tabs>
:::tip
If the connection fails, in most cases it is due to an incorrect FQDN or firewall settings. For detailed troubleshooting, see ["Encountering the error 'Unable to establish connection, what should I do?'"](../../frequently-asked-questions/) in the FAQ.
:::
## Connection Pool
Some connectors offer a connection pool, or can be used in conjunction with existing connection pool components. By using a connection pool, applications can quickly obtain available connections from the pool, avoiding the overhead of creating and destroying connections with each operation. This not only reduces resource consumption but also improves response speed. Additionally, connection pools support the management of connections, such as limiting the maximum number of connections and checking the validity of connections, ensuring efficient and reliable use of connections. We **recommend managing connections using a connection pool**.
Below are code examples of connection pool support for various language connectors.
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
**HikariCP**
Example usage is as follows:
```java
{{#include docs/examples/java/src/main/java/com/taos/example/HikariDemo.java:connection_pool}}
```
> After obtaining a connection through HikariDataSource.getConnection(), you need to call the close() method after use, which actually does not close the connection but returns it to the pool.
> For more issues about using HikariCP, please see the [official documentation](https://github.com/brettwooldridge/HikariCP).
**Druid**
Example usage is as follows:
```java
{{#include docs/examples/java/src/main/java/com/taos/example/DruidDemo.java:connection_pool}}
```
> For more issues about using Druid, please see the [official documentation](https://github.com/alibaba/druid).
</TabItem>
<TabItem label="Python" value="python">
<ConnPythonNative />
</TabItem>
<TabItem label="Go" value="go">
The connection created with `sql.Open` already implements a connection pool, and you can set connection pool parameters through the API, as shown in the example below:
```go
{{#include docs/examples/go/connect/connpool/main.go:pool}}
```
</TabItem>
<TabItem label="Rust" value="rust">
In complex applications, enabling connection pooling is recommended. By default (in asynchronous mode), the connection pool for [taos] is implemented using [deadpool].
You can create a connection pool with default parameters as follows:
```rust
let pool: Pool<TaosBuilder> = TaosBuilder::from_dsn("taos:///")
.unwrap()
.pool()
.unwrap();
```
You can also use the connection pool builder to set the connection pool parameters:
```rust
let pool: Pool<TaosBuilder> = Pool::builder(Manager::from_dsn(self.dsn.clone()).unwrap().0)
.max_size(88) // Maximum number of connections
.build()
.unwrap();
```
In your application code, use `pool.get()?` to obtain a connection object [Taos].
```rust
let taos = pool.get()?;
```
</TabItem>
</Tabs>
View File

@ -0,0 +1,338 @@
---
title: Running SQL Statements
sidebar_label: Running SQL Statements
slug: /developer-guide/running-sql-statements
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
TDengine provides comprehensive support for the SQL language, allowing users to query, insert, and delete data using familiar SQL syntax. TDengine's SQL also supports database and table management operations, such as creating, modifying, and deleting databases and tables. TDengine extends standard SQL by introducing features unique to time-series data processing, such as aggregation queries, downsampling, and interpolation queries, to adapt to the characteristics of time-series data. These extensions enable users to process time-series data more efficiently and perform complex data analysis and processing. For specific supported SQL syntax, please refer to [TDengine SQL](../../tdengine-reference/sql-manual/)
Below, we introduce how to use language connectors to execute SQL for creating databases, tables, writing data, and querying data.
:::note
REST connection: Connectors for various programming languages encapsulate connections over `HTTP` requests, supporting data writing and querying; developers still access `TDengine` through the interfaces provided by the connectors.
REST API: Directly call the REST API interface provided by `taosAdapter` for data writing and querying. The code examples use the `curl` command for demonstration.
:::
## Creating Databases and Tables
Below, using smart meters as an example, we show how to use language connectors to execute SQL commands that create a database named `power` and then use the `power` database as the default database.
Next, create a supertable (STABLE) named `meters`, whose schema includes columns for timestamp, current, voltage, and phase, and tags for group ID and location.
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/JdbcCreatDBDemo.java:create_db_and_table}}
```
</TabItem>
<TabItem label="Python" value="python">
```python title="WebSocket Connection"
{{#include docs/examples/python/create_db_ws.py}}
```
```python title="Native Connection"
{{#include docs/examples/python/create_db_native.py}}
```
```python title="Rest Connection"
{{#include docs/examples/python/create_db_rest.py}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/sqlquery/main.go:create_db_and_table}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/nativeexample/examples/createdb.rs:create_db_and_table}}
```
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/sql_example.js:create_db_and_table}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wsInsert/Program.cs:create_db_and_table}}
```
</TabItem>
<TabItem label="C" value="c">
```c title="WebSocket Connection"
{{#include docs/examples/c-ws/create_db_demo.c:create_db_and_table}}
```
```c title="Native Connection"
{{#include docs/examples/c/create_db_demo.c:create_db_and_table}}
```
</TabItem>
<TabItem label="REST API" value="rest">
Create Database
```shell
curl --location -uroot:taosdata 'http://127.0.0.1:6041/rest/sql' \
--data 'CREATE DATABASE IF NOT EXISTS power'
```
Create the table, specifying the database as `power` in the URL
```shell
curl --location -uroot:taosdata 'http://127.0.0.1:6041/rest/sql/power' \
--data 'CREATE STABLE IF NOT EXISTS meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) TAGS (groupId INT, location BINARY(24))'
```
</TabItem>
</Tabs>
> **Note**: It is recommended to construct SQL statements in the format of `<dbName>.<tableName>`. It is not recommended to use `USE DBName` in applications.
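For example, prefer the fully qualified form over switching the current database (illustrative SQL based on the `power` database and `meters` supertable created above):
```sql
-- Recommended: qualify the table name with the database name
SELECT ts, current FROM power.meters LIMIT 1;

-- Not recommended in applications: relying on USE to select the database
USE power;
SELECT ts, current FROM meters LIMIT 1;
```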
## Insert Data
Below, using smart meters as an example, we demonstrate how to use connectors to execute SQL that inserts data into the `meters` supertable of the `power` database. The examples use TDengine's auto table creation SQL syntax to write 3 records into the d1001 subtable and 1 record into the d1002 subtable, then print the actual number of records inserted.
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/JdbcInsertDataDemo.java:insert_data}}
```
**Note**
NOW is an internal system function that defaults to the current time of the client's computer. NOW + 1s represents the client's current time plus 1 second, where the number is followed by a time unit: a (milliseconds), s (seconds), m (minutes), h (hours), d (days), w (weeks), n (months), y (years).
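For reference, an insert using this time-offset syntax might look like the following (illustrative SQL based on the `meters` schema created earlier):
```sql
INSERT INTO power.d1001 USING power.meters TAGS(2, 'California.SanFrancisco')
VALUES (NOW + 1s, 10.3, 219, 0.31);
```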
</TabItem>
<TabItem label="Python" value="python">
```python title="WebSocket Connection"
{{#include docs/examples/python/insert_ws.py}}
```
```python title="Native Connection"
{{#include docs/examples/python/insert_native.py}}
```
```python title="Rest Connection"
{{#include docs/examples/python/insert_rest.py}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/sqlquery/main.go:insert_data}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/nativeexample/examples/insert.rs:insert_data}}
```
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/sql_example.js:insertData}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wsInsert/Program.cs:insert_data}}
```
</TabItem>
<TabItem label="C" value="c">
```c title="WebSocket Connection"
{{#include docs/examples/c-ws/insert_data_demo.c:insert_data}}
```
```c title="Native Connection"
{{#include docs/examples/c/insert_data_demo.c:insert_data}}
```
**Note**
NOW is an internal system function, defaulting to the current time of the client's computer. NOW + 1s represents the client's current time plus 1 second, where the number is followed by a time unit: a (milliseconds), s (seconds), m (minutes), h (hours), d (days), w (weeks), n (months), y (years).
</TabItem>
<TabItem label="REST API" value="rest">
Write data
```shell
curl --location -uroot:taosdata 'http://127.0.0.1:6041/rest/sql' \
--data 'INSERT INTO power.d1001 USING power.meters TAGS(2,'\''California.SanFrancisco'\'') VALUES (NOW + 1a, 10.30000, 219, 0.31000) (NOW + 2a, 12.60000, 218, 0.33000) (NOW + 3a, 12.30000, 221, 0.31000) power.d1002 USING power.meters TAGS(3, '\''California.SanFrancisco'\'') VALUES (NOW + 1a, 10.30000, 218, 0.25000)'
```
</TabItem>
</Tabs>
## Query Data
Below, using smart meters as an example, we demonstrate how to use connectors in various languages to execute SQL that queries data from the `meters` supertable of the `power` database, retrieving up to 100 rows and printing the results line by line.
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/JdbcQueryDemo.java:query_data}}
```
**Note** Queries are handled the same way as with relational databases: use indices starting from 1 to retrieve the returned column values, and retrieving columns by name is recommended.
</TabItem>
<TabItem label="Python" value="python">
```python title="WebSocket Connection"
{{#include docs/examples/python/query_ws.py}}
```
```python title="Native Connection"
{{#include docs/examples/python/query_native.py}}
```
```python title="Rest Connection"
{{#include docs/examples/python/query_rest.py}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/sqlquery/main.go:select_data}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/nativeexample/examples/query.rs:query_data}}
```
The Rust connector also supports using **serde** to deserialize rows into structured results:
```rust
{{#include docs/examples/rust/nativeexample/examples/query.rs:query_data_2}}
```
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/sql_example.js:queryData}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wsInsert/Program.cs:select_data}}
```
</TabItem>
<TabItem label="C" value="c">
```c title="WebSocket Connection"
{{#include docs/examples/c-ws/query_data_demo.c:query_data}}
```
```c title="Native Connection"
{{#include docs/examples/c/query_data_demo.c:query_data}}
```
</TabItem>
<TabItem label="REST API" value="rest">
Query Data
```shell
curl --location -uroot:taosdata 'http://127.0.0.1:6041/rest/sql' \
--data 'SELECT ts, current, location FROM power.meters limit 100'
```
</TabItem>
</Tabs>
## Execute SQL with reqId
reqId can be used for request link tracing, similar to the role of traceId in distributed systems. A request might need to pass through multiple services or modules to be completed. reqId is used to identify and associate all related operations of this request, allowing us to track and analyze the complete path of the request.
Using reqId has the following benefits:
- Request tracing: By associating the same reqId with all related operations of a request, you can trace the complete path of the request in the system.
- Performance analysis: By analyzing a request's reqId, you can understand the processing time of the request across various services and modules, thereby identifying performance bottlenecks.
- Fault diagnosis: When a request fails, you can identify where the problem occurred by examining the reqId associated with the request.
If the user does not set a reqId, the connector will internally generate one randomly, but it is recommended that users explicitly set it to better associate it with their requests.
Below are code examples of setting reqId to execute SQL in various language connectors.
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/JdbcReqIdDemo.java:with_reqid}}
```
</TabItem>
<TabItem label="Python" value="python">
```python title="WebSocket Connection"
{{#include docs/examples/python/reqid_ws.py}}
```
```python title="Native Connection"
{{#include docs/examples/python/reqid_native.py}}
```
```python title="Rest Connection"
{{#include docs/examples/python/reqid_rest.py}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/queryreqid/main.go:query_id}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/nativeexample/examples/query.rs:query_with_req_id}}
```
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/sql_example.js:sqlWithReqid}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wsInsert/Program.cs:query_id}}
```
</TabItem>
<TabItem label="C" value="c">
```c "WebSocket Connection"
{{#include docs/examples/c-ws/with_reqid_demo.c:with_reqid}}
```
```c "Native Connection"
{{#include docs/examples/c/with_reqid_demo.c:with_reqid}}
```
</TabItem>
<TabItem label="REST API" value="rest">
Query data, specifying reqId as 3
```shell
curl --location -uroot:taosdata 'http://127.0.0.1:6041/rest/sql?req_id=3' \
--data 'SELECT ts, current, location FROM power.meters limit 1'
```
</TabItem>
</Tabs>
View File

@ -0,0 +1,329 @@
---
title: Ingesting Data in Schemaless Mode
sidebar_label: Schemaless Ingestion
slug: /developer-guide/schemaless-ingestion
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
In IoT applications, to achieve functions such as automated management, business analysis, and device monitoring, it is often necessary to collect a large number of data items. However, due to reasons such as application logic upgrades and hardware adjustments of the devices themselves, the data collection items may change frequently. To address this challenge, TDengine provides a schemaless writing method, aimed at simplifying the data recording process.
With the schemaless writing method, users do not need to create supertables or subtables in advance, as TDengine will automatically create the corresponding storage structures based on the actual data written. Additionally, when necessary, the schemaless writing method can also automatically add necessary data columns or tag columns to ensure that the data written by users is correctly stored.
It is worth noting that the supertables and their corresponding subtables created through the schemaless writing method have no functional differences from those created directly through SQL. Users can still use SQL to write data directly into them. However, since the table names generated by the schemaless writing method are based on tag values according to a fixed mapping rule, these table names may lack readability and are not easy to understand.
**When using the schemaless writing method, tables are created automatically, and manual creation of tables may lead to unknown errors.**
## Schemaless Writing Line Protocol
TDengine's schemaless writing line protocol is compatible with InfluxDB's line protocol, OpenTSDB's telnet line protocol, and OpenTSDB's JSON format protocol. For the standard writing protocols of InfluxDB and OpenTSDB, please refer to their respective official documentation.
Below, we first introduce TDengine's extensions to InfluxDB's line protocol. This protocol allows users to control the (supertable) schema in a more fine-grained way. Each string expresses one data row, and multiple row strings can be passed to the write API at once for batch writing. The format is specified as follows.
```text
measurement,tag_set field_set timestamp
```
The parameters are explained as follows.
- measurement is the table name, separated by a comma from tag_set.
- tag_set is formatted as `<tag_key>=<tag_value>, <tag_key>=<tag_value>`, representing tag column data, separated by commas, and separated by a space from field_set.
- field_set is formatted as `<field_key>=<field_value>, <field_key>=<field_value>`, representing ordinary columns, also separated by commas, and separated by a space from timestamp.
- timestamp is the primary key timestamp for this row of data.
- Schemaless writing does not support writing data for tables with a second primary key column.
All data in tag_set are automatically converted to nchar data type and do not need to use double quotes.
In the schemaless writing line protocol, each data item in field_set needs to describe its own data type, with specific requirements as follows.
- If enclosed in double quotes, it represents varchar type, e.g., "abc".
- If enclosed in double quotes and prefixed with L or l, it represents nchar type, e.g., L" error message ".
- If enclosed in double quotes and prefixed with G or g, it represents geometry type, e.g., G"Point(4.343 89.342)".
- If enclosed in double quotes and prefixed with B or b, it represents varbinary type, the double quotes can contain hexadecimal starting with \x or strings, e.g., B"\x98f46e" and B"hello".
- For spaces, equal signs (=), commas (,), double quotes ("), and backslashes (\), a backslash (\) is needed for escaping (all in half-width English characters). The field escape rules for the schemaless writing protocol are shown in the following table, and an example follows the table.
| **Number** | **Field** | **Characters to Escape** |
| -------- | -------- | ---------------- |
| 1 | Supertable name | comma, space |
| 2 | Tag name | comma, equal sign, space |
| 3 | Tag value | comma, equal sign, space |
| 4 | Column name | comma, equal sign, space |
| 5 | Column value | double quotes, backslash |
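For example, a tag value containing a space and a column value containing a double quote would be escaped as follows (an illustrative line constructed from the rules above):
```text
st,t1=San\ Francisco,t2=4 c1=3i64,c3="say \"hi\"" 1626006833639000000
```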
If two consecutive backslashes are used, the first backslash acts as an escape character; if there is only one backslash, no escape is needed. The backslash escape rules for the schemaless writing protocol are shown in the following table.
| **Number** | **Backslash** | **Escapes to** |
| -------- | ------------ | ---------- |
| 1 | \ | \ |
| 2 | \\\\ | \ |
| 3 | \\\\\\ | \\\\ |
| 4 | \\\\\\\\ | \\\\ |
| 5 | \\\\\\\\\\ | \\\\\\ |
| 6 | \\\\\\\\\\\\ | \\\\\\ |
Numeric types are distinguished by suffixes. The escape rules for numeric types in the schema-less write protocol are shown in the following table.
| **Number** | **Suffix** | **Mapped Type** | **Size (Bytes)** |
| ---------- | ---------- | ---------------------------- | ---------------- |
| 1 | None or f64| double | 8 |
| 2 | f32 | float | 4 |
| 3 | i8/u8 | TinyInt/UTinyInt | 1 |
| 4 | i16/u16 | SmallInt/USmallInt | 2 |
| 5 | i32/u32 | Int/UInt | 4 |
| 6 | i64/i/u64/u| BigInt/BigInt/UBigInt/UBigInt| 8 |
- t, T, true, True, TRUE, f, F, false, False will be directly treated as BOOL type.
For example, the following data line indicates that, under the supertable named st, in a subtable with tags t1 as "3" (NCHAR), t2 as "4" (NCHAR), and t3 as "t3" (NCHAR), a row is written with column c1 as 3 (BIGINT), c2 as false (BOOL), c3 as "passit" (BINARY), c4 as 4 (DOUBLE), and the primary timestamp as 1626006833639000000.
```json
st,t1=3,t2=4,t3=t3 c1=3i64,c3="passit",c2=false,c4=4f64 1626006833639000000
```
Note that if there is a case error in describing the data type suffix or the data type specified for the data is incorrect, it may trigger an error message and cause data writing to fail.
TDengine provides idempotence for data writing, meaning you can repeatedly call the API to write data that failed previously. However, it does not provide atomicity for writing multiple rows of data. That is, during the batch writing process of multiple rows of data, some data may be written successfully while others may fail.
## Schemaless Writing Handling Rules
Schemaless writing handles row data according to the following principles:
1. The subtable name is generated using the following rules: first, combine the measurement name with the tag's key and value into the following string:
```json
"measurement,tag_key1=tag_value1,tag_key2=tag_value2"
```
   - Note that tag_key1, tag_key2 are not in the original order entered by the user, but are sorted in ascending order by tag name. Therefore, tag_key1 is not necessarily the first tag entered in the line protocol.
   After sorting, calculate the MD5 hash value "md5_val" of this string, and use it to form the table name "t_md5_val". The "t_" is a fixed prefix, and every table automatically generated through this mapping has this prefix.
- If you do not want to use the automatically generated table name, there are two ways to specify the subtable name (the first method has higher priority).
1. By configuring the smlAutoChildTableNameDelimiter parameter in taos.cfg (excluding `@ # space CR LF tab`).
      1. For example: if smlAutoChildTableNameDelimiter=- is configured and the inserted data is st,t0=cpu1,t1=4 c1=3 1626006833639000000, the created table name will be cpu1-4.
2. By configuring the smlChildTableName parameter in taos.cfg.
      1. For example: if smlChildTableName=tname is configured and the inserted data is st,tname=cpu1,t1=4 c1=3 1626006833639000000, the created table name will be cpu1. Note that if multiple rows of data have the same tname but different tag_sets, the tag_set specified when the table was first automatically created is used, and the tag_sets in other rows are ignored.
2. If the supertable obtained from parsing the line protocol does not exist, it will be created (it is not recommended to manually create supertables, otherwise data insertion may be abnormal).
3. If the subtable obtained from parsing the line protocol does not exist, Schemaless will create the subtable according to the subtable name determined in step 1 or 2.
4. If the tag columns or regular columns specified in the data row do not exist, they will be added to the supertable (only additions, no deletions).
5. If some tag columns or regular columns exist in the supertable but are not specified in a data row, their values will be set to NULL in that row.
6. For BINARY or NCHAR columns, if the length of the values provided in the data row exceeds the limit of the column type, the maximum character storage limit of the column will be automatically increased (only additions, no deletions) to ensure the complete storage of data.
7. Errors encountered during the entire processing process will interrupt the writing process and return an error code.
8. To improve writing efficiency, it is assumed by default that the order of the field_set is the same for all rows of the same supertable (the first row contains all fields, and subsequent rows follow this order). If the order differs, set the smlDataFormat parameter to false; otherwise, data will be written assuming the same order and the data in the database will be abnormal. Starting from version 3.0.3.0, the order is checked automatically and this configuration is deprecated.
9. Since SQL table creation does not support dots (.), Schemaless also processes dots (.) in automatically created table names, replacing them with underscores (_). If the subtable name is manually specified and contains a dot (.), it will also be converted to an underscore (_).
10. The smlTsDefaultName configuration in taos.cfg (a string value), which takes effect only on the client side, sets the name of the time column for tables automatically created by schemaless writing. If it is not configured, the default is _ts.
11. Supertable and subtable names in schemaless writing are case-sensitive.
12. Schemaless writing still follows TDengine's underlying restrictions on data structures; for example, the total length of each row of data cannot exceed 48 KB (64 KB from version 3.0.5.0), and the total length of tag values cannot exceed 16 KB.
## Time Resolution Recognition
Schemaless writing supports three protocol modes, as shown in the table below:
| **Number** | **Value** | **Description** |
| ---------- | -------------------- | -------------------------------- |
| 1 | SML_LINE_PROTOCOL | InfluxDB Line Protocol |
| 2 | SML_TELNET_PROTOCOL | OpenTSDB Text Line Protocol |
| 3 | SML_JSON_PROTOCOL | JSON Protocol Format |
In the SML_LINE_PROTOCOL parsing mode, users need to specify the time resolution of the input timestamp. The available time resolutions are as follows:
| **Number** | **Time Resolution Definition** | **Meaning** |
| ---------- | ----------------------------------- | -------------- |
| 1 | TSDB_SML_TIMESTAMP_NOT_CONFIGURED | Undefined (invalid) |
| 2 | TSDB_SML_TIMESTAMP_HOURS | Hours |
| 3 | TSDB_SML_TIMESTAMP_MINUTES | Minutes |
| 4 | TSDB_SML_TIMESTAMP_SECONDS | Seconds |
| 5 | TSDB_SML_TIMESTAMP_MILLI_SECONDS | Milliseconds |
| 6 | TSDB_SML_TIMESTAMP_MICRO_SECONDS | Microseconds |
| 7 | TSDB_SML_TIMESTAMP_NANO_SECONDS | Nanoseconds |
In the SML_TELNET_PROTOCOL and SML_JSON_PROTOCOL modes, the time precision is determined by the length of the timestamp (consistent with the standard operation of OpenTSDB), and the user-specified time resolution will be ignored.
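To make the protocol and time-resolution constants concrete, below is a minimal C sketch built on the C connector's `taos_schemaless_insert()` interface; the `TSDB_`-prefixed constants are the C header's names for the values in the tables above, and the sketch assumes an already established connection `taos` with a default database selected:
```c
#include <stdio.h>
#include "taos.h"

// Write one line-protocol row with millisecond timestamps.
// Assumes `taos` is a valid connection with a default database selected.
static int insert_one_line(TAOS *taos) {
  char *lines[] = {
      "meters,groupid=2,location=California.SanFrancisco current=10.3,voltage=219,phase=0.31 1626006833639"};

  // TSDB_SML_LINE_PROTOCOL selects the InfluxDB line protocol; the 13-digit
  // timestamp above is in milliseconds, so declare millisecond resolution.
  TAOS_RES *res = taos_schemaless_insert(taos, lines, 1, TSDB_SML_LINE_PROTOCOL,
                                         TSDB_SML_TIMESTAMP_MILLI_SECONDS);
  int code = taos_errno(res);
  if (code != 0) {
    fprintf(stderr, "schemaless insert failed: %s\n", taos_errstr(res));
  }
  taos_free_result(res);
  return code;
}
```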
## Data Schema Mapping Rules
Data from the InfluxDB line protocol will be mapped to schema-based data, where the measurement maps to the supertable name, tag names in the tag_set map to tag names in the data schema, and names in the field_set map to column names. For example, the following data.
```json
st,t1=3,t2=4,t3=t3 c1=3i64,c3="passit",c2=false,c4=4f64 1626006833639000000
```
This line of data maps to a supertable st with 3 tags of type nchar (t1, t2, t3) and five data columns: ts (timestamp), c1 (bigint), c3 (binary), c2 (bool), c4 (double). It is equivalent to the following SQL statement:
```sql
create stable st (_ts timestamp, c1 bigint, c2 bool, c3 binary(6), c4 double) tags(t1 nchar(1), t2 nchar(1), t3 nchar(2))
```
## Data Schema Change Handling
This section will explain the impact on the data schema under different line data writing scenarios.
When using line protocol to write a field type with a clear identifier, subsequent changes to the field type definition will result in a clear data schema error, triggering the write API to report an error. As shown below,
```json
st,t1=3,t2=4,t3=t3 c1=3i64,c3="passit",c2=false,c4=4 1626006833639000000
st,t1=3,t2=4,t3=t3 c1=3i64,c3="passit",c2=false,c4=4i 1626006833640000000
```
The data type mapping of the first line defines the c4 column as Double, but the second line declares the column as BigInt through a numeric suffix, thus triggering a parsing error in schemaless writing.
If earlier rows of the line protocol declare a data column as binary and later rows require a longer binary length, this triggers a change in the supertable schema.
```json
st,t1=3,t2=4,t3=t3 c1=3i64,c5="pass" 1626006833639000000
st,t1=3,t2=4,t3=t3 c1=3i64,c5="passit" 1626006833640000000
```
Parsing of the first line declares column c5 as a binary(4) field. The second line still parses c5 as a binary column, but with a width of 6, so the width of the binary column is increased to accommodate the new string.
```json
st,t1=3,t2=4,t3=t3 c1=3i64 1626006833639000000
st,t1=3,t2=4,t3=t3 c1=3i64,c6="passit" 1626006833640000000
```
The second line adds a column c6 of type binary(6) relative to the first line, so a column c6 of type binary(6) is automatically added.
## Schemaless Writing Example
Below, using smart meters as an example, we introduce code samples for writing data using the schemaless writing interface with various language connectors. This includes three protocols: InfluxDB's line protocol, OpenTSDB's TELNET line protocol, and OpenTSDB's JSON format protocol.
:::note
- Since the rules for automatic table creation with schemaless writing differ from those in the previous SQL examples, please ensure that the `meters`, `metric_telnet`, and `metric_json` tables do not exist before running the code samples.
- OpenTSDB's TELNET line protocol and OpenTSDB's JSON format protocol only support a single data column, so examples other than the smart meters scenario are used for them.
:::
### WebSocket Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/SchemalessWsTest.java:schemaless}}
```
Execute schemaless writing with reqId, where the last parameter reqId can be used for request link tracing.
```java
writer.write(lineDemo, SchemalessProtocolType.LINE, SchemalessTimestampType.NANO_SECONDS, 1L);
```
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/schemaless_ws.py}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/schemaless/ws/main.go}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/restexample/examples/schemaless.rs}}
```
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/line_example.js}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wssml/Program.cs:main}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c-ws/sml_insert_demo.c:schemaless}}
```
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
### Native Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/SchemalessJniTest.java:schemaless}}
```
Execute schemaless writing with reqId, where the last parameter reqId can be used for request link tracing.
```java
writer.write(lineDemo, SchemalessProtocolType.LINE, SchemalessTimestampType.NANO_SECONDS, 1L);
```
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/schemaless_native.py}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/schemaless/native/main.go}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/nativeexample/examples/schemaless.rs}}
```
</TabItem>
<TabItem label="Node.js" value="node">
Not supported
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/nativesml/Program.cs:main}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c/sml_insert_demo.c:schemaless}}
```
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
## Querying the Written Data
By running the example code from the previous section, tables are automatically created in the power database. We can query the data using the TDengine CLI or an application. Below is an example of using the TDengine CLI to list the supertables and query data from the meters supertable.
```shell
taos> show power.stables;
stable_name |
=================================
meter_current |
stb0_0 |
meters |
Query OK, 3 row(s) in set (0.002527s)
taos> select * from power.meters limit 1 \G;
*************************** 1.row ***************************
_ts: 2021-07-11 20:33:53.639
current: 10.300000199999999
voltage: 219
phase: 0.310000000000000
groupid: 2
location: California.SanFrancisco
Query OK, 1 row(s) in set (0.004501s)
```
View File

@ -0,0 +1,166 @@
---
title: Ingesting Data in Parameter Binding Mode
sidebar_label: Parameter Binding
slug: /developer-guide/parameter-binding
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
Inserting data with parameter binding avoids the overhead of SQL syntax parsing and can significantly improve write performance. Parameter binding improves write efficiency for the following reasons:
- Reduced parsing time: With parameter binding, the structure of the SQL statement is determined at the first execution, and subsequent executions only need to replace parameter values, thus avoiding syntax parsing each time and reducing parsing time.
- Precompilation: When using parameter binding, the SQL statement can be precompiled and cached. When executed later with different parameter values, the precompiled version can be used directly, improving execution efficiency.
- Reduced network overhead: Parameter binding also reduces the amount of data sent to the database because only parameter values need to be sent, not the complete SQL statement, especially when performing a large number of similar insert or update operations, this difference is particularly noticeable.
**Tip: Parameter binding is recommended for data insertion.**
:::note
We only recommend using the following two forms of SQL for parameter binding data insertion:
```sql
a. Subtables already exist:
1. INSERT INTO meters (tbname, ts, current, voltage, phase) VALUES(?, ?, ?, ?, ?)
b. Automatic table creation on insert:
1. INSERT INTO meters (tbname, ts, current, voltage, phase, location, group_id) VALUES(?, ?, ?, ?, ?, ?, ?)
2. INSERT INTO ? USING meters TAGS (?, ?) VALUES (?, ?, ?, ?)
```
:::
Next, we continue to use smart meters as an example to demonstrate the efficient writing functionality of parameter binding with various language connectors:
1. Prepare a parameterized SQL insert statement for inserting data into the supertable `meters`. This statement allows dynamically specifying subtable names, tags, and column values.
2. Loop to generate multiple subtables and their corresponding data rows. For each subtable:
- Set the subtable's name and tag values (group ID and location).
- Generate multiple rows of data, each including a timestamp, randomly generated current, voltage, and phase values.
- Perform batch insertion operations to insert these data rows into the corresponding subtable.
3. Finally, print the actual number of rows inserted into the table.
## WebSocket Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
There are two kinds of interfaces for parameter binding: one is the standard JDBC interface, and the other is an extended interface. The extended interface offers better performance.
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WSParameterBindingStdInterfaceDemo.java:para_bind}}
```
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WSParameterBindingExtendInterfaceDemo.java:para_bind}}
```
This is a [more detailed parameter binding example](https://github.com/taosdata/TDengine/blob/main/docs/examples/java/src/main/java/com/taos/example/WSParameterBindingFullDemo.java)
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/stmt_ws.py}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/stmt/ws/main.go}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/restexample/examples/stmt.rs}}
```
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/stmt_example.js:createConnect}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wsStmt/Program.cs:main}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c-ws/stmt_insert_demo.c}}
```
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
## Native Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/ParameterBindingBasicDemo.java:para_bind}}
```
This is a [more detailed parameter binding example](https://github.com/taosdata/TDengine/blob/main/docs/examples/java/src/main/java/com/taos/example/ParameterBindingFullDemo.java)
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/stmt2_native.py}}
```
</TabItem>
<TabItem label="Go" value="go">
The example code for binding parameters with stmt2 (Go connector v3.6.0 and above, TDengine v3.3.5.0 and above) is as follows:
```go
{{#include docs/examples/go/stmt2/native/main.go}}
```
The example code for binding parameters with stmt is as follows:
```go
{{#include docs/examples/go/stmt/native/main.go}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/nativeexample/examples/stmt.rs}}
```
</TabItem>
<TabItem label="Node.js" value="node">
Not supported
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/stmtInsert/Program.cs:main}}
```
</TabItem>
<TabItem label="C" value="c">
The example code for binding parameters with stmt2 (TDengine v3.3.5.0 or higher is required) is as follows:
```c
{{#include docs/examples/c/stmt2_insert_demo.c}}
```
The example code for binding parameters with stmt is as follows:
```c
{{#include docs/examples/c/stmt_insert_demo.c}}
```
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
View File

@ -0,0 +1,937 @@
---
title: Managing Consumers
slug: /developer-guide/manage-consumers
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
TDengine provides data subscription and consumption interfaces similar to those of message queue products. In many scenarios, by adopting TDengine's time-series big data platform, there is no need to integrate additional message queue products, thus simplifying application design and reducing maintenance costs. This chapter introduces the related APIs and usage methods for data subscription with various language connectors. For basic information on data subscription, please refer to [Data Subscription](../../advanced-features/data-subscription/)
## Creating Topics
Please use TDengine CLI or refer to the [Execute SQL](../running-sql-statements/) section to execute the SQL for creating topics: `CREATE TOPIC IF NOT EXISTS topic_meters AS SELECT ts, current, voltage, phase, groupid, location FROM meters`
The above SQL will create a subscription named topic_meters. Each record in the messages obtained using this subscription is composed of the columns selected by this query statement `SELECT ts, current, voltage, phase, groupid, location FROM meters`.
**Note**
In the implementation of TDengine connectors, there are the following limitations for subscription queries.
- Query statement limitation: Subscription queries can only use select statements and do not support other types of SQL, such as subscribing to databases, subscribing to supertables (non-select methods), insert, update, or delete, etc.
- Raw data query: Subscription queries can only query raw data, not aggregated or calculated results.
- Time order limitation: Subscription queries can only query data in chronological order.
## Creating Consumers
The concept of TDengine consumers is similar to Kafka, where consumers receive data streams by subscribing to topics. Consumers can be configured with various parameters, such as connection methods, server addresses, automatic Offset submission, etc., to suit different data processing needs. Some language connectors' consumers also support advanced features such as automatic reconnection and data transmission compression to ensure efficient and stable data reception.
### Creation Parameters
There are many parameters for creating consumers, which flexibly support various connection types, Offset submission methods, compression, reconnection, deserialization, and other features. The common basic configuration items applicable to all language connectors are shown in the following table:
| Parameter Name | Type | Description | Remarks |
| :-----------------------: | :-----: | ------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `td.connect.ip` | string | FQDN of Server | ip or host name |
| `td.connect.user` | string | Username | |
| `td.connect.pass` | string | Password | |
| `td.connect.port` | integer | Server port number | |
| `group.id` | string | Consumer group ID; the same consumer group shares consumption progress | <br />**Required**. Maximum length: 192; excess length will be cut off.<br />Each topic can have up to 100 consumer groups |
| `client.id` | string | Client ID | Maximum length: 255, excess length will be cut off. |
| `auto.offset.reset` | enum | Initial position of the consumer group subscription | <br />`earliest`: default (version < 3.2.0.0); subscribe from the beginning; <br/>`latest`: default (version >= 3.2.0.0); only subscribe from the latest data; <br/>`none`: cannot subscribe without a committed offset |
| `enable.auto.commit` | boolean | Whether to enable automatic consumption point submission, true: automatic submission, client application does not need to commit; false: client application needs to commit manually | Default is true |
| `auto.commit.interval.ms` | integer | Time interval for automatically submitting consumption records, in milliseconds | Default is 5000 |
| `msg.with.table.name` | boolean | Whether to allow parsing the table name from the message, not applicable to column subscription (column subscription can write tbname as a column in the subquery statement) (from version 3.2.0.0 this parameter is deprecated, always true) | Default is off |
| `enable.replay` | boolean | Whether to enable data replay function | Default is off |
| `session.timeout.ms` | integer | Timeout after consumer heartbeat is lost, after which rebalance logic is triggered, and upon success, that consumer will be removed (supported from version 3.3.3.0) | Default is 12000, range [6000, 1800000] |
| `max.poll.interval.ms` | integer | The longest time interval for consumer poll data fetching, exceeding this time will be considered as the consumer being offline, triggering rebalance logic, and upon success, that consumer will be removed (supported from version 3.3.3.0) | Default is 300000, range [1000, INT32_MAX] |
| `fetch.max.wait.ms` | integer | The maximum time it takes for the server to return data once (supported from version 3.3.6.0) | Default is 1000, range [1, INT32_MAX] |
| `min.poll.rows` | integer | The minimum number of rows returned by the server at once (supported from version 3.3.6.0) | Default is 4096, range [1, INT32_MAX] |
| `msg.consume.rawdata` | integer | When consuming data, pull the data in raw binary form without parsing it; an internal parameter used only for taosX data migration (supported from version 3.3.6.0) | The default value 0 means disabled; a non-zero value means enabled |
Below are the connection parameters for connectors in various languages:
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
The parameters for creating a consumer with the Java connector are Properties. For a list of parameters you can set, please refer to [Consumer Parameters](../../tdengine-reference/client-libraries/java/)
For other parameters, refer to the common basic configuration items mentioned above.
</TabItem>
<TabItem label="Python" value="python">
The `td.connect.websocket.scheme` parameter is provided to indicate the protocol type, other parameters are the same as the common basic configuration items.
</TabItem>
<TabItem label="Go" value="go">
Supported properties list for creating consumers:
- `ws.url`: WebSocket connection address.
- `ws.message.channelLen`: WebSocket message channel buffer length, default 0.
- `ws.message.timeout`: WebSocket message timeout, default 5m.
- `ws.message.writeWait`: WebSocket message write timeout, default 10s.
- `ws.message.enableCompression`: Whether to enable compression for WebSocket, default false.
- `ws.autoReconnect`: Whether WebSocket should automatically reconnect, default false.
- `ws.reconnectIntervalMs`: WebSocket reconnect interval in milliseconds, default 2000.
- `ws.reconnectRetryCount`: WebSocket reconnect retry count, default 3.
See the table above for other parameters.
</TabItem>
<TabItem label="Rust" value="rust">
The parameters for creating a consumer with the Rust connector are DSN. For a list of parameters you can set, please refer to [DSN](../../tdengine-reference/client-libraries/rust/#dsn)
For other parameters, refer to the common basic configuration items mentioned above.
</TabItem>
<TabItem label="Node.js" value="node">
The `WS_URL` parameter is provided to indicate the server address to connect to, other parameters are the same as the common basic configuration items.
</TabItem>
<TabItem label="C#" value="csharp">
Supported properties list for creating consumers:
- `useSSL`: Whether to use SSL connection, default false.
- `token`: Token for connecting to TDengine cloud.
- `ws.message.enableCompression`: Whether to enable WebSocket compression, default false.
- `ws.autoReconnect`: Whether to automatically reconnect, default false.
- `ws.reconnect.retry.count`: Reconnect attempts, default 3.
- `ws.reconnect.interval.ms`: Reconnect interval in milliseconds, default 2000.
See the table above for other parameters.
</TabItem>
<TabItem label="C" value="c">
- WebSocket connection: Since it uses a DSN, the four configuration items `td.connect.ip`, `td.connect.port`, `td.connect.user`, and `td.connect.pass` are not needed; the rest are the same as the common configuration items.
- Native connection: Same as the common basic configuration items.
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
### WebSocket Connection
This section introduces how connectors in various languages create consumers over a WebSocket connection: specify the server address to connect to, enable auto-commit, start consuming from the latest message, and set `group.id` and `client.id`. Some language connectors also support deserialization parameters.
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WsConsumerLoopFull.java:create_consumer}}
```
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/tmq_websocket_example.py:create_consumer}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/tmq/ws/main.go:create_consumer}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/restexample/examples/tmq.rs:create_consumer_dsn}}
```
```rust
{{#include docs/examples/rust/restexample/examples/tmq.rs:create_consumer_ac}}
```
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/tmq_example.js:create_consumer}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wssubscribe/Program.cs:create_consumer}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c-ws/tmq_demo.c:create_consumer_1}}
```
```c
{{#include docs/examples/c-ws/tmq_demo.c:create_consumer_2}}
```
Call the `build_consumer` function to attempt to obtain the consumer instance `tmq`. Print a success log if successful, and a failure log if not.
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
### Native Connection
This section introduces how connectors in various languages create consumers over a native connection: specify the server address to connect to, enable auto-commit, start consuming from the latest message, and set `group.id` and `client.id`. Some language connectors also support deserialization parameters.
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/ConsumerLoopFull.java:create_consumer}}
```
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/tmq_native.py:create_consumer}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/tmq/native/main.go:create_consumer}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/nativeexample/examples/tmq.rs:create_consumer_dsn}}
```
```rust
{{#include docs/examples/rust/nativeexample/examples/tmq.rs:create_consumer_ac}}
```
</TabItem>
<TabItem label="Node.js" value="node">
Not supported
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/subscribe/Program.cs:create_consumer}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c/tmq_demo.c:create_consumer_1}}
```
```c
{{#include docs/examples/c/tmq_demo.c:create_consumer_2}}
```
Call the `build_consumer` function to attempt to obtain the consumer instance `tmq`. Print a success log if successful, and a failure log if not.
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
## Subscribe to Consume Data
After subscribing to a topic, consumers can start receiving and processing messages from these topics. The example code for subscribing to consume data is as follows:
### WebSocket Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WsConsumerLoopFull.java:poll_data_code_piece}}
```
- The parameters of the `subscribe` method mean: the list of topics subscribed to (i.e., names), supporting subscription to multiple topics simultaneously.
- `poll` is called each time to fetch a message, which may contain multiple records.
- `ResultBean` is a custom internal class, whose field names and data types correspond one-to-one with the column names and data types, allowing objects of type `ResultBean` to be deserialized using the `value.deserializer` property's corresponding deserialization class.
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/tmq_websocket_example.py:subscribe}}
```
- The parameters of the `subscribe` method mean: the list of topics subscribed to (i.e., names), supporting subscription to multiple topics simultaneously.
- `poll` is called each time to fetch a message, which may contain multiple records.
- `records` contains multiple block segments, each of which may contain multiple records.
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/tmq/ws/main.go:subscribe}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/restexample/examples/tmq.rs:consume}}
```
- Consumers can subscribe to one or more `TOPIC`s; it is generally recommended that a consumer subscribe to only one `TOPIC`.
- TMQ message queue is a [futures::Stream](https://docs.rs/futures/latest/futures/stream/index.html) type, which can be used with the corresponding API to consume each message and mark it as consumed through `.commit`.
- `Record` is a custom structure, whose field names and data types correspond one-to-one with the column names and data types, allowing objects of type `Record` to be deserialized using `serde`.
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/tmq_seek_example.js:subscribe}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wssubscribe/Program.cs:subscribe}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c-ws/tmq_demo.c:build_topic_list}}
```
```c
{{#include docs/examples/c-ws/tmq_demo.c:basic_consume_loop}}
```
```c
{{#include docs/examples/c-ws/tmq_demo.c:msg_process}}
```
```c
{{#include docs/examples/c-ws/tmq_demo.c:subscribe_3}}
```
Steps for subscribing and consuming data:
1. Call the `ws_build_topic_list` function to create a topic list `topic_list`.
2. If `topic_list` is `NULL`, it means creation failed, and the function returns `-1`.
3. Use the `ws_tmq_subscribe` function to subscribe to the topic list specified by `tmq`. If the subscription fails, print an error message.
4. Destroy the topic list `topic_list` to free resources.
5. Call the `basic_consume_loop` function to start the basic consumption loop, processing the subscribed messages.
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
### Native Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WsConsumerLoopFull.java:poll_data_code_piece}}
```
- The parameters of the `subscribe` method mean: the list of topics (i.e., names) to subscribe to, supporting subscription to multiple topics simultaneously.
- `poll` is called each time to get a message, which may contain multiple records.
- `ResultBean` is a custom internal class, whose field names and data types correspond one-to-one with the column names and data types, allowing objects of type `ResultBean` to be deserialized based on the `value.deserializer` property's corresponding deserialization class.
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/tmq_native.py:subscribe}}
```
- The parameters of the `subscribe` method mean: the list of topics (i.e., names) to subscribe to, supporting subscription to multiple topics simultaneously.
- `poll` is called each time to get a message, which may contain multiple records.
- `records` contains multiple blocks, each of which may contain multiple records.
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/tmq/native/main.go:subscribe}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/restexample/examples/tmq.rs:consume}}
```
- Consumers can subscribe to one or more `TOPIC`s; it is generally recommended that a consumer subscribe to only one `TOPIC`.
- The TMQ message queue is a [futures::Stream](https://docs.rs/futures/latest/futures/stream/index.html) type, which can be used with the corresponding API to consume each message and mark it as consumed with `.commit`.
- `Record` is a custom structure, whose field names and data types correspond one-to-one with the column names and data types, allowing objects of type `Record` to be deserialized through `serde`.
</TabItem>
<TabItem label="Node.js" value="node">
Not supported
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/subscribe/Program.cs:subscribe}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c/tmq_demo.c:build_topic_list}}
```
```c
{{#include docs/examples/c/tmq_demo.c:basic_consume_loop}}
```
```c
{{#include docs/examples/c/tmq_demo.c:msg_process}}
```
```c
{{#include docs/examples/c/tmq_demo.c:subscribe_3}}
```
Steps for subscribing and consuming data:
1. Call the `build_topic_list` function to create a topic list `topic_list`.
2. If `topic_list` is `NULL`, it means creation failed, and the function returns `-1`.
3. Use the `tmq_subscribe` function to subscribe to the topic list specified by `tmq`. If the subscription fails, print an error message.
4. Destroy the topic list `topic_list` to free resources.
5. Call the `basic_consume_loop` function to start the basic consumption loop, processing the subscribed messages.
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
## Specifying the Subscription Offset
Consumers can start reading messages from a specific Offset within a partition, allowing them to reread messages or skip messages that have already been processed. The following shows how connectors in various languages specify the subscription Offset.
### WebSocket Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WsConsumerLoopFull.java:consumer_seek}}
```
1. Use the consumer.poll method to poll data until data is obtained.
2. For the first batch of polled data, print the content of the first message and obtain the current consumer's partition assignment information.
3. Use the consumer.seekToBeginning method to reset the offset of all partitions to the starting position and print the successful reset message.
4. Poll data again using the consumer.poll method and print the content of the first message.
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/tmq_websocket_example.py:assignment}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/tmq/ws/main.go:seek}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/nativeexample/examples/tmq.rs:seek_offset}}
```
1. By calling the consumer.assignments() method, obtain the consumer's current partition assignment information and record the initial assignment status.
2. For each partition assignment, extract the topic, consumer group ID (vgroup_id), current offset (current), beginning offset (begin), and ending offset (end), and record this information.
3. Call the consumer.offset_seek method to set the offset to the beginning position. If the operation fails, record the error information and the current assignment status.
4. After adjusting the offset for all partitions, obtain and record the consumer's partition assignment information again to confirm the status after the offset adjustment.
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/tmq_seek_example.js:offset}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wssubscribe/Program.cs:seek}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c-ws/tmq_demo.c:consume_repeatly}}
```
1. Use the `ws_tmq_get_topic_assignment` function to obtain the assignment information for a specific topic, including the number of assignments and the details of each assignment.
2. If fetching the assignment information fails, print an error message and return.
3. For each assignment, use the `ws_tmq_offset_seek` function to set the consumer's offset to the earliest offset.
4. If setting the offset fails, print an error message.
5. Release the assignment information array to free resources.
6. Call the `basic_consume_loop` function to start a new consumption loop and process messages.
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
### Native Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WsConsumerLoopFull.java:consumer_seek}}
```
1. Use the consumer.poll method to poll data until data is obtained.
2. For the first batch of polled data, print the content of the first data item and obtain the current consumer's partition assignment information.
3. Use the consumer.seekToBeginning method to reset the offset of all partitions to the beginning position and print a message of successful reset.
4. Poll data again using the consumer.poll method and print the content of the first data item.
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/tmq_native.py:assignment}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/tmq/native/main.go:seek}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/nativeexample/examples/tmq.rs:seek_offset}}
```
1. Obtain the consumer's current partition assignment information by calling the consumer.assignments() method and record the initial assignment status.
2. For each partition assignment, extract the topic, consumer group ID (vgroup_id), current offset, beginning offset, and ending offset. Record this information.
3. Use the consumer.offset_seek method to set the offset to the beginning position. If the operation fails, record the error information and the current assignment status.
4. After adjusting the offset for all partitions, obtain and record the consumer's partition assignment information again to confirm the status after the offset adjustment.
</TabItem>
<TabItem label="Node.js" value="node">
Not supported
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/subscribe/Program.cs:seek}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c/tmq_demo.c:consume_repeatly}}
```
1. Use the `tmq_get_topic_assignment` function to obtain the assignment information for a specific topic, including the number of assignments and the details of each assignment.
2. If fetching the assignment information fails, print an error message and return.
3. For each assignment, use the `tmq_offset_seek` function to set the consumer's offset to the earliest offset.
4. If setting the offset fails, print an error message.
5. Release the assignment information array to free resources.
6. Call the `basic_consume_loop` function to start a new consumption loop and process messages.
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
## Commit Offset
After a consumer has read and processed messages, it can commit the Offset, indicating that it has successfully processed messages up to that Offset. Offset commits can be automatic (committed periodically based on configuration) or manual (the application controls when to commit).
When creating a consumer, if the property `enable.auto.commit` is set to false, the offset can be committed manually.
**Note**: Before manually committing the consumption progress, ensure that the message has been processed correctly; otherwise, an incorrectly processed message will not be consumed again. Automatic commits may commit the consumption progress of the previous message during the current `poll`, so make sure message processing is complete before the next `poll` or message retrieval.
### WebSocket Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WsConsumerLoopFull.java:commit_code_piece}}
```
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/tmq_websocket_example.py:commit_offset}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/tmq/ws/main.go:commit_offset}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/restexample/examples/tmq.rs:consumer_commit_manually}}
```
You can manually commit the consumption progress using the `consumer.commit` method.
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/tmq_example.js:commit}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wssubscribe/Program.cs:commit_offset}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c-ws/tmq_demo.c:manual_commit}}
```
You can manually commit the consumption progress using the `ws_tmq_commit_sync` function.
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
### Native Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WsConsumerLoopFull.java:commit_code_piece}}
```
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/tmq_native.py:commit_offset}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/tmq/native/main.go:commit_offset}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/restexample/examples/tmq.rs:consumer_commit_manually}}
```
You can manually commit the consumption progress using the `consumer.commit` method.
</TabItem>
<TabItem label="Node.js" value="node">
Not supported
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/subscribe/Program.cs:commit_offset}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c/tmq_demo.c:manual_commit}}
```
You can manually commit the consumption progress using the `tmq_commit_sync` function.
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
## Unsubscribe and Close Consumption
Consumers can unsubscribe from topics and stop receiving messages. When a consumer is no longer needed, the consumer instance should be closed to release resources and disconnect from the TDengine server.
### WebSocket Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WsConsumerLoopFull.java:unsubscribe_data_code_piece}}
```
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/tmq_websocket_example.py:unsubscribe}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/tmq/ws/main.go:close}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/restexample/examples/tmq.rs:unsubscribe}}
```
**Note**: Once the consumer unsubscribes and is closed, it cannot be reused. If you want to subscribe to a new `topic`, please recreate the consumer.
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/tmq_example.js:unsubscribe}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wssubscribe/Program.cs:close}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c-ws/tmq_demo.c:unsubscribe_and_close}}
```
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
### Native Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WsConsumerLoopFull.java:unsubscribe_data_code_piece}}
```
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/tmq_native.py:unsubscribe}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/tmq/native/main.go:close}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/restexample/examples/tmq.rs:unsubscribe}}
```
**Note**: Once the consumer unsubscribes and is closed, it cannot be reused. If you want to subscribe to a new `topic`, please create a new consumer.
</TabItem>
<TabItem label="Node.js" value="node">
Not supported
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/subscribe/Program.cs:close}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c/tmq_demo.c:unsubscribe_and_close}}
```
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
## Complete Examples
### WebSocket Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
<details>
<summary>Complete code example</summary>
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WsConsumerLoopFull.java:consumer_demo}}
```
**Note**: The value of the `value.deserializer` configuration parameter should be adjusted according to the package path of the test environment.
</details>
</TabItem>
<TabItem label="Python" value="python">
<details>
<summary>Complete code example</summary>
```python
{{#include docs/examples/python/tmq_websocket_example.py}}
```
</details>
</TabItem>
<TabItem label="Go" value="go">
<details>
<summary>Complete code example</summary>
```go
{{#include docs/examples/go/tmq/ws/main.go}}
```
</details>
</TabItem>
<TabItem label="Rust" value="rust">
<details>
<summary>Complete code example</summary>
```rust
{{#include docs/examples/rust/restexample/examples/tmq.rs}}
```
</details>
</TabItem>
<TabItem label="Node.js" value="node">
<details>
<summary>Complete code example</summary>
```js
{{#include docs/examples/node/websocketexample/tmq_example.js}}
```
</details>
</TabItem>
<TabItem label="C#" value="csharp">
<details>
<summary>Complete code example</summary>
```csharp
{{#include docs/examples/csharp/wssubscribe/Program.cs}}
```
</details>
</TabItem>
<TabItem label="C" value="c">
<details>
<summary>Complete code example</summary>
```c
{{#include docs/examples/c-ws/tmq_demo.c}}
```
</details>
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
### Native Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
<details>
<summary>Complete code example</summary>
```java
{{#include docs/examples/java/src/main/java/com/taos/example/ConsumerLoopFull.java:consumer_demo}}
```
**Note**: The value of the `value.deserializer` configuration parameter should be adjusted according to the package path in the test environment.
</details>
</TabItem>
<TabItem label="Python" value="python">
<details>
<summary>Complete code example</summary>
```python
{{#include docs/examples/python/tmq_native.py}}
```
</details>
</TabItem>
<TabItem label="Go" value="go">
<details>
<summary>Complete code example</summary>
```go
{{#include docs/examples/go/tmq/native/main.go}}
```
</details>
</TabItem>
<TabItem label="Rust" value="rust">
<details>
<summary>Complete code example</summary>
```rust
{{#include docs/examples/rust/nativeexample/examples/tmq.rs}}
```
</details>
</TabItem>
<TabItem label="Node.js" value="node">
Not supported
</TabItem>
<TabItem label="C#" value="csharp">
<details>
<summary>Complete code example</summary>
```csharp
{{#include docs/examples/csharp/subscribe/Program.cs}}
```
</details>
</TabItem>
<TabItem label="C" value="c">
<details>
<summary>Complete code example</summary>
```c
{{#include docs/examples/c/tmq_demo.c}}
```
</details>
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>

---
sidebar_label: User-Defined Functions
title: User-Defined Functions (UDF)
slug: /developer-guide/user-defined-functions
---
## Introduction to UDF
In some application scenarios, the query functionality required by the application logic cannot be directly implemented using built-in functions. TDengine allows the writing of user-defined functions (UDFs) to address the needs of special application scenarios. Once successfully registered in the cluster, UDFs can be called in SQL just like system built-in functions, with no difference in usage. UDFs are divided into scalar functions and aggregate functions. Scalar functions output a value for each row of data, such as absolute value (abs), sine function (sin), string concatenation function (concat), etc. Aggregate functions output a value for multiple rows of data, such as average (avg), maximum value (max), etc.
TDengine supports writing UDFs in two programming languages: C and Python. UDFs written in C have performance nearly identical to built-in functions, while those written in Python can utilize the rich Python computation libraries. To prevent exceptions during UDF execution from affecting the database service, TDengine uses process isolation technology, executing UDFs in a separate process. Even if a user-written UDF crashes, it will not affect the normal operation of TDengine.
## Developing UDFs in C Language
When implementing UDFs in C language, you need to implement the specified interface functions:
- Scalar functions need to implement the scalar interface function `scalarfn`.
- Aggregate functions need to implement the aggregate interface functions `aggfn_start`, `aggfn`, `aggfn_finish`.
- If initialization is needed, implement `udf_init`.
- If cleanup is needed, implement `udf_destroy`.
### Interface Definition
The interface function names are either the UDF name itself or the UDF name combined with specific suffixes (`_start`, `_finish`, `_init`, `_destroy`). Function names used later in this section, such as `scalarfn` and `aggfn`, should be replaced with the actual UDF name.
#### Scalar Function Interface
A scalar function is a function that converts input data into output data, typically used for calculating and transforming a single data value. The prototype of the scalar function interface is as follows.
```c
int32_t scalarfn(SUdfDataBlock* inputDataBlock, SUdfColumn *resultColumn);
```
Key parameter descriptions are as follows:
- inputDataBlock: The input data block.
- resultColumn: The output column.
#### Aggregate Function Interface
An aggregate function is a special type of function used for grouping and calculating data to generate summary information. The working principle of aggregate functions is as follows:
- Initialize the result buffer: First, the `aggfn_start` function is called to generate a result buffer for storing intermediate results.
- Group data: Related data is divided into multiple row data blocks, each containing a group of data with the same grouping key.
- Update intermediate results: For each data block, the `aggfn` function is called to update the intermediate results. The `aggfn` function performs calculations according to the type of aggregate function (such as sum, avg, count, etc.) and stores the results in the result buffer.
- Generate the final result: After updating the intermediate results of all data blocks, the `aggfn_finish` function is called to extract the final result from the result buffer. The final result contains either 0 or 1 data row, depending on the type of aggregate function and the input data.
The prototype of the aggregate function interface is as follows.
```c
int32_t aggfn_start(SUdfInterBuf *interBuf);
int32_t aggfn(SUdfDataBlock* inputBlock, SUdfInterBuf *interBuf, SUdfInterBuf *newInterBuf);
int32_t aggfn_finish(SUdfInterBuf* interBuf, SUdfInterBuf *result);
```
Key parameter descriptions are as follows:
- `interBuf`: Intermediate result buffer.
- `inputBlock`: The input data block.
- `newInterBuf`: New intermediate result buffer.
- `result`: The final result.
#### Initialization and Destruction Interface
The initialization and destruction interfaces are common interfaces used by both scalar and aggregate functions, with the following APIs.
```c
int32_t udf_init();
int32_t udf_destroy();
```
Among them, the `udf_init` function completes the initialization work, and the `udf_destroy` function completes the cleanup work. If there is no initialization work, there is no need to define the `udf_init` function; if there is no cleanup work, there is no need to define the `udf_destroy` function.
### Scalar Function Template
The template for developing scalar functions in C language is as follows.
```c
#include "taos.h"
#include "taoserror.h"
#include "taosudf.h"
// Initialization function.
// If no initialization, we can skip definition of it.
// The initialization function shall be concatenation of the udf name and _init suffix.
// @return error number defined in taoserror.h
int32_t scalarfn_init() {
// initialization.
return TSDB_CODE_SUCCESS;
}
// Scalar function main computation function.
// @param inputDataBlock, input data block composed of multiple columns with each column defined by SUdfColumn
// @param resultColumn, output column
// @return error number defined in taoserror.h
int32_t scalarfn(SUdfDataBlock* inputDataBlock, SUdfColumn* resultColumn) {
// read data from inputDataBlock and process, then output to resultColumn.
return TSDB_CODE_SUCCESS;
}
// Cleanup function.
// If no cleanup related processing, we can skip definition of it.
// The destroy function shall be concatenation of the udf name and _destroy suffix.
// @return error number defined in taoserror.h
int32_t scalarfn_destroy() {
// clean up
return TSDB_CODE_SUCCESS;
}
```
### Aggregate Function Template
The template for developing aggregate functions in C language is as follows.
```c
#include "taos.h"
#include "taoserror.h"
#include "taosudf.h"
// Initialization function.
// If no initialization, we can skip definition of it.
// The initialization function shall be concatenation of the udf name and _init suffix.
// @return error number defined in taoserror.h
int32_t aggfn_init() {
// initialization.
return TSDB_CODE_SUCCESS;
}
// Aggregate start function.
// The intermediate value or the state(@interBuf) is initialized in this function.
// The function name shall be concatenation of udf name and _start suffix.
// @param interbuf intermediate value to initialize
// @return error number defined in taoserror.h
int32_t aggfn_start(SUdfInterBuf* interBuf) {
// initialize intermediate value in interBuf
return TSDB_CODE_SUCCESS;
}
// Aggregate reduce function.
// This function aggregate old state(@interbuf) and one data bock(inputBlock) and output a new state(@newInterBuf).
// @param inputBlock input data block
// @param interBuf old state
// @param newInterBuf new state
// @return error number defined in taoserror.h
int32_t aggfn(SUdfDataBlock* inputBlock, SUdfInterBuf *interBuf, SUdfInterBuf *newInterBuf) {
// read from inputBlock and interBuf and output to newInterBuf
return TSDB_CODE_SUCCESS;
}
// Aggregate function finish function.
// This function transforms the intermediate value(@interBuf) into the final output(@result).
// The function name must be concatenation of aggfn and _finish suffix.
// @interBuf : intermediate value
// @result: final result
// @return error number defined in taoserror.h
int32_t aggfn_finish(SUdfInterBuf* interBuf, SUdfInterBuf *result) {
// read data from inputDataBlock and process, then output to result
return TSDB_CODE_SUCCESS;
}
// Cleanup function.
// If no cleanup related processing, we can skip definition of it.
// The destroy function shall be concatenation of the udf name and _destroy suffix.
// @return error number defined in taoserror.h
int32_t aggfn_destroy() {
// clean up
return TSDB_CODE_SUCCESS;
}
```
### Compilation
In TDengine, to implement UDF, you need to write C language source code and compile it into a dynamic link library file according to TDengine's specifications.
Following the rules described above, prepare the UDF source code `bit_and.c`. For example, on a Linux operating system, execute the following command to compile it into a dynamic link library file.
```shell
gcc -g -O0 -fPIC -shared bit_and.c -o libbitand.so
```
It is recommended to use GCC version 7.5 or above to ensure reliable operation.
### C UDF Data Structures
```c
typedef struct SUdfColumnMeta {
int16_t type;
int32_t bytes;
uint8_t precision;
uint8_t scale;
} SUdfColumnMeta;
typedef struct SUdfColumnData {
int32_t numOfRows;
int32_t rowsAlloc;
union {
struct {
int32_t nullBitmapLen;
char *nullBitmap;
int32_t dataLen;
char *data;
} fixLenCol;
struct {
int32_t varOffsetsLen;
int32_t *varOffsets;
int32_t payloadLen;
char *payload;
int32_t payloadAllocLen;
} varLenCol;
};
} SUdfColumnData;
typedef struct SUdfColumn {
SUdfColumnMeta colMeta;
bool hasNull;
SUdfColumnData colData;
} SUdfColumn;
typedef struct SUdfDataBlock {
int32_t numOfRows;
int32_t numOfCols;
SUdfColumn **udfCols;
} SUdfDataBlock;
typedef struct SUdfInterBuf {
int32_t bufLen;
char *buf;
int8_t numOfResult; //zero or one
} SUdfInterBuf;
```
The data structures are described as follows:
- `SUdfDataBlock` contains the number of rows `numOfRows` and the number of columns `numOfCols`. `udfCols[i]` (0 \<= i \<= numOfCols-1) represents each column's data, of type `SUdfColumn*`.
- `SUdfColumn` includes the column's data type definition `colMeta` and the column's data `colData`.
- `SUdfColumnMeta` members are defined similarly to data type definitions in `taos.h`.
- `SUdfColumnData` can be variable-length: `varLenCol` defines variable-length data, and `fixLenCol` defines fixed-length data.
- `SUdfInterBuf` defines an intermediate-result buffer and the number of results in the buffer, `numOfResult`.
To better operate the above data structures, some convenience functions are provided, defined in `taosudf.h`.
### C UDF Example Code
#### Scalar Function Example [bit_and](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/bit_and.c)
`bit_and` implements the bitwise AND function for multiple columns. If there is only one column, it returns that column. `bit_and` ignores null values.
<details>
<summary>bit_and.c</summary>
```c
{{#include tests/script/sh/bit_and.c}}
```
</details>
#### Aggregate Function Example 1 Returning Numeric Type [l2norm](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/l2norm.c)
`l2norm` implements the second-order norm of all data in the input columns, i.e., squaring each data point, then summing them up, and finally taking the square root.
<details>
<summary>l2norm.c</summary>
```c
{{#include tests/script/sh/l2norm.c}}
```
</details>
#### Aggregate Function Example 2 Returning String Type [max_vol](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/max_vol.c)
`max_vol` finds the maximum voltage across multiple input voltage columns and returns a composite string consisting of the device ID + the position (row, column) of the maximum voltage + the maximum voltage value.
Create table:
```shell
create table battery(ts timestamp, vol1 float, vol2 float, vol3 float, deviceId varchar(16));
```
Create custom function:
```shell
create aggregate function max_vol as '/root/udf/libmaxvol.so' outputtype binary(64) bufsize 10240 language 'C';
```
Use custom function:
```shell
select max_vol(vol1, vol2, vol3, deviceid) from battery;
```
<details>
<summary>max_vol.c</summary>
```c
{{#include tests/script/sh/max_vol.c}}
```
</details>
#### Aggregate Function Example 3 Splitting Strings and Calculating the Average [extract_avg](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/extract_avg.c)
The `extract_avg` function converts a comma-separated string into a set of numerical values, accumulates the results across all rows, and calculates the final average. Note the following when implementing it:
- `interBuf->numOfResult` must be 1 or 0; it cannot be used as the count.
- The count can be kept in an additional cache, such as the `SumCount` structure.
- Use `varDataVal` to obtain the string contents.
Create table:
```shell
create table scores(ts timestamp, varStr varchar(128));
```
Create custom function:
```shell
create aggregate function extract_avg as '/root/udf/libextract_avg.so' outputtype double bufsize 16 language 'C';
```
Use custom function:
```shell
select extract_avg(varStr) from scores;
```
Generate the `.so` file:
```bash
gcc -g -O0 -fPIC -shared extract_avg.c -o libextract_avg.so
```
<details>
<summary>extract_avg.c</summary>
```c
{{#include tests/script/sh/extract_avg.c}}
```
</details>
## Developing UDFs in Python Language
### Environment Setup
The specific steps to prepare the environment are as follows:
- Step 1, prepare the Python runtime environment. If you compile and install Python locally, be sure to enable the `--enable-shared` option, otherwise the subsequent installation of taospyudf will fail due to failure to generate a shared library.
- Step 2, install the Python package taospyudf. The command is as follows.
```shell
pip3 install taospyudf
```
- Step 3, execute the command ldconfig.
- Step 4, start the taosd service.
The installation process will compile C++ source code, so cmake and gcc must be present on the system. The compiled libtaospyudf.so file will automatically be copied to the /usr/local/lib/ directory, so if you are not a root user, you need to add sudo during installation. After installation, you can check if this file is in the directory:
```shell
root@server11 ~/udf $ ls -l /usr/local/lib/libtaos*
-rw-r--r-- 1 root root 671344 May 24 22:54 /usr/local/lib/libtaospyudf.so
```
### Interface Definition
When developing UDFs in Python, you need to implement the specified interface functions. The specific requirements are as follows.
- Scalar functions need to implement the scalar interface function process.
- Aggregate functions need to implement the aggregate interface functions start, reduce, finish.
- If initialization is needed, the init function should be implemented.
- If cleanup work is needed, implement the destroy function.
#### Scalar Function Interface
The interface for scalar functions is as follows.
```python
def process(input: datablock) -> tuple[output_type]:
```
The main parameters are as follows:
- input: `datablock` is similar to a two-dimensional matrix; use the member method `data(row, col)` to read the Python object located at row `row` and column `col`.
- The return value is a tuple of Python objects, with each element having the declared output type.
#### Aggregate Function Interface
The interface for aggregate functions is as follows.
```python
def start() -> bytes:
def reduce(inputs: datablock, buf: bytes) -> bytes
def finish(buf: bytes) -> output_type:
```
The above code defines three functions that together implement a custom aggregate function. The specific process is as follows.
First, the start function is called to generate the initial result buffer. This result buffer is used to store the internal state of the aggregate function, which is continuously updated as input data is processed.
Then, the input data is divided into multiple row data blocks. For each row data block, the reduce function is called, and the current row data block (inputs) and the current intermediate result (buf) are passed as parameters. The reduce function updates the internal state of the aggregate function based on the input data and current state, and returns a new intermediate result.
Finally, when all row data blocks have been processed, the finish function is called. This function takes the final intermediate result (buf) as a parameter and generates the final output from it. Due to the nature of aggregate functions, the final output can only contain 0 or 1 data entries. This output result is returned to the caller as the result of the aggregate function calculation.
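To make this flow concrete, here is a minimal sketch (not one of the official examples) of an aggregate function that counts the non-NULL values in its first input column. It assumes the intermediate state is serialized with Python's standard `pickle` module, as the `myspread` example later in this section also does.
```python
import pickle

def init():
    pass

def destroy():
    pass

def start() -> bytes:
    # initial state: nothing has been counted yet
    return pickle.dumps(0)

def reduce(inputs, buf: bytes) -> bytes:
    count = pickle.loads(buf)              # restore the previous intermediate state
    rows, _ = inputs.shape()
    for i in range(rows):
        if inputs.data(i, 0) is not None:  # SQL NULL arrives as None
            count += 1
    return pickle.dumps(count)             # serialize the updated state

def finish(buf: bytes) -> int:
    return pickle.loads(buf)               # the single final result value
```
Such a function would be registered as an aggregate function with an integer `outputtype` and a `bufsize` large enough to hold the pickled state.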
#### Initialization and Destruction Interface
The interfaces for initialization and destruction are as follows.
```python
def init()
def destroy()
```
Function descriptions:
- `init` performs the initialization work
- `destroy` performs the cleanup work
**Note**: When developing UDFs in Python, you must define both the `init` and `destroy` functions.
### Scalar Function Template
The template for developing scalar functions in Python is as follows.
```python
def init():
# initialization
def destroy():
# destroy
def process(input: datablock) -> tuple[output_type]:
```
### Aggregate Function Template
The template for developing aggregate functions in Python is as follows.
```python
def init():
#initialization
def destroy():
#destroy
def start() -> bytes:
#return serialize(init_state)
def reduce(inputs: datablock, buf: bytes) -> bytes
# deserialize buf to state
# reduce the inputs and state into new_state.
# use inputs.data(i, j) to access python object of location(i, j)
# serialize new_state into new_state_bytes
return new_state_bytes
def finish(buf: bytes) -> output_type:
#return obj of type outputtype
```
### Data Type Mapping
The table below describes the mapping between TDengine SQL data types and Python data types. Any type of NULL value is mapped to Python's None value.
| **TDengine SQL Data Type** | **Python Data Type** |
| :-----------------------: | ------------ |
| TINYINT / SMALLINT / INT / BIGINT | int |
| TINYINT UNSIGNED / SMALLINT UNSIGNED / INT UNSIGNED / BIGINT UNSIGNED | int |
| FLOAT / DOUBLE | float |
| BOOL | bool |
| BINARY / VARCHAR / NCHAR | bytes|
| TIMESTAMP | int |
| JSON and other types | Not supported |
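For example, the hypothetical scalar function below (not part of the official examples) assumes a single VARCHAR input column and an integer `outputtype`; it illustrates that string columns arrive as `bytes` and SQL NULL arrives as `None`.
```python
def init():
    pass

def destroy():
    pass

def process(block):
    rows, _ = block.shape()
    results = []
    for i in range(rows):
        v = block.data(i, 0)      # BINARY/VARCHAR/NCHAR values are passed in as bytes
        if v is None:             # any SQL NULL is mapped to Python's None
            results.append(None)
        else:
            results.append(len(v.decode("utf-8")))  # e.g. return the character length
    return results
```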
### Development Examples
This section includes 5 example programs, ranging from basic to advanced, along with numerous practical debugging tips.
Note: **Within a UDF, you cannot log with the print function; write to a file or use Python's built-in logging library instead.**
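For instance, a UDF can route its diagnostics through the standard `logging` module from `init`; this is only a sketch, and the log file path is an illustrative assumption, so adjust it to a directory that the UDF process can write to.
```python
import logging

def init():
    # print() output is not visible inside a UDF, so write diagnostics to a file instead
    logging.basicConfig(filename="/var/log/taos/myudf.log",  # assumed path
                        level=logging.DEBUG,
                        format="%(asctime)s %(levelname)s %(message)s")
    logging.info("UDF initialized")

def destroy():
    logging.info("UDF destroyed")

def process(block):
    rows, _ = block.shape()
    logging.debug("processing %d rows", rows)
    return [None] * rows  # placeholder computation for the sketch
```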
#### Example One
Write a UDF function that only accepts a single integer: Input n, output ln(n^2 + 1).
First, write a Python file, located in a system directory, such as `/root/udf/myfun.py` with the following content.
```python
from math import log
def init():
pass
def destroy():
pass
def process(block):
rows, _ = block.shape()
return [log(block.data(i, 0) ** 2 + 1) for i in range(rows)]
```
This file contains three functions. `init` and `destroy` are empty; they are the lifecycle functions of a UDF and must be defined even if they do nothing. The most important function is `process`, which accepts a data block. The data block object has two methods.
1. `shape()` returns the number of rows and columns of the data block
2. `data(i, j)` returns the data at row i, column j
The scalar function's `process` method must return as many rows of data as there are in the data block. The above code ignores the number of columns, as it only needs to compute each row's first column.
Next, create the corresponding UDF function, execute the following statement in the TDengine CLI.
```sql
create function myfun as '/root/udf/myfun.py' outputtype double language 'Python'
```
```shell
taos> create function myfun as '/root/udf/myfun.py' outputtype double language 'Python';
Create OK, 0 row(s) affected (0.005202s)
```
That looks smooth. Next, list all the custom functions in the system to confirm that it was created successfully.
```text
taos> show functions;
name |
=================================
myfun |
Query OK, 1 row(s) in set (0.005767s)
```
To generate test data, execute the following commands in the TDengine CLI.
```sql
create database test;
create table t(ts timestamp, v1 int, v2 int, v3 int);
insert into t values('2023-05-01 12:13:14', 1, 2, 3);
insert into t values('2023-05-03 08:09:10', 2, 3, 4);
insert into t values('2023-05-10 07:06:05', 3, 4, 5);
```
Test the myfun function.
```sql
taos> select myfun(v1, v2) from t;
DB error: udf function execution failure (0.011088s)
```
Unfortunately, the execution failed. What could be the reason? Check the taosudf process logs.
```shell
tail -10 /var/log/taos/taosudf.log
```
Found the following error messages.
```text
05/24 22:46:28.733545 01665799 UDF ERROR can not load library libtaospyudf.so. error: operation not permitted
05/24 22:46:28.733561 01665799 UDF ERROR can not load python plugin. lib path libtaospyudf.so
```
The error is clear: the Python plugin `libtaospyudf.so` was not loaded. If you encounter this error, please refer to the previous section on setting up the environment.
After fixing the environment error, execute again as follows.
```sql
taos> select myfun(v1) from t;
myfun(v1) |
============================
0.693147181 |
1.609437912 |
2.302585093 |
```
With this, we have completed our first UDF 😊, and learned some basic debugging methods.
#### Example 2
Although the myfun function passed the test, it has two drawbacks.
1. This scalar function only accepts 1 column of data as input, and it will not throw an exception if multiple columns are passed.
```sql
taos> select myfun(v1, v2) from t;
myfun(v1, v2) |
============================
0.693147181 |
1.609437912 |
2.302585093 |
```
2. It does not handle null values. We expect that if the input contains null, it will throw an exception and terminate execution. Therefore, the process function is improved as follows.
```python
def process(block):
rows, cols = block.shape()
if cols > 1:
raise Exception(f"require 1 parameter but given {cols}")
return [ None if block.data(i, 0) is None else log(block.data(i, 0) ** 2 + 1) for i in range(rows)]
```
Execute the following statement to update the existing UDF.
```sql
create or replace function myfun as '/root/udf/myfun.py' outputtype double language 'Python';
```
Passing two arguments to myfun will result in a failure.
```sql
taos> select myfun(v1, v2) from t;
DB error: udf function execution failure (0.014643s)
```
Custom exception messages are logged in the plugin log file `/var/log/taos/taospyudf.log`.
```text
2023-05-24 23:21:06.790 ERROR [1666188] [doPyUdfScalarProc@507] call pyUdfScalar proc function. context 0x7faade26d180. error: Exception: require 1 parameter but given 2
At:
/var/lib/taos//.udf/myfun_3_1884e1281d9.py(12): process
```
Thus, we have learned how to update UDFs and view the error logs output by UDFs.
(Note: In versions prior to TDengine 3.0.5.0, taosd must be restarted for an updated UDF to take effect; in version 3.0.5.0 and later, restarting taosd is not required.)
#### Example Three
Given input (x1, x2, ..., xn), output the sum of each value multiplied by its index: `1 * x1 + 2 * x2 + ... + n * xn`. If any of x1 to xn is null, the result is null.
This example differs from Example One in that it can accept any number of columns as input and needs to process each column's value. Write the UDF file /root/udf/nsum.py.
```python
def init():
pass
def destroy():
pass
def process(block):
rows, cols = block.shape()
result = []
for i in range(rows):
total = 0
for j in range(cols):
v = block.data(i, j)
if v is None:
total = None
break
total += (j + 1) * block.data(i, j)
result.append(total)
return result
```
Create the UDF.
```sql
create function nsum as '/root/udf/nsum.py' outputtype double language 'Python';
```
Test the UDF.
```sql
taos> insert into t values('2023-05-25 09:09:15', 6, null, 8);
Insert OK, 1 row(s) affected (0.003675s)
taos> select ts, v1, v2, v3, nsum(v1, v2, v3) from t;
ts | v1 | v2 | v3 | nsum(v1, v2, v3) |
================================================================================================
2023-05-01 12:13:14.000 | 1 | 2 | 3 | 14.000000000 |
2023-05-03 08:09:10.000 | 2 | 3 | 4 | 20.000000000 |
2023-05-10 07:06:05.000 | 3 | 4 | 5 | 26.000000000 |
2023-05-25 09:09:15.000 | 6 | NULL | 8 | NULL |
Query OK, 4 row(s) in set (0.010653s)
```
#### Example Four
Write a UDF that takes a timestamp as input and outputs the next closest Sunday. For example, if today is 2023-05-25, then the next Sunday is 2023-05-28.
To complete this function, you need to use the third-party library moment. First, install this library.
```shell
pip3 install moment
```
Then write the UDF file `/root/udf/nextsunday.py`.
```python
import moment
def init():
pass
def destroy():
pass
def process(block):
rows, cols = block.shape()
if cols > 1:
raise Exception("require only 1 parameter")
if not type(block.data(0, 0)) is int:
raise Exception("type error")
return [moment.unix(block.data(i, 0)).replace(weekday=7).format('YYYY-MM-DD')
for i in range(rows)]
```
The UDF framework maps TDengine's timestamp type to Python's int type, so this function only accepts an integer representing milliseconds. The process method first checks the parameters, then uses the moment package to replace the day of the week with Sunday, and finally formats the output. The output string has a fixed length of 10 characters, so the UDF function can be created like this.
```sql
create function nextsunday as '/root/udf/nextsunday.py' outputtype binary(10) language 'Python';
```
At this point, test the function. If you started taosd with systemctl, you will most likely encounter the following error.
```sql
taos> select ts, nextsunday(ts) from t;
DB error: udf function execution failure (1.123615s)
```
```shell
tail -20 taospyudf.log
2023-05-25 11:42:34.541 ERROR [1679419] [PyUdf::PyUdf@217] py udf load module failure. error ModuleNotFoundError: No module named 'moment'
```
This is because the location of "moment" is not in the default library search path of the Python UDF plugin. How can this be confirmed? Search `taospyudf.log` with the following command.
```shell
grep 'sys path' taospyudf.log | tail -1
```
The output is as follows
```text
2023-05-25 10:58:48.554 INFO [1679419] [doPyOpen@592] python sys path: ['', '/lib/python38.zip', '/lib/python3.8', '/lib/python3.8/lib-dynload', '/lib/python3/dist-packages', '/var/lib/taos//.udf']
```
This shows that the default third-party library path searched by the Python UDF plugin is `/lib/python3/dist-packages`, while moment is installed by default in `/usr/local/lib/python3.8/dist-packages`. Next, modify the default library search path of the Python UDF plugin.
First, open the python3 command line and check the current sys.path.
```python
>>> import sys
>>> ":".join(sys.path)
'/usr/lib/python3.8:/usr/lib/python3.8/lib-dynload:/usr/local/lib/python3.8/dist-packages:/usr/lib/python3/dist-packages'
```
Copy the output string from the command above, then edit `/etc/taos/taos.cfg` and add the following configuration.
```shell
UdfdLdLibPath /usr/lib/python3.8:/usr/lib/python3.8/lib-dynload:/usr/local/lib/python3.8/dist-packages:/usr/lib/python3/dist-packages
```
After saving, execute `systemctl restart taosd`, then test again and there will be no errors.
```sql
taos> select ts, nextsunday(ts) from t;
ts | nextsunday(ts) |
===========================================
2023-05-01 12:13:14.000 | 2023-05-07 |
2023-05-03 08:09:10.000 | 2023-05-07 |
2023-05-10 07:06:05.000 | 2023-05-14 |
2023-05-25 09:09:15.000 | 2023-05-28 |
Query OK, 4 row(s) in set (1.011474s)
```
#### Example Five
Write an aggregate function to calculate the difference between the maximum and minimum values of a column.
The difference between aggregate functions and scalar functions is: scalar functions have multiple outputs corresponding to multiple rows of input, whereas aggregate functions have a single output corresponding to multiple rows of input. The execution process of an aggregate function is somewhat similar to the classic map-reduce framework, where the framework divides the data into several chunks, each mapper handles a chunk, and the reducer aggregates the results of the mappers. The difference is that, in the TDengine Python UDF, the reduce function has both map and reduce capabilities. The reduce function takes two parameters: one is the data it needs to process, and the other is the result of other tasks executing the reduce function. See the following example `/root/udf/myspread.py`.
```python
import io
import math
import pickle
LOG_FILE: io.TextIOBase = None
def init():
global LOG_FILE
LOG_FILE = open("/var/log/taos/spread.log", "wt")
log("init function myspead success")
def log(o):
LOG_FILE.write(str(o) + '\n')
def destroy():
log("close log file: spread.log")
LOG_FILE.close()
def start():
return pickle.dumps((-math.inf, math.inf))
def reduce(block, buf):
max_number, min_number = pickle.loads(buf)
log(f"initial max_number={max_number}, min_number={min_number}")
rows, _ = block.shape()
for i in range(rows):
v = block.data(i, 0)
if v > max_number:
log(f"max_number={v}")
max_number = v
if v < min_number:
log(f"min_number={v}")
min_number = v
return pickle.dumps((max_number, min_number))
def finish(buf):
max_number, min_number = pickle.loads(buf)
return max_number - min_number
```
In this example, we not only defined an aggregate function but also added the functionality to record execution logs.
1. The `init` function opens a file for logging.
2. The `log` function records logs, automatically converting the incoming object into a string and appending a newline.
3. The `destroy` function closes the log file after execution.
4. The `start` function returns the initial buffer to store intermediate results of the aggregate function, initializing the maximum value as negative infinity and the minimum value as positive infinity.
5. The `reduce` function processes each data block and aggregates the results.
6. The `finish` function converts the buffer into the final output.
Execute the following SQL statement to create the corresponding UDF.
```sql
create or replace aggregate function myspread as '/root/udf/myspread.py' outputtype double bufsize 128 language 'Python';
```
This SQL statement has two important differences from the SQL statement used to create scalar functions.
1. Added the `aggregate` keyword.
2. Added the `bufsize` keyword, which specifies the memory size for storing intermediate results. This value can be larger than the actual usage. In this example, the intermediate result is a tuple of two floating-point numbers, which occupies only 32 bytes when serialized, but the specified `bufsize` is 128. You can use the Python command line to print the actual number of bytes used.
```python
>>> len(pickle.dumps((12345.6789, 23456789.9877)))
32
```
To test this function, you can see that the output of `myspread` is consistent with that of the built-in `spread` function.
```sql
taos> select myspread(v1) from t;
myspread(v1) |
============================
5.000000000 |
Query OK, 1 row(s) in set (0.013486s)
taos> select spread(v1) from t;
spread(v1) |
============================
5.000000000 |
Query OK, 1 row(s) in set (0.005501s)
```
Finally, by checking the execution log, you can see that the reduce function was executed 3 times, during which the max value was updated 4 times, and the min value was updated only once.
```shell
root@server11 /var/log/taos $ cat spread.log
init function myspead success
initial max_number=-inf, min_number=inf
max_number=1
min_number=1
initial max_number=1, min_number=1
max_number=2
max_number=3
initial max_number=3, min_number=1
max_number=6
close log file: spread.log
```
Through this example, we learned how to define aggregate functions and print custom log information.
### More Python UDF Example Code
#### Scalar Function Example [pybitand](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/pybitand.py)
`pybitand` implements the bitwise AND function for multiple columns. If there is only one column, it returns that column. `pybitand` ignores null values.
<details>
<summary>pybitand.py</summary>
```python
{{#include tests/script/sh/pybitand.py}}
```
</details>
#### Aggregate Function Example [pyl2norm](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/pyl2norm.py)
`pyl2norm` calculates the second-order norm of all data in the input column, i.e., squares each data point, then sums them up, and finally takes the square root.
<details>
<summary>pyl2norm.py</summary>
```python
{{#include tests/script/sh/pyl2norm.py}}
```
</details>
#### Aggregate Function Example [pycumsum](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/pycumsum.py)
`pycumsum` uses numpy to calculate the cumulative sum of all data in the input column.
<details>
<summary>pycumsum.py</summary>
```python
{{#include tests/script/sh/pycumsum.py}}
```
</details>
## Managing UDFs
The process of managing UDFs in a cluster involves creating, using, and maintaining these functions. Users can create and manage UDFs in the cluster through SQL. Once created, all users in the cluster can use these functions in SQL. Since UDFs are stored on the cluster's mnode, they remain available even after the cluster is restarted.
When creating UDFs, it is necessary to distinguish between scalar functions and aggregate functions. Scalar functions accept zero or more input parameters and return a single value. Aggregate functions accept a set of input values and return a single value by performing some calculation (such as summing, counting, etc.) on these values. If the wrong function category is declared during creation, an error will be reported when the function is called through SQL.
Additionally, users need to ensure that the input data type matches the UDF program, and the output data type of the UDF matches the `outputtype`. This means that when creating a UDF, you need to specify the correct data types for input parameters and output values. This helps ensure that when the UDF is called, the input data is correctly passed to the UDF, and the output values match the expected data types.
### Creating Scalar Functions
The SQL syntax for creating scalar functions is as follows.
```sql
CREATE [OR REPLACE] FUNCTION function_name AS library_path OUTPUTTYPE output_type LANGUAGE 'Python';
```
The parameters are explained as follows.
- or replace: if the function already exists, its properties are modified.
- function_name: the name of the scalar function when called in SQL.
- language: supports C and Python (Python 3.7 and above); the default is C.
- library_path: if the programming language is C, this is the absolute path to the dynamic link library containing the UDF implementation, usually a .so file. If the programming language is Python, it is the path to the Python file containing the UDF implementation. The path must be enclosed in ASCII single or double quotes.
- output_type: the data type name of the function's result.
### Creating Aggregate Functions
The SQL syntax for creating aggregate functions is as follows.
```sql
CREATE [OR REPLACE] AGGREGATE FUNCTION function_name AS library_path OUTPUTTYPE output_type BUFSIZE buffer_size LANGUAGE 'Python';
```
Here, `buffer_size` represents the size of the buffer for intermediate calculation results, in bytes. The meanings of other parameters are the same as those for scalar functions.
The following SQL creates a UDF named `l2norm`.
```sql
CREATE AGGREGATE FUNCTION l2norm AS "/home/taos/udf_example/libl2norm.so" OUTPUTTYPE DOUBLE bufsize 8;
```
### Deleting UDFs
The SQL syntax for deleting a UDF with a specified name is as follows.
```sql
DROP FUNCTION function_name;
```
### Viewing UDFs
The SQL to display all currently available UDFs in the cluster is as follows.
```sql
show functions;
```
### Viewing Function Information
Each update of a UDF with the same name increases the version number by 1.
```sql
select * from ins_functions \G;
```

---
title: Ingesting Data Efficiently
slug: /developer-guide/ingesting-data-efficiently
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
import Image from '@theme/IdealImage';
import imgThread from '../assets/ingesting-data-efficiently-01.png';
This section describes how to write data to TDengine efficiently.
## Principles of Efficient Writing {#principle}
### From the Client Application's Perspective {#application-view}
From the perspective of the client application, efficient data writing should consider the following factors:
1. The amount of data written at once. Generally, the larger the batch of data written at once, the more efficient it is (but the advantage disappears beyond a certain threshold). When writing to TDengine using SQL, try to concatenate more data in one SQL statement. Currently, the maximum length of a single SQL statement supported by TDengine is 1,048,576 (1MB) characters.
2. Number of concurrent connections. Generally, the more concurrent connections writing data at the same time, the more efficient it is (but efficiency may decrease beyond a certain threshold, depending on the server's processing capacity).
3. Distribution of data across different tables (or subtables), i.e., the adjacency of the data being written. Generally, writing data to the same table (or subtable) in each batch is more efficient than writing to multiple tables (or subtables).
4. Method of writing. Generally:
- Binding parameters is more efficient than writing SQL. Parameter binding avoids SQL parsing (but increases the number of calls to the C interface, which also has a performance cost).
- Writing SQL without automatic table creation is more efficient than writing with automatic table creation, because the latter frequently checks whether the table exists.
- Writing SQL is more efficient than schema-less writing because schema-less writing automatically creates tables and supports dynamic changes to the table structure.
Client applications should make full and appropriate use of these factors. In a single write operation, try to write data only to the same table (or subtable). Set the batch size, after testing and tuning, to a value that best suits the current system's processing capacity, and likewise set the number of concurrent connections after testing and tuning to achieve the best write speed on the current system.
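As a rough sketch of these points (batch multiple rows into one SQL statement, keep each batch on a single subtable, and stay below the SQL length limit), the following Python fragment concatenates multi-row `INSERT` statements before submitting them. It assumes the Python connector's `taos.connect()`/`execute()` interface and a made-up row layout in `make_values_clause()`; treat it as an illustration rather than the official example (the full multi-threaded sample code follows below).
```python
import taos  # TDengine Python connector (taospy); assumed to be installed

MAX_SQL_LEN = 1024 * 1024   # maximum SQL statement length in characters
BATCH_ROWS = 5000           # batch size; tune after testing on the target system

def make_values_clause(row):
    # hypothetical row layout (ts, current, voltage, phase); adapt to your schema
    ts, current, voltage, phase = row
    return f"({ts}, {current}, {voltage}, {phase})"

def flush(conn, table, values):
    # one multi-row INSERT per (sub)table keeps the written data adjacent
    if values:
        conn.execute(f"INSERT INTO {table} VALUES {' '.join(values)}")
        values.clear()

def write_rows(conn, table, rows):
    values, length = [], 0
    for row in rows:
        clause = make_values_clause(row)
        # flush before the batch exceeds the row count or (approximate) length limits
        if len(values) >= BATCH_ROWS or length + len(clause) > MAX_SQL_LEN - 256:
            flush(conn, table, values)
            length = 0
        values.append(clause)
        length += len(clause) + 1

    flush(conn, table, values)

# usage sketch: conn = taos.connect(host="localhost", database="test")
```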
### From the Data Source's Perspective {#datasource-view}
Client applications usually need to read data from a data source before writing it to TDengine. From the data source's perspective, the following situations require adding a queue between the reading and writing threads:
1. There are multiple data sources, and the data generation speed of a single data source is much lower than the writing speed of a single thread, but the overall data volume is relatively large. In this case, the role of the queue is to aggregate data from multiple sources to increase the amount of data written at once.
2. The data generation speed of a single data source is much greater than the writing speed of a single thread. In this case, the role of the queue is to increase the concurrency of writing.
3. Data for a single table is scattered across multiple data sources. In this case, the role of the queue is to aggregate the data for the same table in advance, improving the adjacency of the data during writing.
If the data source for the writing application is Kafka, and the writing application itself is a Kafka consumer, then Kafka's features can be utilized for efficient writing. For example:
1. Write data from the same table to the same Topic and the same Partition to increase data adjacency.
2. Aggregate data by subscribing to multiple Topics.
3. Increase the concurrency of writing by increasing the number of Consumer threads.
4. Increase the maximum amount of data fetched each time to increase the maximum amount of data written at once.
### From the Server Configuration's Perspective {#setting-view}
From the server configuration's perspective, the number of vgroups should be set appropriately when creating the database based on the number of disks in the system, the I/O capability of the disks, and the processor's capacity to fully utilize system performance. If there are too few vgroups, the system's performance cannot be maximized; if there are too many vgroups, it will cause unnecessary resource competition. The recommended number of vgroups is typically twice the number of CPU cores, but this should still be adjusted based on the specific system resource configuration.
For more tuning parameters, please refer to [Database Management](../../tdengine-reference/sql-manual/manage-databases/) and [Server Configuration](../../tdengine-reference/components/taosd/).
## Efficient Writing Example {#sample-code}
### Scenario Design {#scenario}
The following example program demonstrates how to write data efficiently, with the scenario designed as follows:
- The TDengine client application continuously reads data from other data sources. In the example program, simulated data generation is used to mimic reading from data sources.
- The speed of a single connection writing to TDengine cannot match the speed of reading data, so the client application starts multiple threads, each establishing a connection with TDengine, and each thread has a dedicated fixed-size message queue.
- The client application hashes the received data by table name (or subtable name) to different threads, that is, it writes the data to the message queue corresponding to that thread, ensuring that data belonging to a given table (or subtable) is always processed by a fixed thread.
- Each sub-thread writes the accumulated data to TDengine once it has emptied its associated message queue or the buffered data reaches a predetermined volume threshold, and then continues processing the data received afterwards.
<figure>
<Image img={imgThread} alt="Thread model for efficient writing example"/>
<figcaption>Figure 1. Thread model for efficient writing example</figcaption>
</figure>
### Sample Code {#code}
This section provides sample code for the above scenario. The principle of efficient writing is the same for other scenarios, but the code needs to be modified accordingly.
This sample code assumes that the source data belongs to different subtables of the same supertable (meters). The program has already created this supertable in the test database before starting to write data. For subtables, they will be automatically created by the application according to the received data. If the actual scenario involves multiple supertables, only the code for automatic table creation in the write task needs to be modified.
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
**Program Listing**
| Class Name | Function Description |
| ----------------- | -------------------------------------------------------------------------------- |
| FastWriteExample | Main program |
| ReadTask | Reads data from a simulated source, hashes the table name to get the Queue Index, writes to the corresponding Queue |
| WriteTask | Retrieves data from the Queue, forms a Batch, writes to TDengine |
| MockDataSource | Simulates generating data for a certain number of meters subtables |
| SQLWriter | WriteTask relies on this class for SQL concatenation, automatic table creation, SQL writing, and SQL length checking |
| StmtWriter | Implements parameter binding for batch writing (not yet completed) |
| DataBaseMonitor | Counts the writing speed and prints the current writing speed to the console every 10 seconds |
Below are the complete codes and more detailed function descriptions for each class.
<details>
<summary>FastWriteExample</summary>
The main program is responsible for:
1. Creating message queues
2. Starting write threads
3. Starting read threads
4. Counting the writing speed every 10 seconds
The main program exposes 4 parameters by default, which can be adjusted each time the program is started, for testing and tuning:
1. Number of read threads. Default is 1.
2. Number of write threads. Default is 3.
3. Total number of simulated tables. Default is 1,000. This will be evenly divided among the read threads. If the total number of tables is large, table creation will take longer, and the writing speed reported at the beginning may be relatively low.
4. Maximum number of records written per batch. Default is 3,000.
Queue capacity (taskQueueCapacity) is also a performance-related parameter and can be adjusted by modifying the program. Generally speaking, the larger the queue capacity, the lower the probability of being blocked when enqueuing and the greater the throughput of the queue, but also the larger the memory usage. The default value in the sample program is already large enough.
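The bounded queue and the hash-based routing described above can be sketched as follows; this is a simplified illustration only, and the queue type and names used here are assumptions rather than the actual classes of the example program.

```java
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;

public class QueueRoutingSketch {
    public static void main(String[] args) throws InterruptedException {
        int writeTaskCount = 3;
        int taskQueueCapacity = 1_000_000; // larger capacity: fewer blocked enqueues, more memory

        // One bounded queue per write thread.
        List<BlockingQueue<String>> queues = new ArrayList<>();
        for (int i = 0; i < writeTaskCount; i++) {
            queues.add(new ArrayBlockingQueue<>(taskQueueCapacity));
        }

        // Route each line to a fixed queue by hashing its table name, so data of the
        // same table is always handled by the same write thread.
        String tableName = "d1001";
        String line = tableName + ",2022-07-14 10:40:00.000,10.3,219,0.31";
        int index = Math.floorMod(tableName.hashCode(), writeTaskCount);
        queues.get(index).put(line); // blocks when the target queue is full
        System.out.println("routed to queue " + index);
    }
}
```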
```java
{{#include docs/examples/java/src/main/java/com/taos/example/highvolume/FastWriteExample.java}}
```
</details>
<details>
<summary>ReadTask</summary>
The read task is responsible for reading data from the data source. Each read task is associated with a simulated data source. Each simulated data source can generate data for a certain number of tables. Different simulated data sources generate data for different tables.
The read task writes to the message queue in a blocking manner. That is, once the queue is full, the write operation will be blocked.
```java
{{#include docs/examples/java/src/main/java/com/taos/example/highvolume/ReadTask.java}}
```
</details>
<details>
<summary>WriteTask</summary>
```java
{{#include docs/examples/java/src/main/java/com/taos/example/highvolume/WriteTask.java}}
```
</details>
<details>
<summary>MockDataSource</summary>
```java
{{#include docs/examples/java/src/main/java/com/taos/example/highvolume/MockDataSource.java}}
```
</details>
<details>
<summary>SQLWriter</summary>
The SQLWriter class encapsulates the logic of SQL concatenation and data writing. Note that none of the tables are created in advance; instead, they are created in batches, using the supertable as a template, when a "table not found" exception is caught, after which the INSERT statement is re-executed. For other exceptions, the code simply logs the SQL statement being executed at the time; you can also log more clues to facilitate error troubleshooting and fault recovery.
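The create-on-miss-and-retry pattern can be sketched as follows. This is not the actual SQLWriter code: detecting the missing table by message text, the supertable schema, and the tag values are all assumptions made for illustration.

```java
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;

class RetryOnMissingTable {
    // Assumes conn is an open JDBC connection whose current database holds the meters supertable.
    static void insertWithAutoCreate(Connection conn, String subTable, String insertSql) throws SQLException {
        try (Statement stmt = conn.createStatement()) {
            stmt.executeUpdate(insertSql);
        } catch (SQLException e) {
            // Assumption: the missing-table case is detected here by message text;
            // the real SQLWriter may inspect the vendor error code instead.
            if (e.getMessage() != null && e.getMessage().contains("Table does not exist")) {
                try (Statement stmt = conn.createStatement()) {
                    // Create the subtable from the supertable template (tag values are placeholders),
                    // then re-execute the INSERT.
                    stmt.executeUpdate("CREATE TABLE IF NOT EXISTS " + subTable
                            + " USING meters TAGS (2, 'California.SanFrancisco')");
                    stmt.executeUpdate(insertSql);
                }
            } else {
                throw e;
            }
        }
    }
}
```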
```java
{{#include docs/examples/java/src/main/java/com/taos/example/highvolume/SQLWriter.java}}
```
</details>
<details>
<summary>DataBaseMonitor</summary>
```java
{{#include docs/examples/java/src/main/java/com/taos/example/highvolume/DataBaseMonitor.java}}
```
</details>
**Execution Steps**
<details>
<summary>Execute the Java Example Program</summary>
Before running the program, configure the environment variable `TDENGINE_JDBC_URL`. If the TDengine Server is deployed on the local machine, and the username, password, and port are all default values, then you can configure:
```shell
TDENGINE_JDBC_URL="jdbc:TAOS://localhost:6030?user=root&password=taosdata"
```
**Execute the example program in a local integrated development environment**
1. Clone the TDengine repository
```shell
git clone git@github.com:taosdata/TDengine.git --depth 1
```
2. Open the `docs/examples/java` directory with the integrated development environment.
3. Configure the environment variable `TDENGINE_JDBC_URL` in the development environment. If the global environment variable `TDENGINE_JDBC_URL` has already been configured, you can skip this step.
4. Run the class `com.taos.example.highvolume.FastWriteExample`.
**Execute the example program on a remote server**
To execute the example program on a server, follow these steps:
1. Package the example code. Execute in the directory TDengine/docs/examples/java:
```shell
mvn package
```
2. Create an examples directory on the remote server:
```shell
mkdir -p examples/java
```
3. Copy dependencies to the specified directory on the server:
- Copy the dependency packages (only needed once)
```shell
scp -r .\target\lib <user>@<host>:~/examples/java
```
- Copy the jar package of this program (copy it again every time the code is updated)
```shell
scp -r .\target\javaexample-1.0.jar <user>@<host>:~/examples/java
```
4. Configure the environment variable.
Edit `~/.bash_profile` or `~/.bashrc` and add content such as the following:
```shell
export TDENGINE_JDBC_URL="jdbc:TAOS://localhost:6030?user=root&password=taosdata"
```
The above uses the default JDBC URL when TDengine Server is deployed locally. You need to modify it according to your actual situation.
5. Start the example program with the Java command, command template:
```shell
java -classpath lib/*:javaexample-1.0.jar com.taos.example.highvolume.FastWriteExample <read_thread_count> <write_thread_count> <total_table_count> <max_batch_size>
```
6. End the test program. The test program will not end automatically; after obtaining a stable writing speed under the current configuration, press <kbd>CTRL</kbd> + <kbd>C</kbd> to end the program.
Below is the log output from an actual run on a machine configured with 16 cores, 64 GB RAM, and SSD.
```text
root@vm85$ java -classpath lib/*:javaexample-1.0.jar com.taos.example.highvolume.FastWriteExample 2 12
18:56:35.896 [main] INFO c.t.e.highvolume.FastWriteExample - readTaskCount=2, writeTaskCount=12 tableCount=1000 maxBatchSize=3000
18:56:36.011 [WriteThread-0] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.015 [WriteThread-0] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.021 [WriteThread-1] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.022 [WriteThread-1] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.031 [WriteThread-2] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.032 [WriteThread-2] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.041 [WriteThread-3] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.042 [WriteThread-3] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.093 [WriteThread-4] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.094 [WriteThread-4] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.099 [WriteThread-5] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.100 [WriteThread-5] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.100 [WriteThread-6] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.101 [WriteThread-6] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.103 [WriteThread-7] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.104 [WriteThread-7] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.105 [WriteThread-8] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.107 [WriteThread-8] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.108 [WriteThread-9] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.109 [WriteThread-9] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.156 [WriteThread-10] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.157 [WriteThread-11] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.158 [WriteThread-10] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.158 [ReadThread-0] INFO com.taos.example.highvolume.ReadTask - started
18:56:36.158 [ReadThread-1] INFO com.taos.example.highvolume.ReadTask - started
18:56:36.158 [WriteThread-11] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:46.369 [main] INFO c.t.e.highvolume.FastWriteExample - count=18554448 speed=1855444
18:56:56.946 [main] INFO c.t.e.highvolume.FastWriteExample - count=39059660 speed=2050521
18:57:07.322 [main] INFO c.t.e.highvolume.FastWriteExample - count=59403604 speed=2034394
18:57:18.032 [main] INFO c.t.e.highvolume.FastWriteExample - count=80262938 speed=2085933
18:57:28.432 [main] INFO c.t.e.highvolume.FastWriteExample - count=101139906 speed=2087696
18:57:38.921 [main] INFO c.t.e.highvolume.FastWriteExample - count=121807202 speed=2066729
18:57:49.375 [main] INFO c.t.e.highvolume.FastWriteExample - count=142952417 speed=2114521
18:58:00.689 [main] INFO c.t.e.highvolume.FastWriteExample - count=163650306 speed=2069788
18:58:11.646 [main] INFO c.t.e.highvolume.FastWriteExample - count=185019808 speed=2136950
```
</details>
</TabItem>
<TabItem label="Python" value="python">
**Program Listing**
The Python example program uses a multi-process architecture and employs a cross-process message queue.
| Function or Class | Description |
| ------------------------ | ------------------------------------------------------------------- |
| main function | Entry point of the program, creates various subprocesses and message queues |
| run_monitor_process function | Creates database, supertables, tracks write speed and periodically prints to console |
| run_read_task function | Main logic for read processes, responsible for reading data from other data systems and distributing it to assigned queues |
| MockDataSource class | Simulates a data source, implements iterator interface, returns the next 1,000 records for each table in batches |
| run_write_task function | Main logic for write processes. Retrieves as much data as possible from the queue and writes in batches |
| SQLWriter class | Handles SQL writing and automatic table creation |
| StmtWriter class | Implements batch writing with parameter binding (not yet completed) |
<details>
<summary>main function</summary>
The main function is responsible for creating message queues and launching subprocesses, which are of 3 types:
1. 1 monitoring process, responsible for database initialization and tracking write speed
2. n read processes, responsible for reading data from other data systems
3. m write processes, responsible for writing to the database
The main function can accept 5 startup parameters, in order:
1. Number of read tasks (processes), default is 1
2. Number of write tasks (processes), default is 1
3. Total number of simulated tables, default is 1,000
4. Queue size (in bytes), default is 1,000,000
5. Maximum number of records written per batch, default is 3,000
```python
{{#include docs/examples/python/fast_write_example.py:main}}
```
</details>
<details>
<summary>run_monitor_process</summary>
The monitoring process is responsible for initializing the database and monitoring the current write speed.
```python
{{#include docs/examples/python/fast_write_example.py:monitor}}
```
</details>
<details>
<summary>run_read_task function</summary>
The read process is responsible for reading data from other data systems and distributing it to the assigned queues.
```python
{{#include docs/examples/python/fast_write_example.py:read}}
```
</details>
<details>
<summary>MockDataSource</summary>
Below is the implementation of the mock data source. We assume that each piece of data generated by the data source includes the target table name information. In practice, you might need certain rules to determine the target table name.
```python
{{#include docs/examples/python/mockdatasource.py}}
```
</details>
<details>
<summary>run_write_task function</summary>
The write process retrieves as much data as possible from the queue and writes in batches.
```python
{{#include docs/examples/python/fast_write_example.py:write}}
```
</details>
<details>
<summary>SQLWriter</summary>
The SQLWriter class encapsulates the logic of SQL concatenation and data writing. None of the tables are created in advance; instead, they are created in batches, using the supertable as a template, when a "table does not exist" error occurs, and then the INSERT statement is re-executed. For other errors, the SQL being executed at the time is logged to facilitate error troubleshooting and fault recovery. This class also checks whether the SQL exceeds the maximum length limit; based on the TDengine 3.0 limit, the maximum supported SQL length of 1,048,576 is passed in through the input parameter maxSQLLength.
```python
{{#include docs/examples/python/sql_writer.py}}
```
</details>
**Execution Steps**
<details>
<summary>Execute the Python Example Program</summary>
1. Prerequisites
- TDengine client driver installed
- Python3 installed, recommended version >= 3.8
- taospy installed
2. Install faster-fifo to replace the built-in multiprocessing.Queue in python
```shell
pip3 install faster-fifo
```
3. Click the "View Source" link above to copy the `fast_write_example.py`, `sql_writer.py`, and `mockdatasource.py` files.
4. Execute the example program
```shell
python3 fast_write_example.py <READ_TASK_COUNT> <WRITE_TASK_COUNT> <TABLE_COUNT> <QUEUE_SIZE> <MAX_BATCH_SIZE>
```
Below is the output from an actual run on a machine configured with 16 cores, 64 GB RAM, and SSD.
```text
root@vm85$ python3 fast_write_example.py 8 8
2022-07-14 19:13:45,869 [root] - READ_TASK_COUNT=8, WRITE_TASK_COUNT=8, TABLE_COUNT=1000, QUEUE_SIZE=1000000, MAX_BATCH_SIZE=3000
2022-07-14 19:13:48,882 [root] - WriteTask-0 started with pid 718347
2022-07-14 19:13:48,883 [root] - WriteTask-1 started with pid 718348
2022-07-14 19:13:48,884 [root] - WriteTask-2 started with pid 718349
2022-07-14 19:13:48,884 [root] - WriteTask-3 started with pid 718350
2022-07-14 19:13:48,885 [root] - WriteTask-4 started with pid 718351
2022-07-14 19:13:48,885 [root] - WriteTask-5 started with pid 718352
2022-07-14 19:13:48,886 [root] - WriteTask-6 started with pid 718353
2022-07-14 19:13:48,886 [root] - WriteTask-7 started with pid 718354
2022-07-14 19:13:48,887 [root] - ReadTask-0 started with pid 718355
2022-07-14 19:13:48,888 [root] - ReadTask-1 started with pid 718356
2022-07-14 19:13:48,889 [root] - ReadTask-2 started with pid 718357
2022-07-14 19:13:48,889 [root] - ReadTask-3 started with pid 718358
2022-07-14 19:13:48,890 [root] - ReadTask-4 started with pid 718359
2022-07-14 19:13:48,891 [root] - ReadTask-5 started with pid 718361
2022-07-14 19:13:48,892 [root] - ReadTask-6 started with pid 718364
2022-07-14 19:13:48,893 [root] - ReadTask-7 started with pid 718365
2022-07-14 19:13:56,042 [DataBaseMonitor] - count=6676310 speed=667631.0
2022-07-14 19:14:06,196 [DataBaseMonitor] - count=20004310 speed=1332800.0
2022-07-14 19:14:16,366 [DataBaseMonitor] - count=32290310 speed=1228600.0
2022-07-14 19:14:26,527 [DataBaseMonitor] - count=44438310 speed=1214800.0
2022-07-14 19:14:36,673 [DataBaseMonitor] - count=56608310 speed=1217000.0
2022-07-14 19:14:46,834 [DataBaseMonitor] - count=68757310 speed=1214900.0
2022-07-14 19:14:57,280 [DataBaseMonitor] - count=80992310 speed=1223500.0
2022-07-14 19:15:07,689 [DataBaseMonitor] - count=93805310 speed=1281300.0
2022-07-14 19:15:18,020 [DataBaseMonitor] - count=106111310 speed=1230600.0
2022-07-14 19:15:28,356 [DataBaseMonitor] - count=118394310 speed=1228300.0
2022-07-14 19:15:38,690 [DataBaseMonitor] - count=130742310 speed=1234800.0
2022-07-14 19:15:49,000 [DataBaseMonitor] - count=143051310 speed=1230900.0
2022-07-14 19:15:59,323 [DataBaseMonitor] - count=155276310 speed=1222500.0
2022-07-14 19:16:09,649 [DataBaseMonitor] - count=167603310 speed=1232700.0
2022-07-14 19:16:19,995 [DataBaseMonitor] - count=179976310 speed=1237300.0
```
</details>
:::note
When using the Python connector to connect to TDengine from multiple processes, there is a limitation: connections cannot be established in the parent process; all connections must be created in the child processes.
If a connection is created in the parent process, any connection attempt in a child process will block indefinitely. This is a known issue.
:::
</TabItem>
</Tabs>
View File
@ -0,0 +1,27 @@
---
title: Developer's Guide
slug: /developer-guide
---
To develop an application, if you plan to use TDengine as a tool for time-series data processing, there are several things to do:
1. Determine the connection method to TDengine. No matter what programming language you use, you can always use the REST interface, but you can also use connectors unique to each programming language for convenient connections.
2. Based on your application scenario, determine the data model. Depending on the characteristics of the data, decide whether to create one or multiple databases; distinguish between static tags and collected metrics, establish the correct supertables, and create subtables.
3. Decide on the method of inserting data. TDengine supports data insertion using standard SQL, but also supports Schemaless mode insertion, which allows data to be written directly without manually creating tables.
4. Based on business requirements, determine which SQL queries need to be written.
5. If you want to perform lightweight real-time statistical analysis based on time-series data, including various monitoring dashboards, it is recommended to use the streaming computing capabilities of TDengine 3.0, instead of deploying complex streaming computing systems like Spark or Flink.
6. If your application has modules that need to consume inserted data and you want to be notified when new data is inserted, it is recommended to use the data subscription feature provided by TDengine, without the need to deploy Kafka or other messaging queue software.
7. In many scenarios (such as vehicle management), applications need to obtain the latest status of each data collection point, so it is recommended to use TDengine's Cache feature, instead of deploying separate caching software like Redis.
8. If you find that TDengine's functions do not meet your requirements, you can use User Defined Functions (UDF) to solve the problem.
This section is organized in the order mentioned above. For ease of understanding, TDengine provides example code for each feature and each supported programming language, located at [Example Code](https://github.com/taosdata/TDengine/tree/main/docs/examples). All example code is verified for correctness by CI; the scripts are located at [Example Code CI](https://github.com/taosdata/TDengine/tree/main/tests/docs-examples-test).
If you want to learn more about using SQL, check out the [SQL Manual](../tdengine-reference/sql-manual/). If you want to learn more about using various connectors, read the [Connector Reference Guide](../tdengine-reference/client-libraries/). If you also want to integrate TDengine with third-party systems, such as Grafana, please refer to [Third-Party Tools](../third-party-tools/).
If you encounter any problems during the development process, please click ["Report Issue"](https://github.com/taosdata/TDengine/issues/new/choose) at the bottom of each page to submit an Issue directly on GitHub.
```mdx-code-block
import DocCardList from '@theme/DocCardList';
import {useCurrentSidebarCategory} from '@docusaurus/theme-common';
<DocCardList items={useCurrentSidebarCategory().items}/>
```
View File
@ -1,75 +0,0 @@
---
title: Resource Planning
sidebar_label: Resource Planning
description: This document describes how to plan compute and storage resources for your TDengine cluster.
---
It is important to plan computing and storage resources if using TDengine to build an IoT, time-series or Big Data platform. How to plan the CPU, memory and disk resources required, will be described in this chapter.
## Server Memory Requirements
Each database creates a fixed number of vgroups. This number is 2 by default and can be configured with the `vgroups` parameter. The number of replicas can be controlled with the `replica` parameter. Each replica requires one vnode per vgroup. Altogether, the memory required by each database depends on the following configuration options:
- vgroups
- replica
- buffer
- pages
- pagesize
- cachesize
For more information, see [Database](../../reference/taos-sql/database).
The memory required by a database is therefore greater than or equal to:
```
vgroups * replica * (buffer + pages * pagesize + cachesize)
```
However, note that this requirement is spread over all dnodes in the cluster, not on a single physical machine. The physical servers that run dnodes meet the requirement together. If a cluster has multiple databases, the memory required increases accordingly. In complex environments where dnodes were added after initial deployment in response to increasing resource requirements, load may not be balanced among the original dnodes and newer dnodes. In this situation, the actual status of your dnodes is more important than theoretical calculations.
## Client Memory Requirements
Client programs that use the TDengine client driver `taosc` to connect to the server side also have memory requirements.
The memory consumed by a client program is mainly used for the SQL statements of data insertion, the caching of table metadata, and some internal purposes. Assuming the maximum number of tables is N (the metadata of each table consumes 256 bytes), the maximum number of threads for parallel insertion is T, and the maximum length of a SQL statement is S (normally 1 MB), the memory (in MB) required by a client program can be estimated using the formula below:
```
M = (T * S * 3 + (N / 4096) + 100)
```
For example, if the number of parallel data insertion threads is 100, total number of tables is 10,000,000, then the minimum memory requirement of a client program is:
```
100 * 3 + (10000000 / 4096) + 100 ≈ 2841 (MBytes)
```
So, at least 3GB needs to be reserved for such a client.
## CPU Requirement
The CPU resources required depend on two aspects:
- **Data Insertion** Each dnode of TDengine can process at least 10,000 insertion requests per second, and each insertion request can contain multiple rows. The difference in computing resources consumed between inserting 1 row at a time and inserting 10 rows at a time is very small, so the more rows inserted in one request, the higher the efficiency. If each insert request contains more than 200 records, a single core can process more than 1 million records per second. Inserting in batches also places requirements on the client side, which needs to cache rows and insert them in a batch once the number of cached rows reaches a threshold.
- **Data Query** High efficiency query is provided in TDengine, but it's hard to estimate the CPU resource required because the queries used in different use cases and the frequency of queries vary significantly. It can only be verified with the query statements, query frequency, data size to be queried, and other requirements provided by users.
In short, the CPU resource required for data insertion can be estimated but it's hard to do so for query use cases. If possible, ensure that CPU usage remains below 50%. If this threshold is exceeded, it's a reminder for system operator to add more nodes in the cluster to expand resources.
## Disk Requirement
The compression ratio in TDengine is much higher than that in RDBMS. In most cases, the compression ratio in TDengine is bigger than 5, or even 10 in some cases, depending on the characteristics of the original data. The data size before compression can be calculated based on below formula:
```
Raw DataSize = numOfTables * rowSizePerTable * rowsPerTable
```
For example, suppose there are 10,000,000 meters, each meter collects data every 15 minutes, and the data size of each collection is 128 bytes. The raw data size for one year is then: 10000000 \* 128 \* 24 \* 60 / 15 \* 365 = 44.8512 (TB). Assuming a compression ratio of 5, the actual disk size is: 44.8512 / 5 = 8.97024 (TB).
Parameter `keep` can be used to set how long the data will be kept on disk. To further reduce storage cost, multiple storage levels can be enabled in TDengine, with the coldest data stored on the cheapest storage device. This is completely transparent to application programs.
To increase performance, multiple disks can be setup for parallel data reading or data inserting. Please note that an expensive disk array is not necessary because replications are used in TDengine to provide high availability.
## Number of Hosts
A host can be either physical or virtual. The total memory, total CPU, total disk required can be estimated according to the formulae mentioned previously. If the number of data replicas is not 1, the required resources are multiplied by the number of replicas.
Then, according to the system resources that a single host can provide, assuming all hosts have the same resources, the number of hosts can be derived easily.
File diff suppressed because it is too large
View File
@ -1,25 +0,0 @@
---
title: Fault Tolerance and Disaster Recovery
description: This document describes how TDengine provides fault tolerance and disaster recovery.
---
## Fault Tolerance
TDengine uses **WAL**, i.e. Write Ahead Log, to achieve fault tolerance and high reliability.
When a data block is received by TDengine, the original data block is first written into WAL. The log in WAL will be deleted only after the data has been written into data files in the database. Data can be recovered from WAL in case the server is stopped abnormally for any reason and then restarted.
There are 2 configuration parameters related to WAL:
- wal_level: Specifies the WAL level. 1 indicates that WAL is enabled but fsync is disabled. 2 indicates that WAL and fsync are both enabled. The default value is 1.
- wal_fsync_period: This parameter is only valid when wal_level is set to 2. It specifies the interval, in milliseconds, of invoking fsync. If set to 0, it means fsync is invoked immediately once WAL is written.
To achieve absolutely no data loss, set wal_level to 2 and wal_fsync_period to 0. This imposes a performance penalty on the data ingestion rate. However, if the number of concurrent data insertion threads on the client side is large enough, for example 50, the data ingestion performance will still be good enough. Our verification shows that the drop is only 30% when wal_fsync_period is set to 3000 milliseconds.
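Assuming TDengine 3.0, where `wal_level` and `wal_fsync_period` can also be specified as per-database options, a minimal sketch of creating a database with synchronous fsync might look like the following; the Java connector, the default connection parameters, and the database name are assumptions made for illustration.

```java
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class WalOptionsSketch {
    public static void main(String[] args) throws Exception {
        String url = "jdbc:TAOS://localhost:6030/?user=root&password=taosdata";
        try (Connection conn = DriverManager.getConnection(url);
             Statement stmt = conn.createStatement()) {
            // wal_level 2 + wal_fsync_period 0: fsync on every WAL write, i.e. no data loss on crash.
            stmt.executeUpdate("CREATE DATABASE IF NOT EXISTS power WAL_LEVEL 2 WAL_FSYNC_PERIOD 0");
        }
    }
}
```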
## Disaster Recovery
TDengine provides disaster recovery by using taosX to replicate data between two TDengine clusters deployed in two distant data centers. Assume there are two TDengine clusters, A and B, where A is the source and B is the target, and A takes the workload of writing and querying. You can deploy `taosX` in the data center where cluster A resides; `taosX` consumes the data written into cluster A and writes it into cluster B. If the data center of cluster A is disrupted by a disaster, you can switch to cluster B to take the workload of data writing and querying, and deploy a `taosX` in the data center of cluster B to replicate data from cluster B back to cluster A once cluster A has recovered, or to another cluster C if cluster A cannot be recovered.
You can use the data replication feature of `taosX` to build more complicated disaster recovery solutions.
taosX is only provided in the TDengine Enterprise edition; for more details, please contact business@tdengine.com.
View File
@ -1,62 +0,0 @@
---
title: Data Import
description: This document describes how to import data into TDengine.
---
There are multiple ways of importing data provided by TDengine: import with script, import from data file, import using `taosdump`.
## Import Using Script
The TDengine CLI `taos` supports the `source <filename>` command for executing the SQL statements in a file in batch. The SQL statements for creating databases, creating tables, and inserting rows can be written in a single file, one statement per line, and then the file can be executed with the `source` command in the TDengine CLI to run the statements in order and in batch. Any line in the script file beginning with "#" is treated as a comment and ignored silently.
## Import from Data File
In TDengine CLI, data can be imported from a CSV file into an existing table. The data in a single CSV must belong to the same table and must be consistent with the schema of that table. The SQL statement is as below:
```sql
insert into tb1 file 'path/data.csv';
```
:::note
If the first line of the CSV file is a header, please remove it before importing. If a column has no value, please use `NULL` without quotes.
:::
For example, there is a subtable d1001 whose schema is as below:
```sql
taos> DESCRIBE d1001
Field | Type | Length | Note |
=================================================================================
ts | TIMESTAMP | 8 | |
current | FLOAT | 4 | |
voltage | INT | 4 | |
phase | FLOAT | 4 | |
location | BINARY | 64 | TAG |
groupid | INT | 4 | TAG |
```
The format of the CSV file to be imported, data.csv, is as below:
```csv
'2018-10-04 06:38:05.000',10.30000,219,0.31000
'2018-10-05 06:38:15.000',12.60000,218,0.33000
'2018-10-06 06:38:16.800',13.30000,221,0.32000
'2018-10-07 06:38:05.000',13.30000,219,0.33000
'2018-10-08 06:38:05.000',14.30000,219,0.34000
'2018-10-09 06:38:05.000',15.30000,219,0.35000
'2018-10-10 06:38:05.000',16.30000,219,0.31000
'2018-10-11 06:38:05.000',17.30000,219,0.32000
'2018-10-12 06:38:05.000',18.30000,219,0.31000
```
Then, the below SQL statement can be used to import data from file "data.csv", assuming the file is located under the home directory of the current Linux user.
```sql
taos> insert into d1001 file '~/data.csv';
Query OK, 9 row(s) affected (0.004763s)
```
## Import using taosdump
A convenient tool for importing and exporting data is provided by TDengine, `taosdump`, which can be used to export data from one TDengine cluster and import into another one. For the details of using `taosdump` please refer to the taosdump documentation.
View File
@ -1,22 +0,0 @@
---
title: Data Export
description: This document describes how to export data from TDengine.
---
There are two ways of exporting data from a TDengine cluster:
- Using a SQL statement in TDengine CLI
- Using the `taosdump` tool
## Export Using SQL
If you want to export the data of a table or a STable, please execute the SQL statement below, in the TDengine CLI.
```sql
select * from <tb_name> >> data.csv;
```
The data of table or STable specified by `tb_name` will be exported into a file named `data.csv` in CSV format.
## Export Using taosdump
With `taosdump`, you can choose to export the data of all databases, a database, a table or a STable, you can also choose to export the data within a time range, or even only export the schema definition of a table. For the details of using `taosdump` please refer to the taosdump documentation.
View File
@ -1,331 +0,0 @@
---
title: TDengine Monitoring
description: This document describes how to monitor your TDengine cluster.
---
After TDengine is started, it automatically writes monitoring data, including CPU, memory and disk usage, bandwidth, number of requests, disk I/O speed, and slow queries, into a designated database at a predefined interval through taosKeeper. Additionally, some important system operations, such as login, creating users, and dropping databases, as well as alerts and warnings generated in TDengine, are written into the `log` database too. A system operator can view the data in the `log` database from the TDengine CLI or from a web console.
The collection of monitoring information is enabled by default, but it can be disabled with the `monitor` parameter in the configuration file.
## TDinsight
TDinsight is a complete solution which uses the monitoring database `log` mentioned previously, and Grafana, to monitor a TDengine cluster.
A script `TDinsight.sh` is provided to deploy TDinsight automatically.
Download `TDinsight.sh` with the below command:
```bash
wget https://github.com/taosdata/grafanaplugin/raw/master/dashboards/TDinsight.sh
chmod +x TDinsight.sh
```
Prepare:
1. TDengine Server
- The URL of REST service: for example `http://localhost:6041` if TDengine is deployed locally
- User name and password
2. Grafana Alert Notification
You can use the command below to set up Grafana alert notification.
An existing Grafana Notification Channel can be specified with parameter `-E`, the notifier uid of the channel can be obtained by `curl -u admin:admin localhost:3000/api/alert-notifications |jq`
```bash
./TDinsight.sh -a http://localhost:6041 -u root -p taosdata -E <notifier uid>
```
Launch `TDinsight.sh` with the command above and restart Grafana, then open Dashboard `http://localhost:3000/d/tdinsight`.
## log database
The data of the TDinsight dashboard is stored in the `log` database by default (you can change this in taosKeeper's config file; for more information, please refer to the [taoskeeper document](../../reference/components/taosKeeper)). taosKeeper creates the `log` database on startup.
### taosd\_cluster\_basic table
`taosd_cluster_basic` table contains cluster basic information.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|first\_ep|VARCHAR||first ep of cluster|
|first\_ep\_dnode\_id|INT||dnode id of first\_ep|
|cluster_version|VARCHAR||TDengine version, such as 3.0.4.0|
|cluster\_id|VARCHAR|TAG|cluster id|
### taosd\_cluster\_info table
`taosd_cluster_info` table contains cluster information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|cluster\_uptime|DOUBLE||seconds of master's uptime|
|dbs\_total|DOUBLE||total number of databases in cluster|
|tbs\_total|DOUBLE||total number of tables in cluster|
|stbs\_total|DOUBLE||total number of stables in cluster|
|dnodes\_total|DOUBLE||total number of dnodes in cluster|
|dnodes\_alive|DOUBLE||total number of dnodes in ready state|
|mnodes\_total|DOUBLE||total number of mnodes in cluster|
|mnodes\_alive|DOUBLE||total number of mnodes in ready state|
|vgroups\_total|DOUBLE||total number of vgroups in cluster|
|vgroups\_alive|DOUBLE||total number of vgroups in ready state|
|vnodes\_total|DOUBLE||total number of vnode in cluster|
|vnodes\_alive|DOUBLE||total number of vnode in ready state|
|connections\_total|DOUBLE||total number of connections to cluster|
|topics\_total|DOUBLE||total number of topics in cluster|
|streams\_total|DOUBLE||total number of streams in cluster|
|grants_expire\_time|DOUBLE||time until grants expire in seconds|
|grants_timeseries\_used|DOUBLE||timeseries used|
|grants_timeseries\_total|DOUBLE||total timeseries|
|cluster\_id|VARCHAR|TAG|cluster id|
### taosd\_vgroups\_info table
`taosd_vgroups_info` table contains vgroups information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|tables\_num|DOUBLE||number of tables per vgroup|
|status|DOUBLE||status, value range: unsynced = 0, ready = 1|
|vgroup\_id|VARCHAR|TAG|vgroup id|
|database\_name|VARCHAR|TAG|database for the vgroup|
|cluster\_id|VARCHAR|TAG|cluster id|
### taosd\_dnodes\_info table
`taosd_dnodes_info` table contains dnodes information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|uptime|DOUBLE||dnode uptime in `seconds`|
|cpu\_engine|DOUBLE||cpu usage of tdengine. read from `/proc/<taosd_pid>/stat`|
|cpu\_system|DOUBLE||cpu usage of server. read from `/proc/stat`|
|cpu\_cores|DOUBLE||cpu cores of server|
|mem\_engine|DOUBLE||memory usage of tdengine. read from `/proc/<taosd_pid>/status`|
|mem\_free|DOUBLE||available memory on the server in `KB`|
|mem\_total|DOUBLE||total memory of server in `KB`|
|disk\_used|DOUBLE||usage of data dir in `bytes`|
|disk\_total|DOUBLE||the capacity of data dir in `bytes`|
|system\_net\_in|DOUBLE||network throughput rate in byte/s. read from `/proc/net/dev`|
|system\_net\_out|DOUBLE||network throughput rate in byte/s. read from `/proc/net/dev`|
|io\_read|DOUBLE||io throughput rate in byte/s. read from `/proc/<taosd_pid>/io`|
|io\_write|DOUBLE||io throughput rate in byte/s. read from `/proc/<taosd_pid>/io`|
|io\_read\_disk|DOUBLE||io throughput rate of disk in byte/s. read from `/proc/<taosd_pid>/io`|
|io\_write\_disk|DOUBLE||io throughput rate of disk in byte/s. read from `/proc/<taosd_pid>/io`|
|vnodes\_num|DOUBLE||number of vnodes per dnode|
|masters|DOUBLE||number of master vnodes|
|has\_mnode|DOUBLE||if the dnode has mnode, value range:include=1, not_include=0|
|has\_qnode|DOUBLE||if the dnode has qnode, value range:include=1, not_include=0|
|has\_snode|DOUBLE||if the dnode has snode, value range:include=1, not_include=0|
|has\_bnode|DOUBLE||if the dnode has bnode, value range:include=1, not_include=0|
|error\_log\_count|DOUBLE||error count|
|info\_log\_count|DOUBLE||info count|
|debug\_log\_count|DOUBLE||debug count|
|trace\_log\_count|DOUBLE||trace count|
|dnode\_id|VARCHAR|TAG|dnode id|
|dnode\_ep|VARCHAR|TAG|dnode endpoint|
|cluster\_id|VARCHAR|TAG|cluster id|
### taosd\_dnodes\_status table
`taosd_dnodes_status` table contains dnodes information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|status|DOUBLE||dnode status, value range: ready = 1, offline = 0|
|dnode\_id|VARCHAR|TAG|dnode id|
|dnode\_ep|VARCHAR|TAG|dnode endpoint|
|cluster\_id|VARCHAR|TAG|cluster id|
### taosd\_dnodes\_log\_dir table
`taosd_dnodes_log_dir` table contains log directory information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|avail|DOUBLE||available space for log directory in `bytes`|
|used|DOUBLE||used space for log directory in `bytes`|
|total|DOUBLE||total space for log directory in `bytes`|
|name|VARCHAR|TAG|log directory. default is `/var/log/taos/`|
|dnode\_id|VARCHAR|TAG|dnode id|
|dnode\_ep|VARCHAR|TAG|dnode endpoint|
|cluster\_id|VARCHAR|TAG|cluster id|
### taosd\_dnodes\_data\_dir table
`taosd_dnodes_data_dir` table contains data directory information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|avail|DOUBLE||available space for data directory in `bytes`|
|used|DOUBLE||used space for data directory in `bytes`|
|total|DOUBLE||total space for data directory in `bytes`|
|level|VARCHAR|TAG|level for multi-level storage|
|name|VARCHAR|TAG|data directory. default is `/var/lib/taos`|
|dnode\_id|VARCHAR|TAG|dnode id|
|dnode\_ep|VARCHAR|TAG|dnode endpoint|
|cluster\_id|VARCHAR|TAG|cluster id|
### taosd\_mnodes\_info table
`taosd_mnodes_info` table contains mnode information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|role|DOUBLE||the role of mnode, value range: offline = 0, follower = 100, candidate = 101, leader = 102, error = 103, learner = 104|
|mnode\_id|VARCHAR|TAG|master node id|
|mnode\_ep|VARCHAR|TAG|master node endpoint|
|cluster\_id|VARCHAR|TAG|cluster id|
### taosd\_vnodes\_role table
`taosd_vnodes_role` table contains vnode role information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|role|DOUBLE||role, value range: offline = 0, follower = 100, candidate = 101, leader = 102, error = 103, learner = 104|
|vgroup\_id|VARCHAR|TAG|vgroup id|
|database\_name|VARCHAR|TAG|database for the vgroup|
|dnode\_id|VARCHAR|TAG|dnode id|
|cluster\_id|VARCHAR|TAG|cluster id|
### taosd\_sql\_req table
`taosd_sql_req` table contains taosd sql records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|count|DOUBLE||sql count|
|result|VARCHAR|TAG|sql execution result, value range: Success, Failed|
|username|VARCHAR|TAG|user name who executed the sql|
|sql\_type|VARCHAR|TAG|sql type, value range: inserted_rows|
|dnode\_id|VARCHAR|TAG|dnode id|
|dnode\_ep|VARCHAR|TAG|dnode endpoint|
|vgroup\_id|VARCHAR|TAG|vgroup id|
|cluster\_id|VARCHAR|TAG|cluster id|
### taos\_sql\_req table
`taos_sql_req` table contains taos sql records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|count|DOUBLE||sql count|
|result|VARCHAR|TAG|sql execution result, value range: Success, Failed|
|username|VARCHAR|TAG|user name who executed the sql|
|sql\_type|VARCHAR|TAG|sql type, value range: select, insert, delete|
|cluster\_id|VARCHAR|TAG|cluster id|
### taos\_slow\_sql table
`taos_slow_sql` table contains taos slow sql records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|count|DOUBLE||sql count|
|result|VARCHAR|TAG|sql execution result, value range: Success, Failed|
|username|VARCHAR|TAG|user name who executed the sql|
|duration|VARCHAR|TAG|sql execution duration, value range: 3-10s, 10-100s, 100-1000s, 1000s-|
|cluster\_id|VARCHAR|TAG|cluster id|
### keeper\_monitor table
`keeper_monitor` table contains keeper monitor information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|cpu|FLOAT||cpu usage|
|mem|FLOAT||memory usage|
|identify|NCHAR|TAG||
### taosadapter\_restful\_http\_request\_total table
`taosadapter_restful_http_request_total` table contains taosadapter rest request information record. The timestamp column of this table is `_ts`.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|\_ts|TIMESTAMP||timestamp|
|gauge|DOUBLE||metric value|
|client\_ip|NCHAR|TAG|client ip|
|endpoint|NCHAR|TAG|taosadapter endpoint|
|request\_method|NCHAR|TAG|request method|
|request\_uri|NCHAR|TAG|request uri|
|status\_code|NCHAR|TAG|status code|
### taosadapter\_restful\_http\_request\_fail table
`taosadapter_restful_http_request_fail` table contains taosadapter failed rest request information record. The timestamp column of this table is `_ts`.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|\_ts|TIMESTAMP||timestamp|
|gauge|DOUBLE||metric value|
|client\_ip|NCHAR|TAG|client ip|
|endpoint|NCHAR|TAG|taosadapter endpoint|
|request\_method|NCHAR|TAG|request method|
|request\_uri|NCHAR|TAG|request uri|
|status\_code|NCHAR|TAG|status code|
### taosadapter\_restful\_http\_request\_in\_flight table
`taosadapter_restful_http_request_in_flight` table contains taosadapter rest request information record in real time. The timestamp column of this table is `_ts`.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|\_ts|TIMESTAMP||timestamp|
|gauge|DOUBLE||metric value|
|endpoint|NCHAR|TAG|taosadapter endpoint|
### taosadapter\_restful\_http\_request\_summary\_milliseconds table
`taosadapter_restful_http_request_summary_milliseconds` table contains the summary of rest request information records. The timestamp column of this table is `_ts`.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|\_ts|TIMESTAMP||timestamp|
|count|DOUBLE|||
|sum|DOUBLE|||
|0.5|DOUBLE|||
|0.9|DOUBLE|||
|0.99|DOUBLE|||
|0.1|DOUBLE|||
|0.2|DOUBLE|||
|endpoint|NCHAR|TAG|taosadapter endpoint|
|request\_method|NCHAR|TAG|request method|
|request\_uri|NCHAR|TAG|request uri|
### taosadapter\_system\_mem\_percent table
`taosadapter_system_mem_percent` table contains taosadapter memory usage information. The timestamp column of this table is `_ts`.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|\_ts|TIMESTAMP||timestamp|
|gauge|DOUBLE||metric value|
|endpoint|NCHAR|TAG|taosadapter endpoint|
### taosadapter\_system\_cpu\_percent table
`taosadapter_system_cpu_percent` table contains taosadapter cpu usage information. The timestamp column of this table is `_ts`.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|\_ts|TIMESTAMP||timestamp|
|gauge|DOUBLE||metric value|
|endpoint|NCHAR|TAG|taosadapter endpoint|
Some files were not shown because too many files have changed in this diff