Merge remote-tracking branch 'origin/3.0' into feat/TD-30268

dapan1121 2024-12-06 17:43:19 +08:00
commit d77147d296
1046 changed files with 47436 additions and 31813 deletions

.github/pull_request_template.md (new file, 11 additions)
View File

@@ -0,0 +1,11 @@
# Description
Please briefly describe the code changes in this pull request.
# Checklist
Please check the items in the checklist if applicable.
- [ ] Is the user manual updated?
- [ ] Are the test cases passed and automated?
- [ ] Is there no significant decrease in test coverage?

View File

@@ -6,6 +6,7 @@ node {
file_zh_changed = ''
file_en_changed = ''
file_no_doc_changed = '1'
file_only_tdgpt_change_except = '1'
def abortPreviousBuilds() {
def currentJobName = env.JOB_NAME
def currentBuildNumber = env.BUILD_NUMBER.toInteger()
@@ -69,13 +70,23 @@ def check_docs(){
file_no_doc_changed = sh (
script: '''
cd ${WKC}
git --no-pager diff --name-only FETCH_HEAD `git merge-base FETCH_HEAD ${CHANGE_TARGET}`|grep -v "^docs/en/"|grep -v "^docs/zh/"|grep -v "*.md" || :
git --no-pager diff --name-only FETCH_HEAD `git merge-base FETCH_HEAD ${CHANGE_TARGET}`|grep -v "^docs/en/"|grep -v "^docs/zh/"|grep -v ".md$" || :
''',
returnStdout: true
).trim()
file_only_tdgpt_change_except = sh (
script: '''
cd ${WKC}
git --no-pager diff --name-only FETCH_HEAD `git merge-base FETCH_HEAD ${CHANGE_TARGET}`|grep -v "^docs/en/"|grep -v "^docs/zh/"|grep -v ".md$" | grep -v "forecastoperator.c\\|anomalywindowoperator.c" |grep -v "tsim/analytics" |grep -v "tdgpt_cases.task" || :
''',
returnStdout: true
).trim()
echo "file_zh_changed: ${file_zh_changed}"
echo "file_en_changed: ${file_en_changed}"
echo "file_no_doc_changed: ${file_no_doc_changed}"
echo "file_only_tdgpt_change_except: ${file_only_tdgpt_change_except}"
}
}
@@ -355,7 +366,7 @@ def pre_test_build_win() {
bat '''
cd %WIN_COMMUNITY_ROOT%/tests/ci
pip3 install taospy==2.7.16
pip3 install taos-ws-py==0.3.3
pip3 install taos-ws-py==0.3.5
xcopy /e/y/i/f %WIN_INTERNAL_ROOT%\\debug\\build\\lib\\taos.dll C:\\Windows\\System32
'''
return 1
@@ -385,7 +396,7 @@ def run_win_test() {
}
pipeline {
agent none
agent any
options { skipDefaultCheckout() }
environment{
WKDIR = '/var/lib/jenkins/workspace'
@@ -451,8 +462,8 @@ pipeline {
stage('run test') {
when {
allOf {
not { expression { file_no_doc_changed == '' }}
expression {
file_no_doc_changed != '' && env.CHANGE_TARGET != 'docs-cloud'
}
}
parallel {
@@ -463,19 +474,27 @@ pipeline {
WIN_COMMUNITY_ROOT="C:\\workspace\\${env.EXECUTOR_NUMBER}\\TDinternal\\community"
WIN_SYSTEM_TEST_ROOT="C:\\workspace\\${env.EXECUTOR_NUMBER}\\TDinternal\\community\\tests\\system-test"
}
when {
beforeAgent true
expression { file_only_tdgpt_change_except != '' }
}
steps {
catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
timeout(time: 126, unit: 'MINUTES'){
pre_test_win()
pre_test_build_win()
run_win_ctest()
run_win_test()
pre_test_win()
pre_test_build_win()
run_win_ctest()
run_win_test()
}
}
}
}
stage('mac test') {
agent{label " Mac_catalina "}
when {
beforeAgent true
expression { file_only_tdgpt_change_except != '' }
}
steps {
catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
timeout(time: 60, unit: 'MINUTES'){
@@ -551,28 +570,25 @@ pipeline {
cd ${WKC}/tests/parallel_test
./run_scan_container.sh -d ${WKDIR} -b ${BRANCH_NAME}_${BUILD_ID} -f ${WKDIR}/tmp/${BRANCH_NAME}_${BUILD_ID}/docs_changed.txt ''' + extra_param + '''
'''
sh '''
cd ${WKC}/tests/parallel_test
export DEFAULT_RETRY_TIME=2
date
''' + timeout_cmd + ''' time ./run.sh -e -m /home/m.json -t cases.task -b ${BRANCH_NAME}_${BUILD_ID} -l ${WKDIR}/log -o 1200 ''' + extra_param + '''
'''
if ( file_no_doc_changed =~ /forecastoperator.c|anomalywindowoperator.c|tsim\/analytics|tdgpt_cases.task/ ) {
sh '''
cd ${WKC}/tests/parallel_test
export DEFAULT_RETRY_TIME=2
date
timeout 600 time ./run.sh -e -m /home/m.json -t tdgpt_cases.task -b ${BRANCH_NAME}_${BUILD_ID} -l ${WKDIR}/log -o 300 ''' + extra_param + '''
'''
}
if ( file_only_tdgpt_change_except != '' ) {
sh '''
cd ${WKC}/tests/parallel_test
export DEFAULT_RETRY_TIME=2
date
''' + timeout_cmd + ''' time ./run.sh -e -m /home/m.json -t cases.task -b ${BRANCH_NAME}_${BUILD_ID} -l ${WKDIR}/log -o 1200 ''' + extra_param + '''
'''
}
}
}
}
/*catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
timeout(time: 15, unit: 'MINUTES'){
script {
sh '''
echo "packaging ..."
date
rm -rf ${WKC}/release/*
cd ${WKC}/packaging
./release.sh -v cluster -n 3.0.0.100 -s static
'''
}
}
}*/
}
}
}

View File

@@ -337,14 +337,14 @@ Query OK, 2 row(s) in set (0.001700s)
TDengine provides a rich set of application development interfaces, including C/C++, Java, Python, Go, Node.js, C#, RESTful, and more, enabling users to develop applications quickly:
- [Java](https://docs.taosdata.com/connector/java/)
- [C/C++](https://docs.taosdata.com/connector/cpp/)
- [Python](https://docs.taosdata.com/connector/python/)
- [Go](https://docs.taosdata.com/connector/go/)
- [Node.js](https://docs.taosdata.com/connector/node/)
- [Rust](https://docs.taosdata.com/connector/rust/)
- [C#](https://docs.taosdata.com/connector/csharp/)
- [RESTful API](https://docs.taosdata.com/connector/rest-api/)
- [Java](https://docs.taosdata.com/reference/connector/java/)
- [C/C++](https://docs.taosdata.com/reference/connector/cpp/)
- [Python](https://docs.taosdata.com/reference/connector/python/)
- [Go](https://docs.taosdata.com/reference/connector/go/)
- [Node.js](https://docs.taosdata.com/reference/connector/node/)
- [Rust](https://docs.taosdata.com/reference/connector/rust/)
- [C#](https://docs.taosdata.com/reference/connector/csharp/)
- [RESTful API](https://docs.taosdata.com/reference/connector/rest-api/)
# 成为社区贡献者

View File

@@ -23,7 +23,26 @@
English | [简体中文](README-CN.md) | [TDengine Cloud](https://cloud.tdengine.com) | [Learn more about TSDB](https://tdengine.com/tsdb/)
# What is TDengine
# Table of Contents
1. [What is TDengine?](#1-what-is-tdengine)
2. [Documentation](#2-documentation)
3. [Building](#3-building)
1. [Install build tools](#31-install-build-tools)
1. [Get the source codes](#32-get-the-source-codes)
1. [Special Note](#33-special-note)
1. [Build TDengine](#34-build-tdengine)
4. [Installing](#4-installing)
1. [On Linux platform](#41-on-linux-platform)
1. [On Windows platform](#42-on-windows-platform)
1. [On macOS platform](#43-on-macos-platform)
1. [Quick Run](#44-quick-run)
5. [Try TDengine](#5-try-tdengine)
6. [Developing with TDengine](#6-developing-with-tdengine)
7. [Contribute to TDengine](#7-contribute-to-tdengine)
8. [Join the TDengine Community](#8-join-the-tdengine-community)
# 1. What is TDengine
TDengine is an open source, high-performance, cloud native [time-series database](https://tdengine.com/tsdb/) optimized for Internet of Things (IoT), Connected Cars, and Industrial IoT. It enables efficient, real-time data ingestion, processing, and monitoring of TB and even PB scale data per day, generated by billions of sensors and data collectors. TDengine differentiates itself from other time-series databases with the following advantages:
@@ -33,19 +52,19 @@ TDengine is an open source, high-performance, cloud native [time-series database
- **[Cloud Native](https://tdengine.com/tdengine/cloud-native-time-series-database/)**: Through native distributed design, sharding and partitioning, separation of compute and storage, RAFT, support for kubernetes deployment and full observability, TDengine is a cloud native Time-Series Database and can be deployed on public, private or hybrid clouds.
- **[Ease of Use](https://tdengine.com/tdengine/easy-time-series-data-platform/)**: For administrators, TDengine significantly reduces the effort to deploy and maintain. For developers, it provides a simple interface, simplified solution and seamless integrations for third party tools. For data users, it gives easy data access.
- **[Ease of Use](https://tdengine.com/tdengine/easy-time-series-data-platform/)**: For administrators, TDengine significantly reduces the effort to deploy and maintain. For developers, it provides a simple interface, simplified solution and seamless integrations for third party tools. For data users, it gives easy data access.
- **[Easy Data Analytics](https://tdengine.com/tdengine/time-series-data-analytics-made-easy/)**: Through super tables, storage and compute separation, data partitioning by time interval, pre-computation and other means, TDengine makes it easy to explore, format, and get access to data in a highly efficient way.
- **[Easy Data Analytics](https://tdengine.com/tdengine/time-series-data-analytics-made-easy/)**: Through super tables, storage and compute separation, data partitioning by time interval, pre-computation and other means, TDengine makes it easy to explore, format, and get access to data in a highly efficient way.
- **[Open Source](https://tdengine.com/tdengine/open-source-time-series-database/)**: TDengine's core modules, including the cluster feature, are all available under open source licenses. It has gathered 19.9k stars on GitHub. There is an active developer community, and over 139k running instances worldwide.
For a full list of TDengine competitive advantages, please [check here](https://tdengine.com/tdengine/). The easiest way to experience TDengine is through [TDengine Cloud](https://cloud.tdengine.com).
For a full list of TDengine competitive advantages, please [check here](https://tdengine.com/tdengine/). The easiest way to experience TDengine is through [TDengine Cloud](https://cloud.tdengine.com).
# Documentation
# 2. Documentation
For user manual, system design and architecture, please refer to [TDengine Documentation](https://docs.tdengine.com) ([TDengine 文档](https://docs.taosdata.com))
# Building
# 3. Building
At the moment, the TDengine server supports running on Linux, Windows, and macOS systems. Any application can also choose the RESTful interface provided by taosAdapter to connect to the taosd service. TDengine supports X64/ARM64 CPUs, and it will support MIPS64, Alpha64, ARM32, RISC-V, and other CPU architectures in the future. Right now we don't support building in a cross-compiling environment.
@@ -55,7 +74,7 @@ TDengine provide a few useful tools such as taosBenchmark (was named taosdemo) a
To build TDengine, use [CMake](https://cmake.org/) 3.13.0 or higher versions in the project directory.
## Install build tools
## 3.1 Install build tools
### Ubuntu 18.04 and above or Debian
@@ -148,7 +167,7 @@ cmake .. -DBUILD_HTTP=false
TDengine includes a few components developed by Rust language. Please refer to rust-lang.org official documentation for rust environment setup.
## Get the source codes
## 3.2 Get the source codes
First of all, you may clone the source codes from github:
@@ -164,11 +183,11 @@ You can modify the file ~/.gitconfig to use ssh protocol instead of https for be
insteadOf = https://github.com/
```
## Special Note
## 3.3 Special Note
The [JDBC Connector](https://github.com/taosdata/taos-connector-jdbc), [Go Connector](https://github.com/taosdata/driver-go), [Python Connector](https://github.com/taosdata/taos-connector-python), [Node.js Connector](https://github.com/taosdata/taos-connector-node), [C# Connector](https://github.com/taosdata/taos-connector-dotnet), [Rust Connector](https://github.com/taosdata/taos-connector-rust), and [Grafana plugin](https://github.com/taosdata/grafanaplugin) have been moved to standalone repositories.
## Build TDengine
## 3.4 Build TDengine
### On Linux platform
@@ -244,9 +263,9 @@ mkdir debug && cd debug
cmake .. && cmake --build .
```
# Installing
# 4. Installing
## On Linux platform
## 4.1 On Linux platform
After building successfully, TDengine can be installed by
@@ -272,7 +291,7 @@ taos
If TDengine CLI connects the server successfully, welcome messages and version info are printed. Otherwise, an error message is shown.
## On Windows platform
## 4.2 On Windows platform
After building successfully, TDengine can be installed by:
@@ -280,8 +299,7 @@ After building successfully, TDengine can be installed by:
nmake install
```
## On macOS platform
## 4.3 On macOS platform
After building successfully, TDengine can be installed by:
@@ -307,7 +325,7 @@ taos
If TDengine CLI connects the server successfully, welcome messages and version info are printed. Otherwise, an error message is shown.
## Quick Run
## 4.4 Quick Run
If you don't want to run TDengine as a service, you can run it in the current shell. For example, to quickly start a TDengine server after building, run the command below in a terminal (we take Linux as an example; the command on Windows is `taosd.exe`):
@@ -323,7 +341,7 @@ In another terminal, use the TDengine CLI to connect the server:
option "-c test/cfg" specifies the system configuration file directory.
# Try TDengine
# 5. Try TDengine
It is easy to run SQL commands from the TDengine CLI, just as in other SQL databases.
@@ -341,26 +359,26 @@ SELECT * FROM t;
Query OK, 2 row(s) in set (0.001700s)
```
# Developing with TDengine
# 6. Developing with TDengine
## Official Connectors
TDengine provides abundant development tools for users building on TDengine. Follow the links below to find your desired connectors and relevant documentation.
- [Java](https://docs.tdengine.com/reference/connector/java/)
- [C/C++](https://docs.tdengine.com/reference/connector/cpp/)
- [Python](https://docs.tdengine.com/reference/connector/python/)
- [Go](https://docs.tdengine.com/reference/connector/go/)
- [Node.js](https://docs.tdengine.com/reference/connector/node/)
- [Rust](https://docs.tdengine.com/reference/connector/rust/)
- [C#](https://docs.tdengine.com/reference/connector/csharp/)
- [RESTful API](https://docs.tdengine.com/reference/rest-api/)
- [Java](https://docs.tdengine.com/reference/connectors/java/)
- [C/C++](https://docs.tdengine.com/reference/connectors/cpp/)
- [Python](https://docs.tdengine.com/reference/connectors/python/)
- [Go](https://docs.tdengine.com/reference/connectors/go/)
- [Node.js](https://docs.tdengine.com/reference/connectors/node/)
- [Rust](https://docs.tdengine.com/reference/connectors/rust/)
- [C#](https://docs.tdengine.com/reference/connectors/csharp/)
- [RESTful API](https://docs.tdengine.com/reference/connectors/rest-api/)
# Contribute to TDengine
# 7. Contribute to TDengine
Please follow the [contribution guidelines](CONTRIBUTING.md) to contribute to the project.
# Join the TDengine Community
# 8. Join the TDengine Community
For more information about TDengine, you can follow us on social media and join our Discord server:

View File

@@ -97,10 +97,13 @@ ELSE()
SET(TD_TAOS_TOOLS TRUE)
ENDIF()
SET(TAOS_LIB taos)
SET(TAOS_LIB_STATIC taos_static)
IF(${TD_WINDOWS})
SET(TAOS_LIB taos_static)
SET(TAOS_LIB_PLATFORM_SPEC taos_static)
ELSE()
SET(TAOS_LIB taos)
SET(TAOS_LIB_PLATFORM_SPEC taos)
ENDIF()
# build TSZ by default

View File

@@ -2,7 +2,7 @@
IF (DEFINED VERNUMBER)
SET(TD_VER_NUMBER ${VERNUMBER})
ELSE ()
SET(TD_VER_NUMBER "3.3.4.3.alpha")
SET(TD_VER_NUMBER "3.3.4.8.alpha")
ENDIF ()
IF (DEFINED VERCOMPATIBLE)

View File

@@ -28,6 +28,9 @@ if(${BUILD_WITH_TRAFT})
# add_subdirectory(traft)
endif(${BUILD_WITH_TRAFT})
add_subdirectory(azure)
if(${BUILD_S3})
add_subdirectory(azure)
endif()
add_subdirectory(tdev)
add_subdirectory(lz4)

View File

@@ -1,32 +1,19 @@
---
title: TDengine Documentation
sidebar_label: Documentation Home
description: This website contains the user manuals for TDengine, an open-source, cloud-native time-series database optimized for IoT, Connected Cars, and Industrial IoT.
slug: /
---
TDengine is an [open-source](https://tdengine.com/tdengine/open-source-time-series-database/), [cloud-native](https://tdengine.com/tdengine/cloud-native-time-series-database/) [time-series database](https://tdengine.com/tsdb/) optimized for the Internet of Things (IoT), Connected Cars, and Industrial IoT. It enables efficient, real-time data ingestion, processing, and monitoring of TB and even PB scale data per day, generated by billions of sensors and data collectors. This document is the TDengine user manual. It introduces the basic as well as novel concepts in TDengine and discusses installation, features, SQL, APIs, operation, maintenance, kernel design, and other topics in detail. It's written mainly for architects, developers, and system administrators.
TDengine™ is a time-series database purpose-built for Industry 4.0 and Industrial IoT. It enables real-time ingestion, storage, analysis, and distribution of petabytes of data per day, generated by billions of sensors and data collectors. TDengine's mission is to make time-series data accessible, valuable, and affordable for everyone — from independent developers and startups to industry stalwarts and multinationals.
To get an overview of TDengine, such as a feature list, benchmarks, and competitive advantages, please browse through the [Introduction](./intro) section.
This website contains the user documentation for TDengine:
TDengine greatly improves the efficiency of data ingestion, querying, and storage by exploiting the characteristics of time series data, introducing the novel concepts of "one table for one data collection point" and "super table", and designing an innovative storage engine. To understand the new concepts in TDengine and make full use of the features and capabilities of TDengine, please read [Concepts](./concept) thoroughly.
- If you are new to time-series data, you can get a quick understanding of the field from ["What Is a Time-Series Database?"](https://tdengine.com/what-is-a-time-series-database/) and [other articles](https://tdengine.com/time-series-database/) on our official website.
- If you would like to install TDengine and experience its features for yourself, see the [Get Started](get-started/) section for instructions.
- System architects are advised to review the [Basic Features](basic-features/) and [Advanced Features](advanced-features/) sections to decide whether TDengine's capabilities can meet their needs, as well as [Inside TDengine](inside-tdengine/) for a more in-depth look at TDengine's design.
- Software developers can consult the [Developer's Guide](developer-guide/) for information about creating applications that interoperate with TDengine and writing user-defined functions that run within TDengine.
- Database administrators will find valuable information in [Operations and Maintenance](operations-and-maintenance/) and [TDengine Reference](tdengine-reference/) to assist in managing, maintaining, and monitoring their TDengine deployments.
If you are a developer, please read the [Developer Guide](./develop) carefully. This section introduces the database connection, data modeling, data ingestion, query, continuous query, cache, data subscription, user-defined functions, and other functionality in detail. Sample code is provided for a variety of programming languages. In most cases, you can just copy and paste the sample code, and make a few changes to accommodate your application, and it will work.
We live in the era of big data, and scale-up is unable to meet the growing needs of business. Any modern data system must have the ability to scale out, and clustering has become an indispensable feature of big data systems. Not only did the TDengine team develop the cluster feature, but it also decided to open-source this important feature. To learn how to deploy, manage, and maintain a TDengine cluster, please refer to [Cluster Deployment](./operation/deployment).
TDengine uses ubiquitous SQL as its query language, which greatly reduces learning costs and migration costs. In addition to the standard SQL, TDengine has extensions to better support time series data analysis. These extensions include functions such as roll-up, interpolation, and time-weighted average, among many others. The [SQL Reference](./reference/taos-sql) chapter describes the SQL syntax in detail and lists the various supported commands and functions.
If you are a system administrator who cares about installation, upgrade, fault tolerance, disaster recovery, data import, data export, system configuration, how to monitor whether TDengine is running healthily, and how to improve system performance, please refer to and thoroughly read the [Administration](./operation) section.
If you want to know more about TDengine tools and the REST API, please see the [Reference](./reference) chapter.
For information about connecting to TDengine with different programming languages, see [Client Libraries](./reference/connectors).
If you are very interested in the internal design of TDengine, please read the chapter [Inside TDengine](./tdinternal), which introduces the cluster design, data partitioning, sharding, writing, and reading processes in detail. If you want to study TDengine code or even contribute code, please read this chapter carefully.
To get a more general introduction to time-series databases, please read through [a series of articles](https://tdengine.com/tsdb/). To learn more about the competitive advantages of TDengine, please read through [a series of blogs](https://tdengine.com/tdengine/).
TDengine is an open-source database, and we would love for you to be a part of TDengine. If you find any errors in the documentation or see parts where more clarity or elaboration is needed, please click "Edit this page" at the bottom of each page to edit it directly.
TDengine, including this documentation, is an open-source project, and we welcome contributions from the community. If you find any errors or unclear descriptions, click **Edit this page** at the bottom of the page to submit your corrections. To view the source code, visit our [GitHub repository](https://github.com/taosdata/tdengine).
Together, we make a difference!

View File

@@ -1,182 +0,0 @@
---
title: Concepts
description: This document describes the basic concepts of TDengine, including the supertable.
---
In order to explain the basic concepts and provide some sample code, the TDengine documentation uses smart meters as a typical time-series use case. We assume the following: 1. Each smart meter collects three metrics, i.e. current, voltage, and phase; 2. There are multiple smart meters; 3. Each meter has static attributes like location and group ID. Based on this, the collected data will look similar to the following table:
<div className="center-table">
<table>
<thead>
<tr>
<th rowSpan="2">Device ID</th>
<th rowSpan="2">Timestamp</th>
<th colSpan="3">Collected Metrics</th>
<th colSpan="2">Tags</th>
</tr>
<tr>
<th>current</th>
<th>voltage</th>
<th>phase</th>
<th>location</th>
<th>groupid</th>
</tr>
</thead>
<tbody>
<tr>
<td>d1001</td>
<td>1538548685000</td>
<td>10.3</td>
<td>219</td>
<td>0.31</td>
<td>California.SanFrancisco</td>
<td>2</td>
</tr>
<tr>
<td>d1002</td>
<td>1538548684000</td>
<td>10.2</td>
<td>220</td>
<td>0.23</td>
<td>California.SanFrancisco</td>
<td>3</td>
</tr>
<tr>
<td>d1003</td>
<td>1538548686500</td>
<td>11.5</td>
<td>221</td>
<td>0.35</td>
<td>California.LosAngeles</td>
<td>3</td>
</tr>
<tr>
<td>d1004</td>
<td>1538548685500</td>
<td>13.4</td>
<td>223</td>
<td>0.29</td>
<td>California.LosAngeles</td>
<td>2</td>
</tr>
<tr>
<td>d1001</td>
<td>1538548695000</td>
<td>12.6</td>
<td>218</td>
<td>0.33</td>
<td>California.SanFrancisco</td>
<td>2</td>
</tr>
<tr>
<td>d1004</td>
<td>1538548696600</td>
<td>11.8</td>
<td>221</td>
<td>0.28</td>
<td>California.LosAngeles</td>
<td>2</td>
</tr>
<tr>
<td>d1002</td>
<td>1538548696650</td>
<td>10.3</td>
<td>218</td>
<td>0.25</td>
<td>California.SanFrancisco</td>
<td>3</td>
</tr>
<tr>
<td>d1001</td>
<td>1538548696800</td>
<td>12.3</td>
<td>221</td>
<td>0.31</td>
<td>California.SanFrancisco</td>
<td>2</td>
</tr>
</tbody>
</table>
<a href="#model_table1">Table 1: Smart meter example data</a>
</div>
Each row contains the device ID, timestamp, collected metrics (`current`, `voltage`, and `phase` as above), and static tags (`location` and `groupid` in Table 1) associated with the device. Each smart meter generates a row (measurement) at a predefined time interval or when triggered by an external event. The device produces a sequence of measurements with associated timestamps.
## Metric
A metric is a physical quantity collected by sensors, equipment, or other types of data collection devices, such as current, voltage, temperature, pressure, or GPS position. Metric values change over time, and their data type can be integer, float, Boolean, or string. The amount of stored metric data grows as time goes by. In the smart meters example, current, voltage, and phase are the metrics.
## Label/Tag
A label, or tag, is a static property of sensors, equipment, or other types of data collection devices that does not change with time, such as device model, color, or the fixed location of the device. Tags can be of any data type. Although tags are static, TDengine allows users to add, delete, or update tag values at any time. Unlike collected metric data, the amount of stored tag data does not grow over time. In the meters example, `location` and `groupid` are the tags.
## Data Collection Point
A data collection point (DCP) is hardware or software that collects metrics at preset time intervals or when triggered by events. A data collection point can collect one or multiple metrics, but these metrics are collected at the same time and share the same timestamp. Complex equipment often has multiple data collection points, and each collection point may sample at its own, fully independent rate. For example, a car could have one data collection point for GPS position metrics, one for engine status metrics, and one for the environment metrics inside the car, giving the car three data collection points in total. In the smart meters example, d1001, d1002, d1003, and d1004 are the data collection points.
## Table
Since time-series data is most likely structured data, TDengine adopts the traditional relational database model to process it, keeping the learning curve short. You create a database, create tables, then insert data points and execute queries to explore the data.
To make full use of time-series data characteristics, TDengine adopts a strategy of "**One Table for One Data Collection Point**". TDengine requires the user to create a table for each data collection point (DCP) to store collected time-series data. For example, if there are over 10 million smart meters, it means 10 million tables should be created. For the table above, 4 tables should be created for devices d1001, d1002, d1003, and d1004 to store the data collected. This design has several benefits:
1. Since the metric data from different DCP are fully independent, the data source of each DCP is unique, and a table has only one writer. In this way, data points can be written in a lock-free manner, and the writing speed can be greatly improved.
2. For a DCP, the metric data generated by DCP is ordered by timestamp, so the write operation can be implemented by simple appending, which further greatly improves the data writing speed.
3. The metric data from a DCP is continuously stored, block by block. If you read data for a period of time, it can greatly reduce random read operations and improve read and query performance by orders of magnitude.
4. Inside a data block for a DCP, columnar storage is used, and different compression algorithms are used for different data types. The values of a single metric generally change only gradually over a time range, which allows for a higher compression rate.
If the metric data of multiple DCPs are traditionally written into a single table, due to uncontrollable network delays, the timing of the data from different DCPs arriving at the server cannot be guaranteed, write operations must be protected by locks, and metric data from one DCP cannot be guaranteed to be continuously stored together. **One table for one data collection point can ensure the best performance of insert and query of a single data collection point to the greatest possible extent.**
TDengine suggests using the DCP ID as the table name (like d1001 in the above table). Each DCP may collect one or multiple metrics (like the `current`, `voltage`, and `phase` above). Each metric has a corresponding column in the table. The data type for a column can be int, float, string, and others. In addition, the first column in the table must be a timestamp. TDengine uses the timestamp as the index and does not build indexes on the stored metrics. Column-wise storage is used.
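As a minimal, hedged sketch (the column types below are assumptions based on the smart meters example, not definitions taken from this page), creating and writing to such a per-DCP table could look like this:

```sql
-- One table per data collection point; the first column must be the timestamp.
CREATE TABLE d1001 (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT);

-- Each measurement from this DCP is appended as one row.
INSERT INTO d1001 VALUES ('2018-10-03 14:38:05.000', 10.3, 219, 0.31);
```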
Complex devices, such as connected cars, may have multiple DCPs. In this case, multiple tables are created for a single device, one table per DCP.
## Super Table (STable)
The design of one table per data collection point requires a huge number of tables, which is difficult to manage. Furthermore, applications often need to perform aggregation operations across DCPs, and these aggregation operations become complicated with so many tables. To support aggregation over multiple tables efficiently, TDengine introduces the STable (Super Table) concept.
STable is a template for a type of data collection point. A STable contains a set of data collection points (tables) that have the same schema or data structure, but with different static attributes (tags). To describe a STable, in addition to defining the table structure of the metrics, it is also necessary to define the schema of its tags. The data type of tags can be int, float, string, and there can be multiple tags, which can be added, deleted, or modified afterward. If the whole system has N different types of data collection points, N STables need to be established.
In the design of TDengine, **a table is used to represent a specific data collection point, and STable is used to represent a set of data collection points of the same type**. In the smart meters example, we can create a super table named `meters`.
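As a sketch under the same assumptions about column and tag types, the `meters` supertable could be defined as follows; the template declares both the metric schema and the tag schema:

```sql
-- A supertable (STable) is a template: metric columns plus tag columns.
CREATE STABLE meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT)
  TAGS (location VARCHAR(64), groupid INT);
```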
## Subtable
When creating a table for a specific data collection point, the user can use a STable as a template and specify the tag values of this specific DCP. **The table created by using a STable as the template is called a subtable** in TDengine. The differences between a regular table and a subtable are:
1. A subtable is a table; all SQL commands that apply to a regular table can be applied to a subtable.
2. A subtable is a table with extensions: it has static tags (labels), and these tags can be added, deleted, and updated after it is created. A regular table does not have tags.
3. A subtable belongs to only one STable, but a STable may have many subtables. Regular tables do not belong to a STable.
4. A regular table cannot be converted into a subtable, and vice versa.
The relationship between a STable and the subtables created based on this STable is as follows:
1. A STable contains multiple subtables with the same metric schema but with different tag values.
2. The schema of metrics or labels cannot be adjusted through subtables; it can only be changed via the STable. Changes to the schema of a STable take effect immediately for all associated subtables.
3. STable defines only one template and does not store any data or label information by itself. Therefore, data cannot be written to a STable, only to subtables.
Queries can be executed on both a table (subtable) and a STable. For a query on a STable, TDengine treats the data in all its subtables as a whole data set. It first finds the subtables that meet the tag filter conditions, then scans the time-series data of only those subtables to perform the aggregation. This reduces the number of data sets to be scanned, which in turn greatly improves the performance of data aggregation across multiple DCPs. In essence, querying a supertable is a very efficient aggregate query on multiple DCPs of the same type.
In TDengine, it is recommended to use a subtable instead of a regular table for a DCP. In the smart meters example, we can create subtables like d1001, d1002, d1003, and d1004 under super table `meters`.
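Assuming the `meters` supertable sketched above, subtables can be created from the template with their specific tag values and then queried in aggregate; the statements below are illustrative only:

```sql
-- Create subtables for two DCPs using the supertable as the template.
CREATE TABLE d1001 USING meters TAGS ('California.SanFrancisco', 2);
CREATE TABLE d1003 USING meters TAGS ('California.LosAngeles', 3);

-- A query on the supertable aggregates over all subtables that match the tag filter.
SELECT AVG(current), MAX(voltage) FROM meters WHERE groupid = 2;
```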
To better understand the data model using metrics, tags, super table and subtable, please refer to the diagram below which demonstrates the data model of the smart meters example.
<figure>
![Meters Data Model Diagram](./supertable.webp)
<center><figcaption>Figure 1. Meters Data Model Diagram</figcaption></center>
</figure>
## Database
A database is a collection of tables. TDengine allows a running instance to have multiple databases, and each database can be configured with different storage policies. The [characteristics of time-series data](https://tdengine.com/tsdb/characteristics-of-time-series-data/) from different data collection points may differ. These characteristics, including collection frequency and retention policy, determine how you create and configure the database. For example, the number of days to keep data, the number of replicas, the data block size, whether data updates are allowed, and other configurable parameters are determined by the characteristics of your data and your business requirements. So that TDengine can work with maximum efficiency in various scenarios, TDengine recommends that STables with different data characteristics be created in different databases.
In a database, there can be one or more STables, but a STable belongs to only one database. All tables owned by a STable are stored in only one database.
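As a hedged example, a database with its own storage policy could be created as shown below; the retention and replica values are arbitrary assumptions for illustration, not recommendations:

```sql
-- Keep data for 365 days and maintain 3 replicas for this database.
CREATE DATABASE power KEEP 365 REPLICA 3;
USE power;
```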
## FQDN & End Point
FQDN (Fully Qualified Domain Name) is the full domain name of a specific computer or host on the Internet. An FQDN consists of two parts: the hostname and the domain name. For example, the FQDN of a mail server might be mail.tdengine.com, where the hostname is mail and the host is located in the domain tdengine.com. DNS (Domain Name System) is responsible for translating FQDNs into IP addresses. For systems without DNS, this can be handled by configuring the hosts file.
Each node of a TDengine cluster is uniquely identified by an End Point, which consists of an FQDN and a Port, such as h1.tdengine.com:6030. In this way, when the IP changes, we can still use the FQDN to dynamically find the node without changing any configuration of the cluster. In addition, FQDN is used to facilitate unified access to the same cluster from the Intranet and the Internet.
TDengine does not recommend using an IP address to access the cluster. FQDN is recommended for cluster management.

View File

@@ -1,132 +1,76 @@
---
title: Introduction
description: This document introduces the major features, competitive advantages, typical use cases, and benchmarks of TDengine.
toc_max_heading_level: 2
sidebar_label: Introduction
title: Introduction to TDengine
slug: /introduction
---
TDengine is a big data platform designed and optimized for IoT (Internet of Things) and the Industrial Internet. It can safely and effectively converge, store, process, and distribute the high volume of data (TB or even PB) generated every day by a large number of devices and data acquisition units, monitor and alert on business operation status in real time, and provide real-time business insight. The core component of TDengine is TDengine OSS, a high-performance, open-source, cloud-native, and simplified time-series database.
import Image from '@theme/IdealImage';
import imgEcosystem from './assets/introduction-01.png';
This section introduces the major features, competitive advantages, typical use cases, and benchmarks to help you get a high-level overview of TDengine.
TDengine is a time-series database designed to help traditional industries overcome the challenges of Industry 4.0 and Industrial IoT. It enables real-time ingestion, storage, analysis, and distribution of petabytes of data per day, generated by billions of sensors and data collectors. By making big data accessible and affordable, TDengine helps everyone — from independent developers and startups to industry stalwarts and multinationals — unlock the true value of their data.
## Major Features of TDengine OSS
## TDengine Offerings
The major features are listed below:
- [TDengine OSS](https://tdengine.com/oss/) is an open-source, cloud-native time-series database. Its source code is licensed under the AGPL and publicly available on GitHub. TDengine OSS serves as the code base for our paid offerings and provides the same core functionality. Unlike some open-core products, TDengine OSS is a full-featured solution that includes the necessary components for production use, including clustering.
- [TDengine Enterprise](https://tdengine.com/enterprise/) is a high-performance big data platform designed for Industry 4.0 and the Industrial IoT. Built on the open-source TDengine OSS, it delivers an enterprise-grade feature set tailored to the needs of traditional industries.
- [TDengine Cloud](https://cloud.tdengine.com) delivers all features of TDengine Enterprise as a fully managed service that can run on Amazon Web Services (AWS), Microsoft Azure, and Google Cloud Platform (GCP).
1. Insert data
- Supports [using SQL to insert](../develop/insert-data/sql-writing).
- Supports [schemaless writing](../reference/schemaless/) just like NoSQL databases. It also supports standard protocols like [InfluxDB Line](../develop/insert-data/influxdb-line), [OpenTSDB Telnet](../develop/insert-data/opentsdb-telnet), [OpenTSDB JSON ](../develop/insert-data/opentsdb-json) among others.
- Supports seamless integration with third-party tools like [Telegraf](../third-party/telegraf/), [Prometheus](../third-party/prometheus/), [collectd](../third-party/collectd/), [StatsD](../third-party/statsd/), [TCollector](../third-party/tcollector/), [EMQX](../third-party/emq-broker), [HiveMQ](../third-party/hive-mq-broker), and [Icinga2](../third-party/icinga2/); these tools can write data into TDengine with simple configuration and without a single line of code.
2. Query data
- Supports standard [SQL](../reference/taos-sql/), including nested query.
- Supports [time series specific functions](../reference/taos-sql/function/#time-series-extensions) and [time series specific queries](../reference/taos-sql/distinguished), like downsampling, interpolation, cumulated sum, time weighted average, state window, session window and many others.
- Supports [User Defined Functions (UDF)](../reference/taos-sql/udf).
3. [Caching](../develop/cache/): TDengine always saves the last data point in cache, so Redis is not needed for time-series data processing.
4. [Stream Processing](../develop/stream/): Not only is continuous query supported, but TDengine also supports event-driven stream processing, so Flink or Spark is not needed for time-series data processing.
5. [Data Subscription](../develop/tmq/): Applications can subscribe to a table or a set of tables. The API is the same as Kafka's, but you can specify filter conditions.
6. Visualization
- Supports seamless integration with [Grafana](../third-party/grafana/).
- Supports seamless integration with [Google Data Studio](../third-party/google-data-studio/).
7. Cluster
- Supports [cluster](../operation/deployment/) with the capability of increasing processing power by adding more nodes.
- Supports [deployment on Kubernetes](../operation/deployment).
- Supports high availability via data replication.
8. Administration
- Provides [monitoring](../operation/monitor) on running instances of TDengine.
- Provides many ways to [import](../operation/import) and [export](../operation/export) data.
9. Tools
- Provides an interactive [Command Line Interface (CLI)](../reference/components/taos-shell) for management, maintenance and ad-hoc queries.
- Provides a tool [taosBenchmark](../reference/components/taosbenchmark/) for testing the performance of TDengine.
10. Programming
- Provides [client libraries](../reference/connectors/) for [C/C++](../reference/connectors/cpp), [Java](../reference/connectors/java), [Python](../reference/connectors/python), [Go](../reference/connectors/go), [Rust](../reference/connectors/rust), [Node.js](../reference/connectors/node) and other programming languages.
- Provides a [REST API](../reference/connectors/rest-api).
## What Makes TDengine Different
For more details on features, please read through the entire documentation.
TDengine differentiates itself from typical time-series databases with the following four core competencies:
## Competitive Advantages
1. **High Performance at Any Scale:** With its distributed scalable architecture that grows together with your business, TDengine can store and process massive datasets up to 10.6x faster than other TSDBs — all while providing the split-second latency that your real-time visualization and reporting apps demand.
2. **Efficient Data Storage:** With its unique design and data model, TDengine provides the most cost-effective solution for storing your operations data, including tiered storage, S3, and 10:1 data compression, ensuring that you can get valuable business insights from your data without breaking the bank.
3. **Data Consolidation Across Sites:** With built-in connectors for a wide variety of industrial sources — MQTT, Kafka, OPC, PI System, and more — TDengine delivers zero-code data ingestion and extract, transform, and load (ETL) in a centralized platform that acts as a single source of truth for your business.
4. **Comprehensive Solution for Industrial Data:** With out-of-the-box data subscription, caching, and stream processing, TDengine is more than just a time-series database — it includes all key components needed for industrial data storage and processing built into a single product and accessible through familiar SQL statements.
By making full use of [characteristics of time series data](https://tdengine.com/characteristics-of-time-series-data/), TDengine differentiates itself from other time series databases with the following advantages.
## What TDengine Delivers
- **[High-Performance](https://tdengine.com/high-performance/)**: TDengine is the only time-series database to solve the high cardinality issue to support billions of data collection points while outperforming other time-series databases for data ingestion, querying and data compression.
With its innovative "one table per device" design, unique supertable concept, and highly optimized storage engine, TDengine is purpose-built to meet the unique needs of ingesting, querying, and storing massive time-series datasets. In its role at the core of the industrial data architecture, it provides the following functionality:
- **[Simplified Solution](https://tdengine.com/comprehensive-industrial-data-solution/)**: Through built-in caching, stream processing and data subscription features, TDengine provides a simplified solution for time-series data processing. It reduces system design complexity and operation costs significantly.
1. [Data Ingestion](../basic-features/data-ingestion/): You can write data into TDengine with standard SQL or in schemaless mode over the InfluxDB Line Protocol, OpenTSDB Telnet Protocol, and OpenTSDB JSON Protocol. TDengine also seamlessly integrates with data collectors like Telegraf and Prometheus.
2. [Data Querying](../basic-features/data-querying): In addition to standard SQL query syntax, TDengine includes time-series extensions such as downsampling and windowing, as well as functions such as cumulative sum and time-weighted average, to better meet the needs of time-series data processing (see the query sketch after this list). TDengine also supports user-defined functions (UDF), which can be written in C or Python.
3. [Read Caching](../advanced-features/caching/): TDengine uses a time-driven first-in, first-out (FIFO) cache management strategy, keeping the most recent data in the cache. This makes it easy and fast to access the real-time status of any metric without the need for other caching tools like Redis, simplifying system architecture and reducing operational costs.
4. [Stream Processing](../advanced-features/stream-processing/): TDengine's built-in stream processing engine provides the capability to process data streams in real-time as they are written, supporting not only continuous queries but also event-driven stream processing. This lightweight but optimized solution can return results in milliseconds even during high-throughput data ingestion.
5. [Data Subscription](../advanced-features/data-subscription): TDengine includes data subscription out of the box, eliminating the need to deploy other complex products to provide this critical feature. You can define topics in SQL, subscribing to a query, supertable, or database, and use a Kafka-like API to consume these topics in your applications (see the topic sketch after this list).
6. [Visualization](../third-party-tools/visualization/) and [BI](../third-party-tools/analytics/): Through its REST API and standard JDBC and ODBC interfaces, TDengine seamlessly integrates with leading platforms like Grafana, Power BI, and Seeq.
7. [Clustering](../operations-and-maintenance/deploy-your-cluster/): TDengine supports clustered deployment so that you can add nodes to scale your system and increase processing capacity. At the same time, it provides high availability through multi-replica technology and supports Kubernetes deployment. It also offers various operational tools to facilitate system administrators in managing and maintaining robust cluster operations.
8. Data Migration: TDengine provides various convenient data import and export functions, including script file import/export, data file import/export, and the [taosdump](../tdengine-reference/tools/taosdump/) tool.
9. [Client Libraries](../tdengine-reference/client-libraries/): TDengine offers client libraries for a variety of different programming languages, including Java, Python, and C/C++, so that you can build custom applications in your favorite language. Sample code that you can copy and paste into your apps is also provided to make the development process even easier.
10. O&M Tools: You can use the interactive [command-line interface (CLI)](../tdengine-reference/tools/tdengine-cli/) for managing clusters, checking system status, and performing ad hoc queries. The stress-testing tool [taosBenchmark](../tdengine-reference/tools/taosbenchmark/) is a quick way to generate sample data and test the performance of TDengine. And TDengine's GUI component [taosExplorer](../tdengine-reference/components/taosexplorer/) simplifies the operations and management process.
11. [Data Security](https://tdengine.com/security/): With TDengine Enterprise, you can implement fine-grained access controls with rich user and permissions management features. IP whitelisting helps you control which accounts can access your cluster from which servers, and audit logs record sensitive operations. In TDengine Enterprise, you can also configure encryption in transit on the server level and encryption at rest on the database level, which is transparent to operations and has minimal impact on performance.
12. [Zero-Code Data Connectors](https://tdengine.com/data-sources/): TDengine Enterprise includes zero-code connectors for industrial data protocols like MQTT and OPC, traditional data historians like AVEVA PI System and Wonderware Historian, relational databases like Oracle Database and SQL Server, and other time-series databases like InfluxDB and OpenTSDB. With these connectors, you can synchronize or migrate diverse time-series datasets to TDengine in the GUI without touching a line of code.
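The two statements below are illustrative sketches only; they assume the smart-meters schema (a `meters` supertable with a `current` column) used elsewhere in the documentation rather than anything defined on this page. The first shows a downsampling query over time windows, the second a data subscription topic defined in SQL:

```sql
-- Downsample into 10-minute windows; _wstart is the window start time.
SELECT _wstart, AVG(current) FROM meters INTERVAL(10m);

-- Define a topic that applications can consume through the Kafka-like subscription API.
CREATE TOPIC meters_topic AS SELECT ts, current, voltage FROM meters;
```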
- **[Cloud Native](https://tdengine.com/cloud-native/)**: Through native distributed design, sharding and partitioning, separation of compute and storage, RAFT, support for Kubernetes deployment and full observability, TDengine is a cloud native Time-series Database and can be deployed on public, private or hybrid clouds.
## How TDengine Benefits You
- **[Ease of Use](https://tdengine.com/easy-to-use/)**: For administrators, TDengine significantly reduces the effort to deploy and maintain. For developers, it provides a simple interface, simplified solution and seamless integrations for third party tools. For data users, it gives easy data access.
With its high performance, standard SQL support, and component integration, TDengine can reduce your total cost of data operations:
- **[Easy Data Analytics](https://tdengine.com/simplifying-time-series-analysis-for-data-scientists/)**: Through super tables, storage and compute separation, data partitioning by time interval, pre-computation and other means, TDengine makes it easy to explore, format, and get access to data in a highly efficient way.
1. **Industry-leading performance:** TDengine significantly outperforms other time-series databases with up to 16 times faster ingestion and over 100 times higher query performance than InfluxDB or TimescaleDB while requiring fewer storage resources. Because TDengine ingests data faster, stores data more efficiently, and responds to queries more quickly, it uses fewer CPU and storage resources and adds less to your bills.
2. **Easy to use with no learning costs:** TDengine is easier to use than other time-series database solutions and does not require specialized training. This is because TDengine supports standard SQL, is easy to integrate with third-party tools, and comes with client libraries for various programming languages, including sample code.
3. **Simplified, fully integrated solution:** By including stream processing, caching, and data subscription as built-in components at no extra cost, TDengine eliminates the need to deploy third-party products just to process time-series data. Its components are simple, easy to use, and purpose-built to process time-series data.
- **[Open Source](https://tdengine.com/open-source/)**: TDengine's core modules, including cluster feature, are all available under open source licenses. It has gathered over 22k stars on GitHub. There is an active developer community, and over 400k running instances worldwide.
## TDengine Ecosystem
With TDengine, the total cost of ownership of your time-series data platform can be greatly reduced.
1. With its superior performance, the computing and storage resources are reduced significantly.
2. With SQL support, it can be seamlessly integrated with many third party tools, and learning costs/migration costs are reduced significantly.
3. With its simplified solution and nearly zero management, the operation and maintenance costs are reduced significantly.
## Technical Ecosystem
This is how TDengine would be situated in a typical time-series data processing platform:
With its open ecosystem, TDengine allows you the freedom to construct the data stack that is best for your business. Its support for standard SQL, zero-code connectors for a wide range of industrial protocols and data solutions, and seamless integration with visualization, analytics, and business intelligence (BI) applications make it easy to fit TDengine into your infrastructure.
<figure>
![TDengine Database Technical Ecosystem ](eco_system.webp)
<center><figcaption>Figure 1. TDengine Technical Ecosystem</figcaption></center>
<Image img={imgEcosystem} alt="TDengine ecosystem"/>
<figcaption>Figure 1. TDengine ecosystem</figcaption>
</figure>
On the left-hand side, there are data collection agents like OPC-UA, MQTT, Telegraf and Kafka. On the right-hand side, visualization/BI tools, HMI, Python/R, and IoT Apps can be connected. TDengine itself provides an interactive command-line interface and a web interface for management and maintenance.
As shown in the figure, TDengine acts as the central source of truth in an industrial data ecosystem, ingesting data from a variety of sources and sharing that data with business applications and stakeholders.
## Typical Use Cases
## Application Scenarios
As a high-performance, scalable, SQL-supporting time-series database, TDengine's typical use cases include but are not limited to IoT, Industrial Internet, connected vehicles, IT operation and maintenance, energy, financial markets, and other fields. TDengine is a purpose-built database optimized for the characteristics of time-series data. As such, it cannot be used to process data from web crawlers, social media, e-commerce, ERP, CRM, and so on. More generally, TDengine is not a suitable storage engine for non-time-series data. This section provides a more detailed analysis of the applicable scenarios.
TDengine is the only time-series database purpose-built for industrial scenarios and is fully capable of storing and processing the massive, high-frequency datasets generated by a range of industries, especially the following:
### Characteristics and Requirements of Data Sources
- [Renewable energy](https://tdengine.com/renewable-energy/)
- [Manufacturing](https://tdengine.com/manufacturing/)
- [Connected cars](https://tdengine.com/connected-cars/)
| **Data Source Characteristics and Requirements** | **Not Applicable** | **Might Be Applicable** | **Very Applicable** | **Description** |
| ------------------------------------------------ | ------------------ | ----------------------- | ------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| A massive amount of total data | | | √ | TDengine provides excellent scale-out functions in terms of capacity, and has a storage structure with matching high compression ratio to achieve the best storage efficiency in the industry. |
| Data input velocity is extremely high | | | √ | TDengine's performance is much higher than that of other similar products. It can continuously process larger amounts of input data in the same hardware environment, and provides a performance evaluation tool that can easily run in the user environment. |
| A huge number of data sources | | | √ | TDengine is optimized specifically for a huge number of data sources. It is especially suitable for efficiently ingesting, writing and querying data from billions of data sources. |
TDengine can also form the core component of a data stack to enable the following industrial applications:
### System Architecture Requirements
- [Predictive maintenance](https://tdengine.com/predictive-maintenance/)
- [Vibration analysis](https://tdengine.com/high-frequency-data/)
- [Condition monitoring](https://tdengine.com/condition-monitoring)
| **System Architecture Requirements** | **Not Applicable** | **Might Be Applicable** | **Very Applicable** | **Description** |
| ----------------------------------------- | ------------------ | ----------------------- | ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| A simple and reliable system architecture | | | √ | TDengine's system architecture is very simple and reliable, with its own message queue, cache, stream computing, monitoring and other functions. There is no need to integrate any additional third-party products. |
| Fault-tolerance and high-reliability | | | √ | TDengine has cluster functions to automatically provide high-reliability and high-availability functions such as fault tolerance and disaster recovery. |
| Standardization support | | | √ | TDengine supports standard SQL and provides SQL extensions for time-series data analysis. |
### System Function Requirements
| **System Function Requirements** | **Not Applicable** | **Might Be Applicable** | **Very Applicable** | **Description** |
| -------------------------------------------- | ------------------ | ----------------------- | ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Complete data processing algorithms built-in | | √ | | While TDengine implements various general data processing algorithms, industry specific algorithms and special types of processing will need to be implemented at the application level. |
| A large number of crosstab queries | | √ | | This type of processing is better handled by general purpose relational database systems but TDengine can work in concert with relational database systems to provide more complete solutions. |
### System Performance Requirements
| **System Performance Requirements** | **Not Applicable** | **Might Be Applicable** | **Very Applicable** | **Description** |
| ------------------------------------------------- | ------------------ | ----------------------- | ------------------- | --------------------------------------------------------------------------------------------------------------------------- |
| Very large total processing capacity | | | √ | TDengine's cluster functions can easily improve processing capacity via multi-server coordination. |
| Extremely high-speed data processing | | | √ | TDengine's storage and data processing are optimized for IoT, and can process data many times faster than similar products. |
| Extremely fast processing of high resolution data | | | √ | TDengine has achieved the same or better performance than other relational and NoSQL data processing systems. |
### System Maintenance Requirements
| **System Maintenance Requirements** | **Not Applicable** | **Might Be Applicable** | **Very Applicable** | **Description** |
| --------------------------------------- | ------------------ | ----------------------- | ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| Native high-reliability | | | √ | TDengine has a very robust, reliable and easily configurable system architecture to simplify routine operation. Human errors and accidents are eliminated to the greatest extent, with a streamlined experience for operators. |
| Minimize learning and maintenance costs | | | √ | In addition to being easily configurable, standard SQL support and the TDengine CLI for ad hoc queries makes maintenance simpler, allows reuse and reduces learning costs. |
| Abundant talent supply | √ | | | Given the above, and given the extensive training and professional services provided by TDengine, it is easy to migrate from existing solutions or create a new and lasting solution based on TDengine. |
## Comparison with other databases
- [TDengine vs. InfluxDB](https://tdengine.com/tsdb-comparison-influxdb-vs-tdengine/)
- [TDengine vs. TimescaleDB](https://tdengine.com/tsdb-comparison-timescaledb-vs-tdengine/)
## Products
For information about our paid offerings, see:
- [TDengine Enterprise](https://tdengine.com/enterprise/)
- [TDengine Cloud](https://cloud.tdengine.com)

@ -1,137 +1,126 @@
---
title: Quick Install on Docker
sidebar_label: Docker
description: This document describes how to install TDengine in a Docker container and perform queries and inserts.
sidebar_label: Deploy in Docker
title: Get Started with TDengine Using Docker
description: Quickly experience TDengines efficient insertion and querying using Docker
slug: /get-started/deploy-in-docker
---
This document describes how to install TDengine in a Docker container and perform queries and inserts.
You can install TDengine in a Docker container and perform some basic tests to verify its performance.
- The easiest way to explore TDengine is through [TDengine Cloud](https://cloud.tdengine.com).
- To get started with TDengine in a non-containerized environment, see [Quick Install from Package](../../get-started/package).
- If you want to view the source code, build TDengine yourself, or contribute to the project, see the [TDengine GitHub repository](https://github.com/taosdata/TDengine).
To install TDengine on your local machine instead of in a container, see [Get Started with TDengine Using an Installation Package](../deploy-from-package/).
## Run TDengine
## Before You Begin
If Docker is already installed on your computer, pull the latest TDengine Docker container image:
- Install Docker. For more information, see the [Docker website](https://www.docker.com/).
- Ensure that the network ports required by TDengine are not currently in use. For more information, see [Network Port Requirements](../../operations-and-maintenance/system-requirements/#network-port-requirements).
```shell
docker pull tdengine/tdengine:latest
```
## Procedure
Or the container image of specific version:
1. Pull the latest TDengine image:
```shell
docker pull tdengine/tdengine:3.0.1.4
```
```bash
docker pull tdengine/tdengine:latest
```
And then run the following command:
:::note
You can also pull a specific version of the image. For example:
```shell
docker run -d -p 6030:6030 -p 6041:6041 -p 6043-6060:6043-6060 -p 6043-6060:6043-6060/udp tdengine/tdengine
```
```bash
docker pull tdengine/tdengine:3.3.0.0
```
Note that TDengine Server 3.0 uses TCP port 6030. Port 6041 is used by taosAdapter for the REST API service. Ports 6043 through 6049 are used by taosAdapter for other connections. You can open these ports as needed.
:::
If you need to persist data to a specific directory on your local machine, please run the following command:
```shell
docker run -d -v ~/data/taos/dnode/data:/var/lib/taos \
-v ~/data/taos/dnode/log:/var/log/taos \
-p 6030:6030 -p 6041:6041 -p 6043-6060:6043-6060 -p 6043-6060:6043-6060/udp tdengine/tdengine
```
:::note
2. Start a container with the following command:
- /var/lib/taos: TDengine's default data file directory. The location can be changed via the configuration file. You can also replace ~/data/taos/dnode/data with any empty local data directory.
- /var/log/taos: TDengine's default log file directory. The location can be changed via the configuration file. You can also replace ~/data/taos/dnode/log with any empty local log directory.
```bash
docker run -d -p 6030:6030 -p 6041:6041 -p 6043-6060:6043-6060 -p 6043-6060:6043-6060/udp tdengine/tdengine
```
:::
To persist data to your local machine, use the following command:
```bash
docker run -d -v <local-data-directory>:/var/lib/taos -v <local-log-directory>:/var/log/taos -p 6030:6030 -p 6041:6041 -p 6043-6060:6043-6060 -p 6043-6060:6043-6060/udp tdengine/tdengine
```
Run the following command to ensure that your container is running:
3. Verify that the container is running properly:
```shell
docker ps
```
```bash
docker ps
```
Enter the container and open the `bash` shell:
4. Enter the container and open a shell:
```shell
docker exec -it <container name> bash
```
```bash
docker exec -it <container-name> bash
```
You can now access TDengine or run other Linux commands.
You can now work with TDengine inside your container. For example, you can run the `taos` command to open the TDengine command-line interface.
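If you prefer to verify the deployment from outside the container, you can also send a query to taosAdapter's REST interface on the mapped port 6041. The following is a minimal sketch, assuming the default `root`/`taosdata` credentials and the port mappings from the `docker run` command above.
```bash
# Query the server version over the REST API (default credentials; port 6041 must be published).
curl -u root:taosdata -d "SELECT SERVER_VERSION();" http://localhost:6041/rest/sql
```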
Note: For information about installing Docker, see the [official documentation](https://docs.docker.com/get-docker/).
## What to Do Next
## TDengine Command Line Interface
### Test Data Ingestion
On the container, run the following command to open the TDengine CLI:
Your TDengine installation includes taosBenchmark, a tool specifically designed to test TDengines performance. taosBenchmark can simulate data generated by many devices with a wide range of configuration options so that you can perform tests on sample data similar to your real-world use cases. For more information about taosBenchmark, see [taosBenchmark](../../tdengine-reference/tools/taosbenchmark/).
```
$ taos
Perform the following steps to use taosBenchmark to test TDengine's ingestion performance in your container:
taos>
1. In a shell inside your container, run taosBenchmark with the default settings:
```
```bash
taosBenchmark -y
```
## TDengine Graphic User Interface
taosBenchmark automatically creates the `test` database and the `meters` supertable inside that database. This supertable contains 10,000 subtables, named `d0` to `d9999`, with each subtable containing 10,000 records. Each record includes the following four metrics:
From TDengine 3.3.0.0, a new component called `taos-explorer` is included in the TDengine Docker image. You can use it to manage the databases, supertables, subtables, and data in your TDengine system. Some features are only available in TDengine Enterprise Edition; please contact the TDengine sales team if you need them.
- `ts` (timestamp), ranging from `2017-07-14 10:40:00 000` to `2017-07-14 10:40:09 999`
- `current`
- `voltage`
- `phase`
To use taos-explorer in the container, access the host port that is mapped from container port 6060. For example, if the host name is abc.com and host port 6060 is mapped to the container, open `http://abc.com:6060`. taos-explorer listens on port 6060 inside the container by default. The default username and password for the TDengine database management system are "root" and "taosdata".
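As a quick check that the web interface is reachable, you can request the port from the host; this is a sketch only and assumes that container port 6060 is mapped to the same port on the host, as in the `docker run` examples above.
```bash
# Print the HTTP status code returned by taos-explorer (replace localhost with your host name if needed).
curl -sS -o /dev/null -w "%{http_code}\n" http://localhost:6060
```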
Each subtable also has the following two tags:
## Test data insert performance
- `groupId`, ranging from `1` to `10`
- `location`, indicating a city and state such as `California.Campbell` or `California.Cupertino`
After your TDengine Server is running normally, you can run the taosBenchmark utility to test its performance:
When the ingestion process is finished, taosBenchmark outputs the time taken to ingest the specified sample data. From this, you can estimate how TDengine would perform on your system in a production environment.
Start TDengine service and execute `taosBenchmark` (formerly named `taosdemo`) in a terminal.
### Test Data Querying
```bash
taosBenchmark
```
After inserting data with taosBenchmark as described above, you can use the TDengine CLI to test TDengine's query performance in your container:
This command creates the `meters` supertable in the `test` database. In the `meters` supertable, it then creates 10,000 subtables named `d0` to `d9999`. Each table has 10,000 rows and each row has four columns: `ts`, `current`, `voltage`, and `phase`. The timestamps of the data in these columns range from 2017-07-14 10:40:00 000 to 2017-07-14 10:40:09 999. Each table is randomly assigned a `groupId` tag from 1 to 10 and a `location` tag of either `California.Campbell`, `California.Cupertino`, `California.LosAngeles`, `California.MountainView`, `California.PaloAlto`, `California.SanDiego`, `California.SanFrancisco`, `California.SanJose`, `California.SantaClara` or `California.Sunnyvale`.
1. Start the TDengine CLI:
The `taosBenchmark` command creates a deployment with 100 million data points that you can use for testing purposes. The time required to create the deployment depends on your hardware. On most modern servers, the deployment is created in ten to twenty seconds.
```bash
taos
```
You can customize the test deployment that taosBenchmark creates by specifying command-line parameters. For information about command-line parameters, run the `taosBenchmark --help` command. For more information about taosBenchmark, see [taosBenchmark](../../reference/components/taosbenchmark).
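For example, the following sketch generates a smaller test data set; the `-t` (number of child tables) and `-n` (records per table) options are assumed here, so confirm them against `taosBenchmark --help` for your version.
```bash
# Create 1,000 child tables with 1,000 rows each, skipping the interactive confirmation prompt.
taosBenchmark -t 1000 -n 1000 -y
```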
2. Query the total number of records in the `meters` supertable:
## Test data query performance
```sql
SELECT COUNT(*) FROM test.meters;
```
After using `taosBenchmark` to create your test deployment, you can run queries in the TDengine CLI to test its performance:
3. Query the average, maximum, and minimum values of 100 million records:
From the TDengine CLI (taos) query the number of rows in the `meters` supertable:
```sql
SELECT AVG(current), MAX(voltage), MIN(phase) FROM test.meters;
```
```sql
SELECT COUNT(*) FROM test.meters;
```
4. Query the total number of records where the value of the `location` tag is `California.SanFrancisco`:
Query the average, maximum, and minimum values of all 100 million rows of data:
```sql
SELECT COUNT(*) FROM test.meters WHERE location = "California.SanFrancisco";
```
```sql
SELECT AVG(current), MAX(voltage), MIN(phase) FROM test.meters;
```
5. Query the average, maximum, and minimum values of all records where the value of the `groupId` tag is `10`:
Query the number of rows whose `location` tag is `California.SanFrancisco`:
```sql
SELECT AVG(current), MAX(voltage), MIN(phase) FROM test.meters WHERE groupId = 10;
```
```sql
SELECT COUNT(*) FROM test.meters WHERE location = "California.SanFrancisco";
```
6. Calculate the average, maximum, and minimum values for the `d1001` table every 10 seconds:
Query the average, maximum, and minimum values of all rows whose `groupId` tag is `10`:
```sql
SELECT AVG(current), MAX(voltage), MIN(phase) FROM test.meters WHERE groupId = 10;
```
Query the average, maximum, and minimum values for table `d10` in 10 second intervals:
```sql
SELECT FIRST(ts), AVG(current), MAX(voltage), MIN(phase) FROM test.d10 INTERVAL(10s);
```
In the query above, you are selecting the first timestamp (ts) in the interval. Another way of selecting this value is `_wstart`, which gives the start of the time window. For more information about windowed queries, see [Time-Series Extensions](../../reference/taos-sql/distinguished/).
## Additional Information
For more information about deploying TDengine in a Docker environment, see [Deploying TDengine with Docker](../../operation/deployment/#docker).
```sql
SELECT _wstart, AVG(current), MAX(voltage), MIN(phase) FROM test.d1001 INTERVAL(10s);
```

@ -1,326 +1,247 @@
---
title: Quick Install from Package
sidebar_label: Package
description: This document describes how to install TDengine on Linux, Windows, and macOS and perform queries and inserts.
sidebar_label: Deploy from Package
title: Get Started with TDengine Using an Installation Package
description: Quick experience with TDengine using the installation package
slug: /get-started/deploy-from-package
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
import PkgListV3 from "/components/PkgListV3";
This document describes how to install TDengine on Linux/Windows/macOS and perform queries and inserts.
You can install TDengine on a local machine and perform some basic tests to verify its performance. The TDengine OSS server can be installed on Linux and macOS, and the TDengine OSS client can be installed on Linux, macOS, and Windows.
- The easiest way to explore TDengine is through [TDengine Cloud](https://cloud.tdengine.com).
- To get started with TDengine on Docker, see [Quick Install on Docker](../../get-started/docker).
- If you want to view the source code, build TDengine yourself, or contribute to the project, see the [TDengine GitHub repository](https://github.com/taosdata/TDengine).
To install TDengine in a Docker container instead of on your machine, see [Get Started with TDengine in Docker](../deploy-in-docker/).
The full package of TDengine includes the TDengine Server (`taosd`), TDengine Client (`taosc`), taosAdapter for connecting with third-party systems and providing a RESTful interface, a command-line interface (CLI, taos), and some tools. Note that taosAdapter supports Linux only. In addition to client libraries for multiple languages, TDengine also provides a [REST API](../../reference/connectors/rest-api) through [taosAdapter](../../reference/components/taosadapter).
## Before You Begin
The standard server installation package includes `taos`, `taosd`, `taosAdapter`, `taosBenchmark`, and sample code. You can also download the Lite package that includes only `taosd` and the C/C++ client library.
- Verify that your machine meets the minimum system requirements for TDengine. For more information, see [Supported Platforms](../../tdengine-reference/supported-platforms/) and [System Requirements](../../operations-and-maintenance/system-requirements/).
- **(Windows only)** Verify that the latest version of the Microsoft Visual C++ Redistributable is installed on your machine. To download the redistributable package, see [Microsoft Visual C++ Redistributable latest supported downloads](https://learn.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist?view=msvc-170).
TDengine OSS is released as Deb and RPM packages. The Deb package can be installed on Debian, Ubuntu, and derivative systems. The RPM package can be installed on CentOS, RHEL, SUSE, and derivative systems. A .tar.gz package is also provided for enterprise customers, and you can install TDengine over `apt-get` as well. The .tar.gz package includes `taosdump` and the TDinsight installation script. If you want to use these utilities with the Deb or RPM package, download and install taosTools separately. TDengine can also be installed on x64 Windows and x64/M1 macOS.
## Procedure
## Operating environment requirements
In the Linux system, the minimum requirements for the operating environment are as follows:
The TDengine OSS installation package is provided for Linux users in .deb, .rpm, and .tar.gz format and can also be installed via APT from our repository. Installation packages are also provided for macOS (client and server) and Windows (client only).
Linux kernel version: 3.10.0-1160.83.1.el7.x86_64;
1. Select the appropriate package for your machine and follow the steps to install TDengine.
glibc version - 2.17;
<Tabs>
<TabItem label=".deb" value="debinst">
If you compile and install TDengine from cloned source code, the following additional requirements apply:
1. Download the .deb installation package:
<PkgListV3 type={6}/>
2. Run the following command to install TDengine:
cmake version - 3.26.4 or above;
```bash
sudo dpkg -i TDengine-server-<version>-Linux-x64.deb
```
gcc version - 9.3.1 or above;
Replace `<version>` with the version of the package that you downloaded.
## Installation
</TabItem>
**Note**
<TabItem label=".rpm" value="rpminst">
Since TDengine 3.0.6.0, we no longer provide a standalone taosTools package for download. However, all the tools included in the taosTools package can be found in the TDengine-server package.
1. Download the .rpm installation package:
<PkgListV3 type={5}/>
2. Run the following command to install TDengine:
<Tabs>
<TabItem label=".deb" value="debinst">
```bash
sudo rpm -ivh TDengine-server-<version>-Linux-x64.rpm
```
1. Download the Deb installation package.
<PkgListV3 type={6}/>
2. In the directory where the package is located, use `dpkg` to install the package:
Replace `<version>` with the version of the package that you downloaded.
> Please replace `<version>` with the corresponding version of the package downloaded
</TabItem>
```bash
sudo dpkg -i TDengine-server-<version>-Linux-x64.deb
```
<TabItem label=".tar.gz" value="tarinst">
</TabItem>
1. Download the desired .tar.gz package from the following list:
<PkgListV3 type={0}/>
2. Run the following command to decompress the package:
<TabItem label=".rpm" value="rpminst">
```bash
tar -zxvf TDengine-server-<version>-Linux-x64.tar.gz
```
1. Download the .rpm installation package.
<PkgListV3 type={5}/>
2. In the directory where the package is located, use rpm to install the package:
Replace `<version>` with the version of the package that you downloaded.
3. In the directory where you decompressed the package, run the following command to install TDengine:
> Please replace `<version>` with the corresponding version of the package downloaded
```bash
sudo ./install.sh
```
```bash
sudo rpm -ivh TDengine-server-<version>-Linux-x64.rpm
```
:::note
</TabItem>
The `install.sh` script requires you to enter configuration information in the terminal. For a non-interactive installation, run `./install.sh -e no`. You can run `./install.sh -h` for detailed information about all parameters.
<TabItem label=".tar.gz" value="tarinst">
:::
1. Download the .tar.gz installation package.
<PkgListV3 type={0}/>
2. In the directory where the package is located, use `tar` to decompress the package:
</TabItem>
> Please replace `<version>` with the corresponding version of the package downloaded
<TabItem label="APT" value="apt-get">
```bash
tar -zxvf TDengine-server-<version>-Linux-x64.tar.gz
```
1. Configure the package repository:
In the directory to which the package was decompressed, run `install.sh`:
```bash
wget -qO - http://repos.taosdata.com/tdengine.key | sudo apt-key add -
echo "deb [arch=amd64] http://repos.taosdata.com/tdengine-stable stable main" | sudo tee /etc/apt/sources.list.d/tdengine-stable.list
```
```bash
sudo ./install.sh
```
2. Update the list of available packages and install TDengine.
:::info
You will be prompted to enter some configuration information while install.sh is running. You can disable interactive mode by executing `./install.sh -e no`. Run `./install.sh -h` to display all parameters with detailed explanations.
:::
```bash
sudo apt-get update
apt-cache policy tdengine
sudo apt-get install tdengine
```
</TabItem>
</TabItem>
<TabItem value="apt-get" label="apt-get">
You can use `apt-get` to install TDengine from the official package repository.
<TabItem label="Windows" value="windows">
**Configure the package repository**
:::note
```bash
wget -qO - http://repos.taosdata.com/tdengine.key | sudo apt-key add -
echo "deb [arch=amd64] http://repos.taosdata.com/tdengine-stable stable main" | sudo tee /etc/apt/sources.list.d/tdengine-stable.list
```
This procedure installs the TDengine OSS client on Windows. The TDengine OSS server does not support Windows.
You can install beta versions by configuring the following repository:
:::
```bash
wget -qO - http://repos.taosdata.com/tdengine.key | sudo apt-key add -
echo "deb [arch=amd64] http://repos.taosdata.com/tdengine-beta beta main" | sudo tee /etc/apt/sources.list.d/tdengine-beta.list
```
1. Download the Windows installation package:
<PkgListV3 type={3}/>
2. Run the installation package to install TDengine.
**Install TDengine with `apt-get`**
</TabItem>
```bash
sudo apt-get update
apt-cache policy tdengine
sudo apt-get install tdengine
```
<TabItem label="macOS" value="macos">
:::tip
This installation method is supported only for Debian and Ubuntu.
:::
</TabItem>
<TabItem label="Windows" value="windows">
1. Download the desired installation package from the following list:
<PkgListV3 type={7}/>
2. Run the installation package to install TDengine.
**Note**
- TDengine only supports Windows Server 2016/2019 and Windows 10/11 on the Windows platform.
- Since TDengine 3.1.0.0, we only provide the client package for Windows. If you need to run the TDengine server on Windows, please contact the TDengine sales team to upgrade to TDengine Enterprise.
- To run on Windows, the Microsoft Visual C++ Runtime library is required. If the Microsoft Visual C++ Runtime Library is missing on your platform, you can download and install it from [VC Runtime Library](https://learn.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist?view=msvc-170).
:::note
Follow the steps below:
If the installation is blocked, right-click on the package and choose **Open**.
1. Download the Windows installation package.
<PkgListV3 type={3}/>
2. Run the downloaded package to install TDengine.
Note: From version 3.0.1.7, only the TDengine client package can be downloaded for the Windows platform. If you want to run TDengine servers on Windows, please contact our sales team to upgrade to TDengine Enterprise.
:::
</TabItem>
</Tabs>
</TabItem>
<TabItem label="macOS" value="macos">
2. When installing the first node and prompted with `Enter FQDN:`, you do not need to input anything. Only when installing the second or subsequent nodes do you need to input the FQDN of any available node in the existing cluster to join the new node to the cluster. Alternatively, you can configure it in the new node's configuration file before starting.
1. Download the macOS installation package.
<PkgListV3 type={7}/>
2. Run the downloaded package to install TDengine. If the installation is blocked, you can right-click or ctrl-click on the installation package and select `Open`.
3. Select your operating system and follow the steps to start TDengine services.
</TabItem>
</Tabs>
<Tabs>
<TabItem label="Linux" value="linux">
:::info
For information about TDengine other releases, check [Release History](../../releases/tdengine).
:::
Run the following command to start all TDengine services:
:::note
On the first node in your TDengine cluster, leave the `Enter FQDN:` prompt blank and press **Enter**. On subsequent nodes, you can enter the endpoint of the first dnode in the cluster. You can also configure this setting after you have finished installing TDengine.
```bash
sudo start-all.sh
```
:::
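If you prefer to configure the cluster endpoint after installation rather than at the `Enter FQDN:` prompt, you can edit the node's configuration file before starting the service. The snippet below is only a sketch: it assumes the standard `firstEp` and `fqdn` parameters in `/etc/taos/taos.cfg`, and the host names are placeholders.
```bash
# Point a new node at the first dnode in the cluster (placeholder host names).
sudo sh -c 'cat >> /etc/taos/taos.cfg <<EOF
firstEp  node1.example.com:6030
fqdn     node2.example.com
EOF'
```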
Alternatively, you can manage specific TDengine services through systemd:
## Quick Launch
```bash
sudo systemctl start taosd
sudo systemctl start taosadapter
sudo systemctl start taoskeeper
sudo systemctl start taos-explorer
```
<Tabs>
<TabItem label="Linux" value="linux">
:::note
After the installation is complete, run the following command to start the TDengine service:
If your machine does not support systemd, you can manually run the TDengine services located in the `/usr/local/taos/bin` directory.
```bash
systemctl start taosd
systemctl start taosadapter
systemctl start taoskeeper
systemctl start taos-explorer
```
:::
Or you can run a script to start all of the above services together:
</TabItem>
```bash
start-all.sh
```
<TabItem label="macOS" value="macos">
You can also use systemctl to stop or restart a specific service or check its status, as shown below using `taosd` as an example:
Run the following command to start all TDengine services:
```bash
systemctl start taosd
systemctl stop taosd
systemctl restart taosd
systemctl status taosd
```
```bash
sudo start-all.sh
```
:::info
Alternatively, you can manage specific TDengine services with the `launchctl` command:
- The `systemctl` command requires _root_ privileges. If you are not logged in as the _root_ user, use the `sudo` command.
- The `systemctl stop taosd` command does not instantly stop TDengine Server. The server is stopped only after all data in memory is flushed to disk. The time required depends on the cache size.
- If your system does not include `systemd`, you can run `/usr/local/taos/bin/taosd` to start TDengine manually.
```bash
sudo launchctl start com.tdengine.taosd
sudo launchctl start com.tdengine.taosadapter
sudo launchctl start com.tdengine.taoskeeper
sudo launchctl start com.tdengine.taos-explorer
```
:::
</TabItem>
</Tabs>
</TabItem>
You can now work with TDengine on your local machine. For example, you can run the `taos` command to open the TDengine command-line interface.
<TabItem label="Windows" value="windows">
## What to Do Next
After the installation is complete, run `sc start taosd` or run `C:\TDengine\taosd.exe` with administrator privileges to start TDengine Server. Run `sc start taosadapter` or run `C:\TDengine\taosadapter.exe` with administrator privileges to start taosAdapter, which provides the HTTP/REST service.
### Test Data Ingestion
</TabItem>
Your TDengine installation includes taosBenchmark, a tool specifically designed to test TDengines performance. taosBenchmark can simulate data generated by many devices with a wide range of configuration options so that you can perform tests on sample data similar to your real-world use cases. For more information about taosBenchmark, see [taosBenchmark](../../tdengine-reference/tools/taosbenchmark/).
<TabItem label="macOS" value="macos">
Perform the following steps to use taosBenchmark to test TDengine's ingestion performance on your machine:
After the installation is complete, double-click /Applications/TDengine to start the program, or use `sudo launchctl start` with the appropriate service name to start the TDengine services.
1. Run taosBenchmark with the default settings:
```bash
sudo launchctl start com.tdengine.taosd
sudo launchctl start com.tdengine.taosadapter
sudo launchctl start com.tdengine.taoskeeper
sudo launchctl start com.tdengine.taos-explorer
```
```bash
taosBenchmark -y
```
Or you can run a script to start all of the above services together:
```bash
start-all.sh
```
taosBenchmark automatically creates the `test` database and the `meters` supertable inside that database. This supertable contains 10,000 subtables, named `d0` to `d9999`, with each subtable containing 10,000 records. Each record includes the following four metrics:
The following `launchctl` commands can help you manage TDengine services, using the `taosd` service as an example:
- `ts` (timestamp), ranging from `2017-07-14 10:40:00 000` to `2017-07-14 10:40:09 999`
- `current`
- `voltage`
- `phase`
```bash
sudo launchctl start com.tdengine.taosd
sudo launchctl stop com.tdengine.taosd
sudo launchctl list | grep taosd
sudo launchctl print system/com.tdengine.taosd
```
Each subtable also has the following two tags:
:::info
- Please use `sudo` to run `launchctl` to manage _com.tdengine.taosd_ with administrator privileges.
- The administrator privilege is required for service management to enhance security.
- Troubleshooting:
- The first column returned by the command `launchctl list | grep taosd` is the PID of the program. If it's `-`, that means the TDengine service is not running.
- If the service is abnormal, please check the `launchd.log` file in the system log or the `taosdlog` file in the `/var/log/taos` directory for more information.
- `groupId`, ranging from `1` to `10`
- `location`, indicating a city and state such as `California.Campbell` or `California.Cupertino`
:::
When the ingestion process is finished, taosBenchmark outputs the time taken to ingest the specified sample data. From this, you can estimate how TDengine would perform on your system in a production environment.
### Test Data Querying
</TabItem>
</Tabs>
After inserting data with taosBenchmark as described above, you can use the TDengine CLI to test TDengine's query performance on your machine:
1. Start the TDengine CLI:
## TDengine Command Line Interface
```bash
taos
```
You can use the TDengine CLI to monitor your TDengine deployment and execute ad hoc queries. To open the CLI, execute `taos` (Linux/macOS) or `taos.exe` (Windows) in a terminal. The TDengine CLI prompt looks like this:
2. Query the total number of records in the `meters` supertable:
```cmd
taos>
```
```sql
SELECT COUNT(*) FROM test.meters;
```
Using the TDengine CLI, you can create and delete databases and tables and run all types of queries. Each SQL command must end with a semicolon (;). For example:
3. Query the average, maximum, and minimum values of 100 million records:
```sql
CREATE DATABASE demo;
USE demo;
CREATE TABLE t (ts TIMESTAMP, speed INT);
INSERT INTO t VALUES ('2019-07-15 00:00:00', 10);
INSERT INTO t VALUES ('2019-07-15 01:00:00', 20);
SELECT * FROM t;
           ts            |  speed  |
========================================
 2019-07-15 00:00:00.000 |      10 |
 2019-07-15 01:00:00.000 |      20 |
Query OK, 2 row(s) in set (0.003128s)
```
```sql
SELECT AVG(current), MAX(voltage), MIN(phase) FROM test.meters;
```
4. Query the total number of records where the value of the `location` tag is `California.SanFrancisco`:
```sql
SELECT COUNT(*) FROM test.meters WHERE location = "California.SanFrancisco";
```
You can also monitor the deployment status, add and remove user accounts, and manage running instances. You can run the TDengine CLI on either the server or the client machine. For more information, see [TDengine CLI](../../reference/components/taos-shell/).
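For instance, the following sketch shows basic account management and status checks from the CLI; the user name and password are placeholders.
```sql
-- Create an additional user account and list existing users (placeholder credentials).
CREATE USER ops_user PASS 'Taos_1234';
SHOW USERS;
-- Check the status of the dnodes in the deployment.
SHOW DNODES;
```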
5. Query the average, maximum, and minimum values of all records where the value of the `groupId` tag is `10`:
## TDengine Graphic User Interface
```sql
SELECT AVG(current), MAX(voltage), MIN(phase) FROM test.meters WHERE groupId = 10;
```
From TDengine 3.3.0.0, a new component called `taos-explorer` is included in the TDengine Docker image. You can use it to manage the databases, supertables, subtables, and data in your TDengine system. Some features are only available in TDengine Enterprise Edition; please contact the TDengine sales team if you need them.
6. Calculate the average, maximum, and minimum values for the `d1001` table every 10 seconds:
To use taos-explorer in the container, access the host port that is mapped from container port 6060. For example, if the host name is abc.com and host port 6060 is mapped to the container, open `http://abc.com:6060`. taos-explorer listens on port 6060 inside the container by default. When you use it for the first time, you need to register with your enterprise email; you can then log on with your username and password to the TDengine management system.
## Test data insert performance
After your TDengine Server is running normally, you can run the taosBenchmark utility to test its performance:
Start TDengine service and execute `taosBenchmark` (formerly named `taosdemo`) in a terminal.
```bash
taosBenchmark
```
This command creates the `meters` supertable in the `test` database. In the `meters` supertable, it then creates 10,000 subtables named `d0` to `d9999`. Each table has 10,000 rows and each row has four columns: `ts`, `current`, `voltage`, and `phase`. The timestamps of the data in these columns range from 2017-07-14 10:40:00 000 to 2017-07-14 10:40:09 999. Each table is randomly assigned a `groupId` tag from 1 to 10 and a `location` tag of either `California.Campbell`, `California.Cupertino`, `California.LosAngeles`, `California.MountainView`, `California.PaloAlto`, `California.SanDiego`, `California.SanFrancisco`, `California.SanJose`, `California.SantaClara` or `California.Sunnyvale`.
The `taosBenchmark` command creates a deployment with 100 million data points that you can use for testing purposes. The time required to create the deployment depends on your hardware. On most modern servers, the deployment is created in ten to twenty seconds.
You can customize the test deployment that taosBenchmark creates by specifying command-line parameters. For information about command-line parameters, run the `taosBenchmark --help` command. For more information about taosBenchmark, see [taosBenchmark](../../reference/components/taosbenchmark).
## Test data query performance
After using `taosBenchmark` to create your test deployment, you can run queries in the TDengine CLI to test its performance:
From the TDengine CLI (taos) query the number of rows in the `meters` supertable:
```sql
SELECT COUNT(*) FROM test.meters;
```
Query the average, maximum, and minimum values of all 100 million rows of data:
```sql
SELECT AVG(current), MAX(voltage), MIN(phase) FROM test.meters;
```
Query the number of rows whose `location` tag is `California.SanFrancisco`:
```sql
SELECT COUNT(*) FROM test.meters WHERE location = "California.SanFrancisco";
```
Query the average, maximum, and minimum values of all rows whose `groupId` tag is `10`:
```sql
SELECT AVG(current), MAX(voltage), MIN(phase) FROM test.meters WHERE groupId = 10;
```
Query the average, maximum, and minimum values for table `d10` in 10 second intervals:
```sql
SELECT FIRST(ts), AVG(current), MAX(voltage), MIN(phase) FROM test.d10 INTERVAL(10s);
```
In the query above, you are selecting the first timestamp (ts) in the interval. Another way of selecting this value is `_wstart`, which gives the start of the time window. For more information about windowed queries, see [Time-Series Extensions](../../reference/taos-sql/distinguished/).
```sql
SELECT _wstart, AVG(current), MAX(voltage), MIN(phase) FROM test.d1001 INTERVAL(10s);
```

@ -0,0 +1,42 @@
---
sidebar_label: Use TDengine Cloud
title: Get Started with TDengine Cloud
slug: /get-started/use-tdengine-cloud
---
TDengine Cloud is a fully managed cloud service for industrial big data. It delivers all features of TDengine Enterprise as a cloud-native solution in Amazon Web Services, Microsoft Azure, or Google Cloud Platform.
You can register for a TDengine Cloud account for free and automatically obtain a one-month free trial to test TDengine Cloud for yourself.
## Procedure
1. Register for a TDengine Cloud account.
1. In a web browser, open the [TDengine Cloud](https://cloud.tdengine.com) website.
2. In the **Sign up** section, enter your name and company email address.
3. Click **Get Confirmation Code**. A confirmation email is sent to your email address.
4. Copy the 6-digit confirmation code from the email and paste it into the **Confirmation Code** field.
5. Click **Sign in TDengine Cloud**.
6. On the page displayed, enter your name, company, country of residence, and phone number.
7. Specify a password and click **Continue**.
2. Determine whether you want to use any public databases and click **Next**.
The TDengine DB Mart includes several public databases that you can use for testing purposes. To enable access to a public database in your account, select the toggle. You can modify these settings after the account creation process is finished.
3. Create an organization.
1. Enter a name for your organization in TDengine Cloud. This name must be unique.
2. Specify whether to enable single sign-on (SSO).
- Select **Public** to use GitHub, Microsoft, or Google SSO.
- Select **Azure AD** to use Microsoft Entra ID. Enter the Azure domain, client ID, and client secret as prompted.
3. Click **Next**.
4. Create your first instance.
1. Select a cloud and region from the drop-down lists.
2. Enter a name for your instance.
3. Specify whether to enable high availability.
4. Specify whether to create a sample database.
5. Click **Select Plan** and select your desired price plan.
6. Click **Create**.
Your instance is created according to your specifications and you can begin to use TDengine Cloud. For more information, see the [TDengine Cloud documentation](/cloud).

@ -1,26 +0,0 @@
You can use `apt-get` to install TDengine from the official package repository.
**Configure the package repository**
```
wget -qO - http://repos.taosdata.com/tdengine.key | sudo apt-key add -
echo "deb [arch=amd64] http://repos.taosdata.com/tdengine-stable stable main" | sudo tee /etc/apt/sources.list.d/tdengine-stable.list
```
You can install beta versions by configuring the following package repository:
```
echo "deb [arch=amd64] http://repos.taosdata.com/tdengine-beta beta main" | sudo tee /etc/apt/sources.list.d/tdengine-beta.list
```
**Install TDengine with `apt-get`**
```
sudo apt-get update
apt-cache policy tdengine
sudo apt-get install tdengine
```
:::tip
This installation method is supported only for Debian and Ubuntu.
:::

@ -1,17 +0,0 @@
import PkgList from "/components/PkgList";
TDengine is easy to download and install.
The standard server installation package includes `taos`, `taosd`, `taosAdapter`, `taosBenchmark`, and sample code. You can also download a lite package that includes only `taosd` and the C/C++ client library.
You can download the TDengine installation package in .rpm, .deb, or .tar.gz format. The .tar.gz package includes `taosdump` and the TDinsight installation script. If you want to use these utilities with the .deb or .rpm package, download and install taosTools separately.
Between official releases, beta versions may be released that contain new features. Do not use beta versions for production or testing environments. Select the installation package appropriate for your system.
<PkgList type={0}/>
For information about installing TDengine, see [Install and Uninstall](../operation/pkg-install).
For information about TDengine releases, see [All Downloads](https://tdengine.com/all-downloads)
and [Release Notes](https://github.com/taosdata/TDengine/releases).

@ -1,7 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="-0.5 -1 32 32" width="50" height="50">
<g fill="#5865f2">
<path
d="M26.0015 6.9529C24.0021 6.03845 21.8787 5.37198 19.6623 5C19.3833 5.48048 19.0733 6.13144 18.8563 6.64292C16.4989 6.30193 14.1585 6.30193 11.8336 6.64292C11.6166 6.13144 11.2911 5.48048 11.0276 5C8.79575 5.37198 6.67235 6.03845 4.6869 6.9529C0.672601 12.8736 -0.41235 18.6548 0.130124 24.3585C2.79599 26.2959 5.36889 27.4739 7.89682 28.2489C8.51679 27.4119 9.07477 26.5129 9.55525 25.5675C8.64079 25.2265 7.77283 24.808 6.93587 24.312C7.15286 24.1571 7.36986 23.9866 7.57135 23.8161C12.6241 26.1255 18.0969 26.1255 23.0876 23.8161C23.3046 23.9866 23.5061 24.1571 23.7231 24.312C22.8861 24.808 22.0182 25.2265 21.1037 25.5675C21.5842 26.5129 22.1422 27.4119 22.7621 28.2489C25.2885 27.4739 27.8769 26.2959 30.5288 24.3585C31.1952 17.7559 29.4733 12.0212 26.0015 6.9529ZM10.2527 20.8402C8.73376 20.8402 7.49382 19.4608 7.49382 17.7714C7.49382 16.082 8.70276 14.7025 10.2527 14.7025C11.7871 14.7025 13.0425 16.082 13.0115 17.7714C13.0115 19.4608 11.7871 20.8402 10.2527 20.8402ZM20.4373 20.8402C18.9183 20.8402 17.6768 19.4608 17.6768 17.7714C17.6768 16.082 18.8873 14.7025 20.4373 14.7025C21.9717 14.7025 23.2271 16.082 23.1961 17.7714C23.1961 19.4608 21.9872 20.8402 20.4373 20.8402Z"
></path>
</g>
</svg>


@ -1,6 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="-1 -2 18 18" width="50" height="50">
<path
fill="#000"
d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0016 8c0-4.42-3.58-8-8-8z"
></path>
</svg>


@ -1,43 +1,10 @@
---
title: Get Started
description: This document describes how to install TDengine on various platforms.
slug: /get-started
---
import GitHubSVG from './github.svg'
import DiscordSVG from './discord.svg'
import TwitterSVG from './twitter.svg'
import YouTubeSVG from './youtube.svg'
import LinkedInSVG from './linkedin.svg'
import StackOverflowSVG from './stackoverflow.svg'
This section describes how to set up a TDengine environment quickly using Docker or installation packages and experience its capabilities.
You can install and run TDengine on Linux/Windows/macOS machines as well as Docker containers. You can also deploy TDengine as a managed service with TDengine Cloud.
The full package of TDengine includes the TDengine Server (`taosd`), TDengine Client (`taosc`), taosAdapter for connecting with third-party systems and providing a RESTful interface, a command-line interface, and some tools. In addition to client libraries for multiple languages, TDengine also provides a [RESTful interface](../reference/connectors/rest-api) through [taosAdapter](../reference/components/taosadapter).
```mdx-code-block
import DocCardList from '@theme/DocCardList';
import {useCurrentSidebarCategory} from '@docusaurus/theme-common';
<DocCardList items={useCurrentSidebarCategory().items}/>
```
## Join TDengine Community
<table width="100%">
<tr align="center" style={{border:0}}>
<td width="16%" style={{border:0}}><a href="https://github.com/taosdata/TDengine" target="_blank"><GitHubSVG /></a></td>
<td width="16%" style={{border:0}}><a href="https://discord.com/invite/VZdSuUg4pS" target="_blank"><DiscordSVG /></a></td>
<td width="16%" style={{border:0}}><a href="https://twitter.com/TDengineDB" target="_blank"><TwitterSVG /></a></td>
<td width="16%" style={{border:0}}><a href="https://www.youtube.com/@tdengine" target="_blank"><YouTubeSVG /></a></td>
<td width="16%" style={{border:0}}><a href="https://www.linkedin.com/company/tdengine" target="_blank"><LinkedInSVG /></a></td>
<td width="16%" style={{border:0}}><a href="https://stackoverflow.com/questions/tagged/tdengine" target="_blank"><StackOverflowSVG /></a></td>
</tr>
<tr align="center" style={{border:0,backgroundColor:'transparent'}}>
<td width="16%" style={{border:0,padding:0}}><a href="https://github.com/taosdata/TDengine" target="_blank">Star GitHub</a></td>
<td width="16%" style={{border:0,padding:0}}><a href="https://discord.com/invite/VZdSuUg4pS" target="_blank">Join Discord</a></td>
<td width="16%" style={{border:0,padding:0}}><a href="https://twitter.com/TDengineDB" target="_blank">Follow Twitter</a></td>
<td width="16%" style={{border:0,padding:0}}><a href="https://www.youtube.com/@tdengine" target="_blank">Subscribe YouTube</a></td>
<td width="16%" style={{border:0,padding:0}}><a href="https://www.linkedin.com/company/tdengine" target="_blank">Follow LinkedIn</a></td>
<td width="16%" style={{border:0,padding:0}}><a href="https://stackoverflow.com/questions/tagged/tdengine" target="_blank">Ask StackOverflow</a></td>
</tr>
</table>
- To deploy TDengine in a container, see [Get Started with TDengine Using Docker](deploy-in-docker/).
- To install TDengine on a local server, see [Get Started with TDengine Using an Installation Package](deploy-from-package/).
- To use TDengine as a fully managed cloud service instead of deploying on your own, see [Get Started with TDengine Cloud](use-tdengine-cloud/).

View File

@ -1,6 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 -2 24 24" width="50" height="50">
<path
fill="rgb(10, 102, 194)"
d="M20.5 2h-17A1.5 1.5 0 002 3.5v17A1.5 1.5 0 003.5 22h17a1.5 1.5 0 001.5-1.5v-17A1.5 1.5 0 0020.5 2zM8 19H5v-9h3zM6.5 8.25A1.75 1.75 0 118.3 6.5a1.78 1.78 0 01-1.8 1.75zM19 19h-3v-4.74c0-1.42-.6-1.93-1.38-1.93A1.74 1.74 0 0013 14.19a.66.66 0 000 .14V19h-3v-9h2.9v1.3a3.11 3.11 0 012.7-1.4c1.55 0 3.36.86 3.36 3.66z"
></path>
</svg>


@ -1,7 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="-8 0 48 48" width="50" height="50">
<path d="M26 41v-9h4v13H0V32h4v9h22z" fill="#BCBBBB" />
<path
d="M23 34l.8-3-16.1-3.3L7 31l16 3zM9.2 23.2l15 7 1.4-3-15-7-1.4 3zm4.2-7.4L26 26.4l2.1-2.5-12.7-10.6-2.1 2.5zM21.5 8l-2.7 2 9.9 13.3 2.7-2L21.5 8zM7 38h16v-3H7v3z"
fill="#F48024"
/>
</svg>


@ -1,7 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 -2 24 24" width="50" height="50">
<g fill="rgb(29, 155, 240)">
<path
d="M23.643 4.937c-.835.37-1.732.62-2.675.733.962-.576 1.7-1.49 2.048-2.578-.9.534-1.897.922-2.958 1.13-.85-.904-2.06-1.47-3.4-1.47-2.572 0-4.658 2.086-4.658 4.66 0 .364.042.718.12 1.06-3.873-.195-7.304-2.05-9.602-4.868-.4.69-.63 1.49-.63 2.342 0 1.616.823 3.043 2.072 3.878-.764-.025-1.482-.234-2.11-.583v.06c0 2.257 1.605 4.14 3.737 4.568-.392.106-.803.162-1.227.162-.3 0-.593-.028-.877-.082.593 1.85 2.313 3.198 4.352 3.234-1.595 1.25-3.604 1.995-5.786 1.995-.376 0-.747-.022-1.112-.065 2.062 1.323 4.51 2.093 7.14 2.093 8.57 0 13.255-7.098 13.255-13.254 0-.2-.005-.402-.014-.602.91-.658 1.7-1.477 2.323-2.41z"
></path>
</g>
</svg>


@ -1,11 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="-2 -8 32 32" width="50" height="50">
<g>
<g>
<path
d="M27.9727 3.12324C27.6435 1.89323 26.6768 0.926623 25.4468 0.597366C23.2197 2.24288e-07 14.285 0 14.285 0C14.285 0 5.35042 2.24288e-07 3.12323 0.597366C1.89323 0.926623 0.926623 1.89323 0.597366 3.12324C2.24288e-07 5.35042 0 10 0 10C0 10 2.24288e-07 14.6496 0.597366 16.8768C0.926623 18.1068 1.89323 19.0734 3.12323 19.4026C5.35042 20 14.285 20 14.285 20C14.285 20 23.2197 20 25.4468 19.4026C26.6768 19.0734 27.6435 18.1068 27.9727 16.8768C28.5701 14.6496 28.5701 10 28.5701 10C28.5701 10 28.5677 5.35042 27.9727 3.12324Z"
fill="#FF0000"
></path>
<path d="M11.4253 14.2854L18.8477 10.0004L11.4253 5.71533V14.2854Z" fill="white"></path>
</g>
</g>
</svg>


@ -0,0 +1,228 @@
---
sidebar_label: Data Model
title: The TDengine Data Model
slug: /basic-features/data-model
---
import Image from '@theme/IdealImage';
import dataModel from '../assets/data-model-01.png';
To clearly explain the concepts of time-series data and facilitate the writing of example programs, the TDengine documentation uses smart meters as an example. These example smart meters can collect three metrics: current, voltage, and phase. In addition, each smart meter also has two static attributes: location and group ID. The data collected by these smart meters is shown in the table below.
|Device ID| Timestamp | Current | Voltage | Phase | Location | Group ID |
|:-------:|:---------:|:-------:|:-------:|:-----:|:--------:|:--------:|
|d1001 |1538548685000 | 10.3 | 219 | 0.31 | California.SanFrancisco |2|
|d1002 | 1538548684000 | 10.2 | 220 | 0.23 | California.SanFrancisco |3|
|d1003 | 1538548686500 | 11.5 | 221 | 0.35 | California.LosAngeles | 3 |
|d1004 | 1538548685500 | 13.4 | 223 | 0.29 | California.LosAngeles | 2 |
|d1001 | 1538548695000 | 12.6 | 218 | 0.33 | California.SanFrancisco |2|
|d1004 | 1538548696600 | 11.8 | 221 | 0.28 | California.LosAngeles | 2 |
|d1002 | 1538548696650 | 10.3 | 218 | 0.25 | California.SanFrancisco | 3 |
|d1001 | 1538548696800 | 12.3 | 221 | 0.31 | California.SanFrancisco | 2 |
These smart meters collect data based on external trigger events or preset periods, ensuring the continuity and temporality of the data, thus forming a continuously updated data stream.
## Basic Concepts
### Metric
A metric refers to a physical quantity, such as current, voltage, or temperature, obtained from a sensor, device, or other data collection point. Since these physical quantities change over time, the types of data collected are diverse, including integers, floating-point numbers, and strings. As time passes, the stored data will continue to grow. For example, in smart meters, current, voltage, and phase are typical metrics collected.
### Tag
A tag refers to a static attribute associated with a sensor, device, or other data collection point. These are attributes that do not change over time, such as device model, color, or location. Tags can be of any data type. Although tags themselves are static, in practical applications you may need to modify, delete, or add tags. Unlike collected metrics, the amount of tag data stored remains relatively stable over time and does not show a significant growth trend. In the example of smart meters, location and group ID are typical tags.
### Data Collection Point
A data collection point (DCP) refers to a hardware or software device responsible for collecting metrics at a certain preset time period or when triggered by specific events. A data collection point can collect one or more quantities at the same time, but these quantities are obtained at the same moment and have the same timestamp. Complex structured devices typically include multiple data collection points, each with different collection cycles, and they operate independently without interference. For example, a car might have a dedicated data collection point for collecting location information, some for monitoring engine status, and others focused on monitoring the interior environment. Thus, a car could contain three different types of data collection points. In the example of smart meters, identifiers such as d1001, d1002, and d1003 represent different data collection points.
### Table
Given that the time-series data collected from DCPs is usually structured, TDengine uses the traditional relational database model to manage data. At the same time, to fully utilize the characteristics of time-series data, TDengine adopts a "one table per device" design, requiring a separate table for each data collection point. For example, if there are millions of smart meters, a corresponding number of tables need to be created in TDengine. In the example data of smart meters, the smart meter with device ID d1001 corresponds to a table in TDengine, and all the time-series data collected by this meter is stored in this table. This design approach retains the usability of relational databases while fully utilizing the unique advantages of time-series data:
1. Since the data generation process at different data collection points is completely independent, and each data collection point has a unique data source, there is only one writer per table. This allows for lock-free data writing, significantly increasing the write speed.
2. For a data collection point, the data it generates is in chronological order, so the write operation can be implemented in an append-only manner, further greatly enhancing the data writing speed.
3. The data from a data collection point is stored continuously in blocks. Thus, reading data from a specific time period can significantly reduce random read operations, dramatically improving the speed of data reading and querying.
4. Within a data block, columnar storage is used, and different compression algorithms can be applied to different data types to improve the compression ratio. Moreover, since the rate of data collection changes is usually slow, the compression ratio will be higher.
If the traditional method of writing data from multiple data collection points into a single table is used, due to uncontrollable network latency, the sequence of data arrival at the server from different data collection points cannot be guaranteed, and the write operation needs to be protected by locks. Moreover, it is difficult to ensure that the data from one data collection point is stored continuously together. Using the method of one data collection point per table can ensure to the greatest extent that the performance of insertion and querying for a single data collection point is optimal, and the data compression ratio is the highest.
In TDengine, the name of the data collection point (e.g., d1001) is usually used as the table name, and each data collection point can have multiple metrics (such as current, voltage, phase, etc.), each corresponding to a column in a table. The data type of the metrics can be integer, floating-point, string, etc.
Additionally, the first column of the table must be a timestamp. For each metric, TDengine will use the first column timestamp to build an index and use columnar storage. For complex devices, such as cars, which have multiple data collection points, multiple tables need to be created for one car.
### Supertable
Although the "one table per device" design helps to manage each collection point specifically, as the number of devices increases, the number of tables also increases dramatically, posing challenges for database management and data analysis. When performing aggregation operations across data collection points, users need to deal with a large number of tables, making the work exceptionally cumbersome.
To solve this problem, TDengine introduces the supertable. A supertable is a data structure that can aggregate certain types of data collection points together into a logically unified table. These data collection points have the same table structure, but their static properties (such as tags) may differ. When creating a supertable, in addition to defining the metrics, it is also necessary to define the tags of the supertable. A supertable must contain at least one timestamp column, one or more metric columns, and one or more tag columns. Moreover, the tags of the supertable can be flexibly added, modified, or deleted.
In TDengine, a table represents a specific data collection point, while a supertable represents a collection of data collection points with the same attributes. Taking smart meters as an example, we can create a supertable for this type of meter, which includes all the common properties and metrics of smart meters. This design not only simplifies table management but also facilitates aggregation operations across data collection points, thereby improving the efficiency of data processing.
### Subtable
A subtable is a logical abstraction of a data collection point and is a specific table belonging to a supertable. You can use the definition of the supertable as a template and create subtables by specifying the tag values of the subtables. Thus, tables generated through the supertable are referred to as subtables. The relationship between the supertable and subtables is mainly reflected in the following aspects.
- A supertable contains multiple subtables, which have the same table structure but different tag values.
- The table structure of subtables cannot be directly modified, but the columns and tags of the supertable can be modified, and the modifications take effect immediately for all subtables.
- A supertable defines a template and does not store any data or tag information itself.
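As a brief illustration of this relationship, the following sketch creates a subtable from a supertable template; it assumes the `meters` supertable that is defined later in this section.
```sql
-- Create subtable d1001 from the meters supertable, supplying values for its two tags.
CREATE TABLE d1001 USING meters TAGS ('California.SanFrancisco', 2);
```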
In TDengine, query operations can be performed on both subtables and supertables. For queries on supertables, TDengine treats the data from all subtables as a whole, first filtering out the tables that meet the query conditions through tags, then querying the time-series data on these subtables separately, and finally merging the query results from each subtable. Essentially, by supporting queries on supertables, TDengine achieves efficient aggregation of multiple similar data collection points.
To better understand the relationship between metrics, tags, supertables, and subtables, taking smart meters as an example, refer to the following diagram.
<figure>
<Image img={dataModel} alt="Data Model Diagram"/>
<figcaption>Figure 1. The TDengine data model</figcaption>
</figure>
### Database
A database in TDengine is used to manage a collection of tables. TDengine allows a running instance to contain multiple databases, and each database can be configured with different storage strategies. Since different types of data collection points usually have different data characteristics, such as data collection frequency, data retention period, number of replicas, data block size, etc., it is recommended to create supertables with different data characteristics in different databases.
In a database, one to many supertables can be included, but each supertable can only belong to one database. At the same time, all subtables owned by a supertable are also stored in that database. This design helps to achieve more fine-grained data management and optimization, ensuring that TDengine can provide the best processing performance based on different data characteristics.
### Timestamps
Timestamps play a crucial role in time-series data processing, especially when applications need to access the database from multiple time zones, making the issue more complex. Before delving into how TDengine handles timestamps and time zones, let's first introduce a few basic concepts.
- Local date and time: Refers to the local time of a specific region, usually expressed as a string in the format yyyy-MM-dd hh:mm:ss.SSS. This representation of time does not include any time zone information, such as "2021-07-21 12:00:00.000".
- Time zone: The standard time of a geographical region on Earth. Coordinated Universal Time (UTC), or Greenwich Mean Time, is the international time standard; other time zones are usually expressed as an offset from UTC, such as "UTC+8" for the time zone eight hours ahead of UTC.
- UTC timestamp: The number of milliseconds since the UNIX epoch (UTC time January 1, 1970, at 00:00). For example, "1700000000000" corresponds to the date and time "2023-11-14 22:13:20 (UTC+0)".
In TDengine, when time-series data is saved, what is actually stored is the UTC timestamp. When writing data, TDengine handles timestamps in the following two ways.
- RFC-3339 format: When using this format, TDengine can correctly parse time strings with time zone information into UTC timestamps. For example, "2018-10-03T14:38:05.000+08:00" will be converted into a UTC timestamp.
- Non-RFC-3339 format: If the time string does not contain time zone information, TDengine will use the time zone setting of the application to automatically convert the time into a UTC timestamp.
When querying data, the TDengine client will automatically convert the saved UTC timestamps into local time according to the current time zone setting of the application, ensuring that users in different time zones can see the correct time information.
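As an illustrative sketch (assuming the subtable `d1001` created later in this chapter, and a client whose time zone is set to UTC+8), the two INSERT statements below store the same instant: the first literal carries an explicit offset and is parsed as is, while the second has no time zone information and is interpreted using the client's time zone setting.
```sql
-- RFC-3339 style literal with an explicit UTC+8 offset
INSERT INTO d1001 VALUES ('2018-10-03T14:38:05.000+08:00', 10.3, 219, 0.31);
-- No time zone information: interpreted using the client's time zone setting
INSERT INTO d1001 VALUES ('2018-10-03 14:38:05.000', 10.3, 219, 0.31);
```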
## Data Modeling
This section uses smart meters as an example to briefly introduce how to use SQL to create databases, supertables, and basic table operations in TDengine.
### Creating a Database
The SQL to create a database for storing meter data is as follows:
```sql
CREATE DATABASE power PRECISION 'ms' KEEP 3650 DURATION 10 BUFFER 16;
```
This SQL will create a database named `power`, with the following parameters explained:
- `PRECISION 'ms'`: This database uses millisecond (ms) precision timestamps for its time-series data
- `KEEP 3650`: The data in this database will be retained for 3650 days, and data older than 3650 days will be automatically deleted
- `DURATION 10`: Data for every 10 days is stored in one data file
- `BUFFER 16`: Writing uses a memory pool of size 16MB.
After creating the power database, you can execute the USE statement to switch databases.
```sql
use power;
```
This SQL switches the current database to `power`, indicating that subsequent insertions, queries, and other operations will be performed in the current `power` database.
### Creating a Supertable
The SQL to create a supertable named `meters` is as follows:
```sql
CREATE STABLE meters (
ts timestamp,
current float,
voltage int,
phase float
) TAGS (
location varchar(64),
group_id int
);
```
In TDengine, the SQL statement to create a supertable is similar to that in relational databases. For example, in the SQL above, `CREATE STABLE` is the keyword, indicating the creation of a supertable; then, `meters` is the name of the supertable; in the parentheses following the table name, the columns of the supertable are defined (column names, data types, etc.), with the following rules:
1. The first column must be a timestamp column. For example: `ts timestamp` indicates that the timestamp column name is `ts`, and its data type is `timestamp`;
2. Starting from the second column are the measurement columns. The data types of measurements can be integer, float, string, and so on. For example: `current float` indicates that the measurement column `current` has the data type `float`;
Finally, TAGS is a keyword, indicating tags, and in the parentheses following TAGS, the tags of the supertable are defined (tag names, data types, etc.).
1. The data type of tags can be integer, float, string, and so on. For example: `location varchar(64)` indicates that the tag column `location` has the data type `varchar(64)`;
2. The names of tags cannot be the same as the names of measurement columns.
### Creating a Table
The SQL to create a subtable `d1001` using the supertable is as follows:
```sql
CREATE TABLE d1001
USING meters (
location,
group_id
) TAGS (
"California.SanFrancisco",
2
);
```
In the SQL above, `CREATE TABLE` is a keyword indicating the creation of a table; `d1001` is the name of the subtable; `USING` is a keyword indicating the use of a supertable as a template; `meters` is the name of the supertable; in the parentheses following the supertable name, `location`, `group_id` are the names of the tag columns of the supertable; `TAGS` is a keyword, and the values of the tag columns for the subtable are specified in the following parentheses. `"California.SanFrancisco"` and `2` indicate that the location of subtable `d1001` is `California.SanFrancisco`, and the group ID is `2`.
When performing write or query operations on a supertable, users can use the pseudocolumn `tbname` to specify or output the name of the corresponding subtable.
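For example, a sketch of using `tbname` in a query against the supertable (the exact output columns may vary slightly by version):
```sql
-- Show which subtable each returned row comes from
SELECT tbname, ts, current, voltage
FROM meters
WHERE group_id = 2
LIMIT 5;
```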
### Automatic Table Creation
In TDengine, to simplify user operations and ensure smooth data entry, even if a subtable does not exist, users can use the automatic table creation SQL with the `using` keyword to write data. This mechanism allows the system to automatically create the subtable when it encounters a non-existent subtable, and then perform the data writing operation. If the subtable already exists, the system will write the data directly without any additional steps.
The SQL for writing data while automatically creating tables is as follows:
```sql
INSERT INTO d1002
USING meters
TAGS (
"California.SanFrancisco",
2
) VALUES (
NOW,
10.2,
219,
0.32
);
```
In the SQL above, `INSERT INTO d1002` indicates writing data into the subtable `d1002`; `USING meters` indicates using the supertable `meters` as a template; `TAGS ("California.SanFrancisco", 2)` indicates the tag values for subtable `d1002` are `California.SanFrancisco` and `2`; `VALUES (NOW, 10.2, 219, 0.32)` indicates inserting a record into subtable `d1002` with values NOW (current timestamp), 10.2 (current), 219 (voltage), 0.32 (phase). When TDengine executes this SQL, if subtable `d1002` already exists, it writes the data directly; if subtable `d1002` does not exist, it first automatically creates the subtable, then writes the data.
### Creating Basic Tables
In TDengine, apart from subtables with tags, there are also basic tables without any tags. These tables are similar to tables in traditional relational databases, and users can create them using SQL.
The differences between basic tables and subtables are:
1. Tag Extensibility: Subtables add static tags on top of basic tables, allowing them to carry more metadata. Additionally, the tags of subtables are mutable, and users can add, delete, or modify tags as needed.
2. Table Ownership: Subtables always belong to a supertable and are part of it. Basic tables, however, exist independently and do not belong to any supertable.
3. Conversion Restrictions: In TDengine, basic tables cannot be directly converted into subtables, and likewise, subtables cannot be converted into basic tables. These two types of tables determine their structure and properties at creation and cannot be changed later.
In summary, basic tables provide functionality similar to traditional relational database tables, while subtables introduce a tagging mechanism, offering richer descriptions and more flexible management for time-series data. Users can choose to create basic tables or subtables based on actual needs.
The SQL for creating a basic table without any tags is as follows:
```sql
CREATE TABLE d1003(
ts timestamp,
current float,
voltage int,
phase float,
location varchar(64),
group_id int
);
```
The SQL above indicates the creation of the basic table `d1003`, with a structure including columns `ts`, `current`, `voltage`, `phase`, `location`, `group_id`, totaling 6 columns. This data model is completely consistent with relational databases.
Using basic tables as the data model means that static tag data (such as location and group_id) will be repeatedly stored in each row of the table. This approach not only increases storage space consumption but also significantly lowers query performance compared to using a supertable data model, as it cannot directly utilize tag data for filtering.
### Multi-Column Model vs. Single-Column Model
TDengine supports flexible data model designs, including multi-column and single-column models. The multi-column model allows multiple physical quantities collected simultaneously from the same data collection point with the same timestamp to be stored in different columns of the same supertable. However, in some extreme cases, a single-column model might be used, where each collected physical quantity is established in a separate table. For example, for the three physical quantities of current, voltage, and phase, three separate supertables might be established.
Although TDengine recommends using the multi-column model because it generally offers better writing and storage efficiency, the single-column model might be more suitable in certain specific scenarios. For example, if the types of quantities collected at a data collection point frequently change, using a multi-column model would require frequent modifications to the supertable's structural definition, increasing the complexity of the application. In such cases, using a single-column model can simplify the design and management of the application, as it allows independent management and expansion of each physical quantity's supertable.
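A minimal sketch of what a single-column model could look like for the smart meter example (the table names here are illustrative, not part of the reference model):
```sql
-- One supertable per physical quantity instead of one multi-column supertable
CREATE STABLE meters_current (ts timestamp, current float) TAGS (location varchar(64), group_id int);
CREATE STABLE meters_voltage (ts timestamp, voltage int) TAGS (location varchar(64), group_id int);
CREATE STABLE meters_phase (ts timestamp, phase float) TAGS (location varchar(64), group_id int);
```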
Overall, TDengine offers flexible data model options, allowing users to choose the most suitable model based on actual needs and scenarios to optimize performance and manage complexity.
View File
@ -0,0 +1,138 @@
---
sidebar_label: Data Ingestion
title: Data Ingestion
slug: /basic-features/data-ingestion
---
This chapter uses the data model of smart meters as an example to introduce how to write, update, and delete time-series data in TDengine using SQL.
## Writing
In TDengine, you can write time-series data using the SQL insert statement.
### Writing One Record at a Time
Assume that the smart meter with device ID d1001 collected data on October 3, 2018, at 14:38:05: current 10.3 A, voltage 219 V, phase 0.31. We have already created the subtable d1001, which belongs to the supertable meters, in TDengine's power database. The following INSERT statements are three equivalent ways to write this record.
1. Write the time-series data into the subtable d1001 by explicitly listing the columns:
```sql
insert into d1001 (ts, current, voltage, phase) values ( "2018-10-03 14:38:05", 10.3, 219, 0.31)
```
The above SQL writes `2018-10-03 14:38:05`, `10.3`, `219`, `0.31` into the columns `ts`, `current`, `voltage`, `phase` of the subtable `d1001`.
2. When the `VALUES` part of the `INSERT` statement includes all columns of the table, the list of fields before `VALUES` can be omitted, as shown in the following SQL statement, which has the same effect as the previous INSERT statement specifying columns.
```sql
insert into d1001 values("2018-10-03 14:38:05", 10.3, 219, 0.31)
```
3. For the table's timestamp column (the first column), you can also supply a numeric timestamp expressed in the database's time precision (milliseconds here).
```sql
INSERT INTO d1001 VALUES (1538548685000, 10.3, 219, 0.31);
```
The effects of the above three SQL statements are exactly the same.
### Writing Multiple Records at Once
Assume that the smart meter with device ID d1001 collects data every 10s and reports data every 30s, i.e., it needs to write 3 records every 30s. Users can write multiple records in one insert statement. The following SQL writes a total of 3 records.
```sql
insert into d1001 values
( "2018-10-03 14:38:05", 10.2, 220, 0.23),
( "2018-10-03 14:38:15", 12.6, 218, 0.33),
( "2018-10-03 14:38:25", 12.3, 221, 0.31)
```
The above SQL writes a total of three records.
### Writing to Multiple Tables at Once
Assume that the smart meters with device IDs d1001, d1002, and d1003 all need to write 3 records every 30 seconds. For such cases, TDengine supports writing multiple records to multiple tables at once.
```sql
INSERT INTO d1001 VALUES
("2018-10-03 14:38:05", 10.2, 220, 0.23),
("2018-10-03 14:38:15", 12.6, 218, 0.33),
("2018-10-03 14:38:25", 12.3, 221, 0.31)
d1002 VALUES
("2018-10-03 14:38:04", 10.2, 220, 0.23),
("2018-10-03 14:38:14", 10.3, 218, 0.25),
("2018-10-03 14:38:24", 10.1, 220, 0.22)
d1003 VALUES
("2018-10-03 14:38:06", 11.5, 221, 0.35),
("2018-10-03 14:38:16", 10.4, 220, 0.36),
("2018-10-03 14:38:26", 10.3, 220, 0.33)
;
```
The above SQL writes a total of nine records.
### Specifying Columns for Writing
You can write data to specific columns of a table by specifying columns. Columns not appearing in the SQL will be automatically filled with NULL values. Note that the timestamp column must be present, and its value cannot be NULL. The following SQL writes one record to the subtable d1004. This record only includes voltage and phase, with the current value being NULL.
```sql
insert into d1004 (ts, voltage, phase) values("2018-10-04 14:38:06", 223, 0.29)
```
### Automatic Table Creation on Insert
Users can perform inserts using the `using` keyword for automatic table creation. If the subtable does not exist, it triggers automatic table creation before data insertion; if the subtable already exists, it directly inserts the data. An insert statement with automatic table creation can also specify only some tag columns for insertion, leaving the unspecified tag columns as NULL values. The following SQL inserts a record. If the subtable d1005 does not exist, it first creates the table automatically with the tag `group_id` value as NULL, then inserts the data.
```sql
insert into d1005
using meters (location)
tags ( "beijing.chaoyang")
values ( "2018-10-04 14:38:07", 10.15, 217, 0.33)
```
The insert statement with automatic table creation also supports inserting data into multiple tables in one statement. The following SQL uses an automatic table creation insert statement to insert 9 records.
```sql
INSERT INTO d1001 USING meters TAGS ("California.SanFrancisco", 2) VALUES
("2018-10-03 14:38:05", 10.2, 220, 0.23),
("2018-10-03 14:38:15", 12.6, 218, 0.33),
("2018-10-03 14:38:25", 12.3, 221, 0.31)
d1002 USING meters TAGS ("California.SanFrancisco", 3) VALUES
("2018-10-03 14:38:04", 10.2, 220, 0.23),
("2018-10-03 14:38:14", 10.3, 218, 0.25),
("2018-10-03 14:38:24", 10.1, 220, 0.22)
d1003 USING meters TAGS ("California.LosAngeles", 2) VALUES
("2018-10-03 14:38:06", 11.5, 221, 0.35),
("2018-10-03 14:38:16", 10.4, 220, 0.36),
("2018-10-03 14:38:26", 10.3, 220, 0.33)
;
```
### Inserting Through Supertables
TDengine also supports direct data insertion into supertables. It is important to note that a supertable is a template and does not store data itself; the data is stored in the corresponding subtables. The following SQL inserts a record into the subtable d1001 by specifying the tbname column.
```sql
insert into meters (tbname, ts, current, voltage, phase, location, group_id)
values( "d1001, "2018-10-03 14:38:05", 10.2, 220, 0.23, "California.SanFrancisco", 2)
```
### Zero-Code Insertion
To facilitate easy data insertion for users, TDengine has seamlessly integrated with many well-known third-party tools, including Telegraf, Prometheus, EMQX, StatsD, collectd, and HiveMQ. Users only need to perform simple configurations on these tools to easily import data into TDengine. Additionally, TDengine Enterprise offers a variety of connectors, such as MQTT, OPC, AVEVA PI System, Wonderware, Kafka, MySQL, Oracle, etc. By configuring the corresponding connection information on the TDengine side, users can efficiently write data from different data sources into TDengine without writing any code.
## Update
Time-series data can be updated by inserting a record with a duplicate timestamp; the newly inserted values replace the old ones. The following SQL, by specifying columns, inserts one row into the subtable `d1001`; if data with the datetime `2018-10-03 14:38:05` already exists in subtable `d1001`, the new `current` value 22 replaces the old value.
```sql
INSERT INTO d1001 (ts, current) VALUES ("2018-10-03 14:38:05", 22);
```
## Delete
To facilitate the cleanup of abnormal data caused by equipment failures and other reasons, TDengine supports deleting time-series data based on timestamps. The following SQL deletes all data in the supertable `meters` with timestamps earlier than `2021-10-01 10:40:00.100`. Data deletion is irreversible, so use it with caution. To ensure that the data being deleted is indeed what you want to delete, it is recommended to first use a select statement with the deletion condition in the where clause to view the data to be deleted, and confirm it is correct before executing delete.
```sql
delete from meters where ts < '2021-10-01 10:40:00.100' ;
```
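For instance, a sketch of checking the scope of a deletion before running it (counting the matching rows rather than listing them):
```sql
-- Inspect how many rows the condition matches before deleting
SELECT count(*) FROM meters WHERE ts < '2021-10-01 10:40:00.100';
```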
View File
@ -0,0 +1,724 @@
---
sidebar_label: Data Querying
title: Data Querying
slug: /basic-features/data-querying
---
import Image from '@theme/IdealImage';
import windowModel from '../assets/data-querying-01.png';
import slidingWindow from '../assets/data-querying-02.png';
import sessionWindow from '../assets/data-querying-03.png';
import eventWindow from '../assets/data-querying-04.png';
Compared to many other time-series and real-time databases, a unique advantage of TDengine since its first release is its support for standard SQL queries. This feature significantly reduces the learning curve for users. This chapter will use the data model of smart meters as an example to demonstrate how to use SQL queries in TDengine to handle time-series data. For further details and features of SQL syntax, it is recommended to refer to the official TDengine documentation. By studying this chapter, you will be able to master TDengine's SQL querying techniques and efficiently operate and analyze time-series data.
## Basic Query
To better introduce TDengine data querying, use the following taosBenchmark command to generate the time-series data needed for this chapter.
```shell
taosBenchmark --start-timestamp=1600000000000 --tables=100 --records=10000000 --time-step=10000
```
With the above command, the taosBenchmark tool generates a test database in TDengine containing a total of 1 billion rows of time-series data. The timestamps start from `1600000000000` (2020-09-13T20:26:40+08:00), the data covers `100` devices (subtables), each device has `10000000` rows, and the collection interval is 10 seconds per row.
In TDengine, users can specify conditions through the WHERE statement to query time-series data. Taking the data of smart meters as an example:
```sql
SELECT * FROM meters
WHERE voltage > 230
ORDER BY ts DESC
LIMIT 5;
```
The above SQL queries records from the supertable `meters` where the `voltage` is greater than 230V, sorted in descending order by time, and only outputs the first 5 rows. The query results are as follows:
```text
ts | current | voltage | phase | groupid | location |
===================================================================================================
2023-11-15 06:13:10.000 | 14.0601978 | 232 | 146.5000000 | 10 | California.Sunnyvale |
2023-11-15 06:13:10.000 | 14.0601978 | 232 | 146.5000000 | 1 | California.LosAngles |
2023-11-15 06:13:10.000 | 14.0601978 | 232 | 146.5000000 | 10 | California.Sunnyvale |
2023-11-15 06:13:10.000 | 14.0601978 | 232 | 146.5000000 | 5 | California.Cupertino |
2023-11-15 06:13:10.000 | 14.0601978 | 232 | 146.5000000 | 4 | California.SanFrancisco |
Query OK, 5 row(s) in set (0.145403s)
```
## Aggregate Query
TDengine supports aggregate queries through the GROUP BY clause. When an SQL statement includes a GROUP BY clause, the SELECT list can only contain the following expressions:
1. Constants
2. Aggregate functions
3. Expressions identical to those after GROUP BY
4. Expressions containing the above expressions
The GROUP BY clause is used to group data and return a summary row for each group. In the GROUP BY clause, any column from tables or views can be used as the basis for grouping, and these columns do not need to appear in the select list. Additionally, users can directly perform aggregate queries on supertables without the need to create subtables beforehand. Taking the data model of smart meters as an example, the SQL using the GROUP BY clause is as follows:
```sql
SELECT groupid, avg(voltage)
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
AND ts < "2023-01-01T00:00:00+08:00"
GROUP BY groupid;
```
The SQL above queries the supertable `meters` for data where the timestamp is greater than or equal to `2022-01-01T00:00:00+08:00` and less than `2023-01-01T00:00:00+08:00`, grouped by `groupid`, to calculate the average voltage for each group. The query results are as follows:
```text
groupid | avg(voltage) |
======================================
8 | 243.961981544901079 |
5 | 243.961981544901079 |
1 | 243.961981544901079 |
7 | 243.961981544901079 |
9 | 243.961981544901079 |
6 | 243.961981544901079 |
4 | 243.961981544901079 |
10 | 243.961981544901079 |
2 | 243.961981544901079 |
3 | 243.961981544901079 |
Query OK, 10 row(s) in set (0.042446s)
```
**Note**: The GROUP BY clause does not guarantee that the aggregated results are returned in any particular order. To obtain an ordered result set, add an ORDER BY clause, as shown in the sketch below. This allows you to adjust the order of the output as needed to meet specific business or reporting requirements.
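A sketch of the same aggregation with an explicit ordering (assuming the test data generated above):
```sql
-- Same aggregation as before, but with the groups returned in ascending groupid order
SELECT groupid, avg(voltage)
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
  AND ts < "2023-01-01T00:00:00+08:00"
GROUP BY groupid
ORDER BY groupid;
```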
TDengine provides a variety of built-in aggregation functions, as shown in the table below (a short usage sketch follows the table).
| Aggregation Function | Description |
|:----------------------:|:--------------------------------------------------------------:|
|APERCENTILE | Calculates the approximate percentile of a specified column in a table/supertable, similar to the PERCENTILE function, but returns an approximate result. |
|AVG | Calculates the average value of a specified field |
|COUNT | Counts the number of records for a specified field |
|ELAPSED| The elapsed function expresses the continuous duration within a statistical period, and when used with the twa function, it can calculate the area under the statistical curve. When specifying a window with the INTERVAL clause, it calculates the time range covered by data in each window within the given time range; if there is no INTERVAL clause, it returns the time range covered by data for the entire given time range. Note that ELAPSED returns not the absolute value of the time range, but the number of units obtained by dividing the absolute value by time_unit.|
|LEASTSQUARES | Calculates the fitted line equation for a column in the table. start_val is the initial value of the independent variable, and step_val is the step value of the independent variable. |
|SPREAD | Calculates the difference between the maximum and minimum values of a column in the table.|
|STDDEV | Calculates the standard deviation of a column in the table. |
|SUM | Calculates the sum of a column in a table/supertable. |
|HYPERLOGLOG | Uses the hyperloglog algorithm to return the cardinality of a column. This algorithm significantly reduces memory usage in large data volumes, producing an estimated cardinality with a standard error of 0.81%. The algorithm is not very accurate with smaller data volumes, and the method `select count(data) from (select unique(col) as data from table)` can be used instead. |
|HISTOGRAM | Calculates the distribution of data according to user-specified intervals. |
|PERCENTILE | Calculates the percentile of a column's values in the table.|
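As a sketch combining a few of these functions on the test data generated above:
```sql
-- Row count, average current, and voltage spread over a one-day slice of the data
SELECT count(*), avg(current), spread(voltage)
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
  AND ts < "2022-01-02T00:00:00+08:00";
```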
## Data Partitioning Query
TDengine supports the PARTITION BY clause. When you need to partition data by certain dimensions and then perform a series of calculations within the partitioned data space, you can use the PARTITION BY clause to query, with the syntax as follows:
```sql
PARTITION BY part_list
```
`part_list` can be any scalar expression, including columns, constants, scalar functions, and their combinations.
TDengine processes the data partitioning clause as follows:
1. The data partitioning clause is placed after the WHERE clause;
2. The data partitioning clause divides the table data by the specified dimensions, and each partitioned shard undergoes specified calculations. The calculations are defined by subsequent clauses (window clause, GROUP BY clause, or SELECT clause);
3. The data partitioning clause can be used together with a window partitioning clause (or GROUP BY clause), in which case the subsequent clauses apply to each partitioned shard.
The SQL for data partitioning is as follows:
```sql
SELECT location, avg(voltage)
FROM meters
PARTITION BY location;
```
The example SQL above queries the supertable `meters`, grouping the data by the label `location`, and calculates the average voltage for each group. The query results are as follows:
```text
location | avg(voltage) |
======================================================
California.SantaClara | 243.962050000000005 |
California.SanFrancisco | 243.962050000000005 |
California.SanJose | 243.962050000000005 |
California.LosAngles | 243.962050000000005 |
California.SanDiego | 243.962050000000005 |
California.Sunnyvale | 243.962050000000005 |
California.PaloAlto | 243.962050000000005 |
California.Cupertino | 243.962050000000005 |
California.MountainView | 243.962050000000005 |
California.Campbell | 243.962050000000005 |
Query OK, 10 row(s) in set (2.415961s)
```
## Window Partition Query
In TDengine, you can use the window clause to perform aggregation queries partitioned by time window. This is particularly suitable for scenarios that require analyzing large amounts of time-series data, such as smart meters that collect data every 10 seconds but need to be queried for the average voltage every 1 minute.
The window clause allows you to partition the queried data set by windows and aggregate the data within each window, including:
- Time window
- State window
- Session window
- Event window
- Count window
The logic of window partitioning is shown in the following image:
<figure>
<Image img={windowModel} alt="Windowing description"/>
<figcaption>Figure 1. Windowing logic</figcaption>
</figure>
The syntax for the window clause is as follows:
```sql
window_clause: {
SESSION(ts_col, tol_val)
| STATE_WINDOW(col)
| INTERVAL(interval_val [, interval_offset]) [SLIDING (sliding_val)] [FILL(fill_mod_and_val)]
| EVENT_WINDOW START WITH start_trigger_condition END WITH end_trigger_condition
}
```
**Note** When using the window clause, the following rules should be observed:
1. The window clause is located after the data partitioning clause and cannot be used together with the GROUP BY clause.
2. The window clause partitions the data by windows and performs calculations on the expressions in the SELECT list for each window. The expressions in the SELECT list can only include: constants; the pseudocolumns `_wstart`, `_wend`, and `_wduration`; and aggregate functions (including selection functions and time-series specific functions that can determine the number of output rows by parameters).
3. WHERE statements can specify the start and end times of the query and other filtering conditions.
### Timestamp Pseudocolumns
In the results of a window aggregation query, if the SQL does not include the timestamp column in the output, the final results will not automatically contain the time information of the windows. If you need to output the time window corresponding to each aggregated result, you can use the timestamp-related pseudocolumns in the SELECT clause: the start time of the time window (`_wstart`), the end time of the time window (`_wend`), and the duration of the time window (`_wduration`), as well as the pseudocolumns related to the overall query window: the start time of the query window (`_qstart`) and the end time of the query window (`_qend`). Note that both the start and end times of a time window are closed boundaries, and the window duration is expressed in the current time precision of the data. For example, if the database's time precision is milliseconds (ms), a value of 500 in the results means the duration of that window is 500 ms.
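A sketch of selecting both kinds of pseudocolumns together (assuming your version allows the whole-query pseudocolumns `_qstart` and `_qend` in this position; the window syntax itself is introduced in the following sections):
```sql
-- _wstart/_wend/_wduration describe each window; _qstart/_qend describe the whole query range
SELECT tbname, _wstart, _wend, _wduration, _qstart, _qend, avg(voltage)
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
  AND ts < "2022-01-01T00:05:00+08:00"
PARTITION BY tbname
INTERVAL(1m)
SLIMIT 1;
```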
### Time Windows
Time windows can be divided into: sliding time windows and tumbling time windows. The syntax for the time window clause is as follows:
```sql
INTERVAL(interval_val [, interval_offset])
[SLIDING (sliding_val)]
[FILL(fill_mod_and_val)]
```
The time window clause includes 3 sub-clauses:
- INTERVAL clause: used to generate windows of equal time duration, where interval_val specifies the size of each time window, and interval_offset specifies the offset of the window division relative to "UTC moment 0" (the offset must be less than interval_val);
- SLIDING clause: used to specify the time the window slides forward;
- FILL: used to specify the filling mode of data in case of missing data in the window interval.
For time windows, both interval_val and sliding_val represent time periods, and syntactically support three ways. For example:
1. INTERVAL(1s, 500a) SLIDING(1s), with time units, where the time units are represented by single characters, respectively: a (milliseconds), b (nanoseconds), d (days), h (hours), m (minutes), n (months), s (seconds), u (microseconds), w (weeks), y (years);
2. INTERVAL(1000, 500) SLIDING(1000), without time units, will use the time precision of the query database as the default time unit, and when there are multiple databases, the one with higher precision will be used by default;
3. INTERVAL('1s', '500a') SLIDING('1s'), with time units in string form, where the string cannot contain any spaces or other characters.
Example SQL is as follows:
```sql
SELECT tbname, _wstart, _wend, avg(voltage)
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
AND ts < "2022-01-01T00:05:00+08:00"
PARTITION BY tbname
INTERVAL(1m, 5s)
SLIMIT 2;
```
The above SQL queries the supertable `meters` for data with timestamps greater than or equal to `2022-01-01T00:00:00+08:00` and less than `2022-01-01T00:05:00+08:00`; data is first partitioned by subtable name `tbname`, then partitioned by a 1-minute time window, with each time window offset by 5 seconds; finally, only the data from the first 2 partitions is taken as the result. The query results are as follows:
```text
tbname | _wstart | _wend | avg(voltage) |
======================================================================================
d2 | 2021-12-31 23:59:05.000 | 2022-01-01 00:00:05.000 | 253.000000000000000 |
d2 | 2022-01-01 00:00:05.000 | 2022-01-01 00:01:05.000 | 244.166666666666657 |
d2 | 2022-01-01 00:01:05.000 | 2022-01-01 00:02:05.000 | 241.833333333333343 |
d2 | 2022-01-01 00:02:05.000 | 2022-01-01 00:03:05.000 | 243.166666666666657 |
d2 | 2022-01-01 00:03:05.000 | 2022-01-01 00:04:05.000 | 240.833333333333343 |
d2 | 2022-01-01 00:04:05.000 | 2022-01-01 00:05:05.000 | 244.800000000000011 |
d26 | 2021-12-31 23:59:05.000 | 2022-01-01 00:00:05.000 | 253.000000000000000 |
d26 | 2022-01-01 00:00:05.000 | 2022-01-01 00:01:05.000 | 244.166666666666657 |
d26 | 2022-01-01 00:01:05.000 | 2022-01-01 00:02:05.000 | 241.833333333333343 |
d26 | 2022-01-01 00:02:05.000 | 2022-01-01 00:03:05.000 | 243.166666666666657 |
d26 | 2022-01-01 00:03:05.000 | 2022-01-01 00:04:05.000 | 240.833333333333343 |
d26 | 2022-01-01 00:04:05.000 | 2022-01-01 00:05:05.000 | 244.800000000000011 |
Query OK, 12 row(s) in set (0.021265s)
```
#### Sliding Window
Each query execution covers one time window, and the time window slides forward as time progresses. When defining a continuous query, it is necessary to specify the size of the time window and the forward sliding time. As shown in the figure below, [t0s, t0e], [t1s, t1e], [t2s, t2e] are the time window ranges of three consecutive queries, and the sliding time indicates how far the window moves forward. Query filtering, aggregation, and other operations are performed independently for each time window.
<figure>
<Image img={slidingWindow} alt="Sliding window logic"/>
<figcaption>Figure 2. Sliding window logic</figcaption>
</figure>
**Note**
1. INTERVAL and SLIDING clauses need to be used in conjunction with aggregation and selection functions, therefore, the following SQL statement is illegal:
```sql
SELECT * FROM temp_tb_1 INTERVAL(1m) SLIDING(2m);
```
2. The forward sliding time of SLIDING cannot exceed the time range of a window, therefore, the following SQL statement is also illegal:
```sql
SELECT COUNT(*) FROM temp_tb_1 INTERVAL(1m) SLIDING(2m);
```
**Points to note when using time windows**
1. The window width of the aggregation period is specified by the keyword INTERVAL, with a minimum interval of 10 milliseconds (10a); it also supports an offset (offset must be less than the interval), which is the offset of the time window division compared to "UTC moment 0". The SLIDING statement is used to specify the forward increment of the aggregation period, i.e., the duration of each window's forward slide.
2. When using the INTERVAL statement, unless in very special circumstances, it is required to configure the timezone parameter in the taos.cfg configuration file of both client and server to the same value, to avoid frequent cross-time zone conversions by time handling functions, which could lead to severe performance impacts.
3. The returned results have a strictly monotonically increasing time sequence.
Example:
```sql
SELECT tbname, _wstart, avg(voltage)
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
AND ts < "2022-01-01T00:05:00+08:00"
PARTITION BY tbname
INTERVAL(1m) SLIDING(30s)
SLIMIT 1;
```
The above SQL queries the supertable `meters` for data with timestamps greater than or equal to `2022-01-01T00:00:00+08:00` and less than `2022-01-01T00:05:00+08:00`. Data is first partitioned by subtable name `tbname`, then divided into 1-minute time windows, with the time windows sliding every 30 seconds; finally, only the data from the first partition is taken as the result. The query results are as follows:
```text
tbname | _wstart | avg(voltage) |
=============================================================
d2 | 2021-12-31 23:59:30.000 | 248.333333333333343 |
d2 | 2022-01-01 00:00:00.000 | 246.000000000000000 |
d2 | 2022-01-01 00:00:30.000 | 244.666666666666657 |
d2 | 2022-01-01 00:01:00.000 | 240.833333333333343 |
d2 | 2022-01-01 00:01:30.000 | 239.500000000000000 |
d2 | 2022-01-01 00:02:00.000 | 243.833333333333343 |
d2 | 2022-01-01 00:02:30.000 | 243.833333333333343 |
d2 | 2022-01-01 00:03:00.000 | 241.333333333333343 |
d2 | 2022-01-01 00:03:30.000 | 241.666666666666657 |
d2 | 2022-01-01 00:04:00.000 | 244.166666666666657 |
d2 | 2022-01-01 00:04:30.000 | 244.666666666666657 |
Query OK, 11 row(s) in set (0.013153s)
```
#### Tumbling Window
When SLIDING is equal to INTERVAL, the sliding window becomes a tumbling window. The difference between a tumbling window and a sliding window is that in a sliding window, because interval_val and sliding_val differ, consecutive time windows overlap, whereas in a tumbling window there is no overlap. Essentially, a tumbling window divides the time axis strictly by interval_val; for example, INTERVAL(1m) and INTERVAL(1m) SLIDING(1m) are equivalent.
Example:
```sql
SELECT tbname, _wstart, _wend, avg(voltage)
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
AND ts < "2022-01-01T00:05:00+08:00"
PARTITION BY tbname
INTERVAL(1m) SLIDING(1m)
SLIMIT 1;
```
The above SQL queries the supertable `meters` for data with timestamps greater than or equal to `2022-01-01T00:00:00+08:00` and less than `2022-01-01T00:05:00+08:00`. The data is first partitioned by the subtable name `tbname`, then divided into 1-minute time windows, with each time window also being 1 minute long; finally, only the data from the first partition is taken as the result. The query results are as follows:
```text
tbname | _wstart | _wend | avg(voltage) |
======================================================================================
d2 | 2022-01-01 00:00:00.000 | 2022-01-01 00:01:00.000 | 246.000000000000000 |
d2 | 2022-01-01 00:01:00.000 | 2022-01-01 00:02:00.000 | 240.833333333333343 |
d2 | 2022-01-01 00:02:00.000 | 2022-01-01 00:03:00.000 | 243.833333333333343 |
d2 | 2022-01-01 00:03:00.000 | 2022-01-01 00:04:00.000 | 241.333333333333343 |
d2 | 2022-01-01 00:04:00.000 | 2022-01-01 00:05:00.000 | 244.166666666666657 |
Query OK, 5 row(s) in set (0.016812s)
```
#### FILL Clause
The FILL clause is used to specify the fill mode when data is missing in a window interval. The fill modes include the following:
1. No fill: NONE (default fill mode).
2. VALUE fill: Fixed value fill, where the fill value must be specified. For example: FILL(VALUE, 1.23). Note that the final fill value is determined by the type of the corresponding column, such as FILL(VALUE, 1.23) for an INT type column, the fill value would be 1.
3. PREV fill: Fill with the previous non-NULL value. For example: FILL(PREV).
4. NULL fill: Fill with NULL. For example: FILL(NULL).
5. LINEAR fill: Perform linear interpolation based on the nearest non-NULL values before and after. For example: FILL(LINEAR).
6. NEXT fill: Fill with the next non-NULL value. For example: FILL(NEXT).
Among these fill modes, except for the NONE mode which does not fill by default, other modes will be ignored if there is no data in the entire query time range, resulting in no fill data and an empty query result. This behavior is reasonable under some modes (PREV, NEXT, LINEAR) because no data means no fill value can be generated.
For other modes (NULL, VALUE), theoretically, fill values can be generated. Whether to output fill values depends on the application's requirements. To meet the needs of applications that require forced filling of data or NULL, and to maintain compatibility with existing fill modes, TDengine also supports two new fill modes:
1. NULL_F: Force fill with NULL values
2. VALUE_F: Force fill with VALUE
The differences between NULL, NULL_F, VALUE, and VALUE_F for different scenarios are as follows:
1. INTERVAL clause: NULL_F, VALUE_F are forced fill modes; NULL, VALUE are non-forced modes. In this mode, their semantics match their names.
2. Stream computing's INTERVAL clause: NULL_F and NULL behave the same, both are non-forced modes; VALUE_F and VALUE behave the same, both are non-forced modes. That is, there is no forced mode in stream computing's INTERVAL.
3. INTERP clause: NULL and NULL_F behave the same, both are forced modes; VALUE and VALUE_F behave the same, both are forced modes. That is, there is no non-forced mode in INTERP.
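For the forced modes just described, a sketch of forcing NULL fill over a time range that contains no data at all (plain FILL(NULL) would return an empty result here):
```sql
-- Produce one NULL-filled row per window even though the queried range is empty
SELECT _wstart, avg(voltage)
FROM meters
WHERE ts >= "2090-01-01T00:00:00+08:00"
  AND ts < "2090-01-01T00:05:00+08:00"
INTERVAL(1m)
FILL(NULL_F);
```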
**Note**
1. Using the FILL statement may generate a large amount of filled output, be sure to specify the time range for the query.
2. For each query, the system can return no more than 10 million results with interpolation.
3. In time dimension aggregation, the returned results have a strictly monotonic increasing time sequence.
4. If the query target is a supertable, the aggregate function will apply to the data of all tables under the supertable that meet the value filtering conditions. If the query does not use a PARTITION BY statement, the results are returned in a strictly monotonic increasing time sequence; if the query uses a PARTITION BY statement for grouping, the results within each PARTITION are strictly monotonic increasing in time sequence.
Example:
```sql
SELECT tbname, _wstart, _wend, avg(voltage)
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
AND ts < "2022-01-01T00:05:00+08:00"
PARTITION BY tbname
INTERVAL(1m) FILL(prev)
SLIMIT 2;
```
The above SQL queries the supertable `meters` for data with timestamps greater than or equal to `2022-01-01T00:00:00+08:00` and less than `2022-01-01T00:05:00+08:00`; data is first partitioned by subtable name `tbname`, then by each 1-minute time window. If data is missing within a window, it is filled with the previous non-NULL value; finally, only the data from the first 2 partitions is taken as the result. The query results are as follows:
```text
tbname | _wstart | _wend | avg(voltage) |
=======================================================================================
d2 | 2022-01-01 00:00:00.000 | 2022-01-01 00:01:00.000 | 246.000000000000000 |
d2 | 2022-01-01 00:01:00.000 | 2022-01-01 00:02:00.000 | 240.833333333333343 |
d2 | 2022-01-01 00:02:00.000 | 2022-01-01 00:03:00.000 | 243.833333333333343 |
d2 | 2022-01-01 00:03:00.000 | 2022-01-01 00:04:00.000 | 241.333333333333343 |
d2 | 2022-01-01 00:04:00.000 | 2022-01-01 00:05:00.000 | 244.166666666666657 |
d26 | 2022-01-01 00:00:00.000 | 2022-01-01 00:01:00.000 | 246.000000000000000 |
d26 | 2022-01-01 00:01:00.000 | 2022-01-01 00:02:00.000 | 240.833333333333343 |
d26 | 2022-01-01 00:02:00.000 | 2022-01-01 00:03:00.000 | 243.833333333333343 |
d26 | 2022-01-01 00:03:00.000 | 2022-01-01 00:04:00.000 | 241.333333333333343 |
d26 | 2022-01-01 00:04:00.000 | 2022-01-01 00:05:00.000 | 244.166666666666657 |
Query OK, 10 row(s) in set (0.022866s)
```
### State Window
Use integers (boolean values) or strings to identify the state of the device when the record is generated. Records with the same state value belong to the same state window, and the window closes when the value changes. TDengine also supports using CASE expressions on state values, which can express that the start of a state is triggered by meeting a certain condition, and the end of the state is triggered by meeting another condition. For example, with smart meters, if the voltage is within the normal range of 225V to 235V, you can monitor the voltage to determine if the circuit is normal.
```sql
SELECT tbname, _wstart, _wend,_wduration, CASE WHEN voltage >= 225 and voltage <= 235 THEN 1 ELSE 0 END status
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
AND ts < "2022-01-01T00:05:00+08:00"
PARTITION BY tbname
STATE_WINDOW(
CASE WHEN voltage >= 225 and voltage <= 235 THEN 1 ELSE 0 END
)
SLIMIT 2;
```
The above SQL queries data from the supertable `meters`, where the timestamp is greater than or equal to `2022-01-01T00:00:00+08:00` and less than `2022-01-01T00:05:00+08:00`. Data is first partitioned by the subtable name `tbname`. It then divides into status windows based on whether the voltage is within the normal range. Finally, it retrieves data from the first 2 partitions as the result. The query results are as follows: (Since the data is randomly generated, the number of data entries in the result set may vary)
```text
tbname | _wstart | _wend | _wduration | status |
===============================================================================================
d2 | 2022-01-01 00:00:00.000 | 2022-01-01 00:01:20.000 | 80000 | 0 |
d2 | 2022-01-01 00:01:30.000 | 2022-01-01 00:01:30.000 | 0 | 1 |
d2 | 2022-01-01 00:01:40.000 | 2022-01-01 00:01:40.000 | 0 | 0 |
d2 | 2022-01-01 00:01:50.000 | 2022-01-01 00:01:50.000 | 0 | 1 |
d2 | 2022-01-01 00:02:00.000 | 2022-01-01 00:02:20.000 | 20000 | 0 |
d2 | 2022-01-01 00:02:30.000 | 2022-01-01 00:02:30.000 | 0 | 1 |
d2 | 2022-01-01 00:02:40.000 | 2022-01-01 00:03:00.000 | 20000 | 0 |
d2 | 2022-01-01 00:03:10.000 | 2022-01-01 00:03:10.000 | 0 | 1 |
d2 | 2022-01-01 00:03:20.000 | 2022-01-01 00:03:40.000 | 20000 | 0 |
d2 | 2022-01-01 00:03:50.000 | 2022-01-01 00:03:50.000 | 0 | 1 |
d2 | 2022-01-01 00:04:00.000 | 2022-01-01 00:04:50.000 | 50000 | 0 |
d26 | 2022-01-01 00:00:00.000 | 2022-01-01 00:01:20.000 | 80000 | 0 |
d26 | 2022-01-01 00:01:30.000 | 2022-01-01 00:01:30.000 | 0 | 1 |
d26 | 2022-01-01 00:01:40.000 | 2022-01-01 00:01:40.000 | 0 | 0 |
d26 | 2022-01-01 00:01:50.000 | 2022-01-01 00:01:50.000 | 0 | 1 |
d26 | 2022-01-01 00:02:00.000 | 2022-01-01 00:02:20.000 | 20000 | 0 |
d26 | 2022-01-01 00:02:30.000 | 2022-01-01 00:02:30.000 | 0 | 1 |
d26 | 2022-01-01 00:02:40.000 | 2022-01-01 00:03:00.000 | 20000 | 0 |
d26 | 2022-01-01 00:03:10.000 | 2022-01-01 00:03:10.000 | 0 | 1 |
d26 | 2022-01-01 00:03:20.000 | 2022-01-01 00:03:40.000 | 20000 | 0 |
d26 | 2022-01-01 00:03:50.000 | 2022-01-01 00:03:50.000 | 0 | 1 |
d26 | 2022-01-01 00:04:00.000 | 2022-01-01 00:04:50.000 | 50000 | 0 |
Query OK, 22 row(s) in set (0.153403s)
```
### Session Window
The session window determines whether records belong to the same session based on the value of the timestamp primary key. As shown in the figure below, if the interval between consecutive timestamps is set to be less than or equal to 12 seconds, the following 6 records form 2 session windows: [2019-04-28 14:22:10, 2019-04-28 14:22:30] and [2019-04-28 14:23:10, 2019-04-28 14:23:30]. This is because the interval between 2019-04-28 14:22:30 and 2019-04-28 14:23:10 is 40 seconds, which exceeds the continuous interval (12 seconds).
<figure>
<Image img={sessionWindow} alt="Session window example"/>
<figcaption>Figure 3. Session window example</figcaption>
</figure>
Records whose timestamps are within tol_val of each other are considered to belong to the same session window; if the time between two consecutive records exceeds tol_val, a new session window is automatically started.
```sql
SELECT COUNT(*), FIRST(ts) FROM temp_tb_1 SESSION(ts, tol_val);
```
Example:
```sql
SELECT tbname, _wstart, _wend, _wduration, count(*)
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
AND ts < "2022-01-01T00:10:00+08:00"
PARTITION BY tbname
SESSION(ts, 10m)
SLIMIT 10;
```
The above SQL queries the supertable meters for data with timestamps greater than or equal to 2022-01-01T00:00:00+08:00 and less than 2022-01-01T00:10:00+08:00; data is first partitioned by the subtable name tbname, then split according to a 10-minute session window; finally, data from the first 10 partitions is returned, showing subtable name, window start time, window end time, window duration, and the number of records within the window. The query results are as follows:
```text
tbname | _wstart | _wend | _wduration | count(*) |
===============================================================================================
d2 | 2022-01-01 00:00:00.000 | 2022-01-01 00:09:50.000 | 590000 | 60 |
d26 | 2022-01-01 00:00:00.000 | 2022-01-01 00:09:50.000 | 590000 | 60 |
d52 | 2022-01-01 00:00:00.000 | 2022-01-01 00:09:50.000 | 590000 | 60 |
d64 | 2022-01-01 00:00:00.000 | 2022-01-01 00:09:50.000 | 590000 | 60 |
d76 | 2022-01-01 00:00:00.000 | 2022-01-01 00:09:50.000 | 590000 | 60 |
d28 | 2022-01-01 00:00:00.000 | 2022-01-01 00:09:50.000 | 590000 | 60 |
d4 | 2022-01-01 00:00:00.000 | 2022-01-01 00:09:50.000 | 590000 | 60 |
d88 | 2022-01-01 00:00:00.000 | 2022-01-01 00:09:50.000 | 590000 | 60 |
d77 | 2022-01-01 00:00:00.000 | 2022-01-01 00:09:50.000 | 590000 | 60 |
d54 | 2022-01-01 00:00:00.000 | 2022-01-01 00:09:50.000 | 590000 | 60 |
Query OK, 10 row(s) in set (0.043489s)
```
### Event Window
Event windows are defined by start and end conditions. The window starts when the `start_trigger_condition` is met and closes when the `end_trigger_condition` is satisfied. Both `start_trigger_condition` and `end_trigger_condition` can be any condition expression supported by TDengine and can include different columns.
An event window can contain only one data point. That is, when a single data point meets both the `start_trigger_condition` and `end_trigger_condition` and is not currently within a window, it alone constitutes a window.
If an event window cannot be closed, it does not form a window and will not be output. That is, if data meets the `start_trigger_condition` and the window opens, but subsequent data does not meet the `end_trigger_condition`, the window cannot be closed. This data does not form a window and will not be output.
If event window queries are performed directly on a supertable, TDengine will aggregate the data of the supertable into a single timeline and then perform the event window calculation. If you need to perform event window queries on the result set of a subquery, the result set of the subquery needs to meet the requirements of outputting along a timeline and can output a valid timestamp column.
Consider the following SQL statement, the event window segmentation is illustrated in the diagram below.
```sql
select _wstart, _wend, count(*) from t event_window start with c1 > 0 end with c2 < 10
```
<figure>
<Image img={eventWindow} alt="Event window example"/>
<figcaption>Figure 4. Event window example</figcaption>
</figure>
Example SQL:
```sql
SELECT tbname, _wstart, _wend, _wduration, count(*)
FROM meters
WHERE ts >= "2022-01-01T00:00:00+08:00"
AND ts < "2022-01-01T00:10:00+08:00"
PARTITION BY tbname
EVENT_WINDOW START WITH voltage >= 225 END WITH voltage < 235
LIMIT 5;
```
The above SQL queries the supertable meters for data with timestamps greater than or equal to 2022-01-01T00:00:00+08:00 and less than 2022-01-01T00:10:00+08:00; data is first partitioned by subtable name tbname, then segmented according to the event window conditions: voltage greater than or equal to 225V and less than 235V; finally, the first 5 rows of data from each partition are taken as the result, returning the subtable name, window start time, window end time, window duration, and the number of data points in the window. The query results are as follows:
```text
tbname | _wstart | _wend | _wduration | count(*) |
==============================================================================================
d0 | 2022-01-01 00:00:00.000 | 2022-01-01 00:01:30.000 | 90000 | 10 |
d0 | 2022-01-01 00:01:40.000 | 2022-01-01 00:02:30.000 | 50000 | 6 |
d0 | 2022-01-01 00:02:40.000 | 2022-01-01 00:03:10.000 | 30000 | 4 |
d0 | 2022-01-01 00:03:20.000 | 2022-01-01 00:07:10.000 | 230000 | 24 |
d0 | 2022-01-01 00:07:20.000 | 2022-01-01 00:07:50.000 | 30000 | 4 |
d1 | 2022-01-01 00:00:00.000 | 2022-01-01 00:01:30.000 | 90000 | 10 |
d1 | 2022-01-01 00:01:40.000 | 2022-01-01 00:02:30.000 | 50000 | 6 |
d1 | 2022-01-01 00:02:40.000 | 2022-01-01 00:03:10.000 | 30000 | 4 |
d1 | 2022-01-01 00:03:20.000 | 2022-01-01 00:07:10.000 | 230000 | 24 |
……
Query OK, 500 row(s) in set (0.328557s)
```
### Count Window
Count window is a method of dividing windows based on a fixed number of data rows. By default, the count window first sorts the data by timestamp, then divides the data into multiple windows based on the value of count_val, and finally performs aggregation calculations.
count_val represents the maximum number of data rows in each count window. When the total number of data rows is not divisible by count_val, the number of rows in the last window will be less than count_val.
sliding_val is a constant that represents the number of rows by which the window slides forward, similar to the SLIDING of INTERVAL. By adjusting sliding_val, you can control the degree of overlap between windows and thus analyze the data at a finer granularity (a sketch using sliding_val follows the example output below).
For example, using the data model of a smart meter, the query SQL is as follows.
```sql
select _wstart, _wend, count(*)
from meters
where ts >= "2022-01-01T00:00:00+08:00" and ts < "2022-01-01T00:30:00+08:00"
count_window(1000);
```
The above SQL queries the data from the supertable meters where the timestamp is greater than or equal to 2022-01-01T00:00:00+08:00 and less than 2022-01-01T00:30:00+08:00, groups the rows into windows of 1000 rows each, and returns the start time, end time, and row count of each window. The query results are as follows:
```text
_wstart | _wend | count(*) |
=====================================================================
2022-01-01 00:00:00.000 | 2022-01-01 00:01:30.000 | 1000 |
2022-01-01 00:01:40.000 | 2022-01-01 00:03:10.000 | 1000 |
2022-01-01 00:03:20.000 | 2022-01-01 00:04:50.000 | 1000 |
2022-01-01 00:05:00.000 | 2022-01-01 00:06:30.000 | 1000 |
2022-01-01 00:06:40.000 | 2022-01-01 00:08:10.000 | 1000 |
2022-01-01 00:08:20.000 | 2022-01-01 00:09:50.000 | 1000 |
2022-01-01 00:10:00.000 | 2022-01-01 00:11:30.000 | 1000 |
2022-01-01 00:11:40.000 | 2022-01-01 00:13:10.000 | 1000 |
2022-01-01 00:13:20.000 | 2022-01-01 00:14:50.000 | 1000 |
2022-01-01 00:15:00.000 | 2022-01-01 00:16:30.000 | 1000 |
Query OK, 10 row(s) in set (0.062794s)
```
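As a further sketch (assuming the two-argument form `count_window(count_val, sliding_val)` is available in your version), overlapping count windows could be expressed as:
```sql
-- Hypothetical sliding count window: windows of 1000 rows that advance by 500 rows
select _wstart, _wend, count(*)
from meters
where ts >= "2022-01-01T00:00:00+08:00" and ts < "2022-01-01T00:30:00+08:00"
count_window(1000, 500);
```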
## Time-Series Extensions
Time-series extensions are a set of functions specially designed by TDengine for time-series query scenarios. In general-purpose databases, similar functionality usually requires complex query statements and is less efficient. To reduce user effort and simplify the query process, TDengine provides these capabilities as built-in functions, achieving efficient and easy-to-use time-series data processing. The time-series specific functions are listed below; a short usage sketch follows the table.
| Function | Description |
|:--------------:|:------------------------------------------------------------------------:|
|CSUM | Cumulative sum, ignoring NULL values. |
|DERIVATIVE | Calculates the rate of change per unit of a column in the table. The unit time interval can be specified by the time_interval parameter, with a minimum of 1 second (1s); the ignore_negative parameter can be 0 or 1, with 1 meaning negative values are ignored. |
|DIFF | Calculates the difference between the value of a column and the corresponding value of the previous row. ignore_negative can be 0 or 1, default is 0, not ignoring negative values. When ignore_negative is 1, it means negative values are ignored.|
|IRATE | Calculates the instantaneous growth rate using the last two samples in the time interval; if these two values are decreasing, only the last value is used for calculation, not the difference between them. |
|MAVG | Calculates the moving average of consecutive k values. If the number of input rows is less than k, no result is output. The valid input range for parameter k is 1 ≤ k ≤ 1000.|
|STATECOUNT | Returns the number of consecutive records that meet a certain condition, appending the result as a new column at the end of each row. The condition is calculated based on the parameter, adding 1 if true, resetting to -1 if false, and skipping if the data is NULL. |
|STATEDURATION | Returns the duration of consecutive records that meet a certain condition, appending the result as a new column at the end of each row. The condition is calculated based on the parameter, adding the time length between two records if true (the time length of the first record meeting the condition is counted as 0), resetting to -1 if false, and skipping if the data is NULL.|
|TWA | Time Weighted Average function. Calculates the time-weighted average of a column over a period of time. |
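A sketch of two of these functions applied to a single subtable of the test data (a single timeline, which these functions generally expect):
```sql
-- Per-row difference of current for one subtable over a five-minute slice
SELECT diff(current) FROM d1
WHERE ts >= "2022-01-01T00:00:00+08:00" AND ts < "2022-01-01T00:05:00+08:00";
-- Time-weighted average of voltage over the same slice
SELECT twa(voltage) FROM d1
WHERE ts >= "2022-01-01T00:00:00+08:00" AND ts < "2022-01-01T00:05:00+08:00";
```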
## Nested Queries
Nested queries, also known as subqueries, refer to a structure in SQL where the result of an inner query can be used as the input for an outer query. TDengine supports non-correlated subqueries within the from clause. Non-correlated means that the subquery does not use parameters from the parent query. After the from clause in a select query, an independent select statement can be included, which is enclosed in parentheses. By using nested queries, you can reference the result of another query within a single query, thus enabling more complex data processing and analysis. For example, consider the following SQL for smart meters:
```sql
SELECT max(voltage),*
FROM (
SELECT tbname,last_row(ts),voltage,current,phase,groupid,location
FROM meters
PARTITION BY tbname
)
GROUP BY groupid;
```
The above SQL performs an inner query on the supertable meters, grouping by subtable name, and querying the latest data for each subtable; the outer query takes the result of the inner query as input and aggregates by groupid, querying the maximum voltage for each group.
TDengine's nested queries follow these rules:
1. The result of the inner query serves as a "virtual table" for the outer query to use, and it is recommended to alias this virtual table for easy reference in the outer query.
2. The outer query supports direct referencing of columns or pseudocolumns from the inner query by column name or alias.
3. Both inner and outer queries support regular joins between tables/supertables. The result of the inner query can also participate in joins with data subtables.
4. The features supported by the inner query are consistent with those of non-nested queries. The ORDER BY clause in the inner query generally has no meaning and is recommended to be avoided to prevent unnecessary resource consumption.
5. Compared to non-nested queries, the outer query has the following limitations in supported features:
   - If the result data of the inner query does not provide timestamps, then functions implicitly dependent on timestamps will not work properly in the outer query. Examples include: INTERP, DERIVATIVE, IRATE, LAST_ROW, FIRST, LAST, TWA, STATEDURATION, TAIL, UNIQUE.
   - If the result data of the inner query is not ordered by timestamp, then functions dependent on data being ordered by time will not work properly in the outer query. Examples include: LEASTSQUARES, ELAPSED, INTERP, DERIVATIVE, IRATE, TWA, DIFF, STATECOUNT, STATEDURATION, CSUM, MAVG, TAIL, UNIQUE.
   - Functions that require two passes of scanning will not work properly in the outer query. Such functions include: PERCENTILE.
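A minimal sketch of rule 1, aliasing the inner query's virtual table as t and referencing it in the outer query (the column choice here is illustrative):
```sql
SELECT t.groupid, avg(t.voltage)
FROM (
  SELECT ts, voltage, groupid
  FROM meters
) t
GROUP BY t.groupid;
```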
## UNION Clause
TDengine supports the UNION operator. That is, if multiple SELECT clauses return result sets with exactly the same structure (column names, data types, number of columns, and order), these result sets can be combined using the UNION clause.
Example:
```sql
(SELECT tbname,* FROM d1 limit 1)
UNION ALL
(SELECT tbname,* FROM d11 limit 2)
UNION ALL
(SELECT tbname,* FROM d21 limit 3);
```
The above SQL queries 1 record from subtable d1, 2 records from subtable d11, and 3 records from subtable d21, and combines the results. The returned results are as follows:
```text
tbname | ts | current | voltage | phase |
====================================================================================
d11 | 2020-09-13 20:26:40.000 | 11.5680809 | 247 | 146.5000000 |
d11 | 2020-09-13 20:26:50.000 | 14.2392311 | 234 | 148.0000000 |
d1 | 2020-09-13 20:26:40.000 | 11.5680809 | 247 | 146.5000000 |
d21 | 2020-09-13 20:26:40.000 | 11.5680809 | 247 | 146.5000000 |
d21 | 2020-09-13 20:26:50.000 | 14.2392311 | 234 | 148.0000000 |
d21 | 2020-09-13 20:27:00.000 | 10.0999422 | 251 | 146.0000000 |
Query OK, 6 row(s) in set (0.006438s)
```
In the same SQL statement, a maximum of 100 UNION clauses are supported.
## Association Query
### Join Concept
1. Driving Table
In association queries, the role of the driving table depends on the type of join used: in the Left Join series, the left table acts as the driving table; in the Right Join series, the right table acts as the driving table.
2. Join Condition
In TDengine, the join condition refers to the condition specified for table association. For all association queries (except ASOF Join and Window Join), a join condition must be specified, usually appearing after `on`. In ASOF Join, conditions appearing after `where` can also be considered as join conditions, while Window Join specifies join conditions through `window_offset`.
Except for ASOF Join, all Join types supported by TDengine must explicitly specify join conditions. ASOF Join, because it defines implicit join conditions by default, does not need to explicitly specify join conditions if the default conditions meet the requirements.
For types of joins other than ASOF Join and Window Join, the join conditions can include not only the primary join condition but also any number of other join conditions. There must be an `and` relationship between the primary join condition and other join conditions, but there is no such restriction among other join conditions. Other join conditions can include any logical operation combination of primary key columns, tag columns, ordinary columns, constants, and their scalar functions or operations.
For example, with smart meters, the following SQL statements all contain valid join conditions.
```sql
select a.* from meters a left join meters b on a.ts = b.ts and a.ts > '2023-10-18 10:00:00.000';
select a.* from meters a left join meters b on a.ts = b.ts and (a.ts > '2023-10-18 10:00:00.000' or a.ts < '2023-10-17 10:00:00.000');
select a.* from meters a left join meters b on timetruncate(a.ts, 1s) = timetruncate(b.ts, 1s) and (a.ts + 1s > '2023-10-18 10:00:00.000' or a.groupId > 0);
select a.* from meters a left asof join meters b on timetruncate(a.ts, 1s) < timetruncate(b.ts, 1s) and a.groupId = b.groupId;
```
3. Primary Join Condition
As a time-series database, all association queries in TDengine revolve around the primary key column. Therefore, for all association queries except ASOF Join and Window Join, an equality join condition on the primary key column must be included. The first appearing primary key column equality join condition in the join conditions will be considered as the primary join condition. The primary join condition of ASOF Join can include non-equality conditions, while the primary join condition of Window Join is specified through `window_offset`.
Except for Window Join, TDengine supports performing `timetruncate` function operations in the primary join condition, such as `on timetruncate(a.ts, 1s) = timetruncate(b.ts, 1s)`. Apart from this, other functions and scalar operations are currently not supported.
4. Grouping Condition
The time-series database features of ASOF Join and Window Join support grouping the input data of the association query first, and then performing association operations for each group. Grouping only applies to the input of the association query, and the output results will not contain grouping information. Equality conditions appearing after `on` in ASOF Join and Window Join (except for the primary join condition of ASOF Join) will be treated as grouping conditions.
5. Primary Key Timeline
As a time-series database, TDengine requires each table (subtable) to have a primary key timestamp column, which serves as the primary key timeline of the table for many time-related operations. In the results of subqueries or Join operations, it is also necessary to clearly identify which column is considered the primary key timeline for subsequent time-related operations. In subqueries, the first ordered primary key column (or an operation on it) or pseudo primary key column (_wstart, _wend) appearing in the query results is considered the primary key timeline of the output table. The selection of the primary key timeline in Join output results follows these rules:
- In the Left Join and Right Join series, the primary key column of the driving table (subquery) will be used as the primary key timeline for subsequent queries. Additionally, in the Window Join window, since both tables are ordered, any table's primary key column can be used as the primary key timeline within the window, with a preference for the primary key column of the local table.
- Inner Join can use the primary key column of either table as the primary key timeline. However, when a grouping-like condition exists (an equality condition on tag columns combined with the primary join condition in an `and` relationship), no primary key timeline can be produced.
- Full Join, because it cannot produce any valid primary key time-series, does not have a primary key timeline, which also means that operations related to the timeline cannot be performed in Full Join.
### Syntax Explanation
In the following content, we will introduce the Left Join and Right Join series in a unified and parallel manner. Therefore, in the subsequent introduction of the Outer, Semi, Anti-Semi, ASOF, Window series, etc., we have adopted the expression "Left/Right" to cover both Left Join and Right Join related knowledge simultaneously. The "/" symbol here specifically refers to Left Join before the slash, and Right Join after the slash. By using this expression, we can more clearly demonstrate the characteristics and usage of these two types of Join operations.
For example, when we mention "left / right table", for Left Join, it specifically refers to the left table, and for Right Join, it specifically refers to the right table. Similarly, when we mention "right / left table", for Left Join, it specifically refers to the right table, and for Right Join, it specifically refers to the left table.
### Join Features
The table below lists the types of Joins supported in TDengine and their definitions.
| Join Type | Definition |
|:------------------------:|:--------------------------------------------------------:|
|Inner Join | Inner join, only data that meets the join conditions in both the left and right tables are returned, can be seen as the intersection of data that meets the join conditions in both tables |
|Left/Right Outer Join | Left / Right (outer) join, includes both the set of data that meets the join conditions in both tables and the set of data in the left / right table that does not meet the join conditions |
|Left/Right Semi Join | Left / Right semi join, usually expresses the meaning of in, exists, i.e., for any data in the left / right table, it returns the left / right table row data only if there is any data in the right / left table that meets the join conditions |
|Left/Right Anti-Semi Join | Left / Right anti join, the logic is exactly opposite to that of the left / right semi join, usually expresses the meaning of not in, not exists, i.e., for any data in the left / right table, it returns the left / right table row data only if there is no data in the right / left table that meets the join conditions |
|Left/Right ASOF Join | Left / Right approximate match join, unlike other traditional join operations that require exact matches, ASOF Join allows for approximate matching in a specified matching mode, i.e., matching by the closest primary key timestamp |
|Left/Right Window Join | Left / Right window join, constructs windows based on the primary key timestamp of each row in the left / right table and the window boundaries and performs window joining, supports projection, scalar, and aggregation operations within the window |
|Full Outer Join | Full (outer) join, includes both the set of data that meets the join conditions in both tables and the set of data in both tables that does not meet the join conditions |
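The sketches below, based on the subtables d1 and d11 used earlier, illustrate the general shape of a few of these join types; the WINDOW_OFFSET range is illustrative, and the exact clause forms should be verified against the join reference of your TDengine version.
```sql
-- Inner join on the primary key timestamp
SELECT a.ts, a.current, b.voltage
FROM d1 a JOIN d11 b ON a.ts = b.ts;

-- Left ASOF join: match each row of a with the closest row of b whose timestamp is not later
SELECT a.ts, a.current, b.ts, b.voltage
FROM d1 a LEFT ASOF JOIN d11 b ON a.ts >= b.ts;

-- Left window join: aggregate the rows of b falling within 1 minute around each row of a
SELECT a.ts, a.current, avg(b.voltage)
FROM d1 a LEFT WINDOW JOIN d11 b WINDOW_OFFSET(-1m, 1m);
```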
### Constraints and Limitations
1. Input Timeline Limitation
Currently, all Join operations in TDengine require the input data to contain a valid primary key timeline. For all table queries, this requirement is usually met. However, for subqueries, it is necessary to ensure that the output data contains a valid primary key timeline.
2. Join Condition Limitations
The limitations on join conditions include the following.
- Except for ASOF Join and Window Join, other join operations must include the primary join condition of the primary key column.
- Only `and` logic is supported between the primary join condition and other join conditions.
- When used in the primary join condition, the primary key column supports only the timetruncate function and no other functions or scalar operations; there are no such restrictions when it is used in other join conditions.
3. Grouping Condition Limitations
The limitations on grouping conditions include the following.
- Only equality conditions on tag columns and ordinary columns (other than the primary key column) are supported.
- Scalar operations are not supported.
- Multiple grouping conditions are supported, with only `and` logic allowed between conditions.
4. Query Result Order Limitations
The limitations on the order of query results include the following.
- For basic tables, subtables, and subqueries with no grouping conditions and no sorting, the query results are output in the order of the driving table's primary key column.
- For supertable queries, Full Join, or scenarios with grouping conditions and no sorting, there is no fixed output order for the query results; when an ordered result is required, an explicit sorting operation must be performed. In such cases, some functions that depend on the timeline may not be executable due to the lack of a valid timeline output.

View File

@ -1,12 +1,13 @@
---
title: Application Practices
sidebar_label: Application Practices
description: This document describes some examples of building systems around TDengine.
title: Basic Features
slug: /basic-features
---
This chapter mainly introduces the data model of TDengine as well as its write and query functions.
```mdx-code-block
import DocCardList from '@theme/DocCardList';
import {useCurrentSidebarCategory} from '@docusaurus/theme-common';
<DocCardList items={useCurrentSidebarCategory().items}/>
```

View File

@ -0,0 +1,142 @@
---
title: Data Subscription
slug: /advanced-features/data-subscription
---
To meet the needs of applications to obtain data written to TDengine in real-time, or to process data in the order of event arrival, TDengine provides data subscription and consumption interfaces similar to those of message queue products. In many scenarios, by adopting TDengine's time-series big data platform, there is no need to integrate additional message queue products, thus simplifying application design and reducing maintenance costs.
Similar to Kafka, users need to define topics in TDengine. However, a topic in TDengine can be a database, a supertable, or based on existing supertables, subtables, or basic tables with specific query conditions, i.e., a query statement. Users can use SQL to filter by tags, table names, columns, expressions, etc., and perform scalar function and UDF computations (excluding data aggregation). Compared to other message queue tools, this is the biggest advantage of TDengine's data subscription feature. It offers greater flexibility; the granularity of the data is determined by the SQL defining the topic, and the filtering and preprocessing of data are automatically handled by TDengine, reducing the amount of data transmitted and simplifying application complexity.
After subscribing to a topic, consumers can receive the latest data in real-time. Multiple consumers can form a consumption group to share consumption progress, enabling multi-threaded, distributed data consumption to increase consumption speed. Consumers in different consumption groups do not share consumption progress even if they consume the same topic. A consumer can subscribe to multiple topics. If the topic corresponds to a supertable or database, the data may be distributed across multiple different nodes or data shards. When there are multiple consumers in a consumption group, consumption efficiency can be improved. TDengine's message queue provides an ACK (Acknowledgment) mechanism to ensure at least once consumption in complex environments such as crashes and restarts.
To implement the above functions, TDengine automatically creates indexes for Write-Ahead Logging (WAL) files to support fast random access and provides flexible and configurable file switching and retention mechanisms. Users can specify the retention time and size of WAL files according to their needs. Through these methods, WAL is transformed into a persistent storage engine that retains the order of event arrival. For queries created in the form of topics, TDengine reads data from WAL. During consumption, TDengine reads data directly from WAL based on the current consumption progress, performs filtering, transformation, and other operations using a unified query engine, and then pushes the data to consumers.
Starting from version 3.2.0.0, data subscription supports vnode migration and splitting. Because data subscription depends on WAL files, and WAL files are not synchronized during vnode migration or splitting, any WAL data that was not consumed before the migration or split can no longer be consumed afterwards. Therefore, please ensure that all data has been consumed before performing vnode migration or splitting; otherwise, data loss may occur during consumption.
## Topics
TDengine uses SQL to create three types of topics, which are introduced below.
### Query Topic
Subscribe to the results of an SQL query, essentially a continuous query, returning only the latest values each time, with the following creation syntax:
```sql
CREATE TOPIC [IF NOT EXISTS] topic_name as subquery
```
This SQL subscribes through a SELECT statement (including SELECT *, or specific query subscriptions like SELECT ts, c1, with condition filtering, scalar function computations, but does not support aggregate functions or time window aggregation). Note that:
1. Once this type of TOPIC is created, the structure of the subscribed data is fixed.
2. Columns or tags that are subscribed to or used for calculations cannot be deleted (ALTER table DROP) or modified (ALTER table MODIFY).
3. If table structure changes occur, newly added columns will not appear in the results.
4. For select *, it subscribes to all columns at the time of creation (data columns for subtables and basic tables, data columns plus tag columns for supertables).
Suppose you need to subscribe to data where the voltage value in all smart meters is greater than 200, and only return the timestamp, current, and voltage (not phase), then you can create the topic power_topic with the following SQL.
```sql
CREATE TOPIC power_topic AS SELECT ts, current, voltage FROM power.meters WHERE voltage > 200;
```
### Supertable Topic
Subscribe to all data in a supertable, with the following syntax:
```sql
CREATE TOPIC [IF NOT EXISTS] topic_name [with meta] AS STABLE stb_name [where_condition]
```
The difference from subscribing using `SELECT * from stbName` is:
1. It does not restrict user table structure changes, i.e., both structure changes and new data after changes can be subscribed to.
2. It returns unstructured data, and the structure of the returned data will change with the structure of the supertable.
3. The with meta parameter is optional; when selected, it returns statements for creating supertables, subtables, etc., mainly used for supertable migration in taosx.
4. The where_condition parameter is optional; when selected, it will be used to filter subtables that meet the conditions, subscribing to these subtables. The where condition cannot include ordinary columns, only tags or tbname, and functions can be used to filter tags, but not aggregate functions, as subtable tag values cannot be aggregated. It can also be a constant expression, such as 2 > 1 (subscribe to all subtables), or false (subscribe to 0 subtables).
5. Returned data does not include tags.
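For example, assuming the power database used elsewhere in this document, a supertable topic that also carries metadata and only covers subtables whose groupid tag equals 1 could be created as follows (the topic name is illustrative):
```sql
CREATE TOPIC IF NOT EXISTS meters_meta_topic WITH META AS STABLE power.meters WHERE groupid = 1;
```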
### Database Topics
Subscribe to all data in a database, with the syntax as follows:
```sql
CREATE TOPIC [IF NOT EXISTS] topic_name [with meta] AS DATABASE db_name;
```
This statement creates a subscription that includes all table data in the database:
1. The `with meta` parameter is optional. When selected, it will return the creation, deletion, and modification statements of all supertables, subtables, and basic tables' metadata in the database, mainly used for database migration in taosx.
2. Subscriptions to supertables and databases are advanced subscription modes and are prone to errors. If you really need to use them, please consult technical support personnel.
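For example, a topic covering all tables in the power database, including their metadata, could be created as follows (the topic name is illustrative):
```sql
CREATE TOPIC IF NOT EXISTS power_db_topic WITH META AS DATABASE power;
```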
## Delete Topic
If you no longer need to subscribe to the data, you can delete the topic. Note that only topics that are not currently subscribed can be deleted.
```sql
DROP TOPIC [IF EXISTS] topic_name;
```
## View Topics
```sql
SHOW TOPICS;
```
The above SQL will display information about all topics under the current database.
## Consumers
### Creating Consumers
Consumers can only be created through the TDengine client driver or APIs provided by connectors. For details, refer to the development guide or reference manual.
### View Consumers
```sql
SHOW CONSUMERS;
```
Displays information about all consumers in the current database, including the consumer's status, creation time, etc.
### Delete Consumer Group
When creating a consumer, a consumer group is assigned to the consumer. Consumers cannot be explicitly deleted, but the consumer group can be deleted with the following statement when there are no consumers in the group:
```sql
DROP CONSUMER GROUP [IF EXISTS] cgroup_name ON topic_name;
```
## Data Subscription
### View Subscription Information
```sql
SHOW SUBSCRIPTIONS;
```
Displays consumption information of the topic on different vgroups, useful for viewing consumption progress.
### Subscribe to Data
TDengine provides comprehensive and rich data subscription APIs, aimed at meeting data subscription needs under different programming languages and frameworks. These interfaces include but are not limited to creating consumers, subscribing to topics, unsubscribing, obtaining real-time data, submitting consumption progress, and getting and setting consumption progress. Currently, TDengine supports a variety of mainstream programming languages, including C, Java, Go, Rust, Python, and C#, enabling developers to easily use TDengine's data subscription features in various application scenarios.
It is worth mentioning that TDengine's data subscription APIs are highly consistent with the popular Kafka subscription APIs in the industry, making it easy for developers to get started and leverage their existing knowledge and experience. To facilitate user understanding and reference, TDengine's official documentation provides detailed descriptions and example codes of various APIs, which can be accessed in the connectors section of the TDengine official website. Through these APIs, developers can efficiently implement real-time data subscription and processing to meet data handling needs in various complex scenarios.
### Replay Feature
TDengine's data subscription feature supports a replay function, allowing users to replay the data stream in the actual order of data writing. This feature is based on TDengine's efficient WAL mechanism, ensuring data consistency and reliability.
To use the data subscription's replay feature, users can specify the time range in the query statement to precisely control the start and end times of the replay. This allows users to easily replay data within a specific time period, whether for troubleshooting, data analysis, or other purposes.
If the following 3 data entries were written, then during replay, the first entry is returned first, followed by the second entry after 5 seconds, and the third entry 3 seconds after obtaining the second entry.
```text
2023/09/22 00:00:00.000
2023/09/22 00:00:05.000
2023/09/22 00:00:08.000
```
When using the data subscription's replay feature, note the following:
- The replay function of data subscription only supports data playback for query subscriptions; supertable and database subscriptions do not support playback.
- Replay does not support progress saving.
- Because data playback itself requires processing time, there is a precision error of several tens of milliseconds in playback.

View File

@ -0,0 +1,100 @@
---
title: Caching
slug: /advanced-features/caching
---
In the big data applications of the Internet of Things (IoT) and the Industrial Internet of Things (IIoT), the value of real-time data often far exceeds that of historical data. Enterprises not only need data processing systems to have efficient real-time writing capabilities but also need to quickly obtain the latest status of devices or perform real-time calculations and analyses on the latest data. Whether it's monitoring the status of industrial equipment, tracking vehicle locations in the Internet of Vehicles, or real-time readings of smart meters, current values are indispensable core data in business operations. These data are directly related to production safety, operational efficiency, and user experience.
For example, in industrial production, the current operating status of production line equipment is crucial. Operators need to monitor key indicators such as temperature, pressure, and speed in real-time. If there is an anomaly in the equipment, these data must be presented immediately so that process parameters can be quickly adjusted to avoid downtime or greater losses. In the field of the Internet of Vehicles, taking DiDi as an example, the real-time location data of vehicles is key to optimizing dispatch strategies and improving operational efficiency on the DiDi platform, ensuring that each passenger gets on the vehicle quickly and enjoys a higher quality travel experience.
At the same time, dashboard systems and smart meters, as windows for on-site operations and user ends, also need real-time data support. Whether it's factory managers obtaining real-time production indicators through dashboards or household users checking the usage of smart water and electricity meters at any time, real-time data not only affects operational and decision-making efficiency but also directly relates to user satisfaction with the service.
## Limitations of Traditional Caching Solutions
To meet these high-frequency real-time query needs, many enterprises choose to integrate caching technologies like Redis into their big data platforms, enhancing query performance by adding a caching layer between the database and applications. However, this approach also brings several problems:
- Increased system complexity: Additional deployment and maintenance of the cache cluster are required, raising higher demands on system architecture.
- Rising operational costs: Additional hardware resources are needed to support the cache, increasing maintenance and management expenses.
- Consistency issues: Data synchronization between the cache and the database requires additional mechanisms to ensure consistency, otherwise data inconsistencies may occur.
## TDengine's Solution: Built-in Read Cache
To address these issues, TDengine has designed and implemented a read cache mechanism specifically for high-frequency real-time query scenarios in IoT and IIoT. This mechanism automatically caches the last record of each table in memory, thus meeting users' real-time query needs for current values without introducing third-party caching technologies.
TDengine uses a time-driven cache management strategy, prioritizing the storage of the latest data in the cache, allowing for quick results without needing to access the hard disk. When the cache capacity reaches the set limit, the system will batch-write the earliest data to the disk, enhancing query efficiency and effectively reducing the disk's write load, thereby extending the hardware's lifespan.
Users can customize the cache mode by setting the `cachemodel` parameter, including caching the latest row of data, the most recent non-NULL value of each column, or caching both rows and columns. This flexible design is particularly important in IoT scenarios, making real-time queries of device status more efficient and accurate.
This built-in read cache mechanism significantly reduces query latency, avoids the complexity and operational costs of introducing external systems like Redis, and reduces the pressure of frequent queries on the storage system, greatly enhancing the overall throughput of the system. It ensures stable and efficient operation even in high-concurrency scenarios. Through read caching, TDengine provides a more lightweight real-time data processing solution, not only optimizing query performance but also reducing overall operational costs, providing strong technical support for IoT and IIoT users.
## TDengine's Read Cache Configuration
When creating a database, users can choose whether to enable the caching mechanism to store the latest data of each subtable in that database. This caching mechanism is controlled by the database creation parameter `cachemodel`. The parameter `cachemodel` has the following 4 options:
- none: no caching
- last_row: caches the most recent row of data from the subtable, significantly improving the performance of the `last_row` function
- last_value: caches the most recent non-NULL value of each column from the subtable, significantly improving the performance of the `last` function when no special clauses (such as WHERE, ORDER BY, GROUP BY, INTERVAL) are involved
- both: caches both the most recent row and the most recent non-NULL column values, equivalent to enabling the behaviors of `last_row` and `last_value` simultaneously
When using database read caching, the `cachesize` parameter can be used to configure the memory size for each vnode.
- cachesize: represents the memory size used to cache the most recent data of subtables in each vnode. The default is 1, the range is [1, 65536], in MB. It should be configured reasonably according to the machine memory.
For specific database creation, related parameters, and operation instructions, please refer to [Creating a Database](../../tdengine-reference/sql-manual/manage-databases/)
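As a sketch, a database that caches both the latest row and the latest non-NULL column values, with 10 MB of cache per vnode, could be created as follows (the database name and sizes are illustrative):
```sql
CREATE DATABASE power_cache CACHEMODEL 'both' CACHESIZE 10;
```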
## Caching Practices for Real-Time Data Queries
This section takes smart electric meters as an example to look in detail at how LAST caching improves the performance of real-time data queries. First, use the taosBenchmark tool to generate the time-series data of smart electric meters needed for this chapter.
```shell
# taosBenchmark -d power -Q --start-timestamp=1600000000000 --tables=10000 --records=10000 --time-step=10000 -y
```
With the above command, the taosBenchmark tool creates a test database for electric meters named `power`, generating a total of 100 million time-series data entries. The timestamps of the time-series data start from `1600000000000 (2020-09-13T20:26:40+08:00)`, with the supertable `meters` containing 10,000 devices (subtables), each device having 10,000 data entries, and a data collection interval of 10 seconds per entry.
To query the latest current and timestamp data of any electric meter, execute the following SQL:
```sql
taos> select last(ts,current) from meters;
last(ts) | last(current) |
=================================================
2020-09-15 00:13:10.000 | 1.1294620 |
Query OK, 1 row(s) in set (0.353815s)
taos> select last_row(ts,current) from meters;
last_row(ts) | last_row(current) |
=================================================
2020-09-15 00:13:10.000 | 1.1294620 |
Query OK, 1 row(s) in set (0.344070s)
```
If you want to use caching to query the latest timestamp data of any electric meter, execute the following SQL and check if the database cache is effective.
```sql
taos> alter database power cachemodel 'both' ;
Query OK, 0 row(s) affected (0.046092s)
taos> show create database power\G;
*************************** 1.row ***************************
Database: power
Create Database: CREATE DATABASE `power` BUFFER 256 CACHESIZE 1 CACHEMODEL 'both' COMP 2 DURATION 14400m WAL_FSYNC_P...
Query OK, 1 row(s) in set (0.000282s)
```
Query the latest real-time data of the electric meter again; the first query will perform cache computation, significantly reducing the latency of subsequent queries.
```sql
taos> select last(ts,current) from meters;
last(ts) | last(current) |
=================================================
2020-09-15 00:13:10.000 | 1.1294620 |
Query OK, 1 row(s) in set (0.044021s)
taos> select last_row(ts,current) from meters;
last_row(ts) | last_row(current) |
=================================================
2020-09-15 00:13:10.000 | 1.1294620 |
Query OK, 1 row(s) in set (0.046682s)
```
As can be seen, the query latency has been reduced from 353/344ms to 44ms, an improvement of approximately 8 times.

View File

@ -0,0 +1,284 @@
---
title: Stream Processing
slug: /advanced-features/stream-processing
---
import Image from '@theme/IdealImage';
import watermarkImg from '../assets/stream-processing-01.png';
In the processing of time-series data, it is often necessary to clean and preprocess the raw data before using a time-series database for long-term storage. Moreover, it is common to use the original time-series data to generate new time-series data through calculations. In traditional time-series data solutions, it is often necessary to deploy systems like Kafka, Flink, etc., for stream processing. However, the complexity of stream processing systems brings high development and operational costs.
TDengine's stream computing engine provides the capability to process data streams in real-time as they are written. It uses SQL to define real-time stream transformations. Once data is written into the stream's source table, it is automatically processed in the defined manner and pushed to the destination table according to the defined trigger mode. It offers a lightweight solution that replaces complex stream processing systems and can provide millisecond-level computational result latency under high-throughput data writing scenarios.
Stream computing can include data filtering, scalar function computations (including UDFs), and window aggregation (supporting sliding windows, session windows, and state windows). It can use supertables, subtables, and basic tables as source tables, writing into destination supertables. When creating a stream, the destination supertable is automatically created, and newly inserted data is processed and written into it as defined by the stream. Using the `partition by` clause, partitions can be divided by table name or tags, and different partitions will be written into different subtables of the destination supertable.
TDengine's stream computing can support aggregation of supertables distributed across multiple nodes and can handle out-of-order data writing. It provides a watermark mechanism to measure the degree of tolerance for data disorder and offers an `ignore expired` configuration option to decide the handling strategy for out-of-order data — either discard or recalculate.
Below is a detailed introduction to the specific methods used in stream computing.
## Creating Stream Computing
The syntax is as follows:
```sql
CREATE STREAM [IF NOT EXISTS] stream_name [stream_options] INTO stb_name
[(field1_name, ...)] [TAGS (column_definition [, column_definition] ...)]
SUBTABLE(expression) AS subquery
stream_options: {
TRIGGER [AT_ONCE | WINDOW_CLOSE | MAX_DELAY time]
WATERMARK time
IGNORE EXPIRED [0|1]
DELETE_MARK time
FILL_HISTORY [0|1]
IGNORE UPDATE [0|1]
}
column_definition:
col_name col_type [COMMENT 'string_value']
```
The subquery is a subset of the regular query syntax.
```sql
subquery: SELECT select_list
from_clause
[WHERE condition]
[PARTITION BY tag_list]
[window_clause]
window_clause: {
SESSION(ts_col, tol_val)
| STATE_WINDOW(col)
| INTERVAL(interval_val [, interval_offset]) [SLIDING (sliding_val)]
| EVENT_WINDOW START WITH start_trigger_condition END WITH end_trigger_condition
| COUNT_WINDOW(count_val[, sliding_val])
}
```
The subquery supports session windows, state windows, and sliding windows. When used with supertables, session windows and state windows must be used together with `partition by tbname`.
1. SESSION is a session window, where tol_val is the maximum range of the time interval. All data within the tol_val time interval belong to the same window. If the time interval between two consecutive data points exceeds tol_val, the next window automatically starts.
2. EVENT_WINDOW is an event window, defined by start and end conditions. The window starts when the start_trigger_condition is met and closes when the end_trigger_condition is met. start_trigger_condition and end_trigger_condition can be any condition expressions supported by TDengine and can include different columns.
3. COUNT_WINDOW is a counting window, divided by a fixed number of data rows. count_val is a constant, a positive integer, and must be at least 2 and less than 2147483648. count_val represents the maximum number of data rows in each COUNT_WINDOW. If the total number of data rows cannot be evenly divided by count_val, the last window will have fewer rows than count_val. sliding_val is a constant, representing the number of rows the window slides, similar to the SLIDING in INTERVAL.
The definition of a window is exactly the same as in the time-series data window query, for details refer to the TDengine window functions section.
The following SQL will create a stream computation. After execution, TDengine will automatically create a supertable named avg_vol. This stream computation uses a 1-minute time window sliding forward every 30 seconds to calculate the average voltage of the smart meters, and writes the results from the meters data into avg_vol. Data from different partitions will be written into separate subtables.
```sql
CREATE STREAM avg_vol_s INTO avg_vol AS
SELECT _wstart, count(*), avg(voltage) FROM power.meters PARTITION BY tbname INTERVAL(1m) SLIDING(30s);
```
The explanations of the relevant parameters involved in this section are as follows.
- stb_name is the table name of the supertable where the computation results are saved. If this supertable does not exist, it will be automatically created; if it already exists, the column schema information will be checked. See the section Writing to an Existing Supertable below.
- The tags clause defines the rules for creating tags in the stream computation. Through the tags field, custom tag values can be generated for each partition's corresponding subtable.
## Rules and Strategies for Stream Computation
### Partitioning in Stream Computation
In TDengine, we can use the partition by clause combined with tbname, tag columns, ordinary columns, or expressions to perform multi-partition computations on a stream. Each partition has its own timeline and time window, and they will aggregate data separately and write the results into different subtables of the destination table. If the partition by clause is not used, all data will be written into the same subtable by default.
Specifically, partition by + tbname is a very practical operation, which means performing stream computation for each subtable. The advantage of this is that it allows for customized processing based on the characteristics of each subtable, thereby improving computational efficiency.
When creating a stream, if the SUBTABLE clause is not used, the supertable created by the stream computation will contain a unique tag column groupId. Each partition will be assigned a unique groupId, and the corresponding subtable name will be calculated using the MD5 algorithm. TDengine will automatically create these subtables to store the computation results of each partition. This mechanism makes data management more flexible and efficient, and also facilitates subsequent data querying and analysis.
If the statement for creating the stream contains a SUBTABLE clause, users can generate custom table names for each partition's corresponding subtable. Example as follows.
```sql
CREATE STREAM avg_vol_s INTO avg_vol SUBTABLE(CONCAT('new-', tname)) AS SELECT _wstart, count(*), avg(voltage) FROM meters PARTITION BY tbname tname INTERVAL(1m);
```
In the PARTITION clause, an alias tname is defined for tbname, and the alias in the PARTITION clause can be used for expression calculation in the SUBTABLE clause. In the example above, the naming rule for newly created subtables is `new-` + subtable name + `_` + supertable name + `_` + groupId.
**Note**: If the length of the subtable name exceeds the limit of TDengine, it will be truncated. If the subtable name to be generated already exists in another supertable, since TDengine's subtable names are unique, the creation of the corresponding new subtable and the writing of data will fail.
### Stream Computation Processing Historical Data
Under normal circumstances, stream computation tasks will not process data that was written to the source table before the stream was created. This is because the trigger for stream computation is based on newly written data, not existing data. However, if we need to process these existing historical data, we can set the fill_history option to 1 when creating the stream.
By enabling the fill_history option, the created stream computation task will be capable of processing data written before, during, and after the creation of the stream. This means that data written either before or after the creation of the stream will be included in the scope of stream computation, thus ensuring data integrity and consistency. This setting provides users with greater flexibility, allowing them to flexibly handle historical and new data according to actual needs.
For example, create a stream to count the number of data entries generated by all smart meters every 10s, and also calculate historical data. SQL as follows:
```sql
create stream if not exists count_history_s fill_history 1 into count_history as select count(*) from power.meters interval(10s)
```
Combined with the fill_history 1 option, it is possible to process data only within a specific historical time range, such as data after a historical moment (January 30, 2020).
```sql
create stream if not exists count_history_s fill_history 1 into count_history as select count(*) from power.meters where ts > '2020-01-30' interval(10s)
```
For instance, to process data within a specific time period, the end time can be a future date.
```sql
create stream if not exists count_history_s fill_history 1 into count_history as select count(*) from power.meters where ts > '2020-01-30' and ts < '2023-01-01' interval(10s)
```
If the stream task has completely expired and you no longer want it to monitor or process data, you can manually delete it, and the computed data will still be retained.
### Trigger Modes for Stream Computing
When creating a stream, you can specify the trigger mode of stream computing through the TRIGGER command. For non-window computations, the trigger is real-time; for window computations, there are currently 4 trigger modes, with WINDOW_CLOSE as the default.
1. AT_ONCE: Triggered immediately upon writing.
2. WINDOW_CLOSE: Triggered when the window closes (the closing of the window is determined by the event time, can be used in conjunction with watermark).
3. MAX_DELAY time: If the window closes, computation is triggered. If the window has not closed, and the duration since it has not closed exceeds the time specified by max delay, computation is triggered.
4. FORCE_WINDOW_CLOSE: Based on the current time of the operating system, only the results of the currently closed window are calculated and pushed out. The window is only calculated once at the moment of closure, and will not be recalculated subsequently. This mode currently only supports INTERVAL windows (does not support sliding); FILL_HISTORY must be 0, IGNORE EXPIRED must be 1, IGNORE UPDATE must be 1; FILL only supports PREV, NULL, NONE, VALUE.
The closing of the window is determined by the event time; if the event stream is interrupted or continuously delayed, the event time cannot advance, which may leave the computation results outdated.
Therefore, stream computing provides the MAX_DELAY trigger mode that combines event time with processing time: MAX_DELAY mode triggers computation immediately when the window closes, and its unit can be specified, specific units: a (milliseconds), s (seconds), m (minutes), h (hours), d (days), w (weeks). Additionally, when data is written, if the time that triggers computation exceeds the time specified by MAX_DELAY, computation is triggered immediately.
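As a sketch of this trigger mode, the following stream pushes window results at most 5 seconds after data arrives, even if the window has not yet closed; the stream and target table names are illustrative.
```sql
CREATE STREAM max_delay_s TRIGGER MAX_DELAY 5s INTO avg_vol_delay AS
SELECT _wstart, avg(voltage) FROM power.meters PARTITION BY tbname INTERVAL(1m);
```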
### Window Closure in Stream Computing
The core of stream computing lies in using the event time (i.e., the timestamp primary key in the written record) as the basis for calculating the window closure time, rather than relying on the TDengine server's time. Using event time as the basis effectively avoids issues caused by discrepancies between client and server times and can properly address challenges such as out-of-order data writing.
To further control the tolerance level for out-of-order data, stream computing introduces the watermark mechanism. When creating a stream, users can specify the value of watermark through the stream_option parameter, which defines the upper bound of tolerance for out-of-order data, defaulting to 0.
Assuming T = Latest event time - watermark, each time new data is written, the system updates the window closure time based on this formula. Specifically, the system closes all open windows whose end time is less than T. If the trigger mode is set to window_close or max_delay, the aggregated results of the window are pushed. The diagram below illustrates the window closure process in stream computing.
<figure>
<Image img={watermarkImg} alt="Window closure in stream processing"/>
<figcaption>Figure 1. Window closure diagram</figcaption>
</figure>
In the diagram above, the vertical axis represents moments, and the dots on the horizontal axis represent the data received. The related process is described as follows.
1. At moment T1, the 7th data point arrives, and based on T = Latest event - watermark, the calculated time falls within the second window, so the second window does not close.
2. At moment T2, the 6th and 8th data points arrive late to TDengine, and since the Latest event has not changed, T also remains unchanged, and the out-of-order data entering the second window has not yet been closed, thus it can be correctly processed.
3. At moment T3, the 10th data point arrives, T moves forward beyond the closure time of the second window, which is then closed, and the out-of-order data is correctly processed.
In window_close or max_delay modes, window closure directly affects the push results. In at_once mode, window closure only relates to memory usage.
### Expired Data Handling Strategy
For windows that have closed, data that falls into such windows again is marked as expired data. TDengine offers two ways to handle expired data, specified by the IGNORE EXPIRED option.
1. Recalculate, i.e., IGNORE EXPIRED 0: Re-find all data corresponding to the window from the TSDB and recalculate to get the latest result.
2. Directly discard, i.e., IGNORE EXPIRED 1: Default configuration, ignore expired data.
Regardless of the mode, the watermark should be properly set to obtain correct results (direct discard mode) or avoid frequent re-triggering of recalculations that lead to performance overhead (recalculation mode).
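For example, a stream that tolerates up to 10 seconds of out-of-order data and silently discards anything older could combine the two options as follows (a sketch; names are illustrative):
```sql
CREATE STREAM tolerant_s TRIGGER WINDOW_CLOSE WATERMARK 10s IGNORE EXPIRED 1 INTO avg_vol_wm AS
SELECT _wstart, avg(voltage) FROM power.meters PARTITION BY tbname INTERVAL(1m);
```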
### Data Update Handling Strategy
TDengine offers two ways to handle modified data, specified by the IGNORE UPDATE option.
1. Check whether the data has been modified, i.e., IGNORE UPDATE 0: Default configuration, if modified, recalculate the corresponding window.
2. Do not check whether the data has been modified, calculate all as incremental data, i.e., IGNORE UPDATE 1.
## Other Strategies for Stream Computing
### Writing to an Existing Supertable
When the result of stream computing needs to be written into an existing supertable, ensure that the columns of `stb_name` correspond correctly to the subquery output results. If the position and number of the `stb_name` columns exactly match the subquery output, there is no need to explicitly specify the correspondence; if the data types do not match, the system will automatically convert the subquery output to the types of the corresponding `stb_name` columns.
For already existing supertables, the system will check the schema information of the columns to ensure they match the subquery output results. Here are some key points:
1. Check if the schema information of the columns matches; if not, automatically perform type conversion. Currently, an error is reported only if the data length exceeds 4096 bytes; otherwise, type conversion can be performed.
2. Check if the number of columns is the same; if different, explicitly specify the correspondence between the supertable and the subquery columns, otherwise, an error is reported. If the same, you can specify the correspondence or not; if not specified, they correspond by position order.
**Note** Although stream computing can write results to an existing supertable, it cannot allow two existing stream computations to write result data to the same (super) table. This is to avoid data conflicts and inconsistencies, ensuring data integrity and accuracy. In practice, set the column correspondence according to actual needs and data structure to achieve efficient and accurate data processing.
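As an illustration of specifying the correspondence explicitly, assuming a supertable voltage_summary with columns (ts, cnt, avg_v) already exists, the stream definition could map its output columns by name as follows (a sketch; the table and column names are illustrative):
```sql
-- The column list after the target supertable maps the subquery output to named columns
CREATE STREAM map_cols_s INTO voltage_summary (ts, cnt, avg_v) AS
SELECT _wstart, count(*), avg(voltage) FROM power.meters PARTITION BY tbname INTERVAL(1m);
```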
### Customizing Tags for Target Tables
Users can generate custom tag values for each partition's subtable, as shown in the stream creation statement below:
```sql
CREATE STREAM output_tag trigger at_once INTO output_tag_s TAGS(alias_tag varchar(100)) as select _wstart, count(*) from power.meters partition by concat("tag-", tbname) as alias_tag interval(10s);
```
In the PARTITION clause, an alias `alias_tag` is defined for `concat("tag-", tbname)`, corresponding to the custom tag name of the supertable `output_tag_s`. In the example above, the tag value of each subtable newly created by the stream will be the prefix 'tag-' concatenated with the original table name. The following checks will be performed on the tag information:
1. Check if the schema information of the tag matches; if not, automatically perform data type conversion. Currently, an error is reported only if the data length exceeds 4096 bytes; otherwise, type conversion can be performed.
2. Check if the number of tags is the same; if different, explicitly specify the correspondence between the supertable and the subquery tags, otherwise, an error is reported. If the same, you can specify the correspondence or not; if not specified, they correspond by position order.
### Cleaning Up Intermediate States of Stream Computing
```sql
DELETE_MARK time
```
DELETE_MARK is used to delete cached window states, i.e., deleting the intermediate results of stream computing. Cached window states are mainly used for window result updates caused by expired data. If not set, the default value is 10 years.
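For example, a stream that keeps its cached window state for 30 days instead of the default 10 years might be created as follows (a sketch; names are illustrative):
```sql
CREATE STREAM delete_mark_s TRIGGER WINDOW_CLOSE DELETE_MARK 30d INTO avg_vol_dm AS
SELECT _wstart, avg(voltage) FROM power.meters PARTITION BY tbname INTERVAL(1m);
```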
## Specific Operations of Stream Computing
### Deleting Stream Computing
Only the stream computing task itself is deleted; data already written by the stream computation will not be deleted. SQL as follows:
```sql
DROP STREAM [IF EXISTS] stream_name;
```
### Displaying Stream Computing
View the SQL of stream computing tasks as follows:
```sql
SHOW STREAMS;
```
To display more detailed information, you can use:
```sql
SELECT * from information_schema.`ins_streams`;
```
### Pausing Stream Computing Tasks
The SQL to pause stream computing tasks is as follows:
```sql
PAUSE STREAM [IF EXISTS] stream_name;
```
If IF EXISTS is not specified, an error is reported when the stream does not exist; otherwise, the stream computation is paused. If IF EXISTS is specified, success is returned when the stream does not exist; otherwise, the stream computation is paused.
### Resuming Stream Computing Tasks
The SQL to resume stream computing tasks is as follows:
```sql
RESUME STREAM [IF EXISTS] [IGNORE UNTREATED] stream_name;
```
If IF EXISTS is not specified, an error is reported when the stream does not exist; otherwise, the stream computation is resumed. If IF EXISTS is specified, success is returned when the stream does not exist; otherwise, the stream computation is resumed. If IGNORE UNTREATED is specified, data written during the pause period of the stream computing task is ignored when resuming.
### Stream Computing Upgrade Fault Recovery
After upgrading TDengine, if the stream computing is not compatible, you need to delete the stream computing and then recreate it. The steps are as follows:
1. Modify taos.cfg, add `disableStream 1`
2. Restart taosd. If the startup fails, change the name of the stream directory to avoid taosd trying to load the stream computing data information during startup. Avoid using the delete operation to prevent risks caused by misoperations. The folders that need to be modified: `$dataDir/vnode/vnode*/tq/stream`, where `$dataDir` refers to the directory where TDengine stores data. In the `$dataDir/vnode/` directory, there will be multiple directories like vnode1, vnode2...vnode*, all need to change the name of the tq/stream directory to tq/stream.bk
3. Start the taos CLI and execute the following SQL
```sql
drop stream xxxx; ---- xxx refers to the stream name
flush database stream_source_db; ---- The database where the supertable for stream computing data reading is located
flush database stream_dest_db; ---- The database where the supertable for stream computing data writing is located
```
Example:
```sql
create stream streams1 into test1.streamst as select _wstart, count(a) c1 from test.st interval(1s) ;
drop stream streams1;
flush database test;
flush database test1;
```
4. Close taosd
5. Modify taos.cfg, remove `disableStream 1`, or change `disableStream` to 0
6. Start taosd

View File

@ -0,0 +1,57 @@
---
title: EdgeCloud Orchestration
slug: /advanced-features/edge-cloud-orchestration
---
import Image from '@theme/IdealImage';
import edgeCloud from '../assets/edge-cloud-orchestration-01.png';
## Why Edge-Cloud Collaboration is Needed
In industrial Internet scenarios, edge devices are used only to handle local data, and decision-makers cannot form a global understanding of the entire system based solely on information collected by edge devices. In practical applications, edge devices need to report data to cloud computing platforms (public or private clouds), where data aggregation and information integration are carried out, providing decision-makers with a global insight into the entire dataset. This edge-cloud collaboration architecture has gradually become an important pillar supporting the development of the industrial Internet.
Edge devices mainly monitor and alert on specific data on the production line, such as real-time production data in a particular workshop, and then synchronize this edge-side production data to the big data platform in the cloud.
On the edge side, there is a high requirement for real-time performance, but the data volume may not be large, typically ranging from a few thousand to tens of thousands of monitoring points in a workshop. On the central side, computing resources are generally abundant, capable of aggregating data from the edge side for analysis and computation.
To achieve this operation, the requirements for the database or data storage layer are to ensure that data can be reported step by step and selectively. In some scenarios, where the overall data volume is very large, selective reporting is necessary. For example, raw records collected every second on the edge side, when reported to the central side, are downsampled to once a minute, which greatly reduces the data volume but still retains key information for long-term data analysis and prediction.
In the past industrial data collection process, data was collected from programmable logic controllers (PLCs) and then entered into a Historian, an industrial real-time database, to support business applications. These systems are not easy to scale horizontally and are heavily dependent on the Windows ecosystem, which is relatively closed.
## TDengine's Edge-Cloud Collaboration Solution
TDengine Enterprise is committed to providing powerful edge-cloud collaboration capabilities, with the following notable features:
- Efficient data synchronization: Supports synchronization efficiency of millions of data per second, ensuring fast and stable data transmission between the edge side and the cloud.
- Multi-data source integration: Compatible with various external data sources, such as AVEVA PI System, OPC-UA, OPC-DA, MQTT, etc., to achieve broad data access and integration.
- Flexible configuration of synchronization rules: Provides configurable synchronization rules, allowing users to customize the strategy and method of data synchronization according to actual needs.
- Offline continuation and re-subscription: Supports offline continuation and re-subscription functions, ensuring the continuity and integrity of data synchronization in the event of unstable or interrupted networks.
- Historical data migration: Supports the migration of historical data, facilitating users to seamlessly migrate historical data to a new system when upgrading or replacing systems.
TDengine's data subscription feature offers great flexibility to subscribers, allowing users to configure subscription objects as needed. Users can subscribe to a database, a supertable, or even a query statement with filtering conditions. This enables users to implement selective data synchronization, syncing truly relevant data (including offline and out-of-order data) from one cluster to another to meet the data needs of various complex scenarios.
The following diagram illustrates the implementation of an edge-cloud collaboration architecture in TDengine Enterprise using a specific production workshop example. In the production workshop, real-time data generated by equipment is stored in TDengine deployed on the edge side. The TDengine deployed in the branch factory subscribes to the data from the TDengine in the production workshop. To better meet business needs, data analysts set some subscription rules, such as data downsampling or syncing only data exceeding a specified threshold. Similarly, the TDengine deployed on the corporate side then subscribes to data from various branch factories, achieving corporate-level data aggregation, ready for further analysis and processing.
<figure>
<Image img={edgeCloud} alt="Edge-cloud orchestration diagram"/>
<figcaption>Edge-cloud orchestration diagram</figcaption>
</figure>
This implementation approach has the following advantages:
- No coding required, just simple configuration on the edge side and cloud.
- Greatly improved automation of cross-regional data synchronization, reducing error rates.
- No need to cache data or send it in large batches, avoiding bandwidth congestion during traffic peaks.
- Data is synchronized through subscription with configurable rules; the approach is simple, flexible, and highly real-time.
- Both edge and cloud run TDengine, completely unifying the data model and reducing the difficulty of data governance.
Manufacturing enterprises often face a pain point in data synchronization. Many enterprises currently use offline methods to synchronize data, but TDengine Enterprise achieves real-time data synchronization with configurable rules. This method can avoid the resource waste and bandwidth congestion risks caused by regular large data transfers.
## Advantages of Edge-Cloud Collaboration
The maturity of IT and OT (Operational Technology) infrastructure in traditional industries varies widely, and compared with the internet industry, most enterprises lag significantly behind in digital investment. Many enterprises still use outdated systems to process data; these systems are often independent of each other, forming so-called data silos.
In this context, to inject new vitality into traditional industries with AI, the primary task is to integrate systems scattered across the enterprise and the data they collect, breaking the limitations of data silos. However, this process is full of challenges, as it involves multiple systems and a plethora of industrial Internet protocols, and data aggregation is not a simple merging task: data from different sources must be cleaned, transformed, and processed before it can be integrated into a unified platform.
When all data is aggregated into one system, the efficiency of accessing and processing data is significantly improved. Enterprises can respond more quickly to real-time data, solve problems more effectively, and achieve efficient collaboration among internal and external staff, enhancing overall operational efficiency.
Additionally, after data aggregation, advanced third-party AI analysis tools can be used to improve anomaly detection, enable real-time alerts, and provide more accurate predictions for production capacity, cost, and equipment maintenance. This enables decision-makers to better grasp the overall situation, provides strong support for the development of the enterprise, and helps traditional industries achieve digital transformation and intelligent upgrades.

View File

@ -0,0 +1,56 @@
---
title: TDengine 2.x
slug: /advanced-features/data-connectors/tdengine-2
---
import Image from '@theme/IdealImage';
import imgStep1 from '../../assets/tdengine-2-01.png';
import imgStep2 from '../../assets/tdengine-2-02.png';
import imgStep3 from '../../assets/tdengine-2-03.png';
import imgStep4 from '../../assets/tdengine-2-04.png';
This section describes how to create a data migration task through the Explorer interface to migrate data from an older TDengine 2.x cluster to the current cluster.
## Feature Overview
taosX migrates data by querying the source cluster and writing the results to the target database. Specifically, taosX uses the data of a subtable over a period of time as the basic unit of query, and writes the data to be migrated to the target database in batches.
taosX supports three migration modes:
1. **history** mode. This refers to migrating data within a specified time range. If no time range is specified, it migrates all data up to the time the task was created. The task stops once migration is complete.
2. **realtime** mode. It synchronizes data from the time the task is created onwards. The task will continue to run unless manually stopped.
3. **both** mode. It first executes in history mode, then in realtime mode.
Under each migration mode, you can specify whether to migrate the table structure. If "always" is selected, the structure of the table is synchronized to the target database before migrating data. This process may take longer if there are many subtables. If it is certain that the target database already has the same table schema as the source database, it is recommended to choose "none" to save time.
The task saves progress information to the disk during operation, so if the task is paused and then restarted, or if it automatically recovers from an anomaly, the task will not start over from the beginning.
For more options, it is recommended to read the description of each form field on the task creation page in detail.
## Specific Steps
First, click on the "Data Writing" menu on the left, then click the "Add Data Source" button on the right.
<figure>
<Image img={imgStep1} alt="Add data source"/>
<figcaption>Figure 1. Add a data source</figcaption>
</figure>
Then enter the task name, such as "migrate-test", and finally select the type "TDengine2". At this point, the form switches to a form dedicated to migrating data from TDengine2, containing a large number of options, each with detailed explanations, as shown in the images below.
<figure>
<Image img={imgStep2} alt="Add data source"/>
<figcaption>Figure 2. Add a data source</figcaption>
</figure>
<figure>
<Image img={imgStep3} alt="Add data source"/>
<figcaption>Figure 3. Add a data source</figcaption>
</figure>
<figure>
<Image img={imgStep4} alt="Add data source"/>
<figcaption>Figure 4. Add a data source</figcaption>
</figure>
After clicking the "Submit" button to submit the task, return to the "Data Source" task list page to monitor the status of the task.

View File

@ -0,0 +1,110 @@
---
title: TDengine 3.x
slug: /advanced-features/data-connectors/tdengine-3
---
import Image from '@theme/IdealImage';
import imgStep1 from '../../assets/tdengine-3-01.png';
import imgStep2 from '../../assets/tdengine-3-02.png';
import imgStep3 from '../../assets/tdengine-3-03.png';
import imgStep4 from '../../assets/tdengine-3-04.png';
import imgStep5 from '../../assets/tdengine-3-05.png';
import imgStep6 from '../../assets/tdengine-3-06.png';
import imgStep7 from '../../assets/tdengine-3-07.png';
import imgStep8 from '../../assets/tdengine-3-08.png';
import imgStep9 from '../../assets/tdengine-3-09.png';
This document describes how to use Explorer to subscribe to data from another cluster and synchronize it to this cluster.
## Preparation
Create the required Topic in the source cluster; a Topic can cover an entire database, a supertable, or a subtable. In this example, we demonstrate subscribing to a database named test.
### Step One: Enter the "Data Subscription" page
Open the Explorer interface of the source cluster, click the "Data Subscription" menu on the left, then click "Add New Topic".
<figure>
<Image img={imgStep1} alt=""/>
</figure>
### Step Two: Add a New Topic
Enter the topic name and select the database to subscribe to.
<figure>
<Image img={imgStep2} alt=""/>
</figure>
### Step Three: Copy the Topic's DSN
Click the "Create" button, return to the topic list and copy the **DSN** of the topic for later use.
<figure>
<Image img={imgStep3} alt=""/>
</figure>
## Create Subscription Task
### Step One: Enter the "Add Data Source" page
1. Click the "Data Writing" menu on the left
2. Click "Add Data Source"
<figure>
<Image img={imgStep4} alt=""/>
</figure>
### Step Two: Enter Data Source Information
1. Enter the task name
2. Select the task type "TDengine3"
3. Select the target database
4. Paste the DSN copied in the preparation step into the **Topic DSN** field. For example: tmq+ws://root:taosdata@localhost:6041/topic
5. After completing the above steps, click the "Connectivity Check" button to test connectivity with the source
<figure>
<Image img={imgStep5} alt=""/>
</figure>
### Step Three: Fill in Subscription Settings and Submit Task
1. Choose the subscription start position. Configurable to start from the earliest or latest data, default is earliest
2. Set the timeout period. Supports units ms (milliseconds), s (seconds), m (minutes), h (hours), d (days), M (months), y (years)
3. Set the subscription group ID. The subscription group ID is an arbitrary string used to identify a subscription group, with a maximum length of 192. If not specified, a randomly generated group ID will be used.
4. Set the client ID. The client ID is an arbitrary string used to identify the client, with a maximum length of 192.
5. Synchronize data that has been written to disk. If enabled, it can synchronize data that has been written to the TSDB time-series data storage file (i.e., not in WAL). If disabled, only data that has not yet been written to disk (i.e., saved in WAL) will be synchronized.
6. Synchronize table deletion operations. If enabled, table deletion operations will be synchronized to the target database.
7. Synchronize data deletion operations. If enabled, data deletion operations will be synchronized to the target database.
8. Compression. Enable WebSocket compression support to reduce network bandwidth usage.
9. Click the "Submit" button to submit the task
<figure>
<Image img={imgStep6} alt=""/>
</figure>
## Monitor Task Execution
After submitting the task, return to the data source page to view the task status. The task will first be added to the execution queue and will start running shortly.
<figure>
<Image img={imgStep7} alt=""/>
</figure>
Click the "View" button to monitor the dynamic statistical information of the task.
<figure>
<Image img={imgStep8} alt=""/>
</figure>
You can also click the left collapse button to expand the task's activity information. If the task runs abnormally, detailed explanations can be seen here.
<figure>
<Image img={imgStep9} alt=""/>
</figure>
## Advanced Usage
1. The FROM DSN supports multiple Topics, with the Topic names separated by commas. For example: `tmq+ws://root:taosdata@localhost:6041/topic1,topic2,topic3`
2. In the FROM DSN, you can also use database names, supertable names, or subtable names instead of Topic names. For example: `tmq+ws://root:taosdata@localhost:6041/db1,db2,db3`. In this case, there is no need to create a Topic in advance; taosX recognizes that database names are used and automatically creates the subscription Topics in the source cluster.
3. The FROM DSN supports the group.id parameter, which explicitly specifies the group ID used for the subscription. If it is not specified, a randomly generated group ID is used; see the example sketch below.
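The following is an illustrative sketch only, assuming that group.id is appended to the DSN as a query parameter; the exact syntax may differ between taosX versions, and `my_group_01` is a placeholder group ID:

```
tmq+ws://root:taosdata@localhost:6041/topic1,topic2?group.id=my_group_01
```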

View File

@ -0,0 +1,201 @@
---
title: AVEVA PI System
sidebar_label: PI System
slug: /advanced-features/data-connectors/pi-system
---
import Image from '@theme/IdealImage';
import imgStep1 from '../../assets/pi-system-01.png';
import imgStep2 from '../../assets/pi-system-02.png';
import imgStep3 from '../../assets/pi-system-03.png';
import imgStep4 from '../../assets/pi-system-04.png';
This section describes how to create data migration tasks through the Explorer interface, migrating data from the PI system to the current TDengine cluster.
## Feature Overview
The PI system is a software product suite used for data collection, retrieval, analysis, transmission, and visualization, serving as the infrastructure for enterprise-level systems managing real-time data and events. taosX can extract real-time or historical data from the PI system using the PI connector plugin.
From the perspective of data timeliness, PI data source tasks are divided into two categories: **real-time tasks** and **backfill tasks**. In the task type dropdown list, these two categories correspond to the names: **PI** and **PI backfill**.
From the data model perspective, PI data source tasks are divided into **single-column model** tasks and **multi-column model** tasks:
1. **Single-column model** tasks map one PI Point to one table in TDengine
2. **Multi-column model** tasks map one PI AF element to one table
Regarding the type of connected data source, PI data source tasks are further divided into **Archive Server** data sources and **AF Server** data sources. For **Archive Server** data sources, only the **single-column model** can be used. For **AF Server** data sources, both **single-column model** and **multi-column model** can be chosen.
Users configure the data mapping rules from PI to TDengine through a CSV file, referred to as the **model configuration file**:
1. For tasks using the AF Server single-column model, taosX automatically identifies which attributes of the element are referencing PI Point data, mapping one PI Point attribute to one table.
2. For tasks using the AF Server multi-column model, one element corresponds to one table. taosX by default maps PI Point attributes to TDengine Metric columns and other attributes to TDengine tag columns.
## Creating Tasks
### Add Data Source
In the data writing page, click the **+Add Data Source** button to enter the add data source page.
<figure>
<Image img={imgStep1} alt=""/>
</figure>
### Basic Configuration
Enter the task name in **Name**, such as "test";
Select **PI** or **PI backfill** from the **Type** dropdown list.
If the taosX service is running on or can directly connect to the server where the PI system is located (dependent on PI AF SDK), **Proxy** is not necessary; otherwise, configure **Proxy**: select the specified proxy from the dropdown, or click the **+Create New Proxy** button on the right to create a new proxy and follow the prompts to configure the proxy. That is, taosX or its proxy needs to be deployed on a host that can directly connect to the PI system.
Select a target database from the **Target Database** dropdown list, or click the **+Create Database** button on the right to create a new database.
<figure>
<Image img={imgStep2} alt=""/>
</figure>
### Connection Configuration
The PI connector supports two connection methods:
1. **PI Data Archive Only**: Does not use AF mode. In this mode, directly fill in the **PI Service Name** (server address, usually using the hostname).
<figure>
<Image img={imgStep3} alt=""/>
</figure>
2. **PI Data Archive and Asset Framework (AF) Server**: Uses AF SDK. In addition to configuring the service name, this mode also requires configuring the PI system (AF Server) name (hostname) and AF database name.
<figure>
<Image img={imgStep4} alt=""/>
</figure>
Click the **Connectivity Check** button to verify if the data source is available.
### Data Model Configuration
This part has two tabs, corresponding to the configuration of the single-column model and the multi-column model. If this is your first configuration, whether you choose a single-column model or a multi-column model, be sure to click the "Download Default Configuration" button. This action will trigger the generation of the default **model configuration file** and also download the **model configuration file** to your local machine, which you can view or edit. After editing, you can also upload it again to overwrite the default configuration.
If you want to synchronize all points or all template elements, then the default configuration is sufficient. If you want to filter specific naming patterns of points or element templates, you need to fill in the filter conditions before clicking "Download Default Configuration".
#### Multi-column Model Configuration File
Below is an example of a multi-column model configuration file. This configuration file includes configurations for two supertables: one is the metertemplate table, which receives data from elements of the MeterTemplate template; the other is the farm table, which receives data from elements of the Farm template.
```csv
SuperTable,metertemplate
SubTable,${element_name}_${element_id}
Template,MeterTemplate
Filter,
ts,KEY,TIMESTAMP,$ts
voltage,COLUMN,DOUBLE,$voltage
voltage_status,COLUMN,INT,$voltage_status
current,COLUMN,DOUBLE,$current
current_status,COLUMN,INT,$current_status
element_id,tag,VARCHAR(100),$element_id
element_name,tag,VARCHAR(100),$element_name
path,tag,VARCHAR(100),$path
categories,tag,VARCHAR(100),$categories
SuperTable,farm
SubTable,${element_name}_${element_id}
Template,Farm
Filter,
ts,KEY,TIMESTAMP,$ts
wind_speed,COLUMN,FLOAT,$wind_speed
wind_speed_status,COLUMN,INT,$wind_speed_status
power_production,COLUMN,FLOAT,$power_production
power_production_status,COLUMN,INT,$power_production_status
lost_power,COLUMN,FLOAT,$lost_power
lost_power_status,COLUMN,INT,$lost_power_status
farm_lifetime_production__weekly_,COLUMN,FLOAT,$farm_lifetime_production__weekly_
farm_lifetime_production__weekly__status,COLUMN,INT,$farm_lifetime_production__weekly__status
farm_lifetime_production__hourly_,COLUMN,FLOAT,$farm_lifetime_production__hourly_
farm_lifetime_production__hourly__status,COLUMN,INT,$farm_lifetime_production__hourly__status
element_id,tag,VARCHAR(100),$element_id
element_name,tag,VARCHAR(100),$element_name
path,tag,VARCHAR(100),$path
categories,tag,VARCHAR(100),$categories
```
The multi-column model configuration file consists of one or more supertable definitions. Each supertable configuration includes:
1. Correspondence between supertables and templates
2. Correspondence between attributes and TDengine Metric columns
3. Correspondence between attributes and TDengine tag columns
4. Source data filtering conditions
5. For each column, whether it is a metric column or a tag column, a mapping rule can be configured; see the "Data extraction, filtering, and transformation" section in [Zero-code third-party data access](../)
#### Single-column model configuration file
Below is an example of a single-column model configuration file.
```csv
SuperTable,volt_float32
SubTable,${point_name}
Filter,
ts,KEY,TIMESTAMP,$ts
value,COLUMN,FLOAT,$value
status,COLUMN,INT,$status
path,tag,VARCHAR(200),$path
point_name,tag,VARCHAR(100),$point_name
ptclassname,tag,VARCHAR(100),$ptclassname
sourcetag,tag,VARCHAR(100),$sourcetag
tag,tag,VARCHAR(100),$tag
descriptor,tag,VARCHAR(100),$descriptor
exdesc,tag,VARCHAR(100),$exdesc
engunits,tag,VARCHAR(100),$engunits
pointsource,tag,VARCHAR(100),$pointsource
step,tag,VARCHAR(100),$step
future,tag,VARCHAR(100),$future
element_paths,tag,VARCHAR(512),`$element_paths.replace("\\", ".")`
SuperTable,milliampere_float32
SubTable,${point_name}
Filter,
ts,KEY,TIMESTAMP,$ts
value,COLUMN,FLOAT,$value
status,COLUMN,INT,$status
path,tag,VARCHAR(200),$path
point_name,tag,VARCHAR(100),$point_name
ptclassname,tag,VARCHAR(100),$ptclassname
sourcetag,tag,VARCHAR(100),$sourcetag
tag,tag,VARCHAR(100),$tag
descriptor,tag,VARCHAR(100),$descriptor
exdesc,tag,VARCHAR(100),$exdesc
engunits,tag,VARCHAR(100),$engunits
pointsource,tag,VARCHAR(100),$pointsource
step,tag,VARCHAR(100),$step
future,tag,VARCHAR(100),$future
element_paths,tag,VARCHAR(512),`$element_paths.replace("\\", ".")`
Meter_1000004_Voltage,POINT,volt_float32
Meter_1000004_Current,POINT,milliampere_float32
Meter_1000001_Voltage,POINT,volt_float32
Meter_1000001_Current,POINT,milliampere_float32
Meter_1000474_Voltage,POINT,volt_float32
Meter_1000474_Current,POINT,milliampere_float32
```
The single-column model configuration file is divided into two parts. The first part, like the multi-column model configuration file, consists of several supertable definitions. The second part is the point list, which configures the mapping between points and supertables. The default configuration maps points with the same UOM and data type to the same supertable.
### Backfill Configuration
1. For PI tasks, you can configure the "restart compensation time." If the task is unexpectedly interrupted, configuring this parameter when restarting is very useful as it allows taosX to automatically backfill data for a period.
2. For PI backfill tasks, you must configure the start and end times of the backfill.
### Advanced Options
The advanced options vary for different types of tasks. Common advanced options include:
1. Connector log level
2. Batch size for connector queries and data sending
3. Maximum delay for a single read
For **real-time tasks of the multi-column model**, there are also the following switch options:
1. Whether to synchronize newly added elements. If enabled, the PI connector will listen for newly added elements under the template and automatically synchronize the data of the newly added elements without needing to restart the task.
2. Whether to synchronize changes in static attributes. If enabled, the PI connector will synchronize all changes in static attributes (non-PI Point attributes). That is, if a static attribute value of an element in the PI AF Server is modified, the corresponding tag value in the TDengine table will also be modified.
3. Whether to synchronize the deletion of elements. If enabled, the PI connector will listen for events of element deletions under the configured template and synchronize the deletion of the corresponding subtable in TDengine.
4. Whether to synchronize the deletion of historical data. If enabled, for the time-series data of an element, if data at a certain time is deleted in PI, the corresponding column data at that time in TDengine will be set to null.
5. Whether to synchronize the modification of historical data. If enabled, for the time-series data of an element, if historical data is modified in PI, the corresponding data at that time in TDengine will also be updated.

View File

@ -0,0 +1,249 @@
---
title: OPC UA
slug: /advanced-features/data-connectors/opc-ua
---
import Image from '@theme/IdealImage';
import imgStep1 from '../../assets/opc-ua-01.png';
import imgStep2 from '../../assets/opc-ua-02.png';
import imgStep3 from '../../assets/opc-ua-03.png';
import imgStep4 from '../../assets/opc-ua-04.png';
import imgStep5 from '../../assets/opc-ua-05.png';
import imgStep6 from '../../assets/opc-ua-06.png';
import imgStep7 from '../../assets/opc-ua-07.png';
import imgStep8 from '../../assets/opc-ua-08.png';
import imgStep9 from '../../assets/opc-ua-09.png';
This section describes how to create data migration tasks through the Explorer interface to synchronize data from an OPC-UA server to the current TDengine cluster.
## Overview
OPC is one of the interoperability standards for securely and reliably exchanging data in the field of industrial automation and other industries.
OPC-UA is the next-generation standard of the classic OPC specifications, a platform-independent, service-oriented architecture specification that integrates all the functionalities of the existing OPC Classic specifications, providing a path to a more secure and scalable solution.
TDengine can efficiently read data from OPC-UA servers and write it to TDengine, enabling real-time data ingestion.
## Creating a Task
### 1. Add a Data Source
On the data writing page, click the **+ Add Data Source** button to enter the add data source page.
<figure>
<Image img={imgStep1} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in **Name**, for example, for environmental temperature and humidity monitoring, name it **environment-monitoring**.
Select **OPC-UA** from the **Type** dropdown list.
**Proxy** is optional, you can select a specific proxy from the dropdown list, or click the **+ Create New Proxy** button on the right.
Select a target database from the **Target Database** dropdown list, or click the **+ Create Database** button on the right.
<figure>
<Image img={imgStep2} alt=""/>
</figure>
### 3. Configure Connection Information
In the **Connection Configuration** area, fill in the **OPC-UA Service Address**, for example: `127.0.0.1:5000`, and configure the data transmission security mode, with three security modes available:
1. None: Communication data is transmitted in plaintext.
2. Sign: Communication data is verified using a digital signature to protect data integrity.
3. SignAndEncrypt: Communication data is verified using a digital signature and encrypted using encryption algorithms to ensure data integrity, authenticity, and confidentiality.
If you choose Sign or SignAndEncrypt as the security mode, you must select a valid security policy. Security policies define how to implement the encryption and verification mechanisms in the security mode, including the encryption algorithms used, key lengths, digital certificates, etc. Available security policies include:
1. None: Only selectable when the security mode is None.
2. Basic128Rsa15: Uses RSA algorithm and 128-bit key length to sign or encrypt communication data.
3. Basic256: Uses AES algorithm and 256-bit key length to sign or encrypt communication data.
4. Basic256Sha256: Uses the AES algorithm with a 256-bit key length, and computes digital signatures using the SHA-256 algorithm.
5. Aes128Sha256RsaOaep: Uses the AES-128 algorithm for encrypting and decrypting communication data, computes digital signatures using the SHA-256 algorithm, and uses the RSA algorithm with OAEP mode for encrypting and decrypting symmetric communication keys.
6. Aes256Sha256RsaPss: Uses the AES-256 algorithm for encrypting and decrypting communication data, computes digital signatures using the SHA-256 algorithm, and uses the RSA algorithm with PSS mode for encrypting and decrypting symmetric communication keys.
<figure>
<Image img={imgStep3} alt=""/>
</figure>
### 4. Choose Authentication Method
As shown below, switch tabs to choose different authentication methods, with the following options available:
1. Anonymous
2. Username
3. Certificate Access: Can be the same as the security communication certificate, or a different certificate.
<figure>
<Image img={imgStep4} alt=""/>
</figure>
After configuring the connection properties and authentication method, click the **Connectivity Check** button to check if the data source is available. If a security communication certificate or authentication certificate is used, the certificate must be trusted by the OPC UA server; otherwise, the check will fail.
### 5. Configure Points Set
For the **Points Set**, you can either use a CSV configuration file or **Select All Points**.
#### 5.1. Upload CSV Configuration File
You can download the blank CSV template, fill in the point information according to the template, and then upload the CSV configuration file to configure the points; or you can download the data points that match the configured filter conditions, in the format specified by the CSV template.
CSV files have the following rules:
1. File Encoding
The encoding format of the CSV file uploaded by the user must be one of the following:
(1) UTF-8 with BOM
(2) UTF-8 (i.e., UTF-8 without BOM)
2. Header Configuration Rules
The header is the first line of the CSV file, with the following rules:
(1) The header of the CSV can configure the following columns:
| Number | Column Name | Description | Required | Default Behavior |
| ------ | ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| 1 | point_id | The id of the data point on the OPC UA server | Yes | None |
| 2 | stable | The corresponding supertable for the data point in TDengine | Yes | None |
| 3 | tbname | The corresponding subtable for the data point in TDengine | Yes | None |
| 4 | enable | Whether to collect data from this point | No | Use the unified default value `1` for enable |
| 5 | value_col | The column name in TDengine corresponding to the collected value of the data point | No | Use the unified default value `val` as the value_col |
| 6 | value_transform | The transformation function executed in taosX for the collected value of the data point | No | Do not transform the collected value uniformly |
| 7 | type | The data type of the collected value of the data point | No | Use the original type of the collected value as the data type in TDengine |
| 8 | quality_col | The column name in TDengine corresponding to the quality of the collected value | No | Do not add a quality column in TDengine uniformly |
| 9 | ts_col | The original timestamp column of the data point in TDengine | No | If both ts_col and received_ts_col are non-empty, use the former as the timestamp column; if one of ts_col or received_ts_col is non-empty, use the non-empty column as the timestamp column; if both are empty, use the original timestamp of the data point as the timestamp column with the default name `ts`. |
| 10 | received_ts_col | The timestamp column in TDengine when the data point value is received | No | Same as above |
| 11 | ts_transform | The transformation function executed in taosX for the original timestamp of the data point | No | Do not transform the original timestamp of the data point uniformly |
| 12 | received_ts_transform | The transformation function executed in taosX for the received timestamp of the data point | No | Do not transform the received timestamp of the data point uniformly |
| 13 | tag::VARCHAR(200)::name | The Tag column corresponding to the data point in TDengine. Here `tag` is a reserved keyword indicating that this column is a tag; `VARCHAR(200)` indicates the type of tag; `name` is the actual name of the tag. | No | If 1 or more tag columns are configured, use the configured tag columns; if no tag columns are configured and stable exists in TDengine, use the tags of the stable in TDengine; if no tag columns are configured and stable does not exist in TDengine, automatically add the following 2 tag columns: tag::VARCHAR(256)::point_id and tag::VARCHAR(256)::point_name |
(2) In the CSV Header, there cannot be duplicate columns;
(3) In the CSV Header, columns like `tag::VARCHAR(200)::name` can be configured multiple times, corresponding to multiple Tags in TDengine, but the names of the Tags cannot be repeated.
(4) In the CSV Header, the order of the columns does not affect the CSV file validation rules;
(5) In the CSV Header, columns that are not listed in the table above can be configured, such as: sequence number, these columns will be automatically ignored.
3. Row Configuration Rules
Each Row in the CSV file configures an OPC data point. The rules for Rows are as follows:
(1) Correspondence with columns in the Header
| Number | Column in Header | Type of Value | Value Range | Mandatory | Default Value |
| ------ | ----------------------- | ------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------------------------ |
| 1 | point_id | String | Strings like `ns=3;i=1005`, must meet the OPC UA ID specification, i.e., include ns and id parts | Yes | |
| 2 | enable | int | 0: Do not collect this point, and delete the corresponding subtable in TDengine before the OPC DataIn task starts; 1: Collect this point, do not delete the subtable before the OPC DataIn task starts. | No | 1 |
| 3 | stable | String | Any string that meets the TDengine supertable naming convention; if the special character `.` exists, it is replaced with an underscore. If `{type}` exists: when type in the CSV file is not empty, it is replaced with the value of type; when type is empty, it is replaced with the original type of the collected value | Yes | |
| 4 | tbname | String | Any string that meets the TDengine subtable naming convention; for OPC UA: if `{ns}` exists, replace with ns from point_id; if `{id}` exists, replace with id from point_id. For OPC DA: if `{tag_name}` exists, replace with tag_name | Yes | |
| 5 | value_col | String | Column name that meets TDengine naming convention | No | val |
| 6 | value_transform | String | Expressions that meet the Rhai engine, for example: `(val + 10) / 1000 * 2.0`, `log(val) + 10`, etc.; | No | None |
| 7 | type | String | Supported types include: b/bool/i8/tinyint/i16/smallint/i32/int/i64/bigint/u8/tinyint unsigned/u16/smallint unsigned/u32/int unsigned/u64/bigint unsigned/f32/float/f64/double/timestamp/timestamp(ms)/timestamp(us)/timestamp(ns)/json | No | Original type of the data point value |
| 8 | quality_col | String | Column name that meets TDengine naming convention | No | None |
| 9 | ts_col | String | Column name that meets TDengine naming convention | No | ts |
| 10 | received_ts_col | String | Column name that meets TDengine naming convention | No | rts |
| 11 | ts_transform | String | Supports +, -, *, /, % operators, for example: `ts / 1000 * 1000` sets the last 3 digits of a timestamp in ms to 0; `ts + 8 * 3600 * 1000` adds 8 hours to a timestamp in ms; `ts - 8 * 3600 * 1000` subtracts 8 hours from a timestamp in ms | No | None |
| 12 | received_ts_transform | String | | No | None |
| 13 | tag::VARCHAR(200)::name | String | The value inside a tag, when the tag type is VARCHAR, can be in Chinese | No | NULL |
(2) `point_id` is unique throughout the DataIn task, meaning: in an OPC DataIn task, a data point can only be written to one subtable in TDengine. If you need to write a data point to multiple subtables, you need to create multiple OPC DataIn tasks;
(3) When `point_id` is different but `tbname` is the same, `value_col` must be different. This configuration allows data from multiple data points of different types to be written to different columns in the same subtable. This method corresponds to the "OPC data into TDengine wide table" usage scenario.
4. Other Rules
(1) If the number of columns in Header and Row are inconsistent, the validation fails, and the user is prompted with the line number that does not meet the requirements;
(2) Header is on the first line and cannot be empty;
(3) There must be at least one data point;
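For reference only, here is a minimal hypothetical points CSV that follows the header and row rules above; the point IDs, supertable, subtable, and tag values are illustrative placeholders rather than values from a real server:

```csv
point_id,stable,tbname,enable,value_col,type,ts_col,tag::VARCHAR(256)::point_name
ns=3;i=1005,volt_float32,meter_1005,1,val,f32,ts,Meter_1005_Voltage
ns=3;i=1006,volt_float32,meter_1006,1,val,f32,ts,Meter_1006_Voltage
```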
#### 5.2. Selecting Data Points
Data points can be filtered by configuring **Root Node ID**, **Namespace**, **Regular Matching**, etc.
Configure **Supertable Name**, **Table Name** to specify the supertable and subtable where the data will be written.
Configure **Primary Key Column**, choose `origin_ts` to use the original timestamp of the OPC data point as the primary key in TDengine; choose `received_ts` to use the data's reception timestamp as the primary key in TDengine. Configure **Primary Key Alias** to specify the name of the TDengine timestamp column.
<figure>
<Image img={imgStep5} alt=""/>
</figure>
### 6. Collection Configuration
In the collection configuration, configure the current task's collection mode, collection interval, collection timeout, etc.
<figure>
<Image img={imgStep6} alt=""/>
</figure>
As shown in the image above:
- **Collection Mode**: Can use `subscribe` or `observe` mode.
- `subscribe`: Subscription mode, reports data changes and writes to TDengine.
- `observe`: According to the `collection interval`, polls the latest value of the data point and writes to TDengine.
- **Collection Interval**: Default is 10 seconds. The interval between collections: starting from the end of the previous collection, the latest value of each data point is polled and written to TDengine. Only configurable in `observe` **Collection Mode**.
- **Collection Timeout**: If the OPC server does not return data within the set time when reading data points, the read fails. Default is 10 seconds. Only configurable in `observe` **Collection Mode**.
When **Selecting Data Points** is used in the **Data Point Set**, the collection configuration can include a **Data Point Update Mode** and **Data Point Update Interval** to enable dynamic data point updates. **Dynamic Data Point Update** means that, while the task is running, if the OPC Server adds or deletes data points, the points that match the conditions are automatically added to the current task without restarting the OPC task.
- Data Point Update Mode: Can choose `None`, `Append`, `Update`.
- None: Do not enable dynamic data point updates;
- Append: Enable dynamic data point updates, but only append;
- Update: Enable dynamic data point updates, append or delete;
- Data Point Update Interval: Effective when "Data Point Update Mode" is `Append` and `Update`. Unit: seconds, default value is 600, minimum value: 60, maximum value: 2147483647.
### 7. Advanced Options
<figure>
<Image img={imgStep7} alt=""/>
</figure>
As shown in the image above, configure advanced options for more detailed optimization of performance, logs, etc.
**Log Level** defaults to `info`, with options `error`, `warn`, `info`, `debug`, `trace`.
In **Maximum Write Concurrency**, set the maximum concurrency limit for writing to taosX. Default value: 0 (auto), which configures the concurrency automatically.
In **Batch Size**, set the batch size for each write, i.e., the maximum number of messages sent at one time.
In **Batch Delay**, set the maximum delay for a single send (in seconds). When the timeout expires, any pending data is sent immediately even if the **Batch Size** has not been reached.
In **Save Raw Data**, choose whether to save raw data. Default value: No.
When saving raw data, the following 2 parameters are effective.
In **Maximum Retention Days**, set the maximum retention days for raw data.
In **Raw Data Storage Directory**, set the path for saving raw data. If using Agent, the storage path refers to the path on the server where the Agent is located, otherwise it is on the taosX server. The path can use placeholders `$DATA_DIR` and `:id` as part of the path.
- On Linux platform, `$DATA_DIR` is /var/lib/taos/taosx, by default the storage path is `/var/lib/taos/taosx/tasks/<task_id>/rawdata`.
- On Windows platform, `$DATA_DIR` is C:\TDengine\data\taosx, by default the storage path is `C:\TDengine\data\taosx\tasks\<task_id>\rawdata`.
### 8. Completion
Click the **Submit** button to complete the creation of the OPC UA to TDengine data synchronization task. Return to the **Data Source List** page to view the status of the task execution.
## Add Data Points
During the task execution, click **Edit**, then click the **Add Data Points** button to append data points to the CSV file.
<figure>
<Image img={imgStep8} alt=""/>
</figure>
In the pop-up form, fill in the information for the data points.
<figure>
<Image img={imgStep9} alt=""/>
</figure>
Click the **Confirm** button to complete the addition of the data points.

View File

@ -0,0 +1,221 @@
---
title: OPC DA
slug: /advanced-features/data-connectors/opc-da
---
import Image from '@theme/IdealImage';
import imgStep1 from '../../assets/opc-da-01.png';
import imgStep2 from '../../assets/opc-da-02.png';
import imgStep3 from '../../assets/opc-da-03.png';
import imgStep4 from '../../assets/opc-da-04.png';
import imgStep5 from '../../assets/opc-da-05.png';
import imgStep6 from '../../assets/opc-da-06.png';
import imgStep7 from '../../assets/opc-da-07.png';
import imgStep8 from '../../assets/opc-da-08.png';
This section describes how to create data migration tasks through the Explorer interface, synchronizing data from an OPC-DA server to the current TDengine cluster.
## Overview
OPC is one of the interoperability standards for secure and reliable data exchange in the field of industrial automation and other industries.
OPC DA (Data Access) is a classic COM-based specification, only applicable to Windows. Although OPC DA is not the latest and most efficient data communication specification, it is widely used. This is mainly because some old equipment only supports OPC DA.
TDengine can efficiently read data from OPC-DA servers and write it to TDengine, achieving real-time data storage.
## Creating a Task
### 1. Add a Data Source
On the data writing page, click the **+Add Data Source** button to enter the add data source page.
<figure>
<Image img={imgStep1} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in **Name**, for example, for environmental temperature and humidity monitoring, name it **environment-monitoring**.
Select **OPC-DA** from the **Type** dropdown list.
If the taosX service is running on the same server as OPC-DA, **Proxy** is not necessary; otherwise, configure **Proxy**: select a specified proxy from the dropdown, or click the **+Create New Proxy** button on the right to create a new proxy and follow the prompts to configure the proxy.
Select a target database from the **Target Database** dropdown list, or click the **+Create Database** button on the right to create a new database.
<figure>
<Image img={imgStep2} alt=""/>
</figure>
### 3. Configure Connection Information
Fill in the **OPC-DA Service Address** in the **Connection Configuration** area, for example: `127.0.0.1/Matrikon.OPC.Simulation.1`, and configure the authentication method.
Click the **Connectivity Check** button to check if the data source is available.
<figure>
<Image img={imgStep3} alt=""/>
</figure>
### 4. Configure Points Set
For the **Points Set**, you can either use a CSV configuration file or **Select All Points**.
#### 4.1. Upload CSV Configuration File
You can download the blank CSV template, fill in the point information according to the template, and then upload the CSV configuration file to configure the points; or you can download the data points that match the configured filter conditions, in the format specified by the CSV template.
CSV files have the following rules:
1. File Encoding
The encoding format of the CSV file uploaded by the user must be one of the following:
(1) UTF-8 with BOM
(2) UTF-8 (i.e., UTF-8 without BOM)
2. Header Configuration Rules
The header is the first line of the CSV file, with the following rules:
(1) The header of the CSV can configure the following columns:
| No. | Column Name | Description | Required | Default Behavior |
| ---- | ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| 1 | tag_name | The id of the data point on the OPC DA server | Yes | None |
| 2 | stable | The supertable in TDengine corresponding to the data point | Yes | None |
| 3 | tbname | The subtable in TDengine corresponding to the data point | Yes | None |
| 4 | enable | Whether to collect data from this point | No | Use a unified default value `1` for enable |
| 5 | value_col | The column name in TDengine corresponding to the collected value of the data point | No | Use a unified default value `val` as the value_col |
| 6 | value_transform | The transform function executed in taosX for the collected value of the data point | No | Do not perform a transform on the collected value |
| 7 | type | The data type of the collected value of the data point | No | Use the original type of the collected value as the data type in TDengine |
| 8 | quality_col | The column name in TDengine corresponding to the quality of the collected value | No | Do not add a quality column in TDengine |
| 9 | ts_col | The timestamp column in TDengine corresponding to the original timestamp of the data point | No | If both ts_col and received_ts_col are non-empty, use the former as the timestamp column; if one of ts_col or received_ts_col is non-empty, use the non-empty column as the timestamp column; if both are empty, use the original timestamp of the data point as the timestamp column in TDengine, with the default column name ts. |
| 10 | received_ts_col | The timestamp column in TDengine corresponding to the timestamp when the data point value was received | No | |
| 11 | ts_transform | The transform function executed in taosX for the original timestamp of the data point | No | Do not perform a transform on the original timestamp of the data point |
| 12 | received_ts_transform | The transform function executed in taosX for the received timestamp of the data point | No | Do not perform a transform on the received timestamp of the data point |
| 13 | tag::VARCHAR(200)::name | The Tag column in TDengine corresponding to the data point. Where `tag` is a reserved keyword, indicating that this column is a tag column; `VARCHAR(200)` indicates the type of this tag, which can also be other legal types; `name` is the actual name of this tag. | No | If one or more tag columns are configured, use the configured tag columns; if no tag columns are configured and stable exists in TDengine, use the tags of the stable in TDengine; if no tag columns are configured and stable does not exist in TDengine, automatically add the following two tag columns by default: tag::VARCHAR(256)::point_id and tag::VARCHAR(256)::point_name |
(2) In the CSV Header, there cannot be duplicate columns;
(3) In the CSV Header, columns like `tag::VARCHAR(200)::name` can be configured multiple times, corresponding to multiple Tags in TDengine, but the names of the Tags cannot be duplicated.
(4) In the CSV Header, the order of columns does not affect the CSV file validation rules;
(5) In the CSV Header, columns that are not listed in the table above, such as: serial number, will be automatically ignored.
3. Row Configuration Rules
Each Row in the CSV file configures an OPC data point. The rules for Rows are as follows:
(1) Correspondence with columns in the Header
| Number | Column in Header | Type of Value | Range of Values | Mandatory | Default Value |
| ------ | ----------------------- | ------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------------------------ |
| 1 | tag_name | String | Strings like `root.parent.temperature`, must meet the OPC DA ID specification | Yes | |
| 2 | enable | int | 0: Do not collect this point, and delete the corresponding subtable in TDengine before the OPC DataIn task starts; 1: Collect this point, do not delete the subtable before the OPC DataIn task starts. | No | 1 |
| 3 | stable | String | Any string that meets the TDengine supertable naming convention; if there are special characters `.`, replace with underscore. If `{type}` exists: if type in CSV file is not empty, replace with the value of type; if empty, replace with the original type of the collected value | Yes | |
| 4 | tbname | String | Any string that meets the TDengine subtable naming convention; for OPC UA: if `{ns}` exists, replace with ns from point_id; if `{id}` exists, replace with id from point_id; for OPC DA: if `{tag_name}` exists, replace with tag_name | Yes | |
| 5 | value_col | String | Column name that meets TDengine naming convention | No | val |
| 6 | value_transform | String | Computation expressions supported by Rhai engine, such as: `(val + 10) / 1000 * 2.0`, `log(val) + 10`, etc.; | No | None |
| 7 | type | String | Supported types include: b/bool/i8/tinyint/i16/smallint/i32/int/i64/bigint/u8/tinyint unsigned/u16/smallint unsigned/u32/int unsigned/u64/bigint unsigned/f32/float/f64/double/timestamp/timestamp(ms)/timestamp(us)/timestamp(ns)/json | No | Original type of data point value |
| 8 | quality_col | String | Column name that meets TDengine naming convention | No | None |
| 9 | ts_col | String | Column name that meets TDengine naming convention | No | ts |
| 10 | received_ts_col | String | Column name that meets TDengine naming convention | No | rts |
| 11 | ts_transform | String | Supports +, -, *, /, % operators, for example: `ts / 1000 * 1000` sets the last 3 digits of a ms unit timestamp to 0; `ts + 8 * 3600 * 1000` adds 8 hours to a ms precision timestamp; `ts - 8 * 3600 * 1000` subtracts 8 hours from a ms precision timestamp | No | None |
| 12 | received_ts_transform | String | | No | None |
| 13 | tag::VARCHAR(200)::name | String | The value in tag, when the tag type is VARCHAR, it can be in Chinese | No | NULL |
(2) `tag_name` is unique throughout the DataIn task, that is: in an OPC DataIn task, a data point can only be written to one subtable in TDengine. If you need to write a data point to multiple subtables, you need to create multiple OPC DataIn tasks;
(3) When `tag_name` is different but `tbname` is the same, `value_col` must be different. This configuration allows data from multiple data points of different types to be written to different columns in the same subtable. This corresponds to the "OPC data into TDengine wide table" scenario.
4. Other Rules
(1) If the number of columns in Header and Row are not consistent, validation fails, and the user is prompted with the line number that does not meet the requirements;
(2) Header is on the first line and cannot be empty;
(3) There must be at least one data point;
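For reference only, a minimal hypothetical points CSV for OPC DA, following the rules above, might look like this; the tag names, supertables, and subtables are illustrative placeholders:

```csv
tag_name,stable,tbname,enable,value_col,type,ts_col
root.parent.temperature,temperature_f32,parent_temperature,1,val,f32,ts
root.parent.humidity,humidity_f32,parent_humidity,1,val,f32,ts
```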
#### 4.2. Selecting Data Points
Data points can be filtered by configuring the **Root Node ID** and **Regular Expression**.
Configure **Supertable Name** and **Table Name** to specify the supertable and subtable where the data will be written.
Configure **Primary Key Column**, choosing `origin_ts` to use the original timestamp of the OPC data point as the primary key in TDengine; choosing `received_ts` to use the timestamp when the data is received as the primary key. Configure **Primary Key Alias** to specify the name of the TDengine timestamp column.
<figure>
<Image img={imgStep4} alt=""/>
</figure>
### 5. Collection Configuration
In the collection configuration, set the current task's collection interval, connection timeout, and collection timeout.
<figure>
<Image img={imgStep5} alt=""/>
</figure>
As shown in the image:
- **Connection Timeout**: Configures the timeout for connecting to the OPC server, default is 10 seconds.
- **Collection Timeout**: If data is not returned from the OPC server within the set time during data point reading, the read fails, default is 10 seconds.
- **Collection Interval**: Default is 10 seconds. The interval between collections: starting from the end of the previous collection, the latest value of each data point is polled and written into TDengine.
When using **Select Data Points** in the **Data Point Set**, the collection configuration can configure **Data Point Update Mode** and **Data Point Update Interval** to enable dynamic data point updates. **Dynamic Data Point Update** means that during the task operation, if OPC Server adds or deletes data points, the matching data points will automatically be added to the current task without needing to restart the OPC task.
- Data Point Update Mode: Can choose `None`, `Append`, `Update`.
- None: Do not enable dynamic data point updates;
- Append: Enable dynamic data point updates, but only append;
- Update: Enable dynamic data point updates, append or delete;
- Data Point Update Interval: Effective when "Data Point Update Mode" is `Append` and `Update`. Unit: seconds, default value is 600, minimum value: 60, maximum value: 2147483647.
### 6. Advanced Options
<figure>
<Image img={imgStep6} alt=""/>
</figure>
As shown above, configure advanced options for more detailed optimization of performance, logs, etc.
**Log Level** defaults to `info`, with options `error`, `warn`, `info`, `debug`, `trace`.
In **Maximum Write Concurrency**, set the limit for the maximum number of concurrent writes to taosX. Default value: 0 (auto), which configures the concurrency automatically.
In **Batch Size**, set the batch size for each write, that is, the maximum number of messages sent at once.
In **Batch Delay**, set the maximum delay for a single send (in seconds). When the timeout expires, any pending data is sent immediately even if the **Batch Size** has not been reached.
In **Save Raw Data**, choose whether to save raw data. Default value: no.
When saving raw data, the following 2 parameters are effective.
In **Maximum Retention Days**, set the maximum retention days for raw data.
In **Raw Data Storage Directory**, set the path for saving raw data. If using Agent, the storage path refers to the path on the server where Agent is located, otherwise it is on the taosX server. The path can include placeholders `$DATA_DIR` and `:id` as part of the path.
- On Linux platform, `$DATA_DIR` is /var/lib/taos/taosx, by default the storage path is `/var/lib/taos/taosx/tasks/<task_id>/rawdata`.
- On Windows platform, `$DATA_DIR` is C:\TDengine\data\taosx, by default the storage path is `C:\TDengine\data\taosx\tasks\<task_id>\rawdata`.
### 7. Completion
Click the **Submit** button to complete the creation of the OPC DA to TDengine data synchronization task, return to the **Data Source List** page to view the task execution status.
## Add Data Points
During the task execution, click **Edit**, then click the **Add Data Points** button to append data points to the CSV file.
<figure>
<Image img={imgStep7} alt=""/>
</figure>
In the pop-up form, fill in the information for the data points.
<figure>
<Image img={imgStep8} alt=""/>
</figure>
Click the **Confirm** button to complete the addition of data points.

View File

@ -0,0 +1,204 @@
---
title: MQTT
slug: /advanced-features/data-connectors/mqtt
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/mqtt-01.png';
import imgStep02 from '../../assets/mqtt-02.png';
import imgStep03 from '../../assets/mqtt-03.png';
import imgStep04 from '../../assets/mqtt-04.png';
import imgStep05 from '../../assets/mqtt-05.png';
import imgStep06 from '../../assets/mqtt-06.png';
import imgStep07 from '../../assets/mqtt-07.png';
import imgStep08 from '../../assets/mqtt-08.png';
import imgStep09 from '../../assets/mqtt-09.png';
import imgStep10 from '../../assets/mqtt-10.png';
import imgStep11 from '../../assets/mqtt-11.png';
import imgStep12 from '../../assets/mqtt-12.png';
import imgStep13 from '../../assets/mqtt-13.png';
import imgStep14 from '../../assets/mqtt-14.png';
This section describes how to create data migration tasks through the Explorer interface, migrating data from MQTT to the current TDengine cluster.
## Overview
MQTT stands for Message Queuing Telemetry Transport. It is a lightweight messaging protocol that is easy to implement and use.
TDengine can subscribe to data from an MQTT broker via an MQTT connector and write it into TDengine, enabling real-time data streaming.
## Creating a Task
### 1. Add a Data Source
On the data writing page, click the **+Add Data Source** button to enter the add data source page.
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in **Name**, such as: "test_mqtt";
Select **MQTT** from the **Type** dropdown list.
**Broker** is optional, you can select a specific broker from the dropdown list or click the **+Create New Broker** button on the right.
Select a target database from the **Target Database** dropdown list, or click the **+Create Database** button on the right.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure Connection and Authentication Information
Enter the MQTT broker's address in **MQTT Address**, for example: `192.168.1.42`
Enter the MQTT broker's port in **MQTT Port**, for example: `1883`
Enter the MQTT broker's username in **User**.
Enter the MQTT broker's password in **Password**.
<figure>
<Image img={imgStep03} alt=""/>
</figure>
### 4. Configure SSL Certificate
If the MQTT broker uses an SSL certificate, upload the certificate file in **SSL Certificate**.
<figure>
<Image img={imgStep04} alt=""/>
</figure>
### 5. Configure Collection Information
Fill in the collection task related configuration parameters in the **Collection Configuration** area.
Select the MQTT protocol version from the **MQTT Protocol** dropdown list. There are three options: `3.1`, `3.1.1`, `5.0`. The default value is 3.1.
Enter the client identifier in **Client ID**, after which a client id with the prefix `taosx` will be generated (for example, if the identifier entered is `foo`, the generated client id will be `taosxfoo`). If the switch at the end is turned on, the current task's task id will be concatenated after `taosx` and before the entered identifier (the generated client id will look like `taosx100foo`). All client ids connecting to the same MQTT address must be unique.
Enter the keep alive interval in **Keep Alive**. The keep alive interval is the time interval negotiated between the client and the broker to check whether the client is active: if the broker does not receive any message from the client within this interval, it will assume the client has disconnected and close the connection.
In **Clean Session**, choose whether to clear the session. The default value is true.
Fill in the Topic names to be consumed in **Subscription Topics and QoS Configuration**. Use the following format: `topic1::0,topic2::1`.
Click the **Check Connectivity** button to check if the data source is available.
<figure>
<Image img={imgStep05} alt=""/>
</figure>
### 6. Configure MQTT Payload Parsing
Fill in the Payload parsing related configuration parameters in the **MQTT Payload Parsing** area.
taosX can use a JSON extractor to parse data and allows users to specify the data model in the database, including specifying table names and supertable names, setting ordinary columns and tag columns, etc.
#### 6.1 Parsing
There are three methods to obtain sample data:
Click the **Retrieve from Server** button to get sample data from MQTT.
Click the **File Upload** button to upload a CSV file and obtain sample data.
Fill in the example data from the MQTT message body in **Message Body**.
JSON data supports JSONObject or JSONArray, and the json parser can parse the following data:
``` json
{"id": 1, "message": "hello-word"}
{"id": 2, "message": "hello-word"}
```
or
``` json
[{"id": 1, "message": "hello-word"},{"id": 2, "message": "hello-word"}]
```
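Conceptually, both payload shapes are normalized into a list of records before the later extraction, filtering, and mapping steps. The sketch below illustrates this behavior only; it is not the connector's code:

```python
import json

def normalize_payload(payload: str):
    """Accept a single JSON object, a JSON array of objects, or one JSON
    object per line, and return a list of record dicts."""
    payload = payload.strip()
    try:
        data = json.loads(payload)
        return data if isinstance(data, list) else [data]
    except json.JSONDecodeError:
        # Fall back to one JSON object per line
        return [json.loads(line) for line in payload.splitlines() if line.strip()]

print(normalize_payload('{"id": 1, "message": "hello-word"}\n{"id": 2, "message": "hello-word"}'))
print(normalize_payload('[{"id": 1, "message": "hello-word"},{"id": 2, "message": "hello-word"}]'))
# Both calls yield: [{'id': 1, 'message': 'hello-word'}, {'id': 2, 'message': 'hello-word'}]
```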
The analysis results are as follows:
<figure>
<Image img={imgStep06} alt=""/>
</figure>
Click the **magnifying glass icon** to view the preview of the analysis results.
<figure>
<Image img={imgStep07} alt=""/>
</figure>
#### 6.2 Field Splitting
In **Extract or Split from Column**, fill in the fields to extract or split from the message body. For example, to split the `message` field into `message_0` and `message_1`: select the split extractor, set the separator to `-`, and set the number to 2.
<figure>
<Image img={imgStep08} alt=""/>
</figure>
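The effect of the split extractor configured above can be sketched as follows (an illustration only, not the extractor's actual code; the function name is hypothetical):

```python
def split_field(record, field, sep, number):
    """Split record[field] by `sep` into at most `number` parts and add them
    back as new columns named field_0, field_1, ... (illustration only)."""
    parts = str(record.get(field, "")).split(sep, number - 1)
    for i, part in enumerate(parts):
        record[f"{field}_{i}"] = part
    return record

row = {"id": 1, "message": "hello-word"}
print(split_field(row, "message", "-", 2))
# {'id': 1, 'message': 'hello-word', 'message_0': 'hello', 'message_1': 'word'}
```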
Click **Delete** to remove the current extraction rule.
Click **Add** to add more extraction rules.
Click the **magnifying glass icon** to view the preview of the extraction/split results.
<figure>
<Image img={imgStep09} alt=""/>
</figure>
#### 6.3 Data Filtering
In **Filter**, fill in the filtering conditions. For example, enter `id != 1`; then only data whose id is not equal to 1 will be written to TDengine.
<figure>
<Image img={imgStep10} alt=""/>
</figure>
Click **Delete** to remove the current filtering rule.
Click the **magnifying glass icon** to view the preview of the filtering results.
<figure>
<Image img={imgStep11} alt=""/>
</figure>
#### 6.4 Table Mapping
In the **Target Supertable** dropdown, select a target supertable, or click the **Create Supertable** button on the right.
In **Mapping**, fill in the name of the subtable in the target supertable, for example `t_{id}`, and fill in the mapping rules as required; the mapping supports setting default values.
<figure>
<Image img={imgStep12} alt=""/>
</figure>
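Putting the filter and mapping steps together, each surviving row is routed to a subtable whose name is produced from the template (here `t_{id}`). The sketch below is only a conceptual illustration of that routing; the rows and template come from the examples above:

```python
rows = [
    {"id": 1, "message_0": "hello", "message_1": "word"},
    {"id": 2, "message_0": "hello", "message_1": "word"},
]

# Filter step: keep only rows matching the condition `id != 1`
filtered = [r for r in rows if r["id"] != 1]

# Mapping step: derive the subtable name from the template "t_{id}"
for r in filtered:
    subtable = "t_{id}".format(**r)
    print(subtable, r)  # t_2 {'id': 2, 'message_0': 'hello', 'message_1': 'word'}
```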
Click **Preview** to view the mapping results.
<figure>
<Image img={imgStep13} alt=""/>
</figure>
### 7. Advanced Options
In the **Log Level** dropdown, select a log level. There are five options: `TRACE`, `DEBUG`, `INFO`, `WARN`, `ERROR`. The default is INFO.
If **Save Raw Data** is enabled, the following two parameters take effect.
Set the maximum retention days for raw data in **Maximum Retention Days**.
Set the storage path for raw data in **Raw Data Storage Directory**.
<figure>
<Image img={imgStep14} alt=""/>
</figure>
### 8. Completion
Click the **Submit** button to complete the creation of the MQTT to TDengine data synchronization task. Return to the **Data Source List** page to view the task execution status.
View File
@ -0,0 +1,263 @@
---
title: Apache Kafka
sidebar_label: Kafka
slug: /advanced-features/data-connectors/kafka
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/kafka-01.png';
import imgStep02 from '../../assets/kafka-02.png';
import imgStep03 from '../../assets/kafka-03.png';
import imgStep04 from '../../assets/kafka-04.png';
import imgStep05 from '../../assets/kafka-05.png';
import imgStep06 from '../../assets/kafka-06.png';
import imgStep07 from '../../assets/kafka-07.png';
import imgStep08 from '../../assets/kafka-08.png';
import imgStep09 from '../../assets/kafka-09.png';
import imgStep10 from '../../assets/kafka-10.png';
import imgStep11 from '../../assets/kafka-11.png';
import imgStep12 from '../../assets/kafka-12.png';
import imgStep13 from '../../assets/kafka-13.png';
import imgStep14 from '../../assets/kafka-14.png';
import imgStep15 from '../../assets/kafka-15.png';
import imgStep16 from '../../assets/kafka-16.png';
import imgStep17 from '../../assets/kafka-17.png';
import imgStep18 from '../../assets/kafka-18.png';
This section describes how to create data migration tasks through the Explorer interface, migrating data from Kafka to the current TDengine cluster.
## Feature Overview
Apache Kafka is an open-source distributed streaming system used for stream processing, real-time data pipelines, and large-scale data integration.
TDengine can efficiently read data from Kafka and write it into TDengine, enabling historical data migration or real-time data streaming.
## Creating a Task
### 1. Add a Data Source
On the data writing page, click the **+Add Data Source** button to enter the add data source page.
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in **Name**, such as: "test_kafka";
Select **Kafka** from the **Type** dropdown list.
**Proxy** is optional; if needed, you can select a specific proxy from the dropdown, or click **+Create New Proxy** on the right.
Select a target database from the **Target Database** dropdown list, or click the **+Create Database** button on the right.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure Connection Information
Enter the **bootstrap-server** address, for example: `192.168.1.92`.
Enter the **Service Port**, for example: `9092`.
When there are multiple broker addresses, click the **+Add Broker** button at the bottom right of the connection settings to add more bootstrap-server and service-port pairs.
<figure>
<Image img={imgStep03} alt=""/>
</figure>
### 4. Configure SASL Authentication Mechanism
If the server has enabled SASL authentication, you need to enable SASL here and configure the relevant content. Currently, three authentication mechanisms are supported: PLAIN, SCRAM-SHA-256, and GSSAPI. Choose according to your actual situation.
#### 4.1. PLAIN Authentication
Select the `PLAIN` authentication mechanism and enter the username and password:
<figure>
<Image img={imgStep04} alt=""/>
</figure>
#### 4.2. SCRAM (SCRAM-SHA-256) Authentication
Select the `SCRAM-SHA-256` authentication mechanism and enter the username and password:
<figure>
<Image img={imgStep05} alt=""/>
</figure>
#### 4.3. GSSAPI Authentication
Select `GSSAPI`, which will use the [RDkafka client](https://github.com/confluentinc/librdkafka) to invoke GSSAPI and apply the Kerberos authentication mechanism:
<figure>
<Image img={imgStep06} alt=""/>
</figure>
The required information includes:
- Kerberos service name, usually `kafka`;
- Kerberos authentication principal, that is, the authentication username, such as `kafkaclient`;
- Kerberos initialization command (optional, generally not required);
- Kerberos keytab: you need to provide and upload the keytab file.
All of the above information must be provided by the Kafka service administrator.
In addition, the [Kerberos](https://web.mit.edu/kerberos/) authentication service needs to be configured on the server. Use `apt install krb5-user` on Ubuntu; on CentOS, use `yum install krb5-workstation`.
After configuration, you can use the [kcat](https://github.com/edenhill/kcat) tool to verify Kafka topic consumption:
```bash
kcat <topic> \
-b <kafka-server:port> \
-G kcat \
-X security.protocol=SASL_PLAINTEXT \
-X sasl.mechanism=GSSAPI \
-X sasl.kerberos.keytab=</path/to/kafkaclient.keytab> \
-X sasl.kerberos.principal=<kafkaclient> \
-X sasl.kerberos.service.name=kafka
```
If the error "Server xxxx not found in kerberos database" occurs, configure the domain name corresponding to the Kafka node and enable reverse DNS resolution (`rdns = true`) in the Kerberos client configuration file `/etc/krb5.conf`.
### 5. Configure SSL Certificate
If the server has enabled SSL encryption authentication, SSL needs to be enabled here and related content configured.
<figure>
<Image img={imgStep07} alt=""/>
</figure>
### 6. Configure Collection Information
Fill in the configuration parameters related to the collection task in the **Collection Configuration** area.
Enter the timeout duration in **Timeout**. If no data is consumed from Kafka within the timeout, the data collection task exits. The default value is 0 ms; when the timeout is set to 0, the task waits indefinitely until data becomes available or an error occurs.
Enter the Topic name to be consumed in **Topic**. Multiple Topics can be configured, separated by commas. For example: `tp1,tp2`.
Enter the client identifier in **Client ID**; a client ID with the prefix `taosx` will be generated (for example, if the identifier entered is `foo`, the generated client ID is `taosxfoo`). If the switch at the end is turned on, the current task's task ID is concatenated between `taosx` and the entered identifier (the generated client ID will look like `taosx100foo`). Note that when multiple taosX subscriptions to the same Topic are used for load balancing, a consistent client ID must be entered to achieve the balancing effect.
Enter the consumer group identifier in **Consumer Group ID**; a consumer group ID with the prefix `taosx` will be generated (for example, if the identifier entered is `foo`, the generated consumer group ID is `taosxfoo`). If the switch at the end is turned on, the current task's task ID is concatenated between `taosx` and the entered identifier (the generated consumer group ID will look like `taosx100foo`).
In the **Offset** dropdown, select from which offset to start consuming data. There are three options: `Earliest`, `Latest`, `ByTime(ms)`. The default is Earliest.
- Earliest: Requests the earliest offset.
- Latest: Requests the latest offset.
- ByTime(ms): Requests the offset corresponding to the specified timestamp, in milliseconds.
Set the maximum duration to wait when there is insufficient data while fetching messages in **Maximum Duration to Fetch Data** (in milliseconds). The default value is 100 ms.
Click the **Connectivity Check** button to check if the data source is available.
<figure>
<Image img={imgStep08} alt=""/>
</figure>
### 7. Configure Payload Parsing
Fill in the configuration parameters related to Payload parsing in the **Payload Parsing** area.
#### 7.1 Parsing
There are three methods to obtain sample data:
Click the **Retrieve from Server** button to get sample data from Kafka.
Click the **File Upload** button to upload a CSV file and obtain sample data.
Enter sample data from the Kafka message body in **Message Body**.
JSON data supports JSONObject or JSONArray, and the following data can be parsed using a JSON parser:
``` json
{"id": 1, "message": "hello-word"}
{"id": 2, "message": "hello-word"}
```
or
``` json
[{"id": 1, "message": "hello-word"},{"id": 2, "message": "hello-word"}]
```
The parsing results are shown as follows:
<figure>
<Image img={imgStep09} alt=""/>
</figure>
Click the **magnifying glass icon** to view the preview parsing results.
<figure>
<Image img={imgStep10} alt=""/>
</figure>
#### 7.2 Field Splitting
In **Extract or Split from Columns**, fill in the fields to extract or split from the message body. For example, to split the `message` field into `message_0` and `message_1`: select the split extractor, set the separator to `-`, and set the number to 2.
Click **Add** to add more extraction rules.
Click **Delete** to delete the current extraction rule.
<figure>
<Image img={imgStep11} alt=""/>
</figure>
Click the **magnifying glass icon** to view the preview extraction/splitting results.
<figure>
<Image img={imgStep12} alt=""/>
</figure>
#### 7.3 Data Filtering
In **Filter**, fill in the filtering conditions. For example, enter `id != 1`; then only data whose id is not equal to 1 will be written to TDengine.
Click **Add** to add more filtering rules.
Click **Delete** to delete the current filtering rule.
<figure>
<Image img={imgStep13} alt=""/>
</figure>
Click the **magnifying glass icon** to view the preview filtering results.
<figure>
<Image img={imgStep14} alt=""/>
</figure>
#### 7.4 Table Mapping
In the **Target Supertable** dropdown, select a target supertable, or click the **Create Supertable** button on the right.
In the **Mapping** section, fill in the name of the subtable in the target supertable, for example: `t_{id}`. Fill in the mapping rules as required, where mapping supports setting default values.
<figure>
<Image img={imgStep15} alt=""/>
</figure>
Click **Preview** to view the results of the mapping.
<figure>
<Image img={imgStep16} alt=""/>
</figure>
### 8. Configure Advanced Options
The **Advanced Options** area is collapsed by default. Click the `>` on the right to expand it, as shown below:
<figure>
<Image img={imgStep17} alt=""/>
</figure>
<figure>
<Image img={imgStep18} alt=""/>
</figure>
### 9. Completion of Creation
Click the **Submit** button to complete the creation of the Kafka to TDengine data synchronization task. Return to the **Data Source List** page to view the status of the task execution.
View File
@ -0,0 +1,125 @@
---
title: InfluxDB
slug: /advanced-features/data-connectors/influxdb
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/influxdb-01.png';
import imgStep02 from '../../assets/influxdb-02.png';
import imgStep03 from '../../assets/influxdb-03.png';
import imgStep04 from '../../assets/influxdb-04.png';
import imgStep05 from '../../assets/influxdb-05.png';
import imgStep06 from '../../assets/influxdb-06.png';
import imgStep07 from '../../assets/influxdb-07.png';
import imgStep08 from '../../assets/influxdb-08.png';
import imgStep09 from '../../assets/influxdb-09.png';
import imgStep10 from '../../assets/influxdb-10.png';
This section describes how to create a data migration task through the Explorer interface to migrate data from InfluxDB to the current TDengine cluster.
## Feature Overview
InfluxDB is a popular open-source time-series database optimized for handling large volumes of time-series data. TDengine can efficiently read data from InfluxDB through the InfluxDB connector and write it into TDengine, enabling historical data migration or real-time data synchronization.
The task saves progress information to the disk during operation, so if the task is paused and restarted, or if it automatically recovers from an anomaly, it will not start over. For more options, it is recommended to read the explanations of each form field on the task creation page in detail.
## Creating a Task
### 1. Add a Data Source
Click the **+ Add Data Source** button in the upper left corner of the data writing page to enter the add data source page, as shown below:
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in the **Name** field, for example *`test_influxdb_01`*.
Select *`InfluxDB`* from the **Type** dropdown menu, as shown below (the fields on the page will change after selection).
**Proxy** is optional. If needed, you can select a specific proxy from the dropdown menu, or click the **+ Create New Proxy** button on the right.
**Target Database** is required. Since InfluxDB can store data with time precision of seconds, milliseconds, microseconds, and nanoseconds, you need to select a *`nanosecond precision database`* here, or click the **+ Create Database** button on the right.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure Connection Information
Fill in the *`connection information for the source InfluxDB database`* in the **Connection Configuration** area, as shown below:
<figure>
<Image img={imgStep03} alt=""/>
</figure>
### 4. Configure Authentication Information
In the **Authentication** area, there are two tabs, *`1.x version`* and *`2.x version`*, due to different authentication parameters and significant API differences between different versions of InfluxDB databases. Please choose according to the actual situation:
*`1.x version`*
**Version** Select the version of the source InfluxDB database from the dropdown menu.
**User** Enter the user of the source InfluxDB database, who must have read permissions in that organization.
**Password** Enter the login password for the above user in the source InfluxDB database.
<figure>
<Image img={imgStep04} alt=""/>
</figure>
*`2.x version`*
**Version** Select the version of the source InfluxDB database from the dropdown menu.
**Organization ID** Enter the organization ID of the source InfluxDB database, which is a string of hexadecimal characters, not the organization name, and can be obtained from the InfluxDB console's Organization->About page.
**Token** Enter the access token for the source InfluxDB database, which must have read permissions in that organization.
**Add Database Retention Policy** This is a *`Yes/No`* toggle. InfluxQL requires a combination of database and retention policy (DBRP) to query data. The cloud version of InfluxDB and some 2.x versions require manually adding this mapping. Turn on this switch, and the connector can automatically add it when executing tasks.
<figure>
<Image img={imgStep05} alt=""/>
</figure>
Below the **Authentication** area, there is a **Connectivity Check** button. Users can click this button to check if the information filled in above can normally access the data of the source InfluxDB database. The check results are shown below:
**Failed**
<figure>
<Image img={imgStep06} alt=""/>
</figure>
**Successful**
<figure>
<Image img={imgStep07} alt=""/>
</figure>
### 5. Configure Task Information
**Bucket** is a named space in the InfluxDB database for storing data. Each task needs to specify a bucket. Users need to first click the **Get Schema** button on the right to obtain the data structure information of the current source InfluxDB database, and then select from the dropdown menu as shown below:
<figure>
<Image img={imgStep08} alt=""/>
</figure>
**Measurements** are optional. Users can select one or more Measurements to synchronize from the dropdown menu. If none are specified, all will be synchronized.
**Start Time** refers to the start time of the data in the source InfluxDB database. The timezone for the start time uses the timezone selected in explorer, and this field is required.
**End Time** refers to the end time of the data in the source InfluxDB database. If no end time is specified, synchronization of the latest data will continue; if an end time is specified, synchronization will only continue up to this end time. The timezone for the end time uses the timezone selected in explorer, and this field is optional.
**Time Range per Read (minutes)** is the maximum time range for the connector to read data from the source InfluxDB database in a single read. This is a very important parameter, and users need to decide based on server performance and data storage density. If the range is too small, the execution speed of the synchronization task will be very slow; if the range is too large, it may cause the InfluxDB database system to fail due to high memory usage.
**Delay (seconds)** is an integer between 1 and 30. To eliminate the impact of out-of-order data, TDengine always waits for the duration specified here before reading data.
### 6. Configure Advanced Options
The **Advanced Options** area is collapsed by default. Click the `>` on the right to expand it, as shown below:
<figure>
<Image img={imgStep09} alt=""/>
</figure>
<figure>
<Image img={imgStep10} alt=""/>
</figure>
### 7. Completion of Creation
Click the **Submit** button to complete the creation of the data synchronization task from InfluxDB to TDengine. Return to the **Data Source List** page to view the status of the task execution.
View File
@ -0,0 +1,99 @@
---
title: OpenTSDB
slug: /advanced-features/data-connectors/opentsdb
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/opentsdb-01.png';
import imgStep02 from '../../assets/opentsdb-02.png';
import imgStep03 from '../../assets/opentsdb-03.png';
import imgStep04 from '../../assets/opentsdb-04.png';
import imgStep05 from '../../assets/opentsdb-05.png';
import imgStep06 from '../../assets/opentsdb-06.png';
import imgStep07 from '../../assets/opentsdb-07.png';
import imgStep08 from '../../assets/opentsdb-08.png';
This section describes how to create a data migration task through the Explorer interface to migrate data from OpenTSDB to the current TDengine cluster.
## Overview
OpenTSDB is a real-time monitoring information collection and display platform built on the HBase system. TDengine can efficiently read data from OpenTSDB through the OpenTSDB connector and write it into TDengine, achieving historical data migration or real-time data synchronization.
During the operation, the task will save progress information to the disk, so if the task is paused and restarted, or automatically recovers from an anomaly, it will not start over. For more options, it is recommended to read the explanations of each form field on the task creation page in detail.
## Creating a Task
### 1. Add a Data Source
Click the **+ Add Data Source** button in the upper left corner of the data writing page to enter the add data source page, as shown below:
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in the **Name** field, for example *`test_opentsdb_01`*.
Select *`OpenTSDB`* from the **Type** dropdown menu, as shown below (the fields on the page will change after selection).
**Proxy** is optional. If needed, you can select a specific proxy from the dropdown menu, or click the **+ Create New Proxy** button on the right.
**Target Database** is required. Since OpenTSDB stores data with millisecond precision, you need to select a *`millisecond precision database`*, or click the **+ Create Database** button on the right.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure Connection Information
Fill in the *`connection information for the source OpenTSDB database`* in the **Connection Configuration** area, as shown below:
<figure>
<Image img={imgStep03} alt=""/>
</figure>
Below the **Connection Configuration** area, there is a **Connectivity Check** button. Users can click this button to check whether the information filled in above can normally access the data from the source OpenTSDB database. The check results are shown below:
**Failed**
<figure>
<Image img={imgStep04} alt=""/>
</figure>
**Successful**
<figure>
<Image img={imgStep05} alt=""/>
</figure>
### 4. Configure Task Information
**Metrics** are the physical quantities in which data is stored in the OpenTSDB database. Users can specify multiple metrics to synchronize, or synchronize all data in the database if none are specified. If users specify metrics, they need to first click the **Get Metrics** button on the right to obtain all the metric information from the current source OpenTSDB database, and then select from the dropdown menu, as shown below:
<figure>
<Image img={imgStep06} alt=""/>
</figure>
**Start Time** refers to the start time of the data in the source OpenTSDB database, using the timezone selected in explorer, and this field is required.
**End Time** refers to the end time of the data in the source OpenTSDB database. If no end time is specified, the synchronization of the latest data will continue; if an end time is specified, synchronization will only continue up to this end time, using the timezone selected in explorer, and this field is optional.
**Time Range per Read (minutes)** is the maximum time range for the connector to read data from the source OpenTSDB database in a single operation. This is a very important parameter, and users need to decide based on server performance and data storage density. If the range is too small, the execution speed of the synchronization task will be very slow; if the range is too large, it may cause the OpenTSDB database system to fail due to excessive memory usage.
**Delay (seconds)** is an integer ranging from 1 to 30. To eliminate the impact of out-of-order data, TDengine always waits for the duration specified here before reading the data.
### 5. Configure Advanced Options
The **Advanced Options** area is collapsed by default. Click the `>` on the right to expand it, as shown in the following images:
<figure>
<Image img={imgStep07} alt=""/>
</figure>
<figure>
<Image img={imgStep08} alt=""/>
</figure>
### 6. Completion of Creation
Click the **Submit** button to complete the creation of the OpenTSDB to TDengine data synchronization task. Return to the **Data Source List** page to view the status of the task.
View File
@ -0,0 +1,122 @@
---
title: CSV File
slug: /advanced-features/data-connectors/csv-file
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/csv-file-01.png';
import imgStep02 from '../../assets/csv-file-02.png';
import imgStep03 from '../../assets/csv-file-03.png';
import imgStep04 from '../../assets/csv-file-04.png';
import imgStep05 from '../../assets/csv-file-05.png';
import imgStep06 from '../../assets/csv-file-06.png';
import imgStep07 from '../../assets/csv-file-07.png';
import imgStep10 from '../../assets/csv-file-10.png';
import imgStep11 from '../../assets/csv-file-11.png';
This section describes how to create data migration tasks through the Explorer interface, migrating data from CSV to the current TDengine cluster.
## Feature Overview
Import data from one or more CSV files into TDengine.
## Create Task
### 1. Add Data Source
On the data writing page, click the **+Add Data Source** button to enter the add data source page.
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in **Name**, such as: "test_csv";
Select **CSV** from the **Type** dropdown list.
Select a target database from the **Target Database** dropdown list, or click the **+Create Database** button on the right.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure CSV Options
In **Include Header**, click to enable or disable the option; if enabled, the first line is treated as column information.
In **Ignore First N Rows**, fill in N to skip the first N rows of the CSV file.
In **Field Separator**, select the separator between CSV fields. The default is `,`.
In **Field Enclosure**, select the character used to enclose field content when a field contains the separator or newline characters, ensuring the entire field is recognized correctly. The default is `"`.
In **Comment Prefix**, select the comment character; if a line in the CSV file starts with the character specified here, that line is ignored. The default is `#`.
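As a rough illustration of how these options are interpreted (not the connector's actual code; the sample content and column names are made up), reading a file with a header, the default `,` separator, `"` enclosure, and `#` comment prefix could look like this:

```python
import csv

sample = """# a comment line that will be ignored
ts,groupid,text
2024-03-14T08:00:00,1,"hello,word"
2024-03-14T08:00:01,2,"hello,word"
"""

def read_csv(text, header=True, ignore_first_n=0, separator=",", enclosure='"', comment="#"):
    lines = text.splitlines()[ignore_first_n:]                # Ignore First N Rows
    lines = [l for l in lines if not l.startswith(comment)]   # Comment Prefix
    reader = csv.reader(lines, delimiter=separator, quotechar=enclosure)
    rows = list(reader)
    if header:                                                # Include Header
        head, rows = rows[0], rows[1:]
        return [dict(zip(head, r)) for r in rows]
    return rows

for row in read_csv(sample):
    print(row)  # e.g. {'ts': '2024-03-14T08:00:00', 'groupid': '1', 'text': 'hello,word'}
```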
<figure>
<Image img={imgStep03} alt=""/>
</figure>
### 4. Configure Parsing CSV File
Upload a local CSV file, for example `test-json.csv`; this example CSV file will be used later to configure extraction and filtering conditions.
#### 4.1 Parsing
Click **Select File**, choose test-json.csv, then click **Parse** to preview the recognized columns.
<figure>
<Image img={imgStep04} alt=""/>
</figure>
**Preview Parsing Results**
<figure>
<Image img={imgStep05} alt=""/>
</figure>
#### 4.2 Field Splitting
In **Extract or Split from Column**, fill in the fields to extract or split from the message body. For example, to split the `message` field into `text_0` and `text_1`: select the split extractor, set the separator to `-`, and set the number to 2.
Click **Delete** to remove the current extraction rule.
Click **Add** to add more extraction rules.
<figure>
<Image img={imgStep06} alt=""/>
</figure>
Click the **Magnifying Glass Icon** to preview the extraction or splitting results.
<figure>
<Image img={imgStep07} alt=""/>
</figure>
<!-- In **Filter**, fill in the filtering conditions, for example: fill in `id != 1`, then only data with id not equal to 1 will be written into TDengine.
Click **Delete** to remove the current filtering rule.
![csv-08.png](./csv-08.png)
Click the **Magnifying Glass Icon** to view the preview filtering results.
![csv-09.png](./csv-09.png) -->
#### 4.3 Table Mapping
Select a target supertable from the **Target Supertable** dropdown list, or click the **Create Supertable** button on the right.
In **Mapping**, fill in the subtable name of the target supertable, for example: `t_${groupid}`.
<figure>
<Image img={imgStep10} alt=""/>
</figure>
Click **Preview** to preview the mapping results.
<figure>
<Image img={imgStep11} alt=""/>
</figure>
### 5. Completion
Click the **Submit** button to complete the creation of the CSV to TDengine data synchronization task. Return to the **Data Source List** page to view the task execution status.
View File
@ -0,0 +1,164 @@
---
title: AVEVA Historian
slug: /advanced-features/data-connectors/aveva-historian
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/aveva-historian-01.png';
import imgStep02 from '../../assets/aveva-historian-02.png';
import imgStep03 from '../../assets/aveva-historian-03.png';
import imgStep04 from '../../assets/aveva-historian-04.png';
import imgStep05 from '../../assets/aveva-historian-05.png';
import imgStep06 from '../../assets/aveva-historian-06.png';
import imgStep07 from '../../assets/aveva-historian-07.png';
import imgStep08 from '../../assets/aveva-historian-08.png';
This section describes how to create data migration/data synchronization tasks through the Explorer interface, migrating/synchronizing data from AVEVA Historian to the current TDengine cluster.
## Feature Overview
AVEVA Historian is an industrial big data analytics software, formerly known as Wonderware. It captures and stores high-fidelity industrial big data, unleashing constrained potential to improve operations.
TDengine can efficiently read data from AVEVA Historian and write it into TDengine, enabling historical data migration or real-time data synchronization.
## Creating Tasks
### 1. Add a Data Source
On the data writing page, click the **+Add Data Source** button to enter the add data source page.
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in **Name**, such as: "test_avevaHistorian";
Select **AVEVA Historian** from the **Type** dropdown list.
**Proxy** is optional; if needed, you can select a specific proxy from the dropdown, or click the **+Create New Proxy** button on the right.
Select a target database from the **Target Database** dropdown list, or click the **+Create Database** button on the right.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure Connection Information
In the **Connection Configuration** area, fill in the **Server Address** and **Server Port**.
In the **Authentication** area, fill in the **Username** and **Password**.
Click the **Connectivity Check** button to check if the data source is available.
<figure>
<Image img={imgStep03} alt=""/>
</figure>
### 4. Configure Collection Information
Fill in the collection task related configuration parameters in the **Collection Configuration** area.
#### 4.1. Migrate Data
If you want to perform data migration, configure the following parameters:
Select **migrate** from the **Collection Mode** dropdown list.
In **Tags**, fill in the list of tags to migrate, separated by commas (,).
In **Tag Group Size**, fill in the size of the tag group.
In **Task Start Time**, fill in the start time of the data migration task.
In **Task End Time**, fill in the end time of the data migration task.
In **Query Time Window**, fill in a time interval; the data migration task will divide the data into time windows according to this interval.
<figure>
<Image img={imgStep04} alt=""/>
</figure>
#### 4.2. Synchronize Data from the History Table
If you want to synchronize data from the **Runtime.dbo.History** table to TDengine, configure the following parameters:
Select **synchronize** from the **Collection Mode** dropdown list.
In **Table**, select **Runtime.dbo.History**.
In **Tags**, fill in the list of tags to migrate, separated by commas (,).
In **Tag Group Size**, fill in the size of the tag group.
In **Task Start Time**, fill in the start time of the data migration task.
In **Query Time Window**, fill in a time interval; the historical data part will be divided into time windows according to this interval.
In **Real-time Synchronization Interval**, fill in a time interval at which the real-time data part will poll for new data.
In **Disorder Time Upper Limit**, fill in a time interval; during real-time data synchronization, data that enters the database later than this limit may be lost.
<figure>
<Image img={imgStep05} alt=""/>
</figure>
#### 4.3. Synchronize Data from the Live Table
If you want to synchronize data from the **Runtime.dbo.Live** table to TDengine, configure the following parameters:
Select **synchronize** from the **Collection Mode** dropdown list.
In **Table**, select **Runtime.dbo.Live**.
In **Tags**, fill in the list of tags to migrate, separated by commas (,).
In **Real-time Synchronization Interval**, fill in a time interval at which the real-time data part will poll for new data.
<figure>
<Image img={imgStep06} alt=""/>
</figure>
### 5. Configure Data Mapping
Fill in the data mapping related configuration parameters in the **Data Mapping** area.
Click the **Retrieve from Server** button to fetch sample data from the AVEVA Historian server.
In **Extract or Split from Column**, fill in the fields to extract or split from the message body. For example, to split the `vValue` field into `vValue_0` and `vValue_1`: select the split extractor, set the separator to `,`, and set the number to 2.
In **Filter**, fill in the filtering conditions. For example, enter `Value > 0`; then only data where Value is greater than 0 will be written to TDengine.
In **Mapping**, select the supertable in TDengine to which you want to map, as well as the columns to map to the supertable.
Click **Preview** to view the results of the mapping.
<figure>
<Image img={imgStep07} alt=""/>
</figure>
### 6. Configure Advanced Options
Fill in the related configuration parameters in the **Advanced Options** area.
Set the maximum read concurrency in **Maximum Read Concurrency**. The default value is 0, which means auto: the concurrency is configured automatically.
Set the batch size for each write in **Batch Size**, that is, the maximum number of messages sent at once.
In **Save Raw Data**, choose whether to save the raw data. Default value: No.
When raw data is saved, the following two parameters take effect.
Set the maximum retention days for raw data in **Maximum Retention Days**.
Set the storage path for raw data in **Raw Data Storage Directory**.
<figure>
<Image img={imgStep08} alt=""/>
</figure>
### 7. Completion of Creation
Click the **Submit** button to complete the creation of the task. After submitting the task, return to the **Data Writing** page to view the status of the task.
View File
@ -0,0 +1,139 @@
---
title: MySQL
slug: /advanced-features/data-connectors/mysql
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/mysql-01.png';
import imgStep02 from '../../assets/mysql-02.png';
import imgStep03 from '../../assets/mysql-03.png';
import imgStep04 from '../../assets/mysql-04.png';
import imgStep05 from '../../assets/mysql-05.png';
import imgStep06 from '../../assets/mysql-06.png';
import imgStep07 from '../../assets/mysql-07.png';
import imgStep08 from '../../assets/mysql-08.png';
This section describes how to create data migration tasks through the Explorer interface, migrating data from MySQL to the current TDengine cluster.
## Overview
MySQL is one of the most popular relational databases. Many systems have used or are using MySQL databases to store data reported by IoT and industrial internet devices. However, as the number of devices in the access systems grows and the demand for real-time data feedback from users increases, MySQL can no longer meet business needs. Starting from TDengine Enterprise Edition 3.3.0.0, TDengine can efficiently read data from MySQL and write it into TDengine, achieving historical data migration or real-time data synchronization, and solving the technical pain points faced by businesses.
## Creating a Task
### 1. Add a Data Source
Click the **+ Add Data Source** button in the top left corner of the data writing page to enter the Add Data Source page, as shown below:
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in the **Name** field, for example *`test_mysql_01`*.
Select *`MySQL`* from the **Type** dropdown menu, as shown below (the fields on the page will change after selection).
**Proxy** is optional. If needed, you can select a specific proxy from the dropdown menu, or click the **+ Create New Proxy** button on the right to create a new proxy.
**Target Database** is required. You can click the **+ Create Database** button on the right to create a new database.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure Connection Information
Fill in the *`connection information for the source MySQL database`* in the **Connection Configuration** area, as shown below:
<figure>
<Image img={imgStep03} alt=""/>
</figure>
### 4. Configure Authentication Information
**User** Enter the user of the source MySQL database, who must have read permissions in the organization.
**Password** Enter the login password for the user mentioned above in the source MySQL database.
<figure>
<Image img={imgStep04} alt=""/>
</figure>
### 5. Configure Connection Options
**Character Set** Set the character set for the connection. The default character set is utf8mb4, which has been supported since MySQL 5.5.3; if connecting to an older version, it is recommended to change to utf8.
Options include utf8, utf8mb4, utf16, utf32, gbk, big5, latin1, ascii.
**SSL Mode** Set whether to negotiate a secure SSL TCP/IP connection with the server or the priority of negotiation. The default value is PREFERRED. Options include DISABLED, PREFERRED, REQUIRED.
<figure>
<Image img={imgStep05} alt=""/>
</figure>
Then click the **Check Connectivity** button to check whether the information filled in above allows data to be fetched normally from the source MySQL database.
### 6. Configure SQL Query
**Subtable Field** is used to split subtables. It is a `select distinct` SQL statement that queries distinct combinations of the specified fields, usually corresponding to the tags in transform:
> This configuration is mainly to solve the problem of data migration disorder, and it needs to be used together with **SQL Template**; otherwise, the expected effect cannot be achieved. Usage examples are as follows:
>
> 1. Fill in the subtable field statement `select distinct col_name1, col_name2 from table`, which means using the fields col_name1 and col_name2 in the source table to split the subtables of the target supertable
> 2. Add subtable field placeholders in the **SQL Template**, for example, the `${col_name1} and ${col_name2}` part in `select * from table where ts >= ${start} and ts < ${end} and ${col_name1} and ${col_name2}`
> 3. Configure `col_name1` and `col_name2` two tag mappings in **transform**
**SQL Template** is the SQL statement template used for querying. The SQL statement must include time range conditions, and the start and end times must appear in pairs. The time range defined in the SQL statement template consists of a column representing time in the source database and the placeholders defined below.
> SQL uses different placeholders to represent different time format requirements, specifically the following placeholder formats:
>
> 1. `${start}`, `${end}`: Represents RFC3339 format timestamps, e.g.: 2024-03-14T08:00:00+0800
> 2. `${start_no_tz}`, `${end_no_tz}`: Represents RFC3339 strings without timezone: 2024-03-14T08:00:00
> 3. `${start_date}`, `${end_date}`: Represents date only, e.g.: 2024-03-14
>
> To solve the problem of data migration disorder, it is advisable to add sorting conditions in the query statement, such as `order by ts asc`.
**Start Time** The start time for migrating data, this field is required.
**End Time** The end time for migrating data, which can be left blank. If set, the migration task will stop automatically after reaching the end time; if left blank, it will continuously synchronize real-time data and the task will not stop automatically.
**Query Interval** The time interval for querying data in segments, default is 1 day. To avoid querying a large amount of data at once, a data synchronization sub-task will use the query interval to segment the data retrieval.
**Delay Duration** In real-time data synchronization scenarios, to avoid losing data due to delayed writes, each synchronization task will read data from before the delay duration.
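To make the interaction between **Subtable Field**, **SQL Template**, the time placeholders, and **Query Interval** concrete, the sketch below expands the template into one query per time window and per distinct subtable-field combination. It is only an illustration of the mechanism, not taosX's actual implementation; the table name, column names, values, and the assumption that each placeholder expands into an equality condition are made up:

```python
from datetime import datetime, timedelta

template = ("select * from meters where ts >= ${start} and ts < ${end} "
            "and ${col_name1} and ${col_name2} order by ts asc")

# Hypothetical result of `select distinct col_name1, col_name2 from meters`
combos = [("beijing", "d1"), ("shanghai", "d2")]

start = datetime(2024, 3, 14, 8, 0, 0)      # Start Time
end = datetime(2024, 3, 16, 8, 0, 0)        # End Time
interval = timedelta(days=1)                # Query Interval

window = start
while window < end:
    window_end = min(window + interval, end)
    for c1, c2 in combos:
        sql = (template
               .replace("${start}", f"'{window.isoformat()}+0800'")
               .replace("${end}", f"'{window_end.isoformat()}+0800'")
               .replace("${col_name1}", f"col_name1 = '{c1}'")   # assumed expansion
               .replace("${col_name2}", f"col_name2 = '{c2}'"))  # assumed expansion
        print(sql)
    window = window_end
```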
<figure>
<Image img={imgStep06} alt=""/>
</figure>
### 7. Configure Data Mapping
In the **Data Mapping** area, fill in the configuration parameters related to data mapping.
Click the **Retrieve from Server** button to fetch sample data from the MySQL server.
In **Extract or Split from Column**, fill in the fields to extract or split from the message body. For example, to split the `vValue` field into `vValue_0` and `vValue_1`: select the split extractor, set the separator to `,`, and set the number to 2.
In **Filter**, fill in the filtering conditions. For example, enter `Value > 0`; then only data where Value is greater than 0 will be written to TDengine.
In **Mapping**, select the supertable in TDengine to map to, and the columns to map to the supertable.
Click **Preview** to view the results of the mapping.
<figure>
<Image img={imgStep07} alt=""/>
</figure>
### 8. Configure Advanced Options
The **Advanced Options** area is collapsed by default. Click the `>` on the right to expand it, as shown below:
**Maximum Read Concurrency** The limit on the number of data source connections or reading threads. Modify this parameter when the default does not meet your needs or when adjusting resource usage.
**Batch Size** The maximum number of messages or rows sent at once. The default is 10000.
<figure>
<Image img={imgStep08} alt=""/>
</figure>
### 9. Completion
Click the **Submit** button to complete the creation of the data synchronization task from MySQL to TDengine, and return to the **Data Source List** page to view the task execution status.
View File
@ -0,0 +1,140 @@
---
title: PostgreSQL
slug: /advanced-features/data-connectors/postgresql
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/postgresql-01.png';
import imgStep02 from '../../assets/postgresql-02.png';
import imgStep03 from '../../assets/postgresql-03.png';
import imgStep04 from '../../assets/postgresql-04.png';
import imgStep05 from '../../assets/postgresql-05.png';
import imgStep06 from '../../assets/postgresql-06.png';
import imgStep07 from '../../assets/postgresql-07.png';
import imgStep08 from '../../assets/postgresql-08.png';
This section describes how to create a data migration task through the Explorer interface to migrate data from PostgreSQL to the current TDengine cluster.
## Feature Overview
PostgreSQL is a very powerful, open-source client/server relational database management system with many features found in large commercial RDBMSs, including transactions, subselects, triggers, views, foreign key referential integrity, and sophisticated locking capabilities.
TDengine can efficiently read data from PostgreSQL and write it to TDengine, enabling historical data migration or real-time data synchronization.
## Creating a Task
### 1. Add a Data Source
Click the **+ Add Data Source** button in the upper left corner of the data writing page to enter the add data source page, as shown below:
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in the **Name** field, for example *`test_postgres_01`*.
Select *`PostgreSQL`* from the **Type** dropdown menu, as shown below (the fields on the page will change after selection).
**Proxy** is optional. If needed, you can select a specific proxy from the dropdown menu or click the **+ Create New Proxy** button on the right to create a new proxy.
**Target Database** is required. You can click the **+ Create Database** button on the right to create a new database.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure Connection Information
Fill in the *`connection information for the source PostgreSQL database`* in the **Connection Configuration** area, as shown below:
<figure>
<Image img={imgStep03} alt=""/>
</figure>
### 4. Configure Authentication Information
**User** Enter the user of the source PostgreSQL database, who must have read permissions in the organization.
**Password** Enter the login password for the user mentioned above in the source PostgreSQL database.
<figure>
<Image img={imgStep04} alt=""/>
</figure>
### 5. Configure Connection Options
**Application Name** Set the application name to identify the connected application.
**SSL Mode** Set whether to negotiate a secure SSL TCP/IP connection with the server or the priority of such negotiation. The default value is PREFER. Options include DISABLE, ALLOW, PREFER, REQUIRE.
<figure>
<Image img={imgStep05} alt=""/>
</figure>
Then click the **Check Connectivity** button to check whether the information filled in above allows data to be fetched normally from the source PostgreSQL database.
### 6. Configure SQL Query
**Subtable Field** Used to split subtables. It is a `select distinct` SQL statement that queries distinct combinations of the specified fields, usually corresponding to the tags in transform:
> This configuration is mainly to solve the data migration disorder problem, and it needs to be used in conjunction with **SQL Template**; otherwise, the expected effect cannot be achieved. Usage examples are as follows:
>
> 1. Fill in the subtable field statement `select distinct col_name1, col_name2 from table`, which means using the fields col_name1 and col_name2 in the source table to split the subtables of the target supertable
> 2. Add subtable field placeholders in the **SQL Template**, for example, the `${col_name1} and ${col_name2}` part in `select * from table where ts >= ${start} and ts < ${end} and ${col_name1} and ${col_name2}`
> 3. Configure `col_name1` and `col_name2` two tag mappings in **transform**
**SQL Template** The SQL statement template used for querying. The SQL statement must include time range conditions, and the start and end times must appear in pairs. The time range defined in the SQL statement template consists of a column representing time in the source database and the placeholders defined below.
> Different placeholders represent different time format requirements in SQL, specifically including the following placeholder formats:
>
> 1. `${start}`, `${end}`: Represents RFC3339 format timestamps, e.g.: 2024-03-14T08:00:00+0800
> 2. `${start_no_tz}`, `${end_no_tz}`: Represents RFC3339 strings without timezone: 2024-03-14T08:00:00
> 3. `${start_date}`, `${end_date}`: Represents date only, e.g.: 2024-03-14
>
> To solve the problem of data migration disorder, sorting conditions should be added to the query statement, such as `order by ts asc`.
**Start Time** The start time for migrating data, this field is required.
**End Time** The end time for migrating data, which can be left blank. If set, the migration task will stop automatically after reaching the end time; if left blank, it will continuously synchronize real-time data and the task will not stop automatically.
**Query Interval** The time interval for querying data in segments, default is 1 day. To avoid querying a large amount of data at once, a data synchronization subtask will use the query interval to segment the data retrieval.
**Delay Duration** In real-time data synchronization scenarios, to avoid losing data due to delayed writes, each synchronization task will read data from before the delay duration.
<figure>
<Image img={imgStep06} alt=""/>
</figure>
### 7. Configure Data Mapping
In the **Data Mapping** area, fill in the configuration parameters related to data mapping.
Click the **Retrieve from Server** button to fetch sample data from the PostgreSQL server.
In **Extract or Split from Column**, fill in the fields to extract or split from the message body. For example, to split the `vValue` field into `vValue_0` and `vValue_1`: select the split extractor, set the separator to `,`, and set the number to 2.
In **Filter**, fill in the filtering conditions. For example, enter `Value > 0`; then only data where Value is greater than 0 will be written to TDengine.
In **Mapping**, select the supertable in TDengine to map to, and the columns to map to the supertable.
Click **Preview** to view the results of the mapping.
<figure>
<Image img={imgStep07} alt=""/>
</figure>
### 8. Configure Advanced Options
The **Advanced Options** area is collapsed by default. Click the `>` on the right to expand it, as shown below:
**Maximum Read Concurrency** Limit on the number of data source connections or read threads. Modify this parameter when the default parameters do not meet the needs or when adjusting resource usage.
**Batch Size** The maximum number of messages or rows sent at once. The default is 10000.
<figure>
<Image img={imgStep08} alt=""/>
</figure>
### 9. Completion
Click the **Submit** button to complete the creation of the data synchronization task from PostgreSQL to TDengine. Return to the **Data Source List** page to view the status of the task execution.
View File
@ -0,0 +1,129 @@
---
title: Oracle Database
slug: /advanced-features/data-connectors/oracle-database
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/oracle-database-01.png';
import imgStep02 from '../../assets/oracle-database-02.png';
import imgStep03 from '../../assets/oracle-database-03.png';
import imgStep04 from '../../assets/oracle-database-04.png';
import imgStep05 from '../../assets/oracle-database-05.png';
import imgStep06 from '../../assets/oracle-database-06.png';
import imgStep07 from '../../assets/oracle-database-07.png';
This section describes how to create data migration tasks through the Explorer interface, migrating data from Oracle to the current TDengine cluster.
## Feature Overview
The Oracle database system is a popular relational database management system worldwide, known for its good portability, ease of use, and strong functionality, suitable for various large, medium, and small computer environments. It is an efficient, reliable, and high-throughput database solution.
TDengine can efficiently read data from Oracle and write it to TDengine, enabling historical data migration or real-time data synchronization.
## Creating a Task
### 1. Add a Data Source
Click the **+ Add Data Source** button in the upper left corner of the data writing page to enter the Add Data Source page, as shown below:
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in the **Name** field, for example, *`test_oracle_01`*.
Select *`Oracle`* from the **Type** dropdown menu, as shown below (the fields on the page will change after selection).
**Agent** is optional. If needed, you can select a specific agent from the dropdown menu or click the **+ Create New Agent** button on the right to create a new agent.
**Target Database** is required. You can click the **+ Create Database** button on the right to create a new database.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure Connection Information
Fill in the *`connection information for the source Oracle database`* in the **Connection Configuration** area, as shown below:
<figure>
<Image img={imgStep03} alt=""/>
</figure>
### 4. Configure Authentication Information
**User** Enter the user of the source Oracle database, who must have read permissions in the organization.
**Password** Enter the login password for the user mentioned above in the source Oracle database.
<figure>
<Image img={imgStep04} alt=""/>
</figure>
Then click the **Check Connectivity** button to check whether the information filled in above allows data to be accessed normally from the source Oracle database.
### 5. Configure SQL Query
**Subtable Field** is used to split subtables. It is a `select distinct` SQL statement that queries distinct combinations of the specified fields, usually corresponding to the tags in transform:
> This configuration is mainly to solve the problem of data migration disorder. It needs to be used in conjunction with **SQL Template**; otherwise, the expected effect cannot be achieved. Usage examples are as follows:
>
> 1. Fill in the subtable field statement `select distinct col_name1, col_name2 from table`, which means using the fields col_name1 and col_name2 in the source table to split the subtable of the target supertable
> 2. Add subtable field placeholders in the **SQL Template**, for example, the `${col_name1} and ${col_name2}` part in `select * from table where ts >= ${start} and ts < ${end} and ${col_name1} and ${col_name2}`
> 3. Configure the mappings of `col_name1` and `col_name2` as two tags in **transform**
**SQL Template** is the SQL statement template used for querying, which must include time range conditions, and the start and end times must appear in pairs. The time range defined in the SQL statement template consists of a column representing time in the source database and the placeholders defined below.
> Different placeholders represent different time format requirements in SQL, specifically including the following placeholder formats:
>
> 1. `${start}`, `${end}`: Represents RFC3339 format timestamps, e.g., 2024-03-14T08:00:00+0800
> 2. `${start_no_tz}`, `${end_no_tz}`: Represents RFC3339 strings without timezone: 2024-03-14T08:00:00
> 3. `${start_date}`, `${end_date}`: Represents date only, but since there is no pure date type in Oracle, it will include zero hour, zero minute, and zero second, e.g., 2024-03-14 00:00:00, so be careful when using `date <= ${end_date}` as it does not include data of the day 2024-03-14
>
> To solve the problem of data migration disorder, a sorting condition should be added to the query statement, such as `order by ts asc`.
**Start Time** The start time for migrating data, this field is required.
**End Time** The end time for migrating data, which can be left blank. If set, the migration task will stop automatically after reaching the end time; if left blank, it will continuously synchronize real-time data and the task will not stop automatically.
**Query Interval** The time interval for querying data in segments, default is 1 day. To avoid querying too much data at once, a data synchronization sub-task will use the query interval to segment the data retrieval.
**Delay Duration** In real-time data synchronization scenarios, to avoid losing data due to delayed writes, each synchronization task will read data from before the delay duration.
<figure>
<Image img={imgStep05} alt=""/>
</figure>
### 6. Configure Data Mapping
In the **Data Mapping** area, fill in the configuration parameters related to data mapping.
Click the **Retrieve from Server** button to get sample data from the Oracle server.
In **Extract or Split from Column**, fill in the fields to extract or split from the message body. For example, to split the `vValue` field into `vValue_0` and `vValue_1`: choose the split extractor, set the separator to `,`, and set the number to 2.
In **Filter**, fill in the filtering conditions. For example, enter `Value > 0`; then only data where Value is greater than 0 will be written to TDengine.
In **Mapping**, select the supertable in TDengine to map to, and the columns to map to the supertable.
Click **Preview** to view the results of the mapping.
<figure>
<Image img={imgStep06} alt=""/>
</figure>
### 7. Configure Advanced Options
The **Advanced Options** area is collapsed by default. Click the `>` on the right to expand it, as shown below:
**Maximum Read Concurrency** Limit on the number of data source connections or reading threads. Modify this parameter when the default does not meet your needs or when adjusting resource usage.
**Batch Size** The maximum number of messages or rows sent at once. The default is 10000.
<figure>
<Image img={imgStep07} alt=""/>
</figure>
### 8. Completion
Click the **Submit** button to complete the creation of the data synchronization task from Oracle to TDengine. Return to the **Data Source List** page to view the task execution status.
View File
@ -0,0 +1,147 @@
---
title: Microsoft SQL Server
sidebar_label: SQL Server
slug: /advanced-features/data-connectors/sql-server
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/sql-server-01.png';
import imgStep02 from '../../assets/sql-server-02.png';
import imgStep03 from '../../assets/sql-server-03.png';
import imgStep04 from '../../assets/sql-server-04.png';
import imgStep05 from '../../assets/sql-server-05.png';
import imgStep06 from '../../assets/sql-server-06.png';
import imgStep07 from '../../assets/sql-server-07.png';
import imgStep08 from '../../assets/sql-server-08.png';
This section describes how to create data migration tasks through the Explorer interface, migrating data from Microsoft SQL Server to the current TDengine cluster.
## Feature Overview
Microsoft SQL Server is one of the most popular relational databases. Many systems have used or are using Microsoft SQL Server to store data reported by IoT and industrial internet devices. However, as the number of connected devices grows and users' demand for real-time data feedback increases, Microsoft SQL Server can no longer meet business needs. Starting from TDengine Enterprise Edition 3.3.2.0, TDengine can efficiently read data from Microsoft SQL Server and write it into TDengine, achieving historical data migration or real-time data synchronization, and solving technical pain points faced by businesses.
## Creating a Task
### 1. Add a Data Source
Click the **+ Add Data Source** button in the upper left corner of the data writing page to enter the Add Data Source page, as shown below:
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in the **Name** field, for example *`test_mssql_01`*.
Select *`Microsoft SQL Server`* from the **Type** dropdown menu, as shown below (the fields on the page will change after selection).
**Agent** is optional. If needed, you can select a specific agent from the dropdown menu, or click the **+ Create New Agent** button on the right to create a new agent.
**Target Database** is required. You can click the **+ Create Database** button on the right to create a new database.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure Connection Information
Fill in the *`connection information for the source Microsoft SQL Server database`* in the **Connection Configuration** area, as shown below:
<figure>
<Image img={imgStep03} alt=""/>
</figure>
### 4. Configure Authentication Information
**User** Enter the user of the source Microsoft SQL Server database, who must have read permission on the source data.
**Password** Enter the login password for the user mentioned above in the source Microsoft SQL Server database.
<figure>
<Image img={imgStep04} alt=""/>
</figure>
### 5. Configure Connection Options
**Instance Name** Set the Microsoft SQL Server instance name (as defined in SQL Browser; available only on Windows). If specified, the port will be replaced with the value returned by SQL Browser.
**Application Name** Set the application name to identify the connecting application.
**Encryption** Set whether to use an encrypted connection. The default value is Off. Options include Off, On, NotSupported, Required.
**Trust Certificate** Set whether to trust the server certificate. If enabled, the server certificate will not be verified and will be accepted as is (if trust is enabled, the `Trust Certificate CA` field below will be hidden).
**Trust Certificate CA** Set whether to trust the server's certificate CA. If a CA file is uploaded, the server certificate will be verified based on the provided CA certificate in addition to the system trust store.
<figure>
<Image img={imgStep05} alt=""/>
</figure>
Then click the **Check Connectivity** button to verify that the information entered above can be used to retrieve data from the source Microsoft SQL Server database.
### 6. Configure SQL Query
**Subtable Field** is used to split data into subtables. It is a `select distinct` SQL statement that queries unique combinations of the specified fields, usually corresponding to the tags in transform:
> This configuration mainly addresses out-of-order data migration and must be used together with **SQL Template**; otherwise the expected effect cannot be achieved. A usage example is as follows:
>
> 1. Fill in the subtable field statement `select distinct col_name1, col_name2 from table`, which means using the fields col_name1 and col_name2 in the source table to split the subtables of the target supertable
> 2. Add subtable field placeholders in the **SQL Template**, for example, the `${col_name1} and ${col_name2}` part in `select * from table where ts >= ${start} and ts < ${end} and ${col_name1} and ${col_name2}`
> 3. Configure the mappings of `col_name1` and `col_name2` as two tags in **transform**
**SQL Template** is the SQL statement template used for querying. It must include a time range condition, and the start time and end time must appear in pairs. The time range defined in the SQL template consists of a column representing time in the source database and the placeholders defined below.
> SQL uses different placeholders to represent different time format requirements, specifically the following placeholder formats:
>
> 1. `${start}`, `${end}`: Represents RFC3339 format timestamp, e.g.: 2024-03-14T08:00:00+0800
> 2. `${start_no_tz}`, `${end_no_tz}`: Represents an RFC3339 string without timezone: 2024-03-14T08:00:00
> 3. `${start_date}`, `${end_date}`: Represents date only, e.g.: 2024-03-14
>
> Note: Only `datetime2` and `datetimeoffset` support querying with start/end; `datetime` and `smalldatetime` can only be queried with start_no_tz/end_no_tz; and `timestamp` cannot be used as a query condition.
>
> To avoid migrating data out of order, it is advisable to add a sorting condition to the query statement, such as `order by ts asc`; see the sketch below.
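Putting the two settings together, here is a minimal sketch based on the example above, with hypothetical table and column names (`sensor_data`, `ts`, `col_name1`, `col_name2`); note that `${start}`/`${end}` require a `datetime2` or `datetimeoffset` time column, as stated in the note above.

```sql
-- Subtable Field: enumerate the distinct tag combinations used to split subtables (hypothetical names)
SELECT DISTINCT col_name1, col_name2 FROM sensor_data

-- SQL Template: the same fields appear as placeholders, together with the time range and ordering
SELECT * FROM sensor_data
WHERE ts >= ${start} AND ts < ${end} AND ${col_name1} AND ${col_name2}
ORDER BY ts ASC
```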
**Start Time** The start time of the data migration. This field is mandatory.
**End Time** The end time of the data migration, which can be left blank. If set, the migration task will stop automatically after reaching the end time; if left blank, it will continuously synchronize real-time data, and the task will not stop automatically.
**Query Interval** The time interval for segmenting data queries, default is 1 day. To avoid querying too much data at once, a data synchronization subtask will use the query interval to segment the data.
**Delay Duration** In real-time data synchronization scenarios, to avoid losing data due to delayed writing, each synchronization task will read data from before the delay duration.
<figure>
<Image img={imgStep06} alt=""/>
</figure>
### 7. Configure Data Mapping
Fill in the related configuration parameters in the **Data Mapping** area.
Click the **Retrieve from Server** button to get sample data from the Microsoft SQL Server.
In **Extract or Split from Column**, fill in the fields to extract or split from the message body, for example: split the `vValue` field into `vValue_0` and `vValue_1`, select the split extractor, fill in the separator as `,`, and set the number of splits to 2.
In **Filter**, fill in the filter conditions, for example: write `Value > 0`, then only data where Value is greater than 0 will be written to TDengine.
In **Mapping**, select the supertable in TDengine to which you want to map, and the columns to map to the supertable.
Click **Preview** to view the results of the mapping.
<figure>
<Image img={imgStep07} alt=""/>
</figure>
### 8. Configure Advanced Options
The **Advanced Options** area is collapsed by default, click the `>` on the right to expand it, as shown below:
**Maximum Read Concurrency** Limits the number of data source connections or reading threads. Modify this parameter when the default does not meet your needs or when adjusting resource usage.
**Batch Size** The maximum number of messages or rows sent at once. The default is 10000.
<figure>
<Image img={imgStep08} alt=""/>
</figure>
### 9. Completion
Click the **Submit** button to complete the creation of the data synchronization task from Microsoft SQL Server to TDengine, and return to the **Data Source List** page to view the status of the task execution.

View File

@ -0,0 +1,164 @@
---
title: MongoDB
slug: /advanced-features/data-connectors/mongodb
---
import Image from '@theme/IdealImage';
import imgStep01 from '../../assets/mongodb-01.png';
import imgStep02 from '../../assets/mongodb-02.png';
import imgStep03 from '../../assets/mongodb-03.png';
import imgStep04 from '../../assets/mongodb-04.png';
import imgStep05 from '../../assets/mongodb-05.png';
import imgStep06 from '../../assets/mongodb-06.png';
import imgStep07 from '../../assets/mongodb-07.png';
import imgStep08 from '../../assets/mongodb-08.png';
This section describes how to create data migration tasks through the Explorer interface, migrating data from MongoDB to the current TDengine cluster.
## Feature Overview
MongoDB is a product that lies between relational and non-relational databases, widely used in content management systems, mobile applications, and the Internet of Things, among other fields. Starting from TDengine Enterprise Edition 3.3.3.0, TDengine can efficiently read data from MongoDB and write it into TDengine, achieving historical data migration or real-time data synchronization, and addressing technical pain points faced by businesses.
## Creating a Task
### 1. Add a Data Source
Click the **+ Add Data Source** button in the top right corner of the data writing page to enter the Add Data Source page, as shown below:
<figure>
<Image img={imgStep01} alt=""/>
</figure>
### 2. Configure Basic Information
Enter the task name in the **Name** field, for example `test_mongodb_01`.
Select `MongoDB` from the **Type** dropdown menu, as shown below (the fields on the page will change after selection).
**Proxy** is optional. If needed, you can select a specific proxy from the dropdown menu, or click the **+ Create New Proxy** button on the right to create a new proxy.
**Target Database** is mandatory. You can select a specific database from the dropdown menu, or click the **+ Create Database** button on the right to create a new database.
<figure>
<Image img={imgStep02} alt=""/>
</figure>
### 3. Configure Connection Information
Fill in the *connection information for the source MongoDB database* in the **Connection Configuration** area, as shown below:
<figure>
<Image img={imgStep03} alt=""/>
</figure>
### 4. Configure Authentication Information
**User** Enter the user of the source MongoDB database, who must have read permissions in the MongoDB system.
**Password** Enter the login password for the user mentioned above in the source MongoDB database.
**Authentication Database** The database in MongoDB where user information is stored; the default is admin.
<figure>
<Image img={imgStep04} alt=""/>
</figure>
### 5. Configure Connection Options
**Application Name** Set the application name to identify the connected application.
**SSL Certificate** Set whether to use an encrypted connection, which is off by default. If enabled, you need to upload the following two files:
&emsp; 1. **CA File** Upload the SSL encryption certificate authority file.
&emsp; 2. **Certificate File** Upload the SSL encryption certificate file.
<figure>
<Image img={imgStep05} alt=""/>
</figure>
Then click the **Check Connectivity** button to check whether the information filled in above can be used to retrieve data from the source MongoDB database.
### 6. Configure Data Query
**Database** The source database in MongoDB, which can be dynamically configured using placeholders, such as `database_${Y}`. See the table below for a list of available placeholders.
**Collection** The collection in MongoDB, which can be dynamically configured using placeholders, such as `collection_${md}`. See the table below for a list of available placeholders.
|Placeholder|Description|Example Data|
| :-----: | :------------: |:--------:|
|Y|Complete Gregorian year, zero-padded 4-digit integer|2024|
|y|Gregorian year modulo 100 (last two digits), zero-padded 2-digit integer|24|
|M|Integer month (1 - 12)|1|
|m|Integer month (01 - 12)|01|
|B|Full English spelling of the month|January|
|b|Abbreviation of the month in English (3 letters)|Jan|
|D|Numeric representation of the date (1 - 31)|1|
|d|Numeric representation of the date (01 - 31)|01|
|J|Day of the year (1 - 366)|1|
|j|Day of the year (001 - 366)|001|
|F|Equivalent to `${Y}-${m}-${d}`|2024-01-01|
**Subtable Fields** Fields used to split subtables, usually corresponding to tags in transform, separated by commas, such as col_name1,col_name2.
This configuration mainly addresses out-of-order data migration and must be used together with **Query Template**; otherwise the expected effect cannot be achieved. A usage example is as follows:
1. Configure two subtable fields `col_name1,col_name2`
2. Add subtable field placeholders in the **Query Template**, such as the `${col_name1}, ${col_name2}` part in `{"ddate":{"$gte":${start_datetime},"$lt":${end_datetime}}, ${col_name1}, ${col_name2}}`
3. Configure `col_name1` and `col_name2` two tag mappings in **transform**
**Query Template** is a JSON-format query statement used for querying data. It must include a time range condition, and the start and end times must appear in pairs. The time range defined in the template consists of a time-representing column from the source database and the placeholders defined below.
Different placeholders represent different time format requirements, specifically the following placeholder formats:
1. `${start_datetime}`, `${end_datetime}`: Corresponds to filtering by the backend datetime type field, e.g., `{"ddate":{"$gte":${start_datetime},"$lt":${end_datetime}}}` will be converted to `{"ddate":{"$gte":{"$date":"2024-06-01T00:00:00+00:00"},"$lt":{"$date":"2024-07-01T00:00:00+00:00"}}}`
2. `${start_timestamp}`, `${end_timestamp}`: Corresponds to filtering by the backend timestamp type field, e.g., `{"ttime":{"$gte":${start_timestamp},"$lt":${end_timestamp}}}` will be converted to `{"ttime":{"$gte":{"$timestamp":{"t":123,"i":456}},"$lt":{"$timestamp":{"t":123,"i":456}}}}`
**Query Sorting** The sorting conditions applied during query execution, in JSON format. They must comply with MongoDB's sorting syntax. Usage examples:
1. `{"createtime":1}`: MongoDB query results are returned in ascending order by createtime.
2. `{"createdate":1, "createtime":1}`: MongoDB query results are returned in ascending order by createdate and createtime.
**Start Time** The start time for migrating data. This field is mandatory.
**End Time** The end time for migrating data, which can be left blank. If set, the migration task will stop automatically after reaching the end time; if left blank, it will continuously synchronize real-time data, and the task will not stop automatically.
**Query Interval** The time interval for segmenting data queries, default is 1 day. To avoid querying too much data at once, a data synchronization subtask will use the query interval to segment the data.
**Delay Duration** In real-time data synchronization scenarios, to avoid losing data due to delayed writes, each synchronization task will read data from before the delay duration.
<figure>
<Image img={imgStep06} alt=""/>
</figure>
### 7. Configure Data Mapping
Fill in the data mapping related configuration parameters in the **Payload Transformation** area.
Click the **Retrieve from Server** button to fetch sample data from the MongoDB server.
In **Parsing**, choose from JSON/Regex/UDT to parse the original message body, and click the **Preview** button on the right to view the parsing results after configuration.
In **Extract or Split from Column**, fill in the fields to extract or split from the message body, for example: split the `vValue` field into `vValue_0` and `vValue_1`, select the split extractor, fill in the separator as `,`, set the number of splits to 2, and click the **Preview** button on the right to view the transformation results after configuration.
In **Filter**, fill in the filtering conditions, for example: write `Value > 0`, then only data where Value is greater than 0 will be written to TDengine, and click the **Preview** button on the right to view the filtering results after configuration.
In **Mapping**, select the supertable in TDengine to which the data will be mapped, as well as the columns to map to the supertable, and click the **Preview** button on the right to view the mapping results after configuration.
<figure>
<Image img={imgStep07} alt=""/>
</figure>
### 8. Configure Advanced Options
The **Advanced Options** area is collapsed by default, click the `>` on the right to expand it, as shown below:
**Maximum Read Concurrency** Limits the number of data source connections or reading threads. Modify this parameter when the default does not meet your needs or when adjusting resource usage.
**Batch Size** The maximum number of messages or rows sent at once. Default is 10000.
<figure>
<Image img={imgStep08} alt=""/>
</figure>
### 9. Completion
Click the **Submit** button to complete the creation of the data synchronization task from MongoDB to TDengine, and return to the **Data Source List** page to view the task execution status.

View File

@ -0,0 +1,324 @@
---
title: Data Connectors
slug: /advanced-features/data-connectors
---
import Image from '@theme/IdealImage';
import imgZeroCode from '../../assets/data-connectors-01.png';
import imgSampleData from '../../assets/data-connectors-02.png';
import imgJsonParsing from '../../assets/data-connectors-03.png';
import imgRegexParsing from '../../assets/data-connectors-04.png';
import imgResults from '../../assets/data-connectors-05.png';
import imgSplit from '../../assets/data-connectors-06.png';
## Overview
TDengine Enterprise is equipped with a powerful visual data management tool—taosExplorer. With taosExplorer, users can easily submit tasks to TDengine through simple configurations in the browser, achieving seamless data import from various data sources into TDengine with zero coding. During the import process, TDengine automatically extracts, filters, and transforms the data to ensure the quality of the imported data. Through this zero-code data source integration method, TDengine has successfully transformed into an outstanding platform for aggregating time-series big data. Users do not need to deploy additional ETL tools, thereby greatly simplifying the overall architecture design and improving data processing efficiency.
The diagram below shows the system architecture of the zero-code integration platform.
<figure>
<Image img={imgZeroCode} alt="Zero-code access platform"/>
<figcaption>Figure 1. Zero-code access platform</figcaption>
</figure>
## Supported Data Sources
The data sources currently supported by TDengine are as follows:
| Data Source | Supported Version | Description |
| --- | --- | --- |
| Aveva PI System | PI AF Server Version 2.10.9.593 or above | An industrial data management and analytics platform, formerly known as OSIsoft PI System, capable of real-time collection, integration, analysis, and visualization of industrial data, helping enterprises achieve intelligent decision-making and refined management |
| Aveva Historian | AVEVA Historian 2020 RS SP1 | Industrial big data analytics software, formerly known as Wonderware Historian, designed for industrial environments to store, manage, and analyze real-time and historical data from various industrial devices and sensors |
| OPC DA | Matrikon OPC version: 1.7.2.7433 | Abbreviation for Open Platform Communications, an open, standardized communication protocol for data exchange between automation devices from different manufacturers. Initially developed by Microsoft, it was aimed at addressing interoperability issues in the industrial control field; the OPC protocol was first released in 1996, then known as OPC DA (Data Access), mainly for real-time data collection and control. |
| OPC UA | KeepWare KEPServerEx 6.5 | In 2006, the OPC Foundation released the OPC UA (Unified Architecture) standard, a service-oriented, object-oriented protocol with higher flexibility and scalability, which has become the mainstream version of the OPC protocol |
| MQTT | emqx: 3.0.0 to 5.7.1<br/> hivemq: 4.0.0 to 4.31.0<br/> mosquitto: 1.4.4 to 2.0.18 | Abbreviation for Message Queuing Telemetry Transport, a lightweight communication protocol based on the publish/subscribe pattern, designed for low overhead, low bandwidth usage instant messaging, widely applicable in IoT, small devices, mobile applications, and other fields. |
| Kafka | 2.11 ~ 3.8.0 | An open-source stream processing platform developed by the Apache Software Foundation, primarily used for processing real-time data and providing a unified, high-throughput, low-latency messaging system. It features high speed, scalability, persistence, and a distributed design, enabling it to handle hundreds of thousands of read/write operations per second, support thousands of clients, while maintaining data reliability and availability. |
| InfluxDB | 1.7, 1.8, 2.0-2.7 | A popular open-source time-series database optimized for handling large volumes of time-series data.|
| OpenTSDB | 2.4.1 | A distributed, scalable time-series database based on HBase. It is primarily used for storing, indexing, and providing access to metric data collected from large-scale clusters (including network devices, operating systems, applications, etc.), making this data more accessible and graphically presentable. |
| MySQL | 5.6,5.7,8.0+ | One of the most popular relational database management systems, known for its small size, fast speed, low overall ownership cost, and particularly its open-source nature, making it the choice for website database development for both medium-sized and large websites. |
| Oracle | 11G/12c/19c | Oracle Database System is one of the world's popular relational database management systems, known for its good portability, ease of use, powerful features, suitable for various large, medium, and small computer environments. It is an efficient, reliable, and high-throughput database solution. |
| PostgreSQL | v15.0+ | PostgreSQL is a very powerful open-source client/server relational database management system, with many features found in large commercial RDBMS, including transactions, sub-selects, triggers, views, foreign key referential integrity, and complex locking capabilities.|
| SQL Server | 2012/2022 | Microsoft SQL Server is a relational database management system developed by Microsoft, known for its ease of use, good scalability, and high integration with related software. |
| MongoDB | 3.6+ | MongoDB is a product between relational and non-relational databases, widely used in content management systems, mobile applications, and the Internet of Things, among many other fields. |
| CSV | - | Abbreviation for Comma Separated Values, a plain text file format separated by commas, commonly used in spreadsheet or database software. |
| TDengine 2.x | 2.4 or 2.6+ | Older version of TDengine, no longer maintained; upgrading to the latest 3.0 version is recommended. |
| TDengine 3.x | Source version+ | Use TMQ for subscribing to specified databases or supertables from TDengine. |
## Data Extraction, Filtering, and Transformation
Since there can be multiple data sources, each data source may have different physical units, naming conventions, and time zones. To address this issue, TDengine has built-in ETL capabilities that can parse and extract the required data from the data packets of data sources, and perform filtering and transformation to ensure the quality of the data written and provide a unified namespace. The specific functions are as follows:
1. Parsing: Use JSON Path or regular expressions to parse fields from the original message.
2. Extracting or splitting from columns: Use split or regular expressions to extract multiple fields from an original field.
3. Filtering: Messages are only written to TDengine if the expression's value is true.
4. Transformation: Establish conversion and mapping relationships between parsed fields and TDengine supertable fields.
Below is a detailed explanation of the data transformation rules.
### Parsing
Only unstructured data sources need this step. Currently, MQTT and Kafka data sources use the rules provided in this step to parse unstructured data to preliminarily obtain structured data, i.e., row and column data that can be described by fields. In the explorer, you need to provide sample data and parsing rules to preview the parsed structured data presented in a table.
#### Sample Data
<figure>
<Image img={imgZeroCode} alt="Sample data"/>
<figcaption>Figure 2. Sample data</figcaption>
</figure>
As shown in the image, the textarea input box contains the sample data, which can be obtained in three ways:
1. Directly enter the sample data in the textarea;
2. Click the button on the right "Retrieve from Server" to get the sample data from the configured server and append it to the sample data textarea;
3. Upload a file, appending the file content to the sample data textarea.
Each piece of sample data ends with a carriage return.
#### Parsing<a name="parse"></a>
Parsing is the process of parsing unstructured strings into structured data. The message body's parsing rules currently support JSON, Regex, and UDT.
##### JSON Parsing
JSON parsing supports JSONObject or JSONArray. The following JSON sample data can automatically parse fields: `groupid`, `voltage`, `current`, `ts`, `inuse`, `location`.
``` json
{"groupid": 170001, "voltage": "221V", "current": 12.3, "ts": "2023-12-18T22:12:00", "inuse": true, "location": "beijing.chaoyang.datun"}
{"groupid": 170001, "voltage": "220V", "current": 12.2, "ts": "2023-12-18T22:12:02", "inuse": true, "location": "beijing.chaoyang.datun"}
{"groupid": 170001, "voltage": "216V", "current": 12.5, "ts": "2023-12-18T22:12:04", "inuse": false, "location": "beijing.chaoyang.datun"}
```
Or
``` json
[{"groupid": 170001, "voltage": "221V", "current": 12.3, "ts": "2023-12-18T22:12:00", "inuse": true, "location": "beijing.chaoyang.datun"},
{"groupid": 170001, "voltage": "220V", "current": 12.2, "ts": "2023-12-18T22:12:02", "inuse": true, "location": "beijing.chaoyang.datun"},
{"groupid": 170001, "voltage": "216V", "current": 12.5, "ts": "2023-12-18T22:12:04", "inuse": false, "location": "beijing.chaoyang.datun"}]
```
Subsequent examples will only explain using JSONObject.
The following nested JSON data can automatically parse the fields `groupid`, `data_voltage`, `data_current`, `ts`, `inuse`, `location_0_province`, `location_0_city`, and `location_0_street`. You can also choose which fields to parse and set aliases for the parsed fields.
``` json
{"groupid": 170001, "data": { "voltage": "221V", "current": 12.3 }, "ts": "2023-12-18T22:12:00", "inuse": true, "location": [{"province": "beijing", "city":"chaoyang", "street": "datun"}]}
```
<figure>
<Image img={imgJsonParsing} alt="JSON parsing"/>
<figcaption>Figure 3. JSON parsing</figcaption>
</figure>
##### Regex Regular Expressions<a name="regex"></a>
You can use **named capture groups** in regular expressions to extract multiple fields from any string (text) field. As shown in the figure, extract fields such as access IP, timestamp, and accessed URL from nginx logs.
``` re
(?<ip>\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b)\s-\s-\s\[(?<ts>\d{2}/\w{3}/\d{4}:\d{2}:\d{2}:\d{2}\s\+\d{4})\]\s"(?<method>[A-Z]+)\s(?<url>[^\s"]+).*(?<status>\d{3})\s(?<length>\d+)
```
<figure>
<Image img={imgRegexParsing} alt="Regex parsing"/>
<figcaption>Figure 4. Regex parsing</figcaption>
</figure>
##### UDT Custom Parsing Scripts
Custom parsing scripts written in rhai syntax (see `https://rhai.rs/book/`) can be used to parse the input data. The script currently supports only JSON-format raw data.
**Input**: In the script, you can use the parameter `data`, which is the Object Map produced by parsing the raw JSON data;
**Output**: The output data must be an array.
For example, consider a device that reports three-phase voltage values which need to be written into three separate subtables. Such data must first be parsed:
``` json
{
"ts": "2024-06-27 18:00:00",
"voltage": "220.1,220.3,221.1",
"dev_id": "8208891"
}
```
Then you can use the following script to extract the three voltage values.
```rhai
let v3 = data["voltage"].split(",");
[
#{"ts": data["ts"], "val": v3[0], "dev_id": data["dev_id"]},
#{"ts": data["ts"], "val": v3[1], "dev_id": data["dev_id"]},
#{"ts": data["ts"], "val": v3[2], "dev_id": data["dev_id"]}
]
```
The final parsing result is shown below:
<figure>
<Image img={imgResults} alt="Parsed results"/>
<figcaption>Figure 5. Parsed results</figcaption>
</figure>
### Extraction or Splitting
The parsed data may still not meet the data requirements of the target table. For example, the original data collected by a smart meter is as follows (in json format):
``` json
{"groupid": 170001, "voltage": "221V", "current": 12.3, "ts": "2023-12-18T22:12:00", "inuse": true, "location": "beijing.chaoyang.datun"}
{"groupid": 170001, "voltage": "220V", "current": 12.2, "ts": "2023-12-18T22:12:02", "inuse": true, "location": "beijing.chaoyang.datun"}
{"groupid": 170001, "voltage": "216V", "current": 12.5, "ts": "2023-12-18T22:12:04", "inuse": false, "location": "beijing.chaoyang.datun"}
```
When parsed with the JSON rules, the voltage is a string with a unit attached. To record the voltage and current values as int type for statistical analysis, the voltage needs to be split further; in addition, the date should be split into separate date and time fields for storage.
As shown in the figure below, you can use the split rule on the source field `ts` to split it into date and time, and use a regex to extract the voltage value and unit from the field `voltage`. The split rule requires setting the **delimiter** and the **number of splits**, and the split fields are named `{original field name}_{sequence number}`. The regex rule works the same way as in the parsing step, using **named capture groups** to name the extracted fields.
### Filtering<a name="filter"></a>
The filtering feature lets you set filtering conditions; only data rows that meet the conditions will be written to the target table. The result of the filter expression must be a boolean value. Before writing filter conditions, you must determine the type of each parsed field; based on the field type, you can use judgment functions and comparison operators (`>`, `>=`, `<=`, `<`, `==`, `!=`).
#### Field Types and Conversion
Only by clearly parsing the type of each field can you use the correct syntax for data filtering.
Fields parsed using the json rule are automatically set to types based on their attribute values:
1. bool type: `"inuse": true`
2. int type: `"voltage": 220`
3. float type: `"current" : 12.2`
4. String type: `"location": "MX001"`
Data parsed using regex rules are all string types.
Data extracted or split using split and regex are string types.
If the extracted data type is not the expected type, data type conversion can be performed. A common data type conversion is converting a string to a numeric type. Supported conversion functions are as follows:
|Function|From type|To type|e.g.|
|:----|:----|:----|:----|
| parse_int | string | int | parse_int("56") // Results in integer 56 |
| parse_float | string | float | parse_float("12.3") // Results in float 12.3 |
#### Conditional Expressions
Different data types have their own ways of writing conditional expressions.
##### BOOL type
You can use the variable itself or the `!` operator. For example, for the field `"inuse": true`, you can write the following expressions:
> 1. inuse
> 2. !inuse
##### Numeric types (int/float)
Numeric types support comparison operators `==`, `!=`, `>`, `>=`, `<`, `<=`.
##### String type
Use comparison operators to compare strings.
String functions
|Function|Description|e.g.|
|:----|:----|:----|
| is_empty | returns true if the string is empty | s.is_empty() |
| contains | checks if a certain character or sub-string occurs in the string | s.contains("substring") |
| starts_with | returns true if the string starts with a certain string | s.starts_with("prefix") |
| ends_with | returns true if the string ends with a certain string | s.ends_with("suffix") |
| len | returns the number of characters (not number of bytes) in the string, must be used with comparison operator | s.len == 5 to check if the string length is 5; len as a property returns int, different from the first four functions which directly return bool. |
##### Compound Expressions
Multiple conditional expressions can be combined using logical operators (&&, ||, !).
For example, the following expression represents fetching data from smart meters installed in Beijing with a voltage value greater than 200.
> location.starts_with("beijing") && voltage > 200
### Mapping
Mapping maps a **source field** (parsed, extracted, or split) to a **target table field**. A source field can be mapped directly, or mapped to the target table after some rule-based calculation.
#### Selecting the target supertable
After selecting the target supertable, all tags and columns of the supertable will be loaded.
Source fields are automatically mapped to the tags and columns of the target supertable by name.
For example, for preview data produced by parsing, extraction, or splitting, such as the smart-meter fields used earlier in this section (`groupid`, `voltage`, `current`, `ts`, `inuse`, `location`), each field is matched by name to a column or tag of the target supertable.
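As a concrete illustration (a sketch only, with hypothetical names), such a target supertable could be defined in TDengine SQL as follows; the actual target is whatever supertable you select in this step:

```sql
-- Hypothetical target supertable: measurement values become columns, device attributes become tags
CREATE STABLE IF NOT EXISTS meters (
    ts      TIMESTAMP,
    current FLOAT,
    voltage INT,
    inuse   BOOL
) TAGS (
    groupid  INT,
    location VARCHAR(64)
);
```

With a schema like this, the parsed fields `ts`, `current`, `voltage`, and `inuse` would map to columns by name, while `groupid` and `location` would map to tags.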
#### Mapping Rules <a name="expression"></a>
The supported mapping rules are shown in the following table:
|rule|description|
|:----|:----|
| mapping | Direct mapping, need to select the mapping source field.|
| value | Constant, can enter string constants or numeric constants, the entered constant value is directly stored.|
| generator | Generator; currently only the timestamp generator is supported, which stores the current time at write time.|
| join | String connector, can specify connecting characters to concatenate selected multiple source fields.|
| format | **String formatting tool**. Fill in a formatting string; for example, if there are three source fields year, month, and day representing the year, month, and day, and you wish to store them in the yyyy-MM-dd date format, you can provide the formatting string `${year}-${month}-${day}`. Here `${}` acts as a placeholder; the placeholder can be a source field or a string-function transformation of a source field.|
| sum | Select multiple numeric fields for addition calculation.|
| expr | **Numeric operation expression**, can perform more complex function processing and mathematical operations on numeric fields.|
##### Supported string processing functions in `format`
|Function|description|e.g.|
|:----|:----|:----|
| pad(len, pad_chars) | pads the string with a character or a string to at least a specified length | "1.2".pad(5, '0') // Result is "1.200" |
|trim|trims the string of whitespace at the beginning and end|" abc ee ".trim() // Result is "abc ee"|
|sub_string(start_pos, len)|extracts a sub-string, two parameters:<br />1. start position, counting from end if < 0<br />2. (optional) number of characters to extract, none if ≤ 0, to end if omitted|"012345678".sub_string(5) // "5678"<br />"012345678".sub_string(5, 2) // "56"<br />"012345678".sub_string(-2) // "78"|
|replace(substring, replacement)|replaces a sub-string with another|"012345678".replace("012", "abc") // "abc345678"|
##### Mathematical expressions in `expr`
Basic mathematical operations support addition `+`, subtraction `-`, multiplication `*`, and division `/`.
For example, if the data source collects temperature values in Celsius and the target database stores values in Fahrenheit, then the collected temperature data needs to be converted.
If the source field is `temperature`, then use the expression `temperature * 1.8 + 32`.
Mathematical expressions also support the use of mathematical functions, as shown in the table below:
|Function|description|e.g.|
|:----|:----|:----|
|sin, cos, tan, sinh, cosh|Trigonometry|a.sin() |
|asin, acos, atan, asinh, acosh|arc-trigonometry|a.asin()|
|sqrt|Square root|a.sqrt() // 4.sqrt() == 2|
|exp|Exponential|a.exp()|
|ln, log|Logarithmic|a.ln() // e.ln() == 1<br />a.log() // 10.log() == 1|
|floor, ceiling, round, int, fraction|rounding|a.floor() // (4.2).floor() == 4<br />a.ceiling() // (4.2).ceiling() == 5<br />a.round() // (4.2).round() == 4<br />a.int() // (4.2).int() == 4<br />a.fraction() // (4.2).fraction() == 0.2|
#### Subtable name mapping
Subtable names are strings and can be defined using the string formatting `format` expression in the mapping rules.
## Creating a Task
Below, using MQTT data source as an example, we explain how to create a task of MQTT type, consume data from MQTT Broker, and write into TDengine.
1. After logging into taosExplorer, click on "Data Writing" on the left navigation bar to enter the task list page.
2. On the task list page, click "+ Add Data Source" to enter the task creation page.
3. After entering the task name, select the type as MQTT, then you can create a new proxy or select an already created proxy.
4. Enter the IP address and port number of the MQTT broker, for example: 192.168.1.100:1883
5. Configure authentication and SSL encryption:
- If the MQTT broker has enabled user authentication, enter the username and password of the MQTT broker in the authentication section;
- If the MQTT broker has enabled SSL encryption, you can turn on the SSL certificate switch on the page and upload the CA's certificate, as well as the client's certificate and private key files;
6. In the "Collection Configuration" section, you can select the version of the MQTT protocol, currently supporting 3.1, 3.1.1, 5.0; when configuring the Client ID, be aware that if multiple tasks are created for the same MQTT broker, the Client IDs should be different to avoid conflicts, which could cause the tasks to not run properly; when configuring the topic and QoS, use the format `<topic name>::<QoS>`, where the QoS values range from 0, 1, 2, representing at most once, at least once, exactly once; after configuring the above information, you can click the "Check Connectivity" button to check the configurations, if the connectivity check fails, please modify according to the specific error tips returned on the page;
7. During the process of syncing data from the MQTT broker, taosX also supports extracting, filtering, and mapping operations on the fields in the message body. In the text box under "Payload Transformation", you can directly input a sample of the message body, or import it by uploading a file, and in the future, it will also support retrieving sample messages directly from the configured server;
8. For extracting fields from the message body, currently, two methods are supported: JSON and regular expressions. For simple key/value formatted JSON data, you can directly click the extract button to display the parsed field names; for complex JSON data, you can use JSON Path to extract the fields of interest; when using regular expressions to extract fields, ensure the correctness of the regular expressions;
9. After the fields in the message body are parsed, you can set filtering rules based on the parsed field names, and only data that meets the filtering rules will be written into TDengine, otherwise, the message will be ignored; for example, you can configure a filtering rule as voltage > 200, meaning only data with a voltage greater than 200V will be synced to TDengine;
10. Finally, after configuring the mapping rules between the fields in the message body and the fields in the supertable, you can submit the task; in addition to basic mapping, here you can also convert the values of the fields in the message, for example, you can use the expression (expr) to calculate the power from the original message body's voltage and current before writing it into TDengine;
11. After submitting the task, it will automatically return to the task list page. If the submission is successful, the status of the task will switch to "Running"; if the submission fails, you can check the task's activity log to find the reason for the error;
12. For running tasks, clicking the metrics view button lets you see the task's detailed running metrics. The popup window is divided into 2 tabs, showing the cumulative metrics across the task's multiple runs and the metrics of the current run; these metrics refresh automatically every 2 seconds.
## Task Management
On the task list page, you can also start, stop, view, delete, and copy tasks. You can also view the running status of each task, including the number of records written, traffic, and more.
```mdx-code-block
import DocCardList from '@theme/DocCardList';
import {useCurrentSidebarCategory} from '@docusaurus/theme-common';
<DocCardList items={useCurrentSidebarCategory().items}/>
```

View File

@ -0,0 +1,13 @@
---
title: Advanced Features
slug: /advanced-features
---
This chapter mainly introduces the advanced features of TDengine, such as data subscription, caching, stream computing, edge-cloud collaboration, and data access.
```mdx-code-block
import DocCardList from '@theme/DocCardList';
import {useCurrentSidebarCategory} from '@docusaurus/theme-common';
<DocCardList items={useCurrentSidebarCategory().items}/>
```

View File

@ -0,0 +1,769 @@
---
title: Connecting to TDengine
slug: /developer-guide/connecting-to-tdengine
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
import Image from '@theme/IdealImage';
import imgConnect from '../assets/connecting-to-tdengine-01.png';
import ConnJava from "./_connect_java.mdx";
import ConnGo from "./_connect_go.mdx";
import ConnRust from "./_connect_rust.mdx";
import ConnNode from "./_connect_node.mdx";
import ConnPythonNative from "./_connect_python.mdx";
import ConnCSNative from "./_connect_cs.mdx";
import ConnC from "./_connect_c.mdx";
import InstallOnLinux from "../14-reference/05-connector/_linux_install.mdx";
import InstallOnWindows from "../14-reference/05-connector/_windows_install.mdx";
import InstallOnMacOS from "../14-reference/05-connector/_macos_install.mdx";
import VerifyLinux from "../14-reference/05-connector/_verify_linux.mdx";
import VerifyMacOS from "../14-reference/05-connector/_verify_macos.mdx";
import VerifyWindows from "../14-reference/05-connector/_verify_windows.mdx";
TDengine provides a rich set of application development interfaces. To facilitate rapid application development, TDengine supports connectors for multiple programming languages. The official connectors include support for C/C++, Java, Python, Go, Node.js, C#, Rust, Lua (community contribution), and PHP (community contribution). These connectors support connecting to the TDengine cluster using the native interface (taosc) and the REST interface (not yet supported in some languages). Community developers have also contributed several unofficial connectors, such as the ADO.NET connector, Lua connector, and PHP connector. Additionally, applications can directly call the REST API provided by taosAdapter for data writing and querying operations.
## Connection Methods
TDengine provides three methods for establishing connections:
1. Direct connection between the client driver taosc and the server program taosd, referred to as "native connection" in the text below.
2. Connection to taosd through the REST API provided by the taosAdapter component, referred to as "REST connection" in the text below.
3. Connection to taosd through the WebSocket API provided by the taosAdapter component, referred to as "WebSocket connection" in the text below.
<figure>
<Image img={imgConnect} alt="Connecting to TDengine"/>
<figcaption>Figure 1. Connecting to TDengine</figcaption>
</figure>
Regardless of the method used to establish the connection, the connectors provide the same or similar API to operate the database and can execute SQL statements. The initialization of the connection slightly differs, but users will not feel any difference in usage.
For various connection methods and language connector support, please refer to: [Connector Features](../../tdengine-reference/client-libraries/)
Key differences include:
1. Using native connection requires ensuring that the client driver taosc and the server's TDengine version are compatible.
2. Using REST connection does not require installing the client driver taosc, offering the advantage of cross-platform ease of use, but it lacks features like data subscription and binary data types. Additionally, compared to native and WebSocket connections, the performance of REST connections is the lowest. REST interfaces are stateless. When using REST connections, it is necessary to specify the database names of tables and supertables in SQL.
3. Using WebSocket connection also does not require installing the client driver taosc.
4. Connecting to cloud service instances must use REST connection or WebSocket connection.
**WebSocket connection is recommended**
## Installing the Client Driver taosc
If you choose a native connection and your application is not running on the same server as TDengine, you need to install the client driver first; otherwise, you can skip this step. To avoid incompatibility between the client driver and the server, please use consistent versions.
### Installation Steps
<Tabs defaultValue="linux" groupId="os">
<TabItem value="linux" label="Linux">
<InstallOnLinux />
</TabItem>
<TabItem value="windows" label="Windows">
<InstallOnWindows />
</TabItem>
<TabItem value="macos" label="macOS">
<InstallOnMacOS />
</TabItem>
</Tabs>
### Installation Verification
After completing the above installation and configuration, and confirming that the TDengine service has started running normally, you can log in using the TDengine command-line program `taos` included in the installation package.
<Tabs defaultValue="linux" groupId="os">
<TabItem value="linux" label="Linux">
<VerifyLinux />
</TabItem>
<TabItem value="windows" label="Windows">
<VerifyWindows />
</TabItem>
<TabItem value="macos" label="macOS">
<VerifyMacOS />
</TabItem>
</Tabs>
## Installing Connectors
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
If you are using Maven to manage your project, simply add the following dependency to your pom.xml.
```xml
<dependency>
<groupId>com.taosdata.jdbc</groupId>
<artifactId>taos-jdbcdriver</artifactId>
<version>3.4.0</version>
</dependency>
```
</TabItem>
<TabItem label="Python" value="python">
- **Pre-installation Preparation**
- Install Python. Recent versions of the taospy package require Python 3.6.2+. Earlier versions of the taospy package require Python 3.7+. The taos-ws-py package requires Python 3.7+. If Python is not already installed on your system, refer to [Python BeginnersGuide](https://wiki.python.org/moin/BeginnersGuide/Download) for installation.
- Install [pip](https://pypi.org/project/pip/). In most cases, the Python installation package comes with the pip tool; if not, refer to the [pip documentation](https://pip.pypa.io/en/stable/installation/) for installation.
- If using a native connection, you also need to [install the client driver](../connecting-to-tdengine/). The client software package includes the TDengine client dynamic link library (libtaos.so or taos.dll) and TDengine CLI.
- **Using pip to Install**
- Uninstall old versions
If you have previously installed old versions of the Python connector, please uninstall them first.
```shell
pip3 uninstall taos taospy
pip3 uninstall taos taos-ws-py
```
- Install `taospy`
- Latest version
```shell
pip3 install taospy
```
- Install a specific version
```shell
pip3 install taospy==2.3.0
```
- Install from GitHub
```shell
pip3 install git+https://github.com/taosdata/taos-connector-python.git
```
Note: This package is for native connection
- Install `taos-ws-py`
```bash
pip3 install taos-ws-py
```
Note: This package is for WebSocket connection
- Install both `taospy` and `taos-ws-py`
```bash
pip3 install taospy[ws]
```
- **Installation Verification**
<Tabs defaultValue="rest">
<TabItem value="native" label="Native Connection">
For native connections, it is necessary to verify that both the client driver and the Python connector itself are correctly installed. If the `taos` module can be successfully imported, then the client driver and Python connector are correctly installed. You can enter in the Python interactive Shell:
```python
import taos
```
</TabItem>
<TabItem value="rest" label="REST Connection">
For REST connections, you only need to verify if the `taosrest` module can be successfully imported. You can enter in the Python interactive Shell:
```python
import taosrest
```
</TabItem>
<TabItem value="ws" label="WebSocket Connection">
For WebSocket connections, you only need to verify if the `taosws` module can be successfully imported. You can enter in the Python interactive Shell:
```python
import taosws
```
</TabItem>
</Tabs>
</TabItem>
<TabItem label="Go" value="go">
Edit `go.mod` to add the `driver-go` dependency.
```go-mod title=go.mod
module goexample
go 1.17
require github.com/taosdata/driver-go/v3 latest
```
:::note
driver-go uses cgo to wrap the taosc API. cgo requires GCC to compile C source code. Therefore, make sure GCC is installed on your system.
:::
</TabItem>
<TabItem label="Rust" value="rust">
Edit `Cargo.toml` to add the `taos` dependency.
```toml title=Cargo.toml
[dependencies]
taos = { version = "*"}
```
:::info
The Rust connector distinguishes different connection methods through different features. It supports both native and WebSocket connections by default. If only a WebSocket connection is needed, set the `ws` feature:
```toml
taos = { version = "*", default-features = false, features = ["ws"] }
```
:::
</TabItem>
<TabItem label="Node.js" value="node">
- **Pre-installation Preparation**
- Install the Node.js development environment, using version 14 or above. Download link: [https://nodejs.org/en/download/](https://nodejs.org/en/download/)
- **Installation**
- Use npm to install the Node.js connector
```shell
npm install @tdengine/websocket
```
Note: Node.js currently only supports WebSocket connections
- **Installation Verification**
- Create a verification directory, for example: `~/tdengine-test`, and download the [nodejsChecker.js source code](https://github.com/taosdata/TDengine/tree/main/docs/examples/node/websocketexample/nodejsChecker.js) from GitHub to your local machine.
- Execute the following commands in the command line.
```bash
npm init -y
npm install @tdengine/websocket
node nodejsChecker.js
```
- After performing the above steps, the command line will output the results of nodejsChecker.js connecting to the TDengine instance and performing simple insertion and query operations.
</TabItem>
<TabItem label="C#" value="csharp">
Edit the project configuration file to add a reference to [TDengine.Connector](https://www.nuget.org/packages/TDengine.Connector/):
```xml title=csharp.csproj
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<StartupObject>TDengineExample.AsyncQueryExample</StartupObject>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="TDengine.Connector" Version="3.1.0" />
</ItemGroup>
</Project>
```
You can also add it via the dotnet command:
```shell
dotnet add package TDengine.Connector
```
:::note
The following example code is based on dotnet6.0. If you are using another version, you may need to make appropriate adjustments.
:::
</TabItem>
<TabItem label="C" value="c">
If you have already installed the TDengine server software or the TDengine client driver taosc, then the C connector is already installed and no additional action is required.
</TabItem>
<TabItem label="REST API" value="rest">
To access TDengine using the REST API method, no drivers or connectors need to be installed.
</TabItem>
</Tabs>
## Establishing Connection
Before proceeding with this step, please ensure that there is a running TDengine that can be accessed, and that the server's FQDN is configured correctly. The following example code assumes that TDengine is installed on the local machine, and that the FQDN (default localhost) and serverPort (default 6030) are using the default configuration.
### Connection Parameters
There are many configuration options for connecting, so before establishing a connection, let's first introduce the parameters used by the connectors of each language to establish a connection.
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
The parameters for establishing a connection with the Java connector are URL and Properties.
The JDBC URL format for TDengine is: `jdbc:[TAOS|TAOS-WS|TAOS-RS]://[host_name]:[port]/[database_name]?[user={user}|&password={password}|&charset={charset}|&cfgdir={config_dir}|&locale={locale}|&timezone={timezone}|&batchfetch={batchfetch}]`
For detailed explanations of URL and Properties parameters and how to use them, see [URL specifications](../../tdengine-reference/client-libraries/java/)
</TabItem>
<TabItem label="Python" value="python">
The Python connector uses the `connect()` method to establish a connection. The specific connection parameters are:
- url: URL of the `taosAdapter` REST service. The default is port `6041` on `localhost`.
- user: TDengine username. The default is `root`.
- password: TDengine user password. The default is `taosdata`.
- timeout: HTTP request timeout in seconds. The default is `socket._GLOBAL_DEFAULT_TIMEOUT`. Generally, no configuration is needed.
</TabItem>
<TabItem label="Go" value="go">
The data source name has a generic format, similar to [PEAR DB](http://pear.php.net/manual/en/package.database.db.intro-dsn.php), but without the type prefix (brackets indicate optional):
```text
[username[:password]@][protocol[(address)]]/[dbname][?param1=value1&...&paramN=valueN]
```
Complete DSN format:
```text
username:password@protocol(address)/dbname?param=value
```
Supported DSN parameters are as follows:
Native connection:
- `cfg` specifies the taos.cfg directory
- `cgoThread` specifies the number of cgo operations that can be executed concurrently, default is the number of system cores
- `cgoAsyncHandlerPoolSize` specifies the size of the async function handler, default is 10000
REST connection:
- `disableCompression` whether to accept compressed data, default is true which means not accepting compressed data, set to false if data transmission uses gzip compression.
- `readBufferSize` the size of the buffer for reading data, default is 4K (4096), this value can be increased appropriately when the query result data volume is large.
- `token` the token used when connecting to cloud services.
- `skipVerify` whether to skip certificate verification, default is false which means not skipping certificate verification, set to true if connecting to an insecure service.
WebSocket connection:
- `enableCompression` whether to send compressed data, default is false which means not sending compressed data, set to true if data transmission uses compression.
- `readTimeout` the timeout for reading data, default is 5m.
- `writeTimeout` the timeout for writing data, default is 10s.
</TabItem>
<TabItem label="Rust" value="rust">
The Rust connector uses a DSN to create connections. The basic structure of the DSN description string is as follows:
```text
<driver>[+<protocol>]://[[<username>:<password>@]<host>:<port>][/<database>][?<p1>=<v1>[&<p2>=<v2>]]
|------|------------|---|-----------|-----------|------|------|------------|-----------------------|
|driver| protocol | | username | password | host | port | database | params |
```
For detailed explanation of DSN and how to use it, see [Connection Features](../../tdengine-reference/client-libraries/rust/)
</TabItem>
<TabItem label="Node.js" value="node">
The Node.js connector uses a DSN to create connections. The basic structure of the DSN description string is as follows:
```text
[+<protocol>]://[[<username>:<password>@]<host>:<port>][/<database>][?<p1>=<v1>[&<p2>=<v2>]]
|------------|---|-----------|-----------|------|------|------------|-----------------------|
| protocol | | username | password | host | port | database | params |
```
- **protocol**: Establish a connection using the websocket protocol. For example, `ws://localhost:6041`
- **username/password**: Username and password for the database.
- **host/port**: Host address and port number. For example, `localhost:6041`
- **database**: Database name.
- **params**: Other parameters. For example, token.
- Complete DSN example:
```js
ws://root:taosdata@localhost:6041
```
</TabItem>
<TabItem label="C#" value="csharp">
ConnectionStringBuilder sets connection parameters as key-value pairs, where the key is the parameter name and the value is the parameter value; pairs are separated by semicolons `;`.
For example:
```csharp
"protocol=WebSocket;host=127.0.0.1;port=6041;useSSL=false"
```
Supported parameters are as follows:
- `host`: The address of the TDengine instance.
- `port`: The port of the TDengine instance.
- `username`: Username for the connection.
- `password`: Password for the connection.
- `protocol`: Connection protocol, options are Native or WebSocket, default is Native.
- `db`: Database to connect to.
- `timezone`: Time zone, default is the local time zone.
- `connTimeout`: Connection timeout, default is 1 minute.
Additional parameters supported for WebSocket connections:
- `readTimeout`: Read timeout, default is 5 minutes.
- `writeTimeout`: Send timeout, default is 10 seconds.
- `token`: Token for connecting to TDengine cloud.
- `useSSL`: Whether to use SSL connection, default is false.
- `enableCompression`: Whether to enable WebSocket compression, default is false.
- `autoReconnect`: Whether to automatically reconnect, default is false.
- `reconnectRetryCount`: Number of retries for reconnection, default is 3.
- `reconnectIntervalMs`: Reconnection interval in milliseconds, default is 2000.
</TabItem>
<TabItem label="C" value="c">
**WebSocket Connection**
For C/C++ language connectors, the WebSocket connection uses the `ws_connect()` function to establish a connection with the TDengine database. Its parameter is a DSN description string, structured as follows:
```text
<driver>[+<protocol>]://[[<username>:<password>@]<host>:<port>][/<database>][?<p1>=<v1>[&<p2>=<v2>]]
|------|------------|---|-----------|-----------|------|------|------------|-----------------------|
|driver| protocol | | username | password | host | port | database | params |
```
For detailed explanation of DSN and how to use it, see [Connection Features](../../tdengine-reference/client-libraries/cpp/#dsn)
**Native Connection**
For C/C++ language connectors, the native connection method uses the `taos_connect()` function to establish a connection with the TDengine database. Detailed parameters are as follows:
- `host`: Hostname or IP address of the database server to connect to. If it is a local database, `"localhost"` can be used.
- `user`: Username for logging into the database.
- `passwd`: Password corresponding to the username.
- `db`: Default database name when connecting. If no database is specified, pass `NULL` or an empty string.
- `port`: Port number the database server listens on. The default port number is `6030`.
The `taos_connect_auth()` function is also provided for establishing a connection with the TDengine database using an MD5 encrypted password. This function is similar to `taos_connect`, but differs in the handling of the password, as `taos_connect_auth` requires the MD5 encrypted string of the password.
</TabItem>
<TabItem label="REST API" value="rest">
When accessing TDengine via REST API, the application directly establishes an HTTP connection with taosAdapter, and it is recommended to use a connection pool to manage connections.
For specific parameters using the REST API, refer to: [HTTP request format](../../tdengine-reference/client-libraries/rest-api/)
</TabItem>
</Tabs>
### WebSocket Connection
Below are code examples of establishing a WebSocket connection with various language connectors. They demonstrate how to connect to the TDengine database over WebSocket and set some connection parameters. The whole process mainly involves establishing the database connection and handling exceptions.
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WSConnectExample.java:main}}
```
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/connect_websocket_examples.py:connect}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/connect/wsexample/main.go}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/restexample/examples/connect.rs}}
```
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/sql_example.js:createConnect}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wsConnect/Program.cs:main}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c-ws/connect_example.c}}
```
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
### Native Connection
Below are code examples of establishing a native connection in various languages. They demonstrate how to connect to the TDengine database using the native connection method and set some connection parameters. The entire process mainly involves establishing a database connection and handling exceptions.
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/JNIConnectExample.java:main}}
```
</TabItem>
<TabItem label="Python" value="python">
<ConnPythonNative />
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/connect/cgoexample/main.go}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/nativeexample/examples/connect.rs}}
```
</TabItem>
<TabItem label="Node.js" value="node">
Not supported
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/connect/Program.cs:main}}
```
</TabItem>
<TabItem label="C" value="c">
<ConnC />
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
### REST Connection
Below are code examples of establishing a REST connection in various languages. They demonstrate how to connect to the TDengine database using the REST connection method. The entire process mainly involves establishing a database connection and handling exceptions.
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/RESTConnectExample.java:main}}
```
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/connect_rest_example.py:connect}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/connect/restexample/main.go}}
```
</TabItem>
<TabItem label="Rust" value="rust">
Not supported
</TabItem>
<TabItem label="Node.js" value="node">
Not supported
</TabItem>
<TabItem label="C#" value="csharp">
Not supported
</TabItem>
<TabItem label="C" value="c">
Not supported
</TabItem>
<TabItem label="REST API" value="rest">
Access TDengine using the REST API method, where the application independently establishes an HTTP connection.
</TabItem>
</Tabs>
:::tip
If the connection fails, in most cases it is due to incorrect FQDN or firewall settings. For detailed troubleshooting methods, please see ["Encountering the error 'Unable to establish connection, what should I do?'"](../../frequently-asked-questions/) in the "Common Questions and Feedback".
:::
## Connection Pool
Some connectors offer a connection pool, or can be used in conjunction with existing connection pool components. By using a connection pool, applications can quickly obtain available connections from the pool, avoiding the overhead of creating and destroying connections with each operation. This not only reduces resource consumption but also improves response speed. Additionally, connection pools support the management of connections, such as limiting the maximum number of connections and checking the validity of connections, ensuring efficient and reliable use of connections. We **recommend managing connections using a connection pool**.
Below are code examples of connection pool support for various language connectors.
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
**HikariCP**
Example usage is as follows:
```java
{{#include docs/examples/java/src/main/java/com/taos/example/HikariDemo.java:connection_pool}}
```
> After obtaining a connection through HikariDataSource.getConnection(), you need to call the close() method after use, which actually does not close the connection but returns it to the pool.
> For more issues about using HikariCP, please see the [official documentation](https://github.com/brettwooldridge/HikariCP).
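A minimal usage sketch (assumptions: `ds` is the configured `HikariDataSource` from the example above, the `java.sql.*` types are imported, and the query shown is only illustrative):
```java
// Borrow a connection from the pool; try-with-resources calls close(), which
// returns the connection to the pool instead of physically closing it.
try (Connection connection = ds.getConnection();
     Statement statement = connection.createStatement();
     ResultSet rs = statement.executeQuery("SELECT SERVER_VERSION()")) {
    if (rs.next()) {
        System.out.println("server version: " + rs.getString(1));
    }
} catch (SQLException e) {
    e.printStackTrace();
}
```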
**Druid**
Example usage is as follows:
```java
{{#include docs/examples/java/src/main/java/com/taos/example/DruidDemo.java:connection_pool}}
```
> For more issues about using Druid, please see the [official documentation](https://github.com/alibaba/druid).
</TabItem>
<TabItem label="Python" value="python">
<ConnPythonNative />
</TabItem>
<TabItem label="Go" value="go">
A connection created with `sql.Open` already implements a connection pool, and connection pool parameters can be set through the API, as shown in the example below
```go
{{#include docs/examples/go/connect/connpool/main.go:pool}}
```
</TabItem>
<TabItem label="Rust" value="rust">
In complex applications, it is recommended to enable connection pooling. The connection pool for [taos] by default (in asynchronous mode) is implemented using [deadpool].
A connection pool with default parameters can be created as follows.
```rust
let pool: Pool<TaosBuilder> = TaosBuilder::from_dsn("taos:///")
.unwrap()
.pool()
.unwrap();
```
You can also use the connection pool builder to set the connection pool parameters:
```rust
let pool: Pool<TaosBuilder> = Pool::builder(Manager::from_dsn(self.dsn.clone()).unwrap().0)
.max_size(88) // Maximum number of connections
.build()
.unwrap();
```
In your application code, use `pool.get()?` to obtain a connection object [Taos].
```rust
let taos = pool.get()?;
```
</TabItem>
</Tabs>
View File
@ -0,0 +1,338 @@
---
title: Running SQL Statements
sidebar_label: Running SQL Statements
slug: /developer-guide/running-sql-statements
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
TDengine provides comprehensive support for the SQL language, allowing users to query, insert, and delete data using familiar SQL syntax. TDengine's SQL also supports database and table management operations, such as creating, modifying, and deleting databases and tables. TDengine extends standard SQL by introducing features unique to time-series data processing, such as aggregation queries, downsampling, and interpolation queries, to adapt to the characteristics of time-series data. These extensions enable users to process time-series data more efficiently and perform complex data analysis and processing. For specific supported SQL syntax, please refer to [TDengine SQL](../../tdengine-reference/sql-manual/)
Below, we introduce how to use language connectors to execute SQL for creating databases, tables, writing data, and querying data.
:::note
REST connection: Connectors for various programming languages encapsulate the use of `HTTP` requests for connections, supporting data writing and querying operations, with developers still using the interfaces provided by the connectors to access `TDengine`.
REST API: Directly call the REST API interface provided by `taosadapter` for data writing and querying operations. Code examples use the `curl` command for demonstration.
:::
## Creating Databases and Tables
Below, using smart meters as an example, we show how to use language connectors to execute SQL commands to create a database named `power`, then use the `power` database as the default database.
Next, create a supertable (STABLE) named `meters`, whose table structure includes columns for timestamp, current, voltage, phase, etc., and tags for group ID and location.
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/JdbcCreatDBDemo.java:create_db_and_table}}
```
</TabItem>
<TabItem label="Python" value="python">
```python title="WebSocket Connection"
{{#include docs/examples/python/create_db_ws.py}}
```
```python title="Native Connection"
{{#include docs/examples/python/create_db_native.py}}
```
```python title="Rest Connection"
{{#include docs/examples/python/create_db_rest.py}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/sqlquery/main.go:create_db_and_table}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/nativeexample/examples/createdb.rs:create_db_and_table}}
```
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/sql_example.js:create_db_and_table}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wsInsert/Program.cs:create_db_and_table}}
```
</TabItem>
<TabItem label="C" value="c">
```c title="WebSocket Connection"
{{#include docs/examples/c-ws/create_db_demo.c:create_db_and_table}}
```
```c title="Native Connection"
{{#include docs/examples/c/create_db_demo.c:create_db_and_table}}
```
</TabItem>
<TabItem label="REST API" value="rest">
Create Database
```bash
curl --location -uroot:taosdata 'http://127.0.0.1:6041/rest/sql' \
--data 'CREATE DATABASE IF NOT EXISTS power'
```
Create Table, specify the database as `power` in the URL
```bash
curl --location -uroot:taosdata 'http://127.0.0.1:6041/rest/sql/power' \
--data 'CREATE STABLE IF NOT EXISTS meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) TAGS (groupId INT, location BINARY(24))'
```
</TabItem>
</Tabs>
> **Note**: It is recommended to construct SQL statements in the format of `<dbName>.<tableName>`. It is not recommended to use `USE DBName` in applications.
## Insert Data
Below, using smart meters as an example, we demonstrate how to use connectors to execute SQL that inserts data into the `meters` supertable in the `power` database. The example uses TDengine's automatic table creation SQL syntax, writes 3 records into subtable d1001 and 1 record into subtable d1002, and then prints the actual number of records inserted.
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/JdbcInsertDataDemo.java:insert_data}}
```
**Note**
NOW is an internal system function, defaulting to the current time of the client's computer. NOW + 1s represents the client's current time plus 1 second, with the number following representing the time unit: a (millisecond), s (second), m (minute), h (hour), d (day), w (week), n (month), y (year).
</TabItem>
<TabItem label="Python" value="python">
```python title="WebSocket Connection"
{{#include docs/examples/python/insert_ws.py}}
```
```python title="Native Connection"
{{#include docs/examples/python/insert_native.py}}
```
```python title="Rest Connection"
{{#include docs/examples/python/insert_rest.py}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/sqlquery/main.go:insert_data}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/nativeexample/examples/insert.rs:insert_data}}
```
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/sql_example.js:insertData}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wsInsert/Program.cs:insert_data}}
```
</TabItem>
<TabItem label="C" value="c">
```c title="WebSocket Connection"
{{#include docs/examples/c-ws/insert_data_demo.c:insert_data}}
```
```c title="Native Connection"
{{#include docs/examples/c/insert_data_demo.c:insert_data}}
```
**Note**
NOW is an internal system function, defaulting to the current time of the client's computer. NOW + 1s represents the client's current time plus 1 second, where the number is followed by a time unit: a (milliseconds), s (seconds), m (minutes), h (hours), d (days), w (weeks), n (months), y (years).
</TabItem>
<TabItem label="REST API" value="rest">
Write data
```bash
curl --location -uroot:taosdata 'http://127.0.0.1:6041/rest/sql' \
--data 'INSERT INTO power.d1001 USING power.meters TAGS(2,'\''California.SanFrancisco'\'') VALUES (NOW + 1a, 10.30000, 219, 0.31000) (NOW + 2a, 12.60000, 218, 0.33000) (NOW + 3a, 12.30000, 221, 0.31000) power.d1002 USING power.meters TAGS(3, '\''California.SanFrancisco'\'') VALUES (NOW + 1a, 10.30000, 218, 0.25000)'
```
</TabItem>
</Tabs>
## Query data
Below, using smart meters as an example, we demonstrate how to use connectors in various languages to execute SQL that queries data from the `meters` supertable in the `power` database, retrieving up to 100 rows and printing the results line by line.
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/JdbcQueryDemo.java:query_data}}
```
**Note** Query handling is consistent with relational databases: column indices in the result set start from 1, and retrieving fields by column name is recommended.
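For instance, a minimal JDBC-style sketch of reading the result set (a fragment, assuming `rs` is the `ResultSet` returned by the query and the `java.sql` types are imported; column names follow the `meters` schema used above):
```java
// Iterate the result set; JDBC indices start at 1, and access by column name is preferred.
while (rs.next()) {
    Timestamp ts = rs.getTimestamp(1);        // by index
    float current = rs.getFloat("current");   // by column name (recommended)
    String location = rs.getString("location");
    System.out.printf("%s, %.2f, %s%n", ts, current, location);
}
```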
</TabItem>
<TabItem label="Python" value="python">
```python title="WebSocket Connection"
{{#include docs/examples/python/query_ws.py}}
```
```python title="Native Connection"
{{#include docs/examples/python/query_native.py}}
```
```python title="Rest Connection"
{{#include docs/examples/python/query_rest.py}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/sqlquery/main.go:select_data}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/nativeexample/examples/query.rs:query_data}}
```
Rust connector also supports using **serde** for deserializing to get structured results:
```rust
{{#include docs/examples/rust/nativeexample/examples/query.rs:query_data_2}}
```
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/sql_example.js:queryData}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wsInsert/Program.cs:select_data}}
```
</TabItem>
<TabItem label="C" value="c">
```c title="WebSocket Connection"
{{#include docs/examples/c-ws/query_data_demo.c:query_data}}
```
```c title="Native Connection"
{{#include docs/examples/c/query_data_demo.c:query_data}}
```
</TabItem>
<TabItem label="REST API" value="rest">
Query Data
```bash
curl --location -uroot:taosdata 'http://127.0.0.1:6041/rest/sql' \
--data 'SELECT ts, current, location FROM power.meters limit 100'
```
</TabItem>
</Tabs>
## Execute SQL with reqId
reqId can be used for request link tracing, similar to the role of traceId in distributed systems. A request might need to pass through multiple services or modules to be completed. reqId is used to identify and associate all related operations of this request, allowing us to track and analyze the complete path of the request.
Using reqId has the following benefits:
- Request tracing: By associating the same reqId with all related operations of a request, you can trace the complete path of the request in the system.
- Performance analysis: By analyzing a request's reqId, you can understand the processing time of the request across various services and modules, thereby identifying performance bottlenecks.
- Fault diagnosis: When a request fails, you can identify where the problem occurred by examining the reqId associated with the request.
If the user does not set a reqId, the connector will internally generate one randomly, but it is recommended that users explicitly set it to better associate it with their requests.
Below are code examples of setting reqId to execute SQL in various language connectors.
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/JdbcReqIdDemo.java:with_reqid}}
```
</TabItem>
<TabItem label="Python" value="python">
```python title="WebSocket Connection"
{{#include docs/examples/python/reqid_ws.py}}
```
```python title="Native Connection"
{{#include docs/examples/python/reqid_native.py}}
```
```python title="Rest Connection"
{{#include docs/examples/python/reqid_rest.py}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/queryreqid/main.go:query_id}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/nativeexample/examples/query.rs:query_with_req_id}}
```
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/sql_example.js:sqlWithReqid}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wsInsert/Program.cs:query_id}}
```
</TabItem>
<TabItem label="C" value="c">
```c "WebSocket Connection"
{{#include docs/examples/c-ws/with_reqid_demo.c:with_reqid}}
```
```c "Native Connection"
{{#include docs/examples/c/with_reqid_demo.c:with_reqid}}
```
</TabItem>
<TabItem label="REST API" value="rest">
Query data, specify reqId as 3
```bash
curl --location -uroot:taosdata 'http://127.0.0.1:6041/rest/sql?req_id=3' \
--data 'SELECT ts, current, location FROM power.meters limit 1'
```
</TabItem>
</Tabs>
View File
@ -0,0 +1,329 @@
---
title: Ingesting Data in Schemaless Mode
sidebar_label: Schemaless Ingestion
slug: /developer-guide/schemaless-ingestion
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
In IoT applications, to achieve functions such as automated management, business analysis, and device monitoring, it is often necessary to collect a large number of data items. However, due to reasons such as application logic upgrades and hardware adjustments of the devices themselves, the data collection items may change frequently. To address this challenge, TDengine provides a schemaless writing method, aimed at simplifying the data recording process.
With the schemaless writing method, users do not need to create supertables or subtables in advance, as TDengine will automatically create the corresponding storage structures based on the actual data written. Additionally, when necessary, the schemaless writing method can also automatically add necessary data columns or tag columns to ensure that the data written by users is correctly stored.
It is worth noting that the supertables and their corresponding subtables created through the schemaless writing method have no functional differences from those created directly through SQL. Users can still use SQL to write data directly into them. However, since the table names generated by the schemaless writing method are based on tag values according to a fixed mapping rule, these table names may lack readability and are not easy to understand.
**When using the schemaless writing method, tables are created automatically, and manual creation of tables may lead to unknown errors.**
## Schemaless Writing Line Protocol
TDengine's schemaless writing line protocol is compatible with InfluxDB's line protocol, OpenTSDB's telnet line protocol, and OpenTSDB's JSON format protocol. For the standard writing protocols of InfluxDB and OpenTSDB, please refer to their respective official documentation.
Below, we first introduce the protocol content extended by TDengine based on InfluxDB's line protocol. This protocol allows users to control the (supertable) schema in a more detailed manner. Using a string to express a data row, multiple rows of strings can be passed into the writing API at once to achieve batch writing of multiple data rows, with the format specified as follows.
```text
measurement,tag_set field_set timestamp
```
The parameters are explained as follows.
- measurement is the table name, separated by a comma from tag_set.
- tag_set is formatted as `<tag_key>=<tag_value>, <tag_key>=<tag_value>`, representing tag column data, separated by commas, and separated by a space from field_set.
- field_set is formatted as `<field_key>=<field_value>, <field_key>=<field_value>`, representing ordinary columns, also separated by commas, and separated by a space from timestamp.
- timestamp is the primary key timestamp for this row of data.
- Schemaless writing does not support writing data for tables with a second primary key column.
All data in tag_set are automatically converted to nchar data type and do not need to use double quotes.
In the schemaless writing line protocol, each data item in field_set needs to describe its own data type, with specific requirements as follows.
- If enclosed in double quotes, it represents varchar type, e.g., "abc".
- If enclosed in double quotes and prefixed with L or l, it represents nchar type, e.g., L" error message ".
- If enclosed in double quotes and prefixed with G or g, it represents geometry type, e.g., G"Point(4.343 89.342)".
- If enclosed in double quotes and prefixed with B or b, it represents varbinary type; the content inside the double quotes can be a hexadecimal string starting with \x or a plain string, e.g., B"\x98f46e" and B"hello".
- Spaces, equal signs (=), commas (,), double quotes ("), and backslashes (\) must be escaped with a backslash (\) (all in half-width English characters). The field escape rules for the schemaless writing protocol are shown in the following table.
| **Number** | **Field** | **Characters to Escape** |
| -------- | -------- | ---------------- |
| 1 | Supertable name | comma, space |
| 2 | Tag name | comma, equal sign, space |
| 3 | Tag value | comma, equal sign, space |
| 4 | Column name | comma, equal sign, space |
| 5 | Column value | double quotes, backslash |
If two consecutive backslashes are used, the first backslash acts as an escape character; if there is only one backslash, no escape is needed. The backslash escape rules for the schemaless writing protocol are shown in the following table.
| **Number** | **Backslash** | **Escapes to** |
| -------- | ------------ | ---------- |
| 1 | \ | \ |
| 2 | \\\\ | \ |
| 3 | \\\\\\ | \\\\ |
| 4 | \\\\\\\\ | \\\\ |
| 5 | \\\\\\\\\\ | \\\\\\ |
| 6 | \\\\\\\\\\\\ | \\\\\\ |
Numeric types are distinguished by suffixes. The suffix-to-type mapping for numeric values in the schemaless writing protocol is shown in the following table.
| **Number** | **Suffix** | **Mapped Type** | **Size (Bytes)** |
| ---------- | ---------- | ---------------------------- | ---------------- |
| 1 | None or f64| double | 8 |
| 2 | f32 | float | 4 |
| 3 | i8/u8 | TinyInt/UTinyInt | 1 |
| 4 | i16/u16 | SmallInt/USmallInt | 2 |
| 5 | i32/u32 | Int/UInt | 4 |
| 6 | i64/i/u64/u| BigInt/BigInt/UBigInt/UBigInt| 8 |
- t, T, true, True, TRUE, f, F, false, False will be directly treated as BOOL type.
For example, the following line writes one row into a subtable of the supertable st whose tags are t1="3" (NCHAR), t2="4" (NCHAR), and t3="t3" (NCHAR); the row's columns are c1=3 (BIGINT), c2=false (BOOL), c3="passit" (BINARY), c4=4 (DOUBLE), with a primary key timestamp of 1626006833639000000.
```text
st,t1=3,t2=4,t3=t3 c1=3i64,c3="passit",c2=false,c4=4f64 1626006833639000000
```
Note that if there is a case error in describing the data type suffix or the data type specified for the data is incorrect, it may trigger an error message and cause data writing to fail.
TDengine provides idempotence for data writing, meaning you can repeatedly call the API to write data that failed previously. However, it does not provide atomicity for writing multiple rows of data. That is, during the batch writing process of multiple rows of data, some data may be written successfully while others may fail.
## Schemaless Writing Handling Rules
Schemaless writing handles row data according to the following principles:
1. The subtable name is generated using the following rules: first, combine the measurement name with the tag's key and value into the following string:
```text
"measurement,tag_key1=tag_value1,tag_key2=tag_value2"
```
- Note that tag_key1, tag_key2 are not in the original order entered by the user, but are sorted in ascending order by tag name. Therefore, tag_key1 is not the first tag entered in the line protocol.
After sorting, calculate the MD5 hash value "md5_val" of this string, then prepend the fixed prefix "t_" to form the table name "t_md5_val". Every table automatically generated through this mapping has this prefix.
- If you do not want to use the automatically generated table name, there are two ways to specify the subtable name (the first method has higher priority).
1. By configuring the smlAutoChildTableNameDelimiter parameter in taos.cfg (excluding `@ # space CR LF tab`).
1. For example: configure smlAutoChildTableNameDelimiter=- and insert data as st,t0=cpu1,t1=4 c1=3 1626006833639000000, the created table name would be cpu1-4.
2. By configuring the smlChildTableName parameter in taos.cfg.
1. For example: configure smlChildTableName=tname and insert data as st,tname=cpu1,t1=4 c1=3 1626006833639000000; the created table name would be cpu1. Note that if multiple rows of data have the same tname but different tag_sets, the tag_set from the row that first triggered automatic table creation is used, and the tag_sets of the other rows are ignored.
2. If the supertable obtained from parsing the line protocol does not exist, it will be created (it is not recommended to manually create supertables, otherwise data insertion may be abnormal).
3. If the subtable obtained from parsing the line protocol does not exist, Schemaless will create the subtable according to the subtable name determined in step 1 or 2.
4. If the tag columns or regular columns specified in the data row do not exist, they will be added to the supertable (only additions, no deletions).
5. If some tag columns or regular columns exist in the supertable but are not specified in a data row, their values will be set to NULL in that row.
6. For BINARY or NCHAR columns, if the length of the values provided in the data row exceeds the limit of the column type, the maximum character storage limit of the column will be automatically increased (only additions, no deletions) to ensure the complete storage of data.
7. Errors encountered during the entire processing process will interrupt the writing process and return an error code.
8. To improve writing efficiency, it is assumed by default that the field_set order is the same for all rows of the same supertable (the first row contains all fields, and subsequent rows follow the same order). If the order differs, set the smlDataFormat parameter to false; otherwise, the data are written according to the first row's order and the data in the database will be incorrect. Starting from version 3.0.3.0, the order is checked automatically, and this configuration is deprecated.
9. Since SQL table creation does not support dots (.), Schemaless also processes dots (.) in automatically created table names, replacing them with underscores (_). If the subtable name is manually specified and contains a dot (.), it will also be converted to an underscore (_).
10. taos.cfg adds the smlTsDefaultName configuration (value as a string), which only works on the client side. After configuration, the time column name for Schemaless automatic table creation can be set through this configuration. If not configured, the default is _ts.
11. The supertable or subtable names in schema-less writing are case-sensitive.
12. Schema-less writing still follows TDengine's underlying restrictions on data structures, such as the total length of each row of data cannot exceed 48KB (from version 3.0.5.0 it is 64KB), and the total length of tag values cannot exceed 16KB.
## Time Resolution Recognition
Schemaless writing supports three specified protocol modes, as shown in the table below:
| **Number** | **Value** | **Description** |
| ---------- | -------------------- | -------------------------------- |
| 1 | SML_LINE_PROTOCOL | InfluxDB Line Protocol |
| 2 | SML_TELNET_PROTOCOL | OpenTSDB Text Line Protocol |
| 3 | SML_JSON_PROTOCOL | JSON Protocol Format |
In the SML_LINE_PROTOCOL parsing mode, users need to specify the time resolution of the input timestamp. The available time resolutions are as follows:
| **Number** | **Time Resolution Definition** | **Meaning** |
| ---------- | ----------------------------------- | -------------- |
| 1 | TSDB_SML_TIMESTAMP_NOT_CONFIGURED | Undefined (invalid) |
| 2 | TSDB_SML_TIMESTAMP_HOURS | Hours |
| 3 | TSDB_SML_TIMESTAMP_MINUTES | Minutes |
| 4 | TSDB_SML_TIMESTAMP_SECONDS | Seconds |
| 5 | TSDB_SML_TIMESTAMP_MILLI_SECONDS | Milliseconds |
| 6 | TSDB_SML_TIMESTAMP_MICRO_SECONDS | Microseconds |
| 7 | TSDB_SML_TIMESTAMP_NANO_SECONDS | Nanoseconds |
In the SML_TELNET_PROTOCOL and SML_JSON_PROTOCOL modes, the time precision is determined by the length of the timestamp (consistent with the standard operation of OpenTSDB), and the user-specified time resolution will be ignored.
## Data Schema Mapping Rules
Data from the InfluxDB line protocol will be mapped to schema-based data, where the measurement maps to the supertable name, tag names in the tag_set map to tag names in the data schema, and names in the field_set map to column names. For example, the following data.
```text
st,t1=3,t2=4,t3=t3 c1=3i64,c3="passit",c2=false,c4=4f64 1626006833639000000
```
This line of data maps to a supertable st with 3 tags of type nchar (t1, t2, t3) and five data columns: ts (timestamp), c1 (bigint), c3 (binary), c2 (bool), and c4 (double, since the value carries the f64 suffix). It is equivalent to the following SQL statement:
```sql
create stable st (_ts timestamp, c1 bigint, c2 bool, c3 binary(6), c4 double) tags(t1 nchar(1), t2 nchar(1), t3 nchar(2))
```
## Data Schema Change Handling
This section will explain the impact on the data schema under different line data writing scenarios.
When the line protocol writes a field with an explicit type identifier, a later change to that field's type definition causes a data schema error, and the write API reports an error, as shown below:
```text
st,t1=3,t2=4,t3=t3 c1=3i64,c3="passit",c2=false,c4=4 1626006833639000000
st,t1=3,t2=4,t3=t3 c1=3i64,c3="passit",c2=false,c4=4i 1626006833640000000
```
The data type mapping of the first line defines the c4 column as Double, but the second line declares the column as BigInt through a numeric suffix, thus triggering a parsing error in schema-less writing.
If earlier rows of the line protocol declare a data column as binary and a later row requires a longer binary length, the supertable schema is changed accordingly.
```text
st,t1=3,t2=4,t3=t3 c1=3i64,c5="pass" 1626006833639000000
st,t1=3,t2=4,t3=t3 c1=3i64,c5="passit" 1626006833640000000
```
The line protocol parsing in the first line declares that column c5 is a binary(4) field. The second line of data writing extracts that column c5 is still a binary column, but its width is 6. At this point, the width of the binary needs to be increased to accommodate the new string.
```text
st,t1=3,t2=4,t3=t3 c1=3i64 1626006833639000000
st,t1=3,t2=4,t3=t3 c1=3i64,c6="passit" 1626006833640000000
```
The second line of data adds a column c6 relative to the first line, with a type of binary(6). Thus, a column c6, type binary(6), will be automatically added.
## Schemaless Writing Example
Below, using smart meters as an example, we introduce code samples for writing data using the schemaless writing interface with various language connectors. This includes three protocols: InfluxDB's line protocol, OpenTSDB's TELNET line protocol, and OpenTSDB's JSON format protocol.
:::note
- Since the rules for automatic table creation with schemaless writing differ from those in the previous SQL examples, please ensure that the `meters`, `metric_telnet`, and `metric_json` tables do not exist before running the code samples.
- OpenTSDB's TELNET line protocol and OpenTSDB's JSON format protocol only support a single data column, so scenarios other than the smart meters example are used for them.
:::
### WebSocket Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/SchemalessWsTest.java:schemaless}}
```
Execute schemaless writing with reqId, where the last parameter reqId can be used for request link tracing.
```java
writer.write(lineDemo, SchemalessProtocolType.LINE, SchemalessTimestampType.NANO_SECONDS, 1L);
```
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/schemaless_ws.py}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/schemaless/ws/main.go}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/restexample/examples/schemaless.rs}}
```
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/line_example.js}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wssml/Program.cs:main}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c-ws/sml_insert_demo.c:schemaless}}
```
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
### Native Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/SchemalessJniTest.java:schemaless}}
```
Execute schemaless writing with reqId, where the last parameter reqId can be used for request link tracing.
```java
writer.write(lineDemo, SchemalessProtocolType.LINE, SchemalessTimestampType.NANO_SECONDS, 1L);
```
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/schemaless_native.py}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/schemaless/native/main.go}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/nativeexample/examples/schemaless.rs}}
```
</TabItem>
<TabItem label="Node.js" value="node">
Not supported
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/nativesml/Program.cs:main}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c/sml_insert_demo.c:schemaless}}
```
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
## Querying the Written Data
Running the example code from the previous section automatically creates tables in the power database. We can query the data using the taos shell or an application. Below is an example of using the taos shell to list the supertables and query the data in the meters supertable.
```shell
taos> show power.stables;
stable_name |
=================================
meter_current |
stb0_0 |
meters |
Query OK, 3 row(s) in set (0.002527s)
taos> select * from power.meters limit 1 \G;
*************************** 1.row ***************************
_ts: 2021-07-11 20:33:53.639
current: 10.300000199999999
voltage: 219
phase: 0.310000000000000
groupid: 2
location: California.SanFrancisco
Query OK, 1 row(s) in set (0.004501s)
```
View File
@ -0,0 +1,126 @@
---
title: Ingesting Data in Parameter Binding Mode
sidebar_label: Parameter Binding
slug: /developer-guide/parameter-binding
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
Inserting data with parameter binding avoids the overhead of SQL syntax parsing and can therefore significantly improve write performance. Parameter binding improves writing efficiency for the following reasons:
- Reduced parsing time: With parameter binding, the structure of the SQL statement is determined at the first execution, and subsequent executions only need to replace parameter values, thus avoiding syntax parsing each time and reducing parsing time.
- Precompilation: When using parameter binding, the SQL statement can be precompiled and cached. When executed later with different parameter values, the precompiled version can be used directly, improving execution efficiency.
- Reduced network overhead: Parameter binding also reduces the amount of data sent to the database because only parameter values need to be sent, not the complete SQL statement, especially when performing a large number of similar insert or update operations, this difference is particularly noticeable.
**Tip: parameter binding is recommended for data insertion.**
Next, we continue to use smart meters as an example to demonstrate efficient data writing with parameter binding in various language connectors, following these steps (a minimal generic sketch follows the list):
1. Prepare a parameterized SQL insert statement for inserting data into the supertable `meters`. This statement allows dynamically specifying subtable names, tags, and column values.
2. Loop to generate multiple subtables and their corresponding data rows. For each subtable:
- Set the subtable's name and tag values (group ID and location).
- Generate multiple rows of data, each including a timestamp, randomly generated current, voltage, and phase values.
- Perform batch insertion operations to insert these data rows into the corresponding subtable.
3. Finally, print the actual number of rows inserted into the table.
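Before the per-language examples, here is a minimal, hypothetical JDBC-style sketch of the workflow above (assumptions: the TDengine JDBC driver taos-jdbcdriver is on the classpath, the server is reachable at the URL shown, and the `power` database and `meters` supertable already exist as created earlier; the official examples in the tabs below are more complete):
```java
import java.sql.*;

public class StmtInsertSketch {
    public static void main(String[] args) throws SQLException {
        // Assumed native-connection JDBC URL; adjust host, port, and credentials to your deployment.
        String url = "jdbc:TAOS://localhost:6030/power";
        // Parameterized insert into one subtable; the tags are fixed here for brevity.
        String sql = "INSERT INTO power.d1001 USING power.meters TAGS(2, 'California.SanFrancisco') VALUES (?, ?, ?, ?)";
        try (Connection conn = DriverManager.getConnection(url, "root", "taosdata");
             PreparedStatement ps = conn.prepareStatement(sql)) {
            long now = System.currentTimeMillis();
            int inserted = 0;
            for (int i = 0; i < 3; i++) {
                ps.setTimestamp(1, new Timestamp(now + i)); // timestamp
                ps.setFloat(2, 10.3f + i);                  // current
                ps.setInt(3, 219);                          // voltage
                ps.setFloat(4, 0.31f);                      // phase
                inserted += ps.executeUpdate();
            }
            System.out.println("inserted rows: " + inserted);
        }
    }
}
```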
## WebSocket Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WSParameterBindingBasicDemo.java:para_bind}}
```
This is a [more detailed parameter binding example](https://github.com/taosdata/TDengine/blob/main/docs/examples/java/src/main/java/com/taos/example/WSParameterBindingFullDemo.java)
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/stmt_ws.py}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/stmt/ws/main.go}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/restexample/examples/stmt.rs}}
```
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/stmt_example.js:createConnect}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wsStmt/Program.cs:main}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c-ws/stmt_insert_demo.c}}
```
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
## Native Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/ParameterBindingBasicDemo.java:para_bind}}
```
This is a [more detailed parameter binding example](https://github.com/taosdata/TDengine/blob/main/docs/examples/java/src/main/java/com/taos/example/ParameterBindingFullDemo.java)
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/stmt_native.py}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/stmt/native/main.go}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/nativeexample/examples/stmt.rs}}
```
</TabItem>
<TabItem label="Node.js" value="node">
Not supported
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/stmtInsert/Program.cs:main}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c/stmt_insert_demo.c}}
```
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
View File
@ -0,0 +1,934 @@
---
title: Managing Consumers
slug: /developer-guide/manage-consumers
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
TDengine provides data subscription and consumption interfaces similar to those of message queue products. In many scenarios, by adopting TDengine's time-series big data platform, there is no need to integrate additional message queue products, thus simplifying application design and reducing maintenance costs. This chapter introduces the related APIs and usage methods for data subscription with various language connectors. For basic information on data subscription, please refer to [Data Subscription](../../advanced-features/data-subscription/)
## Creating Topics
Please use taos shell or refer to the [Execute SQL](../running-sql-statements/) section to execute the SQL for creating topics: `CREATE TOPIC IF NOT EXISTS topic_meters AS SELECT ts, current, voltage, phase, groupid, location FROM meters`
The above SQL will create a subscription named topic_meters. Each record in the messages obtained using this subscription is composed of the columns selected by this query statement `SELECT ts, current, voltage, phase, groupid, location FROM meters`.
**Note**
In the implementation of TDengine connectors, there are the following limitations for subscription queries.
- Query statement limitation: Subscription queries can only use select statements and do not support other types of SQL, such as subscribing to databases, subscribing to supertables (non-select methods), insert, update, or delete, etc.
- Raw data query: Subscription queries can only query raw data, not aggregated or calculated results.
- Time order limitation: Subscription queries can only query data in chronological order.
## Creating Consumers
The concept of TDengine consumers is similar to Kafka, where consumers receive data streams by subscribing to topics. Consumers can be configured with various parameters, such as connection methods, server addresses, automatic Offset submission, etc., to suit different data processing needs. Some language connectors' consumers also support advanced features such as automatic reconnection and data transmission compression to ensure efficient and stable data reception.
### Creation Parameters
There are many parameters for creating consumers, which flexibly support various connection types, Offset submission methods, compression, reconnection, deserialization, and other features. The common basic configuration items applicable to all language connectors are shown in the following table:
| Parameter Name | Type | Description | Remarks |
| :-----------------------: | :-----: | ------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `td.connect.ip` | string | Server IP address | |
| `td.connect.user` | string | Username | |
| `td.connect.pass` | string | Password | |
| `td.connect.port` | integer | Server port number | |
| `group.id` | string | Consumer group ID, the same consumer group shares consumption progress | <br />**Required**. Maximum length: 192.<br />Each topic can have up to 100 consumer groups |
| `client.id` | string | Client ID | Maximum length: 192 |
| `auto.offset.reset` | enum | Initial position of the consumer group subscription | <br />`earliest`: default(version < 3.2.0.0); subscribe from the beginning; <br/>`latest`: default(version >= 3.2.0.0); only subscribe from the latest data; <br/>`none`: cannot subscribe without a committed offset |
| `enable.auto.commit` | boolean | Whether to enable automatic consumption point submission, true: automatic submission, client application does not need to commit; false: client application needs to commit manually | Default is true |
| `auto.commit.interval.ms` | integer | Time interval for automatically submitting consumption records, in milliseconds | Default is 5000 |
| `msg.with.table.name` | boolean | Whether to allow parsing the table name from the message, not applicable to column subscription (column subscription can write tbname as a column in the subquery statement) (from version 3.2.0.0 this parameter is deprecated, always true) | Default is off |
| `enable.replay` | boolean | Whether to enable data replay function | Default is off |
| `session.timeout.ms` | integer | Timeout after consumer heartbeat is lost, after which rebalance logic is triggered, and upon success, that consumer will be removed (supported from version 3.3.3.0) | Default is 12000, range [6000, 1800000] |
| `max.poll.interval.ms` | integer | The longest time interval for consumer poll data fetching, exceeding this time will be considered as the consumer being offline, triggering rebalance logic, and upon success, that consumer will be removed (supported from version 3.3.3.0) | Default is 300000, range [1000, INT32_MAX] |
Below are the connection parameters for connectors in various languages:
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
The parameters for creating a consumer with the Java connector are Properties. For a list of parameters you can set, please refer to [Consumer Parameters](../../tdengine-reference/client-libraries/java/)
For other parameters, refer to the common basic configuration items mentioned above.
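As an illustration, a minimal sketch of assembling the `Properties` object from the common parameters in the table above (all values here are assumptions for a local deployment; constructing and using the consumer itself is shown in the examples later in this section):
```java
import java.util.Properties;

public class ConsumerConfigSketch {
    static Properties buildConsumerConfig() {
        Properties config = new Properties();
        config.setProperty("td.connect.ip", "localhost");     // server address (assumed)
        config.setProperty("td.connect.port", "6030");
        config.setProperty("td.connect.user", "root");
        config.setProperty("td.connect.pass", "taosdata");
        config.setProperty("group.id", "group1");             // consumers in the same group share progress
        config.setProperty("client.id", "client1");
        config.setProperty("auto.offset.reset", "latest");    // only consume newly arriving data
        config.setProperty("enable.auto.commit", "true");
        config.setProperty("auto.commit.interval.ms", "5000");
        return config;
    }
}
```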
</TabItem>
<TabItem label="Python" value="python">
The `td.connect.websocket.scheme` parameter is provided to indicate the protocol type, other parameters are the same as the common basic configuration items.
</TabItem>
<TabItem label="Go" value="go">
Supported properties list for creating consumers:
- `ws.url`: WebSocket connection address.
- `ws.message.channelLen`: WebSocket message channel buffer length, default 0.
- `ws.message.timeout`: WebSocket message timeout, default 5m.
- `ws.message.writeWait`: WebSocket message write timeout, default 10s.
- `ws.message.enableCompression`: Whether to enable compression for WebSocket, default false.
- `ws.autoReconnect`: Whether WebSocket should automatically reconnect, default false.
- `ws.reconnectIntervalMs`: WebSocket reconnect interval in milliseconds, default 2000.
- `ws.reconnectRetryCount`: WebSocket reconnect retry count, default 3.
See the table above for other parameters.
</TabItem>
<TabItem label="Rust" value="rust">
The parameters for creating a consumer with the Rust connector are DSN. For a list of parameters you can set, please refer to [DSN](../../tdengine-reference/client-libraries/rust/#dsn)
For other parameters, refer to the common basic configuration items mentioned above.
</TabItem>
<TabItem label="Node.js" value="node">
The `WS_URL` parameter is provided to indicate the server address to connect to, other parameters are the same as the common basic configuration items.
</TabItem>
<TabItem label="C#" value="csharp">
Supported properties list for creating consumers:
- `useSSL`: Whether to use SSL connection, default false.
- `token`: Token for connecting to TDengine cloud.
- `ws.message.enableCompression`: Whether to enable WebSocket compression, default false.
- `ws.autoReconnect`: Whether to automatically reconnect, default false.
- `ws.reconnect.retry.count`: Reconnect attempts, default 3.
- `ws.reconnect.interval.ms`: Reconnect interval in milliseconds, default 2000.
See the table above for other parameters.
</TabItem>
<TabItem label="C" value="c">
- WebSocket connection: Since it uses dsn, the four configuration items `td.connect.ip`, `td.connect.port`, `td.connect.user`, and `td.connect.pass` are not needed, the rest are the same as the common configuration items.
- Native connection: Same as the common basic configuration items.
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
### WebSocket Connection
This section introduces how connectors in various languages create consumers over a WebSocket connection: specify the server address, enable auto-commit, start consuming from the latest message, and set `group.id`, `client.id`, and so on. Some language connectors also support deserialization parameters.
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WsConsumerLoopFull.java:create_consumer}}
```
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/tmq_websocket_example.py:create_consumer}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/tmq/ws/main.go:create_consumer}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/restexample/examples/tmq.rs:create_consumer_dsn}}
```
```rust
{{#include docs/examples/rust/restexample/examples/tmq.rs:create_consumer_ac}}
```
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/tmq_example.js:create_consumer}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wssubscribe/Program.cs:create_consumer}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c-ws/tmq_demo.c:create_consumer_1}}
```
```c
{{#include docs/examples/c-ws/tmq_demo.c:create_consumer_2}}
```
Call the `build_consumer` function to attempt to obtain the consumer instance `tmq`. Print a success log if successful, and a failure log if not.
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
### Native Connection
This section introduces how connectors in various languages create consumers over a native connection: specify the server address, enable auto-commit, start consuming from the latest message, and set `group.id`, `client.id`, and so on. Some language connectors also support deserialization parameters.
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/ConsumerLoopFull.java:create_consumer}}
```
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/tmq_native.py:create_consumer}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/tmq/native/main.go:create_consumer}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/nativeexample/examples/tmq.rs:create_consumer_dsn}}
```
```rust
{{#include docs/examples/rust/nativeexample/examples/tmq.rs:create_consumer_ac}}
```
</TabItem>
<TabItem label="Node.js" value="node">
Not supported
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/subscribe/Program.cs:create_consumer}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c/tmq_demo.c:create_consumer_1}}
```
```c
{{#include docs/examples/c/tmq_demo.c:create_consumer_2}}
```
Call the `build_consumer` function to attempt to obtain the consumer instance `tmq`. Print a success log if successful, and a failure log if not.
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
## Subscribe to Consume Data
After subscribing to a topic, consumers can start receiving and processing messages from these topics. The example code for subscribing to consume data is as follows:
### WebSocket Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WsConsumerLoopFull.java:poll_data_code_piece}}
```
- The parameter of the `subscribe` method is the list of topic names to subscribe to; multiple topics can be subscribed to at once.
- `poll` is called each time to fetch a message, which may contain multiple records.
- `ResultBean` is a custom internal class, whose field names and data types correspond one-to-one with the column names and data types, allowing objects of type `ResultBean` to be deserialized using the `value.deserializer` property's corresponding deserialization class.
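A hypothetical sketch of such a class, with fields matching the columns selected by `topic_meters` (the actual `ResultBean` in the full example may differ in annotations and accessors):
```java
// Field names and types mirror the subscribed columns: ts, current, voltage, phase, groupid, location
public class ResultBean {
    private java.sql.Timestamp ts;
    private float current;
    private int voltage;
    private float phase;
    private int groupid;
    private String location;
    // getters and setters omitted for brevity
}
```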
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/tmq_websocket_example.py:subscribe}}
```
- The parameter of the `subscribe` method is the list of topic names to subscribe to; multiple topics can be subscribed to at once.
- `poll` is called each time to fetch a message, which may contain multiple records.
- `records` contains multiple block segments, each of which may contain multiple records.
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/tmq/ws/main.go:subscribe}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/restexample/examples/tmq.rs:consume}}
```
- A consumer can subscribe to one or more `TOPIC`s; generally, subscribing to only one `TOPIC` per consumer is recommended.
- TMQ message queue is a [futures::Stream](https://docs.rs/futures/latest/futures/stream/index.html) type, which can be used with the corresponding API to consume each message and mark it as consumed through `.commit`.
- `Record` is a custom structure, whose field names and data types correspond one-to-one with the column names and data types, allowing objects of type `Record` to be deserialized using `serde`.
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/tmq_seek_example.js:subscribe}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wssubscribe/Program.cs:subscribe}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c-ws/tmq_demo.c:build_topic_list}}
```
```c
{{#include docs/examples/c-ws/tmq_demo.c:basic_consume_loop}}
```
```c
{{#include docs/examples/c-ws/tmq_demo.c:msg_process}}
```
```c
{{#include docs/examples/c-ws/tmq_demo.c:subscribe_3}}
```
Steps for subscribing and consuming data:
1. Call the `ws_build_topic_list` function to create a topic list `topic_list`.
2. If `topic_list` is `NULL`, it means creation failed, and the function returns `-1`.
3. Use the `ws_tmq_subscribe` function to subscribe to the topic list specified by `tmq`. If the subscription fails, print an error message.
4. Destroy the topic list `topic_list` to free resources.
5. Call the `basic_consume_loop` function to start the basic consumption loop, processing the subscribed messages.
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
### Native Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WsConsumerLoopFull.java:poll_data_code_piece}}
```
- The parameter of the `subscribe` method is the list of topic names to subscribe to; multiple topics can be subscribed to simultaneously.
- `poll` is called each time to get a message, which may contain multiple records.
- `ResultBean` is a custom internal class, whose field names and data types correspond one-to-one with the column names and data types, allowing objects of type `ResultBean` to be deserialized based on the `value.deserializer` property's corresponding deserialization class.
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/tmq_native.py:subscribe}}
```
- The parameter of the `subscribe` method is the list of topic names to subscribe to; multiple topics can be subscribed to simultaneously.
- `poll` is called each time to get a message, which may contain multiple records.
- `records` contains multiple blocks, each of which may contain multiple records.
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/tmq/native/main.go:subscribe}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/restexample/examples/tmq.rs:consume}}
```
- A consumer can subscribe to one or more `TOPIC`s; it is generally recommended that each consumer subscribe to only one `TOPIC`.
- The TMQ message queue is a [futures::Stream](https://docs.rs/futures/latest/futures/stream/index.html) type, which can be used with the corresponding API to consume each message and mark it as consumed with `.commit`.
- `Record` is a custom structure, whose field names and data types correspond one-to-one with the column names and data types, allowing objects of type `Record` to be deserialized through `serde`.
</TabItem>
<TabItem label="Node.js" value="node">
Not supported
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/subscribe/Program.cs:subscribe}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c/tmq_demo.c:build_topic_list}}
```
```c
{{#include docs/examples/c/tmq_demo.c:basic_consume_loop}}
```
```c
{{#include docs/examples/c/tmq_demo.c:msg_process}}
```
```c
{{#include docs/examples/c/tmq_demo.c:subscribe_3}}
```
Steps for subscribing and consuming data:
1. Call the `build_topic_list` function to create a topic list `topic_list`.
2. If `topic_list` is `NULL`, it means creation failed, and the function returns `-1`.
3. Use the `tmq_subscribe` function to subscribe to the topic list specified by `tmq`. If the subscription fails, print an error message.
4. Destroy the topic list `topic_list` to free resources.
5. Call the `basic_consume_loop` function to start the basic consumption loop, processing the subscribed messages.
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
## Specifying the Subscription Offset
Consumers can specify that reading starts from a specific Offset in a partition, which allows them to reread messages or skip messages that have already been processed. The following shows how connectors in various languages specify the subscription Offset.
### WebSocket Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WsConsumerLoopFull.java:consumer_seek}}
```
1. Use the `consumer.poll` method to poll data until data is obtained.
2. For the first batch of polled data, print the content of the first message and obtain the current consumer's partition assignment information.
3. Use the `consumer.seekToBeginning` method to reset the offsets of all partitions to the starting position and print a message confirming the successful reset.
4. Poll data again with the `consumer.poll` method and print the content of the first message.
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/tmq_websocket_example.py:assignment}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/tmq/ws/main.go:seek}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/nativeexample/examples/tmq.rs:seek_offset}}
```
1. Call the `consumer.assignments()` method to obtain the consumer's current partition assignment information and record the initial assignment status.
2. For each partition assignment, extract the topic, vgroup ID (`vgroup_id`), current offset (`current`), beginning offset (`begin`), and ending offset (`end`), and record this information.
3. Call the `consumer.offset_seek` method to set the offset to the beginning position. If the operation fails, record the error information and the current assignment status.
4. After adjusting the offset for all partitions, obtain and record the consumer's partition assignment information again to confirm the status after the adjustment.
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/tmq_seek_example.js:offset}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wssubscribe/Program.cs:seek}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c-ws/tmq_demo.c:consume_repeatly}}
```
1. Use the `ws_tmq_get_topic_assignment` function to obtain the assignment information for a specific topic, including the number of assignments and the details of each assignment.
2. If fetching the assignment information fails, print an error message and return.
3. For each assignment, use the `ws_tmq_offset_seek` function to set the consumer's offset to the earliest offset.
4. If setting the offset fails, print an error message.
5. Release the assignment information array to free resources.
6. Call the `basic_consume_loop` function to start a new consumption loop and process messages.
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
### Native Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WsConsumerLoopFull.java:consumer_seek}}
```
1. Use the `consumer.poll` method to poll data until data is obtained.
2. For the first batch of polled data, print the content of the first data item and obtain the current consumer's partition assignment information.
3. Use the `consumer.seekToBeginning` method to reset the offsets of all partitions to the beginning position and print a message confirming the successful reset.
4. Poll data again with the `consumer.poll` method and print the content of the first data item.
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/tmq_native.py:assignment}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/tmq/native/main.go:seek}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/nativeexample/examples/tmq.rs:seek_offset}}
```
1. Obtain the consumer's current partition assignment information by calling the `consumer.assignments()` method and record the initial assignment status.
2. For each partition assignment, extract the topic, vgroup ID (`vgroup_id`), current offset, beginning offset, and ending offset, and record this information.
3. Use the `consumer.offset_seek` method to set the offset to the beginning position. If the operation fails, record the error information and the current assignment status.
4. After adjusting the offset for all partitions, obtain and record the consumer's partition assignment information again to confirm the status after the adjustment.
</TabItem>
<TabItem label="Node.js" value="node">
Not supported
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/subscribe/Program.cs:seek}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c/tmq_demo.c:consume_repeatly}}
```
1. Use the `tmq_get_topic_assignment` function to obtain the assignment information for a specific topic, including the number of assignments and the details of each assignment.
2. If fetching the assignment information fails, print an error message and return.
3. For each assignment, use the `tmq_offset_seek` function to set the consumer's offset to the earliest offset.
4. If setting the offset fails, print an error message.
5. Release the assignment information array to free resources.
6. Call the `basic_consume_loop` function to start a new consumption loop and process messages.
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
## Commit Offset
After a consumer has read and processed messages, it can commit the Offset, indicating that it has successfully processed all messages up to that Offset. Offset commits can be automatic (committed periodically based on configuration) or manual (the application decides when to commit).
If the property `enable.auto.commit` is set to false when creating the consumer, the Offset can be committed manually.
**Note**: Before manually committing the consumption progress, make sure the message has been processed correctly; otherwise, a message that was processed incorrectly will not be consumed again. An automatic commit may commit the progress of the previous message during the current `poll`, so make sure message processing is finished before the next `poll` or message retrieval.
### WebSocket Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WsConsumerLoopFull.java:commit_code_piece}}
```
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/tmq_websocket_example.py:commit_offset}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/tmq/ws/main.go:commit_offset}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/restexample/examples/tmq.rs:consumer_commit_manually}}
```
You can manually commit the consumption progress using the `consumer.commit` method.
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/tmq_example.js:commit}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wssubscribe/Program.cs:commit_offset}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c-ws/tmq_demo.c:manual_commit}}
```
You can manually commit the consumption progress using the `ws_tmq_commit_sync` function.
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
### Native Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WsConsumerLoopFull.java:commit_code_piece}}
```
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/tmq_native.py:commit_offset}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/tmq/native/main.go:commit_offset}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/restexample/examples/tmq.rs:consumer_commit_manually}}
```
You can manually commit the consumption progress using the `consumer.commit` method.
</TabItem>
<TabItem label="Node.js" value="node">
Not supported
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/subscribe/Program.cs:commit_offset}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c/tmq_demo.c:manual_commit}}
```
You can manually commit the consumption progress using the `tmq_commit_sync` function.
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
## Unsubscribe and Close Consumption
Consumers can unsubscribe from topics and stop receiving messages. When a consumer is no longer needed, the consumer instance should be closed to release resources and disconnect from the TDengine server.
### WebSocket Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WsConsumerLoopFull.java:unsubscribe_data_code_piece}}
```
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/tmq_websocket_example.py:unsubscribe}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/tmq/ws/main.go:close}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/restexample/examples/tmq.rs:unsubscribe}}
```
**Note**: Once the consumer unsubscribes and is closed, it cannot be reused. If you want to subscribe to a new `topic`, please recreate the consumer.
</TabItem>
<TabItem label="Node.js" value="node">
```js
{{#include docs/examples/node/websocketexample/tmq_example.js:unsubscribe}}
```
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/wssubscribe/Program.cs:close}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c-ws/tmq_demo.c:unsubscribe_and_close}}
```
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
### Native Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WsConsumerLoopFull.java:unsubscribe_data_code_piece}}
```
</TabItem>
<TabItem label="Python" value="python">
```python
{{#include docs/examples/python/tmq_native.py:unsubscribe}}
```
</TabItem>
<TabItem label="Go" value="go">
```go
{{#include docs/examples/go/tmq/native/main.go:close}}
```
</TabItem>
<TabItem label="Rust" value="rust">
```rust
{{#include docs/examples/rust/restexample/examples/tmq.rs:unsubscribe}}
```
**Note**: After the consumer unsubscribes, it is closed and cannot be reused. If you want to subscribe to a new `topic`, please create a new consumer.
</TabItem>
<TabItem label="Node.js" value="node">
Not supported
</TabItem>
<TabItem label="C#" value="csharp">
```csharp
{{#include docs/examples/csharp/subscribe/Program.cs:close}}
```
</TabItem>
<TabItem label="C" value="c">
```c
{{#include docs/examples/c/tmq_demo.c:unsubscribe_and_close}}
```
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
## Complete Examples
### WebSocket Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
<details>
<summary>Complete code example</summary>
```java
{{#include docs/examples/java/src/main/java/com/taos/example/WsConsumerLoopFull.java:consumer_demo}}
```
**Note**: The value of the `value.deserializer` configuration parameter should be adjusted according to the package path of the test environment.
</details>
</TabItem>
<TabItem label="Python" value="python">
<details>
<summary>Complete code example</summary>
```python
{{#include docs/examples/python/tmq_websocket_example.py}}
```
</details>
</TabItem>
<TabItem label="Go" value="go">
<details>
<summary>Complete code example</summary>
```go
{{#include docs/examples/go/tmq/ws/main.go}}
```
</details>
</TabItem>
<TabItem label="Rust" value="rust">
<details>
<summary>Complete code example</summary>
```rust
{{#include docs/examples/rust/restexample/examples/tmq.rs}}
```
</details>
</TabItem>
<TabItem label="Node.js" value="node">
<details>
<summary>Complete code example</summary>
```js
{{#include docs/examples/node/websocketexample/tmq_example.js}}
```
</details>
</TabItem>
<TabItem label="C#" value="csharp">
<details>
<summary>Complete code example</summary>
```csharp
{{#include docs/examples/csharp/wssubscribe/Program.cs}}
```
</details>
</TabItem>
<TabItem label="C" value="c">
<details>
<summary>Complete code example</summary>
```c
{{#include docs/examples/c-ws/tmq_demo.c}}
```
</details>
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>
### Native Connection
<Tabs defaultValue="java" groupId="lang">
<TabItem value="java" label="Java">
<details>
<summary>Complete code example</summary>
```java
{{#include docs/examples/java/src/main/java/com/taos/example/ConsumerLoopFull.java:consumer_demo}}
```
**Note**: The value of the `value.deserializer` configuration parameter should be adjusted according to the package path in the test environment.
</details>
</TabItem>
<TabItem label="Python" value="python">
<details>
<summary>Complete code example</summary>
```python
{{#include docs/examples/python/tmq_native.py}}
```
</details>
</TabItem>
<TabItem label="Go" value="go">
<details>
<summary>Complete code example</summary>
```go
{{#include docs/examples/go/tmq/native/main.go}}
```
</details>
</TabItem>
<TabItem label="Rust" value="rust">
<details>
<summary>Complete code example</summary>
```rust
{{#include docs/examples/rust/nativeexample/examples/tmq.rs}}
```
</details>
</TabItem>
<TabItem label="Node.js" value="node">
Not supported
</TabItem>
<TabItem label="C#" value="csharp">
<details>
<summary>Complete code example</summary>
```csharp
{{#include docs/examples/csharp/subscribe/Program.cs}}
```
</details>
</TabItem>
<TabItem label="C" value="c">
<details>
<summary>Complete code example</summary>
```c
{{#include docs/examples/c/tmq_demo.c}}
```
</details>
</TabItem>
<TabItem label="REST API" value="rest">
Not supported
</TabItem>
</Tabs>

---
sidebar_label: User-Defined Functions
title: User-Defined Functions (UDF)
slug: /developer-guide/user-defined-functions
---
## Introduction to UDF
In some application scenarios, the query functionality required by the application logic cannot be directly implemented using built-in functions. TDengine allows the writing of user-defined functions (UDFs) to address the needs of special application scenarios. Once successfully registered in the cluster, UDFs can be called in SQL just like system built-in functions, with no difference in usage. UDFs are divided into scalar functions and aggregate functions. Scalar functions output a value for each row of data, such as absolute value (abs), sine function (sin), string concatenation function (concat), etc. Aggregate functions output a value for multiple rows of data, such as average (avg), maximum value (max), etc.
TDengine supports writing UDFs in two programming languages: C and Python. UDFs written in C have performance nearly identical to built-in functions, while those written in Python can utilize the rich Python computation libraries. To prevent exceptions during UDF execution from affecting the database service, TDengine uses process isolation technology, executing UDFs in a separate process. Even if a user-written UDF crashes, it will not affect the normal operation of TDengine.
## Developing UDFs in C Language
When implementing UDFs in C language, you need to implement the specified interface functions:
- Scalar functions need to implement the scalar interface function `scalarfn`.
- Aggregate functions need to implement the aggregate interface functions `aggfn_start`, `aggfn`, `aggfn_finish`.
- If initialization is needed, implement `udf_init`.
- If cleanup is needed, implement `udf_destroy`.
### Interface Definition
The interface function names are either the UDF name itself or the UDF name followed by a specific suffix (`_start`, `_finish`, `_init`, `_destroy`). Function names mentioned later, such as `scalarfn` and `aggfn`, should be replaced with the actual UDF name.
#### Scalar Function Interface
A scalar function is a function that converts input data into output data, typically used for calculating and transforming a single data value. The prototype of the scalar function interface is as follows.
```c
int32_t scalarfn(SUdfDataBlock* inputDataBlock, SUdfColumn *resultColumn);
```
Key parameter descriptions are as follows:
- `inputDataBlock`: The input data block.
- `resultColumn`: The output column.
#### Aggregate Function Interface
An aggregate function is a special type of function used for grouping and calculating data to generate summary information. The working principle of aggregate functions is as follows:
- Initialize the result buffer: First, the `aggfn_start` function is called to generate a result buffer for storing intermediate results.
- Group data: Related data is divided into multiple row data blocks, each containing a group of data with the same grouping key.
- Update intermediate results: For each data block, the `aggfn` function is called to update the intermediate results. The `aggfn` function performs calculations according to the type of aggregate function (such as sum, avg, count, etc.) and stores the results in the result buffer.
- Generate the final result: After updating the intermediate results of all data blocks, the `aggfn_finish` function is called to extract the final result from the result buffer. The final result contains either 0 or 1 data row, depending on the type of aggregate function and the input data.
The prototype of the aggregate function interface is as follows.
```c
int32_t aggfn_start(SUdfInterBuf *interBuf);
int32_t aggfn(SUdfDataBlock* inputBlock, SUdfInterBuf *interBuf, SUdfInterBuf *newInterBuf);
int32_t aggfn_finish(SUdfInterBuf* interBuf, SUdfInterBuf *result);
```
Key parameter descriptions are as follows:
- `interBuf`: Intermediate result buffer.
- `inputBlock`: The input data block.
- `newInterBuf`: New intermediate result buffer.
- `result`: The final result.
#### Initialization and Destruction Interface
The initialization and destruction interfaces are common interfaces used by both scalar and aggregate functions, with the following APIs.
```c
int32_t udf_init();
int32_t udf_destroy();
```
Among them, the `udf_init` function completes the initialization work, and the `udf_destroy` function completes the cleanup work. If there is no initialization work, there is no need to define the `udf_init` function; if there is no cleanup work, there is no need to define the `udf_destroy` function.
### Scalar Function Template
The template for developing scalar functions in C language is as follows.
```c
#include "taos.h"
#include "taoserror.h"
#include "taosudf.h"
// Initialization function.
// If no initialization, we can skip definition of it.
// The initialization function shall be concatenation of the udf name and _init suffix.
// @return error number defined in taoserror.h
int32_t scalarfn_init() {
// initialization.
return TSDB_CODE_SUCCESS;
}
// Scalar function main computation function.
// @param inputDataBlock, input data block composed of multiple columns with each column defined by SUdfColumn
// @param resultColumn, output column
// @return error number defined in taoserror.h
int32_t scalarfn(SUdfDataBlock* inputDataBlock, SUdfColumn* resultColumn) {
// read data from inputDataBlock and process, then output to resultColumn.
return TSDB_CODE_SUCCESS;
}
// Cleanup function.
// If no cleanup related processing, we can skip definition of it.
// The destroy function shall be concatenation of the udf name and _destroy suffix.
// @return error number defined in taoserror.h
int32_t scalarfn_destroy() {
// clean up
return TSDB_CODE_SUCCESS;
}
```
### Aggregate Function Template
The template for developing aggregate functions in C language is as follows.
```c
#include "taos.h"
#include "taoserror.h"
#include "taosudf.h"
// Initialization function.
// If no initialization, we can skip definition of it.
// The initialization function shall be concatenation of the udf name and _init suffix.
// @return error number defined in taoserror.h
int32_t aggfn_init() {
// initialization.
return TSDB_CODE_SUCCESS;
}
// Aggregate start function.
// The intermediate value or the state(@interBuf) is initialized in this function.
// The function name shall be concatenation of udf name and _start suffix.
// @param interbuf intermediate value to initialize
// @return error number defined in taoserror.h
int32_t aggfn_start(SUdfInterBuf* interBuf) {
// initialize intermediate value in interBuf
return TSDB_CODE_SUCCESS;
}
// Aggregate reduce function.
// This function aggregates the old state (@interBuf) and one data block (@inputBlock), and outputs a new state (@newInterBuf).
// @param inputBlock input data block
// @param interBuf old state
// @param newInterBuf new state
// @return error number defined in taoserror.h
int32_t aggfn(SUdfDataBlock* inputBlock, SUdfInterBuf *interBuf, SUdfInterBuf *newInterBuf) {
// read from inputBlock and interBuf and output to newInterBuf
return TSDB_CODE_SUCCESS;
}
// Aggregate function finish function.
// This function transforms the intermediate value(@interBuf) into the final output(@result).
// The function name must be concatenation of aggfn and _finish suffix.
// @interBuf : intermediate value
// @result: final result
// @return error number defined in taoserror.h
int32_t aggfn_finish(SUdfInterBuf* interBuf, SUdfInterBuf *result) {
// read data from interBuf and write the final output to result
return TSDB_CODE_SUCCESS;
}
// Cleanup function.
// If no cleanup related processing, we can skip definition of it.
// The destroy function shall be concatenation of the udf name and _destroy suffix.
// @return error number defined in taoserror.h
int32_t aggfn_destroy() {
// clean up
return TSDB_CODE_SUCCESS;
}
```
### Compilation
In TDengine, to implement UDF, you need to write C language source code and compile it into a dynamic link library file according to TDengine's specifications.
Prepare the UDF source code, for example `bit_and.c` (shown in the example section below). On a Linux operating system, execute the following command to compile it into a dynamic link library file.
```shell
gcc -g -O0 -fPIC -shared bit_and.c -o libbitand.so
```
It is recommended to use GCC version 7.5 or above to ensure reliable operation.
### C UDF Data Structures
```c
typedef struct SUdfColumnMeta {
int16_t type;
int32_t bytes;
uint8_t precision;
uint8_t scale;
} SUdfColumnMeta;
typedef struct SUdfColumnData {
int32_t numOfRows;
int32_t rowsAlloc;
union {
struct {
int32_t nullBitmapLen;
char *nullBitmap;
int32_t dataLen;
char *data;
} fixLenCol;
struct {
int32_t varOffsetsLen;
int32_t *varOffsets;
int32_t payloadLen;
char *payload;
int32_t payloadAllocLen;
} varLenCol;
};
} SUdfColumnData;
typedef struct SUdfColumn {
SUdfColumnMeta colMeta;
bool hasNull;
SUdfColumnData colData;
} SUdfColumn;
typedef struct SUdfDataBlock {
int32_t numOfRows;
int32_t numOfCols;
SUdfColumn **udfCols;
} SUdfDataBlock;
typedef struct SUdfInterBuf {
int32_t bufLen;
char *buf;
int8_t numOfResult; //zero or one
} SUdfInterBuf;
```
The data structures are described as follows:
- `SUdfDataBlock` contains the number of rows `numOfRows` and the number of columns `numOfCols`. `udfCols[i]` (0 \<= i \<= numOfCols-1) represents each column's data, with type `SUdfColumn*`.
- `SUdfColumn` includes the column's data type definition `colMeta` and the column's data `colData`.
- `SUdfColumnMeta` members are defined similarly to data type definitions in `taos.h`.
- `SUdfColumnData` can be variable-length, `varLenCol` defines variable-length data, and `fixLenCol` defines fixed-length data.
- `SUdfInterBuf` defines the intermediate result buffer and `numOfResult`, the number of results in the buffer.
To better operate the above data structures, some convenience functions are provided, defined in `taosudf.h`.
### C UDF Example Code
#### Scalar Function Example [bit_and](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/bit_and.c)
`bit_and` implements the bitwise AND function for multiple columns. If there is only one column, it returns that column. `bit_and` ignores null values.
<details>
<summary>bit_and.c</summary>
```c
{{#include tests/script/sh/bit_and.c}}
```
</details>
#### Aggregate Function Example 1 Returning Numeric Type [l2norm](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/l2norm.c)
`l2norm` implements the second-order norm of all data in the input columns, i.e., squaring each data point, then summing them up, and finally taking the square root.
<details>
<summary>l2norm.c</summary>
```c
{{#include tests/script/sh/l2norm.c}}
```
</details>
#### Aggregate Function Example 2 Returning String Type [max_vol](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/max_vol.c)
`max_vol` implements finding the maximum voltage from multiple input voltage columns, returning a composite string value consisting of the device ID + the position (row, column) of the maximum voltage + the maximum voltage value.
Create table:
```sql
create table battery(ts timestamp, vol1 float, vol2 float, vol3 float, deviceId varchar(16));
```
Create custom function:
```sql
create aggregate function max_vol as '/root/udf/libmaxvol.so' outputtype binary(64) bufsize 10240 language 'C';
```
Use custom function:
```sql
select max_vol(vol1, vol2, vol3, deviceid) from battery;
```
<details>
<summary>max_vol.c</summary>
```c
{{#include tests/script/sh/max_vol.c}}
```
</details>
## Developing UDFs in Python Language
### Environment Setup
The specific steps to prepare the environment are as follows:
- Step 1, prepare the Python runtime environment.
- Step 2, install the Python package taospyudf. The command is as follows.
```shell
pip3 install taospyudf
```
- Step 3, execute the command `ldconfig`.
- Step 4, start the taosd service.
The installation process will compile C++ source code, so cmake and gcc must be present on the system. The compiled libtaospyudf.so file will automatically be copied to the /usr/local/lib/ directory, so if you are not a root user, you need to add sudo during installation. After installation, you can check if this file is in the directory:
```shell
root@server11 ~/udf $ ls -l /usr/local/lib/libtaos*
-rw-r--r-- 1 root root 671344 May 24 22:54 /usr/local/lib/libtaospyudf.so
```
### Interface Definition
When developing UDFs in Python, you need to implement the specified interface functions. The specific requirements are as follows.
- Scalar functions need to implement the scalar interface function process.
- Aggregate functions need to implement the aggregate interface functions start, reduce, finish.
- If initialization is needed, the init function should be implemented.
- If cleanup work is needed, implement the destroy function.
#### Scalar Function Interface
The interface for scalar functions is as follows.
```Python
def process(input: datablock) -> tuple[output_type]:
```
The main parameters are as follows:
- `input`: `datablock` is similar to a two-dimensional matrix; the Python object at row `row` and column `col` is read through the member method `data(row, col)`.
- The return value is a tuple of Python objects, with each element of the output type.
#### Aggregate Function Interface
The interface for aggregate functions is as follows.
```Python
def start() -> bytes:
def reduce(inputs: datablock, buf: bytes) -> bytes:
def finish(buf: bytes) -> output_type:
```
The above code defines 3 functions, which together implement a custom aggregate function. The specific process is as follows.
First, the start function is called to generate the initial result buffer. This result buffer is used to store the internal state of the aggregate function, which is continuously updated as input data is processed.
Then, the input data is divided into multiple row data blocks. For each row data block, the reduce function is called, and the current row data block (inputs) and the current intermediate result (buf) are passed as parameters. The reduce function updates the internal state of the aggregate function based on the input data and current state, and returns a new intermediate result.
Finally, when all row data blocks have been processed, the finish function is called. This function takes the final intermediate result (buf) as a parameter and generates the final output from it. Due to the nature of aggregate functions, the final output can only contain 0 or 1 data entries. This output result is returned to the caller as the result of the aggregate function calculation.
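The sketch below is a minimal, hypothetical count-style aggregate (counting non-NULL values in its first input column) that illustrates this start/reduce/finish flow; it assumes the function is registered with an integer output type and, like the `myspread` example later in this section, uses `pickle` to serialize the intermediate state.
```python
import pickle

def init():
    pass

def destroy():
    pass

def start() -> bytes:
    # initial state: a running count of 0, serialized into the intermediate buffer
    return pickle.dumps(0)

def reduce(block, buf: bytes) -> bytes:
    # merge one data block into the running count carried in buf
    count = pickle.loads(buf)
    rows, _ = block.shape()
    for i in range(rows):
        if block.data(i, 0) is not None:
            count += 1
    return pickle.dumps(count)

def finish(buf: bytes) -> int:
    # convert the final intermediate state into the single output value
    return pickle.loads(buf)
```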
#### Initialization and Destruction Interface
The interfaces for initialization and destruction are as follows.
```Python
def init()
def destroy()
```
Description:
- `init` completes the initialization work.
- `destroy` completes the cleanup work.
**Note**: When developing UDFs in Python, you must define both the `init` and `destroy` functions.
### Scalar Function Template
The template for developing scalar functions in Python is as follows.
```Python
def init():
    # initialization
    pass

def destroy():
    # destroy
    pass

def process(input: datablock) -> tuple[output_type]:
    pass
```
### Aggregate Function Template
The template for developing aggregate functions in Python is as follows.
```Python
def init():
    # initialization
    pass

def destroy():
    # destroy
    pass

def start() -> bytes:
    # return serialize(init_state)
    pass

def reduce(inputs: datablock, buf: bytes) -> bytes:
    # deserialize buf into state
    # reduce the inputs and state into new_state
    # use inputs.data(i, j) to access the python object at location (i, j)
    # serialize new_state into new_state_bytes
    return new_state_bytes

def finish(buf: bytes) -> output_type:
    # return an object of type output_type
    pass
```
### Data Type Mapping
The table below describes the mapping between TDengine SQL data types and Python data types. Any type of NULL value is mapped to Python's None value.
| **TDengine SQL Data Type** | **Python Data Type** |
| :-----------------------: | ------------ |
| TINYINT / SMALLINT / INT / BIGINT | int |
| TINYINT UNSIGNED / SMALLINT UNSIGNED / INT UNSIGNED / BIGINT UNSIGNED | int |
| FLOAT / DOUBLE | float |
| BOOL | bool |
| BINARY / VARCHAR / NCHAR | bytes|
| TIMESTAMP | int |
| JSON and other types | Not supported |
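As a concrete illustration of this mapping, the following hypothetical scalar UDF returns the character length of a string column: VARCHAR/NCHAR values arrive as Python `bytes` and must be decoded, and NULL arrives as `None` (the sketch assumes the function is registered with an integer output type).
```python
def init():
    pass

def destroy():
    pass

def process(block):
    rows, _ = block.shape()
    result = []
    for i in range(rows):
        v = block.data(i, 0)  # bytes for a VARCHAR/NCHAR input, or None for NULL
        if v is None:
            result.append(None)  # NULL in maps to NULL out
        else:
            result.append(len(v.decode("utf-8")))
    return result
```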
### Development Examples
This section includes 5 example programs, ranging from basic to advanced, along with numerous practical debugging tips.
Note: **Within UDF, logging cannot be done using the print function; you must write to a file or use Python's built-in logging library.**
#### Example One
Write a UDF function that only accepts a single integer: Input n, output ln(n^2 + 1).
First, write a Python file, located in a system directory, such as `/root/udf/myfun.py` with the following content.
```python
from math import log
def init():
pass
def destroy():
pass
def process(block):
rows, _ = block.shape()
return [log(block.data(i, 0) ** 2 + 1) for i in range(rows)]
```
This file contains 3 functions. `init` and `destroy` are empty functions; they are the lifecycle functions of a UDF and must be defined even if they do nothing. The most crucial one is the `process` function, which accepts a data block. The data block object has two methods.
1. `shape()` returns the number of rows and columns of the data block
2. `data(i, j)` returns the data at row i, column j
The scalar function's `process` method must return as many rows of data as there are in the data block. The above code ignores the number of columns, as it only needs to compute each row's first column.
Next, create the corresponding UDF function, execute the following statement in the TDengine CLI.
```sql
create function myfun as '/root/udf/myfun.py' outputtype double language 'Python'
```
```shell
taos> create function myfun as '/root/udf/myfun.py' outputtype double language 'Python';
Create OK, 0 row(s) affected (0.005202s)
```
That looks good. Next, check all the custom functions in the system to confirm it was created successfully.
```text
taos> show functions;
name |
=================================
myfun |
Query OK, 1 row(s) in set (0.005767s)
```
Generate test data by executing the following commands in the TDengine CLI.
```sql
create database test;
create table t(ts timestamp, v1 int, v2 int, v3 int);
insert into t values('2023-05-01 12:13:14', 1, 2, 3);
insert into t values('2023-05-03 08:09:10', 2, 3, 4);
insert into t values('2023-05-10 07:06:05', 3, 4, 5);
```
Test the myfun function.
```sql
taos> select myfun(v1, v2) from t;
DB error: udf function execution failure (0.011088s)
```
Unfortunately, the execution failed. What could be the reason? Check the udfd process logs.
```shell
tail -10 /var/log/taos/udfd.log
```
Found the following error messages.
```text
05/24 22:46:28.733545 01665799 UDF ERROR can not load library libtaospyudf.so. error: operation not permitted
05/24 22:46:28.733561 01665799 UDF ERROR can not load python plugin. lib path libtaospyudf.so
```
The error is clear: the Python plugin `libtaospyudf.so` was not loaded. If you encounter this error, please refer to the previous section on setting up the environment.
After fixing the environment error, execute again as follows.
```sql
taos> select myfun(v1) from t;
myfun(v1) |
============================
0.693147181 |
1.609437912 |
2.302585093 |
```
With this, we have completed our first UDF 😊, and learned some basic debugging methods.
#### Example Two
Although the myfun function passed the test, it has two drawbacks.
1. This scalar function only accepts 1 column of data as input, and it will not throw an exception if multiple columns are passed.
```sql
taos> select myfun(v1, v2) from t;
myfun(v1, v2) |
============================
0.693147181 |
1.609437912 |
2.302585093 |
```
2. It does not handle null values. We expect the function to return null when the input is null. The process function is therefore improved as follows to address both issues.
```python
def process(block):
rows, cols = block.shape()
if cols > 1:
raise Exception(f"require 1 parameter but given {cols}")
return [ None if block.data(i, 0) is None else log(block.data(i, 0) ** 2 + 1) for i in range(rows)]
```
Execute the following statement to update the existing UDF.
```sql
create or replace function myfun as '/root/udf/myfun.py' outputtype double language 'Python';
```
Passing two arguments to myfun will result in a failure.
```sql
taos> select myfun(v1, v2) from t;
DB error: udf function execution failure (0.014643s)
```
Custom exception messages are logged in the plugin log file `/var/log/taos/taospyudf.log`.
```text
2023-05-24 23:21:06.790 ERROR [1666188] [doPyUdfScalarProc@507] call pyUdfScalar proc function. context 0x7faade26d180. error: Exception: require 1 parameter but given 2
At:
/var/lib/taos//.udf/myfun_3_1884e1281d9.py(12): process
```
Thus, we have learned how to update UDFs and view the error logs output by UDFs.
(Note: In versions earlier than TDengine 3.0.5.0, taosd must be restarted for an updated UDF to take effect; in version 3.0.5.0 and later, restarting taosd is not required.)
#### Example Three
Given the input (x1, x2, ..., xn), output the sum of each value multiplied by its index: `1 * x1 + 2 * x2 + ... + n * xn`. If any of x1 to xn is null, the result is null.
This example differs from Example One in that it accepts any number of columns as input and needs to process every column's value. Write the UDF file `/root/udf/nsum.py`.
```python
def init():
pass
def destroy():
pass
def process(block):
rows, cols = block.shape()
result = []
for i in range(rows):
total = 0
for j in range(cols):
v = block.data(i, j)
if v is None:
total = None
break
total += (j + 1) * block.data(i, j)
result.append(total)
return result
```
Create the UDF.
```sql
create function nsum as '/root/udf/nsum.py' outputtype double language 'Python';
```
Test the UDF.
```sql
taos> insert into t values('2023-05-25 09:09:15', 6, null, 8);
Insert OK, 1 row(s) affected (0.003675s)
taos> select ts, v1, v2, v3, nsum(v1, v2, v3) from t;
ts | v1 | v2 | v3 | nsum(v1, v2, v3) |
================================================================================================
2023-05-01 12:13:14.000 | 1 | 2 | 3 | 14.000000000 |
2023-05-03 08:09:10.000 | 2 | 3 | 4 | 20.000000000 |
2023-05-10 07:06:05.000 | 3 | 4 | 5 | 26.000000000 |
2023-05-25 09:09:15.000 | 6 | NULL | 8 | NULL |
Query OK, 4 row(s) in set (0.010653s)
```
#### Example Four
Write a UDF that takes a timestamp as input and outputs the next closest Sunday. For example, if today is 2023-05-25, then the next Sunday is 2023-05-28.
To complete this function, you need to use the third-party library moment. First, install this library.
```shell
pip3 install moment
```
Then write the UDF file `/root/udf/nextsunday.py`.
```python
import moment
def init():
pass
def destroy():
pass
def process(block):
rows, cols = block.shape()
if cols > 1:
raise Exception("require only 1 parameter")
if not type(block.data(0, 0)) is int:
raise Exception("type error")
return [moment.unix(block.data(i, 0)).replace(weekday=7).format('YYYY-MM-DD')
for i in range(rows)]
```
The UDF framework maps TDengine's timestamp type to Python's int type, so this function only accepts an integer representing a timestamp in milliseconds. The process method first checks the parameters, then uses the moment package to replace the day of the week with Sunday, and finally formats the output. The output string has a fixed length of 10 characters, so the UDF function can be created like this.
```sql
create function nextsunday as '/root/udf/nextsunday.py' outputtype binary(10) language 'Python';
```
At this point, test the function. If you started taosd with systemctl, you will definitely encounter an error.
```sql
taos> select ts, nextsunday(ts) from t;
DB error: udf function execution failure (1.123615s)
```
```shell
tail -20 taospyudf.log
2023-05-25 11:42:34.541 ERROR [1679419] [PyUdf::PyUdf@217] py udf load module failure. error ModuleNotFoundError: No module named 'moment'
```
This is because the location of "moment" is not in the default library search path of the python udf plugin. How to confirm this? Search `taospyudf.log` with the following command.
```shell
grep 'sys path' taospyudf.log | tail -1
```
The output is as follows
```text
2023-05-25 10:58:48.554 INFO [1679419] [doPyOpen@592] python sys path: ['', '/lib/python38.zip', '/lib/python3.8', '/lib/python3.8/lib-dynload', '/lib/python3/dist-packages', '/var/lib/taos//.udf']
```
It is found that the default third-party library installation path searched by the python udf plugin is: `/lib/python3/dist-packages`, while moment is installed by default in `/usr/local/lib/python3.8/dist-packages`. Next, we modify the default library search path of the python udf plugin.
First, open the python3 command line and check the current sys.path.
```python
>>> import sys
>>> ":".join(sys.path)
'/usr/lib/python3.8:/usr/lib/python3.8/lib-dynload:/usr/local/lib/python3.8/dist-packages:/usr/lib/python3/dist-packages'
```
Copy the output string above, then edit `/etc/taos/taos.cfg` and add the following configuration.
```shell
UdfdLdLibPath /usr/lib/python3.8:/usr/lib/python3.8/lib-dynload:/usr/local/lib/python3.8/dist-packages:/usr/lib/python3/dist-packages
```
After saving, execute `systemctl restart taosd`, then test again and there will be no errors.
```sql
taos> select ts, nextsunday(ts) from t;
ts | nextsunday(ts) |
===========================================
2023-05-01 12:13:14.000 | 2023-05-07 |
2023-05-03 08:09:10.000 | 2023-05-07 |
2023-05-10 07:06:05.000 | 2023-05-14 |
2023-05-25 09:09:15.000 | 2023-05-28 |
Query OK, 4 row(s) in set (1.011474s)
```
#### Example Five
Write an aggregate function to calculate the difference between the maximum and minimum values of a column.
The difference between aggregate functions and scalar functions is: scalar functions have multiple outputs corresponding to multiple rows of input, whereas aggregate functions have a single output corresponding to multiple rows of input. The execution process of an aggregate function is somewhat similar to the classic map-reduce framework, where the framework divides the data into several chunks, each mapper handles a chunk, and the reducer aggregates the results of the mappers. The difference is that, in the TDengine Python UDF, the reduce function has both map and reduce capabilities. The reduce function takes two parameters: one is the data it needs to process, and the other is the result of other tasks executing the reduce function. See the following example `/root/udf/myspread.py`.
```python
import io
import math
import pickle
LOG_FILE: io.TextIOBase = None
def init():
global LOG_FILE
LOG_FILE = open("/var/log/taos/spread.log", "wt")
log("init function myspead success")
def log(o):
LOG_FILE.write(str(o) + '\n')
def destroy():
log("close log file: spread.log")
LOG_FILE.close()
def start():
return pickle.dumps((-math.inf, math.inf))
def reduce(block, buf):
max_number, min_number = pickle.loads(buf)
log(f"initial max_number={max_number}, min_number={min_number}")
rows, _ = block.shape()
for i in range(rows):
v = block.data(i, 0)
if v > max_number:
log(f"max_number={v}")
max_number = v
if v < min_number:
log(f"min_number={v}")
min_number = v
return pickle.dumps((max_number, min_number))
def finish(buf):
max_number, min_number = pickle.loads(buf)
return max_number - min_number
```
In this example, we not only defined an aggregate function but also added the functionality to record execution logs.
1. The `init` function opens a file for logging.
2. The `log` function records logs, automatically converting the incoming object into a string and appending a newline.
3. The `destroy` function closes the log file after execution.
4. The `start` function returns the initial buffer to store intermediate results of the aggregate function, initializing the maximum value as negative infinity and the minimum value as positive infinity.
5. The `reduce` function processes each data block and aggregates the results.
6. The `finish` function converts the buffer into the final output.
Execute the following SQL statement to create the corresponding UDF.
```sql
create or replace aggregate function myspread as '/root/udf/myspread.py' outputtype double bufsize 128 language 'Python';
```
This SQL statement has two important differences from the SQL statement used to create scalar functions.
1. Added the `aggregate` keyword.
2. Added the `bufsize` keyword, which specifies the memory size for storing intermediate results. This value can be larger than the actual usage. In this example, the intermediate result is a tuple composed of two floating-point numbers, which occupies only 32 bytes when serialized, but the specified `bufsize` is 128. You can use the Python command line to print the actual number of bytes used.
```python
>>> len(pickle.dumps((12345.6789, 23456789.9877)))
32
```
To test this function, you can see that the output of `myspread` is consistent with that of the built-in `spread` function.
```sql
taos> select myspread(v1) from t;
myspread(v1) |
============================
5.000000000 |
Query OK, 1 row(s) in set (0.013486s)
taos> select spread(v1) from t;
spread(v1) |
============================
5.000000000 |
Query OK, 1 row(s) in set (0.005501s)
```
Finally, by checking the execution log, you can see that the reduce function was executed 3 times, during which the max value was updated 4 times, and the min value was updated only once.
```shell
root@server11 /var/log/taos $ cat spread.log
init function myspead success
initial max_number=-inf, min_number=inf
max_number=1
min_number=1
initial max_number=1, min_number=1
max_number=2
max_number=3
initial max_number=3, min_number=1
max_number=6
close log file: spread.log
```
Through this example, we learned how to define aggregate functions and print custom log information.
### More Python UDF Example Code
#### Scalar Function Example [pybitand](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/pybitand.py)
`pybitand` implements the bitwise AND function for multiple columns. If there is only one column, it returns that column. `pybitand` ignores null values.
<details>
<summary>pybitand.py</summary>
```Python
{{#include tests/script/sh/pybitand.py}}
```
</details>
#### Aggregate Function Example [pyl2norm](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/pyl2norm.py)
`pyl2norm` calculates the second-order norm of all data in the input column, i.e., squares each data point, then sums them up, and finally takes the square root.
<details>
<summary>pyl2norm.py</summary>
```Python
{{#include tests/script/sh/pyl2norm.py}}
```
</details>
#### Aggregate Function Example [pycumsum](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/pycumsum.py)
`pycumsum` uses numpy to calculate the cumulative sum of all data in the input column.
<details>
<summary>pycumsum.py</summary>
```Python
{{#include tests/script/sh/pycumsum.py}}
```
</details>
## Managing UDFs
The process of managing UDFs in a cluster involves creating, using, and maintaining these functions. Users can create and manage UDFs in the cluster through SQL. Once created, all users in the cluster can use these functions in SQL. Since UDFs are stored on the cluster's mnode, they remain available even after the cluster is restarted.
When creating UDFs, it is necessary to distinguish between scalar functions and aggregate functions. Scalar functions accept zero or more input parameters and return a single value. Aggregate functions accept a set of input values and return a single value by performing some calculation (such as summing, counting, etc.) on these values. If the wrong function category is declared during creation, an error will be reported when the function is called through SQL.
Additionally, users need to ensure that the input data type matches the UDF program, and the output data type of the UDF matches the `outputtype`. This means that when creating a UDF, you need to specify the correct data types for input parameters and output values. This helps ensure that when the UDF is called, the input data is correctly passed to the UDF, and the output values match the expected data types.
### Creating Scalar Functions
The SQL syntax for creating scalar functions is as follows.
```sql
CREATE [OR REPLACE] FUNCTION function_name AS library_path OUTPUTTYPE output_type LANGUAGE 'Python';
```
The parameters are explained as follows.
- `or replace`: If the function already exists, its properties are modified.
- `function_name`: The name of the scalar function when called in SQL.
- `language`: Supports C and Python (Python 3.7 and above); the default is C.
- `library_path`: If the programming language is C, the path is the absolute path to the dynamic link library containing the UDF implementation, usually pointing to a .so file. If the programming language is Python, the path is the path to the Python file containing the UDF implementation. The path must be enclosed in ASCII (English) single or double quotation marks.
- `output_type`: The data type name of the function's computation result.
### Creating Aggregate Functions
The SQL syntax for creating aggregate functions is as follows.
```sql
CREATE [OR REPLACE] AGGREGATE FUNCTION function_name AS library_path OUTPUTTYPE output_type BUFSIZE buffer_size LANGUAGE 'Python';
```
Here, `buffer_size` represents the size of the buffer for intermediate calculation results, in bytes. The meanings of other parameters are the same as those for scalar functions.
The following SQL creates a UDF named `l2norm`.
```sql
CREATE AGGREGATE FUNCTION l2norm AS "/home/taos/udf_example/libl2norm.so" OUTPUTTYPE DOUBLE bufsize 8;
```
### Deleting UDFs
The SQL syntax for deleting a UDF with a specified name is as follows.
```sql
DROP FUNCTION function_name;
```
### Viewing UDFs
The SQL to display all currently available UDFs in the cluster is as follows.
```sql
show functions;
```
### Viewing Function Information
Each update of a UDF with the same name increases its version number by 1. Detailed function information can be viewed with the following SQL.
```sql
select * from ins_functions \G;
```

---
title: Ingesting Data Efficiently
slug: /developer-guide/ingesting-data-efficiently
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
import Image from '@theme/IdealImage';
import imgThread from '../assets/ingesting-data-efficiently-01.png';
This section describes how to write data to TDengine efficiently.
## Principles of Efficient Writing {#principle}
### From the Client Application's Perspective {#application-view}
From the perspective of the client application, efficient data writing should consider the following factors:
1. The amount of data written at once. Generally, the larger the batch of data written at once, the more efficient it is (but the advantage disappears beyond a certain threshold). When writing to TDengine using SQL, try to concatenate more data in one SQL statement. Currently, the maximum length of a single SQL statement supported by TDengine is 1,048,576 (1MB) characters.
2. Number of concurrent connections. Generally, the more concurrent connections writing data at the same time, the more efficient it is (but efficiency may decrease beyond a certain threshold, depending on the server's processing capacity).
3. Distribution of data across different tables (or subtables), i.e., the adjacency of the data being written. Generally, writing data to the same table (or subtable) in each batch is more efficient than writing to multiple tables (or subtables).
4. Method of writing. Generally:
- Binding parameters is more efficient than writing SQL. Parameter binding avoids SQL parsing (but increases the number of calls to the C interface, which also has a performance cost).
- Writing SQL without automatic table creation is more efficient than with automatic table creation, because automatic table creation requires frequently checking whether the table exists.
- Writing SQL is more efficient than schema-less writing because schema-less writing automatically creates tables and supports dynamic changes to the table structure.
Client applications should fully and appropriately utilize these factors. In a single write operation, try to write data only to the same table (or subtable), set the batch size after testing and tuning to a value that best suits the current system's processing capacity, and similarly set the number of concurrent writing connections after testing and tuning to achieve the best writing speed in the current system.
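As a minimal sketch of the batching principle above (separate from the Java example program later in this section), the hypothetical helper below assumes the Python connector taospy and pre-formatted value groups that all belong to the same (sub)table; it concatenates them into a single INSERT statement so that each write carries a large batch.
```python
import taos  # taospy, the TDengine Python connector

# Hypothetical helper: write one batch of rows that all belong to the same (sub)table.
# value_rows holds pre-formatted "(ts, col1, col2, ...)" groups for that table.
def write_batch(conn, table, value_rows):
    if not value_rows:
        return
    # One INSERT per batch reduces round trips and SQL parsing overhead;
    # keep the final statement under the 1 MB single-statement limit mentioned above.
    sql = f"INSERT INTO {table} VALUES " + " ".join(value_rows)
    conn.execute(sql)
```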
### From the Data Source's Perspective {#datasource-view}
Client applications usually need to read data from a data source before writing it to TDengine. From the data source's perspective, the following situations require adding a queue between the reading and writing threads:
1. There are multiple data sources, and the data generation speed of a single data source is much lower than the writing speed of a single thread, but the overall data volume is relatively large. In this case, the role of the queue is to aggregate data from multiple sources to increase the amount of data written at once.
2. The data generation speed of a single data source is much greater than the writing speed of a single thread. In this case, the role of the queue is to increase the concurrency of writing.
3. Data for a single table is scattered across multiple data sources. In this case, the role of the queue is to aggregate the data for the same table in advance, improving the adjacency of the data during writing.
If the data source for the writing application is Kafka, and the writing application itself is a Kafka consumer, then Kafka's features can be utilized for efficient writing. For example:
1. Write data from the same table to the same Topic and the same Partition to increase data adjacency.
2. Aggregate data by subscribing to multiple Topics.
3. Increase the concurrency of writing by increasing the number of Consumer threads.
4. Increase the maximum amount of data fetched each time to increase the maximum amount of data written at once.
### From the Server Configuration's Perspective {#setting-view}
From the server configuration's perspective, the number of vgroups should be set appropriately when creating the database based on the number of disks in the system, the I/O capability of the disks, and the processor's capacity to fully utilize system performance. If there are too few vgroups, the system's performance cannot be maximized; if there are too many vgroups, it will cause unnecessary resource competition. The recommended number of vgroups is typically twice the number of CPU cores, but this should still be adjusted based on the specific system resource configuration.
For more tuning parameters, please refer to [Database Management](../../tdengine-reference/sql-manual/manage-databases/) and [Server Configuration](../../tdengine-reference/components/taosd/).
## Efficient Writing Example {#sample-code}
### Scenario Design {#scenario}
The following example program demonstrates how to write data efficiently, with the scenario designed as follows:
- The TDengine client application continuously reads data from other data sources. In the example program, simulated data generation is used to mimic reading from data sources.
- The speed of a single connection writing to TDengine cannot match the speed of reading data, so the client application starts multiple threads, each establishing a connection with TDengine, and each thread has a dedicated fixed-size message queue.
- The client application hashes the received data according to the table name (or subtable name) to different threads, i.e., writing to the message queue corresponding to that thread, ensuring that data belonging to a certain table (or subtable) will always be processed by a fixed thread.
- Each sub-thread writes its batch to TDengine after draining its associated message queue or after the buffered data reaches a predetermined threshold, and then continues to process subsequently received data.
<figure>
<Image img={imgThread} alt="Thread model for efficient writing example"/>
<figcaption>Figure 1. Thread model for efficient writing example</figcaption>
</figure>
### Sample Code {#code}
This section provides sample code for the above scenario. The principle of efficient writing is the same for other scenarios, but the code needs to be modified accordingly.
This sample code assumes that the source data belongs to different subtables of the same supertable (meters). The program has already created this supertable in the test database before starting to write data. For subtables, they will be automatically created by the application according to the received data. If the actual scenario involves multiple supertables, only the code for automatic table creation in the write task needs to be modified.
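For reference, the supertable can be created ahead of time with a statement like the hedged JDBC sketch below; the example programs perform the equivalent step themselves, so this is only for orientation. The schema mirrors the `meters` supertable used throughout the TDengine documentation, and the `test` database is assumed to exist.
```java
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class CreateSuperTableSketch {
    public static void main(String[] args) throws Exception {
        String url = "jdbc:TAOS://localhost:6030?user=root&password=taosdata";
        try (Connection conn = DriverManager.getConnection(url);
             Statement stmt = conn.createStatement()) {
            // Subtables are auto-created from this template by the write tasks.
            stmt.executeUpdate(
                "CREATE STABLE IF NOT EXISTS test.meters " +
                "(ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) " +
                "TAGS (location VARCHAR(64), groupId INT)");
        }
    }
}
```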
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
**Program Listing**
| Class Name | Function Description |
| ----------------- | -------------------------------------------------------------------------------- |
| FastWriteExample | Main program |
| ReadTask | Reads data from a simulated source, hashes the table name to get the Queue Index, writes to the corresponding Queue |
| WriteTask | Retrieves data from the Queue, forms a Batch, writes to TDengine |
| MockDataSource | Simulates generating data for a certain number of meters subtables |
| SQLWriter | WriteTask relies on this class to complete SQL stitching, automatic table creation, SQL writing, and SQL length checking |
| StmtWriter | Implements parameter binding for batch writing (not yet completed) |
| DataBaseMonitor | Counts the writing speed and prints the current writing speed to the console every 10 seconds |
Below are the complete codes and more detailed function descriptions for each class.
<details>
<summary>FastWriteExample</summary>
The main program is responsible for:
1. Creating message queues
2. Starting write threads
3. Starting read threads
4. Counting the writing speed every 10 seconds
The main program exposes 4 parameters by default, which can be adjusted each time the program is started, for testing and tuning:
1. Number of read threads. Default is 1.
2. Number of write threads. Default is 3.
3. Total number of simulated tables. Default is 1,000. This will be evenly divided among the read threads. If the total number of tables is large, table creation will take longer, and the initial writing speed statistics may be slow.
4. Maximum number of records written per batch. Default is 3,000.
Queue capacity (taskQueueCapacity) is also a performance-related parameter that can be adjusted by modifying the program. Generally speaking, the larger the queue capacity, the less likely enqueuing is to block and the higher the queue's throughput, but also the larger the memory usage. The default value in the sample program is already large enough.
```java
{{#include docs/examples/java/src/main/java/com/taos/example/highvolume/FastWriteExample.java}}
```
</details>
<details>
<summary>ReadTask</summary>
The read task is responsible for reading data from the data source. Each read task is associated with a simulated data source. Each simulated data source can generate data for a certain number of tables. Different simulated data sources generate data for different tables.
The read task writes to the message queue in a blocking manner. That is, once the queue is full, the write operation will be blocked.
```java
{{#include docs/examples/java/src/main/java/com/taos/example/highvolume/ReadTask.java}}
```
</details>
<details>
<summary>WriteTask</summary>
```java
{{#include docs/examples/java/src/main/java/com/taos/example/highvolume/WriteTask.java}}
```
</details>
<details>
<summary>MockDataSource</summary>
```java
{{#include docs/examples/java/src/main/java/com/taos/example/highvolume/MockDataSource.java}}
```
</details>
<details>
<summary>SQLWriter</summary>
The SQLWriter class encapsulates the logic of SQL stitching and data writing. Note that none of the tables are created in advance; instead, they are created in batches using the supertable as a template when a table not found exception is caught, and then the INSERT statement is re-executed. For other exceptions, this simply logs the SQL statement being executed at the time; you can also log more clues to facilitate error troubleshooting and fault recovery.
```java
{{#include docs/examples/java/src/main/java/com/taos/example/highvolume/SQLWriter.java}}
```
</details>
<details>
<summary>DataBaseMonitor</summary>
```java
{{#include docs/examples/java/src/main/java/com/taos/example/highvolume/DataBaseMonitor.java}}
```
</details>
**Execution Steps**
<details>
<summary>Execute the Java Example Program</summary>
Before running the program, configure the environment variable `TDENGINE_JDBC_URL`. If the TDengine Server is deployed on the local machine, and the username, password, and port are all default values, then you can configure:
```shell
TDENGINE_JDBC_URL="jdbc:TAOS://localhost:6030?user=root&password=taosdata"
```
**Execute the example program in a local integrated development environment**
1. Clone the TDengine repository
```shell
git clone git@github.com:taosdata/TDengine.git --depth 1
```
2. Open the `docs/examples/java` directory with the integrated development environment.
3. Configure the environment variable `TDENGINE_JDBC_URL` in the development environment. If the global environment variable `TDENGINE_JDBC_URL` has already been configured, you can skip this step.
4. Run the class `com.taos.example.highvolume.FastWriteExample`.
**Execute the example program on a remote server**
To execute the example program on a server, follow these steps:
1. Package the example code. Execute in the directory TDengine/docs/examples/java:
```shell
mvn package
```
2. Create an examples directory on the remote server:
```shell
mkdir -p examples/java
```
3. Copy dependencies to the specified directory on the server:
- Copy dependency packages, only once
```shell
scp -r .\target\lib <user>@<host>:~/examples/java
```
- Copy the jar package of this program, copy every time the code is updated
```shell
scp -r .\target\javaexample-1.0.jar <user>@<host>:~/examples/java
```
4. Configure the environment variable.
Edit `~/.bash_profile` or `~/.bashrc` and add the following content for example:
```shell
export TDENGINE_JDBC_URL="jdbc:TAOS://localhost:6030?user=root&password=taosdata"
```
The above uses the default JDBC URL when TDengine Server is deployed locally. You need to modify it according to your actual situation.
5. Start the example program with the Java command, command template:
```shell
java -classpath lib/*:javaexample-1.0.jar com.taos.example.highvolume.FastWriteExample <read_thread_count> <write_thread_count> <total_table_count> <max_batch_size>
```
6. End the test program. The test program will not end automatically; after obtaining a stable writing speed under the current configuration, press <kbd>CTRL</kbd> + <kbd>C</kbd> to end the program.
Below is a log output from an actual run, with machine configuration 16 cores + 64G + SSD.
```text
root@vm85$ java -classpath lib/*:javaexample-1.0.jar com.taos.example.highvolume.FastWriteExample 2 12
18:56:35.896 [main] INFO c.t.e.highvolume.FastWriteExample - readTaskCount=2, writeTaskCount=12 tableCount=1000 maxBatchSize=3000
18:56:36.011 [WriteThread-0] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.015 [WriteThread-0] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.021 [WriteThread-1] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.022 [WriteThread-1] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.031 [WriteThread-2] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.032 [WriteThread-2] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.041 [WriteThread-3] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.042 [WriteThread-3] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.093 [WriteThread-4] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.094 [WriteThread-4] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.099 [WriteThread-5] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.100 [WriteThread-5] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.100 [WriteThread-6] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.101 [WriteThread-6] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.103 [WriteThread-7] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.104 [WriteThread-7] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.105 [WriteThread-8] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.107 [WriteThread-8] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.108 [WriteThread-9] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.109 [WriteThread-9] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.156 [WriteThread-10] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.157 [WriteThread-11] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.158 [WriteThread-10] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.158 [ReadThread-0] INFO com.taos.example.highvolume.ReadTask - started
18:56:36.158 [ReadThread-1] INFO com.taos.example.highvolume.ReadTask - started
18:56:36.158 [WriteThread-11] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:46.369 [main] INFO c.t.e.highvolume.FastWriteExample - count=18554448 speed=1855444
18:56:56.946 [main] INFO c.t.e.highvolume.FastWriteExample - count=39059660 speed=2050521
18:57:07.322 [main] INFO c.t.e.highvolume.FastWriteExample - count=59403604 speed=2034394
18:57:18.032 [main] INFO c.t.e.highvolume.FastWriteExample - count=80262938 speed=2085933
18:57:28.432 [main] INFO c.t.e.highvolume.FastWriteExample - count=101139906 speed=2087696
18:57:38.921 [main] INFO c.t.e.highvolume.FastWriteExample - count=121807202 speed=2066729
18:57:49.375 [main] INFO c.t.e.highvolume.FastWriteExample - count=142952417 speed=2114521
18:58:00.689 [main] INFO c.t.e.highvolume.FastWriteExample - count=163650306 speed=2069788
18:58:11.646 [main] INFO c.t.e.highvolume.FastWriteExample - count=185019808 speed=2136950
```
</details>
</TabItem>
<TabItem label="Python" value="python">
**Program Listing**
The Python example program uses a multi-process architecture and employs a cross-process message queue.
| Function or Class | Description |
| ------------------------ | ------------------------------------------------------------------- |
| main function | Entry point of the program, creates various subprocesses and message queues |
| run_monitor_process function | Creates database, supertables, tracks write speed and periodically prints to console |
| run_read_task function | Main logic for read processes, responsible for reading data from other data systems and distributing it to assigned queues |
| MockDataSource class | Simulates a data source, implements iterator interface, returns the next 1,000 records for each table in batches |
| run_write_task function | Main logic for write processes. Retrieves as much data as possible from the queue and writes in batches |
| SQLWriter class | Handles SQL writing and automatic table creation |
| StmtWriter class | Implements batch writing with parameter binding (not yet completed) |
<details>
<summary>main function</summary>
The main function is responsible for creating message queues and launching subprocesses, which are of 3 types:
1. 1 monitoring process, responsible for database initialization and tracking write speed
2. n read processes, responsible for reading data from other data systems
3. m write processes, responsible for writing to the database
The main function can accept 5 startup parameters, in order:
1. Number of read tasks (processes), default is 1
2. Number of write tasks (processes), default is 1
3. Total number of simulated tables, default is 1,000
4. Queue size (in bytes), default is 1,000,000
5. Maximum number of records written per batch, default is 3,000
```python
{{#include docs/examples/python/fast_write_example.py:main}}
```
</details>
<details>
<summary>run_monitor_process</summary>
The monitoring process is responsible for initializing the database and monitoring the current write speed.
```python
{{#include docs/examples/python/fast_write_example.py:monitor}}
```
</details>
<details>
<summary>run_read_task function</summary>
The read process, responsible for reading data from other data systems and distributing it to assigned queues.
```python
{{#include docs/examples/python/fast_write_example.py:read}}
```
</details>
<details>
<summary>MockDataSource</summary>
Below is the implementation of the mock data source. We assume that each piece of data generated by the data source includes the target table name information. In practice, you might need certain rules to determine the target table name.
```python
{{#include docs/examples/python/mockdatasource.py}}
```
</details>
<details>
<summary>run_write_task function</summary>
The write process retrieves as much data as possible from the queue and writes in batches.
```python
{{#include docs/examples/python/fast_write_example.py:write}}
```
</details>
<details>
<summary>SQLWriter</summary>
The SQLWriter class encapsulates the logic of assembling SQL and writing data. None of the tables are pre-created; instead, they are created in batches, using the supertable as a template, when a "table does not exist" error occurs, and the INSERT statement is then re-executed. For other errors, the SQL being executed at the time is logged to facilitate error troubleshooting and fault recovery. This class also checks whether the SQL exceeds the maximum length limit; based on the TDengine 3.0 limit, the maximum supported SQL length of 1,048,576 is passed in through the input parameter maxSQLLength.
```python
{{#include docs/examples/python/sql_writer.py}}
```
</details>
**Execution Steps**
<details>
<summary>Execute the Python Example Program</summary>
1. Prerequisites
- TDengine client driver installed
- Python3 installed, recommended version >= 3.8
- taospy installed
2. Install faster-fifo to replace the built-in multiprocessing.Queue in python
```shell
pip3 install faster-fifo
```
3. Click the "View Source" link above to copy the `fast_write_example.py`, `sql_writer.py`, and `mockdatasource.py` files.
4. Execute the example program
```shell
python3 fast_write_example.py <READ_TASK_COUNT> <WRITE_TASK_COUNT> <TABLE_COUNT> <QUEUE_SIZE> <MAX_BATCH_SIZE>
```
Below is an actual output from a run, on a machine configured with 16 cores + 64G + SSD.
```text
root@vm85$ python3 fast_write_example.py 8 8
2022-07-14 19:13:45,869 [root] - READ_TASK_COUNT=8, WRITE_TASK_COUNT=8, TABLE_COUNT=1000, QUEUE_SIZE=1000000, MAX_BATCH_SIZE=3000
2022-07-14 19:13:48,882 [root] - WriteTask-0 started with pid 718347
2022-07-14 19:13:48,883 [root] - WriteTask-1 started with pid 718348
2022-07-14 19:13:48,884 [root] - WriteTask-2 started with pid 718349
2022-07-14 19:13:48,884 [root] - WriteTask-3 started with pid 718350
2022-07-14 19:13:48,885 [root] - WriteTask-4 started with pid 718351
2022-07-14 19:13:48,885 [root] - WriteTask-5 started with pid 718352
2022-07-14 19:13:48,886 [root] - WriteTask-6 started with pid 718353
2022-07-14 19:13:48,886 [root] - WriteTask-7 started with pid 718354
2022-07-14 19:13:48,887 [root] - ReadTask-0 started with pid 718355
2022-07-14 19:13:48,888 [root] - ReadTask-1 started with pid 718356
2022-07-14 19:13:48,889 [root] - ReadTask-2 started with pid 718357
2022-07-14 19:13:48,889 [root] - ReadTask-3 started with pid 718358
2022-07-14 19:13:48,890 [root] - ReadTask-4 started with pid 718359
2022-07-14 19:13:48,891 [root] - ReadTask-5 started with pid 718361
2022-07-14 19:13:48,892 [root] - ReadTask-6 started with pid 718364
2022-07-14 19:13:48,893 [root] - ReadTask-7 started with pid 718365
2022-07-14 19:13:56,042 [DataBaseMonitor] - count=6676310 speed=667631.0
2022-07-14 19:14:06,196 [DataBaseMonitor] - count=20004310 speed=1332800.0
2022-07-14 19:14:16,366 [DataBaseMonitor] - count=32290310 speed=1228600.0
2022-07-14 19:14:26,527 [DataBaseMonitor] - count=44438310 speed=1214800.0
2022-07-14 19:14:36,673 [DataBaseMonitor] - count=56608310 speed=1217000.0
2022-07-14 19:14:46,834 [DataBaseMonitor] - count=68757310 speed=1214900.0
2022-07-14 19:14:57,280 [DataBaseMonitor] - count=80992310 speed=1223500.0
2022-07-14 19:15:07,689 [DataBaseMonitor] - count=93805310 speed=1281300.0
2022-07-14 19:15:18,020 [DataBaseMonitor] - count=106111310 speed=1230600.0
2022-07-14 19:15:28,356 [DataBaseMonitor] - count=118394310 speed=1228300.0
2022-07-14 19:15:38,690 [DataBaseMonitor] - count=130742310 speed=1234800.0
2022-07-14 19:15:49,000 [DataBaseMonitor] - count=143051310 speed=1230900.0
2022-07-14 19:15:59,323 [DataBaseMonitor] - count=155276310 speed=1222500.0
2022-07-14 19:16:09,649 [DataBaseMonitor] - count=167603310 speed=1232700.0
2022-07-14 19:16:19,995 [DataBaseMonitor] - count=179976310 speed=1237300.0
```
</details>
:::note
When using the Python connector to connect to TDengine with multiple processes, there is a limitation: connections cannot be established in the parent process; all connections must be created in the child processes.
If a connection is created in the parent process, any connection attempts in the child processes will be perpetually blocked. This is a known issue.
:::
</TabItem>
</Tabs>

View File

@ -1,3 +1,3 @@
```c title="Native Connection"
```c
{{#include docs/examples/c/connect_example.c}}
```

View File

@ -0,0 +1,17 @@
#### Using a Unified Interface for Database Access
```go title="Native Connection"
{{#include docs/examples/go/connect/cgoexample/main.go}}
```
```go title="REST Connection"
{{#include docs/examples/go/connect/restexample/main.go}}
```
#### Using Advanced Wrappers
You can also use the `af` package from driver-go to establish connections. This module encapsulates advanced features of TDengine, such as parameter binding, subscription, etc.
```go title="Establishing Native Connection Using af Package"
{{#include docs/examples/go/connect/afconn/main.go}}
```

View File

@ -6,10 +6,10 @@
{{#include docs/examples/java/src/main/java/com/taos/example/RESTConnectExample.java:main}}
```
When using REST connection, the feature of bulk pulling can be enabled if the size of resulting data set is huge.
When using REST connections, if the amount of data queried is large, you can also enable batch fetching.
```java title="Enable Bulk Pulling" {4}
```java title="Enable Batch Fetching" {4}
{{#include docs/examples/java/src/main/java/com/taos/example/WSConnectExample.java:main}}
```
More configuration about connection, please refer to [Java Client Library](../../reference/connectors/java)
For more connection parameter configurations, refer to [Java Connector](../../connector/java)

View File

@ -1,3 +1,3 @@
```php title=""native"
```php title="Native Connection"
{{#include docs/examples/php/connect.php}}
```

View File

@ -1,3 +1,3 @@
```python title="Native Connection"
```python
{{#include docs/examples/python/connect_example.py}}
```

View File

@ -0,0 +1,8 @@
```rust title="Native Connection"
{{#include docs/examples/rust/nativeexample/examples/connect.rs}}
```
:::note
For the Rust connector, the difference in connection methods is only reflected in the features used. If the "ws" feature is enabled, only the WebSocket implementation will be compiled.
:::

View File

@ -0,0 +1,27 @@
---
title: Developer's Guide
slug: /developer-guide
---
To develop an application, if you plan to use TDengine as a tool for time-series data processing, there are several things to do:
1. Determine the connection method to TDengine. No matter what programming language you use, you can always use the REST interface, but you can also use connectors unique to each programming language for convenient connections.
2. Based on your application scenario, determine the data model. Depending on the characteristics of the data, decide whether to create one or multiple databases; distinguish between static tags and collected metrics, establish the correct supertables, and create subtables.
3. Decide on the method of inserting data. TDengine supports data insertion using standard SQL, but also supports Schemaless mode insertion, which allows data to be written directly without manually creating tables.
4. Based on business requirements, determine which SQL queries need to be written.
5. If you want to perform lightweight real-time statistical analysis based on time-series data, including various monitoring dashboards, it is recommended to use the streaming computing capabilities of TDengine 3.0, instead of deploying complex streaming computing systems like Spark or Flink.
6. If your application has modules that need to consume inserted data and you want to be notified when new data is inserted, it is recommended to use the data subscription feature provided by TDengine, without the need to deploy Kafka or other messaging queue software.
7. In many scenarios (such as vehicle management), applications need to obtain the latest status of each data collection point, so it is recommended to use TDengine's Cache feature, instead of deploying separate caching software like Redis.
8. If you find that TDengine's functions do not meet your requirements, you can use User Defined Functions (UDF) to solve the problem.
This section is organized in the order mentioned above. For ease of understanding, TDengine provides example code for each feature and each supported programming language, located at [Example Code](https://github.com/taosdata/TDengine/tree/main/docs/examples). All example codes are guaranteed to be correct by CI, scripts located at [Example Code CI](https://github.com/taosdata/TDengine/tree/main/tests/docs-examples-test).
If you want to learn more about using SQL, check out the [SQL Manual](../tdengine-reference/sql-manual/). If you want to learn more about using various connectors, read the [Connector Reference Guide](../tdengine-reference/client-libraries/). If you also want to integrate TDengine with third-party systems, such as Grafana, please refer to [Third-Party Tools](../third-party-tools/).
If you encounter any problems during the development process, please click ["Report Issue"](https://github.com/taosdata/TDengine/issues/new/choose) at the bottom of each page to submit an Issue directly on GitHub.
```mdx-code-block
import DocCardList from '@theme/DocCardList';
import {useCurrentSidebarCategory} from '@docusaurus/theme-common';
<DocCardList items={useCurrentSidebarCategory().items}/>
```

View File

@ -1,75 +0,0 @@
---
title: Resource Planning
sidebar_label: Resource Planning
description: This document describes how to plan compute and storage resources for your TDengine cluster.
---
It is important to plan computing and storage resources if using TDengine to build an IoT, time-series or Big Data platform. How to plan the CPU, memory and disk resources required, will be described in this chapter.
## Server Memory Requirements
Each database creates a fixed number of vgroups. This number is 2 by default and can be configured with the `vgroups` parameter. The number of replicas can be controlled with the `replica` parameter. Each replica requires one vnode per vgroup. Altogether, the memory required by each database depends on the following configuration options:
- vgroups
- replica
- buffer
- pages
- pagesize
- cachesize
For more information, see [Database](../../reference/taos-sql/database).
The memory required by a database is therefore greater than or equal to:
```
vgroups * replica * (buffer + pages * pagesize + cachesize)
```
However, note that this requirement is spread over all dnodes in the cluster, not on a single physical machine. The physical servers that run dnodes meet the requirement together. If a cluster has multiple databases, the memory required increases accordingly. In complex environments where dnodes were added after initial deployment in response to increasing resource requirements, load may not be balanced among the original dnodes and newer dnodes. In this situation, the actual status of your dnodes is more important than theoretical calculations.
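As a purely hypothetical illustration, assuming buffer and cachesize are given in MB and pagesize in KB, a database created with vgroups = 2, replica = 3, buffer = 256, pages = 256, pagesize = 4, and cachesize = 1 would need roughly the following amount of memory, spread across the dnodes that host its vnodes:
```
2 * 3 * (256 + 256 * 4 / 1024 + 1) = 2 * 3 * 258 = 1548 (MBytes)
```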
## Client Memory Requirements
For the client programs using TDengine client driver `taosc` to connect to the server side there is a memory requirement as well.
The memory consumed by a client program mainly comes from the SQL statements used for data insertion, the cache of table metadata, and some internal use. Assuming the maximum number of tables is N (the metadata of each table consumes 256 bytes), the maximum number of threads for parallel insertion is T, and the maximum length of a SQL statement is S (normally 1 MB), the memory (in MB) required by a client program can be estimated using the formula below:
```
M = (T * S * 3 + (N / 4096) + 100)
```
For example, if the number of parallel data insertion threads is 100, total number of tables is 10,000,000, then the minimum memory requirement of a client program is:
```
100 * 1 * 3 + (10000000 / 4096) + 100 ≈ 2841 (MBytes)
```
So, at least 3GB needs to be reserved for such a client.
## CPU Requirement
The CPU resources required depend on two aspects:
- **Data Insertion** Each dnode of TDengine can process at least 10,000 insertion requests per second, and each insertion request can contain multiple rows. The difference in computing resources consumed between inserting 1 row at a time and inserting 10 rows at a time is very small, so the more rows that can be inserted in one request, the higher the efficiency. If each insert request contains more than 200 records, a single core can process more than 1 million records per second. Batch insertion also imposes requirements on the client side, which needs to cache rows and insert them in a batch once the number of cached rows reaches a threshold.
- **Data Query** High efficiency query is provided in TDengine, but it's hard to estimate the CPU resource required because the queries used in different use cases and the frequency of queries vary significantly. It can only be verified with the query statements, query frequency, data size to be queried, and other requirements provided by users.
In short, the CPU resources required for data insertion can be estimated, but it is hard to do so for query use cases. If possible, ensure that CPU usage remains below 50%. If this threshold is exceeded, it is a signal that the system operator should add more nodes to the cluster to expand resources.
## Disk Requirement
The compression ratio in TDengine is much higher than that in RDBMS. In most cases, the compression ratio in TDengine is greater than 5, or even 10 in some cases, depending on the characteristics of the original data. The data size before compression can be calculated with the formula below:
```
Raw DataSize = numOfTables * rowSizePerTable * rowsPerTable
```
For example, there are 10,000,000 meters, while each meter collects data every 15 minutes and the data size of each collection is 128 bytes, so the raw data size of one year is: 10000000 \* 128 \* 24 \* 60 / 15 \* 365 = 44.8512(TB). Assuming compression ratio is 5, the actual disk size is: 44.851 / 5 = 8.97024(TB).
Parameter `keep` can be used to set how long the data will be kept on disk. To further reduce storage cost, multiple storage levels can be enabled in TDengine, with the coldest data stored on the cheapest storage device. This is completely transparent to application programs.
To increase performance, multiple disks can be setup for parallel data reading or data inserting. Please note that an expensive disk array is not necessary because replications are used in TDengine to provide high availability.
## Number of Hosts
A host can be either physical or virtual. The total memory, total CPU, total disk required can be estimated according to the formulae mentioned previously. If the number of data replicas is not 1, the required resources are multiplied by the number of replicas.
Then, according to the system resources that a single host can provide, assuming all hosts have the same resources, the number of hosts can be derived easily.

File diff suppressed because it is too large

View File

@ -1,25 +0,0 @@
---
title: Fault Tolerance and Disaster Recovery
description: This document describes how TDengine provides fault tolerance and disaster recovery.
---
## Fault Tolerance
TDengine uses **WAL**, i.e. Write Ahead Log, to achieve fault tolerance and high reliability.
When a data block is received by TDengine, the original data block is first written into WAL. The log in WAL will be deleted only after the data has been written into data files in the database. Data can be recovered from WAL in case the server is stopped abnormally for any reason and then restarted.
There are 2 configuration parameters related to WAL:
- wal_level: Specifies the WAL level. 1 indicates that WAL is enabled but fsync is disabled. 2 indicates that WAL and fsync are both enabled. The default value is 1.
- wal_fsync_period: This parameter is only valid when wal_level is set to 2. It specifies the interval, in milliseconds, of invoking fsync. If set to 0, it means fsync is invoked immediately once WAL is written.
To achieve absolutely no data loss, set wal_level to 2 and wal_fsync_period to 0. There is a performance penalty to the data ingestion rate. However, if the concurrent data insertion threads on the client side can reach a big enough number, for example 50, the data ingestion performance will be still good enough. Our verification shows that the drop is only 30% when wal_fsync_period is set to 3000 milliseconds.
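As a hedged sketch, these parameters can be set when a database is created, as below; the database name `power` and the 3000 ms period are only examples, and the option names should be verified against the SQL manual for your version.
```java
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class WalSettingSketch {
    public static void main(String[] args) throws Exception {
        String url = "jdbc:TAOS://localhost:6030?user=root&password=taosdata";
        try (Connection conn = DriverManager.getConnection(url);
             Statement stmt = conn.createStatement()) {
            // WAL_LEVEL 2 enables fsync; WAL_FSYNC_PERIOD 3000 flushes every 3000 ms
            // (0 would fsync on every write, trading throughput for zero data loss).
            stmt.executeUpdate(
                "CREATE DATABASE IF NOT EXISTS power WAL_LEVEL 2 WAL_FSYNC_PERIOD 3000");
        }
    }
}
```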
## Disaster Recovery
TDengine provides disaster recovery by using taosX to replicate data between two TDengine clusters deployed in two distant data centers. Assume there are two TDengine clusters, A and B, where A is the source, B is the target, and A takes the workload of writing and querying. You can deploy `taosX` in the data center where cluster A resides; `taosX` consumes the data written into cluster A and writes it into cluster B. If the data center of cluster A is disrupted by a disaster, you can switch to cluster B to take the workload of data writing and querying, and deploy a `taosX` in the data center of cluster B to replicate data from cluster B back to cluster A once cluster A has recovered, or to another cluster C if cluster A has not been recovered.
You can use the data replication feature of `taosX` to build more complicated disaster recovery solution.
taosX is only provided in TDengine enterprise edition, for more details please contact business@tdengine.com.

View File

@ -1,62 +0,0 @@
---
title: Data Import
description: This document describes how to import data into TDengine.
---
There are multiple ways of importing data provided by TDengine: import with script, import from data file, import using `taosdump`.
## Import Using Script
TDengine CLI `taos` supports `source <filename>` command for executing the SQL statements in the file in batch. The SQL statements for creating databases, creating tables, and inserting rows can be written in a single file with one statement on each line, then the file can be executed using the `source` command in TDengine CLI `taos` to execute the SQL statements in order and in batch. In the script file, any line beginning with "#" is treated as comments and ignored silently.
## Import from Data File
In TDengine CLI, data can be imported from a CSV file into an existing table. The data in a single CSV must belong to the same table and must be consistent with the schema of that table. The SQL statement is as below:
```sql
insert into tb1 file 'path/data.csv';
```
:::note
If the first line of the CSV file is a header, please remove it before importing. If a column has no value, please use `NULL` without quotes.
:::
For example, there is a subtable d1001 whose schema is as below:
```sql
taos> DESCRIBE d1001
Field | Type | Length | Note |
=================================================================================
ts | TIMESTAMP | 8 | |
current | FLOAT | 4 | |
voltage | INT | 4 | |
phase | FLOAT | 4 | |
location | BINARY | 64 | TAG |
groupid | INT | 4 | TAG |
```
The format of the CSV file to be imported, data.csv, is as below:
```csv
'2018-10-04 06:38:05.000',10.30000,219,0.31000
'2018-10-05 06:38:15.000',12.60000,218,0.33000
'2018-10-06 06:38:16.800',13.30000,221,0.32000
'2018-10-07 06:38:05.000',13.30000,219,0.33000
'2018-10-08 06:38:05.000',14.30000,219,0.34000
'2018-10-09 06:38:05.000',15.30000,219,0.35000
'2018-10-10 06:38:05.000',16.30000,219,0.31000
'2018-10-11 06:38:05.000',17.30000,219,0.32000
'2018-10-12 06:38:05.000',18.30000,219,0.31000
```
Then, the below SQL statement can be used to import data from file "data.csv", assuming the file is located under the home directory of the current Linux user.
```sql
taos> insert into d1001 file '~/data.csv';
Query OK, 9 row(s) affected (0.004763s)
```
## Import using taosdump
A convenient tool for importing and exporting data is provided by TDengine, `taosdump`, which can be used to export data from one TDengine cluster and import into another one. For the details of using `taosdump` please refer to the taosdump documentation.

View File

@ -1,22 +0,0 @@
---
title: Data Export
description: This document describes how to export data from TDengine.
---
There are two ways of exporting data from a TDengine cluster:
- Using a SQL statement in TDengine CLI
- Using the `taosdump` tool
## Export Using SQL
If you want to export the data of a table or a STable, please execute the SQL statement below, in the TDengine CLI.
```sql
select * from <tb_name> >> data.csv;
```
The data of table or STable specified by `tb_name` will be exported into a file named `data.csv` in CSV format.
## Export Using taosdump
With `taosdump`, you can choose to export the data of all databases, a database, a table or a STable, you can also choose to export the data within a time range, or even only export the schema definition of a table. For the details of using `taosdump` please refer to the taosdump documentation.

View File

@ -1,331 +0,0 @@
---
title: TDengine Monitoring
description: This document describes how to monitor your TDengine cluster.
---
After TDengine is started, it automatically writes monitoring data including CPU, memory and disk usage, bandwidth, number of requests, disk I/O speed, slow queries, into a designated database at a predefined interval through taosKeeper. Additionally, some important system operations, like logon, create user, drop database, and alerts and warnings generated in TDengine are written into the `log` database too. A system operator can view the data in `log` database from TDengine CLI or from a web console.
The collection of the monitoring information is enabled by default, but can be disabled by parameter `monitor` in the configuration file.
## TDinsight
TDinsight is a complete solution which uses the monitoring database `log` mentioned previously, and Grafana, to monitor a TDengine cluster.
A script `TDinsight.sh` is provided to deploy TDinsight automatically.
Download `TDinsight.sh` with the below command:
```bash
wget https://github.com/taosdata/grafanaplugin/raw/master/dashboards/TDinsight.sh
chmod +x TDinsight.sh
```
Prepare:
1. TDengine Server
- The URL of REST service: for example `http://localhost:6041` if TDengine is deployed locally
- User name and password
2. Grafana Alert Notification
You can use below command to setup Grafana alert notification.
An existing Grafana Notification Channel can be specified with parameter `-E`, the notifier uid of the channel can be obtained by `curl -u admin:admin localhost:3000/api/alert-notifications |jq`
```bash
./TDinsight.sh -a http://localhost:6041 -u root -p taosdata -E <notifier uid>
```
Launch `TDinsight.sh` with the command above and restart Grafana, then open Dashboard `http://localhost:3000/d/tdinsight`.
## log database
The data of the TDinsight dashboard is stored in the `log` database (by default; you can change it in taosKeeper's config file. For more information, please refer to the [taoskeeper document](../../reference/components/taosKeeper)). taosKeeper will create the `log` database on startup.
### taosd\_cluster\_basic table
`taosd_cluster_basic` table contains cluster basic information.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|first\_ep|VARCHAR||first ep of cluster|
|first\_ep\_dnode\_id|INT||dnode id of first\_ep|
|cluster_version|VARCHAR||tdengine version. such as: 3.0.4.0|
|cluster\_id|VARCHAR|TAG|cluster id|
### taosd\_cluster\_info table
`taosd_cluster_info` table contains cluster information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|cluster\_uptime|DOUBLE||seconds of master's uptime|
|dbs\_total|DOUBLE||total number of databases in cluster|
|tbs\_total|DOUBLE||total number of tables in cluster|
|stbs\_total|DOUBLE||total number of stables in cluster|
|dnodes\_total|DOUBLE||total number of dnodes in cluster|
|dnodes\_alive|DOUBLE||total number of dnodes in ready state|
|mnodes\_total|DOUBLE||total number of mnodes in cluster|
|mnodes\_alive|DOUBLE||total number of mnodes in ready state|
|vgroups\_total|DOUBLE||total number of vgroups in cluster|
|vgroups\_alive|DOUBLE||total number of vgroups in ready state|
|vnodes\_total|DOUBLE||total number of vnode in cluster|
|vnodes\_alive|DOUBLE||total number of vnode in ready state|
|connections\_total|DOUBLE||total number of connections to cluster|
|topics\_total|DOUBLE||total number of topics in cluster|
|streams\_total|DOUBLE||total number of streams in cluster|
|grants_expire\_time|DOUBLE||time until grants expire in seconds|
|grants_timeseries\_used|DOUBLE||timeseries used|
|grants_timeseries\_total|DOUBLE||total timeseries|
|cluster\_id|VARCHAR|TAG|cluster id|
### taosd\_vgroups\_info table
`taosd_vgroups_info` table contains vgroups information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|tables\_num|DOUBLE||number of tables per vgroup|
|status|DOUBLE||status, value range:unsynced = 0, ready = 1|
|vgroup\_id|VARCHAR|TAG|vgroup id|
|database\_name|VARCHAR|TAG|database for the vgroup|
|cluster\_id|VARCHAR|TAG|cluster id|
### taosd\_dnodes\_info table
`taosd_dnodes_info` table contains dnodes information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|uptime|DOUBLE||dnode uptime in `seconds`|
|cpu\_engine|DOUBLE||cpu usage of tdengine. read from `/proc/<taosd_pid>/stat`|
|cpu\_system|DOUBLE||cpu usage of server. read from `/proc/stat`|
|cpu\_cores|DOUBLE||cpu cores of server|
|mem\_engine|DOUBLE||memory usage of tdengine. read from `/proc/<taosd_pid>/status`|
|mem\_free|DOUBLE||available memory on the server in `KB`|
|mem\_total|DOUBLE||total memory of server in `KB`|
|disk\_used|DOUBLE||usage of data dir in `bytes`|
|disk\_total|DOUBLE||the capacity of data dir in `bytes`|
|system\_net\_in|DOUBLE||network throughput rate in byte/s. read from `/proc/net/dev`|
|system\_net\_out|DOUBLE||network throughput rate in byte/s. read from `/proc/net/dev`|
|io\_read|DOUBLE||io throughput rate in byte/s. read from `/proc/<taosd_pid>/io`|
|io\_write|DOUBLE||io throughput rate in byte/s. read from `/proc/<taosd_pid>/io`|
|io\_read\_disk|DOUBLE||io throughput rate of disk in byte/s. read from `/proc/<taosd_pid>/io`|
|io\_write\_disk|DOUBLE||io throughput rate of disk in byte/s. read from `/proc/<taosd_pid>/io`|
|vnodes\_num|DOUBLE||number of vnodes per dnode|
|masters|DOUBLE||number of master vnodes|
|has\_mnode|DOUBLE||if the dnode has mnode, value range:include=1, not_include=0|
|has\_qnode|DOUBLE||if the dnode has qnode, value range:include=1, not_include=0|
|has\_snode|DOUBLE||if the dnode has snode, value range:include=1, not_include=0|
|has\_bnode|DOUBLE||if the dnode has bnode, value range:include=1, not_include=0|
|error\_log\_count|DOUBLE||error count|
|info\_log\_count|DOUBLE||info count|
|debug\_log\_count|DOUBLE||debug count|
|trace\_log\_count|DOUBLE||trace count|
|dnode\_id|VARCHAR|TAG|dnode id|
|dnode\_ep|VARCHAR|TAG|dnode endpoint|
|cluster\_id|VARCHAR|TAG|cluster id|
### taosd\_dnodes\_status table
`taosd_dnodes_status` table contains dnodes information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|status|DOUBLE||dnode status, value range: ready = 1, offline = 0|
|dnode\_id|VARCHAR|TAG|dnode id|
|dnode\_ep|VARCHAR|TAG|dnode endpoint|
|cluster\_id|VARCHAR|TAG|cluster id|
### taosd\_dnodes\_log\_dir table
`log_dir` table contains log directory information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|avail|DOUBLE||available space for log directory in `bytes`|
|used|DOUBLE||used space for data directory in `bytes`|
|total|DOUBLE||total space for data directory in `bytes`|
|name|VARCHAR|TAG|log directory. default is `/var/log/taos/`|
|dnode\_id|VARCHAR|TAG|dnode id|
|dnode\_ep|VARCHAR|TAG|dnode endpoint|
|cluster\_id|VARCHAR|TAG|cluster id|
### taosd\_dnodes\_data\_dir table
`taosd_dnodes_data_dir` table contains data directory information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|avail|DOUBLE||available space for data directory in `bytes`|
|used|DOUBLE||used space for data directory in `bytes`|
|total|DOUBLE||total space for data directory in `bytes`|
|level|VARCHAR|TAG|level for multi-level storage|
|name|VARCHAR|TAG|data directory. default is `/var/lib/taos`|
|dnode\_id|VARCHAR|TAG|dnode id|
|dnode\_ep|VARCHAR|TAG|dnode endpoint|
|cluster\_id|VARCHAR|TAG|cluster id|
### taosd\_mnodes\_info table
`taosd_mnodes_info` table contains mnode information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|role|DOUBLE||the role of mnode. value range:offline = 0,follower = 100,candidate = 101,leader = 102,error = 103,learner = 104|
|mnode\_id|VARCHAR|TAG|master node id|
|mnode\_ep|VARCHAR|TAG|master node endpoint|
|cluster\_id|VARCHAR|TAG|cluster id|
### taosd\_vnodes\_role table
`taosd_vnodes_role` table contains vnode role information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|role|DOUBLE||role. value range:offline = 0,follower = 100,candidate = 101,leader = 102,error = 103,learner = 104|
|vgroup\_id|VARCHAR|TAG|vgroup id|
|database\_name|VARCHAR|TAG|database for the vgroup|
|dnode\_id|VARCHAR|TAG|dnode id|
|cluster\_id|VARCHAR|TAG|cluster id|
### taosd\_sql\_req table
`taosd_sql_req` table contains taosd sql records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|count|DOUBLE||sql count|
|result|VARCHAR|TAG|sql execution result, value range: Success, Failed|
|username|VARCHAR|TAG|user name who executed the sql|
|sql\_type|VARCHAR|TAG|sql type, value range: inserted_rows|
|dnode\_id|VARCHAR|TAG|dnode id|
|dnode\_ep|VARCHAR|TAG|dnode endpoint|
|vgroup\_id|VARCHAR|TAG|vgroup id|
|cluster\_id|VARCHAR|TAG|cluster id|
### taos\_sql\_req table
`taos_sql_req` table contains taos sql records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|count|DOUBLE||sql count|
|result|VARCHAR|TAG|sql execution result, value range: Success, Failed|
|username|VARCHAR|TAG|user name who executed the sql|
|sql\_type|VARCHAR|TAG|sql type, value range: select, insert, delete|
|cluster\_id|VARCHAR|TAG|cluster id|
### taos\_slow\_sql table
`taos_slow_sql` table contains taos slow sql records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|count|DOUBLE||sql count|
|result|VARCHAR|TAG|sql execution result, value range: Success, Failed|
|username|VARCHAR|TAG|user name who executed the sql|
|duration|VARCHAR|TAG|sql execution duration, value range: 3-10s, 10-100s, 100-1000s, 1000s-|
|cluster\_id|VARCHAR|TAG|cluster id|
### keeper\_monitor table
`keeper_monitor` table contains keeper monitor information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|cpu|FLOAT||cpu usage|
|mem|FLOAT||memory usage|
|identify|NCHAR|TAG||
### taosadapter\_restful\_http\_request\_total table
`taosadapter_restful_http_request_total` table contains taosadapter rest request information record. The timestamp column of this table is `_ts`.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|\_ts|TIMESTAMP||timestamp|
|gauge|DOUBLE||metric value|
|client\_ip|NCHAR|TAG|client ip|
|endpoint|NCHAR|TAG|taosadapter endpoint|
|request\_method|NCHAR|TAG|request method|
|request\_uri|NCHAR|TAG|request uri|
|status\_code|NCHAR|TAG|status code|
### taosadapter\_restful\_http\_request\_fail table
`taosadapter_restful_http_request_fail` table contains taosadapter failed rest request information record. The timestamp column of this table is `_ts`.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|\_ts|TIMESTAMP||timestamp|
|gauge|DOUBLE||metric value|
|client\_ip|NCHAR|TAG|client ip|
|endpoint|NCHAR|TAG|taosadapter endpoint|
|request\_method|NCHAR|TAG|request method|
|request\_uri|NCHAR|TAG|request uri|
|status\_code|NCHAR|TAG|status code|
### taosadapter\_restful\_http\_request\_in\_flight table
`taosadapter_restful_http_request_in_flight` table contains taosadapter rest request information record in real time. The timestamp column of this table is `_ts`.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|\_ts|TIMESTAMP||timestamp|
|gauge|DOUBLE||metric value|
|endpoint|NCHAR|TAG|taosadapter endpoint|
### taosadapter\_restful\_http\_request\_summary\_milliseconds table
`taosadapter_restful_http_request_summary_milliseconds` table contains the summary of RESTful request information records. The timestamp column of this table is `_ts`.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|\_ts|TIMESTAMP||timestamp|
|count|DOUBLE|||
|sum|DOUBLE|||
|0.5|DOUBLE|||
|0.9|DOUBLE|||
|0.99|DOUBLE|||
|0.1|DOUBLE|||
|0.2|DOUBLE|||
|endpoint|NCHAR|TAG|taosadapter endpoint|
|request\_method|NCHAR|TAG|request method|
|request\_uri|NCHAR|TAG|request uri|
### taosadapter\_system\_mem\_percent table
`taosadapter_system_mem_percent` table contains taosadapter memory usage information. The timestamp of this table is `_ts`.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|\_ts|TIMESTAMP||timestamp|
|gauge|DOUBLE||metric value|
|endpoint|NCHAR|TAG|taosadapter endpoint|
### taosadapter\_system\_cpu\_percent table
`taosadapter_system_cpu_percent` table contains taosadapter cpu usage information. The timestamp of this table is `_ts`.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|\_ts|TIMESTAMP||timestamp|
|gauge|DOUBLE||metric value|
|endpoint|NCHAR|TAG|taosadapter endpoint|

View File

@ -1,72 +0,0 @@
---
title: Problem Diagnostics
description: This document describes how to diagnose issues with your TDengine cluster.
---
## Network Connection Diagnostics
When a TDengine client is unable to access a TDengine server, the network connection between the client side and the server side must be checked to find the root cause and resolve problems.
Diagnostics for network connections can be executed between Linux/Windows/macOS.
Diagnostic steps:
1. If the port range to be diagnosed is being occupied by a `taosd` server process, please first stop `taosd`.
2. On the server side, execute command `taos -n server -P <port> -l <pktlen>` to monitor the port range starting from the port specified by `-P` parameter with the role of "server".
3. On the client side, execute command `taos -n client -h <fqdn of server> -P <port> -l <pktlen>` to send a testing package to the specified server and port.
`-l <pktlen>`: The size of the testing package, in bytes. The value range is [11, 64,000] and the default value is 1,000.
Please note that the package length must be same in the above 2 commands executed on server side and client side respectively.
Output of the server side for the example is below:
```bash
# taos -n server -P 6030 -l 1000
network test server is initialized, port:6030
request is received, size:1000
request is received, size:1000
...
...
...
request is received, size:1000
request is received, size:1000
```
Output of the client side for the example is below:
```bash
# taos -n client -h 172.27.0.7 -P 6000
taos -n client -h v3s2 -P 6030 -l 1000
network test client is initialized, the server is v3s2:6030
request is sent, size:1000
response is received, size:1000
request is sent, size:1000
response is received, size:1000
...
...
...
request is sent, size:1000
response is received, size:1000
request is sent, size:1000
response is received, size:1000
total succ: 100/100 cost: 16.23 ms speed: 5.87 MB/s
```
The output needs to be checked carefully for the system operator to find the root cause and resolve the problem.
## Server Log
The parameter `debugFlag` is used to control the log level of the `taosd` server process. The default value is 131. For debugging and tracing, it needs to be set to either 135 or 143 respectively.
Once this parameter is set to 135 or 143, the log file grows very quickly especially when there is a huge volume of data insertion and data query requests. Ensure that the disk drive on which logs are stored has sufficient space.
## Client Log
An independent log file, named "taoslog+<seq num>", is generated for each client program, i.e. a client process. The parameter `debugFlag` is used to control the log level. The default value is 131. For debugging and tracing, it needs to be set to either 135 or 143 respectively.
The default value of `debugFlag` is also 131 and only logs at level of INFO/ERROR/WARNING are recorded. As stated above, for debugging and tracing, it needs to be changed to 135 or 143 respectively, so that logs at DEBUG or TRACE level can be recorded.
The maximum length of a single log file is controlled by parameter `numOfLogLines` and only 2 log files are kept for each `taosd` server process.
Log files are written in an async way to minimize the workload on disk, but the trade off for performance is that a few log lines may be lost in some extreme conditions. You can configure asynclog to 0 when needed for troubleshooting purposes to ensure that no log information is lost.

View File

@ -1,13 +0,0 @@
---
title: Administration
description: This document describes how to perform management operations on your TDengine cluster from an administrator's perspective.
---
This chapter is mainly written for system administrators. It covers download, install/uninstall, data import/export, system monitoring, user management, connection management, capacity planning and system optimization.
```mdx-code-block
import DocCardList from '@theme/DocCardList';
import {useCurrentSidebarCategory} from '@docusaurus/theme-common';
<DocCardList items={useCurrentSidebarCategory().items}/>
```

View File

@ -1,17 +0,0 @@
#### Unified Database Access Interface
```go title="Native Connection"
{{#include docs/examples/go/connect/cgoexample/main.go}}
```
```go title="REST Connection"
{{#include docs/examples/go/connect/restexample/main.go}}
```
#### Advanced Features
The af package of driver-go can also be used to establish connection, with this way some advanced features of TDengine, like parameter binding and subscription, can be used.
```go title="Establish native connection using af package"
{{#include docs/examples/go/connect/afconn/main.go}}
```

View File

@ -1,8 +0,0 @@
```rust title="Native Connection"
{{#include docs/examples/rust/nativeexample/examples/connect.rs}}
```
:::note
For Rust client library, the connection depends on the feature being used. If "ws" feature is enabled, then only the implementation for "websocket" is compiled and packaged.
:::

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

View File

@ -1,300 +0,0 @@
---
title: Connect to TDengine
sidebar_label: Connect
description: This document describes how to establish connections to TDengine and how to install and use TDengine client libraries.
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
import ConnJava from "./_connect_java.mdx";
import ConnGo from "./_connect_go.mdx";
import ConnRust from "./_connect_rust.mdx";
import ConnNode from "./_connect_node.mdx";
import ConnPythonNative from "./_connect_python.mdx";
import ConnCSNative from "./_connect_cs.mdx";
import ConnC from "./_connect_c.mdx";
import ConnR from "./_connect_r.mdx";
import ConnPHP from "./_connect_php.mdx";
import InstallOnLinux from "../../14-reference/05-connectors/_linux_install.mdx";
import InstallOnWindows from "../../14-reference/05-connectors/_windows_install.mdx";
import InstallOnMacOS from "../../14-reference/05-connectors/_macos_install.mdx";
import VerifyLinux from "../../14-reference/05-connectors/_verify_linux.mdx";
import VerifyWindows from "../../14-reference/05-connectors/_verify_windows.mdx";
import VerifyMacOS from "../../14-reference/05-connectors/_verify_macos.mdx";
Any application running on any platform can access TDengine through the REST API provided by TDengine. For information, see [REST API](../../reference/connectors/rest-api/). Applications can also use the client libraries for various programming languages, including C/C++, Java, Python, Go, Node.js, C#, and Rust, to access TDengine. These client libraries support connecting to TDengine clusters using the native interface (taosc). Some client libraries also support connecting over a REST interface. Community developers have also contributed several unofficial client libraries, such as the ADO.NET, Lua, and PHP libraries.
## Establish Connection
There are three ways for a client library to establish connections to TDengine:
1. Native connection through the TDengine client driver (taosc).
2. REST connection through the REST API provided by the taosAdapter component.
3. Websocket connection provided by the taosAdapter component.
![TDengine connection type](connection-type-en.webp)
For these ways of connections, client libraries provide similar APIs for performing operations and running SQL statements on your databases. The main difference is the method of establishing the connection, which is not visible to users.
Key differences:
1. For a Native connection, the client driver taosc and the server TDengine version must be compatible.
2. For a REST connection, users do not need to install the client driver taosc, providing the advantage of cross-platform ease of use. However, functions such as data subscription and binary data types are not available. Additionally, compared to Native and Websocket connections, a REST connection has the worst performance.
3. For a Websocket connection, users also do not need to install the client driver taosc.
4. To connect to a cloud service instance, you need to use the REST connection or Websocket connection.
Normally we recommend using **Websocket connection**.
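As a hedged Java sketch of the recommended approach, assuming taosAdapter is running on the default port 6041 and a taos-jdbcdriver version that supports the `TAOS-WS` URL scheme:
```java
import java.sql.Connection;
import java.sql.DriverManager;

public class WsConnectSketch {
    public static void main(String[] args) throws Exception {
        // Assumes taosAdapter is listening on the default port 6041 on localhost.
        String url = "jdbc:TAOS-WS://localhost:6041?user=root&password=taosdata";
        try (Connection conn = DriverManager.getConnection(url)) {
            System.out.println("Connected via WebSocket: " + !conn.isClosed());
        }
    }
}
```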
## Install Client Driver taosc
If you choose to use the native connection and the application is not on the same host as the TDengine server, the TDengine client driver taosc needs to be installed on the application host. If you choose to use the REST connection or the application is on the same host as the TDengine server, this step can be skipped. It is better to use the same version of taosc as the TDengine server.
### Install
<Tabs defaultValue="linux" groupId="os">
<TabItem value="linux" label="Linux">
<InstallOnLinux />
</TabItem>
<TabItem value="windows" label="Windows">
<InstallOnWindows />
</TabItem>
<TabItem value="macos" label="MacOS">
<InstallOnMacOS />
</TabItem>
</Tabs>
### Verify
After the above installation and configuration are done and making sure TDengine service is already started and in service, the TDengine command-line interface `taos` can be launched to access TDengine.
<Tabs defaultValue="linux" groupId="os">
<TabItem value="linux" label="Linux">
<VerifyLinux />
</TabItem>
<TabItem value="windows" label="Windows">
<VerifyWindows />
</TabItem>
<TabItem value="macos" label="MacOS">
<VerifyMacOS />
</TabItem>
</Tabs>
## Install Client Library
<Tabs groupId="lang">
<TabItem label="Java" value="java">
If Maven is used to manage the project, just add the following dependency to `pom.xml`.
```xml
<dependency>
<groupId>com.taosdata.jdbc</groupId>
<artifactId>taos-jdbcdriver</artifactId>
<version>3.3.3</version>
</dependency>
```
</TabItem>
<TabItem label="Python" value="python">
Install from PyPI using `pip`:
```
pip install taospy
```
Install from Git URL:
```
pip install git+https://github.com/taosdata/taos-connector-python.git
```
</TabItem>
<TabItem label="Go" value="go">
Just add the `driver-go` dependency to `go.mod`. Note that `latest` is a placeholder here; running `go get github.com/taosdata/driver-go/v3@latest` will resolve it to a concrete released version.
```go-mod title=go.mod
module goexample
go 1.17
require github.com/taosdata/driver-go/v3 latest
```
:::note
`driver-go` uses `cgo` to wrap the APIs provided by taosc. Because `cgo` requires `gcc` to compile the C source code, please make sure `gcc` is properly installed on your system.
:::
</TabItem>
<TabItem label="Rust" value="rust">
Just add the `taos` dependency to `Cargo.toml`.
```toml title=Cargo.toml
[dependencies]
taos = { version = "*"}
```
:::info
The Rust client library uses Cargo features to select how the connection is established. To establish a WebSocket connection, enable the `ws` feature.
```toml
taos = { version = "*", default-features = false, features = ["ws"] }
```
:::
</TabItem>
<TabItem label="Node.js" value="node">
The Node.js client library provides different connection types through separate packages.
1. Install Node.js Native Client Library
```
npm install @tdengine/client
```
:::note
It's recommended to use a Node.js version between `node-v12.8.0` and `node-v13.0.0`.
:::
2. Install Node.js REST Client Library
```
npm install @tdengine/rest
```
</TabItem>
<TabItem label="C#" value="csharp">
Just add a reference to [TDengine.Connector](https://www.nuget.org/packages/TDengine.Connector/) in the project configuration file.
```xml title=csharp.csproj
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<StartupObject>TDengineExample.AsyncQueryExample</StartupObject>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="TDengine.Connector" Version="3.1.0" />
</ItemGroup>
</Project>
```
Or add it with the `dotnet` command.
```
dotnet add package TDengine.Connector
```
:::note
The sample code below is based on .NET 6.0 and may need to be adjusted if your .NET version differs.
:::
</TabItem>
<TabItem label="R" value="r">
1. Download [taos-jdbcdriver-version-dist.jar](https://repo1.maven.org/maven2/com/taosdata/jdbc/taos-jdbcdriver/3.0.0/).
2. Install the dependency package `RJDBC`:
```R
install.packages("RJDBC")
```
</TabItem>
<TabItem label="C" value="c">
If the client driver (taosc) is already installed, then the C client library is already available.
<br/>
</TabItem>
<TabItem label="PHP" value="php">
**Download Source Code Package and Unzip:**
```shell
curl -L -o php-tdengine.tar.gz https://github.com/Yurunsoft/php-tdengine/archive/refs/tags/v1.0.2.tar.gz \
&& mkdir php-tdengine \
&& tar -xzf php-tdengine.tar.gz -C php-tdengine --strip-components=1
```
> Version `v1.0.2` is only an example; it can be replaced with any newer version.
**Non-Swoole Environment:**
```shell
phpize && ./configure && make -j && make install
```
**Specify TDengine Location:**
```shell
phpize && ./configure --with-tdengine-dir=/usr/local/Cellar/tdengine/3.0.0.0 && make -j && make install
```
> `--with-tdengine-dir=` should point to the TDengine installation location.
> This option is useful when the TDengine location can't be found automatically, or on macOS.
**Swoole Environment:**
```shell
phpize && ./configure --enable-swoole && make -j && make install
```
**Enable The Extension:**
Option One: Add `extension=tdengine` to `php.ini`.
Option Two: Specify the extension on the command line: `php -d extension=tdengine test.php`.
</TabItem>
</Tabs>
## Establish a connection
Prior to establishing a connection, please make sure TDengine is running and accessible. The following sample code assumes TDengine is running on the same host as the client program, with the FQDN configured to "localhost" and serverPort configured to "6030".
<Tabs groupId="lang" defaultValue="java">
<TabItem label="Java" value="java">
<ConnJava />
</TabItem>
<TabItem label="Python" value="python">
<ConnPythonNative />
</TabItem>
<TabItem label="Go" value="go">
<ConnGo />
</TabItem>
<TabItem label="Rust" value="rust">
<ConnRust />
</TabItem>
<TabItem label="Node.js" value="node">
<ConnNode />
</TabItem>
<TabItem label="C#" value="csharp">
<ConnCSNative />
</TabItem>
<TabItem label="R" value="r">
<ConnR/>
</TabItem>
<TabItem label="C" value="c">
<ConnC />
</TabItem>
<TabItem label="PHP" value="php">
<ConnPHP />
</TabItem>
</Tabs>
:::tip
If the connection fails, in most cases it's caused by improper configuration for FQDN or firewall. Please refer to the section "Unable to establish connection" in [FAQ](../../train-faq/faq).
:::

View File

@ -1,84 +0,0 @@
---
title: Data Model
description: This document describes the data model of TDengine.
---
The data model employed by TDengine is similar to that of a relational database. You have to create databases and tables. You must design the data model based on your own business and application requirements. You should design the [STable](../../concept/#super-table-stable) (an abbreviation for super table) schema to fit your data. This chapter will explain the big picture without getting into syntactical details.
Note: before you read this chapter, please make sure you have already read through [Key Concepts](../../concept/), since TDengine introduces new concepts like "one table for one [data collection point](../../concept/#data-collection-point)" and "[super table](../../concept/#super-table-stable)".
## Create Database
The characteristics of time-series data from different data collection points may differ. These characteristics include collection frequency, retention policy, and others, and they determine how you create and configure a database. For example, the number of days to keep data, the number of replicas, the data block size, and whether data updates are allowed are determined by the characteristics of your data and your business requirements. For TDengine to operate with the best performance, we strongly recommend that you create and configure different databases for data with different characteristics. This allows you, for example, to set up different storage and retention policies. When creating a database, many parameters can be configured, such as the days to keep data, the number of replicas, the size of the cache, the time precision, the minimum and maximum number of rows in each data block, whether compression is enabled, the time range of the data in a single data file, and so on. An example is shown as follows:
```sql
CREATE DATABASE power KEEP 365 DURATION 10 BUFFER 16 WAL_LEVEL 1;
```
In the above SQL statement:
- a database named "power" is created
- the data in it is retained for 365 days, which means that data older than 365 days will be deleted automatically
- a new data file will be created every 10 days
- the size of the write cache pool on each VNode is 16 MB
- WAL is enabled but fsync is disabled

For more details please refer to [Database](../../reference/taos-sql/database).
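Many of these parameters can also be inspected, and some adjusted, after the database has been created; a minimal sketch (which parameters are alterable depends on your TDengine version):
```sql
-- Inspect existing databases, then extend the retention period of the power database
SHOW DATABASES;
ALTER DATABASE power KEEP 730;
```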
After creating a database, the current database in use can be switched using SQL command `USE`. For example the SQL statement below switches the current database to `power`.
```sql
USE power;
```
Without a current database specified, a table name must be preceded by its database name.
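For example, assuming the `meters` supertable created in the following sections already exists, a query can reference it without `USE` by qualifying the name with the database (a minimal sketch):
```sql
-- Qualify the table with its database name instead of switching the current database
SELECT COUNT(*) FROM power.meters;
```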
:::note
- Any table or STable must belong to a database. To create a table or STable, the database it belongs to must be ready.
- Timestamp needs to be specified when inserting rows or querying historical rows.
:::
## Create STable
In a time-series application, there may be multiple kinds of data collection points. For example, in the electrical power system there are meters, transformers, bus bars, switches, etc. For easy and efficient aggregation of multiple tables, one STable needs to be created for each kind of data collection point. For example, for the meters in [table 1](../../concept/), the SQL statement below can be used to create the super table.
```sql
CREATE STABLE meters (ts timestamp, current float, voltage int, phase float) TAGS (location binary(64), groupId int);
```
Similar to creating a regular table, when creating a STable, the name and schema need to be provided. In the STable schema, the first column must always be a timestamp (like ts in the example), and the other columns (like current, voltage and phase in the example) are the data collected. The remaining columns can [contain data of type](../../reference/taos-sql/data-type/) integer, float, double, string etc. In addition, the schema for tags, like location and groupId in the example, must be provided. The tag type can be integer, float, string, etc. Tags are essentially the static properties of a data collection point. For example, properties like the location, device type, device group ID, manager ID are tags. Tags in the schema can be added, removed or updated. Please refer to [STable](../../reference/taos-sql/stable) for more details.
For each kind of data collection point, a corresponding STable must be created. There may be many STables in an application. For electrical power system, we need to create a STable respectively for meters, transformers, busbars, switches. There may be multiple kinds of data collection points on a single device, for example there may be one data collection point for electrical data like current and voltage and another data collection point for environmental data like temperature, humidity and wind direction. Multiple STables are required for these kinds of devices.
At most 4096 columns are allowed in a STable. If more than 4096 metrics need to be collected for a data collection point, multiple STables are required. There can be multiple databases in a system, and one or more STables can exist in a database.
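As a sketch of the multi-STable case described above, a second supertable for the environmental data collection point could look like the following; the name `env_sensors` and its columns are illustrative, not part of the official example:
```sql
-- Hypothetical STable for an environmental data collection point
CREATE STABLE env_sensors (ts TIMESTAMP, temperature FLOAT, humidity FLOAT, wind_direction INT) TAGS (location BINARY(64), groupId INT);
```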
## Create Table
A specific table needs to be created for each data collection point. Similar to RDBMS, table name and schema are required to create a table. Additionally, one or more tags can be created for each table. To create a table, a STable needs to be used as template and the values need to be specified for the tags. For example, for the smart meters table, the table can be created using below SQL statement.
```sql
CREATE TABLE d1001 USING meters TAGS ("California.SanFrancisco", 2);
```
In the above SQL statement, "d1001" is the table name, "meters" is the STable name, followed by the value of tag "Location" and the value of tag "groupId", which are "California.SanFrancisco" and "2" respectively in the example. The tag values can be updated after the table is created. Please refer to [Tables](../../reference/taos-sql/table) for details.
It's suggested to use the globally unique ID of a data collection point as the table name. For example the device serial number could be used as a unique ID. If a unique ID doesn't exist, multiple IDs that are not globally unique can be combined to form a globally unique ID. It's not recommended to use a globally unique ID as tag value.
## Create Table Automatically
In some circumstances, it's unknown whether the table already exists when inserting rows. The table can be created automatically using the SQL statement below, and nothing will happen if the table already exists.
```sql
INSERT INTO d1001 USING meters TAGS ("California.SanFrancisco", 2) VALUES (now, 10.2, 219, 0.32);
```
In the above SQL statement, a row with value `(now, 10.2, 219, 0.32)` will be inserted into table "d1001". If table "d1001" doesn't exist, it will be created automatically using STable "meters" as template with tag value `"California.SanFrancisco", 2`.
For more details please refer to [Create Table Automatically](../../reference/taos-sql/insert#automatically-create-table-when-inserting).
## Single Column vs Multiple Column
TDengine supports a multi-column data model. As long as multiple metrics are collected by the same data collection point at the same time, i.e. the timestamps are identical, these metrics can be put into a single STable as columns. However, there is another kind of design, the single-column data model, in which a table is created for each metric. This means that a STable is required for each kind of metric. For example, in a single-column model, 3 STables would be required for current, voltage and phase.
It's recommended to use the multi-column data model as much as possible because insert and query performance is higher. In some cases, however, the collected metrics may change frequently, and the corresponding STable schema would then need to change frequently too. In such cases, it's more convenient to use the single-column data model.
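For comparison, a single-column design for the same smart meters would require one STable per metric; a hypothetical sketch (the STable names are illustrative):
```sql
-- Single-column data model: one STable per metric (illustrative names)
CREATE STABLE meters_current (ts TIMESTAMP, value FLOAT) TAGS (location BINARY(64), groupId INT);
CREATE STABLE meters_voltage (ts TIMESTAMP, value INT) TAGS (location BINARY(64), groupId INT);
CREATE STABLE meters_phase (ts TIMESTAMP, value FLOAT) TAGS (location BINARY(64), groupId INT);
```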

View File

@ -1,144 +0,0 @@
---
title: Insert Using SQL
description: This document describes how to insert data into TDengine using SQL.
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
import JavaSQL from "./_java_sql.mdx";
import JavaStmt from "./_java_stmt.mdx";
import PySQL from "./_py_sql.mdx";
import PyStmt from "./_py_stmt.mdx";
import GoSQL from "./_go_sql.mdx";
import GoStmt from "./_go_stmt.mdx";
import RustSQL from "./_rust_sql.mdx";
import RustStmt from "./_rust_stmt.mdx";
import NodeSQL from "./_js_sql.mdx";
import NodeStmt from "./_js_stmt.mdx";
import CsSQL from "./_cs_sql.mdx";
import CsStmt from "./_cs_stmt.mdx";
import CSQL from "./_c_sql.mdx";
import CStmt from "./_c_stmt.mdx";
import PhpSQL from "./_php_sql.mdx";
import PhpStmt from "./_php_stmt.mdx";
## Introduction
Application programs can execute `INSERT` statements through client libraries to insert rows. The TDengine CLI can also be used to manually insert data.
### Insert Single Row
The below SQL statement is used to insert one row into table "d1001".
```sql
INSERT INTO d1001 VALUES (ts1, 10.3, 219, 0.31);
```
`ts1` is a Unix timestamp. Only timestamps that are not older than the current time minus the database `KEEP` parameter are allowed. For further details, refer to the [TDengine SQL insert timestamp section](../../../reference/taos-sql/insert).
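As a concrete sketch of the statement above, `ts1` can be replaced with an explicit timestamp that falls inside the `KEEP` window, or with the `NOW` keyword:
```sql
-- Replace the ts1 placeholder with an explicit timestamp or NOW
INSERT INTO d1001 VALUES ('2024-12-06 10:00:00.000', 10.3, 219, 0.31);
INSERT INTO d1001 VALUES (NOW, 10.3, 219, 0.31);
```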
### Insert Multiple Rows
Multiple rows can be inserted in a single SQL statement. The example below inserts 2 rows into table "d1001".
```sql
INSERT INTO d1001 VALUES (ts1, 10.2, 220, 0.23) (ts2, 10.3, 218, 0.25);
```
`ts1` and `ts2` are Unix timestamps. Only timestamps that are not older than the current time minus the database `KEEP` parameter are allowed. For further details, refer to the [TDengine SQL insert timestamp section](../../../reference/taos-sql/insert).
### Insert into Multiple Tables
Data can be inserted into multiple tables in the same SQL statement. The example below inserts 2 rows into table "d1001" and 1 row into table "d1002".
```sql
INSERT INTO d1001 VALUES (ts1, 10.3, 219, 0.31) (ts2, 12.6, 218, 0.33) d1002 VALUES (ts3, 12.3, 221, 0.31);
```
`ts1`, `ts2` and `ts3` are Unix timestamps. Only timestamps that are not older than the current time minus the database `KEEP` parameter are allowed. For further details, refer to the [TDengine SQL insert timestamp section](../../../reference/taos-sql/insert).
For more details about `INSERT` please refer to [INSERT](../../../reference/taos-sql/insert).
:::info
- Inserting in batches can improve performance. The higher the batch size, the better the performance. Please note that a single row can't exceed 48K bytes and each SQL statement can't exceed 1MB.
- Inserting with multiple threads can also improve performance. However, at a certain point, increasing the number of threads no longer offers any benefit and can even decrease performance due to the overhead involved in frequent thread switching. The optimal number of threads for a system depends on the processing capabilities and configuration of the server, the configuration of the database, the data schema, and the batch size for writing data. In general, more powerful clients and servers can support higher numbers of concurrently writing threads. Given a sufficiently powerful server, a higher number of vgroups for a database also increases the number of concurrent writes. Finally, a simpler data schema enables more concurrent writes as well.
:::
:::warning
- If the timestamp of a new record already exists in a table, the columns for which new data is provided replace the old values at that timestamp, while columns without new data are not affected.
- The timestamp to be inserted must be newer than the timestamp of subtracting current time by the parameter `KEEP`. If `KEEP` is set to 3650 days, then the data older than 3650 days ago can't be inserted. The timestamp to be inserted cannot be newer than the timestamp of current time plus parameter `DURATION`. If `DURATION` is set to 2, the data newer than 2 days later can't be inserted.
:::
## Sample program
### Insert Using SQL
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
<JavaSQL />
</TabItem>
<TabItem label="Python" value="python">
<PySQL />
</TabItem>
<TabItem label="Go" value="go">
<GoSQL />
</TabItem>
<TabItem label="Rust" value="rust">
<RustSQL />
</TabItem>
<TabItem label="Node.js" value="node">
<NodeSQL />
</TabItem>
<TabItem label="C#" value="csharp">
<CsSQL />
</TabItem>
<TabItem label="C" value="c">
<CSQL />
</TabItem>
<TabItem label="PHP" value="php">
<PhpSQL />
</TabItem>
</Tabs>
:::note
1. With either native connection or REST connection, the above samples can work well.
2. Please note that `use db` can't be used with a REST connection because REST connections are stateless; in the samples, `dbName.tbName` is used to specify the table name (see the sketch after this note).
:::
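For example, with a REST connection the statements above can qualify table names with the database instead of relying on `USE`; a minimal sketch assuming the `power` database and `meters` supertable from the previous chapters:
```sql
-- Fully qualified table names work with stateless REST connections
INSERT INTO power.d1001 VALUES (NOW, 10.3, 219, 0.31);
SELECT COUNT(*) FROM power.meters;
```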
### Insert with Parameter Binding
TDengine also provides API support for parameter binding. Similar to MySQL, only `?` can be used in these APIs to represent the parameters to bind. This avoids the resource consumption of SQL syntax parsing when writing data through the parameter binding interface, thus significantly improving write performance in most cases.
Parameter binding is available only with native connection.
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
<JavaStmt />
</TabItem>
<TabItem label="Python" value="python">
<PyStmt />
</TabItem>
<TabItem label="Go" value="go">
<GoStmt />
</TabItem>
<TabItem label="Rust" value="rust">
<RustStmt />
</TabItem>
<TabItem label="Node.js" value="node">
<NodeStmt />
</TabItem>
<TabItem label="C#" value="csharp">
<CsStmt />
</TabItem>
<TabItem label="C" value="c">
<CStmt />
</TabItem>
<TabItem label="PHP" value="php">
<PhpStmt />
</TabItem>
</Tabs>

View File

@ -1,47 +0,0 @@
---
title: Write from Kafka
description: This document describes how to insert data into TDengine using Kafka.
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
import PyKafka from "./_py_kafka.mdx";
## About Kafka
Apache Kafka is an open-source distributed event streaming platform used by thousands of companies for high-performance data pipelines, streaming analytics, data integration, and mission-critical applications. For the key concepts of Kafka, please refer to the [Kafka documentation](https://kafka.apache.org/documentation/#gettingStarted).
### Kafka Topics
Messages in Kafka are organized by topics. A topic may have one or more partitions. Kafka topics can be managed through the `kafka-topics` tool.
Create a topic named `kafka-events`:
```
bin/kafka-topics.sh --create --topic kafka-events --bootstrap-server localhost:9092
```
Alter `kafka-events` topic to set partitions to 3:
```
bin/kafka-topics.sh --alter --topic kafka-events --partitions 3 --bootstrap-server=localhost:9092
```
Show all topics and partitions in Kafka:
```
bin/kafka-topics.sh --bootstrap-server=localhost:9092 --describe
```
## Insert into TDengine
We can write data into TDengine via SQL or Schemaless. For more information, please refer to [Insert Using SQL](../sql-writing/) or [High Performance Writing](../high-volume/) or [Schemaless Writing](../../../reference/schemaless/).
## Examples
<Tabs defaultValue="Python" groupId="lang">
<TabItem label="Python" value="Python">
<PyKafka />
</TabItem>
</Tabs>

View File

@ -1,80 +0,0 @@
---
title: InfluxDB Line Protocol
sidebar_label: InfluxDB Line Protocol
description: This document describes how to insert data into TDengine using the InfluxDB Line Protocol.
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
import JavaLine from "./_java_line.mdx";
import PyLine from "./_py_line.mdx";
import GoLine from "./_go_line.mdx";
import RustLine from "./_rust_line.mdx";
import NodeLine from "./_js_line.mdx";
import CsLine from "./_cs_line.mdx";
import CLine from "./_c_line.mdx";
## Introduction
In the InfluxDB Line protocol format, a single line of text is used to represent one row of data. Each line contains 4 parts as shown below.
```
measurement,tag_set field_set timestamp
```
- `measurement` will be used as the name of the STable. Enter a comma (,) between `measurement` and `tag_set`.
- `tag_set` will be used as tags, with the format `<tag_key>=<tag_value>,<tag_key>=<tag_value>`. Enter a space between `tag_set` and `field_set`.
- `field_set` will be used as data columns, with the format `<field_key>=<field_value>,<field_key>=<field_value>`. Enter a space between `field_set` and `timestamp`.
- `timestamp` is the primary key timestamp corresponding to this row of data.
For example:
```
meters,location=California.LosAngeles,groupid=2 current=13.4,voltage=223,phase=0.29 1648432611249500
```
:::note
- All the data in `tag_set` will be converted to NCHAR type automatically
- Each value in `field_set` must be self-descriptive about its data type. For example, 1.2f32 means a float value of 1.2. Without the "f" type suffix, the value will be treated as type double.
- Multiple kinds of precision can be used for the `timestamp` field. Time precision can be from nanosecond (ns) to hour (h)
- The rule of table name
- The child table name is created automatically in a rule to guarantee its uniqueness.
- You can configure `smlAutoChildTableNameDelimiter` in taos.cfg to specify a delimiter between tag values as the table names. For example, you set `smlAutoChildTableNameDelimiter=-` in taos.cfg, when you insert `st,t0=cpu1,t1=4 c1=3 1626006833639000000`, the child table will be `cpu1-4`
- You can configure `smlChildTableName` in taos.cfg to specify a tag value as the table names if the tag value is unique globally. For example, if a tag is called `tname` and you set `smlChildTableName=tname` in taos.cfg, when you insert `st,tname=cpu1,t1=4 c1=3 1626006833639000000`, the child table `cpu1` will be created automatically. Note that if multiple rows have the same tname but different tag_set values, the tag_set of the first row is used to create the table and the others are ignored
- It is assumed that the field order in a supertable is consistent, meaning that the first record contains all fields and subsequent records store fields in the same order. If the order is not consistent, set `smlDataFormat` in taos.cfg to false; otherwise, data will be written out of order and a database error will occur. (`smlDataFormat` defaults to false since version 3.0.1.3 and is deprecated since 3.0.3.0.)
:::
For more details please refer to [InfluxDB Line Protocol](https://docs.influxdata.com/influxdb/v2.0/reference/syntax/line-protocol/) and [TDengine Schemaless](../../../reference/schemaless/)
## Examples
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
<JavaLine />
</TabItem>
<TabItem label="Python" value="Python">
<PyLine />
</TabItem>
<TabItem label="Go" value="go">
<GoLine />
</TabItem>
<TabItem label="Node.js" value="node">
<NodeLine />
</TabItem>
<TabItem label="C#" value="csharp">
<CsLine />
</TabItem>
<TabItem label="C" value="c">
<CLine />
</TabItem>
</Tabs>
## Query Examples
If you want to query the data where `location=California.LosAngeles,groupid=2`, here is the query SQL:
```sql
SELECT * FROM meters WHERE location = "California.LosAngeles" AND groupid = 2;
```

View File

@ -1,95 +0,0 @@
---
title: OpenTSDB Line Protocol
sidebar_label: OpenTSDB Line Protocol
description: This document describes how to insert data into TDengine using the OpenTSDB Line Protocol.
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
import JavaTelnet from "./_java_opts_telnet.mdx";
import PyTelnet from "./_py_opts_telnet.mdx";
import GoTelnet from "./_go_opts_telnet.mdx";
import RustTelnet from "./_rust_opts_telnet.mdx";
import NodeTelnet from "./_js_opts_telnet.mdx";
import CsTelnet from "./_cs_opts_telnet.mdx";
import CTelnet from "./_c_opts_telnet.mdx";
## Introduction
A single line of text is used in OpenTSDB line protocol to represent one row of data. OpenTSDB employs a single column data model, so each line can only contain a single data column. There can be multiple tags. Each line contains 4 parts as below:
```txt
<metric> <timestamp> <value> <tagk_1>=<tagv_1>[ <tagk_n>=<tagv_n>]
```
- `metric` will be used as the STable name.
- `timestamp` is the timestamp of current row of data. The time precision will be determined automatically based on the length of the timestamp. Second and millisecond time precision are supported.
- `value` is the metric and must be a numeric value. The corresponding column name is "value".
- The last part is the tag set separated by spaces, all tags will be converted to NCHAR type automatically.
For example:
```txt
meters.current 1648432611250 11.3 location=California.LosAngeles groupid=3
```
- The rule of table name
- The child table name is created automatically in a rule to guarantee its uniqueness.
- You can configure `smlAutoChildTableNameDelimiter` in taos.cfg to specify a delimiter between tag values as the table names. For example, you set `smlAutoChildTableNameDelimiter=-` in taos.cfg, when you insert `st,t0=cpu1,t1=4 c1=3 1626006833639000000`, the child table will be `cpu1-4`
- You can configure `smlChildTableName` in taos.cfg to specify a tag value as the table names if the tag value is unique globally. For example, if a tag is called `tname` and you set `smlChildTableName=tname` in taos.cfg, when you insert `st,tname=cpu1,t1=4 c1=3 1626006833639000000`, the child table `cpu1` will be created automatically. Note that if multiple rows have the same tname but different tag_set values, the tag_set of the first row is used to create the table and the others are ignored
Please refer to [OpenTSDB Telnet API](http://opentsdb.net/docs/build/html/api_telnet/put.html) for more details.
## Examples
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
<JavaTelnet />
</TabItem>
<TabItem label="Python" value="Python">
<PyTelnet />
</TabItem>
<TabItem label="Go" value="go">
<GoTelnet />
</TabItem>
<TabItem label="Node.js" value="node">
<NodeTelnet />
</TabItem>
<TabItem label="C#" value="csharp">
<CsTelnet />
</TabItem>
<TabItem label="C" value="c">
<CTelnet />
</TabItem>
</Tabs>
In the above sample code, 2 STables will be created automatically, each with 4 rows of data.
```cmd
taos> use test;
Database changed.
taos> show stables;
name |
=================================
meters_current |
meters_voltage |
Query OK, 2 row(s) in set (0.002544s)
taos> select tbname, * from `meters_current`;
tbname | _ts | _value | groupid | location |
==================================================================================================================================
t_0e7bcfa21a02331c06764f275... | 2022-03-28 09:56:51.249 | 10.800000000 | 3 | California.LosAngeles |
t_0e7bcfa21a02331c06764f275... | 2022-03-28 09:56:51.250 | 11.300000000 | 3 | California.LosAngeles |
t_7e7b26dd860280242c6492a16... | 2022-03-28 09:56:51.249 | 10.300000000 | 2 | California.SanFrancisco |
t_7e7b26dd860280242c6492a16... | 2022-03-28 09:56:51.250 | 12.600000000 | 2 | California.SanFrancisco |
Query OK, 4 row(s) in set (0.005399s)
```
## Query Examples
If you want to query the data where `location=California.LosAngeles groupid=3`, here is the query SQL:
```sql
SELECT * FROM `meters_current` WHERE location = "California.LosAngeles" AND groupid = 3;
```

View File

@ -1,108 +0,0 @@
---
title: OpenTSDB JSON Protocol
sidebar_label: OpenTSDB JSON Protocol
description: This document describes how to insert data into TDengine using the OpenTSDB JSON protocol.
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
import JavaJson from "./_java_opts_json.mdx";
import PyJson from "./_py_opts_json.mdx";
import GoJson from "./_go_opts_json.mdx";
import RustJson from "./_rust_opts_json.mdx";
import NodeJson from "./_js_opts_json.mdx";
import CsJson from "./_cs_opts_json.mdx";
import CJson from "./_c_opts_json.mdx";
## Introduction
A JSON string is used in the OpenTSDB JSON protocol to represent one or more rows of data. For example:
```json
[
{
"metric": "sys.cpu.nice",
"timestamp": 1346846400,
"value": 18,
"tags": {
"host": "web01",
"dc": "lga"
}
},
{
"metric": "sys.cpu.nice",
"timestamp": 1346846400,
"value": 9,
"tags": {
"host": "web02",
"dc": "lga"
}
}
]
```
Similar to OpenTSDB line protocol, `metric` will be used as the STable name, `timestamp` is the timestamp to be used, `value` represents the metric collected, `tags` are the tag sets.
Please refer to [OpenTSDB HTTP API](http://opentsdb.net/docs/build/html/api_http/put.html) for more details.
:::note
- In JSON protocol, strings will be converted to NCHAR type and numeric values will be converted to double type.
- The rule of table name
- The child table name is created automatically in a rule to guarantee its uniqueness.
- You can configure `smlAutoChildTableNameDelimiter` in taos.cfg to specify a delimiter between tag values as the table names. For example, you set `smlAutoChildTableNameDelimiter=-` in taos.cfg, when you insert `st,t0=cpu1,t1=4 c1=3 1626006833639000000`, the child table will be `cpu1-4`
- You can configure `smlChildTableName` in taos.cfg to specify a tag value as the table names if the tag value is unique globally. For example, if a tag is called `tname` and you set `smlChildTableName=tname` in taos.cfg, when you insert `st,tname=cpu1,t1=4 c1=3 1626006833639000000`, the child table `cpu1` will be created automatically. Note that if multiple rows have the same tname but different tag_set values, the tag_set of the first row is used to create the table and the others are ignored
:::
## Examples
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
<JavaJson />
</TabItem>
<TabItem label="Python" value="Python">
<PyJson />
</TabItem>
<TabItem label="Go" value="go">
<GoJson />
</TabItem>
<TabItem label="Node.js" value="node">
<NodeJson />
</TabItem>
<TabItem label="C#" value="csharp">
<CsJson />
</TabItem>
<TabItem label="C" value="c">
<CJson />
</TabItem>
</Tabs>
In the above sample code, 2 STables will be created automatically, each with 2 rows of data.
```cmd
taos> use test;
Database changed.
taos> show stables;
name |
=================================
meters_current |
meters_voltage |
Query OK, 2 row(s) in set (0.001954s)
taos> select * from `meters_current`;
_ts | _value | groupid | location |
===================================================================================================================
2022-03-28 09:56:51.249 | 10.300000000 | 2.000000000 | California.SanFrancisco |
2022-03-28 09:56:51.250 | 12.600000000 | 2.000000000 | California.SanFrancisco |
Query OK, 2 row(s) in set (0.004076s)
```
## Query Examples
If you want query the data of "tags": &lcub;"location": "California.LosAngeles", "groupid": 1&rcub;, here is the query SQL:
```sql
SELECT * FROM `meters_current` WHERE location = "California.LosAngeles" AND groupid = 3;
```

View File

@ -1,442 +0,0 @@
---
title: High Performance Writing
sidebar_label: High Performance Writing
description: This document describes how to achieve high performance when writing data into TDengine.
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
This chapter introduces how to write data into TDengine with high throughput.
## How to achieve high performance data writing
To achieve high performance writing, there are a few aspects to consider; the following sections describe these important factors.
### Application Program
From the perspective of application program, you need to consider:
1. The data size of each single write, also known as the batch size. Generally speaking, a larger batch size gives better writing performance. However, once the batch size exceeds a certain value, increasing it further brings no additional benefit. When using SQL to write into TDengine, it's better to put as much data as possible in a single SQL statement. The maximum SQL length supported by TDengine is 1,048,576 bytes, i.e. 1 MB.
2. The number of concurrent connections. Normally, more connections give better results. However, once the number of connections exceeds the processing capacity of the server side, performance may degrade.
3. The distribution of the data to be written across tables or sub-tables. Writing to a single table in one batch is more efficient than writing to multiple tables in one batch.
4. Data writing protocol.
- Parameter binding mode is more efficient than SQL because it avoids the cost of parsing SQL.
- Writing to known existing tables is more efficient than writing in automatic table creation mode, because the latter needs to check whether the table exists before actually writing the data.
- Writing in SQL is more efficient than writing in schemaless mode, because schemaless writing creates tables automatically and may alter table schemas.
Application programs need to take these factors into account and try to take advantage of them. The application program should write to a single table in each write batch. The batch size needs to be tuned to a proper value for the specific system, and the number of concurrent connections also needs to be tuned to achieve the best writing throughput.
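Putting the factors above together, each write issued by the application would ideally be a single large multi-row statement targeting one sub-table; a sketch assuming the `meters` schema used elsewhere in these docs:
```sql
-- One batch, one sub-table, many rows; the sub-table is auto-created on first write
INSERT INTO d1001 USING meters TAGS ('California.SanFrancisco', 2) VALUES
  ('2022-07-14 10:40:00.000', 10.3, 219, 0.31)
  ('2022-07-14 10:40:00.001', 10.2, 220, 0.23)
  ('2022-07-14 10:40:00.002', 10.1, 218, 0.25);
```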
### Data Source
Application programs need to read data from a data source and then write it into TDengine. If you meet one or more of the situations below, you need to set up message queues between the threads that read from the data source and the threads that write into TDengine.
1. There are multiple data sources, the data generation speed of each data source is much slower than the speed of single writing thread. In this case, the purpose of message queues is to consolidate the data from multiple data sources together to increase the batch size of single write.
2. The speed of data generation from single data source is much higher than the speed of single writing thread. The purpose of message queue in this case is to provide buffer so that data is not lost and multiple writing threads can get data from the buffer.
3. The data for single table are from multiple data source. In this case the purpose of message queues is to combine the data for single table together to improve the write efficiency.
If the data source is Kafka, then the application program is a Kafka consumer and can benefit from some Kafka features to achieve high performance writing:
1. Put the data for a table in a single partition of a single topic so that it's easier to group the data for each table together and write in batches.
2. Subscribe to multiple topics to accumulate data together.
3. Add more consumers to gain more concurrency and throughput.
4. Increase the size of a single fetch to increase the size of a write batch.
### Tune TDengine
On the server side, the database configuration parameter `vgroups` needs to be set carefully to maximize system performance. If it's set too low, the system capacity can't be fully utilized; if it's set too high, unnecessary resource competition may occur. A general recommendation is to set `vgroups` to twice the number of CPU cores. However, depending on the actual system resources, it may still need to be tuned.
For more configuration parameters, please refer to [Database Configuration](../../../reference/taos-sql/database) and [Server Configuration](../../../reference/config).
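As a rough sketch of the recommendation above, on a 16-core server the database might be created with `vgroups` set to about twice the core count; the database name and parameter values here are illustrative only:
```sql
-- Illustrative only: vgroups roughly 2x the CPU core count on a 16-core server
CREATE DATABASE telemetry VGROUPS 32 BUFFER 256;
```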
## Sample Programs
This section will introduce the sample programs to demonstrate how to write into TDengine with high performance.
### Scenario
Below is the scenario for the sample programs of high performance writing.
- The application program reads data from a data source; the sample program simulates a data source by generating data.
- The speed of a single writing thread is much slower than the speed of generating data, so the program starts multiple writing threads. Each thread establishes a connection to TDengine and has a message queue of fixed size.
- The application program maps the received data to different writing threads based on the table name, to make sure all the data for each table is always processed by a specific writing thread.
- Each writing thread writes the received data into TDengine once the accumulated data reaches a threshold or the message queue becomes empty.
![Thread Model of High Performance Writing into TDengine](highvolume.webp)
### Sample Programs
The sample programs listed in this section are based on the scenario described above. If your scenario is different, please adjust the code based on the principles described in this chapter.
The sample programs assume the source data belongs to different sub-tables of the same super table (meters). The super table has been created before the sample program starts writing data. Sub-tables are created automatically according to the received data. If there are multiple super tables in your case, please adjust the part that creates tables automatically.
<Tabs defaultValue="java" groupId="lang">
<TabItem label="Java" value="java">
**Program Inventory**
| Class | Description |
| ---------------- | ----------------------------------------------------------------------------------------------------- |
| FastWriteExample | Main Program |
| ReadTask | Read data from simulated data source and put into a queue according to the hash value of table name |
| WriteTask | Read data from Queue, compose a write batch and write into TDengine |
| MockDataSource | Generate data for some sub tables of super table meters |
| SQLWriter | WriteTask uses this class to compose SQL, create table automatically, check SQL length and write data |
| StmtWriter | Write in Parameter binding mode (Not finished yet) |
| DataBaseMonitor | Calculate the writing speed and output on console every 10 seconds |
Below is the complete code of the classes in the above table, along with more detailed descriptions.
<details>
<summary>FastWriteExample</summary>
The main Program is responsible for:
1. Create message queues
2. Start writing threads
3. Start reading threads
4. Output writing speed every 10 seconds
The main program provides 4 parameters for tuning:
1. The number of reading threads, default value is 1
2. The number of writing threads, default value is 2
3. The total number of tables in the generated data, default value is 1000. These tables are distributed evenly across all writing threads. If the number of tables is very large, it will take a long time to create them at startup.
4. The batch size of single write, default value is 3,000
The capacity of the message queue also impacts performance and can be tuned by modifying the program. Generally, a larger message queue is better: it means a lower possibility of being blocked when enqueueing and higher throughput, but it also consumes more memory. The default value used in the sample programs is already big enough.
```java
{{#include docs/examples/java/src/main/java/com/taos/example/highvolume/FastWriteExample.java}}
```
</details>
<details>
<summary>ReadTask</summary>
ReadTask reads data from data source. Each ReadTask is associated with a simulated data source, each data source generates data for a group of specific tables, and the data of any table is only generated from a single specific data source.
ReadTask puts data in message queue in blocking mode. That means, the putting operation is blocked if the message queue is full.
```java
{{#include docs/examples/java/src/main/java/com/taos/example/highvolume/ReadTask.java}}
```
</details>
<details>
<summary>WriteTask</summary>
```java
{{#include docs/examples/java/src/main/java/com/taos/example/highvolume/WriteTask.java}}
```
</details>
<details>
<summary>MockDataSource</summary>
```java
{{#include docs/examples/java/src/main/java/com/taos/example/highvolume/MockDataSource.java}}
```
</details>
<details>
<summary>SQLWriter</summary>
The SQLWriter class encapsulates the logic of composing SQL and writing data. Note that the tables are not created in advance; they are created automatically when the "table does not exist" exception is caught. For other exceptions, the SQL that caused the exception is logged to help you debug.
```java
{{#include docs/examples/java/src/main/java/com/taos/example/highvolume/SQLWriter.java}}
```
</details>
<details>
<summary>DataBaseMonitor</summary>
```java
{{#include docs/examples/java/src/main/java/com/taos/example/highvolume/DataBaseMonitor.java}}
```
</details>
**Steps to Launch**
<details>
<summary>Launch Java Sample Program</summary>
You need to set the environment variable `TDENGINE_JDBC_URL` before launching the program. If TDengine Server is set up on localhost, then the default user name, password and port can be used, like below:
```
TDENGINE_JDBC_URL="jdbc:TAOS://localhost:6030?user=root&password=taosdata"
```
**Launch in IDE**
1. Clone TDengine repository
```
git clone git@github.com:taosdata/TDengine.git --depth 1
```
2. Use IDE to open `docs/examples/java` directory
3. Configure environment variable `TDENGINE_JDBC_URL`, you can also configure it before launching the IDE, if so you can skip this step.
4. Run class `com.taos.example.highvolume.FastWriteExample`
**Launch on server**
If you want to launch the sample program on a remote server, please follow the steps below:
1. Package the sample programs. Execute below command under directory `TDengine/docs/examples/java`:
```
mvn package
```
2. Create `examples/java` directory on the server
```
mkdir -p examples/java
```
3. Copy dependencies (below commands assume you are working on a local Windows host and try to launch on a remote Linux host)
- Copy dependent packages
```
scp -r .\target\lib <user>@<host>:~/examples/java
```
- Copy the jar of sample programs
```
scp -r .\target\javaexample-1.0.jar <user>@<host>:~/examples/java
```
4. Configure environment variable
Edit `~/.bash_profile` or `~/.bashrc` and add below:
```
export TDENGINE_JDBC_URL="jdbc:TAOS://localhost:6030?user=root&password=taosdata"
```
If your TDengine server is not deployed on localhost or doesn't use default port, you need to change the above URL to correct value in your environment.
5. Launch the sample program
```
java -classpath lib/*:javaexample-1.0.jar com.taos.example.highvolume.FastWriteExample <read_thread_count> <write_thread_count> <total_table_count> <max_batch_size>
```
6. The sample program doesn't exit unless you press <kbd>CTRL</kbd> + <kbd>C</kbd> to terminate it.
Below is the output of running on a server of 16 cores, 64GB memory and SSD hard disk.
```
root@vm85$ java -classpath lib/*:javaexample-1.0.jar com.taos.example.highvolume.FastWriteExample 2 12
18:56:35.896 [main] INFO c.t.e.highvolume.FastWriteExample - readTaskCount=2, writeTaskCount=12 tableCount=1000 maxBatchSize=3000
18:56:36.011 [WriteThread-0] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.015 [WriteThread-0] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.021 [WriteThread-1] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.022 [WriteThread-1] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.031 [WriteThread-2] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.032 [WriteThread-2] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.041 [WriteThread-3] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.042 [WriteThread-3] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.093 [WriteThread-4] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.094 [WriteThread-4] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.099 [WriteThread-5] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.100 [WriteThread-5] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.100 [WriteThread-6] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.101 [WriteThread-6] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.103 [WriteThread-7] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.104 [WriteThread-7] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.105 [WriteThread-8] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.107 [WriteThread-8] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.108 [WriteThread-9] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.109 [WriteThread-9] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.156 [WriteThread-10] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.157 [WriteThread-11] INFO c.taos.example.highvolume.WriteTask - started
18:56:36.158 [WriteThread-10] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:36.158 [ReadThread-0] INFO com.taos.example.highvolume.ReadTask - started
18:56:36.158 [ReadThread-1] INFO com.taos.example.highvolume.ReadTask - started
18:56:36.158 [WriteThread-11] INFO c.taos.example.highvolume.SQLWriter - maxSQLLength=1048576
18:56:46.369 [main] INFO c.t.e.highvolume.FastWriteExample - count=18554448 speed=1855444
18:56:56.946 [main] INFO c.t.e.highvolume.FastWriteExample - count=39059660 speed=2050521
18:57:07.322 [main] INFO c.t.e.highvolume.FastWriteExample - count=59403604 speed=2034394
18:57:18.032 [main] INFO c.t.e.highvolume.FastWriteExample - count=80262938 speed=2085933
18:57:28.432 [main] INFO c.t.e.highvolume.FastWriteExample - count=101139906 speed=2087696
18:57:38.921 [main] INFO c.t.e.highvolume.FastWriteExample - count=121807202 speed=2066729
18:57:49.375 [main] INFO c.t.e.highvolume.FastWriteExample - count=142952417 speed=2114521
18:58:00.689 [main] INFO c.t.e.highvolume.FastWriteExample - count=163650306 speed=2069788
18:58:11.646 [main] INFO c.t.e.highvolume.FastWriteExample - count=185019808 speed=2136950
```
</details>
</TabItem>
<TabItem label="Python" value="python">
**Program Inventory**
The Python sample program uses multiple processes and cross-process message queues.
| Function/Class | Description |
| ---------------------------- | --------------------------------------------------------------------------- |
| main Function | Program entry point, create child processes and message queues |
| run_monitor_process Function | Create database, super table, calculate writing speed and output to console |
| run_read_task Function | Read data and distribute to message queues |
| MockDataSource Class | Simulate data source, return next 1,000 rows of each table |
| run_write_task Function | Read as much as possible data from message queue and write in batch |
| SQLWriter Class | Write in SQL and create table automatically |
| StmtWriter Class | Write in parameter binding mode (not finished yet) |
<details>
<summary>main function</summary>
The `main` function is responsible for creating message queues and forking child processes. There are 3 kinds of child processes:
1. Monitoring process, which initializes the database and calculates the writing speed
2. Reading processes (n), which read data from the data source
3. Writing processes (m), which write data into TDengine
The `main` function accepts 5 parameters:
1. The number of reading tasks, default value is 1
2. The number of writing tasks, default value is 1
3. The number of tables, default value is 1,000
4. The capacity of message queue, default value is 1,000,000 bytes
5. The batch size in single write, default value is 3000
```python
{{#include docs/examples/python/fast_write_example.py:main}}
```
</details>
<details>
<summary>run_monitor_process</summary>
The monitoring process initializes the database and monitors the writing speed.
```python
{{#include docs/examples/python/fast_write_example.py:monitor}}
```
</details>
<details>
<summary>run_read_task function</summary>
The reading process reads data from the source data system and distributes it to the message queue allocated to it.
```python
{{#include docs/examples/python/fast_write_example.py:read}}
```
</details>
<details>
<summary>MockDataSource</summary>
Below is the simulated data source; we assume each generated record carries its target table name.
```python
{{#include docs/examples/python/mockdatasource.py}}
```
</details>
<details>
<summary>run_write_task function</summary>
The writing process reads as much data as possible from the message queue and writes it in batches.
```python
{{#include docs/examples/python/fast_write_example.py:write}}
```
</details>
<details>
<summary>SQLWriter</summary>
The SQLWriter class encapsulates the logic of composing SQL and writing data. Note that the tables are not created in advance; they are created automatically when the "table does not exist" exception is caught. For other exceptions, the SQL that caused the exception is logged to help you debug. This class also checks the SQL length and passes the maximum SQL length via the maxSQLLength parameter according to the actual TDengine limit.
```python
{{#include docs/examples/python/sql_writer.py}}
```
</details>
**Steps to Launch**
<details>
<summary>Launch Sample Program in Python</summary>
1. Prerequisites
- TDengine client driver has been installed
- Python 3 has been installed, version >= 3.8
- TDengine Python client library `taospy` has been installed
2. Install faster-fifo to replace the Python built-in multiprocessing.Queue
```
pip3 install faster-fifo
```
3. Click "Copy" in the sample programs above to copy `fast_write_example.py`, `sql_writer.py`, and `mockdatasource.py`.
4. Execute the program
```
python3 fast_write_example.py <READ_TASK_COUNT> <WRITE_TASK_COUNT> <TABLE_COUNT> <QUEUE_SIZE> <MAX_BATCH_SIZE>
```
Below is the output of running on a server of 16 cores, 64GB memory and SSD hard disk.
```
root@vm85$ python3 fast_write_example.py 8 8
2022-07-14 19:13:45,869 [root] - READ_TASK_COUNT=8, WRITE_TASK_COUNT=8, TABLE_COUNT=1000, QUEUE_SIZE=1000000, MAX_BATCH_SIZE=3000
2022-07-14 19:13:48,882 [root] - WriteTask-0 started with pid 718347
2022-07-14 19:13:48,883 [root] - WriteTask-1 started with pid 718348
2022-07-14 19:13:48,884 [root] - WriteTask-2 started with pid 718349
2022-07-14 19:13:48,884 [root] - WriteTask-3 started with pid 718350
2022-07-14 19:13:48,885 [root] - WriteTask-4 started with pid 718351
2022-07-14 19:13:48,885 [root] - WriteTask-5 started with pid 718352
2022-07-14 19:13:48,886 [root] - WriteTask-6 started with pid 718353
2022-07-14 19:13:48,886 [root] - WriteTask-7 started with pid 718354
2022-07-14 19:13:48,887 [root] - ReadTask-0 started with pid 718355
2022-07-14 19:13:48,888 [root] - ReadTask-1 started with pid 718356
2022-07-14 19:13:48,889 [root] - ReadTask-2 started with pid 718357
2022-07-14 19:13:48,889 [root] - ReadTask-3 started with pid 718358
2022-07-14 19:13:48,890 [root] - ReadTask-4 started with pid 718359
2022-07-14 19:13:48,891 [root] - ReadTask-5 started with pid 718361
2022-07-14 19:13:48,892 [root] - ReadTask-6 started with pid 718364
2022-07-14 19:13:48,893 [root] - ReadTask-7 started with pid 718365
2022-07-14 19:13:56,042 [DataBaseMonitor] - count=6676310 speed=667631.0
2022-07-14 19:14:06,196 [DataBaseMonitor] - count=20004310 speed=1332800.0
2022-07-14 19:14:16,366 [DataBaseMonitor] - count=32290310 speed=1228600.0
2022-07-14 19:14:26,527 [DataBaseMonitor] - count=44438310 speed=1214800.0
2022-07-14 19:14:36,673 [DataBaseMonitor] - count=56608310 speed=1217000.0
2022-07-14 19:14:46,834 [DataBaseMonitor] - count=68757310 speed=1214900.0
2022-07-14 19:14:57,280 [DataBaseMonitor] - count=80992310 speed=1223500.0
2022-07-14 19:15:07,689 [DataBaseMonitor] - count=93805310 speed=1281300.0
2022-07-14 19:15:18,020 [DataBaseMonitor] - count=106111310 speed=1230600.0
2022-07-14 19:15:28,356 [DataBaseMonitor] - count=118394310 speed=1228300.0
2022-07-14 19:15:38,690 [DataBaseMonitor] - count=130742310 speed=1234800.0
2022-07-14 19:15:49,000 [DataBaseMonitor] - count=143051310 speed=1230900.0
2022-07-14 19:15:59,323 [DataBaseMonitor] - count=155276310 speed=1222500.0
2022-07-14 19:16:09,649 [DataBaseMonitor] - count=167603310 speed=1232700.0
2022-07-14 19:16:19,995 [DataBaseMonitor] - count=179976310 speed=1237300.0
```
</details>
:::note
When using the Python client library in multi-process mode, don't establish a connection to TDengine in the parent process; otherwise, all connections in the child processes will be blocked forever. This is a known issue.
:::
</TabItem>
</Tabs>

View File

@ -1,3 +0,0 @@
```c
{{#include docs/examples/c/line_example.c:main}}
```

View File

@ -1,3 +0,0 @@
```c
{{#include docs/examples/c/json_protocol_example.c:main}}
```

View File

@ -1,3 +0,0 @@
```c
{{#include docs/examples/c/telnet_line_example.c:main}}
```

View File

@ -1,3 +0,0 @@
```c
{{#include docs/examples/c/insert_example.c}}
```

View File

@ -1,6 +0,0 @@
```c title=Single Row Binding
{{#include docs/examples/c/stmt_example.c}}
```
```c title=Multiple Row Binding 72:117
{{#include docs/examples/c/multi_bind_example.c}}
```

View File

@ -1,3 +0,0 @@
```csharp
{{#include docs/examples/csharp/influxdbLine/Program.cs}}
```

View File

@ -1,3 +0,0 @@
```csharp
{{#include docs/examples/csharp/optsJSON/Program.cs}}
```

View File

@ -1,3 +0,0 @@
```csharp
{{#include docs/examples/csharp/optsTelnet/Program.cs}}
```

View File

@ -1,3 +0,0 @@
```csharp
{{#include docs/examples/csharp/sqlInsert/Program.cs}}
```

View File

@ -1,3 +0,0 @@
```csharp
{{#include docs/examples/csharp/stmtInsert/Program.cs}}
```

View File

@ -1,3 +0,0 @@
```go
{{#include docs/examples/go/insert/line/main.go}}
```

View File

@ -1,3 +0,0 @@
```go
{{#include docs/examples/go/insert/json/main.go}}
```

View File

@ -1,3 +0,0 @@
```go
{{#include docs/examples/go/insert/telnet/main.go}}
```

View File

@ -1,3 +0,0 @@
```go
{{#include docs/examples/go/insert/sql/main.go}}
```

View File

@ -1,8 +0,0 @@
```go
{{#include docs/examples/go/insert/stmt/main.go}}
```
:::tip
The `github.com/taosdata/driver-go/v3/wrapper` module in driver-go is the wrapper for the C API; it can be used to insert data with parameter binding.
:::

View File

@ -1,3 +0,0 @@
```java
{{#include docs/examples/java/src/main/java/com/taos/example/LineProtocolExample.java}}
```

Some files were not shown because too many files have changed in this diff