feat: merge from 3.0
This commit is contained in: commit 5ed0c48af0
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.16)
+cmake_minimum_required(VERSION 3.0)

 project(
   TDengine
@@ -35,7 +35,7 @@ endif(${BUILD_TEST})
 add_subdirectory(source)
 add_subdirectory(tools)
 add_subdirectory(tests)
-add_subdirectory(example)
+add_subdirectory(examples/c)

 # docs
 add_subdirectory(docs)
@@ -269,7 +269,7 @@ pipeline {
       }
     }
     stage('linux test') {
-      agent{label " slave3_0 || slave15 || slave16 || slave17 "}
+      agent{label " worker03 || slave215 || slave217 || slave219 "}
       options { skipDefaultCheckout() }
       when {
         changeRequest()
@@ -287,9 +287,9 @@ pipeline {
         '''
         sh '''
           cd ${WKC}/tests/parallel_test
-          export DEFAULT_RETRY_TIME=1
+          export DEFAULT_RETRY_TIME=2
          date
-          timeout 2100 time ./run.sh -e -m /home/m.json -t /tmp/cases.task -b ${BRANCH_NAME} -l ${WKDIR}/log -o 480
+          timeout 2100 time ./run.sh -e -m /home/m.json -t /tmp/cases.task -b ${BRANCH_NAME}_${BUILD_ID} -l ${WKDIR}/log -o 480
         '''
       }
     }
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.16)
+cmake_minimum_required(VERSION 3.0)

 set(CMAKE_VERBOSE_MAKEFILE OFF)
@@ -46,7 +46,7 @@ ENDIF ()

 IF (TD_WINDOWS)
   MESSAGE("${Yellow} set compiler flag for Windows! ${ColourReset}")
-  SET(COMMON_FLAGS "/w /D_WIN32 /Zi")
+  SET(COMMON_FLAGS "/w /D_WIN32 /DWIN32 /Zi")
   SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /MANIFEST:NO")
   # IF (MSVC AND (MSVC_VERSION GREATER_EQUAL 1900))
   #   SET(COMMON_FLAGS "${COMMON_FLAGS} /Wv:18")
@@ -49,7 +49,7 @@ IF(${TD_WINDOWS})
 option(
   BUILD_TEST
   "If build unit tests using googletest"
-  OFF
+  ON
 )
 ELSE ()
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.16)
+cmake_minimum_required(VERSION 3.0)

 MESSAGE("Current system is ${CMAKE_SYSTEM_NAME}")
@@ -243,7 +243,7 @@ void console(SRaftServer *pRaftServer) {

   } else if (strcmp(cmd, "dropnode") == 0) {
-    char host[HOST_LEN];
+    char host[HOST_LEN] = {0};
     uint32_t port;
     parseAddr(param1, host, HOST_LEN, &port);
     uint64_t rid = raftId(host, port);
@@ -258,7 +258,7 @@ void console(SRaftServer *pRaftServer) {

   } else if (strcmp(cmd, "put") == 0) {
-    char buf[256];
+    char buf[256] = {0};
     snprintf(buf, sizeof(buf), "%s--%s", param1, param2);
     putValue(&pRaftServer->raft, buf);
@@ -62,7 +62,7 @@ The main features of TDengine are as follows:

 <figure>

-![TDengine Database technical ecosystem](eco_system.png)
+![TDengine Database technical ecosystem](eco_system.webp)

 </figure>
 <center>Figure 1. TDengine technical ecosystem</center>
@@ -52,7 +52,7 @@ INSERT INTO d1001 VALUES (1538548685000, 10.3, 219, 0.31) (1538548695000, 12.6,

 :::info

-- To improve write efficiency, write in batches. The more records in a batch, the higher the insert efficiency. However, a single record cannot exceed 16K, and the total length of one SQL statement cannot exceed 1M.
+- To improve write efficiency, write in batches. The more records in a batch, the higher the insert efficiency. However, a single record cannot exceed 48K, and the total length of one SQL statement cannot exceed 1M.
 - TDengine supports concurrent writes from multiple threads. To further improve write speed, a client should open more than 20 threads writing concurrently. Beyond a certain thread count, however, throughput stops improving and may even drop, because frequent thread switching adds overhead.

 :::
@@ -145,7 +145,7 @@ void subscribe_callback(TAOS_SUB* tsub, TAOS_RES *res, void* param, int code) {
 taos_unsubscribe(tsub, keep);
 ```

-Its second parameter decides whether subscription progress is kept on the client side. If this parameter is **false** (**0**), the subscription can only restart from the beginning, no matter what the `restart` argument is the next time `taos_subscribe` is called. Progress information is stored under the directory _{DataDir}/subscribe/_; each subscription has a file there named after its `topic`, and deleting such a file likewise forces the corresponding subscription to restart from the beginning when it is created next time.
+Its second parameter decides whether subscription progress is kept on the client side. If this parameter is **false** (**0**), the subscription can only restart from the beginning, no matter what the `restart` argument is the next time `taos_subscribe` is called. Progress information is stored under the directory _{DataDir}/subscribe/_ (note: the default value of the `DataDir` parameter in `taos.cfg` is **/var/lib/taos/**, but this directory does not exist on Windows servers, so on Windows the `DataDir` parameter must be changed to an existing directory); each subscription has a file there named after its `topic`, and deleting such a file likewise forces the corresponding subscription to restart from the beginning when it is created next time.

 With the code covered, let's look at the actual behavior at runtime. Assume:
@@ -4,6 +4,8 @@ title: Supported Data Types
 description: "Data types supported by TDengine: timestamp, float, JSON, etc."
 ---

+## Timestamps
+
 With TDengine, the most important thing is the timestamp. A timestamp must be specified when creating and inserting records, and when querying historical records. The rules for timestamps are:

 - The time format is `YYYY-MM-DD HH:mm:ss.MS`, with a default time resolution of milliseconds. For example: `2017-08-12 18:25:58.128`
@@ -12,39 +14,59 @@ description: "Data types supported by TDengine: timestamp, float, JSON, etc."
 - Epoch Time: a timestamp can also be a long integer, the number of milliseconds since 1970-01-01 00:00:00.000 (UTC/GMT). (Accordingly, if the precision of the database is set to "microseconds", a long-integer timestamp means the number of microseconds since 1970-01-01 00:00:00.000 (UTC/GMT); the nanosecond case is analogous.)
 - Time can be added and subtracted; for example, now-2h pushes the query time back 2 hours (the most recent 2 hours). The time unit after the number can be b (nanoseconds), u (microseconds), a (milliseconds), s (seconds), m (minutes), h (hours), d (days), or w (weeks). For example, `select * from t1 where ts > now-2w and ts <= now-1w` queries exactly one week of data from two weeks ago. When specifying the time window (interval) of a down-sampling operation, the units n (natural month) and y (natural year) can also be used.

-TDengine's default timestamp precision is milliseconds, but microseconds and nanoseconds are also supported via the PRECISION parameter of `CREATE DATABASE`. (Nanosecond precision is supported since version 2.1.5.0.)
+TDengine's default timestamp precision is milliseconds, but microseconds and nanoseconds are also supported via the PRECISION parameter of `CREATE DATABASE`.

+```sql
+CREATE DATABASE db_name PRECISION 'ns';
+```
 ## Data Types

-In TDengine, the following 10 data types can be used in the data model of an ordinary table.
+In TDengine, the following data types can be used in the data model of an ordinary table.

 | #   | **Type** | **Bytes** | **Description** |
 | --- | :------: | --------- | --------------- |
-| 1 | TIMESTAMP | 8 | Timestamp. Default precision is milliseconds; microseconds and nanoseconds are supported. Counted from 1970-01-01 00:00:00.000 (UTC/GMT); earlier timestamps are not allowed. (This range restriction was removed in version 2.0.18.0; nanosecond precision is supported since version 2.1.5.0.) |
-| 2 | INT | 4 | Integer, range [-2^31+1, 2^31-1]; -2^31 is used for NULL |
-| 3 | BIGINT | 8 | Long integer, range [-2^63+1, 2^63-1]; -2^63 is used for NULL |
-| 4 | FLOAT | 4 | Float, 6-7 significant digits, range [-3.4E38, 3.4E38] |
-| 5 | DOUBLE | 8 | Double, 15-16 significant digits, range [-1.7E308, 1.7E308] |
-| 6 | BINARY | custom | Single-byte string; recommended only for visible ASCII characters. Multi-byte characters such as Chinese must use nchar. In theory up to 16374 bytes. binary only supports string input, quoted with single quotes. A size must be specified: binary(20) defines a string of at most 20 single-byte characters, each occupying 1 byte of storage, for a fixed total of 20 bytes; a longer user string raises an error. A single quote inside the string can be escaped as backslash plus single quote, i.e. `\'`. |
-| 7 | SMALLINT | 2 | Short integer, range [-32767, 32767]; -32768 is used for NULL |
-| 8 | TINYINT | 1 | Single-byte integer, range [-127, 127]; -128 is used for NULL |
-| 9 | BOOL | 1 | Boolean, {true, false} |
-| 10 | NCHAR | custom | String that may contain multi-byte characters such as Chinese. Each nchar character occupies 4 bytes of storage. The string is quoted with single quotes; single quotes inside it are escaped as `\'`. A size must be specified: an nchar(10) column stores at most 10 nchar characters and occupies a fixed 40 bytes. A longer user string raises an error. |
-| 11 | JSON | | JSON type; only tags can be in JSON format |
+| 1 | TIMESTAMP | 8 | Timestamp. Default precision is milliseconds; microseconds and nanoseconds are supported, see the previous section for details. |
+| 2 | INT | 4 | Integer, range [-2^31, 2^31-1] |
+| 3 | INT UNSIGNED | 4 | Unsigned integer, [0, 2^32-1] |
+| 4 | BIGINT | 8 | Long integer, range [-2^63, 2^63-1] |
+| 5 | BIGINT UNSIGNED | 8 | Unsigned long integer, range [0, 2^64-1] |
+| 6 | FLOAT | 4 | Float, 6-7 significant digits, range [-3.4E38, 3.4E38] |
+| 7 | DOUBLE | 8 | Double, 15-16 significant digits, range [-1.7E308, 1.7E308] |
+| 8 | BINARY | custom | Single-byte string; recommended only for visible ASCII characters. Multi-byte characters such as Chinese must use nchar. |
+| 9 | SMALLINT | 2 | Short integer, range [-32768, 32767] |
+| 10 | SMALLINT UNSIGNED | 2 | Unsigned short integer, range [0, 65535] |
+| 11 | TINYINT | 1 | Single-byte integer, range [-128, 127] |
+| 12 | TINYINT UNSIGNED | 1 | Unsigned single-byte integer, range [0, 255] |
+| 13 | BOOL | 1 | Boolean, {true, false} |
+| 14 | NCHAR | custom | String that may contain multi-byte characters such as Chinese. Each nchar character occupies 4 bytes of storage. The string is quoted with single quotes; single quotes inside it are escaped as `\'`. A size must be specified: an nchar(10) column stores at most 10 nchar characters and occupies a fixed 40 bytes. A longer user string raises an error. |
+| 15 | JSON | | JSON type; only tags can be in JSON format |
+| 16 | VARCHAR | custom | Alias for the BINARY type |
-:::tip
-TDengine treats English characters in SQL statements as case-insensitive, converting them to lowercase before execution. Case-sensitive strings and passwords must therefore be enclosed in single quotes.
-
-:::
-
 :::note
-Although the BINARY type supports byte-oriented binary characters at the storage level, different programming languages do not guarantee consistent handling of binary data. It is therefore recommended to store only visible ASCII characters in BINARY columns and to avoid invisible characters. Multi-byte data such as Chinese characters should be stored with NCHAR. If Chinese characters are forced into BINARY, reads and writes may sometimes work, but the data carries no character-set information and is prone to garbling or even corruption.
+- TDengine treats English characters in SQL statements as case-insensitive, converting them to lowercase before execution. Case-sensitive strings and passwords must therefore be enclosed in single quotes.
+- Although the BINARY type supports byte-oriented binary characters at the storage level, different programming languages do not guarantee consistent handling of binary data. It is therefore recommended to store only visible ASCII characters in BINARY columns and to avoid invisible characters. Multi-byte data such as Chinese characters should be stored with NCHAR. If Chinese characters are forced into BINARY, reads and writes may sometimes work, but the data carries no character-set information and is prone to garbling or even corruption.
+- The BINARY type can in theory hold up to 16374 bytes. binary only supports string input, quoted with single quotes. A size must be specified: binary(20) defines a string of at most 20 single-byte characters, each occupying 1 byte of storage, for a fixed total of 20 bytes; a longer user string raises an error. A single quote inside the string can be escaped as backslash plus single quote, i.e. `\'` (see the example after this note).
+- Numeric literals in SQL statements are judged to be integer or floating point by the presence of a decimal point or scientific notation, so beware of overflow for the corresponding type. For example, 9999999999999999999 overflows the upper bound of a long integer, while 9999999999999999999.0 is treated as a valid floating-point number.

 :::
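The sizing and escaping rules in the note above are easier to see with a concrete schema. A minimal sketch follows; the table and column names are hypothetical, made up here for illustration:

```sql
-- binary(20): at most 20 single-byte characters (20 bytes fixed);
-- nchar(10): at most 10 multi-byte characters (40 bytes fixed)
CREATE TABLE sensor_info (ts TIMESTAMP, note BINARY(20), cname NCHAR(10));
-- a single quote inside a string literal is escaped as \'
INSERT INTO sensor_info VALUES (NOW, 'it\'s ok', '传感器一号');
```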
 ## Constants
 TDengine supports constants of several types, detailed in the table below:

 | # | **Syntax** | **Type** | **Description** |
 | --- | :-------: | --------- | --------------- |
 | 1 | [{+ \| -}]123 | BIGINT | Integer literals are always of type BIGINT. If the input exceeds the BIGINT range, TDengine truncates the value as BIGINT. |
 | 2 | 123.45 | DOUBLE | Floating-point literals are always of type DOUBLE. TDengine decides whether a numeric value is integer or floating point by the presence of a decimal point or scientific notation. |
 | 3 | 1.2E3 | DOUBLE | Scientific-notation literals are of type DOUBLE. |
 | 4 | 'abc' | BINARY | Content in single quotes is a string literal of type BINARY; its size is the actual number of characters. Single quotes inside the string can be escaped as backslash plus single quote, i.e. \'. |
 | 5 | "abc" | BINARY | Content in double quotes is a string literal of type BINARY; its size is the actual number of characters. Double quotes inside the string can be escaped as backslash plus double quote, i.e. \". |
 | 6 | TIMESTAMP {'literal' \| "literal"} | TIMESTAMP | The TIMESTAMP keyword indicates that the following string literal is interpreted as TIMESTAMP. The string must be in YYYY-MM-DD HH:mm:ss.MS format, with the time resolution of the current database. |
 | 7 | {TRUE \| FALSE} | BOOL | Boolean literals. |
 | 8 | {'' \| "" \| '\t' \| "\t" \| ' ' \| " " \| NULL } | -- | NULL literals, usable with any type. |

 :::note
-Numeric literals in SQL statements are judged to be integer or floating point by the presence of a decimal point or scientific notation, so beware of overflow for the corresponding type. For example, 9999999999999999999 overflows the upper bound of a long integer, while 9999999999999999999.0 is treated as a valid floating-point number.
+- TDengine decides whether a numeric value is integer or floating point by the presence of a decimal point or scientific notation, so beware of overflow for the corresponding type. For example, 9999999999999999999 overflows the upper bound of a long integer, while 9999999999999999999.0 is treated as a valid floating-point number (see the example after this note).

 :::
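A quick, hedged illustration of the literal-typing rules above; whether a bare `SELECT` of a literal is accepted depends on the TDengine version, so treat this as a sketch:

```sql
-- an integer literal is typed as BIGINT; this one exceeds the BIGINT range
SELECT 9999999999999999999;
-- adding a decimal point makes it a DOUBLE, which is valid
SELECT 9999999999999999999.0;
-- the TIMESTAMP keyword turns a string literal into a timestamp value
SELECT TIMESTAMP '2022-04-01 12:00:00.000';
```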
@@ -12,7 +12,7 @@ CREATE TABLE [IF NOT EXISTS] tb_name (timestamp_field_name TIMESTAMP, field1_nam
 1. The first column of a table must be TIMESTAMP and is automatically set as the primary key by the system;
 2. The maximum table name length is 192;
-3. Each row of a table cannot exceed 16k characters; (note: each BINARY/NCHAR column additionally occupies 2 bytes of storage)
+3. Each row of a table cannot exceed 48KB; (note: each BINARY/NCHAR column additionally occupies 2 bytes of storage)
 4. Subtable names may contain only letters, digits, and underscores, must not start with a digit, and are case-insensitive
 5. When using the binary or nchar data types, the maximum byte length must be specified, e.g. binary(20) means 20 bytes;
 6. For compatibility with more forms of table names, TDengine introduces the new escape character "`", which lets table names avoid conflicts with keywords and bypass the name-legality checks above, although the length limits still apply. With the escape character, the escaped content is no longer case-normalized (see the example below).
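A sketch of rule 6 above. The names are hypothetical, and whether a given keyword is allowed even when escaped may vary by version:

```sql
-- backticks let a keyword-like subtable name pass the name checks
CREATE TABLE `select` USING meters TAGS ('California.SanFrancisco', 2);
-- the escaped name keeps its original case
INSERT INTO `select` VALUES (NOW, 10.2, 219, 0.32);
```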
@@ -86,7 +86,7 @@ ALTER STABLE stb_name MODIFY COLUMN field_name data_type(length);
 ALTER STABLE stb_name ADD TAG new_tag_name tag_type;
 ```

-Adds a new tag to the STable and specifies its type. The total number of tags cannot exceed 128, and their total length cannot exceed 16k characters.
+Adds a new tag to the STable and specifies its type. The total number of tags cannot exceed 128, and their total length cannot exceed 16KB.

 ### Dropping a tag

(File diff suppressed because it is too large.)
@@ -11,7 +11,7 @@ TDengine supports aggregation queries segmented by time window, for example
 The INTERVAL clause produces windows of equal time span, and SLIDING specifies how far the window slides forward. Each executed query covers one time window, and the window slides forward as time flows. When defining a continuous query, both the time window size and the forward sliding time must be specified. As in the figure, [t0s, t0e], [t1s, t1e], [t2s, t2e] are the time-window ranges of three consecutive query executions, and the sliding time marks how far the window moves forward. Filtering, aggregation, and other operations are executed independently per time window. When SLIDING equals INTERVAL, the sliding window becomes a tumbling window.

-![Time window diagram](./timewindow-1.webp)
+![TDengine Database time window diagram](./timewindow-1.webp)

 INTERVAL and SLIDING must be used together with aggregate and selection functions. The following SQL statement is invalid:
@@ -33,7 +33,7 @@ _ Since version 2.1.5.0, the minimum time interval allowed in INTERVAL statements is adjusted to
 An integer (boolean) or string is used to identify the state of the device when a record is produced. Records with the same state value belong to the same state window; the window closes when the value changes. As shown in the figure below, the state windows determined by the state value are [2019-04-28 14:22:07, 2019-04-28 14:22:10] and [2019-04-28 14:22:11, 2019-04-28 14:22:12]. (State windows are not yet supported on STables.)

-![Time window diagram](./timewindow-3.webp)
+![TDengine Database time window diagram](./timewindow-3.webp)

 Use STATE_WINDOW to specify the column that determines the state-window partitioning. For example:
@@ -45,7 +45,7 @@ SELECT COUNT(*), FIRST(ts), status FROM temp_tb_1 STATE_WINDOW(status);
 A session window determines membership by the value of the timestamp primary key. As shown below, with the continuous timestamp gap set to at most 12 seconds, the following 6 records form 2 session windows: [2019-04-28 14:22:10, 2019-04-28 14:22:30] and [2019-04-28 14:23:10, 2019-04-28 14:23:30], because the gap between 2019-04-28 14:22:30 and 2019-04-28 14:23:10 is 40 seconds, exceeding the continuous gap (12 seconds).

-![Time window diagram](./timewindow-2.webp)
+![TDengine Database time window diagram](./timewindow-2.webp)

 Results within the tol_value gap are considered to belong to the same window; if the time between two consecutive records exceeds tol_val, the next window starts automatically. (Session windows are not yet supported on STables.)
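As a sketch of the session window just described, mirroring the STATE_WINDOW example earlier (the table `temp_tb_1` comes from that example; the 12-second tolerance is the one used in the figure):

```sql
-- group rows into sessions whose consecutive timestamps are at most 12s apart
SELECT COUNT(*), FIRST(ts) FROM temp_tb_1 SESSION(ts, 12s);
```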
@@ -7,9 +7,9 @@ title: Boundary Limits
 - The maximum database name length is 32.
 - The maximum table name length is 192, excluding the database-name prefix and the separator.
-- The maximum row length is 16k characters; since version 2.1.7.0, 48k characters (note: each BINARY/NCHAR column in a data row additionally occupies 2 bytes of storage).
+- The maximum row length is 48KB (note: each BINARY/NCHAR column in a data row additionally occupies 2 bytes of storage).
 - The maximum column name length is 64; at most 4096 columns are allowed, at least 2 are required, and the first must be a timestamp. Note: before version 2.1.7.0 (exclusive) at most 1024 columns were allowed.
-- The maximum tag name length is 64; at most 128 tags are allowed, at least 1 is required, and the total length of tag values in a table cannot exceed 16k characters.
+- The maximum tag name length is 64; at most 128 tags are allowed, at least 1 is required, and the total length of tag values in a table cannot exceed 16KB.
 - The maximum SQL statement length is 1048576 characters; it can also be changed via the client configuration parameter maxSQLLength, range 65480 to 1048576.
 - A SELECT result may return at most 4096 columns (function calls in the statement may also occupy column slots); when over the limit, explicitly specify fewer result columns to avoid execution errors. Note: before version 2.1.7.0 (exclusive) the maximum was 1024 columns.
 - The numbers of databases, STables, and tables are not limited by the system, only by system resources.
@@ -23,17 +23,17 @@ title: TDengine Parameter Limits and Reserved Keywords
 with `` '"`\ `` removed (single and double quotes, apostrophe, backslash, space)

 - Database name: must not contain "." or special characters; at most 32 characters
-- Table name: must not contain "." or special characters; together with its database name, at most 192 characters; maximum row length 16k characters
-- Table column name: must not contain special characters; at most 64 characters
+- Table name: must not contain "." or special characters; together with its database name, at most 192 bytes; maximum row length 48KB
+- Table column name: must not contain special characters; at most 64 bytes
 - Database, table, and column names must not start with a digit; the legal character set is "English letters, digits, and underscore"
 - Number of table columns: at most 1024, at least 2, the first being a timestamp (since version 2.1.7.0, at most 4096 columns)
-- Maximum record length: including the 8-byte timestamp, at most 16KB (each BINARY/NCHAR column additionally occupies 2 bytes of storage)
+- Maximum record length: including the 8-byte timestamp, at most 48KB (each BINARY/NCHAR column additionally occupies 2 bytes of storage)
 - Default maximum SQL statement length: 1048576 bytes, adjustable via the system configuration parameter maxSQLLength, range 65480 to 1048576 bytes
 - Number of database replicas: at most 3
 - Username: at most 23 bytes
 - User password: at most 15 bytes
 - Number of tags: at most 128, may be 0
-- Total tag length: at most 16K bytes
+- Total tag length: at most 16KB
 - Number of records: limited only by storage space
 - Number of tables: limited only by the number of nodes
 - Number of databases: limited only by the number of nodes
@@ -85,3 +85,47 @@
 | CONNECTIONS | HAVING | NOT | SOFFSET | VNODES |
 | CONNS | ID | NOTNULL | STABLE | WAL |
 | COPY | IF | NOW | STABLES | WHERE |
+| _C0 | _QSTART | _QSTOP | _QDURATION | _WSTART |
+| _WSTOP | _WDURATION | _ROWTS |
+
+## Special Notes
+### TBNAME
+`TBNAME` can be treated as a special tag of a STable, representing the subtable name.
+
+Get all subtable names and related tag information of a STable:
+
+```mysql
+SELECT TBNAME, location FROM meters;
+```
+
+Count the subtables under a STable:
+
+```mysql
+SELECT COUNT(TBNAME) FROM meters;
+```
+
+Both queries above support only tag (TAGS) filter conditions in the WHERE clause. For example:
+```mysql
+taos> SELECT TBNAME, location FROM meters;
+             tbname               |            location              |
+==================================================================
+ d1004                            | California.SanFrancisco          |
+ d1003                            | California.SanFrancisco          |
+ d1002                            | California.LosAngeles            |
+ d1001                            | California.LosAngeles            |
+Query OK, 4 row(s) in set (0.000881s)
+
+taos> SELECT COUNT(tbname) FROM meters WHERE groupId > 2;
+     count(tbname)     |
+========================
+                     2 |
+Query OK, 1 row(s) in set (0.001091s)
+```
+### _QSTART/_QSTOP/_QDURATION
+The start, end, and duration of the query's filter window.
+
+### _WSTART/_WSTOP/_WDURATION
+In window-based aggregation queries (e.g. interval/session window/state window), the start, end, and duration of each partitioned window.
+
+### _c0/_ROWTS
+_c0 and _ROWTS are equivalent, representing the first column of a table or STable.
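A hedged example of the window pseudo columns described above, assuming the `meters` STable from the earlier TBNAME examples:

```sql
-- _wstart and _wduration label each 10-second window in the result set
SELECT _wstart, _wduration, COUNT(*) FROM meters INTERVAL(10s);
```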
@@ -1 +0,0 @@
-label: Parameter Limits and Reserved Keywords
@@ -0,0 +1,66 @@
+---
+sidebar_label: Operators
+title: Operators
+---
+
+## Arithmetic Operators
+
+| # | **Operator** | **Supported types** | **Description** |
+| --- | :--------: | ------------------- | -------------------------- |
+| 1 | +, - | numeric types | positive and negative values, unary operators |
+| 2 | +, - | numeric types | addition and subtraction, binary operators |
+| 3 | \*, / | numeric types | multiplication and division, binary operators |
+| 4 | % | numeric types | modulo, binary operator |
+
+## Bitwise Operators
+
+| # | **Operator** | **Supported types** | **Description** |
+| --- | :--------: | ------------------- | ------------------ |
+| 1 | & | numeric types | bitwise AND, binary operator |
+| 2 | \| | numeric types | bitwise OR, binary operator |
+
+## JSON Operator
+
+The `->` operator extracts a value by key from a JSON-typed column. Its left operand is a column identifier and its right operand is a string-constant key; e.g. `col->'name'` returns the value under key `'name'`.
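For instance, a sketch with a hypothetical JSON tag column named `jtag` (the column name is made up for illustration):

```sql
-- return the value stored under key 'name' for every row
SELECT jtag->'name' FROM st;
```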
+
+## Set Operators
+
+Set operators combine the results of two queries into one. A query containing set operators is called a compound query. The select lists of the queries in a compound query must match in expression count; the result types follow the first query, and the results of subsequent queries must be convertible to the first query's result types, with the same conversion rules as the CAST function.
+
+TDengine supports the `UNION ALL` and `UNION` operators. UNION ALL concatenates the result sets without deduplication; UNION concatenates and deduplicates. At most 100 set operators are allowed in a single SQL statement.
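A minimal sketch of the two set operators, using the demo subtables `d1001` and `d1002` from earlier examples:

```sql
-- concatenates both result sets, keeping duplicate rows
SELECT voltage FROM d1001 UNION ALL SELECT voltage FROM d1002;
-- concatenates and removes duplicate rows
SELECT voltage FROM d1001 UNION SELECT voltage FROM d1002;
```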
+
+## Comparison Operators
+
+| # | **Operator** | **Supported types** | **Description** |
+| --- | :---------------: | -------------------------------------------------------------------- | -------------------- |
+| 1 | = | all types except BLOB, MEDIUMBLOB, and JSON | equal |
+| 2 | <\>, != | all types except BLOB, MEDIUMBLOB, and JSON; not allowed on the table's timestamp primary key column | not equal |
+| 3 | \>, < | all types except BLOB, MEDIUMBLOB, and JSON | greater than, less than |
+| 4 | \>=, <= | all types except BLOB, MEDIUMBLOB, and JSON | greater than or equal, less than or equal |
+| 5 | IS [NOT] NULL | all types | null test |
+| 6 | [NOT] BETWEEN AND | all types except BOOL, BLOB, MEDIUMBLOB, and JSON | closed-interval comparison |
+| 7 | IN | all types except BLOB, MEDIUMBLOB, and JSON; not allowed on the table's timestamp primary key column | equal to any value in the list |
+| 8 | LIKE | BINARY, NCHAR, and VARCHAR | wildcard match |
+| 9 | MATCH, NMATCH | BINARY, NCHAR, and VARCHAR | regular-expression match |
+| 10 | CONTAINS | JSON | whether a key exists in the JSON |
+
+A LIKE condition matches against a wildcard string, with these rules (see the example after the list):
+
+- '%' (percent) matches 0 or more characters; '\_' (underscore) matches any single ASCII character.
+- To match a literal underscore, write \_ in the wildcard string, i.e. escape it with a backslash.
+- The wildcard string may be at most 100 bytes long. Very long wildcard strings are discouraged, since they may severely hurt the performance of the LIKE operation.
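A sketch of the LIKE rules above, against the `meters` demo schema used elsewhere in these docs:

```sql
-- '%' matches any run of characters
SELECT * FROM meters WHERE location LIKE 'California.%';
-- '\_' matches a literal underscore: subtable names starting with "d_"
SELECT tbname FROM meters WHERE tbname LIKE 'd\_%';
```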
+
+A MATCH or NMATCH condition matches with regular expressions, with these rules (see the example after the list):
+
+- POSIX-compliant regular expressions are supported; see Regular Expressions for the specification.
+- Regular-expression filtering applies only to subtable names (i.e. tbname) and string-typed tag values; ordinary columns cannot be filtered this way.
+- The regex string may be at most 128 bytes. The maximum allowed length can be adjusted via the client configuration parameter maxRegexStringLen, which requires a client restart to take effect.
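And a sketch of MATCH/NMATCH, filtering on tbname as the rules above allow (the pattern is hypothetical):

```sql
-- subtables whose names are "d" followed by digits
SELECT tbname FROM meters WHERE tbname MATCH '^d[0-9]+$';
-- all other subtables
SELECT tbname FROM meters WHERE tbname NMATCH '^d[0-9]+$';
```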
+
+## Logical Operators
+
+| # | **Operator** | **Supported types** | **Description** |
+| --- | :--------: | ------------------- | --------------------------------------------------------------------------- |
+| 1 | AND | BOOL | logical AND; returns TRUE if both conditions are TRUE, FALSE if either is FALSE |
+| 2 | OR | BOOL | logical OR; returns TRUE if either condition is TRUE, FALSE if both are FALSE |
+
+When evaluating logical conditions, TDengine short-circuits: for AND, if the first condition is FALSE the second is not evaluated and FALSE is returned; for OR, if the first condition is TRUE the second is not evaluated and TRUE is returned.
@@ -7,8 +7,6 @@ description: "Syntax rules of TAOS SQL, major query features, supported SQL"
 TAOS SQL is the main tool for users to write and query data in TDengine. To help users get started quickly, TAOS SQL provides a style and mode similar to standard SQL to a certain extent. Strictly speaking, TAOS SQL is not, and does not attempt to provide, standard SQL syntax. In addition, since TDengine does not provide deletion for its time-series structured data, TAOS SQL has no data-deletion features.

-TAOS SQL does not support keyword abbreviations; for example, DESCRIBE cannot be shortened to DESC.
-
 This chapter's SQL syntax follows these conventions:

 - Content inside <\> is what the user needs to enter, but do not enter <\> itself
@@ -37,4 +35,4 @@ import DocCardList from '@theme/DocCardList';
 import {useCurrentSidebarCategory} from '@docusaurus/theme-common';

 <DocCardList items={useCurrentSidebarCategory().items}/>
 ```
@@ -16,7 +16,7 @@ The RESTful interface does not rely on any TDengine libraries, so the client needs no TDengine installation
 With the TDengine server installed, it can be verified as follows.

-The following uses the curl tool (confirm it is installed) in an Ubuntu environment to verify that the RESTful interface works.
+The following uses the curl tool (confirm it is installed) in an Ubuntu environment to verify that the RESTful interface works. Before verifying, confirm that the taosAdapter service is running; on Linux it is managed by systemd by default and can be started with `systemctl start taosadapter`.

 The example below lists all databases; replace h1.taosdata.com and 6041 (the default) with the FQDN and port of the actual running TDengine service:
@@ -4,7 +4,7 @@ title: Connectors
 TDengine provides a rich set of application development interfaces. To help users develop their own applications quickly, TDengine supports connectors for multiple programming languages; the official connectors cover C/C++, Java, Python, Go, Node.js, C#, and Rust. These connectors support connecting to TDengine clusters via the native interface (taosc) and via the REST interface (not yet available for some languages). Community developers have also contributed several unofficial connectors, such as the ADO.NET connector, the Lua connector, and the PHP connector.

-![image-connector](/img/connector.png)
+![TDengine Database image-connector](./connector.webp)

 ## Supported platforms
@@ -114,7 +114,6 @@ For installing the TDengine client driver, see the [Installation Guide](/reference/connector#)
 <summary>Subscribe and consume</summary>
-
 ```c
 {{#include examples/c/subscribe.c}}
 ```

 </details>
@@ -11,7 +11,7 @@ import TabItem from '@theme/TabItem';
 `taos-jdbcdriver` is TDengine's official Java language connector, which allows Java developers to build applications that access TDengine databases. `taos-jdbcdriver` implements the standard JDBC driver interface and provides two forms of connection. One connects natively to a TDengine instance via the TDengine client driver (taosc), supporting data writing, queries, subscriptions, the schemaless interface, and the parameter-binding interface; the other connects to a TDengine instance via the REST interface provided by taosAdapter (version 2.4.0.0 and later). The feature set of the REST connection differs slightly from the native connection.

-![tdengine-connector](tdengine-jdbc-connector.png)
+![TDengine Database tdengine-connector](tdengine-jdbc-connector.webp)

 The figure above shows the two ways a Java application can use the connector to access TDengine:
@@ -14,7 +14,6 @@ import NodeInfluxLine from "../../07-develop/03-insert-data/_js_line.mdx";
 import NodeOpenTSDBTelnet from "../../07-develop/03-insert-data/_js_opts_telnet.mdx";
 import NodeOpenTSDBJson from "../../07-develop/03-insert-data/_js_opts_json.mdx";
 import NodeQuery from "../../07-develop/04-query-data/_js.mdx";
-import NodeAsyncQuery from "../../07-develop/04-query-data/_js_async.mdx";

 `td2.0-connector` and `td2.0-rest-connector` are TDengine's official Node.js language connectors. Node.js developers can use them to build applications that access TDengine cluster data.
@@ -189,14 +188,8 @@ let cursor = conn.cursor();
 ### Querying data

-#### Synchronous queries
-
 <NodeQuery />

-#### Asynchronous queries
-
-<NodeAsyncQuery />
-
 ## More sample programs

 | Sample program | Description |
@@ -24,7 +24,7 @@ taosAdapter provides the following features:
 ## taosAdapter architecture diagram

-![taosAdapter Architecture](taosAdapter-architecture.png)
+![TDengine Database taosAdapter Architecture](taosAdapter-architecture.webp)

 ## taosAdapter deployment
@@ -38,7 +38,7 @@ taosdump can be installed in two ways:
 :::tip
 - Versions after taosdump 1.4.1 provide the `-I` parameter, which parses the schema and data of avro files; with the `-s` parameter only the schema is parsed.
-- Backups after taosdump 1.4.2 use the batch count specified by the `-B` parameter, default 16384; if "Error actual dump .. batch .." occurs in some environments due to insufficient network speed or disk performance, try to "challenge" `-B` to a smaller value.
+- Backups after taosdump 1.4.2 use the batch count specified by the `-B` parameter, default 16384; if "Error actual dump .. batch .." occurs in some environments due to insufficient network speed or disk performance, try adjusting `-B` to a smaller value.

 :::
@@ -233,25 +233,25 @@ sudo systemctl enable grafana-server
 Point to the **Configurations** -> **Data Sources** menu, then click the **Add data source** button.

-![Add data source button](./assets/howto-add-datasource-button.png)
+![TDengine Database TDinsight Add data source button](./assets/howto-add-datasource-button.webp)

 Search for and select **TDengine**.

-![Add datasource](./assets/howto-add-datasource-tdengine.png)
+![TDengine Database TDinsight Add datasource](./assets/howto-add-datasource-tdengine.webp)

 Configure the TDengine data source.

-![Datasource Configuration](./assets/howto-add-datasource.png)
+![TDengine Database TDinsight Datasource Configuration](./assets/howto-add-datasource.webp)

 Save and test; under normal conditions it reports 'TDengine Data source is working'.

-![datasource test](./assets/howto-add-datasource-test.png)
+![TDengine Database TDinsight datasource test](./assets/howto-add-datasource-test.webp)

 ### Importing the dashboard

 Point to **+** / **Create** - **import** (or the `/dashboard/import` url).

-![Import dashboard and config](./assets/import_dashboard.png)
+![TDengine Database TDinsight Import dashboard and config](./assets/import_dashboard.webp)

 Enter dashboard ID `15167` at **Import via grafana.com** and click **Load**.
@@ -259,7 +259,7 @@ sudo systemctl enable grafana-server
 After import, the full page view of TDinsight appears as below.

-![display](./assets/TDinsight-full.png)
+![TDengine Database TDinsight display](./assets/TDinsight-full.webp)

 ## TDinsight dashboard details
@@ -269,7 +269,7 @@ TDinsight dashboards are designed to show the usage of TDengine-related resources [dnodes, mnodes
 ### Cluster status

-![tdinsight-mnodes-overview](./assets/TDinsight-1-cluster-status.png)
+![TDengine Database TDinsight mnodes overview](./assets/TDinsight-1-cluster-status.webp)

 This section shows the cluster's current information and status; alert information also appears here (left to right, top to bottom).
@@ -289,7 +289,7 @@ TDinsight dashboards are designed to show the usage of TDengine-related resources [dnodes, mnodes
 ### DNodes status

-![tdinsight-mnodes-overview](./assets/TDinsight-2-dnodes.png)
+![TDengine Database TDinsight mnodes overview](./assets/TDinsight-2-dnodes.webp)

 - **DNodes Status**: a simple table view of `show dnodes`.
 - **DNodes Lifetime**: time elapsed since each dnode was created.
@@ -298,14 +298,14 @@ TDinsight dashboards are designed to show the usage of TDengine-related resources [dnodes, mnodes
 ### MNode overview

-![tdinsight-mnodes-overview](./assets/TDinsight-3-mnodes.png)
+![TDengine Database TDinsight mnodes overview](./assets/TDinsight-3-mnodes.webp)

 1. **MNodes Status**: a simple table view of `show mnodes`.
 2. **MNodes Number**: like `DNodes Number`, the change in the number of MNodes.

 ### Requests

-![tdinsight-requests](./assets/TDinsight-4-requests.png)
+![TDengine Database TDinsight requests](./assets/TDinsight-4-requests.webp)

 1. **Requests Rate (Inserts per Second)**: average inserts per second.
 2. **Requests (Selects)**: number of select requests and their rate (count of second).
@@ -313,7 +313,7 @@ TDinsight dashboards are designed to show the usage of TDengine-related resources [dnodes, mnodes
 ### Databases

-![tdinsight-database](./assets/TDinsight-5-database.png)
+![TDengine Database TDinsight database](./assets/TDinsight-5-database.webp)

 Database usage, repeated as one row group per value of the `$database` variable, i.e. per database.
@@ -325,7 +325,7 @@ TDinsight dashboards are designed to show the usage of TDengine-related resources [dnodes, mnodes
 ### DNode resource usage

-![dnode-usage](./assets/TDinsight-6-dnode-usage.png)
+![TDengine Database TDinsight dnode usage](./assets/TDinsight-6-dnode-usage.webp)

 Data-node resource usage, repeated as one row group per value of the `$fqdn` variable, i.e. per dnode. Includes:
@@ -346,13 +346,13 @@ TDinsight dashboards are designed to show the usage of TDengine-related resources [dnodes, mnodes
 ### Login history

-![Login history](./assets/TDinsight-7-login-history.png)
+![TDengine Database TDinsight login history](./assets/TDinsight-7-login-history.webp)

 Currently only logins per minute are reported.

 ### Monitoring taosAdapter

-![taosadapter](./assets/TDinsight-8-taosadapter.png)
+![TDengine Database TDinsight taosadapter](./assets/TDinsight-8-taosadapter.webp)

 Supports monitoring taosAdapter request statistics and status details. Includes:
@@ -82,7 +82,7 @@ st,t1=3,t2=4,t3=t3 c1=3i64,c3="passit",c2=false,c4=4f64 1626006833639000000

 :::tip
-All schemaless processing logic still follows TDengine's underlying limits on data structures; for example, the total length of each data row cannot exceed 16k bytes. For the specific constraints, see [TAOS SQL Boundary Limits](/taos-sql/limit)
+All schemaless processing logic still follows TDengine's underlying limits on data structures; for example, the total length of each data row cannot exceed 48KB. For the specific constraints, see [TAOS SQL Boundary Limits](/taos-sql/limit)

 :::
@@ -18,21 +18,22 @@ TDengine can quickly integrate with the open-source data visualization system [Grafana](https://www.grafana.com/)
 ## Configuring Grafana

-The TDengine Grafana plugin is hosted on GitHub and can be downloaded from <https://github.com/taosdata/grafanaplugin/releases/latest>; the current latest version is 3.1.4.
-
-It is recommended to use the [`grafana-cli` command-line tool](https://grafana.com/docs/grafana/latest/administration/cli/) for plugin installation.
+Use the [`grafana-cli` command-line tool](https://grafana.com/docs/grafana/latest/administration/cli/) to [install](https://grafana.com/grafana/plugins/tdengine-datasource/?tab=installation) the plugin.

 ```bash
-sudo -u grafana grafana-cli \
-  --pluginUrl https://github.com/taosdata/grafanaplugin/releases/download/v3.1.7/tdengine-datasource-3.1.7.zip \
-  plugins install tdengine-datasource
+grafana-cli plugins install tdengine-datasource
+# with sudo
+sudo -u grafana grafana-cli plugins install tdengine-datasource
 ```

-Or download it locally and extract it into the Grafana plugin directory.
+Or download the .zip file from [GitHub](https://github.com/taosdata/grafanaplugin/releases/tag/latest) or [Grafana](https://grafana.com/grafana/plugins/tdengine-datasource/?tab=installation) and extract it into the Grafana plugin directory. Example command-line download:

 ```bash
-GF_VERSION=3.1.7
+GF_VERSION=3.2.2
+# from GitHub
 wget https://github.com/taosdata/grafanaplugin/releases/download/v$GF_VERSION/tdengine-datasource-$GF_VERSION.zip
+# from Grafana
+wget -O tdengine-datasource-$GF_VERSION.zip https://grafana.com/api/plugins/tdengine-datasource/versions/$GF_VERSION/download
 ```

 Taking CentOS 7.2 as an example, extract the plugin package into the /var/lib/grafana/plugins directory and restart grafana.
@@ -41,52 +42,41 @@ wget https://github.com/taosdata/grafanaplugin/releases/download/v$GF_VERSION/td
 sudo unzip tdengine-datasource-$GF_VERSION.zip -d /var/lib/grafana/plugins/
 ```

-:::note
-Versions 3.1.6 and earlier are unsigned and fail the signature check of Grafana 7.3+/8.x, preventing the plugin from loading; grafana.ini must then be modified as follows:
-
-```ini
-[plugins]
-allow_loading_unsigned_plugins = tdengine-datasource
-```
-
-:::
-
-In a Docker environment, the following environment variables can be used to install and configure the TDengine plugin automatically:
+If Grafana runs in a Docker environment, the following environment variable can be used to install the TDengine data source plugin automatically:

 ```bash
-GF_INSTALL_PLUGINS=https://github.com/taosdata/grafanaplugin/releases/download/v3.1.4/tdengine-datasource-3.1.4.zip;tdengine-datasource
-GF_PLUGINS_ALLOW_LOADING_UNSIGNED_PLUGINS=tdengine-datasource
+GF_INSTALL_PLUGINS=tdengine-datasource
 ```

 ## Using Grafana

 ### Configuring the data source

-Log in to the Grafana server directly at http://localhost:3000 (username/password: admin/admin), and add a data source via `Configuration -> Data Sources` on the left, as shown below:
+Log in to the Grafana server directly at <http://localhost:3000> (username/password: admin/admin), and add a data source via `Configuration -> Data Sources` on the left, as shown below:

 (screenshot: add data source)

 Click `Add data source` to enter the new data source page, type TDengine in the search box and select it, as shown below:

 (screenshot: search and select TDengine)

 Enter the data source configuration page and adjust the settings following the default prompts:

 (screenshot: data source configuration)

-- Host: IP address of the server hosting the component that provides the REST service in the TDengine cluster (provided by taosd before 2.4, by taosAdapter since 2.4) plus the TDengine REST service port (6041); default http://localhost:6041.
+- Host: IP address of the server hosting the component that provides the REST service in the TDengine cluster (provided by taosd before 2.4, by taosAdapter since 2.4) plus the TDengine REST service port (6041); default <http://localhost:6041>.
 - User: TDengine username.
 - Password: TDengine user password.

 Click `Save & Test`; on success the following prompt appears:

 (screenshot: save and test result)

 ### Creating a Dashboard

 Back on the main page, create a Dashboard and click Add Query to enter the panel query page:

 (screenshot: create dashboard)

 As shown above, select the `TDengine` data source in Query and enter the SQL to run in the query box below, explained as follows:
@@ -96,7 +86,7 @@ GF_PLUGINS_ALLOW_LOADING_UNSIGNED_PLUGINS=tdengine-datasource
 Following the default prompt, the average system memory usage over the specified interval, for the server where the current TDengine deployment resides, looks like:

 (screenshot: dashboard panel)

 > For how to build monitoring panels with Grafana, and for more on using Grafana, see the official Grafana [documentation](https://grafana.com/docs/).
@@ -45,25 +45,25 @@ MQTT is a popular IoT data transfer protocol; [EMQX](https://github.com/emqx/em
 Open http://IP:18083 in a browser and log in to the EMQX Dashboard. The initial username is `admin` and the password is `public`.

 (screenshot: EMQX Dashboard login)

 ### Creating a Rule

 Select "Rule" under "Rule Engine" on the left and click the "Create" button:

 (screenshot: rule engine)

 ### Editing the SQL field

 (screenshot: create rule)

 ### Adding an "action handler"

 (screenshot: add action handler)

 ### Adding a "Resource"

 (screenshot: create resource)

 Select "Send data to web service" and click the "New Resource" button:
@@ -71,13 +71,13 @@ MQTT is a popular IoT data transfer protocol; [EMQX](https://github.com/emqx/em
 Select "Send data to web service" and fill in the request URL with the address and port of the server running taosAdapter (default 6041). Keep the other properties at their defaults.

 (screenshot: edit resource)

 ### Editing the "action"

 Edit the resource configuration and add a key/value pair for Authorization; see the [TDengine REST API documentation](https://docs.taosdata.com/reference/rest-api/). Enter the rule-engine replacement template in the message body.

 (screenshot: edit action)

 ## Writing a mock test program
@@ -164,7 +164,7 @@ MQTT is a popular IoT data transfer protocol; [EMQX](https://github.com/emqx/em
 Note: CLIENT_NUM in the code can first be set to a smaller value at the start of testing, to avoid hardware that cannot fully handle a larger number of concurrent clients.

 (screenshot: client num)

 ## Running the test to simulate sending MQTT data
@@ -173,19 +173,19 @@ npm install mqtt mockjs --save --registry=https://registry.npm.taobao.org
 node mock.js
 ```

 (screenshot: run mock)

 ## Verifying that EMQX received the data

 Refresh the rule-engine page in the EMQX Dashboard to see how many records were received correctly:

 (screenshot: check rule matched)

 ## Verifying that the data was written into TDengine

 Use the TDengine CLI to log in and query the corresponding database and tables, verifying that the data was written into TDengine correctly:

 (screenshot: check TDengine)

 For detailed usage of TDengine, see the [TDengine official documentation](https://docs.taosdata.com/).
 For detailed usage of EMQX, see the [EMQX official documentation](https://www.emqx.io/docs/zh/v4.4/rule/rule-engine.html).
@@ -7,17 +7,17 @@ TDengine Kafka Connector contains two plugins: TDengine Source Connector and TDeng
 ## What is Kafka Connect?

-Kafka Connect is a component of Apache Kafka that makes it easy to connect other systems, such as databases, cloud services, and file systems, to Kafka. Data can flow from other systems into Kafka, and from Kafka into other systems, through Kafka Connect. Plugins that read data from other systems are called Source Connectors; plugins that write data to other systems are called Sink Connectors. Neither connects to the Kafka broker directly: a Source Connector hands data to Kafka Connect, and a Sink Connector receives data from Kafka Connect.
+Kafka Connect is a component of [Apache Kafka](https://kafka.apache.org/) that makes it easy to connect other systems, such as databases, cloud services, and file systems, to Kafka. Data can flow from other systems into Kafka, and from Kafka into other systems, through Kafka Connect. Plugins that read data from other systems are called Source Connectors; plugins that write data to other systems are called Sink Connectors. Neither connects to the Kafka broker directly: a Source Connector hands data to Kafka Connect, and a Sink Connector receives data from Kafka Connect.

-![TDengine Database Kafka Connector -- Kafka Connect](kafka/Kafka_Connect.png)
+![TDengine Database Kafka Connector -- Kafka Connect](kafka/Kafka_Connect.webp)

 TDengine Source Connector reads data out of TDengine in real time and sends it to Kafka Connect. TDengine Sink Connector receives data from Kafka Connect and writes it into TDengine.

-![TDengine Database Kafka Connector -- streaming integration with kafka connect](kafka/streaming-integration-with-kafka-connect.png)
+![TDengine Database Kafka Connector -- streaming integration with kafka connect](kafka/streaming-integration-with-kafka-connect.webp)

 ## What is Confluent?

-Confluent adds many extensions on top of Kafka, including:
+[Confluent](https://www.confluent.io/) adds many extensions on top of Kafka, including:

 1. Schema Registry
 2. REST Proxy
@@ -26,7 +26,7 @@ Confluent adds many extensions on top of Kafka, including:
 5. A GUI for managing and monitoring Kafka: the Confluent Control Center

 Some of these extensions are included in the community edition of Confluent; others are enterprise-only.

-![TDengine Database Kafka Connector -- Confluent introduction](kafka/confluentPlatform.png)
+![TDengine Database Kafka Connector -- Confluent introduction](kafka/confluentPlatform.webp)

 The Confluent enterprise edition provides the `confluent` command-line tool to manage the components.
@@ -81,10 +81,10 @@ Development: false
 git clone https://github.com:taosdata/kafka-connect-tdengine.git
 cd kafka-connect-tdengine
 mvn clean package
-unzip -d $CONFLUENT_HOME/share/confluent-hub-components/ target/components/packages/taosdata-kafka-connect-tdengine-0.1.0.zip
+unzip -d $CONFLUENT_HOME/share/java/ target/components/packages/taosdata-kafka-connect-tdengine-*.zip
 ```

-The script above clones the project source, then builds and packages it with Maven. The plugin zip package is generated under `target/components/packages/`. Unzip it into the plugin installation path, which is set in the configuration file `$CONFLUENT_HOME/etc/kafka/connect-standalone.properties`; the default path is `$CONFLUENT_HOME/share/confluent-hub-components/`.
+The script above clones the project source, then builds and packages it with Maven. The plugin zip package is generated under `target/components/packages/`. Unzip it into the plugin installation path. The example above uses the built-in plugin installation path `$CONFLUENT_HOME/share/java/`.

 ### Installing with confluent-hub
@@ -98,7 +98,7 @@ confluent local services start
 ```

 :::note
-Be sure to install the plugin before starting Confluent, otherwise class-not-found errors occur. The Kafka Connect log (default path: /tmp/confluent.xxxx/connect/logs/connect.log) lists the successfully installed plugins, which can be used to check whether installation succeeded.
+Be sure to install the plugin before starting Confluent, otherwise plugin loading fails.
 :::

 :::tip
@@ -125,6 +125,61 @@ Control Center is [UP]
 To clear data, run `rm -rf /tmp/confluent.106668`.
 :::

+### Checking that all components started successfully
+
+Run:
+
+```
+confluent local services status
+```
+
+If every component started successfully, the output is:
+
+```
+Connect is [UP]
+Control Center is [UP]
+Kafka is [UP]
+Kafka REST is [UP]
+ksqlDB Server is [UP]
+Schema Registry is [UP]
+ZooKeeper is [UP]
+```
+
+### Checking that the plugin installed successfully
+
+Once the Kafka Connect component is fully up, list the successfully loaded plugins with:
+
+```
+confluent local services connect plugin list
+```
+
+If installation succeeded, the output includes:
+
+```txt {4,9}
+Available Connect Plugins:
+[
+  {
+    "class": "com.taosdata.kafka.connect.sink.TDengineSinkConnector",
+    "type": "sink",
+    "version": "1.0.0"
+  },
+  {
+    "class": "com.taosdata.kafka.connect.source.TDengineSourceConnector",
+    "type": "source",
+    "version": "1.0.0"
+  },
+......
+```
+
+If the plugin failed to install, check the Kafka Connect startup log for exceptions; print the log path with:
+```
+echo `cat /tmp/confluent.current`/connect/connect.stdout
+```
+The output of this command looks like: `/tmp/confluent.104086/connect/connect.stdout`.
+
+In the same directory as the log file `connect.stdout` there is a file named `connect.properties`. At its end you can see the effective `plugin.path`, a comma-separated list of paths. If plugin installation fails, it is most likely because the actual installation path is not included in `plugin.path`.
+
 ## Using the TDengine Sink Connector

 TDengine Sink Connector synchronizes data from a specified topic into TDengine. Users need not create the database and STables in advance. The target database name can be specified manually (see parameter connection.database) or generated by rule (see parameter connection.database.prefix).
@@ -144,7 +199,7 @@ vi sink-demo.properties
 sink-demo.properties contains:

 ```ini title="sink-demo.properties"
-name=tdengine-sink-demo
+name=TDengineSinkConnector
 connector.class=com.taosdata.kafka.connect.sink.TDengineSinkConnector
 tasks.max=1
 topics=meters
@@ -153,6 +208,7 @@ connection.user=root
 connection.password=taosdata
 connection.database=power
 db.schemaless=line
+data.precision=ns
 key.converter=org.apache.kafka.connect.storage.StringConverter
 value.converter=org.apache.kafka.connect.storage.StringConverter
 ```
@@ -179,6 +235,7 @@ confluent local services connect connector load TDengineSinkConnector --config .
 "connection.url": "jdbc:TAOS://127.0.0.1:6030",
 "connection.user": "root",
 "connector.class": "com.taosdata.kafka.connect.sink.TDengineSinkConnector",
+"data.precision": "ns",
 "db.schemaless": "line",
 "key.converter": "org.apache.kafka.connect.storage.StringConverter",
 "tasks.max": "1",
@@ -223,10 +280,10 @@ Database changed.
 taos> select * from meters;
              ts               |   current    |   voltage    |    phase     | groupid |       location        |
 ===============================================================================================================================================================
 2022-03-28 09:56:51.249000000 | 11.800000000 | 221.000000000 | 0.280000000 | 2 | California.LosAngeles |
 2022-03-28 09:56:51.250000000 | 13.400000000 | 223.000000000 | 0.290000000 | 2 | California.LosAngeles |
 2022-03-28 09:56:51.249000000 | 10.800000000 | 223.000000000 | 0.290000000 | 3 | California.LosAngeles |
 2022-03-28 09:56:51.250000000 | 11.300000000 | 221.000000000 | 0.350000000 | 3 | California.LosAngeles |
 Query OK, 4 row(s) in set (0.004208s)
 ```
@@ -356,21 +413,33 @@ confluent local services connect connector unload TDengineSourceConnector
 2. `connection.database.prefix`: prefix of the target database name when connection.database is null. May contain the placeholder '${topic}'; e.g. kafka_${topic} writes topic 'orders' into database 'kafka_orders'. Default null; when null, the target database name equals the topic name.
 3. `batch.size`: number of records per write batch. When the Sink Connector receives more data at once than this value, it writes in batches.
 4. `max.retries`: maximum number of retries on error. Default 1.
-5. `retry.backoff.ms`: retry interval on send error, in milliseconds. Default 3000.
-6. `db.schemaless`: data format; must be one of line, json, or telnet, meaning InfluxDB line protocol, OpenTSDB JSON format, or OpenTSDB Telnet line protocol, respectively.
+5. `retry.backoff.ms`: retry interval on send error, in milliseconds. Defaults to 3000.
+6. `db.schemaless`: data format; one of:
+   1. line: InfluxDB line protocol format
+   2. json: OpenTSDB JSON format
+   3. telnet: OpenTSDB Telnet line protocol format
+7. `data.precision`: timestamp precision when using the InfluxDB line protocol format; one of:
+   1. ms: milliseconds
+   2. us: microseconds
+   3. ns: nanoseconds. The default is nanoseconds.

 ### TDengine Source Connector specific configuration

 1. `connection.database`: source database name; no default.
 2. `topic.prefix`: prefix of topic names after data is imported into kafka. The full topic name is `topic.prefix` + `connection.database`. Defaults to the empty string "".
-3. `timestamp.initial`: start time of data synchronization, in 'yyyy-MM-dd HH:mm:ss' format. Default "1970-01-01 00:00:00".
-4. `poll.interval.ms`: polling interval, in ms. Default 1000.
+3. `timestamp.initial`: start time of data synchronization, in 'yyyy-MM-dd HH:mm:ss' format. Defaults to "1970-01-01 00:00:00".
+4. `poll.interval.ms`: polling interval, in ms. Defaults to 1000.
 5. `fetch.max.rows`: maximum number of rows per database fetch. Defaults to 100.
-6. `out.format`: data format, line or json; line means InfluxDB Line protocol format, json means OpenTSDB JSON format. Default line.
+6. `out.format`: data format, line or json; line means InfluxDB Line protocol format, json means OpenTSDB JSON format. Defaults to line.

 ## Other notes

 1. The plugin installation location can be customized; see the official documentation: https://docs.confluent.io/home/connect/self-managed/install.html#install-connector-manually.
 2. This tutorial's examples use the Confluent platform, but TDengine Kafka Connector works the same way with a standalone Kafka installation, with identical configuration. For using Kafka Connect plugins in a standalone Kafka environment, see the official documentation: https://kafka.apache.org/documentation/#connect.

 ## Feedback

-https://github.com/taosdata/kafka-connect-tdengine/issues
+Whatever problems you encounter, you are welcome to report them in this project's GitHub repository: https://github.com/taosdata/kafka-connect-tdengine/issues.

 ## References
@@ -11,7 +11,7 @@ TDengine's design assumes that no single hardware or software system is reliable
 The logical structure of TDengine's distributed architecture is shown below:

 (figure: TDengine architecture diagram)

 <center> Figure 1: TDengine architecture overview </center>
@@ -63,7 +63,7 @@ The logical structure of TDengine's distributed architecture is shown below:
 To explain the relationship between vnode, mnode, taosc, and applications, and the roles each plays, the flow of the typical data-writing operation is dissected below.

 (figure: typical TDengine operation flow)

 <center> Figure 2: Typical operation flow in TDengine </center>
@@ -135,7 +135,7 @@ Besides vnode sharding, TDengine also partitions time-series data by time range
 A master vnode follows this write flow:

 (figure: master vnode write flow)

 <center> Figure 3: TDengine master write flow </center>
@@ -150,7 +150,7 @@ A master vnode follows this write flow:
 For a slave vnode, the write flow is:

 (figure: slave vnode write flow)

 <center> Figure 4: TDengine slave write flow </center>
@@ -284,7 +284,7 @@ SELECT COUNT(*) FROM d1001 WHERE ts >= '2017-7-14 00:00:00' AND ts < '2017-7-14
 TDengine creates a separate table for each data collection point, but in practice, data from different collection points often needs to be aggregated. For efficient aggregation, TDengine introduces the STable (super table) concept. A STable represents a specific type of data collection point: it is a collection of tables whose schemas are identical, each carrying its own static tags; there can be multiple tags, which can be added, dropped, or modified at any time. By specifying tag filter conditions, an application can aggregate over all or a subset of the tables under a STable, which greatly simplifies application development. The process is shown below:

 (figure: multi-table aggregation query)

 <center> Figure 5: Multi-table aggregation query </center>
@@ -16,7 +16,7 @@ IT operation and monitoring data are usually highly time-sensitive, for example
 This article shows how to quickly build an IT operations system based on TDengine + Telegraf + Grafana, without writing a single line of code, just by modifying a few lines of configuration. The architecture is shown below:

 (figure: Telegraf/Grafana/TDengine architecture)

 ## Installation steps
@@ -75,7 +75,7 @@ sudo systemctl start telegraf
 Click the gear icon on the left and choose `Plugins`; you should find the TDengine data source plugin icon.
 Click the plus icon on the left and choose `Import`; download the dashboard JSON file from `https://github.com/taosdata/grafanaplugin/blob/master/examples/telegraf/grafana/dashboards/telegraf-dashboard-v0.1.0.json` and import it. A dashboard like the following then appears:

-![IT-DevOps-Solutions-telegraf-dashboard.webp]./IT-DevOps-Solutions-telegraf-dashboard.webp)
+![IT-DevOps-Solutions-telegraf-dashboard](./IT-DevOps-Solutions-telegraf-dashboard.webp)

 ## Summary
@@ -16,7 +16,7 @@ IT operation and monitoring data are usually highly time-sensitive, for example
 This article shows how to quickly build an IT operations system based on TDengine + collectd / StatsD + Grafana, without writing a single line of code, just by modifying a few lines of configuration. The architecture is shown below:

 (figure: collectd/StatsD/Grafana/TDengine architecture)

 ## Installation steps
@@ -81,12 +81,12 @@ In the repeater section add { host:'<TDengine server/cluster host>', port: <port for S
 Download the dashboard json file from https://github.com/taosdata/grafanaplugin/blob/master/examples/collectd/grafana/dashboards/collect-metrics-with-tdengine-v0.1.0.json, click the plus icon on the left, choose `Import`, and follow the prompts to import the JSON file. A dashboard like the following then appears:

 (screenshot: collectd dashboard)

 #### Importing the StatsD dashboard

 Download the dashboard json file from `https://github.com/taosdata/grafanaplugin/blob/master/examples/statsd/dashboards/statsd-with-tdengine-v0.1.0.json`, click the plus icon on the left, choose `Import`, and follow the prompts to import the JSON file. A dashboard like the following then appears:

 (screenshot: StatsD dashboard)

 ## Summary
@@ -27,7 +27,7 @@ title: Best Practices for Migrating OpenTSDB Applications to TDengine
 The overall system architecture of a typical DevOps application scenario is shown in the figure below (Figure 1).

 **Figure 1. Typical architecture in a DevOps scenario**
 (figure: typical DevOps architecture)

 This application scenario includes Agent tools deployed in the application environment to collect machine metrics, network metrics, and application metrics; data collectors that aggregate the information gathered by the Agents; a system for data persistence, storage, and management; and monitoring-data visualization tools (e.g. Grafana).
@@ -70,7 +70,7 @@ LoadPlugin write_tsdb
 TDengine provides two default sets of Dashboard templates; users need only import the templates under the Grafana directory into Grafana to activate them.

 **Figure 2. Importing Grafana templates**
 (figure: import Grafana template)

 After completing the steps above, the migration from OpenTSDB to TDengine is done. As you can see, the whole process is very simple: no code needs to be written, only a few configuration files need adjusting to complete the entire migration.
@@ -83,7 +83,7 @@ TDengine provides two default sets of Dashboard templates; users need only import
 If your application is especially complex, or your domain is not a DevOps scenario, you can continue reading the following chapters for a more comprehensive, in-depth look at advanced topics in migrating OpenTSDB applications to TDengine.

 **Figure 3. System architecture after migration**
 (figure: system architecture after migration)

 ## Migration evaluation and strategies for other scenarios
@@ -33,15 +33,15 @@ title: Frequently Asked Questions

 ### 2. On Windows, JDBCDriver cannot find the dynamic link library. What should I do?

 See the [technical blog post](https://www.taosdata.com/blog/2019/12/03/950.html) written for this question.

 ### 3. "more dnodes are needed" is prompted when creating a table

 See the [technical blog post](https://www.taosdata.com/blog/2019/12/03/965.html) written for this question.

 ### 4. How do I make TDengine generate a core file when it crashes?

 See the [technical blog post](https://www.taosdata.com/blog/2019/12/06/974.html) written for this question.

 ### 5. What should I do when the error "Unable to establish connection" occurs?
@@ -128,19 +128,30 @@ properties.setProperty(TSDBDriver.LOCALE_KEY, "UTF-8");
 Connection = DriverManager.getConnection(url, properties);
 ```

-### 13. JDBC error: the executed SQL is not a DML or a DDL?
+### 13. The client cannot display Chinese characters correctly on Windows?
+
+Windows systems generally store Chinese characters in GBK/GB18030, while TDengine's default character set is UTF-8. When using the TDengine client on Windows, the client driver converts characters to UTF-8 encoding before sending them to the server for storage; so during application development, it suffices to configure the current Chinese character set correctly when calling the interfaces.
+
+[Since v2.2.1.5] If the TDengine CLI tool taos cannot input or display Chinese correctly on Windows 10, the following can be set in the client's taos.cfg:
+
+```
+locale C
+charset UTF-8
+```
+
+### 14. JDBC error: the executed SQL is not a DML or a DDL?

 Update to the latest JDBC driver; see the [Java connector](/reference/connector/java)

-### 14. taos connect failed, reason: invalid timestamp
+### 15. taos connect failed, reason: invalid timestamp

 A common cause is that the server and client clocks are not synchronized. Synchronize with a time server (use the ntpdate command on Linux; on Windows, select automatic synchronization in the system time settings).

-### 15. Table names are displayed incompletely
+### 16. Table names are displayed incompletely

 Because the taos shell has limited display width in the terminal, long table names may be shown incompletely, and operating on an incompletely shown table name raises a Table does not exist error. This can be fixed by changing the maxBinaryDisplayWidth setting in taos.cfg, by entering the command set max_binary_display_width 100, or by ending the command with the \G parameter to change how results are displayed.

-### 16. How do I migrate data?
+### 17. How do I migrate data?

 TDengine uniquely identifies a machine by hostname. When moving data files from machine A to machine B, pay attention to the following two points:
@ -148,7 +159,7 @@ TDengine 是根据 hostname 唯一标志一台机器的,在数据文件从机
|
|||
- 2.0.7.0 及以后的版本,到/var/lib/taos/dnode 下,修复 dnodeEps.json 的 dnodeId 对应的 FQDN,重启。确保机器内所有机器的此文件是完全相同的。
|
||||
- 1.x 和 2.x 版本的存储结构不兼容,需要使用迁移工具或者自己开发应用导出导入数据。
|
||||
|
||||
### 17. 如何在命令行程序 taos 中临时调整日志级别
|
||||
### 18. 如何在命令行程序 taos 中临时调整日志级别
|
||||
|
||||
为了调试方便,从 2.0.16 版本开始,命令行程序 taos 新增了与日志记录相关的两条指令:
|
||||
|
||||
|
@@ -169,7 +180,7 @@ ALTER LOCAL RESETLOG;

 <a class="anchor" id="timezone"></a>

-### 18. How do I fix compilation failures of the components written in go?
+### 19. How do I fix compilation failures of the components written in go?

 TDengine 2.3.0.0 and later include taosAdapter, a standalone component written in go that must run separately. It replaces the httpd previously built into taosd and provides data-ingestion functionality covering the original httpd features plus support for various other software (Prometheus, Telegraf, collectd, StatsD, etc.).
 To build the latest develop branch, first run `git submodule update --init --recursive` to download the taosAdapter repository code, then compile.
@@ -184,7 +195,7 @@ go env -w GOPROXY=https://goproxy.cn,direct
 To keep using the previous built-in httpd, disable the taosAdapter build with
 `cmake .. -DBUILD_HTTP=true`.

-### 19. How do I check how much storage space my data occupies?
+### 20. How do I check how much storage space my data occupies?

 By default, TDengine data files are stored in /var/lib/taos and log files in /var/log/taos.
@@ -193,3 +204,38 @@ go env -w GOPROXY=https://goproxy.cn,direct
 To check the size occupied by a single database, specify the database in the CLI program taos and run `show vgroups;`, then use the returned VGroup ids to check the sizes of the corresponding folders under /var/lib/taos/vnode.

 To check only the data-block distribution and size of a given (super) table, see the [_block_dist function](https://docs.taosdata.com/taos-sql/select/#_block_dist-%E5%87%BD%E6%95%B0)

+### 21. How do I make the client connection string highly available?
+
+See the [technical blog post](https://www.taosdata.com/blog/2021/04/16/2287.html) written for this question
+
+### 22. How is timezone information in timestamps handled?
+
+In TDengine, the timezone of timestamps is always handled by the client and has nothing to do with the server. Specifically, the client converts the timestamps in SQL statements to the UTC timezone (i.e. Unix timestamps) before handing them to the server for writing and querying; when reading, the server also serves raw data in UTC, and the client converts the timestamps to the timezone required by the local system for display, according to its local settings.
+
+When handling timestamp strings, the client applies the following logic:
+
+1. Without special settings, the client defaults to the timezone setting of its operating system.
+2. If the timezone parameter is set in taos.cfg, the client follows that configuration.
+3. If a timezone is explicitly specified when establishing the database connection in the connector driver for C/C++/Java/Python etc., that specified timezone takes precedence. For example, the Java connector's JDBC URL has a timezone parameter.
+4. When writing SQL, Unix timestamps (e.g. `1554984068000`) or timezone-qualified timestamp strings can also be used directly, in RFC 3339 format (e.g. `2013-04-12T15:52:01.123+08:00`) or ISO-8601 format (e.g. `2013-04-12T15:52:01.123+0800`); these values are then unaffected by any other timezone settings (see the example below).
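A sketch of rule 4 above, using a hypothetical table `t1`. Both inserts denote the same instant regardless of the client's timezone settings:

```sql
-- Unix epoch timestamp in milliseconds (2019-04-11 12:01:08 UTC)
INSERT INTO t1 VALUES (1554984068000, 10.3);
-- the equivalent RFC 3339 timestamp with an explicit +08:00 offset
INSERT INTO t1 VALUES ('2019-04-11T20:01:08.000+08:00', 10.3);
```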
+
+### 23. Which network ports does TDengine 2.0 use?
+
+For the ports used, see the documentation: [serverport](/reference/config/#serverport)
+
+Note that the documented port numbers assume the default port 6030; if the configuration file changes this, the listed ports shift accordingly. Administrators can adjust firewall settings based on this information.
+
+### 24. Why is the RESTful interface unresponsive, why can't Grafana add TDengine as a data source, and why can't TDengineGUI connect even on port 6041?
+
+Since TDengine 2.4.0.0, taosAdapter is part of the TDengine server software and acts as the bridge and adapter between a TDengine cluster and applications. Previously the RESTful interface and related features were provided by the HTTP service built into taosd; now you need to run ```systemctl start taosadapter``` to start the taosAdapter service.
+
+Note that taosAdapter's log path must be configured separately; the default path is /var/log/taos. There are 8 logLevel levels, the default being info; setting it to panic disables log output. Mind the free space of the OS / directory; configuration can be changed via command-line parameters, environment variables, or the configuration file, which defaults to /etc/taos/taosadapter.toml.
+
+For a detailed introduction to the taosAdapter component, see the documentation: [taosAdapter](https://docs.taosdata.com/reference/taosadapter/)
+
+### 25. What should I do about OOM?
+
+OOM is an operating-system protection mechanism: when OS memory (including SWAP) runs low, the OS kills certain processes to keep itself stable. Memory shortage is usually caused either by the remaining memory being less than vm.min_free_kbytes, or by a program requesting more memory than remains. There is also a case where memory is sufficient but the program occupies special memory addresses, which can also trigger OOM.
+
+TDengine preallocates memory for each VNode; the number of VNodes per database is governed by maxVgroupsPerDb, and the memory per VNode by Blocks and Cache. To prevent OOM, plan memory properly at the start of a project and set SWAP sensibly. Also, querying excessive amounts of data may cause memory to spike, depending on the specific query. TDengine Enterprise optimizes memory management with a new memory allocator; users with higher stability requirements may consider the Enterprise edition.
@@ -54,7 +54,7 @@ With TDengine, the total cost of ownership of your time-series data platform can
 ## Technical Ecosystem
 This is how TDengine would be situated, in a typical time-series data processing platform:

 (figure: TDengine technical ecosystem)

 <center>Figure 1. TDengine Technical Ecosystem</center>
@@ -12,6 +12,6 @@ Between two major release versions, some beta versions may be delivered for user

 For the details please refer to [Install and Uninstall](/operation/pkg-install).

-To see the details of versions, please refer to [Download List](https://www.taosdata.com/all-downloads) and [Release Notes](https://github.com/taosdata/TDengine/releases).
+To see the details of versions, please refer to [Download List](https://tdengine.com/all-downloads) and [Release Notes](https://github.com/taosdata/TDengine/releases).
@@ -130,7 +130,7 @@ After TDengine server is running, execute `taosBenchmark` (previously named taosdemo)
 taosBenchmark
 ```

-This command will create a super table "meters" under database "test". Under "meters", 10000 tables are created with names from "d0" to "d9999". Each table has 10000 rows and each row has four columns (ts, current, voltage, phase). Timestamps range from "2017-07-14 10:40:00 000" to "2017-07-14 10:40:09 999". Each table has the tags "location" and "groupId"; groupId is set to 1-10 at random, and location is set to "California.SanFrancisco" or "California.SanDieo".
+This command will create a super table "meters" under database "test". Under "meters", 10000 tables are created with names from "d0" to "d9999". Each table has 10000 rows and each row has four columns (ts, current, voltage, phase). Timestamps range from "2017-07-14 10:40:00 000" to "2017-07-14 10:40:09 999". Each table has the tags "location" and "groupId"; groupId is set to 1-10 at random, and location is set to "California.SanFrancisco" or "California.SanDiego".

 This command will insert 100 million rows into the database quickly. Time to insert depends on the hardware configuration; it takes only a dozen seconds on a regular PC server.
@@ -1,6 +1,6 @@
 ---
-sidebar_label: Connection
-title: Connect to TDengine
+sidebar_label: Connect
+title: Connect
 description: "This document explains how to establish connections to TDengine, and briefly introduces how to install and use TDengine connectors."
 ---
@@ -1,5 +1,5 @@
 ---
-sidebar_label: SQL
+sidebar_label: Insert Using SQL
 title: Insert Using SQL
 ---
@@ -52,7 +52,7 @@ For more details about `INSERT` please refer to [INSERT](/taos-sql/insert).

 :::info

-- Inserting in batches can improve performance. Normally, the higher the batch size, the better the performance. Please note that a single row can't exceed 16K bytes and each SQL statement can't exceed 1MB.
+- Inserting in batches can improve performance. Normally, the higher the batch size, the better the performance. Please note that a single row can't exceed 48K bytes and each SQL statement can't exceed 1MB.
 - Inserting with multiple threads can also improve performance. However, depending on the system resources on the application side and the server side, when the number of inserting threads grows beyond a specific point the performance may drop instead of improving. The proper number of threads needs to be tested in a specific environment to find the best number.

 :::
|
@ -1,5 +1,5 @@
|
|||
---
|
||||
title: Insert
|
||||
title: Insert Data
|
||||
---
|
||||
|
||||
TDengine supports multiple protocols of inserting data, including SQL, InfluxDB Line protocol, OpenTSDB Telnet protocol, and OpenTSDB JSON protocol. Data can be inserted row by row, or in batches. Data from one or more collection points can be inserted simultaneously. Data can be inserted with multiple threads, and out of order data and historical data can be inserted as well. InfluxDB Line protocol, OpenTSDB Telnet protocol and OpenTSDB JSON protocol are the 3 kinds of schemaless insert protocols supported by TDengine. It's not necessary to create STables and tables in advance if using schemaless protocols, and the schemas can be adjusted automatically based on the data being inserted.
|
||||
|
|
@@ -1 +1 @@
-label: Select Data
+label: Query Data
@@ -1,6 +1,6 @@
 ---
-Sidebar_label: Select
-title: Select
+Sidebar_label: Query data
+title: Query data
 description: "This chapter introduces major query functionalities and how to perform sync and async query using connectors."
 ---
@@ -1,5 +1,5 @@
 ---
-sidebar_label: Subscription
+sidebar_label: Data Subscription
 description: "Lightweight service for data subscription and publishing. Time series data inserted into TDengine continuously can be pushed automatically to subscribing clients."
 title: Data Subscription
 ---
@@ -108,7 +108,7 @@ if (async) {
 }
 ```

-In the above sample code in the else condition, there is an infinite loop. Each time carriage return is entered `taos_consume` is invoked. The return value of `taos_consume` is the selected result set. In the above sample, `print_result` is used to simplify the printing of the result set. Below is the implementation of `print_result`.
+In the above sample code in the else condition, there is an infinite loop. Each time carriage return is entered `taos_consume` is invoked. The return value of `taos_consume` is the selected result set. In the above sample, `print_result` is used to simplify the printing of the result set. It is similar to `taos_use_result`. Below is the implementation of `print_result`.

 ```c
 void print_result(TAOS_RES* res, int blockFetch) {
@@ -151,7 +151,7 @@ void subscribe_callback(TAOS_SUB* tsub, TAOS_RES *res, void* param, int code) {
 taos_unsubscribe(tsub, keep);
 ```

-The second parameter `keep` is used to specify whether to keep the subscription progress on the client side. If it is **false**, i.e. **0**, then the subscription will be restarted from the beginning regardless of the `restart` parameter's value when `taos_subscribe` is invoked again. The subscription progress information is stored in _{DataDir}/subscribe/_ , under which there is a file with the same name as the `topic` for each subscription; the subscription will be restarted from the beginning if the corresponding progress file is removed.
+The second parameter `keep` is used to specify whether to keep the subscription progress on the client side. If it is **false**, i.e. **0**, then the subscription will be restarted from the beginning regardless of the `restart` parameter's value when `taos_subscribe` is invoked again. The subscription progress information is stored in _{DataDir}/subscribe/_ , under which there is a file with the same name as the `topic` for each subscription (note: the default value of `DataDir` in the `taos.cfg` file is **/var/lib/taos/**; however, **/var/lib/taos/** does not exist on Windows servers, so you need to change the `DataDir` value to an existing directory); the subscription will be restarted from the beginning if the corresponding progress file is removed.

 Now let's see the effect of the above sample code, assuming below prerequisites have been done.
@ -4,15 +4,15 @@ title: Cache
|
|||
description: "The latest row of each table is kept in cache to provide high performance query of latest state."
|
||||
---
|
||||
|
||||
The cache management policy in TDengine is First-In-First-Out (FIFO), which is also known as insert driven cache management policy and different from read driven cache management, i.e. Least-Recent-Used (LRU). It simply stores the latest data in cache and flushes the oldest data in cache to disk when the cache usage reaches a threshold. In IoT use cases, the most cared about data is the latest data, i.e. current state. The cache policy in TDengine is based the nature of IoT data.
|
||||
The cache management policy in TDengine is First-In-First-Out (FIFO). FIFO is also known as insert driven cache management policy and it is different from read driven cache management, which is more commonly known as Least-Recently-Used (LRU). FIFO simply stores the latest data in cache and flushes the oldest data in cache to disk, when the cache usage reaches a threshold. In IoT use cases, it is the current state i.e. the latest or most recent data that is important. The cache policy in TDengine, like much of the design and architecture of TDengine, is based on the nature of IoT data.
|
||||
|
||||
Caching the latest data provides the capability of retrieving data in milliseconds. With this capability, TDengine can be configured properly to be used as caching system without deploying another separate caching system to simplify the system architecture and minimize the operation cost. The cache will be emptied after TDengine is restarted, TDengine doesn't reload data from disk into cache like a real key-value caching system.
|
||||
Caching the latest data provides the capability of retrieving data in milliseconds. With this capability, TDengine can be configured properly to be used as a caching system without deploying another separate caching system. This simplifies the system architecture and minimizes operational costs. The cache is emptied after TDengine is restarted. Unlike a key-value caching system, TDengine does not reload data from disk into cache after a restart.
|
||||
|
||||
The memory space used by TDengine cache is fixed in size, according to the configuration based on application requirement and system resources. Independent memory pool is allocated for and managed by each vnode (virtual node) in TDengine, there is no sharing of memory pools between vnodes. All the tables belonging to a vnode share all the cache memory of the vnode.
|
||||
The memory space used by the TDengine cache is fixed in size and configurable. It should be allocated based on application requirements and system resources. An independent memory pool is allocated for and managed by each vnode (virtual node) in TDengine. There is no sharing of memory pools between vnodes. All the tables belonging to a vnode share all the cache memory of the vnode.
|
||||
|
||||
Memory pool is divided into blocks and data is stored in row format in memory and each block follows FIFO policy. The size of each block is determined by configuration parameter `cache`, the number of blocks for each vnode is determined by `blocks`. For each vnode, the total cache size is `cache * blocks`. A cache block needs to ensure that each table can store at least dozens of records to be efficient.
|
||||
The memory pool is divided into blocks; data is stored in row format in memory and each block follows FIFO policy. The size of each block is determined by the configuration parameter `cache` and the number of blocks for each vnode is determined by the parameter `blocks`. For each vnode, the total cache size is `cache * blocks`. To be efficient, a cache block needs to ensure that each table can store at least dozens of records.
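As an illustration (a sketch, not a prescription: the database name and the values below are assumptions), both parameters can be set when creating a database:

```sql
-- 6 blocks of 16 MB each per vnode => 96 MB of write cache per vnode
CREATE DATABASE power CACHE 16 BLOCKS 6;
```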
|
||||
|
||||
`last_row` function can be used to retrieve the last row of a table or a STable to quickly show the current state of devices on monitoring screen. For example the below SQL statement retrieves the latest voltage of all meters in San Francisco of California.
|
||||
The `last_row` function can be used to retrieve the last row of a table or a STable to quickly show the current state of devices on a monitoring screen. For example, the SQL statement below retrieves the latest voltage of all meters in San Francisco, California.
|
||||
|
||||
```sql
|
||||
select last_row(voltage) from meters where location='California.SanFrancisco';
|
||||
|
|
|
@ -1,24 +1,31 @@
|
|||
---
|
||||
sidebar_label: UDF
|
||||
title: User Defined Functions
|
||||
description: "Scalar functions and aggregate functions developed by users can be utilized by the query framework to expand the query capability"
|
||||
title: User Defined Functions(UDF)
|
||||
description: "Scalar functions and aggregate functions developed by users can be utilized by the query framework to expand query capability"
|
||||
---
|
||||
|
||||
In some use cases, the query capability required by application programs can't be achieved directly by builtin functions. With UDF, the functions developed by users can be utilized by query framework to meet some special requirements. UDF normally takes one column of data as input, but can also support the result of sub query as input.
|
||||
In some use cases, built-in functions are not adequate for the query capability required by application programs. With UDF, the functions developed by users can be utilized by the query framework to meet business and application requirements. UDF normally takes one column of data as input, but can also support the result of a sub-query as input.
|
||||
|
||||
From version 2.2.0.0, UDF programmed in C/C++ language can be supported by TDengine.
|
||||
From version 2.2.0.0, UDF written in C/C++ are supported by TDengine.
|
||||
|
||||
Two kinds of functions can be implemented by UDF: scalar function and aggregate function.
|
||||
|
||||
## Define UDF
|
||||
## Types of UDF
|
||||
|
||||
Two kinds of functions can be implemented by UDF: scalar functions and aggregate functions.
|
||||
|
||||
Scalar functions return multiple rows and aggregate functions return either 0 or 1 row.
|
||||
|
||||
In the case of a scalar function you only have to implement the "normal" function template.
|
||||
|
||||
In the case of an aggregate function, in addition to the "normal" function, you also need to implement the "merge" and "finalize" function templates even if the implementation is empty. This will become clear in the sections below.
|
||||
|
||||
### Scalar Function
|
||||
|
||||
Below function template can be used to define your own scalar function.
|
||||
As mentioned earlier, a scalar UDF only has to implement the "normal" function template. The function template below can be used to define your own scalar function.
|
||||
|
||||
`void udfNormalFunc(char* data, short itype, short ibytes, int numOfRows, long long* ts, char* dataOutput, char* interBuf, char* tsOutput, int* numOfOutput, short otype, short obytes, SUdfInit* buf)`
|
||||
|
||||
`udfNormalFunc` is the place holder of function name, a function implemented based on the above template can be used to perform scalar computation on data rows. The parameters are fixed to control the data exchange between UDF and TDengine.
|
||||
`udfNormalFunc` is the placeholder for a function name. A function implemented based on the above template can be used to perform scalar computation on data rows. The parameters are fixed to control the data exchange between the UDF and TDengine.
|
||||
|
||||
- Definitions of the parameters:
|
||||
|
||||
|
@ -30,20 +37,24 @@ Below function template can be used to define your own scalar function.
|
|||
- numOfRows:the number of rows in the input data
|
||||
- ts: the column of timestamp corresponding to the input data
|
||||
- dataOutput:the buffer for output data, whose total size is `obytes * numOfRows`
|
||||
- interBuf:the buffer for intermediate result, its size is specified by `BUFSIZE` parameter when creating a UDF. It's normally used when the intermediate result is not same as the final result, it's allocated and freed by TDengine.
|
||||
- interBuf:the buffer for an intermediate result. Its size is specified by the `BUFSIZE` parameter when creating a UDF. It's normally used when the intermediate result is not the same as the final result. This buffer is allocated and freed by TDengine.
|
||||
- tsOutput:the column of timestamps corresponding to the output data; it can be used to output timestamp together with the output data if it's not NULL
|
||||
- numOfOutput:the number of rows in output data
|
||||
- buf:for the state exchange between UDF and TDengine
|
||||
|
||||
[add_one.c](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/add_one.c) is one example of the simplest UDF implementations, i.e. one instance of the above `udfNormalFunc` template. It adds one to each value of a column passed in which can be filtered using `where` clause and outputs the result.
|
||||
[add_one.c](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/add_one.c) is one example of a very simple UDF implementation, i.e. one instance of the above `udfNormalFunc` template. It adds one to each value of a passed in column, which can be filtered using the `where` clause, and outputs the result.
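For orientation, below is a minimal sketch of such a scalar UDF. It follows the template above but is not the verbatim `add_one.c` source; it assumes the input column is INT and omits NULL handling for brevity.

```c
typedef struct SUdfInit SUdfInit; /* opaque here; defined by the UDF framework */

/* Sketch of a scalar UDF: add one to every value of an assumed INT column. */
void add_one(char* data, short itype, short ibytes, int numOfRows,
             long long* ts, char* dataOutput, char* interBuf, char* tsOutput,
             int* numOfOutput, short otype, short obytes, SUdfInit* buf) {
  int* in = (int*)data;
  int* out = (int*)dataOutput;
  for (int i = 0; i < numOfRows; ++i) {
    out[i] = in[i] + 1; /* one output row per input row */
  }
  *numOfOutput = numOfRows; /* a scalar UDF emits as many rows as it receives */
}
```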
|
||||
|
||||
### Aggregate Function
|
||||
|
||||
Below function template can be used to define your own aggregate function.
|
||||
For an aggregate UDF, as mentioned earlier, you must implement a "normal" function template (described above) and also implement the "merge" and "finalize" function templates.
|
||||
|
||||
`void abs_max_merge(char* data, int32_t numOfRows, char* dataOutput, int32_t* numOfOutput, SUdfInit* buf)`
|
||||
#### Merge Function Template
|
||||
|
||||
`udfMergeFunc` is the place holder of function name, the function implemented with the above template is used to aggregate the intermediate result, only can be used in the aggregate query for STable.
|
||||
The function template below can be used to define your own merge function for an aggregate UDF.
|
||||
|
||||
`void udfMergeFunc(char* data, int32_t numOfRows, char* dataOutput, int32_t* numOfOutput, SUdfInit* buf)`
|
||||
|
||||
`udfMergeFunc` is the placeholder for a function name. The function implemented with the above template is used to aggregate intermediate results and can only be used in the aggregate query for STable.
|
||||
|
||||
Definitions of the parameters:
|
||||
|
||||
|
@ -53,17 +64,11 @@ Definitions of the parameters:
|
|||
- numOfOutput:number of rows in the output data
|
||||
- buf:for the state exchange between UDF and TDengine
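To make the merge step concrete, below is a hedged sketch of a merge function for an abs_max-style aggregate (not the verbatim `abs_max.c` source); it assumes the intermediate results are plain `int` values, one per sub table.

```c
#include <stdint.h>

typedef struct SUdfInit SUdfInit; /* opaque here; defined by the UDF framework */

/* Sketch: keep the maximum of numOfRows intermediate int results. */
void abs_max_merge(char* data, int32_t numOfRows, char* dataOutput,
                   int32_t* numOfOutput, SUdfInit* buf) {
  int* in = (int*)data;
  int* out = (int*)dataOutput;
  *numOfOutput = 0;
  for (int32_t i = 0; i < numOfRows; ++i) {
    if (*numOfOutput == 0 || in[i] > *out) {
      *out = in[i];
      *numOfOutput = 1; /* the merge emits a single aggregated value */
    }
  }
}
```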
|
||||
|
||||
[abs_max.c](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/abs_max.c) is an user defined aggregate function to get the maximum from the absolute value of a column.
|
||||
#### Finalize Function Template
|
||||
|
||||
The internal processing is that the data affected by the select statement will be divided into multiple row blocks and `udfNormalFunc`, i.e. `abs_max` in this case, is performed on each row block to generate the intermediate of each sub table, then `udfMergeFunc`, i.e. `abs_max_merge` in this case, is performed on the intermediate result of sub tables to aggregate to generate the final or intermediate result of STable. The intermediate result of STable is finally processed by `udfFinalizeFunc` to generate the final result, which contain either 0 or 1 row.
|
||||
The function template below can be used to finalize the result of your own UDF; it is normally needed when `interBuf` is used.
|
||||
|
||||
Other typical scenarios, like covariance, can also be achieved by aggregate UDF.
|
||||
|
||||
### Finalize
|
||||
|
||||
Below function template can be used to finalize the result of your own UDF, normally used when interBuf is used.
|
||||
|
||||
`void abs_max_finalize(char* dataOutput, char* interBuf, int* numOfOutput, SUdfInit* buf)`
|
||||
`void udfFinalizeFunc(char* dataOutput, char* interBuf, int* numOfOutput, SUdfInit* buf)`
|
||||
|
||||
`udfFinalizeFunc` is the placeholder for a function name. Definitions of the parameters are as below:
|
||||
|
||||
|
@ -72,47 +77,64 @@ Below function template can be used to finalize the result of your own UDF, norm
|
|||
- numOfOutput:number of output rows, which can only be 0 or 1 for an aggregate function
|
||||
- buf:for state exchange between UDF and TDengine
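As a hedged sketch (assuming the running maximum of an abs_max-style aggregate sits in `interBuf` as an `int`), a finalize function can be as small as:

```c
typedef struct SUdfInit SUdfInit; /* opaque here; defined by the UDF framework */

/* Sketch: publish the intermediate result as the final output row. */
void abs_max_finalize(char* dataOutput, char* interBuf, int* numOfOutput,
                      SUdfInit* buf) {
  *(int*)dataOutput = *(int*)interBuf;
  *numOfOutput = 1; /* an aggregate UDF returns either 0 or 1 row */
}
```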
|
||||
|
||||
## UDF Conventions
|
||||
### Example abs_max.c
|
||||
|
||||
The naming of 3 kinds of UDF, i.e. udfNormalFunc, udfMergeFunc, and udfFinalizeFunc is required to have same prefix, i.e. the actual name of udfNormalFunc, which means udfNormalFunc doesn't need a suffix following the function name. While udfMergeFunc should be udfNormalFunc followed by `_merge`, udfFinalizeFunc should be udfNormalFunc followed by `_finalize`. The naming convention is part of UDF framework, TDengine follows this convention to invoke corresponding actual functions.\
|
||||
[abs_max.c](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/abs_max.c) is an example of a user defined aggregate function to get the maximum from the absolute values of a column.
|
||||
|
||||
According to the kind of UDF to implement, the functions that need to be implemented are different.
|
||||
The internal processing happens as follows. The results of the select statement are divided into multiple row blocks and `udfNormalFunc`, i.e. `abs_max` in this case, is performed on each row block to generate the intermediate results for each sub table. Then `udfMergeFunc`, i.e. `abs_max_merge` in this case, is performed on the intermediate result of sub tables to aggregate and generate the final or intermediate result of STable. The intermediate result of STable is finally processed by `udfFinalizeFunc`, i.e. `abs_max_finalize` in this example, to generate the final result, which contains either 0 or 1 row.
|
||||
|
||||
- Scalar function:udfNormalFunc is required
|
||||
- Aggregate function:udfNormalFunc, udfMergeFunc (if query on STable) and udfFinalizeFunc are required
|
||||
Other typical aggregation functions, such as covariance, can also be implemented using aggregate UDFs.
|
||||
|
||||
To be more accurate, assuming we want to implement a UDF named "foo". If the function is a scalar function, what we really need to implement is `foo`; if the function is aggregate function, we need to implement `foo`, `foo_merge`, and `foo_finalize`. For aggregate UDF, even though one of the three functions is not necessary, there must be an empty implementation.
|
||||
## UDF Naming Conventions
|
||||
|
||||
The naming convention for the 3 kinds of function templates required by UDF is as follows:
|
||||
- udfNormalFunc, udfMergeFunc, and udfFinalizeFunc are required to have the same prefix, i.e. the actual name of udfNormalFunc. The udfNormalFunc itself doesn't need a suffix following the function name.
|
||||
- udfMergeFunc should be udfNormalFunc followed by `_merge`
|
||||
- udfFinalizeFunc should be udfNormalFunc followed by `_finalize`.
|
||||
|
||||
The naming convention is part of TDengine's UDF framework. TDengine follows this convention to invoke the corresponding actual functions.
|
||||
|
||||
Depending on whether you are creating a scalar UDF or aggregate UDF, the functions that you need to implement are different.
|
||||
|
||||
- Scalar function:udfNormalFunc is required.
|
||||
- Aggregate function:udfNormalFunc, udfMergeFunc (if query on STable) and udfFinalizeFunc are required.
|
||||
|
||||
For clarity, assuming we want to implement a UDF named "foo":
|
||||
- If the function is a scalar function, we only need to implement the "normal" function template and it should be named simply `foo`.
|
||||
- If the function is an aggregate function, we need to implement `foo`, `foo_merge`, and `foo_finalize`. Note that for an aggregate UDF, even if one of the three functions is not strictly needed, it must still be provided, if only as an empty implementation.
|
||||
|
||||
## Compile UDF
|
||||
|
||||
The source code of UDF in C can't be utilized by TDengine directly. UDF can only be loaded into TDengine after compiling to dynamically linked library.
|
||||
The source code of a UDF in C can't be utilized by TDengine directly. A UDF can only be loaded into TDengine after being compiled into a dynamically linked library (DLL).
|
||||
|
||||
For example, the example UDF `add_one.c` mentioned in previous sections need to be compiled into DLL using below command on Linux Shell.
|
||||
For example, the example UDF `add_one.c` mentioned earlier, can be compiled into DLL using the command below, in a Linux Shell.
|
||||
|
||||
```bash
|
||||
gcc -g -O0 -fPIC -shared add_one.c -o add_one.so
|
||||
```
|
||||
|
||||
The generated DLL file `dd_one.so` can be used later when creating UDF. It's recommended to use GCC not older than 7.5.
|
||||
The generated DLL file `add_one.so` can be used later when creating a UDF. It's recommended to use GCC not older than 7.5.
|
||||
|
||||
## Create and Use UDF
|
||||
|
||||
When a UDF is created in a TDengine instance, it is available across the databases in that instance.
|
||||
|
||||
### Create UDF
|
||||
|
||||
SQL command can be executed on the same hos where the generated UDF DLL resides to load the UDF DLL into TDengine, this operation can't be done through REST interface or web console. Once created, all the clients of the current TDengine can use these UDF functions in their SQL commands. UDF are stored in the management node of TDengine. The UDFs loaded in TDengine would be still available after TDengine is restarted.
|
||||
The SQL command can be executed on the host where the generated UDF DLL resides to load the UDF DLL into TDengine. This operation cannot be done through the REST interface or web console. Once created, any client of the current TDengine instance can use these UDF functions in their SQL commands. UDFs are stored in the management node of TDengine. The UDFs loaded in TDengine are still available after TDengine is restarted.
|
||||
|
||||
When creating UDF, it needs to be clarified as either scalar function or aggregate function. If the specified type is wrong, the SQL statements using the function would fail with error. Besides, the input type and output type don't need to be same in UDF, but the input data type and output data type need to be consistent with the UDF definition.
|
||||
When creating a UDF, its type, i.e. scalar function or aggregate function, must be specified. If the specified type is wrong, the SQL statements using the function will fail with errors. The input type and output type don't need to be the same in a UDF, but the input data type and output data type must be consistent with the UDF definition.
|
||||
|
||||
- Create Scalar Function
|
||||
|
||||
```sql
|
||||
CREATE FUNCTION ids(X) AS ids(Y) OUTPUTTYPE typename(Z) [ BUFSIZE B ];
|
||||
CREATE FUNCTION userDefinedFunctionName AS "/absolute/path/to/userDefinedFunctionName.so" OUTPUTTYPE <supported TDengine type> [BUFSIZE B];
|
||||
```
|
||||
|
||||
- ids(X):the function name to be sued in SQL statement, must be consistent with the function name defined by `udfNormalFunc`
|
||||
- ids(Y):the absolute path of the DLL file including the implementation of the UDF, the path needs to be quoted by single or double quotes
|
||||
- typename(Z):the output data type, the value is the literal string of the type
|
||||
- B:the size of intermediate buffer, in bytes; it's an optional parameter and the range is [0,512]
|
||||
- userDefinedFunctionName:The function name to be used in SQL statements, which must be consistent with the function name defined by `udfNormalFunc` and is also the name of the compiled DLL (.so file).
|
||||
- path:The absolute path of the DLL file including the name of the shared object file (.so). The path must be quoted with single or double quotes.
|
||||
- outputtype:The output data type, the value is the literal string of the supported TDengine data type.
|
||||
- B:the size of intermediate buffer, in bytes; it is an optional parameter and the range is [0,512].
|
||||
|
||||
For example, below SQL statement can be used to create a UDF from `add_one.so`.
|
||||
|
||||
|
@ -123,17 +145,17 @@ CREATE FUNCTION add_one AS "/home/taos/udf_example/add_one.so" OUTPUTTYPE INT;
|
|||
- Create Aggregate Function
|
||||
|
||||
```sql
|
||||
CREATE AGGREGATE FUNCTION ids(X) AS ids(Y) OUTPUTTYPE typename(Z) [ BUFSIZE B ];
|
||||
CREATE AGGREGATE FUNCTION userDefinedFunctionName AS "/absolute/path/to/userDefinedFunctionName.so" OUTPUTTYPE <supported TDengine data type> [ BUFSIZE B ];
|
||||
```
|
||||
|
||||
- ids(X):the function name to be sued in SQL statement, must be consistent with the function name defined by `udfNormalFunc`
|
||||
- ids(Y):the absolute path of the DLL file including the implementation of the UDF, the path needs to be quoted by single or double quotes
|
||||
- typename(Z):the output data type, the value is the literal string of the type
|
||||
- userDefinedFunctionName:the function name to be used in SQL statements, which must be consistent with the function name defined by `udfNormalFunc` and is also the name of the compiled DLL (.so file).
|
||||
- path:the absolute path of the DLL file including the name of the shared object file (.so). The path needs to be quoted by single or double quotes.
|
||||
- OUTPUTTYPE:the output data type, the value is the literal string of the type
|
||||
- B:the size of intermediate buffer, in bytes; it's an optional parameter and the range is [0,512]
|
||||
|
||||
For details about how to use intermediate result, please refer to example program [demo.c](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/demo.c).
|
||||
|
||||
For example, below SQL statement can be used to create a UDF rom `demo.so`.
|
||||
For example, below SQL statement can be used to create a UDF from `demo.so`.
|
||||
|
||||
```sql
|
||||
CREATE AGGREGATE FUNCTION demo AS "/home/taos/udf_example/demo.so" OUTPUTTYPE DOUBLE bufsize 14;
|
||||
|
@ -176,11 +198,11 @@ In current version there are some restrictions for UDF
|
|||
1. Only Linux is supported when creating and invoking UDF for both client side and server side
|
||||
2. UDF can't be mixed with builtin functions
|
||||
3. Only one UDF can be used in a SQL statement
|
||||
4. Single column is supported as input for UDF
|
||||
4. Only a single column is supported as input for UDF
|
||||
5. Once created successfully, UDF is persisted in the MNode of TDengine
|
||||
6. UDF can't be created through REST interface
|
||||
7. The function name used when creating UDF in SQL must be consistent with the function name defined in the DLL, i.e. the name defined by `udfNormalFunc`
|
||||
8. The name name of UDF name should not conflict with any of builtin functions
|
||||
8. The name of a UDF should not conflict with any of TDengine's built-in functions
|
||||
|
||||
## Examples
|
||||
|
||||
|
|
|
@ -3,16 +3,16 @@ sidebar_label: Operation
|
|||
title: Manage DNODEs
|
||||
---
|
||||
|
||||
The previous section [Deployment](/cluster/deploy) introduced how to deploy and start a cluster from scratch. Once a cluster is ready, the dnode status in the cluster can be shown at any time, new dnode can be added to scale out the cluster, an existing dnode can be removed, even load balance can be performed manually.
|
||||
The previous section, [Deployment](/cluster/deploy), showed you how to deploy and start a cluster from scratch. Once a cluster is ready, the status of dnode(s) in the cluster can be shown at any time. Dnodes can be managed from the TDengine CLI. New dnode(s) can be added to scale out the cluster, an existing dnode can be removed and you can even perform load balancing manually, if necessary.
|
||||
|
||||
:::note
|
||||
All the commands to be introduced in this chapter need to be run through TDengine CLI, sometimes it's necessary to use root privilege.
|
||||
All the commands introduced in this chapter must be run in the TDengine CLI - `taos`. Note that sometimes it is necessary to use root privilege.
|
||||
|
||||
:::
|
||||
|
||||
## Show DNODEs
|
||||
|
||||
The below command can be executed in TDengine CLI `taos` to list all dnodes in the cluster, including ID, end point (fqdn:port), status (ready, offline), number of vnodes, number of free vnodes, etc. It's suggested to execute this command to check after adding or removing a dnode.
|
||||
The below command can be executed in TDengine CLI `taos` to list all dnodes in the cluster, including ID, end point (fqdn:port), status (ready, offline), number of vnodes, number of free vnodes and so on. We recommend executing this command after adding or removing a dnode.
|
||||
|
||||
```sql
|
||||
SHOW DNODES;
|
||||
|
@ -30,7 +30,7 @@ Query OK, 1 row(s) in set (0.008298s)
|
|||
|
||||
## Show VGROUPs
|
||||
|
||||
To utilize system resources efficiently and provide scalability, data sharding is required. The data of each database is divided into multiple shards and stored in multiple vnodes. These vnodes may be located in different dnodes, scaling out can be achieved by adding more vnodes from more dnodes. Each vnode can only be used for a single DB, but one DB can have multiple vnodes. The allocation of vnode is scheduled automatically by mnode according to system resources of the dnodes.
|
||||
To utilize system resources efficiently and provide scalability, data sharding is required. The data of each database is divided into multiple shards and stored in multiple vnodes. These vnodes may be located on different dnodes. One way of scaling out is to add more vnodes on dnodes. Each vnode can only be used for a single DB, but one DB can have multiple vnodes. The allocation of vnode is scheduled automatically by mnode based on system resources of the dnodes.
|
||||
|
||||
Launch TDengine CLI `taos` and execute below command:
|
||||
|
||||
|
@ -87,7 +87,7 @@ taos> show dnodes;
|
|||
Query OK, 2 row(s) in set (0.001017s)
|
||||
```
|
||||
|
||||
It can be seen that the status of the new dnode is "offline", once the dnode is started and connects the firstEp of the cluster, execute the command again and get the example output below, from which it can be seen that two dnodes are both in "ready" status.
|
||||
It can be seen that the status of the new dnode is "offline". Once the dnode is started and connects to the firstEp of the cluster, you can execute the command again and get the example output below. As can be seen, both dnodes are in "ready" status.
|
||||
|
||||
```
|
||||
taos> show dnodes;
|
||||
|
@ -132,12 +132,12 @@ taos> show dnodes;
|
|||
Query OK, 1 row(s) in set (0.001137s)
|
||||
```
|
||||
|
||||
In the above example, when `show dnodes` is executed the first time, two dnodes are shown. Then `drop dnode 2` is executed, after that from the output of executing `show dnodes` again it can be seen that only the dnode with ID 1 is still in the cluster.
|
||||
In the above example, when `show dnodes` is executed the first time, two dnodes are shown. After `drop dnode 2` is executed, you can execute `show dnodes` again and it can be seen that only the dnode with ID 1 is still in the cluster.
|
||||
|
||||
:::note
|
||||
|
||||
- Once a dnode is dropped, it can't rejoin the cluster. To rejoin, the dnode needs to deployed again after cleaning up the data directory. Normally, before dropping a dnode, the data belonging to the dnode needs to be migrated to other place.
|
||||
- Please be noted that `drop dnode` is different from stopping `taosd` process. `drop dnode` just removes the dnode out of TDengine cluster. Only after a dnode is dropped, can the corresponding `taosd` process be stopped.
|
||||
- Once a dnode is dropped, it can't rejoin the cluster. To rejoin, the dnode needs to be deployed again after cleaning up the data directory. Before dropping a dnode, the data belonging to the dnode MUST be migrated/backed up according to your data retention, data security or other SOPs.
|
||||
- Please note that `drop dnode` is different from stopping `taosd` process. `drop dnode` just removes the dnode out of TDengine cluster. Only after a dnode is dropped, can the corresponding `taosd` process be stopped.
|
||||
- Once a dnode is dropped, other dnodes in the cluster will be notified of the drop and will not accept the request from the dropped dnode.
|
||||
- dnodeID is allocated automatically and can't be manually modified. dnodeID is generated in ascending order without duplication.
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ title: High Availability and Load Balancing
|
|||
|
||||
High availability of vnode and mnode can be achieved through replicas in TDengine.
|
||||
|
||||
The number of vnodes is associated with each DB, there can be multiple DBs in a TDengine cluster. A different number of replicas can be configured for each DB. When creating a database, the parameter `replica` is used to specify the number of replicas, the default value is 1. With single replica, the high availability of the system can't be guaranteed. Whenever one node is down, the data service will be unavailable. The number of dnodes in the cluster must NOT be lower than the number of replicas set for any DB, otherwise the `create table` operation would fail with error "more dnodes are needed". The SQL statement below is used to create a database named "demo" with 3 replicas.
|
||||
A TDengine cluster can have multiple databases. Each database has a number of vnodes associated with it. A different number of replicas can be configured for each DB. When creating a database, the parameter `replica` is used to specify the number of replicas. The default value for `replica` is 1. Naturally, a single replica cannot guarantee high availability since if one node is down, the data service is unavailable. Note that the number of dnodes in the cluster must NOT be lower than the number of replicas set for any DB, otherwise the `create table` operation will fail with error "more dnodes are needed". The SQL statement below is used to create a database named "demo" with 3 replicas.
|
||||
|
||||
```sql
|
||||
CREATE DATABASE demo replica 3;
|
||||
|
@ -15,19 +15,19 @@ CREATE DATABASE demo replica 3;
|
|||
|
||||
The data in a DB is divided into multiple shards and stored in multiple vgroups. The number of vnodes in each vgroup is determined by the number of replicas set for the DB. The vnodes in each vgroup store exactly the same data. For the purpose of high availability, the vnodes in a vgroup must be located in different dnodes on different hosts. As long as over half of the vnodes in a vgroup are in an online state, the vgroup is able to provide data access. Otherwise the vgroup can't provide data access for reading or inserting data.
|
||||
|
||||
There may be data for multiple DBs in a dnode. Once a dnode is down, multiple DBs may be affected. However, it's hard to say the cluster is guaranteed to work properly as long as over half of dnodes are online because vnodes are introduced and there may be complex mapping between vnodes and dnodes.
|
||||
There may be data for multiple DBs in a dnode, so when a dnode is down, multiple DBs may be affected. In theory, the cluster will provide data access for reading or inserting data as long as over half of the vnodes in each vgroup are online. However, because of the possibly complex mapping between vnodes and dnodes, it is difficult to guarantee that the cluster will work properly merely because over half of the dnodes are online.
|
||||
|
||||
## High Availability of Mnode
|
||||
|
||||
Each TDengine cluster is managed by `mnode`, which is a module of `taosd`. For the high availability of mnode, multiple mnodes can be configured using system parameter `numOfMNodes`, the valid time range is [1,3]. To make sure the data consistency between mnodes, the data replication between mnodes is performed in a synchronous way.
|
||||
Each TDengine cluster is managed by `mnode`, which is a module of `taosd`. For the high availability of mnode, multiple mnodes can be configured using system parameter `numOfMNodes`. The valid range for `numOfMnodes` is [1,3]. To ensure data consistency between mnodes, data replication between mnodes is performed synchronously.
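For illustration, a sketch of the corresponding entry in `taos.cfg` (the value shown is an assumption for a two-mnode setup):

```
# taos.cfg
numOfMnodes 2
```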
|
||||
|
||||
There may be multiple dnodes in a cluster, but only one mnode can be started in each dnode. Which one or ones of the dnodes will be designated as mnodes is automatically determined by TDengine according to the cluster configuration and system resources. Command `show mnodes` can be executed in TDengine `taos` to show the mnodes in the cluster.
|
||||
There may be multiple dnodes in a cluster, but only one mnode can be started in each dnode. Which one or ones of the dnodes will be designated as mnodes is automatically determined by TDengine according to the cluster configuration and system resources. The command `show mnodes` can be executed in TDengine `taos` to show the mnodes in the cluster.
|
||||
|
||||
```sql
|
||||
SHOW MNODES;
|
||||
```
|
||||
|
||||
The end point and role/status (master, slave, unsynced, or offline) of all mnodes can be shown by the above command. When the first dnode is started in a cluster, there must be one mnode in this dnode, because there must be at least one mnode otherwise the cluster doesn't work. If `numOfMNodes` is configured to 2, another mnode will be started when the second dnode is launched.
|
||||
The end point and role/status (master, slave, unsynced, or offline) of all mnodes can be shown by the above command. When the first dnode is started in a cluster, there must be one mnode in this dnode. Without at least one mnode, the cluster cannot work. If `numOfMNodes` is configured to 2, another mnode will be started when the second dnode is launched.
|
||||
|
||||
For the high availability of mnode, `numOfMnodes` needs to be configured to 2 or a higher value. Because the data consistency between mnodes must be guaranteed, the replica confirmation parameter `quorum` is set to 2 automatically if `numOfMNodes` is set to 2 or higher.
|
||||
|
||||
|
@ -36,15 +36,16 @@ If high availability is important for your system, both vnode and mnode must be
|
|||
|
||||
:::
|
||||
|
||||
## Load Balance
|
||||
## Load Balancing
|
||||
|
||||
Load balance will be triggered in 3 cases without manual intervention.
|
||||
Load balancing will be triggered in 3 cases without manual intervention.
|
||||
|
||||
- When a new dnode is joined in the cluster, automatic load balancing may be triggered, some data from some dnodes may be transferred to the new dnode automatically.
|
||||
- When a new dnode joins the cluster, automatic load balancing may be triggered. Some data from other dnodes may be transferred to the new dnode automatically.
|
||||
- When a dnode is removed from the cluster, the data from this dnode will be transferred to other dnodes automatically.
|
||||
- When a dnode is too hot, i.e. too much data has been stored in it, automatic load balancing may be triggered to migrate some vnodes from this dnode to other dnodes.
|
||||
|
||||
:::tip
|
||||
Automatic load balancing is controlled by parameter `balance`, 0 means disabled and 1 means enabled.
|
||||
Automatic load balancing is controlled by the parameter `balance`, 0 means disabled and 1 means enabled. This is set in the file [taos.cfg](https://docs.tdengine.com/reference/config/#balance).
|
||||
|
||||
:::
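A sketch of the corresponding `taos.cfg` entry (the value shown is an assumption):

```
# taos.cfg -- set to 0 to disable automatic load balancing
balance 1
```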
|
||||
|
||||
|
@ -52,22 +53,22 @@ Automatic load balancing is controlled by parameter `balance`, 0 means disabled
|
|||
|
||||
When a dnode is offline, it can be detected by the TDengine cluster. There are two cases:
|
||||
|
||||
- The dnode becomes online again before the threshold configured in `offlineThreshold` is reached, it is still in the cluster and data replication is started automatically. The dnode can work properly after the data syncup is finished.
|
||||
- The dnode comes online before the threshold configured in `offlineThreshold` is reached. The dnode is still in the cluster and data replication is started automatically. The dnode can work properly after the data sync is finished.
|
||||
|
||||
- If the dnode has been offline over the threshold configured in `offlineThreshold` in `taos.cfg`, the dnode will be removed from the cluster automatically. A system alert will be generated and automatic load balancing will be triggered if `balance` is set to 1. When the removed dnode is restarted and becomes online, it will not join in the cluster automatically, it can only be joined manually by the system operator.
|
||||
- If the dnode has been offline over the threshold configured in `offlineThreshold` in `taos.cfg`, the dnode will be removed from the cluster automatically. A system alert will be generated and automatic load balancing will be triggered if `balance` is set to 1. When the removed dnode is restarted and becomes online, it will not join the cluster automatically. The system administrator has to manually join the dnode to the cluster.
|
||||
|
||||
:::note
|
||||
If all the vnodes in a vgroup (or mnodes in mnode group) are in offline or unsynced status, the master node can only be voted after all the vnodes or mnodes in the group become online and can exchange status, then the vgroup (or mnode group) is able to provide service.
|
||||
If all the vnodes in a vgroup (or mnodes in the mnode group) are in offline or unsynced status, a master node can only be elected after all the vnodes or mnodes in the group come back online and can exchange status. Only then is the vgroup (or mnode group) able to provide service.
|
||||
|
||||
:::
|
||||
|
||||
## Arbitrator
|
||||
|
||||
If the number of replicas is set to an even number like 2, when half of the vnodes in a vgroup don't work a master node can't be voted. A similar case is also applicable to mnode if the number of mnodes is set to an even number like 2.
|
||||
The "arbitrator" component is used to address the special case when the number of replicas is set to an even number like 2,4 etc. If half of the vnodes in a vgroup don't work, it is impossible to vote and select a master node. This situation also applies to mnodes if the number of mnodes is set to an even number like 2,4 etc.
|
||||
|
||||
To resolve this problem, a new arbitrator component named `tarbitrator`, abbreviated for TDengine Arbitrator, was introduced. Arbitrator simulates a vnode or mnode but it's only responsible for network communication and doesn't handle any actual data access. As long as more than half of the vnode or mnode, including Arbitrator, are available the vnode group or mnode group can provide data insertion or query services normally.
|
||||
To resolve this problem, a new arbitrator component named `tarbitrator`, an abbreviation of TDengine Arbitrator, was introduced. The `tarbitrator` simulates a vnode or mnode but it's only responsible for network communication and doesn't handle any actual data access. As long as more than half of the vnode or mnode, including Arbitrator, are available the vnode group or mnode group can provide data insertion or query services normally.
|
||||
|
||||
Normally, it's suggested to configure a replica number of each DB or system parameter `numOfMNodes` to an odd number. However, if a user is very sensitive to storage space, a replica number of 2 plus arbitrator component can be used to achieve both lower cost of storage space and high availability.
|
||||
Normally, it's prudent to configure the replica number for each DB or system parameter `numOfMNodes` to be an odd number. However, if a user is very sensitive to storage space, a replica number of 2 plus arbitrator component can be used to achieve both lower cost of storage space and high availability.
|
||||
|
||||
The Arbitrator component is installed with the server package. For details about how to install it, please refer to [Install](/operation/pkg-install). The `-p` parameter of `tarbitrator` can be used to specify the port on which it provides its service.
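For example, a hedged sketch of starting it from a Linux shell (the port number is an assumption; use whichever port suits your deployment):

```bash
nohup tarbitrator -p 6042 > /dev/null 2>&1 &
```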
|
||||
|
||||
|
|
|
@ -1,49 +1,69 @@
|
|||
---
|
||||
title: Data Types
|
||||
description: "The data types supported by TDengine include timestamp, float, JSON, etc"
|
||||
description: "TDengine supports a variety of data types including timestamp, float, JSON and many others."
|
||||
---
|
||||
|
||||
When using TDengine to store and query data, the most important part of the data is timestamp. Timestamp must be specified when creating and inserting data rows or querying data, timestamp must follow the rules below:
|
||||
## TIMESTAMP
|
||||
|
||||
- the format must be `YYYY-MM-DD HH:mm:ss.MS`, the default time precision is millisecond (ms), for example `2017-08-12 18:25:58.128`
|
||||
- internal function `now` can be used to get the current timestamp of the client side
|
||||
- the current timestamp of the client side is applied when `now` is used to insert data
|
||||
When using TDengine to store and query data, the most important part of the data is the timestamp. A timestamp must be specified when creating and inserting data rows. A timestamp must follow the rules below:
|
||||
|
||||
- The format must be `YYYY-MM-DD HH:mm:ss.MS`, the default time precision is millisecond (ms), for example `2017-08-12 18:25:58.128`
|
||||
- Internal function `now` can be used to get the current timestamp on the client side
|
||||
- The current timestamp of the client side is applied when `now` is used to insert data
|
||||
- Epoch Time:timestamp can also be a long integer number, which means the number of seconds, milliseconds or nanoseconds, depending on the time precision, from 1970-01-01 00:00:00.000 (UTC/GMT)
|
||||
- timestamp can be applied with add/subtract operation, for example `now-2h` means 2 hours back from the time at which query is executed,the unit can be b(nanosecond), u(microsecond), a(millisecond), s(second), m(minute), h(hour), d(day), or w(week). So `select * from t1 where ts > now-2w and ts <= now-1w` means the data between two weeks ago and one week ago. The time unit can also be n (calendar month) or y (calendar year) when specifying the time window for down sampling operation.
|
||||
- Add/subtract operations can be carried out on timestamps. For example `now-2h` means 2 hours prior to the time at which query is executed. The units of time in operations can be b(nanosecond), u(microsecond), a(millisecond), s(second), m(minute), h(hour), d(day), or w(week). So `select * from t1 where ts > now-2w and ts <= now-1w` means the data between two weeks ago and one week ago. The time unit can also be n (calendar month) or y (calendar year) when specifying the time window for down sampling operations.
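As a quick illustration (a sketch reusing the `meters` table that appears in other examples in these docs), timestamp arithmetic combines naturally with down sampling:

```sql
-- average voltage per 10-minute window over the last 2 hours
SELECT AVG(voltage) FROM meters WHERE ts > now-2h INTERVAL(10m);
```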
|
||||
|
||||
Time precision in TDengine can be set by the `PRECISION` parameter when executing `CREATE DATABASE`, like below, the default time precision is millisecond.
|
||||
Time precision in TDengine can be set by the `PRECISION` parameter when executing `CREATE DATABASE`. The default time precision is millisecond. In the statement below, the precision is set to nanoseconds.
|
||||
|
||||
```sql
|
||||
CREATE DATABASE db_name PRECISION 'ns';
|
||||
```
|
||||
|
||||
## Data Types
|
||||
|
||||
In TDengine, the data types below can be used when specifying a column or tag.
|
||||
|
||||
| # | **type** | **Bytes** | **Description** |
|
||||
| --- | :-------: | --------- | ------------------------- |
|
||||
| 1 | TIMESTAMP | 8 | Default precision is millisecond, microsecond and nanosecond are also supported |
|
||||
| 2 | INT | 4 | Integer, the value range is [-2^31+1, 2^31-1], while -2^31 is treated as NULL |
|
||||
| 3 | BIGINT | 8 | Long integer, the value range is [-2^63+1, 2^63-1], while -2^63 is treated as NULL |
|
||||
| 4 | FLOAT | 4 | Floating point number, the effective number of digits is 6-7, the value range is [-3.4E38, 3.4E38] |
|
||||
| 5 | DOUBLE | 8 | Double precision floating point number, the effective number of digits is 15-16, the value range is [-1.7E308, 1.7E308] |
|
||||
| 6 | BINARY | User Defined | Single-byte string for ASCII visible characters. Length must be specified when defining a column or tag of binary type. The string length can be up to 16374 bytes. The string value must be quoted with single quotes. The literal single quote inside the string must be preceded with back slash like `\'` |
|
||||
| 7 | SMALLINT | 2 | Short integer, the value range is [-32767, 32767], while -32768 is treated as NULL |
|
||||
| 8 | TINYINT | 1 | Single-byte integer, the value range is [-127, 127], while -128 is treated as NULL |
|
||||
| 9 | BOOL | 1 | Bool, the value range is {true, false} |
|
||||
| 10 | NCHAR | User Defined| Multiple-Byte string that can include like Chinese characters. Each character of NCHAR type consumes 4 bytes storage. The string value should be quoted with single quotes. Literal single quote inside the string must be preceded with backslash, like `\’`. The length must be specified when defining a column or tag of NCHAR type, for example nchar(10) means it can store at most 10 characters of nchar type and will consume fixed storage of 40 bytes. An error will be reported if the string value exceeds the length defined. |
|
||||
| 11 | JSON | | json type can only be used on tag, a tag of json type is excluded with any other tags of any other type |
|
||||
|
||||
:::tip
|
||||
TDengine is case insensitive and treats any characters in the sql command as lower case by default, case sensitive strings must be quoted with single quotes.
|
||||
|
||||
:::
|
||||
| 2 | INT | 4 | Integer, the value range is [-2^31, 2^31-1] |
|
||||
| 3 | INT UNSIGNED | 4 | Unsigned integer, the value range is [0, 2^32-1] |
|
||||
| 4 | BIGINT | 8 | Long integer, the value range is [-2^63, 2^63-1] |
|
||||
| 5 | BIGINT UNSIGNED | 8 | Unsigned long integer, the value range is [0, 2^64-1] |
|
||||
| 6 | FLOAT | 4 | Floating point number, the effective number of digits is 6-7, the value range is [-3.4E38, 3.4E38] |
|
||||
| 7 | DOUBLE | 8 | Double precision floating point number, the effective number of digits is 15-16, the value range is [-1.7E308, 1.7E308] |
|
||||
| 8 | BINARY | User Defined | Single-byte string for ASCII visible characters. Length must be specified when defining a column or tag of binary type. The string length can be up to 16374 bytes. The string value must be quoted with single quotes. The literal single quote inside the string must be preceded with back slash like `\'` |
|
||||
| 9 | SMALLINT | 2 | Short integer, the value range is [-32768, 32767] |
|
||||
| 10 | SMALLINT UNSIGNED | 2 | Unsigned short integer, the value range is [0, 65535] |
|
||||
| 11 | TINYINT | 1 | Single-byte integer, the value range is [-128, 127] |
|
||||
| 12 | TINYINT UNSIGNED | 1 | Unsigned single-byte integer, the value range is [0, 255] |
|
||||
| 13 | BOOL | 1 | Bool, the value range is {true, false} |
|
||||
| 14 | NCHAR | User Defined| Multi-Byte string that can include multi byte characters like Chinese characters. Each character of NCHAR type consumes 4 bytes storage. The string value should be quoted with single quotes. Literal single quote inside the string must be preceded with backslash, like `\’`. The length must be specified when defining a column or tag of NCHAR type, for example nchar(10) means it can store at most 10 characters of nchar type and will consume fixed storage of 40 bytes. An error will be reported if the string value exceeds the length defined. |
|
||||
| 15 | JSON | | JSON type can only be used on tags. A tag of JSON type cannot be used together with tags of any other type |
|
||||
| 16 | VARCHAR | User Defined| Alias of BINARY type |
|
||||
|
||||
:::note
|
||||
Only ASCII visible characters are suggested to be used in a column or tag of BINARY type. Multiple-byte characters must be stored in NCHAR type.
|
||||
- TDengine is case insensitive and treats any characters in the SQL command as lower case by default; case sensitive strings must be quoted with single quotes.
|
||||
- Only ASCII visible characters are suggested to be used in a column or tag of BINARY type. Multi-byte characters must be stored in NCHAR type.
|
||||
- Numeric values in SQL statements will be determined as integer or float type according to whether there is a decimal point or scientific notation is used, so attention must be paid to avoid overflow. For example, 9999999999999999999 will be considered as overflow because it exceeds the upper limit of long integer, but 9999999999999999999.0 will be considered as a legal float number.
|
||||
|
||||
:::
|
||||
|
||||
## Constants
|
||||
TDengine supports constants of multiple data types.
|
||||
|
||||
| # | **Syntax** | **Type** | **Description** |
|
||||
| --- | :-------: | --------- | -------------------------------------- |
|
||||
| 1 | [{+ \| -}]123 | BIGINT | Numeric constants are treated as BIGINT type. The value will be truncated if it exceeds the range of BIGINT type. |
|
||||
| 2 | 123.45 | DOUBLE | Floating number constants are treated as DOUBLE type. TDengine determines whether it's a floating number based on if decimal point or scientific notation is used. |
|
||||
| 3 | 1.2E3 | DOUBLE | Constants in scientific notation are treated as DOUBLE type. |
|
||||
| 4 | 'abc' | BINARY | String constants enclosed by single quotes are treated as BINARY type. Its size is determined by the actual length. A single quote itself can be included by a preceding backslash, i.e. `\'`, in a string constant. |
|
||||
| 5 | "abc" | BINARY | String constants enclosed by double quotes are treated as BINARY type. Its size is determined as the acutal length. Double quote itself can be included by preceding backslash, i.e. `\"`, in a string constant. |
|
||||
| 6 | TIMESTAMP {'literal' \| "literal"} | TIMESTAMP | A string constant following `TIMESTAMP` keyword is treated as TIMESTAMP type. The string should be in the format of "YYYY-MM-DD HH:mm:ss.MS". Its time precision is same as that of the current database being used. |
|
||||
| 7 | {TRUE \| FALSE} | BOOL | BOOL type constant. |
|
||||
| 8 | {'' \| "" \| '\t' \| "\t" \| ' ' \| " " \| NULL } | -- | NULL constant; it can be used for any type. |
|
||||
|
||||
:::note
|
||||
Numeric values in SQL statements will be determined as integer or float type according to whether there is decimal point or whether scientific notation is used, so attention must be paid to avoid overflow. For example, 9999999999999999999 will be considered as overflow because it exceeds the upper limit of long integer, but 9999999999999999999.0 will be considered as a legal float number.
|
||||
- TDengine determines whether a value is a floating point number based on whether a decimal point or scientific notation is used. So whether a value is determined as an overflow depends on both the value and the determined type. For example, 9999999999999999999 is determined as overflow because it exceeds the upper limit of the BIGINT type, while 9999999999999999999.0 is considered a valid floating point number because it is within the range of the DOUBLE type.
|
||||
|
||||
:::
|
||||
|
|
|
@ -4,7 +4,7 @@ title: Database
|
|||
description: "create and drop database, show or change database parameters"
|
||||
---
|
||||
|
||||
## Create Datable
|
||||
## Create Database
|
||||
|
||||
```
|
||||
CREATE DATABASE [IF NOT EXISTS] db_name [KEEP keep] [DAYS days] [UPDATE 1];
|
||||
|
@ -12,11 +12,11 @@ CREATE DATABASE [IF NOT EXISTS] db_name [KEEP keep] [DAYS days] [UPDATE 1];
|
|||
|
||||
:::info
|
||||
|
||||
1. KEEP specifies the number of days for which the data in the database to be created will be kept, the default value is 3650 days, i.e. 10 years. The data will be deleted automatically once its age exceeds this threshold.
|
||||
1. KEEP specifies the number of days for which the data in the database will be retained. The default value is 3650 days, i.e. 10 years. The data will be deleted automatically once its age exceeds this threshold.
|
||||
2. UPDATE specifies whether the data can be updated and how the data can be updated.
|
||||
1. UPDATE set to 0 means update operation is not allowed, the data with an existing timestamp will be dropped silently.
|
||||
2. UPDATE set to 1 means the whole row will be updated, the columns for which no value is specified will be set to NULL
|
||||
3. UPDATE set to 2 means updating a part of columns for a row is allowed, the columns for which no value is specified will be kept as no change
|
||||
1. UPDATE set to 0 means update operation is not allowed. The update for data with an existing timestamp will be discarded silently and the original record in the database will be preserved as is.
|
||||
2. UPDATE set to 1 means the whole row will be updated. The columns for which no value is specified will be set to NULL.
|
||||
3. UPDATE set to 2 means updating a subset of columns for a row is allowed. The columns for which no value is specified will be kept unchanged.
|
||||
3. The maximum length of database name is 33 bytes.
|
||||
4. The maximum length of a SQL statement is 65,480 bytes.
|
||||
5. Below are the parameters that can be used when creating a database
|
||||
|
@ -35,7 +35,7 @@ CREATE DATABASE [IF NOT EXISTS] db_name [KEEP keep] [DAYS days] [UPDATE 1];
|
|||
- maxVgroupsPerDb: [Description](/reference/config/#maxvgroupsperdb)
|
||||
- comp: [Description](/reference/config/#comp)
|
||||
- precision: [Description](/reference/config/#precision)
|
||||
6. Please note that all of the parameters mentioned in this section can be configured in configuration file `taosd.cfg` at server side and used by default, the default parameters can be overriden if they are specified in `create database` statement.
|
||||
6. Please note that all of the parameters mentioned in this section are configured in configuration file `taos.cfg` on the TDengine server. If not specified in the `create database` statement, the values from taos.cfg are used by default. To override default parameters, they must be specified in the `create database` statement.
|
||||
|
||||
:::
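Putting the statement template and the parameters above together, a sketch (the database name and values are assumptions) that keeps data for one year, stores 10 days of data per file and allows partial-column updates:

```sql
CREATE DATABASE IF NOT EXISTS power KEEP 365 DAYS 10 UPDATE 2;
```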
|
||||
|
||||
|
@ -52,7 +52,7 @@ USE db_name;
|
|||
```
|
||||
|
||||
:::note
|
||||
This way is not applicable when using a REST connection
|
||||
This way is not applicable when using a REST connection. In a REST connection the database name must be specified before a table or STable name. For example, to query the STable "meters" in database "test", the query would be "SELECT count(*) FROM test.meters".
|
||||
|
||||
:::
|
||||
|
||||
|
@ -63,13 +63,13 @@ DROP DATABASE [IF EXISTS] db_name;
|
|||
```
|
||||
|
||||
:::note
|
||||
All data in the database will be deleted too. This command must be used with caution.
|
||||
All data in the database will be deleted too. This command must be used with extreme caution. Please follow your organization's data integrity, data backup, data security or any other applicable SOPs before using this command.
|
||||
|
||||
:::
|
||||
|
||||
## Change Database Configuration
|
||||
|
||||
Some examples are shown below to demonstrate how to change the configuration of a database. Please note that some configuration parameters can be changed after the database is created, but some others can't, for details of the configuration parameters of database please refer to [Configuration Parameters](/reference/config/).
|
||||
Some examples are shown below to demonstrate how to change the configuration of a database. Please note that some configuration parameters can be changed after the database is created, but some cannot. For details of the configuration parameters of database please refer to [Configuration Parameters](/reference/config/).
|
||||
|
||||
```
|
||||
ALTER DATABASE db_name COMP 2;
|
||||
|
@ -81,7 +81,7 @@ COMP parameter specifies whether the data is compressed and how the data is comp
|
|||
ALTER DATABASE db_name REPLICA 2;
|
||||
```
|
||||
|
||||
REPLICA parameter specifies the number of replications of the database.
|
||||
REPLICA parameter specifies the number of replicas of the database.
|
||||
|
||||
```
|
||||
ALTER DATABASE db_name KEEP 365;
|
||||
|
@ -124,4 +124,4 @@ SHOW DATABASES;
|
|||
SHOW CREATE DATABASE db_name;
|
||||
```
|
||||
|
||||
This command is useful when migrating the data from one TDengine cluster to another one. This command can be used to get the CREATE statement, which can be used in another TDengine to create the exact same database.
|
||||
This command is useful when migrating the data from one TDengine cluster to another. This command can be used to get the CREATE statement, which can be used in another TDengine instance to create the exact same database.
|
||||
|
|
|
@ -12,10 +12,10 @@ CREATE TABLE [IF NOT EXISTS] tb_name (timestamp_field_name TIMESTAMP, field1_nam
|
|||
|
||||
:::info
|
||||
|
||||
1. The first column of a table must be of TIMESTAMP type, and it will be set as the primary key automatically
|
||||
1. The first column of a table MUST be of type TIMESTAMP. It is automatically set as the primary key.
|
||||
2. The maximum length of the table name is 192 bytes.
|
||||
3. The maximum length of each row is 16k bytes, please note that the extra 2 bytes used by each BINARY/NCHAR column are also counted.
|
||||
4. The name of the subtable can only consist of English characters, digits and underscore, and can't start with a digit. Table names are case insensitive.
|
||||
3. The maximum length of each row is 48k bytes, please note that the extra 2 bytes used by each BINARY/NCHAR column are also counted.
|
||||
4. The name of the subtable can only consist of characters from the English alphabet, digits and underscore. Table names can't start with a digit. Table names are case insensitive.
|
||||
5. The maximum length in bytes must be specified when using BINARY or NCHAR types.
|
||||
6. The escape character "\`" can be used to avoid conflicts between table names and reserved keywords. The above rules are bypassed when using the escape character on table names, but the upper limit for the name length is still valid. Table names specified using the escape character are case sensitive. Only ASCII visible characters can be used with the escape character.
|
||||
For example \`aBc\` and \`abc\` are different table names but `abc` and `aBc` are the same table name because they are both converted to `abc` internally.
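A minimal sketch of this rule (the column layout is only an illustration):

```sql
CREATE TABLE `aBc` (ts TIMESTAMP, v INT); -- kept as aBc: escaped names are case sensitive
CREATE TABLE aBc (ts TIMESTAMP, v INT);   -- converted to abc internally
```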
|
||||
|
@ -44,7 +44,7 @@ The tags for which no value is specified will be set to NULL.
|
|||
CREATE TABLE [IF NOT EXISTS] tb_name1 USING stb_name TAGS (tag_value1, ...) [IF NOT EXISTS] tb_name2 USING stb_name TAGS (tag_value2, ...) ...;
|
||||
```
|
||||
|
||||
This can be used to create a lot of tables in a single SQL statement to accelerate the speed of the creating tables.
|
||||
This can be used to create a lot of tables in a single SQL statement while making table creation much faster.
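For example, assuming the STable `meters` described elsewhere in this documentation, a batch creation sketch might look like:

```sql
CREATE TABLE IF NOT EXISTS d2001 USING meters TAGS ('California.SanFrancisco', 2)
                           d2002 USING meters TAGS ('California.LosAngeles', 3);
```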
|
||||
|
||||
:::info
|
||||
|
||||
|
@ -111,7 +111,7 @@ If a table is created using a super table as template, the table definition can
|
|||
ALTER TABLE tb_name MODIFY COLUMN field_name data_type(length);
|
||||
```
|
||||
|
||||
The type of a column is variable length, like BINARY or NCHAR, this can be used to change (or increase) the length of the column.
|
||||
If the type of a column is variable length, like BINARY or NCHAR, this command can be used to change (or more specifically, increase) the length of the column.
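For instance, assuming a table `tb1` whose column `name` was declared as BINARY(20), a sketch of increasing its length:

```sql
ALTER TABLE tb1 MODIFY COLUMN name BINARY(40); -- the length can only grow, not shrink
```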
|
||||
|
||||
:::note
|
||||
If a table is created using a super table as template, the table definition can only be changed on the corresponding super table, and the change will be automatically applied to all the subtables created using this super table as template. For tables created in the normal way, the table definition can be changed directly on the table.
|
||||
|
|
|
@ -9,7 +9,7 @@ Keyword `STable`, abbreviated for super table, is supported since version 2.0.15
|
|||
|
||||
:::
|
||||
|
||||
## Crate STable
|
||||
## Create STable
|
||||
|
||||
```
|
||||
CREATE STable [IF NOT EXISTS] stb_name (timestamp_field_name TIMESTAMP, field1_name data_type1 [, field2_name data_type2 ...]) TAGS (tag1_name tag_type1, tag2_name tag_type2 [, tag3_name tag_type3]);
|
||||
|
@ -19,7 +19,7 @@ The SQL statement of creating a STable is similar to that of creating a table, b
|
|||
|
||||
:::info
|
||||
|
||||
1. The tag types specified in TAGS should NOT be timestamp. Since 2.1.3.0 timestamp type can be used in TAGS column, but its value must be fixed and arithmetic operation can't be applied on it.
|
||||
1. A tag can be of type timestamp, since version 2.1.3.0, but its value must be fixed and arithmetic operations cannot be performed on it. Prior to version 2.1.3.0, tag types specified in TAGS could not be of type timestamp.
|
||||
2. The tag names specified in TAGS should NOT be the same as other columns.
|
||||
3. The tag names specified in TAGS should NOT be the same as any reserved keywords. (Please refer to [keywords](/taos-sql/keywords/).)
|
||||
4. The maximum number of tags specified in TAGS is 128; there must be at least one tag, and the total length of all tag columns should NOT exceed 16KB.
|
||||
|
@ -76,7 +76,7 @@ ALTER STable stb_name DROP COLUMN field_name;
|
|||
ALTER STable stb_name MODIFY COLUMN field_name data_type(length);
|
||||
```
|
||||
|
||||
This command can be used to change (or increase, more specifically) the length of a column of variable length types, like BINARY or NCHAR.
|
||||
This command can be used to change (or more specifically, increase) the length of a column of variable length types, like BINARY or NCHAR.
|
||||
|
||||
## Change Tags of A STable
|
||||
|
||||
|
@ -94,7 +94,7 @@ This command is used to add a new tag for a STable and specify the tag type.
|
|||
ALTER STable stb_name DROP TAG tag_name;
|
||||
```
|
||||
|
||||
The tag will be removed automatically from all the subtables created using the super table as template once a tag is removed from a super table.
|
||||
The tag will be removed automatically from all the subtables, created using the super table as template, once a tag is removed from a super table.
|
||||
|
||||
### Change A Tag
|
||||
|
||||
|
@ -102,7 +102,7 @@ The tag will be removed automatically from all the subtables created using the s
|
|||
ALTER STable stb_name CHANGE TAG old_tag_name new_tag_name;
|
||||
```
|
||||
|
||||
The tag name will be changed automatically for all the subtables created using the super table as template once a tag name is changed for a super table.
|
||||
The tag name will be changed automatically for all the subtables, created using the super table as template, once a tag name is changed for a super table.
|
||||
|
||||
### Change Tag Length
|
||||
|
||||
|
@ -110,7 +110,7 @@ The tag name will be changed automatically for all the subtables created using t
|
|||
ALTER STable stb_name MODIFY TAG tag_name data_type(length);
|
||||
```
|
||||
|
||||
This command can be used to change (or increase, more specifically) the length of a tag of variable length types, like BINARY or NCHAR.
|
||||
This command can be used to change (or more specifically, increase) the length of a tag of variable length types, like BINARY or NCHAR.
|
||||
|
||||
:::note
|
||||
Changing tag values can only be applied to subtables. All other tag operations, such as adding or removing a tag, can only be applied to the STable. If a new tag is added for a STable, the tag will be added with NULL value for all its subtables.
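A sketch of changing a tag value on a subtable (assuming `d1001` is a subtable of the meters STable):

```sql
ALTER TABLE d1001 SET TAG location = 'California.SanFrancisco';
```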
|
||||
|
|
|
@ -21,7 +21,7 @@ SELECT select_expr [, select_expr ...]
|
|||
|
||||
## Wildcard
|
||||
|
||||
Wilcard \* can be used to specify all columns. The result includes only data columns for normal tables.
|
||||
Wildcard \* can be used to specify all columns. The result includes only data columns for normal tables.
|
||||
|
||||
```
|
||||
taos> SELECT * FROM d1001;
|
||||
|
@ -51,14 +51,14 @@ taos> SELECT * FROM meters;
|
|||
Query OK, 9 row(s) in set (0.002022s)
|
||||
```
|
||||
|
||||
Wildcard can be used with table name as prefix, both below SQL statements have same effects and return all columns.
|
||||
Wildcard can be used with table name as prefix. Both SQL statements below have the same effect and return all columns.
|
||||
|
||||
```SQL
|
||||
SELECT * FROM d1001;
|
||||
SELECT d1001.* FROM d1001;
|
||||
```
|
||||
|
||||
In JOIN query, however, with or without table name prefix will return different results. \* without table prefix will return all the columns of both tables, but \* with table name as prefix will return only the columns of that table.
|
||||
In a JOIN query, however, the results are different with or without a table name prefix. \* without table prefix will return all the columns of both tables, but \* with table name as prefix will return only the columns of that table.
|
||||
|
||||
```
|
||||
taos> SELECT * FROM d1001, d1003 WHERE d1001.ts=d1003.ts;
|
||||
|
@ -76,7 +76,7 @@ taos> SELECT d1001.* FROM d1001,d1003 WHERE d1001.ts = d1003.ts;
|
|||
Query OK, 1 row(s) in set (0.020443s)
|
||||
```
|
||||
|
||||
Wilcard \* can be used with some functions, but the result may be different depending on the function being used. For example, `count(*)` returns only one column, i.e. the number of rows; `first`, `last` and `last_row` return all columns of the selected row.
|
||||
Wildcard \* can be used with some functions, but the result may be different depending on the function being used. For example, `count(*)` returns only one column, i.e. the number of rows; `first`, `last` and `last_row` return all columns of the selected row.
|
||||
|
||||
```
|
||||
taos> SELECT COUNT(*) FROM d1001;
|
||||
|
@ -96,7 +96,7 @@ Query OK, 1 row(s) in set (0.000849s)
|
|||
|
||||
## Tags
|
||||
|
||||
Starting from version 2.0.14, tag columns can be selected together with data columns when querying sub tables. Please note that, however, wildcard \* doesn't represent any tag column, that means tag columns must be specified explicitly like the example below.
|
||||
Starting from version 2.0.14, tag columns can be selected together with data columns when querying sub tables. Please note, however, that wildcard \* cannot be used to represent any tag column. This means that tag columns must be specified explicitly like the example below.
|
||||
|
||||
```
|
||||
taos> SELECT location, groupid, current FROM d1001 LIMIT 2;
|
||||
|
@ -109,7 +109,7 @@ Query OK, 2 row(s) in set (0.003112s)
|
|||
|
||||
## Get distinct values
|
||||
|
||||
`DISTINCT` keyword can be used to get all the unique values of tag columns from a super table, it can also be used to get all the unique values of data columns from a table or subtable.
|
||||
`DISTINCT` keyword can be used to get all the unique values of tag columns from a super table. It can also be used to get all the unique values of data columns from a table or subtable.
|
||||
|
||||
```sql
|
||||
SELECT DISTINCT tag_name [, tag_name ...] FROM stb_name;
|
||||
|
@ -118,15 +118,15 @@ SELECT DISTINCT col_name [, col_name ...] FROM tb_name;
|
|||
|
||||
:::info
|
||||
|
||||
1. Configuration parameter `maxNumOfDistinctRes` in `taos.cfg` is used to control the number of rows to output. The minimum configurable value is 100,000, the maximum configurable value is 100,000,000, the default value is 1000,000. If the actual number of rows exceeds the value of this parameter, only the number of rows specified by this parameter will be output.
|
||||
2. It can't be guaranteed that the results selected by using `DISTINCT` on columns of `FLOAT` or `DOUBLE` are exactly unique because of the precision nature of floating numbers.
|
||||
1. Configuration parameter `maxNumOfDistinctRes` in `taos.cfg` is used to control the number of rows to output. The minimum configurable value is 100,000, the maximum configurable value is 100,000,000, the default value is 1,000,000. If the actual number of rows exceeds the value of this parameter, only the number of rows specified by this parameter will be output.
|
||||
2. It can't be guaranteed that the results selected by using `DISTINCT` on columns of `FLOAT` or `DOUBLE` are exactly unique because of the precision errors in floating point numbers.
|
||||
3. `DISTINCT` can't be used in the sub-query of a nested query statement, and can't be used together with aggregate functions, `GROUP BY` or `JOIN` in the same SQL statement.
|
||||
|
||||
:::
|
||||
|
||||
## Columns Names of Result Set
|
||||
|
||||
When using `SELECT`, the column names in the result set will be same as that in the select clause if `AS` is not used. `AS` can be used to rename the column names in the result set. For example
|
||||
When using `SELECT`, the column names in the result set will be the same as that in the select clause if `AS` is not used. `AS` can be used to rename the column names in the result set. For example
|
||||
|
||||
```
|
||||
taos> SELECT ts, ts AS primary_key_ts FROM d1001;
|
||||
|
@ -161,7 +161,7 @@ SELECT * FROM d1001;
|
|||
|
||||
## Special Query
|
||||
|
||||
Some special query functionalities can be performed without `FORM` sub-clause. For example, below statement can be used to get the current database in use.
|
||||
Some special query functions can be invoked without `FROM` sub-clause. For example, the statement below can be used to get the current database in use.
|
||||
|
||||
```
|
||||
taos> SELECT DATABASE();
|
||||
|
@ -181,7 +181,7 @@ taos> SELECT DATABASE();
|
|||
Query OK, 1 row(s) in set (0.000184s)
|
||||
```
|
||||
|
||||
Below statement can be used to get the version of client or server.
|
||||
The statement below can be used to get the version of client or server.
|
||||
|
||||
```
|
||||
taos> SELECT CLIENT_VERSION();
|
||||
|
@ -197,7 +197,7 @@ taos> SELECT SERVER_VERSION();
|
|||
Query OK, 1 row(s) in set (0.000077s)
|
||||
```
|
||||
|
||||
Below statement is used to check the server status. One integer, like `1`, is returned if the server status is OK, otherwise an error code is returned. This is compatible with the status check for TDengine from connection pool or 3rd party tools, and can avoid the problem of losing the connection from a connection pool when using the wrong heartbeat checking SQL statement.
|
||||
The statement below is used to check the server status. An integer, like `1`, is returned if the server status is OK, otherwise an error code is returned. This is compatible with the status check for TDengine from connection pool or 3rd party tools, and can avoid the problem of losing the connection from a connection pool when using the wrong heartbeat checking SQL statement.
|
||||
|
||||
```
|
||||
taos> SELECT SERVER_STATUS();
|
||||
|
@ -284,7 +284,7 @@ taos> SELECT COUNT(tbname) FROM meters WHERE groupId > 2;
|
|||
Query OK, 1 row(s) in set (0.001091s)
|
||||
```
|
||||
|
||||
- Wildcard \* can be used to get all columns, or specific column names can be specified. Arithmetic operation can be performed on columns of number types, columns can be renamed in the result set.
|
||||
- Wildcard \* can be used to get all columns, or specific column names can be specified. Arithmetic operation can be performed on columns of numerical types, columns can be renamed in the result set.
|
||||
- Arithmetic operations on columns can't be used in the where clause. For example, `where a*2>6;` is not allowed but `where a>6/2;` can be used instead for the same purpose.
|
||||
- Arithmetic operations on columns can't be used inside the functions of a select statement. For example, `select min(2*a) from t;` is not allowed but `select 2*min(a) from t;` can be used instead.
|
||||
- Logical operations can be used in the `WHERE` clause to filter numeric values; wildcards can be used to filter string values.
|
||||
|
@ -318,13 +318,13 @@ Logical operations in below table can be used in the `where` clause to filter th
|
|||
- Operator `like` is used together with wildcards to match strings
|
||||
- '%' matches 0 or any number of characters; '\_' matches any single ASCII character.
|
||||
- `\_` is used to match a literal \_ in the string.
|
||||
- The maximum length of wildcard string is 100 bytes from version 2.1.6.1 (before that the maximum length is 20 bytes). `maxWildCardsLength` in `taos.cfg` can be used to control this threshold. Too long wildcard string may slowdown the execution performance of `LIKE` operator.
|
||||
- The maximum length of a wildcard string is 100 bytes from version 2.1.6.1 (before that the maximum length is 20 bytes). `maxWildCardsLength` in `taos.cfg` can be used to control this threshold. A very long wildcard string may slow down the execution performance of the `LIKE` operator.
|
||||
- `AND` keyword can be used to filter multiple columns simultaneously. AND/OR operation can be performed on single or multiple columns from version 2.3.0.0. However, before 2.3.0.0 `OR` can't be used on multiple columns.
|
||||
- For timestamp column, only one condition can be used; for other columns or tags, `OR` keyword can be used to combine multiple logical operators. For example, `((value > 20 AND value < 30) OR (value < 12))`.
|
||||
- From version 2.3.0.0, multiple conditions can be used on timestamp column, but the result set can only contain single time range.
|
||||
- From version 2.0.17.0, operator `BETWEEN AND` can be used in where clause, for example `WHERE col2 BETWEEN 1.5 AND 3.25` means the filter condition is equal to "1.5 ≤ col2 ≤ 3.25".
|
||||
- From version 2.1.4.0, operator `IN` can be used in the where clause. For example, `WHERE city IN ('California.SanFrancisco', 'California.SanDiego')`. For bool type, both `{true, false}` and `{0, 1}` are allowed, but integers other than 0 or 1 are not allowed. FLOAT and DOUBLE types are impacted by floating precision, only values that match the condition within the tolerance will be selected. Non-primary key column of timestamp type can be used with `IN`.
|
||||
- From version 2.3.0.0, regular expression is supported in the where clause with keyword `match` or `nmatch`, the regular expression is case insensitive.
|
||||
- From version 2.1.4.0, operator `IN` can be used in the where clause. For example, `WHERE city IN ('California.SanFrancisco', 'California.SanDiego')`. For bool type, both `{true, false}` and `{0, 1}` are allowed, but integers other than 0 or 1 are not allowed. FLOAT and DOUBLE types are impacted by floating point precision errors. Only values that match the condition within the tolerance will be selected. Non-primary key column of timestamp type can be used with `IN`.
|
||||
- From version 2.3.0.0, regular expression is supported in the where clause with keyword `match` or `nmatch`. The regular expression is case insensitive.
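Putting several of these filters together, an illustrative sketch against the meters data set might look like:

```sql
SELECT * FROM meters
WHERE ts > NOW - 1h                -- single time range on the timestamp column
  AND voltage BETWEEN 215 AND 235  -- BETWEEN AND, available from version 2.0.17.0
  AND location IN ('California.SanFrancisco', 'California.SanDiego'); -- IN, from 2.1.4.0
```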
|
||||
|
||||
## Regular Expression
|
||||
|
||||
|
@ -364,7 +364,7 @@ FROM temp_STable t1, temp_STable t2
|
|||
WHERE t1.ts = t2.ts AND t1.deviceid = t2.deviceid AND t1.status=0;
|
||||
```
|
||||
|
||||
Similary, join operation can be performed on the result set of multiple sub queries.
|
||||
Similarly, join operations can be performed on the result set of multiple sub queries.
|
||||
|
||||
:::note
|
||||
Restrictions on join operation:
|
||||
|
@ -380,7 +380,7 @@ Restrictions on join operation:
|
|||
|
||||
## Nested Query
|
||||
|
||||
Nested query is also called sub query, that means in a single SQL statement the result of inner query can be used as the data source of the outer query.
|
||||
Nested query is also called sub query. This means that in a single SQL statement the result of inner query can be used as the data source of the outer query.
|
||||
|
||||
From 2.2.0.0, an unassociated sub query can be used in the `FROM` clause. Unassociated means that the sub query doesn't use parameters from the parent query. More specifically, in the `tb_name_list` of a `SELECT` statement, an independent SELECT statement can be used. So a complete nested query looks like:
|
||||
|
||||
|
@ -390,14 +390,14 @@ SELECT ... FROM (SELECT ... FROM ...) ...;
|
|||
|
||||
:::info
|
||||
|
||||
- Only one layer of nesting is allowed, that means no sub query is allowed in a sub query
|
||||
- The result set returned by the inner query will be used as a "virtual table" by the outer query, the "virtual table" can be renamed using `AS` keyword for easy reference in the outer query.
|
||||
- Only one layer of nesting is allowed; that means no sub query is allowed within a sub query
|
||||
- The result set returned by the inner query will be used as a "virtual table" by the outer query. The "virtual table" can be renamed using `AS` keyword for easy reference in the outer query.
|
||||
- Sub query is not allowed in continuous query.
|
||||
- JOIN operation is allowed between tables/STables inside both inner and outer queries. Join operation can be performed on the result set of the inner query.
|
||||
- UNION operation is not allowed in either inner query or outer query.
|
||||
- The functionalities that can be used in the inner query is same as non-nested query.
|
||||
- `ORDER BY` inside the inner query doesn't make any sense but will slow down the query performance significantly, so please avoid such usage.
|
||||
- Compared to the non-nested query, the functionalities that can be used in the outer query have such restrictions as:
|
||||
- The functions that can be used in the inner query are the same as those that can be used in a non-nested query.
|
||||
- `ORDER BY` inside the inner query is unnecessary and will slow down the query performance significantly. It is best to avoid the use of `ORDER BY` inside the inner query.
|
||||
- Compared to the non-nested query, the functionality that can be used in the outer query has the following restrictions:
|
||||
- Functions
|
||||
- If the result set returned by the inner query doesn't contain a timestamp column, then functions relying on timestamp can't be used in the outer query, like `TOP`, `BOTTOM`, `FIRST`, `LAST`, `DIFF`.
|
||||
- Functions that need to scan the data twice can't be used in the outer query, like `STDDEV`, `PERCENTILE`.
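An illustrative sketch of a legal one-level nested query that respects these restrictions:

```sql
-- The inner result set is used as a "virtual table" by the outer query
SELECT AVG(v) FROM (SELECT LAST(voltage) AS v FROM meters GROUP BY tbname);
```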
|
||||
|
@ -442,8 +442,8 @@ The sum of col1 and col2 for rows later than 2018-06-01 08:00:00.000 and whose c
|
|||
SELECT (col1 + col2) AS 'complex' FROM tb1 WHERE ts > '2018-06-01 08:00:00.000' AND col2 > 1.2 LIMIT 10 OFFSET 5;
|
||||
```
|
||||
|
||||
The rows in the past 10 minutes and whose col2 is bigger than 3.14 are selected and output to the result file `/home/testoutpu.csv` with below SQL statement:
|
||||
The rows in the past 10 minutes and whose col2 is bigger than 3.14 are selected and output to the result file `/home/testoutput.csv` with below SQL statement:
|
||||
|
||||
```SQL
|
||||
SELECT COUNT(*) FROM tb1 WHERE ts >= NOW - 10m AND col2 > 3.14 >> /home/testoutpu.csv;
|
||||
SELECT COUNT(*) FROM tb1 WHERE ts >= NOW - 10m AND col2 > 3.14 >> /home/testoutput.csv;
|
||||
```
|
||||
|
|
File diff suppressed because it is too large
|
@ -3,36 +3,36 @@ sidebar_label: Interval
|
|||
title: Aggregate by Time Window
|
||||
---
|
||||
|
||||
Aggregate by time window is supported in TDengine. For example, each temperature sensor reports the temperature every second, the average temperature every 10 minutes can be retrieved by query with time window.
|
||||
Window related clauses are used to divide the data set to be queried into subsets and then aggregate. There are three kinds of windows, time window, status window, and session window. There are two kinds of time windows, sliding window and flip time window.
|
||||
Aggregation by time window is supported in TDengine. For example, in the case where temperature sensors report the temperature every second, the average temperature for every 10 minutes can be retrieved by performing a query with a time window.
|
||||
Window related clauses are used to divide the data set to be queried into subsets and then aggregation is performed across the subsets. There are three kinds of windows: time window, status window, and session window. There are two kinds of time windows: sliding window and flip time/tumbling window.
|
||||
|
||||
## Time Window
|
||||
|
||||
`INTERVAL` clause is used to generate time windows of the same time interval, `SLIDING` is used to specify the time step for which the time window moves forward. The query is performed on one time window each time, and the time window moves forward with time. When defining continuous query both the size of time window and the step of forward sliding time need to be specified. As shown in the figure blow, [t0s, t0e] ,[t1s , t1e], [t2s, t2e] are respectively the time ranges of three time windows on which continuous queries are executed. The time step for which time window moves forward is marked by `sliding time`. Query, filter and aggregate operations are executed on each time window respectively. When the time step specified by `SLIDING` is same as the time interval specified by `INTERVAL`, the sliding time window is actually a flip time window.
|
||||
The `INTERVAL` clause is used to generate time windows of the same time interval. The `SLIDING` parameter is used to specify the time step for which the time window moves forward. The query is performed on one time window each time, and the time window moves forward with time. When defining a continuous query, both the size of the time window and the step of forward sliding time need to be specified. As shown in the figure below, [t0s, t0e], [t1s, t1e], [t2s, t2e] are respectively the time ranges of three time windows on which continuous queries are executed. The time step for which the time window moves forward is marked by `sliding time`. Query, filter and aggregate operations are executed on each time window respectively. When the time step specified by `SLIDING` is the same as the time interval specified by `INTERVAL`, the sliding time window is actually a flip time/tumbling window.
|
||||
|
||||

|
||||

|
||||
|
||||
`INTERVAL` and `SLIDING` should be used with aggregate functions and select functions. Below SQL statement is illegal because no aggregate or selection function is used with `INTERVAL`.
|
||||
`INTERVAL` and `SLIDING` should be used with aggregate functions and select functions. The SQL statement below is illegal because no aggregate or selection function is used with `INTERVAL`.
|
||||
|
||||
```
|
||||
SELECT * FROM temp_tb_1 INTERVAL(1m);
|
||||
```
|
||||
|
||||
The time step specified by `SLIDING` can't exceed the time interval specified by `INTERVAL`. Below SQL statement is illegal because the time length specified by `SLIDING` exceeds that specified by `INTERVAL`.
|
||||
The time step specified by `SLIDING` cannot exceed the time interval specified by `INTERVAL`. The SQL statement below is illegal because the time length specified by `SLIDING` exceeds that specified by `INTERVAL`.
|
||||
|
||||
```
|
||||
SELECT COUNT(*) FROM temp_tb_1 INTERVAL(1m) SLIDING(2m);
|
||||
```
|
||||
|
||||
When the time length specified by `SLIDING` is the same as that specified by `INTERVAL`, the sliding window is actually a flip window. The minimum time range specified by `INTERVAL` is 10 milliseconds (10a) prior to version 2.1.5.0. From version 2.1.5.0, the minimum time range by `INTERVAL` can be 1 microsecond (1u). However, if the DB precision is millisecond, the minimum time range is 1 millisecond (1a). Please note that the `timezone` parameter should be configured to be the same value in the `taos.cfg` configuration file on client side and server side.
|
||||
When the time length specified by `SLIDING` is the same as that specified by `INTERVAL`, the sliding window is actually a flip/tumbling window. The minimum time range specified by `INTERVAL` is 10 milliseconds (10a) prior to version 2.1.5.0. Since version 2.1.5.0, the minimum time range by `INTERVAL` can be 1 microsecond (1u). However, if the DB precision is millisecond, the minimum time range is 1 millisecond (1a). Please note that the `timezone` parameter should be configured to be the same value in the `taos.cfg` configuration file on client side and server side.
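Combining these rules, a legal sketch of a sliding window query might look like:

```sql
-- 1-minute windows, moving forward every 30 seconds (SLIDING <= INTERVAL)
SELECT COUNT(*) FROM temp_tb_1 INTERVAL(1m) SLIDING(30s);
```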
|
||||
|
||||
## Status Window
|
||||
|
||||
In case of using integer, bool, or string to represent the device status at a moment, the continuous rows with same status belong to same status window. Once the status changes, the status window closes. As shown in the following figure, there are two status windows according to status, [2019-04-28 14:22:07,2019-04-28 14:22:10] and [2019-04-28 14:22:11,2019-04-28 14:22:12]. Status window is not applicable to STable for now.
|
||||
In case of using integer, bool, or string to represent the status of a device at any given moment, continuous rows with the same status belong to a status window. Once the status changes, the status window closes. As shown in the following figure, there are two status windows according to status, [2019-04-28 14:22:07,2019-04-28 14:22:10] and [2019-04-28 14:22:11,2019-04-28 14:22:12]. Status window is not applicable to STable for now.
|
||||
|
||||

|
||||

|
||||
|
||||
`STATE_WINDOW` is used to specify the column based on which to define status window, for example:
|
||||
`STATE_WINDOW` is used to specify the column on which the status window will be based. For example:
|
||||
|
||||
```
|
||||
SELECT COUNT(*), FIRST(ts), status FROM temp_tb_1 STATE_WINDOW(status);
|
||||
|
@ -44,9 +44,9 @@ SELECT COUNT(*), FIRST(ts), status FROM temp_tb_1 STATE_WINDOW(status);
|
|||
SELECT COUNT(*), FIRST(ts) FROM temp_tb_1 SESSION(ts, tol_val);
|
||||
```
|
||||
|
||||
The primary key, i.e. timestamp, is used to determine which session window the row belongs to. If the time interval between two adjacent rows is within the time range specified by `tol_val`, they belong to the same session window; otherwise they belong to two different time windows. As shown in the figure below, if the limit of time interval for the session window is specified as 12 seconds, then the 6 rows in the figure constitutes 2 time windows, [2019-04-28 14:22:10,2019-04-28 14:22:30] and [2019-04-28 14:23:10,2019-04-28 14:23:30], because the time difference between 2019-04-28 14:22:30 and 2019-04-28 14:23:10 is 40 seconds, which exceeds the time interval limit of 12 seconds.
|
||||
The primary key, i.e. timestamp, is used to determine which session window a row belongs to. If the time interval between two adjacent rows is within the time range specified by `tol_val`, they belong to the same session window; otherwise they belong to two different session windows. As shown in the figure below, if the limit of time interval for the session window is specified as 12 seconds, then the 6 rows in the figure constitute 2 session windows, [2019-04-28 14:22:10,2019-04-28 14:22:30] and [2019-04-28 14:23:10,2019-04-28 14:23:30], because the time difference between 2019-04-28 14:22:30 and 2019-04-28 14:23:10 is 40 seconds, which exceeds the time interval limit of 12 seconds.
|
||||
|
||||

|
||||

|
||||
|
||||
If the time interval between two consecutive rows is within the time interval specified by `tol_val`, they belong to the same session window; otherwise a new session window is started automatically. Session window is not supported on STable for now.
|
||||
|
||||
|
@ -73,7 +73,7 @@ SELECT function_list FROM stb_name
|
|||
|
||||
### Restrictions
|
||||
|
||||
- Aggregate functions and select functions can be used in `function_list`, with each function having only one output, for example COUNT, AVG, SUM, STDDEV, LEASTSQUARES, PERCENTILE, MIN, MAX, FIRST, LAST. Functions having multiple output can't be used, for example DIFF or arithmetic operations.
|
||||
- Aggregate functions and select functions can be used in `function_list`, with each function having only one output. For example COUNT, AVG, SUM, STDDEV, LEASTSQUARES, PERCENTILE, MIN, MAX, FIRST, LAST. Functions having multiple outputs, such as DIFF or arithmetic operations, can't be used.
|
||||
- `LAST_ROW` can't be used together with window aggregate.
|
||||
- Scalar functions, like CEIL/FLOOR, can't be used with window aggregate.
|
||||
- `WHERE` clause can be used to specify the starting and ending time and other filter conditions
|
||||
|
@ -87,8 +87,8 @@ SELECT function_list FROM stb_name
|
|||
|
||||
:::info
|
||||
|
||||
1. Huge volume of interpolation output may be returned using `FILL`, so it's recommended to specify the time range when using `FILL`. The maximum interpolation values that can be returned in single query is 10,000,000.
|
||||
2. The result set is in ascending order of timestamp in aggregate by time window aggregate.
|
||||
1. A huge volume of interpolation output may be returned using `FILL`, so it's recommended to specify the time range when using `FILL`. The maximum number of interpolation values that can be returned in a single query is 10,000,000.
|
||||
2. The result set is in ascending order of timestamp when you aggregate by time window.
|
||||
3. If aggregate by window is used on STable, the aggregate function is performed on all the rows matching the filter conditions. If `GROUP BY` is not used in the query, the result set will be returned in ascending order of timestamp; otherwise the result set is not exactly in the order of ascending timestamp in each group.
|
||||
|
||||
:::
|
||||
|
@ -97,13 +97,13 @@ Aggregate by time window is also used in continuous query, please refer to [Cont
|
|||
|
||||
## Examples
|
||||
|
||||
The table of intelligent meters can be created by the SQL statement below:
|
||||
A table of intelligent meters can be created by the SQL statement below:
|
||||
|
||||
```sql
|
||||
CREATE TABLE meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) TAGS (location BINARY(64), groupId INT);
|
||||
```
|
||||
|
||||
The average current, maximum current and median of current in every 10 minutes for the past 24 hours can be calculated using the below SQL statement, with missing values filled with the previous non-NULL values.
|
||||
The average current, maximum current and median of current in every 10 minutes for the past 24 hours can be calculated using the SQL statement below, with missing values filled with the previous non-NULL values.
|
||||
|
||||
```
|
||||
SELECT AVG(current), MAX(current), APERCENTILE(current, 50) FROM meters
|
||||
|
|
|
@ -4,8 +4,8 @@ title: Limits & Restrictions
|
|||
|
||||
## Naming Rules
|
||||
|
||||
1. Only English characters, digits and underscore are allowed
|
||||
2. Can't start with a digit
|
||||
1. Only characters from the English alphabet, digits and underscore are allowed
|
||||
2. Names cannot start with a digit
|
||||
3. Case insensitive without escape character "\`"
|
||||
4. Identifier with escape character "\`"
|
||||
To support more flexible table or column names, a new escape character "\`" is introduced. For more details please refer to [escape](/taos-sql/escape).
|
||||
|
@ -16,38 +16,38 @@ The legal character set is `[a-zA-Z0-9!?$%^&*()_–+={[}]:;@~#|<,>.?/]`.
|
|||
|
||||
## General Limits
|
||||
|
||||
- Maximum length of database name is 32 bytes
|
||||
- Maximum length of table name is 192 bytes, excluding the database name prefix and the separator
|
||||
- Maximum length of each data row is 48K bytes from version 2.1.7.0 , before which the limit is 16K bytes. Please note that the upper limit includes the extra 2 bytes consumed by each column of BINARY/NCHAR type.
|
||||
- Maximum of column name is 64.
|
||||
- Maximum length of database name is 32 bytes.
|
||||
- Maximum length of table name is 192 bytes, excluding the database name prefix and the separator.
|
||||
- Maximum length of each data row is 48K bytes since version 2.1.7.0, before which the limit was 16K bytes. Please note that the upper limit includes the extra 2 bytes consumed by each column of BINARY/NCHAR type.
|
||||
- Maximum length of column name is 64.
|
||||
- Maximum number of columns is 4096. There must be at least 2 columns, and the first column must be timestamp.
|
||||
- Maximum length of tag name is 64.
|
||||
- Maximum number of tags is 128. There must be at least 1 tag. The total length of tag values should not exceed 16K bytes.
|
||||
- Maximum length of singe SQL statement is 1048576, i.e. 1 MB bytes. It can be configured in the parameter `maxSQLLength` in the client side, the applicable range is [65480, 1048576].
|
||||
- At most 4096 columns (or 1024 prior to 2.1.7.0) can be returned by `SELECT`, functions in the query statement may constitute columns. Error will be returned if the limit is exceeded.
|
||||
- Maximum numbers of databases, STables, tables are only depending on the system resources.
|
||||
- Maximum length of a single SQL statement is 1048576, i.e. 1 MB. It can be configured with the parameter `maxSQLLength` on the client side, and the applicable range is [65480, 1048576].
|
||||
- At most 4096 columns (or 1024 prior to 2.1.7.0) can be returned by `SELECT`. Functions in the query statement constitute columns. An error is returned if the limit is exceeded.
|
||||
- Maximum numbers of databases, STables, tables are dependent only on the system resources.
|
||||
- Maximum length of a database name is 32 bytes, and it can't include "." or special characters.
|
||||
- Maximum replica number of database is 3
|
||||
- Maximum length of user name is 23 bytes
|
||||
- Maximum length of password is 15 bytes
|
||||
- Maximum number of rows depends on the storage space only.
|
||||
- Maximum number of tables depends on the number of nodes only.
|
||||
- Maximum number of databases depends on the number of nodes only.
|
||||
- Maximum number of vnodes for single database is 64.
|
||||
- Maximum number of replicas for a database is 3.
|
||||
- Maximum length of user name is 23 bytes.
|
||||
- Maximum length of password is 15 bytes.
|
||||
- Maximum number of rows depends only on the storage space.
|
||||
- Maximum number of tables depends only on the number of nodes.
|
||||
- Maximum number of databases depends only on the number of nodes.
|
||||
- Maximum number of vnodes for a single database is 64.
|
||||
|
||||
## Restrictions of `GROUP BY`
|
||||
|
||||
`GROUP BY` can be performed on tags and `TBNAME`. It can be performed on data columns too, with one restriction that only one column and the number of unique values on that column is lower than 100,000. Please note that `GROUP BY` can't be performed on float or double types.
|
||||
`GROUP BY` can be performed on tags and `TBNAME`. It can be performed on data columns too, with the restriction that only one data column can be used and the number of unique values in that column must be lower than 100,000. Please note that `GROUP BY` cannot be performed on float or double types.
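For example, with the meters STable, grouping on a tag or on `TBNAME` might look like:

```sql
SELECT AVG(voltage) FROM meters GROUP BY location;
SELECT COUNT(*) FROM meters GROUP BY TBNAME;
```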
|
||||
|
||||
## Restrictions of `IS NOT NULL`
|
||||
|
||||
`IS NOT NULL` can be used on any data type of columns. The non-empty string evaluation expression, i.e. `<\>""` can only be used on non-numeric data types.
|
||||
`IS NOT NULL` can be used on any data type of columns. The non-empty string evaluation expression, i.e. `<> ""`, can only be used on non-numeric data types.
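A short sketch of both forms, reusing the meters columns:

```sql
SELECT * FROM meters WHERE voltage IS NOT NULL; -- allowed on any data type
SELECT * FROM meters WHERE location <> "";      -- allowed on non-numeric types only
```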
|
||||
|
||||
## Restrictions of `ORDER BY`
|
||||
|
||||
- Only one `order by` is allowed for normal table and subtable.
|
||||
- At most two `order by` are allowed for STable, and the second one must be `ts`.
|
||||
- `order by tag` must be used with `group by tag` on same tag, this rule is also applicable to `tbname`.
|
||||
- `order by tag` must be used with `group by tag` on same tag. This rule is also applicable to `tbname`.
|
||||
- `order by column` must be used with `group by column` or `top/bottom` on same column. This rule is applicable to table and STable.
|
||||
- `order by ts` is applicable to table and STable.
|
||||
- If `order by ts` is used with `group by`, the result set is sorted using `ts` in each group.
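An illustrative sketch that satisfies the `order by tag` rule above, pairing it with `group by` on the same tag:

```sql
SELECT AVG(current) FROM meters GROUP BY location ORDER BY location;
```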
|
||||
|
@ -56,7 +56,7 @@ The legal character set is `[a-zA-Z0-9!?$%^&*()_–+={[}]:;@~#|<,>.?/]`.
|
|||
|
||||
### Name Restrictions of Table/Column
|
||||
|
||||
The name of a table or column can only be composed of ASCII characters, digits and underscore, while it can't start with a digit. The maximum length is 192 bytes. Names are case insensitive. The name mentioned in this rule doesn't include the database name prefix and the separator.
|
||||
The name of a table or column can only be composed of ASCII characters, digits and underscore and it cannot start with a digit. The maximum length is 192 bytes. Names are case insensitive. The name mentioned in this rule doesn't include the database name prefix and the separator.
|
||||
|
||||
### Name Restrictions After Escaping
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ title: JSON Type
|
|||
|
||||
## Syntax
|
||||
|
||||
1. Tag of JSON type
|
||||
1. Tag of type JSON
|
||||
|
||||
```sql
|
||||
create STable s1 (ts timestamp, v1 int) tags (info json);
|
||||
|
@ -12,7 +12,7 @@ title: JSON Type
|
|||
create table s1_1 using s1 tags ('{"k1": "v1"}');
|
||||
```
|
||||
|
||||
2. -> Operator of JSON
|
||||
2. "->" Operator of JSON
|
||||
|
||||
```sql
|
||||
select * from s1 where info->'k1' = 'v1';
|
||||
|
@ -20,7 +20,7 @@ title: JSON Type
|
|||
select info->'k1' from s1;
|
||||
```
|
||||
|
||||
3. contains Operator of JSON
|
||||
3. "contains" Operator of JSON
|
||||
|
||||
```sql
|
||||
select * from s1 where info contains 'k2';
|
||||
|
@ -30,7 +30,7 @@ title: JSON Type
|
|||
|
||||
## Applicable Operations
|
||||
|
||||
1. When JSON data type is used in `where`, `match/nmatch/between and/like/and/or/is null/is no null` can be used but `in` can't be used.
|
||||
1. When a JSON data type is used in `where`, `match/nmatch/between and/like/and/or/is null/is not null` can be used but `in` can't be used.
|
||||
|
||||
```sql
|
||||
select * from s1 where info->'k1' match 'v*';
|
||||
|
@ -42,9 +42,9 @@ title: JSON Type
|
|||
select * from s1 where info->'k1' is not null;
|
||||
```
|
||||
|
||||
2. Tag of JSON type can be used in `group by`, `order by`, `join`, `union all` and sub query, for example `group by json->'key'`
|
||||
2. A tag of JSON type can be used in `group by`, `order by`, `join`, `union all` and sub query; for example `group by json->'key'`
|
||||
|
||||
3. `Distinct` can be used with tag of JSON type
|
||||
3. `Distinct` can be used with a tag of type JSON
|
||||
|
||||
```sql
|
||||
select distinct info->'k1' from s1;
|
||||
|
@ -52,9 +52,9 @@ title: JSON Type
|
|||
|
||||
4. Tag Operations
|
||||
|
||||
The value of JSON tag can be altered. Please note that the full JSON will be overriden when doing this.
|
||||
The value of a JSON tag can be altered. Please note that the full JSON value will be overwritten when doing this.
|
||||
|
||||
The name of JSON tag can be altered. A tag of JSON type can't be added or removed. The column length of a JSON tag can't be changed.
|
||||
The name of a JSON tag can be altered. A tag of JSON type can't be added or removed. The column length of a JSON tag can't be changed.
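A sketch of overwriting the full JSON value of a tag, reusing the subtable `s1_1` created above:

```sql
ALTER TABLE s1_1 SET TAG info = '{"k1": "v1", "k2": "v2"}';
```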
|
||||
|
||||
## Other Restrictions
|
||||
|
||||
|
@ -64,17 +64,17 @@ title: JSON Type
|
|||
|
||||
- JSON format:
|
||||
|
||||
- The input string for JSON can be empty, i.e. "", "\t", or NULL, but can't be non-NULL string, bool or array.
|
||||
- object can be {}, and the whole JSON is empty if so. Key can be "", and it's ignored if so.
|
||||
- value can be int, double, string, boll or NULL, can't be array. Nesting is not allowed, that means value can't be another JSON.
|
||||
- The input string for JSON can be empty, i.e. "", "\t", or NULL, but it can't be non-NULL string, bool or array.
|
||||
- object can be {}, and the entire JSON is empty if so. Key can be "", and it's ignored if so.
|
||||
- value can be int, double, string, bool or NULL, and it can't be an array. Nesting is not allowed which means that the value of a key can't be JSON.
|
||||
- If one key occurs twice in JSON, only the first one is valid.
|
||||
- Escape characters are not allowed in JSON.
|
||||
|
||||
- NULL is returned if querying a key that doesn't exist in JSON.
|
||||
- NULL is returned when querying a key that doesn't exist in JSON.
|
||||
|
||||
- If a tag of JSON type is the result of an inner query, it can't be parsed and queried in the outer query.
|
||||
|
||||
For example, the below SQL statements are not supported.
|
||||
For example, the SQL statements below are not supported.
|
||||
|
||||
```sql
|
||||
select jtag->'key' from (select jtag from STable);
|
||||
|
|
|
@ -46,3 +46,45 @@ There are about 200 keywords reserved by TDengine, they can't be used as the nam
|
|||
| CONNECTIONS | HAVING | NOT | SOFFSET | VNODES |
|
||||
| CONNS | ID | NOTNULL | STable | WAL |
|
||||
| COPY | IF | NOW | STableS | WHERE |
|
||||
| _C0 | _QSTART | _QSTOP | _QDURATION | _WSTART |
|
||||
| _WSTOP | _WDURATION | _ROWTS |
|
||||
|
||||
## Explanations
|
||||
### TBNAME
|
||||
`TBNAME` can be considered a special tag in a STable, representing the name of the subtable.
|
||||
|
||||
Get the table name and tag values of all subtables in a STable.
|
||||
```mysql
|
||||
SELECT TBNAME, location FROM meters;
|
||||
```
|
||||
|
||||
Count the number of subtables in a STable.
|
||||
```mysql
|
||||
SELECT COUNT(TBNAME) FROM meters;
|
||||
```
|
||||
|
||||
Only filters on TAGS can be used in the WHERE clause of the above two query statements.
|
||||
```mysql
|
||||
taos> SELECT TBNAME, location FROM meters;
|
||||
tbname | location |
|
||||
==================================================================
|
||||
d1004 | California.SanFrancisco |
|
||||
d1003 | California.SanFrancisco |
|
||||
d1002 | California.LosAngeles |
|
||||
d1001 | California.LosAngeles |
|
||||
Query OK, 4 row(s) in set (0.000881s)
|
||||
|
||||
taos> SELECT COUNT(tbname) FROM meters WHERE groupId > 2;
|
||||
count(tbname) |
|
||||
========================
|
||||
2 |
|
||||
Query OK, 1 row(s) in set (0.001091s)
|
||||
```
|
||||
### _QSTART/_QSTOP/_QDURATION
|
||||
The start, stop and duration of a query time window.
|
||||
|
||||
### _WSTART/_WSTOP/_WDURATION
|
||||
The start, stop and duration of an aggregate query by time window, such as interval, session window and state window.
|
||||
|
||||
### _c0/_ROWTS
|
||||
_c0 is equal to _ROWTS; both represent the first column of a table or STable.
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
---
|
||||
sidebar_label: Operators
|
||||
title: Operators
|
||||
---
|
||||
|
||||
## Arithmetic Operators
|
||||
|
||||
| # | **Operator** | **Data Types** | **Description** |
|
||||
| --- | :----------: | -------------- | --------------------------------------------------------- |
|
||||
| 1 | +, - | Numeric Types | Representing positive or negative numbers, unary operator |
|
||||
| 2 | +, - | Numeric Types | Addition and subtraction, binary operator |
|
||||
| 3 | \*, / | Numeric Types | Multiplication and division, binary operator |
|
||||
| 4 | % | Numeric Types | Taking the remainder, binary operator |
|
||||
|
||||
## Bitwise Operators
|
||||
|
||||
| # | **Operator** | **Data Types** | **Description** |
|
||||
| --- | :----------: | -------------- | ----------------------------- |
|
||||
| 1 | & | Numeric Types | Bitwise AND, binary operator |
|
||||
| 2 | \| | Numeric Types | Bitwise OR, binary operator |
|
||||
|
||||
## JSON Operator
|
||||
|
||||
The `->` operator can be used to get the value of a key in a column of JSON type. The left operand is the column name and the right operand is a string constant. For example, `col->'name'` returns the value of the key `'name'`.
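A minimal sketch, assuming a STable `s1` with a JSON tag column named `info` as in the JSON type chapter:

```sql
SELECT info->'k1' FROM s1 WHERE info->'k1' = 'v1';
```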
|
||||
|
||||
## Set Operator
|
||||
|
||||
Set operators are used to combine the results of two queries into a single result. A query including set operators is called a combined query. The number of columns in each result set of a combined query must be the same, and the column types are determined by the first query's result; the types in the results of the following queries must be convertible to the types of the first query's result, following the same conversion rules as the `CAST` function.
|
||||
|
||||
TDengine provides 2 set operators: `UNION ALL` and `UNION`. `UNION ALL` combines the results without removing duplicate data. `UNION` combines the results and removes duplicate rows. In a single SQL statement, at most 100 set operators can be used.
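A sketch with two meters subtables; `UNION ALL` keeps duplicate rows while `UNION` removes them:

```sql
SELECT voltage FROM d1001
UNION ALL
SELECT voltage FROM d1002;
```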
|
||||
|
||||
## Comparison Operator
|
||||
|
||||
| # | **Operator** | **Data Types** | **Description** |
|
||||
| --- | :---------------: | ------------------------------------------------------------------- | ----------------------------------------------- |
|
||||
| 1 | = | Except for BLOB, MEDIUMBLOB and JSON | Equal |
|
||||
| 2 | <\>, != | Except for BLOB, MEDIUMBLOB, JSON and primary key of timestamp type | Not equal |
|
||||
| 3 | \>, < | Except for BLOB, MEDIUMBLOB and JSON | Greater than, less than |
|
||||
| 4 | \>=, <= | Except for BLOB, MEDIUMBLOB and JSON | Greater than or equal to, less than or equal to |
|
||||
| 5 | IS [NOT] NULL | Any types | Is NULL or NOT |
|
||||
| 6 | [NOT] BETWEEN AND | Except for BLOB, MEDIUMBLOB and JSON | In a value range or not |
|
||||
| 7 | IN | Except for BLOB, MEDIUMBLOB, JSON and primary key of timestamp type | In a list of values or not |
|
||||
| 8 | LIKE | BINARY, NCHAR and VARCHAR | Wildcard matching |
|
||||
| 9 | MATCH, NMATCH | BINARY, NCHAR and VARCHAR | Regular expression matching |
|
||||
| 10 | CONTAINS | JSON | Whether a key exists in JSON |
|
||||
|
||||
The `LIKE` operator uses wildcards to match strings. The rules are:
|
||||
|
||||
- '%' matches 0 to any number of characters; '\_' matches any single ASCII character.
|
||||
- `\_` can be used to match a literal `_` in the string, i.e. the underscore is escaped with the backslash character `\`
|
||||
- A wildcard string is 100 bytes at most. The longer a wildcard string is, the worse the performance of the LIKE operator.
|
||||
|
||||
The `MATCH` and `NMATCH` operators use regular expressions to match strings. The rules are:
|
||||
|
||||
- Regular expressions of POSIX standard are supported.
|
||||
- Only `tbname`, i.e. the table name of sub tables, and tag columns of string types can be matched with a regular expression; data columns are not supported.
|
||||
- A regular expression string is 128 bytes at most, and this limit can be adjusted with the parameter `maxRegexStringLen`, which is a client side configuration; the client must be restarted for a change to take effect.
|
||||
|
||||
## Logical Operators
|
||||
|
||||
| # | **Operator** | **Data Types** | **Description** |
|
||||
| --- | :----------: | -------------- | ---------------------------------------------------------------------------------------- |
|
||||
| 1 | AND | BOOL | Logical AND; returns TRUE if both conditions are TRUE, and FALSE if either one is FALSE. |
|
||||
| 2 | OR | BOOL | Logical OR; returns TRUE if either condition is TRUE, and FALSE if both are FALSE |
|
||||
|
||||
TDengine uses short-circuit optimization when performing logical operations. For the AND operator, if the first condition evaluates to FALSE, the second one is not evaluated. For the OR operator, if the first condition evaluates to TRUE, the second one is not evaluated.
|
|
@ -3,11 +3,9 @@ title: TDengine SQL
|
|||
description: "The syntax supported by TDengine SQL "
|
||||
---
|
||||
|
||||
This section explains the syntax to operating databases, tables, STables, inserting data, selecting data, functions and some tips that can be used in TDengine SQL. It would be easier to understand with some fundamental knowledge of SQL.
|
||||
This section explains the syntax of SQL to perform operations on databases, tables and STables, insert data, select data and use functions. We also provide some tips that can be used in TDengine SQL. If you have previous experience with SQL this section will be fairly easy to understand. If you do not have previous experience with SQL, you'll come to appreciate the simplicity and power of SQL.
|
||||
|
||||
TDengine SQL is the major interface for users to write data into or query from TDengine. For users to easily use, syntax similar to standard SQL is provided. However, please note that TDengine SQL is not standard SQL. For instance, TDengine doesn't provide the functionality of deleting time series data, thus corresponding statements are not provided in TDengine SQL.
|
||||
|
||||
TDengine SQL doesn't support abbreviation for keywords, for example `DESCRIBE` can't be abbreviated as `DESC`.
|
||||
TDengine SQL is the major interface for users to write data into or query from TDengine. For ease of use, the syntax is similar to that of standard SQL. However, please note that TDengine SQL is not standard SQL. For instance, TDengine doesn't provide a delete function for time series data and so corresponding statements are not provided in TDengine SQL.
|
||||
|
||||
Syntax Specifications used in this chapter:
|
||||
|
||||
|
@ -16,7 +14,7 @@ Syntax Specifications used in this chapter:
|
|||
- | means one of a few options, excluding | itself.
|
||||
- … means the item prior to it can be repeated multiple times.
|
||||
|
||||
To better demonstrate the syntax, usage and rules of TAOS SQL, hereinafter it's assumed that there is a data set of meters. Assuming each meter collects 3 data measurements: current, voltage, phase. The data model is shown below:
|
||||
To better demonstrate the syntax, usage and rules of TAOS SQL, hereinafter it's assumed that there is a data set of data from electric meters. Each meter collects 3 data measurements: current, voltage, phase. The data model is shown below:
|
||||
|
||||
```sql
|
||||
taos> DESCRIBE meters;
|
||||
|
@ -30,4 +28,4 @@ taos> DESCRIBE meters;
|
|||
groupid | INT | 4 | TAG |
|
||||
```
|
||||
|
||||
The data set includes the data collected by 4 meters, the corresponding table name is d1001, d1002, d1003, d1004 respectively based on the data model of TDengine.
|
||||
The data set includes the data collected by 4 meters, the corresponding table name is d1001, d1002, d1003 and d1004 based on the data model of TDengine.
|
||||
|
|
|
@ -6,7 +6,7 @@ description: Install, Uninstall, Start, Stop and Upgrade
|
|||
import Tabs from "@theme/Tabs";
|
||||
import TabItem from "@theme/TabItem";
|
||||
|
||||
TDengine community version provides dev and rpm packages for users to choose based on the system environment. deb supports Debian, Ubuntu and systems derived from them. rpm supports CentOS, RHEL, SUSE and systems derived from them. Furthermore, tar.gz package is provided for enterprise customers.
|
||||
TDengine community version provides deb and rpm packages for users to choose from, based on their system environment. The deb package supports Debian, Ubuntu and derivative systems. The rpm package supports CentOS, RHEL, SUSE and derivative systems. Furthermore, a tar.gz package is provided for TDengine Enterprise customers.
|
||||
|
||||
## Install
|
||||
|
||||
|
@ -124,7 +124,7 @@ taoskeeper is installed, enable it by `systemctl enable taoskeeper`
|
|||
```
|
||||
|
||||
:::info
|
||||
Some configuration will be prompted for users to provide when install.sh is executing, the interactive mode can be disabled by executing `./install.sh -e no`. `./install -h` can show all parameters and detailed explanation.
|
||||
Users will be prompted to enter some configuration information when install.sh is executing. The interactive mode can be disabled by executing `./install.sh -e no`. `./install.sh -h` can show all parameters with detailed explanation.
|
||||
|
||||
:::
|
||||
|
||||
|
@ -132,7 +132,7 @@ Some configuration will be prompted for users to provide when install.sh is exec
|
|||
</Tabs>
|
||||
|
||||
:::note
|
||||
When installing on the first node in the cluster, when "Enter FQDN:" is prompted, nothing needs to be provided. When installing on following nodes, when "Enter FQDN:" is prompted, the end point of the first dnode in the cluster can be input if it is already up; or just ignore it and configure later after installation is done.
|
||||
When installing on the first node in the cluster, at the "Enter FQDN:" prompt, nothing needs to be provided. When installing on subsequent nodes, at the "Enter FQDN:" prompt, you must enter the end point of the first dnode in the cluster if it is already up. You can also just ignore it and configure it later after installation is finished.
|
||||
|
||||
:::
|
||||
|
||||
|
@ -181,14 +181,14 @@ taosKeeper is removed successfully!
|
|||
|
||||
:::note
|
||||
|
||||
- It's strongly suggested not to use multiple kinds of installation packages on a single host TDengine
|
||||
- After deb package is installed, if the installation directory is removed manually so that uninstall or reinstall can't succeed, it can be resolved by cleaning up TDengine package information as in the command below and then reinstalling.
|
||||
- We strongly recommend not to use multiple kinds of TDengine installation packages on a single host.
|
||||
- After deb package is installed, if the installation directory is removed manually, uninstall or reinstall will not work. This issue can be resolved by using the command below which cleans up TDengine package information. You can then reinstall if needed.
|
||||
|
||||
```bash
|
||||
$ sudo rm -f /var/lib/dpkg/info/tdengine*
|
||||
```
|
||||
|
||||
- After rpm package is installed, if the installation directory is removed manually so that uninstall or reinstall can't succeed, it can be resolved by cleaning up TDengine package information as in the command below and then reinstalling.
|
||||
- After rpm package is installed, if the installation directory is removed manually, uninstall or reinstall will not work. This issue can be resolved by using the command below which cleans up TDengine package information. You can then reinstall if needed.
|
||||
|
||||
```bash
|
||||
$ sudo rpm -e --noscripts tdengine
|
||||
|
@ -219,7 +219,7 @@ lrwxrwxrwx 1 root root 13 Feb 22 09:34 log -> /var/log/taos/
|
|||
During the installation process:
|
||||
|
||||
- Configuration directory, data directory, and log directory are created automatically if they don't exist
|
||||
- The default configuration file is located at /etc/taos/taos.cfg, which is a copy of /usr/local/taos/cfg/taos.cfg if not existing
|
||||
- The default configuration file is located at /etc/taos/taos.cfg, which is a copy of /usr/local/taos/cfg/taos.cfg
|
||||
- The default data directory is /var/lib/taos, which is a soft link to /usr/local/taos/data
|
||||
- The default log directory is /var/log/taos, which is a soft link to /usr/local/taos/log
|
||||
- The executables at /usr/local/taos/bin are linked to /usr/bin
|
||||
|
@ -228,7 +228,7 @@ During the installation process:
|
|||
|
||||
:::note
|
||||
|
||||
- When TDengine is uninstalled, the configuration /etc/taos/taos.cfg, data directory /var/lib/taos, log directory /var/log/taos are kept. They can be deleted manually with caution because data can't be recovered
|
||||
- When TDengine is uninstalled, the configuration /etc/taos/taos.cfg, data directory /var/lib/taos, log directory /var/log/taos are kept. They can be deleted manually with caution, because data can't be recovered. Please follow data integrity, security, backup or relevant SOPs before deleting any data.
|
||||
- When reinstalling TDengine, if the default configuration file /etc/taos/taos.cfg exists, it will be kept and the configuration file in the installation package will be renamed to taos.cfg.orig and stored at /usr/local/taos/cfg to be used as a configuration sample. Otherwise the configuration file in the installation package will be installed to /etc/taos/taos.cfg and used.
|
||||
|
||||
## Start and Stop
|
||||
|
@ -263,18 +263,19 @@ Active: inactive (dead)
|
|||
|
||||
There are two aspects to an upgrade operation: upgrading the installation package and upgrading a running server.
|
||||
|
||||
Upgrading package should follow the steps mentioned previously to first uninstall the old version then install the new version.
|
||||
To upgrade a package, follow the steps mentioned previously to first uninstall the old version then install the new version.
|
||||
|
||||
Upgrading a running server is much more complex. First please check the version number of the old version and the new version. The version number of TDengine consists of 4 sections, only if the first 3 section match can the old version be upgraded to the new version. The steps of upgrading a running server are as below:
|
||||
Upgrading a running server is much more complex. First please check the version number of the old version and the new version. The version number of TDengine consists of 4 sections, only if the first 3 sections match can the old version be upgraded to the new version. The steps of upgrading a running server are as below:
|
||||
|
||||
- Stop inserting data
|
||||
- Make sure all data are persisted into disk
|
||||
- Make sure all data is persisted to disk
|
||||
- Run some simple queries (such as the total number of rows in STables and tables), note down the values, and follow best practices and relevant SOPs
|
||||
- Stop the cluster of TDengine
|
||||
- Uninstall old version and install new version
|
||||
- Start the cluster of TDengine
|
||||
- Make some simple queries to make sure no data loss
|
||||
- Make some simple data insertion to make sure the cluster works well
|
||||
- Restore business data
|
||||
- Execute simple queries, such as the ones executed prior to installing the new package, to make sure there is no data loss
|
||||
- Run some simple data insertion statements to make sure the cluster works well
|
||||
- Restore business services
|
||||
|
||||
:::warning
|
||||
|
||||
|
|
|
@ -2,17 +2,17 @@
|
|||
title: Resource Planning
|
||||
---
|
||||
|
||||
The computing and storage resources need to be planned if using TDengine to build an IoT platform. How to plan the CPU, memory and disk required will be described in this chapter.
|
||||
It is important to plan computing and storage resources if using TDengine to build an IoT, time-series or Big Data platform. How to plan the CPU, memory and disk resources required will be described in this chapter.
|
||||
|
||||
## Memory Requirement of Server Side
|
||||
|
||||
The number of vgroups created for each database is the same as the number of CPU cores by default and can be configured by parameter `maxVgroupsPerDb`, each vnode in a vgroup stores one replica. Each vnode consumes a fixed size of memory, i.e. `blocks` \* `cache`. Besides, some memory is required for tag values associated with each table. A fixed amount of memory is required for each cluster. So, the memory required for each DB can be calculated using the formula below:
|
||||
By default, the number of vgroups created for each database is the same as the number of CPU cores. This can be configured by the parameter `maxVgroupsPerDb`. Each vnode in a vgroup stores one replica. Each vnode consumes a fixed amount of memory, i.e. `blocks` \* `cache`. In addition, some memory is required for tag values associated with each table. A fixed amount of memory is required for each cluster. So, the memory required for each DB can be calculated using the formula below:
|
||||
|
||||
```
|
||||
Database Memory Size = maxVgroupsPerDb * replica * (blocks * cache + 10MB) + numOfTables * (tagSizePerTable + 0.5KB)
|
||||
```
|
||||
|
||||
For example, assuming the default value of `maxVgroupPerDB` is 64, the default value of `cache` 16M, the default value of `blocks` is 6, there are 100,000 tables in a DB, the replica number is 1, total length of tag values is 256 bytes, the total memory required for this DB is: 64 \* 1 \* (16 \* 6 + 10) + 100000 \* (0.25 + 0.5) / 1000 = 6792M.
|
||||
For example, assuming the default value of `maxVgroupPerDB` is 64, the default value of `cache` is 16M, the default value of `blocks` is 6, there are 100,000 tables in a DB, the replica number is 1, and the total length of tag values is 256 bytes, the total memory required for this DB is: 64 \* 1 \* (16 \* 6 + 10) + 100000 \* (0.25 + 0.5) / 1000 = 6859M.
|
||||
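The arithmetic above can be checked with a short program. This is a minimal sketch that only reproduces the formula from this section; none of the names below are TDengine APIs, and the inputs mirror the example values.

```c
#include <stdio.h>

int main(void) {
    int    maxVgroupsPerDb = 64;     /* default: one vgroup per CPU core */
    int    replica         = 1;
    int    blocks          = 6;      /* default */
    int    cacheMB         = 16;     /* default cache block size in MB */
    long   numOfTables     = 100000;
    double tagSizeKB       = 0.25;   /* 256 bytes of tag values per table */

    /* Database Memory Size formula from this section */
    double dbMemoryMB = maxVgroupsPerDb * replica * (blocks * cacheMB + 10.0)
                      + numOfTables * (tagSizeKB + 0.5) / 1000.0;
    printf("Database Memory Size: %.0f MB\n", dbMemoryMB); /* prints 6859 */
    return 0;
}
```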
|
||||
In the real operation of TDengine, we are more concerned about the memory used by each TDengine server process `taosd`.
|
||||
|
||||
|
@ -22,10 +22,10 @@ In the real operation of TDengine, we are more concerned about the memory used b
|
|||
|
||||
In the above formula:
|
||||
|
||||
1. "vnode_memory" of a `taosd` process is the memory used by all vnodes hosted by this `taosd` process. It can be roughly calculated by firstly adding up the total memory of all DBs whose memory usage can be derived according to the formula mentioned previously then dividing by number of dnodes and multiplying the number of replicas.
|
||||
1. "vnode_memory" of a `taosd` process is the memory used by all vnodes hosted by this `taosd` process. It can be roughly calculated by firstly adding up the total memory of all DBs whose memory usage can be derived according to the formula for Database Memory Size, mentioned above, then dividing by number of dnodes and multiplying the number of replicas.
|
||||
|
||||
```
|
||||
vnode_memory = sum(Database memory) / number_of_dnodes * replica
|
||||
vnode_memory = (sum(Database Memory Size) / number_of_dnodes) * replica
|
||||
```
|
||||
|
||||
2. "mnode_memory" of a `taosd` process is the memory consumed by a mnode. If there is one (and only one) mnode hosted in a `taosd` process, the memory consumed by "mnode" is "0.2KB \* the total number of tables in the cluster".
|
||||
|
@ -56,8 +56,8 @@ So, at least 3GB needs to be reserved for such a client.
|
|||
|
||||
The CPU resources required depend on two aspects:
|
||||
|
||||
- **Data Insertion** Each dnode of TDengine can process at least 10,000 insertion requests in one second, while each insertion request can have multiple rows. The computing resource consumed between inserting 1 row one time and inserting 10 rows one time is very small. So, the more the rows to insert one time, the higher the efficiency. Inserting in bach also exposes requirements for the client side which needs to cache rows and insert in batch once the cached rows reaches a threshold.
|
||||
- **Data Query** High efficiency query is provided in TDengine, but it's hard to estimate the CPU resource required because the queries used in different use cases and the frequency of queries vary significantly. It can only be verified with the query statements, query frequency, data size to be queried, etc provided by user.
|
||||
- **Data Insertion** Each dnode of TDengine can process at least 10,000 insertion requests in one second, while each insertion request can have multiple rows. The difference in computing resources consumed between inserting 1 row at a time and inserting 10 rows at a time is very small. So, the more rows that are inserted at one time, the higher the efficiency. Inserting in batch also imposes requirements on the client side, which needs to cache rows and insert them in batch once the number of cached rows reaches a threshold.
|
||||
- **Data Query** High efficiency query is provided in TDengine, but it's hard to estimate the CPU resource required because the queries used in different use cases and the frequency of queries vary significantly. It can only be verified with the query statements, query frequency, data size to be queried, and other requirements provided by users.
|
||||
|
||||
In short, the CPU resources required for data insertion can be estimated, but it's hard to do so for query use cases. In real operation, it's suggested to control CPU usage below 50%. If this threshold is exceeded, it's a reminder for the system operator to add more nodes to the cluster to expand resources.
|
||||
|
||||
|
@ -71,12 +71,12 @@ Raw DataSize = numOfTables * rowSizePerTable * rowsPerTable
|
|||
|
||||
For example, if there are 10,000,000 meters, each meter collects data every 15 minutes, and the data size of each collection is 128 bytes, then the raw data size of one year is: 10000000 \* 128 \* 24 \* 60 / 15 \* 365 = 44.8512(TB). Assuming the compression ratio is 5, the actual disk size is: 44.8512 / 5 = 8.97024(TB).
|
||||
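As a quick sanity check, the sizing arithmetic above can be reproduced with a few lines of code. This is a minimal sketch mirroring the numbers in the example; it is not a TDengine API.

```c
#include <stdio.h>

int main(void) {
    double numOfTables      = 10000000;        /* meters */
    double rowSize          = 128;             /* bytes per collection */
    double rowsPerDay       = 24.0 * 60 / 15;  /* one collection every 15 minutes */
    double days             = 365;
    double compressionRatio = 5;

    double rawTB = numOfTables * rowSize * rowsPerDay * days / 1e12;
    printf("Raw data size:    %.4f TB\n", rawTB);                    /* 44.8512 */
    printf("Actual disk size: %.5f TB\n", rawTB / compressionRatio); /* 8.97024 */
    return 0;
}
```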
|
||||
Parameter `keep` can be used to set how long the data will be kept on disk. To further reduce storage cost, multiple storage levels can be enabled in TDengine, with the coldest data stored on the cheapest storage device, and this is transparent to application programs.
|
||||
Parameter `keep` can be used to set how long the data will be kept on disk. To further reduce storage cost, multiple storage levels can be enabled in TDengine, with the coldest data stored on the cheapest storage device. This is completely transparent to application programs.
|
||||
|
||||
To increase the performance, multiple disks can be setup for parallel data reading or data inserting. Please note that an expensive disk array is not necessary because replications are used in TDengine to provide high availability.
|
||||
To increase performance, multiple disks can be setup for parallel data reading or data inserting. Please note that an expensive disk array is not necessary because replications are used in TDengine to provide high availability.
|
||||
|
||||
## Number of Hosts
|
||||
|
||||
A host can be either physical or virtual. The total memory, total CPU, total disk required can be estimated according to the formulas mentioned previously. Then, according to the system resources that a single host can provide, assuming all hosts have the same resources, the number of hosts can be derived easily.
|
||||
A host can be either physical or virtual. The total memory, total CPU, total disk required can be estimated according to the formulae mentioned previously. Then, according to the system resources that a single host can provide, assuming all hosts have the same resources, the number of hosts can be derived easily.
|
||||
|
||||
**Quick Estimation for CPU, Memory and Disk** Please refer to [Resource Estimate](https://www.taosdata.com/config/config.html).
|
||||
|
|
|
@ -7,26 +7,26 @@ title: Fault Tolerance & Disaster Recovery
|
|||
|
||||
TDengine uses **WAL**, i.e. Write Ahead Log, to achieve fault tolerance and high reliability.
|
||||
|
||||
When a data block is received by TDengine, the original data block is first written into WAL. The log in WAL will be deleted only after the data has been written into data files in the database. Data can be recovered from WAL in case the server is stopped abnormally due to any reason and then restarted.
|
||||
When a data block is received by TDengine, the original data block is first written into WAL. The log in WAL will be deleted only after the data has been written into data files in the database. Data can be recovered from WAL in case the server is stopped abnormally for any reason and then restarted.
|
||||
|
||||
There are 2 configuration parameters related to WAL:
|
||||
|
||||
- walLevel:
|
||||
- 0:wal is disabled;
|
||||
- 1:wal is enabled without fsync;
|
||||
- 2:wal is enabled with fsync.
|
||||
- fsync:only valid when walLevel is set to 2, it specifies the interval of invoking fsync. If set to 0, it means fsync is invoked immediately once WAL is written.
|
||||
- 0: WAL is disabled
|
||||
- 1: WAL is enabled without fsync
|
||||
- 2: WAL is enabled with fsync
|
||||
- fsync: This parameter is only valid when walLevel is set to 2. It specifies the interval, in milliseconds, of invoking fsync. If set to 0, it means fsync is invoked immediately once WAL is written.
|
||||
|
||||
To achieve absolutely no data loss, walLevel needs to be set to 2 and fsync needs to be set to 1. The penalty is the performance of data ingestion downgrades. However, if the concurrent threads of data insertion on the client side can reach a big enough number, for example 50, the data ingestion performance would be still good enough, our verification shows that the drop is only 30% compared to fsync is set to 3,000 milliseconds.
|
||||
To achieve absolutely no data loss, walLevel should be set to 2 and fsync should be set to 1. There is a performance penalty to the data ingestion rate. However, if the number of concurrent data insertion threads on the client side is big enough, for example 50, the data ingestion performance will still be good enough. Our verification shows that the drop is only 30% compared to running with fsync set to 3,000 milliseconds.
|
||||
|
||||
## Disaster Recovery
|
||||
|
||||
TDengine uses replications to provide high availability and disaster recovery capability.
|
||||
TDengine uses replication to provide high availability and disaster recovery capability.
|
||||
|
||||
TDengine cluster is managed by mnode. To make sure the high availability of mnode, multiple replicas can be configured by the system parameter `numOfMnodes`. The data replication between mnode replicas is performed in a synchronous way to guarantee the metadata consistency.
|
||||
A TDengine cluster is managed by mnode. To ensure the high availability of mnode, multiple replicas can be configured by the system parameter `numOfMnodes`. The data replication between mnode replicas is performed in a synchronous way to guarantee metadata consistency.
|
||||
|
||||
The number of replicas for the time series data in TDengine is associated with each database, there can be a lot of databases in a cluster while each database can be configured with a different number of replicas. When creating a database, parameter `replica` is used to configure the number of replications. To achieve high availability, `replica` needs to be higher than 1.
|
||||
The number of replicas for time series data in TDengine is associated with each database. There can be many databases in a cluster and each database can be configured with a different number of replicas. When creating a database, parameter `replica` is used to configure the number of replications. To achieve high availability, `replica` needs to be higher than 1.
|
||||
|
||||
The number of dnodes in a TDengine cluster must NOT be lower than the number of replicas for any database, otherwise creating a table would fail.
|
||||
|
||||
As long as the dnodes of a TDengine cluster are deployed on different physical machines and the replica number is set to bigger than 1, high availability can be achieved without any other assistance. If dnodes of TDengine cluster are deployed in geographically different data centers, disaster recovery can be achieved too.
|
||||
As long as the dnodes of a TDengine cluster are deployed on different physical machines and the replica number is higher than 1, high availability can be achieved without any other assistance. For disaster recovery, dnodes of a TDengine cluster should be deployed in geographically different data centers.
|
||||
|
|
|
@ -2,11 +2,13 @@
|
|||
title: Data Export
|
||||
---
|
||||
|
||||
There are two ways of exporting data from a TDengine cluster, one is SQL statement in TDengine CLI, the other one is `taosdump`.
|
||||
There are two ways of exporting data from a TDengine cluster:
|
||||
- Using a SQL statement in TDengine CLI
|
||||
- Using the `taosdump` tool
|
||||
|
||||
## Export Using SQL
|
||||
|
||||
If you want to export the data of a table or a STable, please execute below SQL statement in TDengine CLI.
|
||||
If you want to export the data of a table or a STable, please execute the SQL statement below, in the TDengine CLI.
|
||||
|
||||
```sql
|
||||
select * from <tb_name> >> data.csv;
|
||||
|
@ -16,4 +18,4 @@ The data of table or STable specified by `tb_name` will be exported into a file
|
|||
|
||||
## Export Using taosdump
|
||||
|
||||
With `taosdump`, you can choose to export the data of all databases, a database, a table or a STable, you can also choose export the data within a time range, or even only export the schema definition of a table. For the details of using `taosdump` please refer to [Tool for exporting and importing data: taosdump](/reference/taosdump).
|
||||
With `taosdump`, you can choose to export the data of all databases, a database, a table or a STable, you can also choose to export the data within a time range, or even only export the schema definition of a table. For the details of using `taosdump` please refer to [Tool for exporting and importing data: taosdump](/reference/taosdump).
|
||||
|
|
|
@ -3,7 +3,7 @@ sidebar_label: Connections & Tasks
|
|||
title: Manage Connections and Query Tasks
|
||||
---
|
||||
|
||||
A system operator can use TDengine CLI to show the connections, ongoing queries, stream computing, and can close connection or stop ongoing query task or stream computing.
|
||||
A system operator can use the TDengine CLI to show connections, ongoing queries, stream computing, and can close connections or stop ongoing query tasks or stream computing.
|
||||
|
||||
## Show Connections
|
||||
|
||||
|
@ -13,7 +13,7 @@ SHOW CONNECTIONS;
|
|||
|
||||
One column of the output of the above SQL command is "ip:port", which is the end point of the client.
|
||||
|
||||
## Close Connections Forcedly
|
||||
## Force Close Connections
|
||||
|
||||
```sql
|
||||
KILL CONNECTION <connection-id>;
|
||||
|
@ -27,9 +27,9 @@ In the above SQL command, `connection-id` is from the first column of the output
|
|||
SHOW QUERIES;
|
||||
```
|
||||
|
||||
The first column of the output is query ID, which is composed of the corresponding connection ID and the sequence number of the current query task started on this connection, in format of "connection-id:query-no".
|
||||
The first column of the output is query ID, which is composed of the corresponding connection ID and the sequence number of the current query task started on this connection. The format is "connection-id:query-no".
|
||||
|
||||
## Close Queries Forcedly
|
||||
## Force Close Queries
|
||||
|
||||
```sql
|
||||
KILL QUERY <query-id>;
|
||||
|
@ -43,9 +43,9 @@ In the above SQL command, `query-id` is from the first column of the output of `
|
|||
SHOW STREAMS;
|
||||
```
|
||||
|
||||
The first column of the output is stream ID, which is composed of the connection ID and the sequence number of the current stream started on this connection, in the format of "connection-id:stream-no".
|
||||
The first column of the output is stream ID, which is composed of the connection ID and the sequence number of the current stream started on this connection. The format is "connection-id:stream-no".
|
||||
|
||||
## Close Continuous Query Forcedly
|
||||
## Force Close Continuous Query
|
||||
|
||||
```sql
|
||||
KILL STREAM <stream-id>;
|
||||
|
|
|
@ -2,13 +2,13 @@
|
|||
title: TDengine Monitoring
|
||||
---
|
||||
|
||||
After TDengine is started, a database named `log` for monitoring is created automatically. The information about CPU, memory, disk, bandwidth, number of requests, disk I/O speed, slow query is written into `log` database on the basis of a predefined interval. Additionally, some important system operations, like logon, create user, drop database, and alerts and warnings generated in TDengine are written into the `log` database too. A system operator can view the data in `log` database from TDengine CLI or from a web console.
|
||||
After TDengine is started, a database named `log` is created automatically to help with monitoring. Information including CPU, memory and disk usage, bandwidth, number of requests, disk I/O speed and slow queries is written into the `log` database at a predefined interval. Additionally, some important system operations, like logon, create user, drop database, and alerts and warnings generated in TDengine are written into the `log` database too. A system operator can view the data in the `log` database from TDengine CLI or from a web console.
|
||||
|
||||
The collection of the monitoring information is enabled by default, but can be disabled by parameter `monitor` in the configuration file.
|
||||
|
||||
## TDinsight
|
||||
|
||||
TDinsight is a complete solution which uses the monitor database `log` mentioned previously and Grafana to monitor a TDengine cluster.
|
||||
TDinsight is a complete solution which uses the monitoring database `log` mentioned previously, and Grafana, to monitor a TDengine cluster.
|
||||
|
||||
From version 2.3.3.0, more monitoring data has been added in the `log` database. Please refer to [TDinsight Grafana Dashboard](https://grafana.com/grafana/dashboards/15167) to learn more details about using TDinsight to monitor TDengine.
|
||||
|
||||
|
|
|
@ -4,13 +4,13 @@ title: Problem Diagnostics
|
|||
|
||||
## Network Connection Diagnostics
|
||||
|
||||
When the client is unable to access the server, the network connection between the client side and the server side needs to be checked to find out the root cause and resolve problems.
|
||||
When a TDengine client is unable to access a TDengine server, the network connection between the client side and the server side must be checked to find the root cause and resolve problems.
|
||||
|
||||
The diagnostic for network connection can be executed between Linux and Linux or between Linux and Windows.
|
||||
Diagnostics for network connections can be executed between Linux and Linux or between Linux and Windows.
|
||||
|
||||
Diagnostic steps:
|
||||
|
||||
1. If the port range to be diagnosed are being occupied by a `taosd` server process, please first stop `taosd.
|
||||
1. If the port range to be diagnosed is being occupied by a `taosd` server process, please first stop `taosd`.
|
||||
2. On the server side, execute command `taos -n server -P <port> -l <pktlen>` to monitor the port range starting from the port specified by `-P` parameter with the role of "server".
|
||||
3. On the client side, execute command `taos -n client -h <fqdn of server> -P <port> -l <pktlen>` to send a testing package to the specified server and port.
|
||||
|
||||
|
@ -65,13 +65,13 @@ Output of the client side for the example is below:
|
|||
12/21 14:50:22.721274 0x7fc95d859200 UTL successed to test UDP port:6011
|
||||
```
|
||||
|
||||
The output needs to be checked carefully for the system operator to find out the root cause and solve the problem.
|
||||
The output needs to be checked carefully for the system operator to find the root cause and resolve the problem.
|
||||
|
||||
## Startup Status and RPC Diagnostic
|
||||
|
||||
`taos -n startup -h <fqdn of server>` can be used to check the startup status of a `taosd` process. This is a comman task for a system operator to do to determine whether `taosd` has been started successfully, especially in case of cluster.
|
||||
`taos -n startup -h <fqdn of server>` can be used to check the startup status of a `taosd` process. This is a common task which should be performed by a system operator, especially in the case of a cluster, to determine whether `taosd` has been started successfully.
|
||||
|
||||
`taos -n rpc -h <fqdn of server>` can be used to check whether the port of a started `taosd` can be accessed or not. If `taosd` process doesn't respond or is working abnormally, this command can be used to initiate a rpc communication with the specified fqdn to determine whether it's a network problem or `taosd` is abnormal.
|
||||
`taos -n rpc -h <fqdn of server>` can be used to check whether the port of a started `taosd` can be accessed or not. If the `taosd` process doesn't respond or is working abnormally, this command can be used to initiate an RPC communication with the specified FQDN to determine whether it's a network problem or whether `taosd` is abnormal.
|
||||
|
||||
## Sync and Arbitrator Diagnostic
|
||||
|
||||
|
@ -80,13 +80,13 @@ taos -n sync -P 6040 -h <fqdn of server>
|
|||
taos -n sync -P 6042 -h <fqdn of server>
|
||||
```
|
||||
|
||||
The above commands can be executed on Linux Shell to check whether the port for sync is working well and whether the sync module on the server side is working well. Additionally, `-P 6042` is used to check whether the arbitrator is configured properly and is working well.
|
||||
The above commands can be executed in a Linux shell to check whether the port for sync is working well and whether the sync module on the server side is working well. Additionally, `-P 6042` is used to check whether the arbitrator is configured properly and is working well.
|
||||
|
||||
## Network Speed Diagnostic
|
||||
|
||||
`taos -n speed -h <fqdn of server> -P 6030 -N 10 -l 10000000 -S TCP`
|
||||
|
||||
From version 2.2.0.0, the above command can be executed on Linux Shell to test the network speed, it sends uncompressed package to a running `taosd` server process or a simulated server process started by `taos -n server` to test the network speed. Parameters can be used when testing network speed are as below:
|
||||
From version 2.2.0.0 onwards, the above command can be executed in a Linux shell to test network speed. The command sends uncompressed packets to a running `taosd` server process, or to a simulated server process started by `taos -n server`, to test the network speed. The parameters that can be used when testing network speed are as below:
|
||||
|
||||
-n: When set to "speed", it means testing network speed.
|
||||
-h: The FQDN or IP of the server process to be connected to; if not set, the FQDN configured in `taos.cfg` is used.
|
||||
|
@ -99,23 +99,23 @@ From version 2.2.0.0, the above command can be executed on Linux Shell to test t
|
|||
|
||||
`taos -n fqdn -h <fqdn of server>`
|
||||
|
||||
From version 2.2.0.0, the above command can be executed on Linux Shell to test the resolution speed of FQDN. It can be used to try to resolve a FQDN to an IP address and record the time spent in this process. The parameters that can be used for this purpose are as below:
|
||||
From version 2.2.0.0 onward, the above command can be executed in a Linux shell to test the resolution speed of FQDN. It can be used to try to resolve a FQDN to an IP address and record the time spent in this process. The parameters that can be used for this purpose are as below:
|
||||
|
||||
-n: When set to "fqdn", it means testing the speed of resolving FQDN.
|
||||
-h: The FQDN to be resolved. If not set, the `FQDN` parameter in `taos.cfg` is used by default.
|
||||
|
||||
## Server Log
|
||||
|
||||
The parameter `debugFlag` is used to control the log level of the `taosd` server process. The default value is 131, for debug purpose it needs to be escalated to 135 or 143.
|
||||
The parameter `debugFlag` is used to control the log level of the `taosd` server process. The default value is 131. For debugging or tracing, it needs to be set to 135 or 143 respectively.
|
||||
|
||||
Once this parameter is set to 135 or 143, the log file grows very quickly especially when there is a huge volume of data insertion and data query requests. If all the logs are stored together, some important information may be missed very easily, so on server side important information is stored at different place from other logs.
|
||||
Once this parameter is set to 135 or 143, the log file grows very quickly, especially when there is a huge volume of data insertion and data query requests. If all the logs were stored together, some important information could easily be missed; for this reason, on the server side, important information is stored in a different place from other logs.
|
||||
|
||||
- Logs at the INFO, WARNING and ERROR levels are stored in `taosinfo`, so that important information is easy to find
|
||||
- Logs at the DEBUG (135) and TRACE (143) levels, and other information not handled by `taosinfo`, are stored in `taosdlog`
|
||||
|
||||
## Client Log
|
||||
|
||||
An independent log file, named as "taoslog+<seq num\>" is generated for each client program, i.e. a client process. The default value of `debugFlag` is also 131 and only logs at level of INFO/ERROR/WARNING are recorded, for debugging purposes it needs to be changed to 135 or 143 so that logs at DEBUG or TRACE level can be recorded.
|
||||
An independent log file, named "taoslog+<seq num\>", is generated for each client program, i.e. a client process. The default value of `debugFlag` is also 131 and only logs at the INFO/ERROR/WARNING levels are recorded. As stated above, for debugging or tracing, it needs to be changed to 135 or 143 respectively, so that logs at the DEBUG or TRACE level can be recorded.
|
||||
|
||||
The maximum length of a single log file is controlled by parameter `numOfLogLines` and only 2 log files are kept for each `taosd` server process.
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
title: Administration
|
||||
---
|
||||
|
||||
This chapter is mainly written for system administrators, covering download, install/uninstall, data import/export, system monitoring, user management, connection management, etc. Capacity planning and system optimization are also covered.
|
||||
This chapter is mainly written for system administrators. It covers download, install/uninstall, data import/export, system monitoring, user management, connection management, capacity planning and system optimization.
|
||||
|
||||
```mdx-code-block
|
||||
import DocCardList from '@theme/DocCardList';
|
||||
|
|
|
@ -2,23 +2,23 @@
|
|||
title: REST API
|
||||
---
|
||||
|
||||
To support the development of various types of platforms, TDengine provides an API that conforms to the REST principle, namely REST API. To minimize the learning cost, different from the other database REST APIs, TDengine directly requests the SQL command contained in the request BODY through HTTP POST to operate the database and only requires a URL.
|
||||
To support the development of various types of applications and platforms, TDengine provides an API that conforms to REST principles, namely the REST API. To minimize the learning cost, unlike the REST APIs of other database engines, TDengine allows insertion of SQL commands in the BODY of an HTTP POST request to operate the database.
|
||||
|
||||
:::note
|
||||
One difference from the native connector is that the REST interface is stateless, so the `USE db_name` command has no effect. All references to table names and super table names need to specify the database name prefix. (Since version 2.2.0.0, it is supported to specify db_name in RESTful URL. If the database name prefix is not specified in the SQL command, the `db_name` specified in the URL will be used. Since version 2.4.0.0, REST service is provided by taosAdapter by default. And it requires that the `db_name` must be specified in the URL.)
|
||||
One difference from the native connector is that the REST interface is stateless and so the `USE db_name` command has no effect. All references to table names and super table names need to specify the database name in the prefix. (Since version 2.2.0.0, TDengine supports specification of the db_name in RESTful URL. If the database name prefix is not specified in the SQL command, the `db_name` specified in the URL will be used. Since version 2.4.0.0, REST service is provided by taosAdapter by default and it requires that the `db_name` must be specified in the URL.)
|
||||
:::
|
||||
|
||||
## Installation
|
||||
|
||||
The REST interface does not rely on any TDengine native library, so the client application does not need to install any TDengine libraries. The client application's development language supports the HTTP protocol is enough.
|
||||
The REST interface does not rely on any TDengine native library, so the client application does not need to install any TDengine libraries. The client application's development language only needs to support the HTTP protocol.
|
||||
|
||||
## Verification
|
||||
|
||||
If the TDengine server is already installed, it can be verified as follows:
|
||||
|
||||
The following is an Ubuntu environment using the `curl` tool (to confirm that it is installed) to verify that the REST interface is working.
|
||||
The following example is in an Ubuntu environment and uses the `curl` tool to verify that the REST interface is working. Note that the `curl` tool may need to be installed in your environment.
|
||||
|
||||
The following example lists all databases, replacing `h1.taosdata.com` and `6041` (the default port) with the actual running TDengine service FQDN and port number.
|
||||
The following example lists all databases on the host h1.taosdata.com. To use it in your environment, replace `h1.taosdata.com` and `6041` (the default port) with the actual running TDengine service FQDN and port number.
|
||||
|
||||
```html
|
||||
curl -H 'Authorization: Basic cm9vdDp0YW9zZGF0YQ==' -d 'show databases;' h1.taosdata.com:6041/rest/sql
|
||||
|
@ -89,7 +89,7 @@ For example, `http://h1.taos.com:6041/rest/sql/test` is a URL to `h1.taos.com:60
|
|||
|
||||
TDengine supports both Basic authentication and custom authentication mechanisms, and subsequent versions will provide a standard secure digital signature mechanism for authentication.
|
||||
|
||||
- The custom authentication information is as follows (Let's introduce token later)
|
||||
- The custom authentication information is as follows. More details about "token" later.
|
||||
|
||||
```
|
||||
Authorization: Taosd <TOKEN>
|
||||
|
@ -136,7 +136,7 @@ The return result is in JSON format, as follows:
|
|||
|
||||
Description:
|
||||
|
||||
- status: tell if the operation result is success or failure.
|
||||
- status: tells you whether the operation result is success or failure.
|
||||
- head: the definition of the table, or just one column "affected_rows" if no result set is returned. (As of version 2.0.17.0, it is recommended not to rely on the head return value to determine the data column type but rather use column_meta. In later versions, the head item may be removed from the return value.)
|
||||
- column_meta: this item is added to the return value to indicate the data type of each column in the data with version 2.0.17.0 and later versions. Each column is described by three values: column name, column type, and type length. For example, `["current",6,4]` means that the column name is "current", the column type is 6, which is the float type, and the type length is 4, which is the float type with 4 bytes. If the column type is binary or nchar, the type length indicates the maximum length of content stored in the column, not the length of the specific data in this return value. When the column type is nchar, the type length indicates the number of Unicode characters that can be saved, not bytes.
|
||||
- data: The exact data returned, presented row by row, or just [[affected_rows]] if no result set is returned. The order of the data columns in each row of data is the same as that of the data columns described in column_meta.
|
||||
|
|
|
@ -4,7 +4,7 @@ title: Connector
|
|||
|
||||
TDengine provides a rich set of APIs (application development interface). To facilitate users to develop their applications quickly, TDengine supports connectors for multiple programming languages, including official connectors for C/C++, Java, Python, Go, Node.js, C#, and Rust. These connectors support connecting to TDengine clusters using both native interfaces (taosc) and REST interfaces (not supported in a few languages yet). Community developers have also contributed several unofficial connectors, such as the ADO.NET connector, the Lua connector, and the PHP connector.
|
||||
|
||||

|
||||

|
||||
|
||||
## Supported platforms
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ sidebar_label: C/C++
|
|||
title: C/C++ Connector
|
||||
---
|
||||
|
||||
C/C++ developers can use TDengine's client driver and the C/C++ connector, to develop their applications to connect to TDengine clusters for data writing, querying, and other functions. To use it, you need to include the TDengine header file _taos.h_, which lists the function prototypes of the provided APIs; the application also needs to link to the corresponding dynamic libraries on the platform where it is located.
|
||||
C/C++ developers can use TDengine's client driver and the C/C++ connector, to develop their applications to connect to TDengine clusters for data writing, querying, and other functions. To use the C/C++ connector you must include the TDengine header file _taos.h_, which lists the function prototypes of the provided APIs. The application also needs to link to the corresponding dynamic libraries on the platform where it is located.
|
||||
|
||||
```c
|
||||
#include <taos.h>
|
||||
|
@ -26,7 +26,7 @@ Please refer to [list of supported platforms](/reference/connector#supported-pla
|
|||
|
||||
## Supported versions
|
||||
|
||||
The version number of the TDengine client driver and the version number of the TDengine server require one-to-one correspondence and recommend using the same version of client driver as what the TDengine server version is. Although a lower version of the client driver is compatible to work with a higher version of the server, if the first three version numbers are the same (i.e., only the fourth version number is different), but it is not recommended. It is strongly discouraged to use a higher version of the client driver to access a lower version of the TDengine server.
|
||||
The version number of the TDengine client driver and the version number of the TDengine server should be the same. A lower version of the client driver is compatible with a higher version of the server if the first three version numbers are the same (i.e., only the fourth version number is different). For example, if the client version is x.y.z.1 and the server version is x.y.z.2, the client and server are compatible. But in general we do not recommend using a lower client version with a newer server version. It is also strongly discouraged to use a higher version of the client driver to access a lower version of the TDengine server.
|
||||
|
||||
## Installation steps
|
||||
|
||||
|
@ -55,7 +55,7 @@ In the above example code, `taos_connect()` establishes a connection to port 603
|
|||
|
||||
:::note
|
||||
|
||||
- If not specified, when the return value of the API is an integer, _0_ means success, the others are error codes representing the reason for failure, and when the return value is a pointer, _NULL_ means failure.
|
||||
- If not specified, when the return value of the API is an integer, _0_ means success. All others are error codes representing the reason for failure. When the return value is a pointer, _NULL_ means failure.
|
||||
- All error codes and their corresponding causes are described in the `taoserror.h` file.
|
||||
|
||||
:::
|
||||
|
@ -114,7 +114,6 @@ This section shows sample code for standard access methods to TDengine clusters
|
|||
<summary>Subscribe and consume</summary>
|
||||
|
||||
```c
|
||||
{{#include examples/c/subscribe.c}}
|
||||
```
|
||||
|
||||
</details>
|
||||
|
@ -140,13 +139,12 @@ The base API is used to do things like create database connections and provide a
|
|||
|
||||
- `void taos_cleanup()`
|
||||
|
||||
Clean up the runtime environment and should be called before the application exits.
|
||||
Cleans up the runtime environment and should be called before the application exits.
|
||||
|
||||
- `int taos_options(TSDB_OPTION option, const void *arg, ...)`
|
||||
|
||||
Set client options, currently supports region setting (`TSDB_OPTION_LOCALE`), character set
|
||||
(`TSDB_OPTION_CHARSET`), time zone
|
||||
(`TSDB_OPTION_TIMEZONE`), configuration file path (`TSDB_OPTION_CONFIGDIR`) . The region setting, character set, and time zone default to the current settings of the operating system.
|
||||
(`TSDB_OPTION_CHARSET`), time zone (`TSDB_OPTION_TIMEZONE`), configuration file path (`TSDB_OPTION_CONFIGDIR`). The region setting, character set, and time zone default to the current settings of the operating system.
|
||||
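For example, a minimal sketch of setting options; the values shown are illustrative, not defaults mandated by this API.

```c
/* typically set before the first connection is created */
taos_options(TSDB_OPTION_CONFIGDIR, "/etc/taos");  /* illustrative path  */
taos_options(TSDB_OPTION_TIMEZONE, "UTC-8");       /* illustrative value */
```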
|
||||
- `char *taos_get_client_info()`
|
||||
|
||||
|
@ -159,7 +157,7 @@ The base API is used to do things like create database connections and provide a
|
|||
- host: FQDN of any node in the TDengine cluster
|
||||
- user: user name
|
||||
- pass: password
|
||||
- db: database name, if the user does not provide, it can also be connected correctly, the user can create a new database through this connection, if the user provides the database name, it means that the database user has already created, the default use of the database
|
||||
- db: the database name. Even if the user does not provide this, the connection will still work correctly. The user can create a new database through this connection. If the user provides the database name, it means that the database has already been created and the connection can be used for regular operations on the database.
|
||||
- port: the port the taosd program is listening on
|
||||
|
||||
NULL indicates a failure. The application needs to save the returned parameters for subsequent use.
|
||||
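A minimal connection sketch based on the parameters listed above. The host, user, password and database name here are illustrative values, not defaults required by the API.

```c
#include <stdio.h>
#include <taos.h>

int main(void) {
    TAOS *taos = taos_connect("localhost", "root", "taosdata", "test", 6030);
    if (taos == NULL) {   /* NULL indicates a failure */
        printf("failed to connect\n");
        return 1;
    }
    /* ... save and reuse the handle for subsequent queries and writes ... */
    taos_close(taos);
    taos_cleanup();
    return 0;
}
```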
|
@ -187,7 +185,7 @@ The APIs described in this subsection are all synchronous interfaces. After bein
|
|||
|
||||
- `TAOS_RES* taos_query(TAOS *taos, const char *sql)`
|
||||
|
||||
Executes an SQL command, either a DQL, DML, or DDL statement. The `taos` parameter is a handle obtained with `taos_connect()`. You can't tell if the result failed by whether the return value is `NULL`, but by parsing the error code in the result set with the `taos_errno()` function.
|
||||
Executes an SQL command, either a DQL, DML, or DDL statement. The `taos` parameter is a handle obtained with `taos_connect()`. If the return value is `NULL` this does not necessarily indicate a failure. You can get the error code, if any, by parsing the error code in the result set with the `taos_errno()` function.
|
||||
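For example, a minimal sketch of the recommended error check; it assumes an established connection handle `taos`, and the table name is illustrative.

```c
TAOS_RES *res = taos_query(taos, "SELECT COUNT(*) FROM test.meters");
if (taos_errno(res) != 0) {   /* check the error code, not the returned pointer */
    printf("query failed: %s\n", taos_errstr(res));
}
taos_free_result(res);
```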
|
||||
- `int taos_result_precision(TAOS_RES *res)`
|
||||
|
||||
|
@ -231,7 +229,7 @@ typedef struct taosField {
|
|||
|
||||
- `void taos_free_result(TAOS_RES *res)`
|
||||
|
||||
Frees the query result set and the associated resources. Be sure to call this API to free the resources after the query is completed. Otherwise, it may lead to a memory leak in the application. However, note that the application will crash if you call a function like `taos_consume()` to get the query results after freeing the resources.
|
||||
Frees the query result set and the associated resources. Be sure to call this API to free the resources after the query is completed. Failing to call this, may lead to a memory leak in the application. However, note that the application will crash if you call a function like `taos_consume()` to get the query results after freeing the resources.
|
||||
|
||||
- `char *taos_errstr(TAOS_RES *res)`
|
||||
|
||||
|
@ -242,7 +240,7 @@ typedef struct taosField {
|
|||
Get the reason for the last API call failure. The return value is the error code.
|
||||
|
||||
:::note
|
||||
TDengine version 2.0 and above recommends that each thread of a database application create a separate connection or a connection pool based on threads. It is not recommended to pass the connection (TAOS\*) structure to different threads for shared use in the application. Queries, writes, etc., issued based on TAOS structures are multi-thread safe, but state quantities such as "USE statement" may interfere between threads. In addition, the C connector can dynamically create new database-oriented connections on demand (this procedure is not visible to the user), and it is recommended that `taos_close()` be called only at the final exit of the program to close the connection.
|
||||
TDengine version 2.0 and above recommends that each thread of a database application create a separate connection or a connection pool based on threads. It is not recommended to pass the connection (TAOS\*) structure to different threads for shared use in the application. Queries, writes, and other operations issued based on TAOS structures are multi-thread safe, but state quantities such as the "USE statement" may interfere between threads. In addition, the C connector can dynamically create new database-oriented connections on demand (this procedure is not visible to the user), and it is recommended that `taos_close()` be called only at the final exit of the program to close the connection.
|
||||
|
||||
:::
|
||||
|
||||
|
@ -274,12 +272,12 @@ All TDengine's asynchronous APIs use a non-blocking call pattern. Applications c
|
|||
|
||||
### Parameter Binding API
|
||||
|
||||
In addition to direct calls to `taos_query()` to perform queries, TDengine also provides a set of `bind` APIs that supports parameter binding, similar in style to MySQL, and currently only supports using a question mark `? ` to represent the parameter to be bound.
|
||||
In addition to direct calls to `taos_query()` to perform queries, TDengine also provides a set of `bind` APIs that support parameter binding, similar in style to MySQL. TDengine currently only supports using a question mark `?` to represent the parameter to be bound.
|
||||
|
||||
Starting with versions 2.1.1.0 and 2.1.2.0, TDengine has significantly improved the bind APIs to support for data writing (INSERT) scenarios. This avoids the resource consumption of SQL syntax parsing when writing data through the parameter binding interface, thus significantly improving write performance in most cases. A typical operation, in this case, is as follows.
|
||||
Starting with versions 2.1.1.0 and 2.1.2.0, TDengine has significantly improved the bind APIs to support data writing (INSERT) scenarios. This avoids the resource consumption of SQL syntax parsing when writing data through the parameter binding interface, thus significantly improving write performance in most cases. A typical operation, in this case, is as follows.
|
||||
|
||||
1. call `taos_stmt_init()` to create the parameter binding object.
|
||||
2. call `taos_stmt_prepare()` to parse the INSERT statement. 3.
|
||||
2. call `taos_stmt_prepare()` to parse the INSERT statement.
|
||||
3. call `taos_stmt_set_tbname()` to set the table name if it is reserved in the INSERT statement but not the TAGS.
|
||||
4. call `taos_stmt_set_tbname_tags()` to set the table name and TAGS values if the table name and TAGS are reserved in the INSERT statement (for example, if the INSERT statement takes an automatic table build).
|
||||
5. call `taos_stmt_bind_param_batch()` to set the value of VALUES in multiple columns, or call `taos_stmt_bind_param()` to set the value of VALUES in a single row.
|
||||
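The steps above map to code roughly as follows. This is a condensed, hedged sketch: the table `d1001` and its schema (a timestamp and a float column) are illustrative, error checks are omitted for brevity, and the batch/execute/close calls that complete the flow are included for context.

```c
#include <string.h>
#include <taos.h>

void insert_with_bind(TAOS *taos) {
    TAOS_STMT *stmt = taos_stmt_init(taos);                    /* step 1 */
    taos_stmt_prepare(stmt, "INSERT INTO ? VALUES(?, ?)", 0);  /* step 2 */
    taos_stmt_set_tbname(stmt, "d1001");                       /* step 3 */

    TAOS_BIND params[2];
    memset(params, 0, sizeof(params));

    int64_t ts = 1626861392589;                                /* ms timestamp */
    params[0].buffer_type   = TSDB_DATA_TYPE_TIMESTAMP;
    params[0].buffer        = &ts;
    params[0].buffer_length = sizeof(ts);
    params[0].length        = &params[0].buffer_length;

    float current = 10.3f;
    params[1].buffer_type   = TSDB_DATA_TYPE_FLOAT;
    params[1].buffer        = &current;
    params[1].buffer_length = sizeof(current);
    params[1].length        = &params[1].buffer_length;

    taos_stmt_bind_param(stmt, params);                        /* step 5, single row */
    taos_stmt_add_batch(stmt);
    taos_stmt_execute(stmt);
    taos_stmt_close(stmt);
}
```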
|
@ -383,7 +381,7 @@ In addition to writing data using the SQL method or the parameter binding API, w
|
|||
**return value**
|
||||
A TAOS_RES structure; the application can get the error message with `taos_errstr()` and the error code with `taos_errno()`.
|
||||
In some cases, the returned TAOS_RES is `NULL`; even then, it is still safe to call `taos_errno()` to get the error code.
|
||||
The returned TAOS_RES needs to be freed by the caller. Otherwise, a memory leak will occur.
|
||||
The returned TAOS_RES needs to be freed by the caller in order to avoid memory leaks.
|
||||
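A hedged sketch of checking and freeing the returned TAOS_RES. It assumes the `taos_schemaless_insert()` entry point of TDengine 2.4+, an established connection handle `taos`, and an illustrative InfluxDB line-protocol sample; adjust these to your actual version and schema.

```c
char *lines[] = {
    "meters,location=California.SanFrancisco,groupid=2 current=10.3,voltage=219 1626006833639000000"
};
TAOS_RES *res = taos_schemaless_insert(taos, lines, 1, TSDB_SML_LINE_PROTOCOL,
                                       TSDB_SML_TIMESTAMP_NANO_SECONDS);
if (taos_errno(res) != 0) {   /* taos_errno() is safe even if res is NULL */
    printf("schemaless insert failed: %s\n", taos_errstr(res));
}
taos_free_result(res);        /* always free the result to avoid a leak */
```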
|
||||
**Description**
|
||||
The protocol type is enumerated and contains the following three formats.
|
||||
|
@ -416,13 +414,13 @@ The Subscription API currently supports subscribing to one or more tables and co
|
|||
|
||||
This function is responsible for starting the subscription service, returning the subscription object on success and `NULL` on failure, with the following parameters.
|
||||
|
||||
- taos: the database connection that has been established
|
||||
- restart: if the subscription already exists, whether to restart or continue the previous subscription
|
||||
- topic: the topic of the subscription (i.e., the name). This parameter is the unique identifier of the subscription
|
||||
- sql: the query statement of the subscription, this statement can only be _select_ statement, only the original data should be queried, only the data can be queried in time order
|
||||
- fp: the callback function when the query result is received (the function prototype will be introduced later), only used when called asynchronously. This parameter should be passed `NULL` when called synchronously
|
||||
- param: additional parameter when calling the callback function, the system API will pass it to the callback function as it is, without any processing
|
||||
- interval: polling period in milliseconds. The callback function will be called periodically according to this parameter when called asynchronously. not recommended to set this parameter too small To avoid impact on system performance when called synchronously. If the interval between two calls to `taos_consume()` is less than this period, the API will block until the interval exceeds this period.
|
||||
- taos: the database connection that has been established.
|
||||
- restart: if the subscription already exists, whether to restart or continue the previous subscription.
|
||||
- topic: the topic of the subscription (i.e., the name). This parameter is the unique identifier of the subscription.
|
||||
- sql: the query statement of the subscription which can only be a _select_ statement. Only the original data should be queried, and data can only be queried in temporal order.
|
||||
- fp: the callback function invoked when the query result is received; only used when called asynchronously. This parameter should be passed `NULL` when called synchronously. The function prototype is described below.
|
||||
- param: additional parameter when calling the callback function. The system API will pass it to the callback function as is, without any processing.
|
||||
- interval: polling period in milliseconds. When called asynchronously, the callback function will be invoked periodically according to this parameter; the interval should not be set too small, to avoid impact on system performance. When called synchronously, if the interval between two calls to `taos_consume()` is less than this period, the API will block until the interval exceeds this period.
|
||||
|
||||
- `typedef void (*TAOS_SUBSCRIBE_CALLBACK)(TAOS_SUB* tsub, TAOS_RES *res, void* param, int code)`
|
||||
|
||||
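Putting the parameters above together, here is a minimal sketch of a synchronous subscription loop. It assumes an established connection handle `taos`; the topic name, query and 1000 ms poll period are illustrative.

```c
TAOS_SUB *tsub = taos_subscribe(taos, 1 /* restart */, "topic-meters",
                                "SELECT * FROM test.meters", NULL /* fp */,
                                NULL /* param */, 1000 /* interval, ms */);
if (tsub != NULL) {
    for (int i = 0; i < 10; i++) {
        TAOS_RES *res = taos_consume(tsub);  /* blocks until the poll period elapses */
        /* ... iterate over the rows with taos_fetch_row(res) ... */
    }
    taos_unsubscribe(tsub, 0 /* do not keep progress */);
}
```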
|
|
|
@ -179,9 +179,9 @@ namespace TDengineExample
|
|||
|
||||
1. "Unable to establish connection", "Unable to resolve FQDN"
|
||||
|
||||
Usually, it cause by the FQDN configuration is incorrect, you can refer to [How to understand TDengine's FQDN (Chinese)](https://www.taosdata.com/blog/2021/07/29/2741.html) to solve it. 2.
|
||||
Usually, it's caused by an incorrect FQDN configuration. Please refer to this section in the [FAQ](https://docs.tdengine.com/2.4/train-faq/faq/#2-how-to-handle-unable-to-establish-connection) to troubleshoot.
|
||||
|
||||
Unhandled exception. System.DllNotFoundException: Unable to load DLL 'taos' or one of its dependencies: The specified module cannot be found.
|
||||
2. Unhandled exception. System.DllNotFoundException: Unable to load DLL 'taos' or one of its dependencies: The specified module cannot be found.
|
||||
|
||||
This is usually because the program did not find the dependent client driver. The solution is to copy `C:\TDengine\driver\taos.dll` to the `C:\Windows\System32\` directory on Windows; on Linux, creating the soft link `ln -s /usr/local/taos/driver/libtaos.so.x.x.x.x /usr/lib/libtaos.so` will work.
|
||||
|
||||
|
|
|
@ -15,9 +15,9 @@ import GoOpenTSDBTelnet from "../../07-develop/03-insert-data/_go_opts_telnet.md
|
|||
import GoOpenTSDBJson from "../../07-develop/03-insert-data/_go_opts_json.mdx"
|
||||
import GoQuery from "../../07-develop/04-query-data/_go.mdx"
|
||||
|
||||
`driver-go` is the official Go language connector for TDengine, which implements the interface to the Go language [database/sql](https://golang.org/pkg/database/sql/) package. Go developers can use it to develop applications that access TDengine cluster data.
|
||||
`driver-go` is the official Go language connector for TDengine. It implements the [database/sql](https://golang.org/pkg/database/sql/) package, the generic Go language interface to SQL databases. Go developers can use it to develop applications that access TDengine cluster data.
|
||||
|
||||
`driver-go` provides two ways to establish connections. One is **native connection**, which connects to TDengine instances natively through the TDengine client driver (taosc), supporting data writing, querying, subscriptions, schemaless writing, and bind interface. The other is the **REST connection**, which connects to TDengine instances via the REST interface provided by taosAdapter. The set of features implemented by the REST connection differs slightly from the native connection.
|
||||
`driver-go` provides two ways to establish connections. One is **native connection**, which connects to TDengine instances natively through the TDengine client driver (taosc), supporting data writing, querying, subscriptions, schemaless writing, and bind interface. The other is the **REST connection**, which connects to TDengine instances via the REST interface provided by taosAdapter. The set of features implemented by the REST connection differs slightly from those implemented by the native connection.
|
||||
|
||||
This article describes how to install `driver-go` and connect to TDengine clusters and perform basic operations such as data query and data writing through `driver-go`.
|
||||
|
||||
|
@ -213,7 +213,7 @@ func main() {
|
|||
|
||||
Since the REST interface is stateless, the `use db` syntax will not work. You need to put the db name into the SQL command, e.g. change `create table if not exists tb1 (ts timestamp, a int)` to `create table if not exists test.tb1 (ts timestamp, a int)`, otherwise it will report the error `[0x217] Database not specified or available`.
|
||||
|
||||
You can also put the db name in the DSN by changing `root:taosdata@http(localhost:6041)/` to `root:taosdata@http(localhost:6041)/test`. This method is supported by taosAdapter in TDengine 2.4.0.5. is supported since TDengine 2.4.0.5. Executing the `create database` statement when the specified db does not exist will not report an error while executing other queries or writing against that db will report an error.
|
||||
You can also put the db name in the DSN by changing `root:taosdata@http(localhost:6041)/` to `root:taosdata@http(localhost:6041)/test`. This method is supported by taosAdapter since TDengine 2.4.0.5. Executing the `create database` statement when the specified db does not exist will not report an error while executing other queries or writing against that db will report an error.
|
||||
|
||||
The complete example is as follows.
|
||||
|
||||
|
@ -289,7 +289,7 @@ func main() {
|
|||
|
||||
6. `readBufferSize` parameter has no significant effect after being increased
|
||||
|
||||
If you increase `readBufferSize` will reduce the number of `syscall` calls when fetching results. If the query result is smaller, modifying this parameter will not improve significantly. If you increase the parameter value too much, the bottleneck will be parsing JSON data. If you need to optimize the query speed, you must adjust the value according to the actual situation to achieve the best query result.
|
||||
Increasing `readBufferSize` will reduce the number of `syscall` calls when fetching results. If the query result is smaller, modifying this parameter will not improve performance significantly. If you increase the parameter value too much, the bottleneck will be parsing JSON data. If you need to optimize the query speed, you must adjust the value based on the actual situation to achieve the best query performance.
|
||||
|
||||
7. Query efficiency is reduced when the `disableCompression` parameter is set to `false`
|
||||
|
||||
|
|
|
@ -9,19 +9,19 @@ description: TDengine Java based on JDBC API and provide both native and REST co
|
|||
import Tabs from '@theme/Tabs';
|
||||
import TabItem from '@theme/TabItem';
|
||||
|
||||
'taos-jdbcdriver' is TDengine's official Java language connector, which allows Java developers to develop applications that access the TDengine database. 'taos-jdbcdriver' implements the interface of the JDBC driver standard and provides two forms of connectors. One is to connect to a TDengine instance natively through the TDengine client driver (taosc), which supports functions including data writing, querying, subscription, schemaless writing, and bind interface. And the other is to connect to a TDengine instance through the REST interface provided by taosAdapter (2.4.0.0 and later). REST connections implement has a slight differences to compare the set of features implemented and native connections.
|
||||
'taos-jdbcdriver' is TDengine's official Java language connector, which allows Java developers to develop applications that access the TDengine database. 'taos-jdbcdriver' implements the interface of the JDBC driver standard and provides two forms of connectors. One is to connect to a TDengine instance natively through the TDengine client driver (taosc), which supports functions including data writing, querying, subscription, schemaless writing, and bind interface. And the other is to connect to a TDengine instance through the REST interface provided by taosAdapter (2.4.0.0 and later). The REST connection and the native connection differ slightly in the features they implement.
|
||||
|
||||

|
||||

|
||||
|
||||
The preceding diagram shows two ways for a Java app to access TDengine via connector:
|
||||
|
||||
- JDBC native connection: Java applications use TSDBDriver on physical node 1 (pnode1) to directly call the client driver (`libtaos.so` or `taos.dll`) APIs and send write and query requests to taosd instances located on physical node 2 (pnode2).
|
||||
- JDBC REST connection: The Java application encapsulates the SQL as a REST request via RestfulDriver, sends it to the REST server of physical node 2 (taosAdapter), requests TDengine server through the REST server, and returns the result.
|
||||
- JDBC REST connection: The Java application encapsulates the SQL as a REST request via RestfulDriver, sends it to the REST server (taosAdapter) on physical node 2. taosAdapter forwards the request to TDengine server and returns the result.
|
||||
|
||||
Using REST connection, which does not rely on TDengine client drivers.It can be cross-platform more convenient and flexible but introduce about 30% lower performance than native connection.
|
||||
The REST connection, which does not rely on TDengine client drivers, is more convenient and flexible, in addition to being cross-platform. However the performance is about 30% lower than that of the native connection.
|
||||
|
||||
:::info
|
||||
TDengine's JDBC driver implementation is as consistent as possible with the relational database driver. Still, there are differences in the use scenarios and technical characteristics of TDengine and relational object databases, so 'taos-jdbcdriver' also has some differences from traditional JDBC drivers. You need to pay attention to the following points when using:
|
||||
TDengine's JDBC driver implementation is as consistent as possible with the relational database driver. Still, there are differences in the use scenarios and technical characteristics of TDengine and relational object databases. So 'taos-jdbcdriver' also has some differences from traditional JDBC drivers. It is important to keep the following points in mind:
|
||||
|
||||
- TDengine does not currently support delete operations for individual data records.
|
||||
- Transactional operations are not currently supported.
|
||||
|
@ -88,7 +88,7 @@ Add following dependency in the `pom.xml` file of your Maven project:
|
|||
</TabItem>
|
||||
<TabItem value="source" label="Build from source code">
|
||||
|
||||
You can build Java connector from source code after clone TDengine project:
|
||||
You can build Java connector from source code after cloning the TDengine project:
|
||||
|
||||
```shell
|
||||
git clone https://github.com/taosdata/TDengine.git
|
||||
|
@ -96,7 +96,7 @@ cd TDengine/src/connector/jdbc
|
|||
mvn clean install -Dmaven.test.skip=true
|
||||
```
|
||||
|
||||
After compilation, a jar package of taos-jdbcdriver-2.0.XX-dist .jar is generated in the target directory, and the compiled jar file is automatically placed in the local Maven repository.
|
||||
After compilation, a jar package named taos-jdbcdriver-2.0.XX-dist.jar is generated in the target directory, and the compiled jar file is automatically placed in the local Maven repository.
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
@ -186,7 +186,7 @@ Connection conn = DriverManager.getConnection(jdbcUrl);
|
|||
|
||||
In the above example, a RestfulDriver with a JDBC REST connection is used to establish a connection to a database named `test` with hostname `taosdemo.com` on port `6041`. The URL specifies the user name as `root` and the password as `taosdata`.
|
||||
|
||||
There is no dependency on the client driver when Using a JDBC REST connection. Compared to a JDBC native connection, only the following are required: 1.
|
||||
There is no dependency on the client driver when Using a JDBC REST connection. Compared to a JDBC native connection, only the following are required:
|
||||
|
||||
1. driverClass specified as "com.taosdata.jdbc.rs.RestfulDriver".
|
||||
2. jdbcUrl starting with "jdbc:TAOS-RS://".
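Putting the two together, a minimal sketch of opening a JDBC REST connection (hostname, port, and credentials are placeholders; requires `java.sql.Connection` and `java.sql.DriverManager`):

```java
// Load the REST driver and connect; no native client library is involved.
Class.forName("com.taosdata.jdbc.rs.RestfulDriver");
String jdbcUrl = "jdbc:TAOS-RS://taosdemo.com:6041/test?user=root&password=taosdata";
Connection conn = DriverManager.getConnection(jdbcUrl);
```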
|
||||
|
@ -209,7 +209,7 @@ The configuration parameters in the URL are as follows.
|
|||
INSERT INTO test.t1 USING test.weather (ts, temperature) TAGS('California.SanFrancisco') VALUES(now, 24.6);
|
||||
```
|
||||
|
||||
- Starting from taos-jdbcdriver-2.0.36 and TDengine 2.2.0.0, if dbname is specified in the URL, JDBC REST connections will use `/rest/sql/dbname` as the URL for REST requests by default, and there is no need to specify dbname in SQL. For example, if the URL is `jdbc:TAOS-RS://127.0.0.1:6041/test`, then the SQL can be executed: insert into t1 using weather(ts, temperature) tags('California.SanFrancisco') values(now, 24.6);
|
||||
- Starting from taos-jdbcdriver-2.0.36 and TDengine 2.2.0.0, if dbname is specified in the URL, JDBC REST connections will use `/rest/sql/dbname` as the URL for REST requests by default, and there is no need to specify dbname in SQL. For example, if the URL is `jdbc:TAOS-RS://127.0.0.1:6041/test`, then the SQL can be executed: `insert into t1 using weather(ts, temperature) tags('California.SanFrancisco') values(now, 24.6);`
|
||||
|
||||
:::
|
||||
|
||||
|
@ -271,7 +271,7 @@ If the configuration parameters are duplicated in the URL, Properties, or client
|
|||
2. Properties connProps
|
||||
3. the configuration file taos.cfg of the TDengine client driver when using a native connection
|
||||
|
||||
For example, if you specify the password as `taosdata` in the URL and specify the password as `taosdemo` in the Properties simultaneously. In this case, JDBC will use the password in the URL to establish the connection.
|
||||
For example, if you specify the password as `taosdata` in the URL and specify the password as `taosdemo` in the Properties simultaneously, JDBC will use the password in the URL to establish the connection.
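A small sketch of that precedence rule (the property key follows taos-jdbcdriver's `TSDBDriver` constants; host and database are placeholders):

```java
String url = "jdbc:TAOS://taosdemo.com:6030/test?user=root&password=taosdata";
Properties connProps = new Properties();
connProps.setProperty(TSDBDriver.PROPERTY_KEY_PASSWORD, "taosdemo");
// The URL's password ("taosdata") wins over the Properties value ("taosdemo").
Connection conn = DriverManager.getConnection(url, connProps);
```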
|
||||
|
||||
## Usage examples
|
||||
|
||||
|
@ -323,7 +323,7 @@ while(resultSet.next()){
|
|||
}
|
||||
```
|
||||
|
||||
> The query is consistent with operating a relational database. When using subscripts to get the contents of the returned fields, starting from 1, it is recommended to use the field names to get them.
|
||||
> The query is consistent with operating a relational database. When using subscripts to get the contents of the returned fields, you have to start from 1. However, we recommend using the field names to get the values of the fields in the result set.
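For instance, a sketch assuming the result set comes from `select ts, current from meters` (column names are from the documentation's standard example schema):

```java
while (resultSet.next()) {
    Timestamp ts = resultSet.getTimestamp(1);        // subscripts start from 1, not 0
    float bySubscript = resultSet.getFloat(2);       // works, but fragile
    float byName = resultSet.getFloat("current");    // recommended: access by field name
}
```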
|
||||
|
||||
### Handling exceptions
|
||||
|
||||
|
@ -623,7 +623,7 @@ public void setNString(int columnIndex, ArrayList<String> list, int size) throws
|
|||
|
||||
### Schemaless Writing
|
||||
|
||||
Starting with version 2.2.0.0, TDengine has added the ability to schemaless writing. It is compatible with InfluxDB's Line Protocol, OpenTSDB's telnet line protocol, and OpenTSDB's JSON format protocol. See [schemaless writing](/reference/schemaless/) for details.
|
||||
Starting with version 2.2.0.0, TDengine has added the ability to perform schemaless writing. It is compatible with InfluxDB's Line Protocol, OpenTSDB's telnet line protocol, and OpenTSDB's JSON format protocol. See [schemaless writing](/reference/schemaless/) for details.
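A hedged sketch of line-protocol schemaless writing through the connector's `SchemalessWriter` helper (class and enum names as in taos-jdbcdriver 2.0.36+; `conn` is an open native connection):

```java
String lineDemo = "meters,location=California.LosAngeles,groupid=2 current=11.8,voltage=221 1626006833639000000";
SchemalessWriter writer = new SchemalessWriter(conn);
// LINE = InfluxDB line protocol; the timestamp above is in nanoseconds.
writer.write(lineDemo, SchemalessProtocolType.LINE, SchemalessTimestampType.NANO_SECONDS);
```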
|
||||
|
||||
**Note**.
|
||||
|
||||
|
@ -666,16 +666,16 @@ The TDengine Java Connector supports subscription functionality with the followi
|
|||
#### Create subscriptions
|
||||
|
||||
```java
|
||||
TSDBSubscribe sub = ((TSDBConnection)conn).subscribe("topic", "select * from meters", false);
|
||||
TSDBSubscribe sub = ((TSDBConnection)conn).subscribe("topicname", "select * from meters", false);
|
||||
```
|
||||
|
||||
The three parameters of the `subscribe()` method have the following meanings.
|
||||
|
||||
- topic: the subscribed topic (i.e., name). This parameter is the unique identifier of the subscription
|
||||
- sql: the query statement of the subscription, this statement can only be `select` statement, only the original data should be queried, and you can query only the data in the positive time order
|
||||
- topicname: the name of the subscribed topic. This parameter is the unique identifier of the subscription.
|
||||
- sql: the query statement of the subscription. This statement can only be a `select` statement. Only original (raw) data can be queried, and the data can be queried only in ascending time order.
|
||||
- restart: if the subscription already exists, whether to restart or continue the previous subscription
|
||||
|
||||
The above example will use the SQL command `select * from meters` to create a subscription named `topic`. If the subscription exists, it will continue the progress of the previous query instead of consuming all the data from the beginning.
|
||||
The above example will use the SQL command `select * from meters` to create a subscription named `topicname`. If the subscription exists, it will continue the progress of the previous query instead of consuming all the data from the beginning.
|
||||
|
||||
#### Subscribe to consume data
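A minimal polling sketch, continuing from the `TSDBSubscribe` created above (exception handling omitted):

```java
int total = 0;
while (true) {
    TSDBResultSet rs = sub.consume();  // rows accumulated since the last call
    while (rs.next()) {
        total++;                       // process each newly arrived row here
    }
    Thread.sleep(1000);                // poll at a modest interval
}
```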
|
||||
|
||||
|
|
|
@ -14,7 +14,6 @@ import NodeInfluxLine from "../../07-develop/03-insert-data/_js_line.mdx";
|
|||
import NodeOpenTSDBTelnet from "../../07-develop/03-insert-data/_js_opts_telnet.mdx";
|
||||
import NodeOpenTSDBJson from "../../07-develop/03-insert-data/_js_opts_json.mdx";
|
||||
import NodeQuery from "../../07-develop/04-query-data/_js.mdx";
|
||||
import NodeAsyncQuery from "../../07-develop/04-query-data/_js_async.mdx";
|
||||
|
||||
`td2.0-connector` and `td2.0-rest-connector` are the official Node.js language connectors for TDengine. Node.js developers can develop applications to access TDengine instance data.
|
||||
|
||||
|
@ -189,14 +188,8 @@ let cursor = conn.cursor();
|
|||
|
||||
### Query data
|
||||
|
||||
#### Synchronous queries
|
||||
|
||||
<NodeQuery />
|
||||
|
||||
#### Asynchronous queries
|
||||
|
||||
<NodeAsyncQuery />
|
||||
|
||||
## More Sample Programs
|
||||
|
||||
| Sample Programs | Sample Program Description |
|
||||
|
@ -232,7 +225,7 @@ See [video tutorial](https://www.taosdata.com/blog/2020/11/11/1957.html) for the
|
|||
|
||||
2. "Unable to establish connection", "Unable to resolve FQDN"
|
||||
|
||||
Usually, root cause is the FQDN is not configured correctly. You can refer to [How to understand TDengine's FQDN (In Chinese)](https://www.taosdata.com/blog/2021/07/29/2741.html).
|
||||
Usually, the root cause is an incorrect FQDN configuration. You can refer to this section in the [FAQ](https://docs.tdengine.com/2.4/train-faq/faq/#2-how-to-handle-unable-to-establish-connection) to troubleshoot.
|
||||
|
||||
## Important Updates
|
||||
|
||||
|
|
|
@ -11,18 +11,18 @@ import TabItem from "@theme/TabItem";
|
|||
`taospy` is the official Python connector for TDengine. `taospy` provides a rich set of APIs that makes it easy for Python applications to access TDengine. `taospy` wraps both the [native interface](/reference/connector/cpp) and [REST interface](/reference/rest-api) of TDengine, which correspond to the `taos` and `taosrest` modules of the `taospy` package, respectively.
|
||||
In addition to wrapping the native and REST interfaces, `taospy` also provides a set of programming interfaces that conforms to the [Python Data Access Specification (PEP 249)](https://peps.python.org/pep-0249/). It is easy to integrate `taospy` with many third-party tools, such as [SQLAlchemy](https://www.sqlalchemy.org/) and [pandas](https://pandas.pydata.org/).
|
||||
|
||||
The connection to the server directly using the native interface provided by the client driver is referred to hereinafter as a "native connection"; the connection to the server using the REST interface provided by taosAdapter is referred to hereinafter as a "REST connection".
|
||||
The direct connection to the server using the native interface provided by the client driver is referred to hereinafter as a "native connection"; the connection to the server using the REST interface provided by taosAdapter is referred to hereinafter as a "REST connection".
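As a sketch, the two styles look like this in code (localhost defaults assumed; the REST connection requires a running taosAdapter):

```python
import taos      # native connection, requires the TDengine client driver
import taosrest  # REST connection, pure Python

native_conn = taos.connect(host="localhost", port=6030)
rest_conn = taosrest.connect(url="http://localhost:6041")
```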
|
||||
|
||||
The source code for the Python connector is hosted on [GitHub](https://github.com/taosdata/taos-connector-python).
|
||||
|
||||
## Supported Platforms
|
||||
|
||||
- The native connection [supported platforms](/reference/connector/#supported-platforms) is the same as the one supported by the TDengine client.
|
||||
- The [supported platforms](/reference/connector/#supported-platforms) for the native connection are the same as the ones supported by the TDengine client.
|
||||
- REST connections are supported on all platforms that can run Python.
|
||||
|
||||
## Version selection
|
||||
|
||||
We recommend using the latest version of `taospy`, regardless what the version of TDengine is.
|
||||
We recommend using the latest version of `taospy`, regardless of the version of TDengine.
|
||||
|
||||
## Supported features
|
||||
|
||||
|
@ -53,7 +53,7 @@ Earlier TDengine client software includes the Python connector. If the Python co
|
|||
|
||||
:::
|
||||
|
||||
#### to install `taospy`
|
||||
#### To install `taospy`
|
||||
|
||||
<Tabs>
|
||||
<TabItem label="Install from PyPI" value="pypi">
|
||||
|
@ -139,7 +139,7 @@ The FQDN above can be the FQDN of any dnode in the cluster, and the PORT is the
|
|||
</TabItem>
|
||||
<TabItem value="rest" label="REST connection" groupId="connect">
|
||||
|
||||
For REST connections and making sure the cluster is up, make sure the taosAdapter component is up. This can be tested using the following `curl ` command.
|
||||
For REST connections, make sure the cluster and the taosAdapter component are running. This can be tested using the following `curl` command.
|
||||
|
||||
```
|
||||
curl -u root:taosdata http://<FQDN>:<PORT>/rest/sql -d "select server_version()"
|
||||
|
@ -312,7 +312,7 @@ For a more detailed description of the `sql()` method, please refer to [RestClie
|
|||
|
||||
### Exception handling
|
||||
|
||||
All database operations will be thrown directly if an exception occurs. The application is responsible for exception handling. For example:
|
||||
All errors from database operations are thrown directly as exceptions and the error message from the database is passed up the exception stack. The application is responsible for exception handling. For example:
|
||||
|
||||
```python
|
||||
{{#include docs-examples/python/handle_exception.py}}
|
||||
|
@ -320,7 +320,7 @@ All database operations will be thrown directly if an exception occurs. The appl
|
|||
|
||||
### About nanoseconds
|
||||
|
||||
Due to the current imperfection of Python's nanosecond support (see link below), the current implementation returns integers at nanosecond precision instead of the `datetime` type produced by `ms and `us`, which application developers will need to handle on their own. And it is recommended to use pandas' to_datetime(). The Python Connector may modify the interface in the future if Python officially supports nanoseconds in full.
|
||||
Due to the current imperfection of Python's nanosecond support (see the links below), the current implementation returns integers at nanosecond precision instead of the `datetime` type produced for `ms` and `us`, which application developers will need to handle on their own. We recommend using pandas' to_datetime(), as in the sketch after the links. The Python Connector may modify the interface in the future if Python officially supports nanoseconds in full.
|
||||
|
||||
1. https://stackoverflow.com/questions/10611328/parsing-datetime-strings-containing-nanoseconds
|
||||
2. https://www.python.org/dev/peps/pep-0564/
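A small sketch of the recommended conversion (the integer value is hypothetical):

```python
import pandas as pd

ts_ns = 1_500_000_000_000_000_000      # nanosecond timestamp returned as a plain int
dt = pd.to_datetime(ts_ns, unit="ns")  # Timestamp('2017-07-14 02:40:00')
```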
|
||||
|
@ -328,7 +328,7 @@ Due to the current imperfection of Python's nanosecond support (see link below),
|
|||
|
||||
## Frequently Asked Questions
|
||||
|
||||
Welcome to [ask questions or report questions] (https://github.com/taosdata/taos-connector-python/issues).
|
||||
Welcome to [ask questions or report questions](https://github.com/taosdata/taos-connector-python/issues).
|
||||
|
||||
## Important Update
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ REST connections are supported on all platforms that can run Rust.
|
|||
|
||||
Please refer to [version support list](/reference/connector#version-support).
|
||||
|
||||
The Rust Connector is still under rapid development and is not guaranteed to be backward compatible before 1.0. Recommend to use TDengine version 2.4 or higher to avoid known issues.
|
||||
The Rust Connector is still under rapid development and is not guaranteed to be backward compatible before 1.0. We recommend using TDengine version 2.4 or higher to avoid known issues.
|
||||
|
||||
## Installation
|
||||
|
||||
|
@ -206,7 +206,7 @@ let conn: Taos = cfg.connect();
|
|||
|
||||
### Connection pooling
|
||||
|
||||
In complex applications, recommand to enable connection pool. Connection pool for [libtaos] is implemented using [r2d2].
|
||||
In complex applications, we recommend enabling connection pools. Connection pool for [libtaos] is implemented using [r2d2].
|
||||
|
||||
As follows, a connection pool with default parameters can be generated.
|
||||
|
||||
|
@ -269,7 +269,7 @@ The [Taos] structure is the connection manager in [libtaos] and provides two mai
|
|||
|
||||
Note that Rust asynchronous functions and an asynchronous runtime are required.
|
||||
|
||||
[Taos] provides partial Rust methodization of SQL to reduce the frequency of `format!` code blocks.
|
||||
[Taos] provides a few Rust methods that encapsulate SQL to reduce the frequency of `format!` code blocks.
|
||||
|
||||
- `.describe(table: &str)`: Executes `DESCRIBE` and returns a Rust data structure.
|
||||
- `.create_database(database: &str)`: Executes the `CREATE DATABASE` statement.
|
||||
|
@ -279,7 +279,7 @@ In addition, this structure is also the entry point for [Parameter Binding](#Par
|
|||
|
||||
### Bind Interface
|
||||
|
||||
Similar to the C interface, Rust provides the bind interface's wraping. First, create a bind object [Stmt] for a SQL command from the [Taos] object.
|
||||
Similar to the C interface, Rust provides the bind interface's wrapping. First, create a bind object [Stmt] for a SQL command from the [Taos] object.
|
||||
|
||||
```rust
|
||||
let mut stmt: Stmt = taos.stmt("insert into ? values(?, ?)")?;
|
||||
|
|
|
@ -24,21 +24,21 @@ taosAdapter provides the following features.
|
|||
|
||||
## taosAdapter architecture diagram
|
||||
|
||||

|
||||

|
||||
|
||||
## taosAdapter Deployment Method
|
||||
|
||||
### Install taosAdapter
|
||||
|
||||
taosAdapter has been part of TDengine server software since TDengine v2.4.0.0. If you use the TDengine server, you don't need additional steps to install taosAdapter. You can download taosAdapter from [TAOSData official website](https://taosdata.com/en/all-downloads/) to download the TDengine server installation package (taosAdapter is included in v2.4.0.0 and later version). If you need to deploy taosAdapter separately on another server other than the TDengine server, you should install the full TDengine on that server to install taosAdapter. If you need to build taosAdapter from source code, you can refer to the [Building taosAdapter]( https://github.com/taosdata/taosadapter/blob/develop/BUILD.md) documentation.
|
||||
taosAdapter has been part of TDengine server software since TDengine v2.4.0.0. If you use the TDengine server, you don't need additional steps to install taosAdapter. You can visit the [TDengine official website](https://tdengine.com/all-downloads/) to download the TDengine server installation package (taosAdapter is included in v2.4.0.0 and later versions). If you need to deploy taosAdapter separately, on a server other than the TDengine server, you should install the full TDengine server package on that server to install taosAdapter. If you need to build taosAdapter from source code, you can refer to the [Building taosAdapter](https://github.com/taosdata/taosadapter/blob/develop/BUILD.md) documentation.
|
||||
|
||||
### start/stop taosAdapter
|
||||
### Start/Stop taosAdapter
|
||||
|
||||
On Linux systems, the taosAdapter service is managed by `systemd` by default. You can use the command `systemctl start taosadapter` to start the taosAdapter service and use the command `systemctl stop taosadapter` to stop the taosAdapter service.
|
||||
|
||||
### Remove taosAdapter
|
||||
|
||||
Use the command `rmtaos` to remove the TDengine server software if you use tar.gz package or use package management command like rpm or apt to remove the TDengine server, including taosAdapter.
|
||||
Use the command `rmtaos` to remove the TDengine server software, including taosAdapter, if you installed it using the tar.gz package. If you installed it using a .deb or .rpm package, use the corresponding command for your package manager, like apt or rpm, to remove the TDengine server, including taosAdapter.
|
||||
|
||||
### Upgrade taosAdapter
|
||||
|
||||
|
@ -153,8 +153,7 @@ See [example/config/taosadapter.toml](https://github.com/taosdata/taosadapter/bl
|
|||
|
||||
## Feature List
|
||||
|
||||
- Compatible with RESTful interfaces
|
||||
[https://www.taosdata.com/cn/documentation/connector#restful](https://www.taosdata.com/cn/documentation/connector#restful)
|
||||
- Compatible with RESTful interfaces [REST API](/reference/rest-api/)
|
||||
- Compatible with InfluxDB v1 write interface
|
||||
[https://docs.influxdata.com/influxdb/v2.0/reference/api/influxdb-1x/write/](https://docs.influxdata.com/influxdb/v2.0/reference/api/influxdb-1x/write/)
|
||||
- Compatible with OpenTSDB JSON and telnet format writes
|
||||
|
@ -187,7 +186,7 @@ You can use any client that supports the http protocol to write data to or query
|
|||
|
||||
### InfluxDB
|
||||
|
||||
You can use any client that supports the http protocol to access the Restful interface address `http://<fqdn>:6041/<APIEndPoint>` to write data in InfluxDB compatible format to TDengine. The EndPoint is as follows:
|
||||
You can use any client that supports the http protocol to access the RESTful interface address `http://<fqdn>:6041/<APIEndPoint>` to write data in InfluxDB compatible format to TDengine. The EndPoint is as follows:
|
||||
|
||||
```text
|
||||
/influxdb/v1/write
|
||||
|
@ -204,7 +203,7 @@ Note: InfluxDB token authorization is not supported at present. Only Basic autho
|
|||
|
||||
### OpenTSDB
|
||||
|
||||
You can use any client that supports the http protocol to access the Restful interface address `http://<fqdn>:6041/<APIEndPoint>` to write data in OpenTSDB compatible format to TDengine.
|
||||
You can use any client that supports the http protocol to access the RESTful interface address `http://<fqdn>:6041/<APIEndPoint>` to write data in OpenTSDB compatible format to TDengine.
|
||||
|
||||
```text
|
||||
/opentsdb/v1/put/json/:db
|
||||
|
@ -241,7 +240,7 @@ node_export is an exporter of hardware and OS metrics exposed by the \*NIX kerne
|
|||
|
||||
## Memory usage optimization methods
|
||||
|
||||
taosAdapter will monitor its memory usage during operation and adjust it with two thresholds. Valid values range from -1 to 100 integers in percent of the system's physical memory.
|
||||
taosAdapter will monitor its memory usage during operation and adjust it with two thresholds. Valid values are integers from 1 to 100, representing a percentage of the system's physical memory.
|
||||
|
||||
- pauseQueryMemoryThreshold
|
||||
- pauseAllMemoryThreshold
|
||||
|
@ -277,7 +276,7 @@ Corresponding configuration parameter
|
|||
monitor.pauseQueryMemoryThreshold memory threshold for no more queries Environment variable `TAOS_MONITOR_PAUSE_QUERY_MEMORY_THRESHOLD` (default 70)
|
||||
```
|
||||
|
||||
You can adjust it according to the specific application scenario and operation strategy, and it is recommended to use operation monitoring software to monitor system memory status timely. The load balancer can also check the taosAdapter running status through this interface.
|
||||
You should adjust this parameter based on your specific application scenario and operation strategy. We recommend using monitoring software to monitor system memory status. The load balancer can also check the taosAdapter running status through this interface.
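For example, a hedged health-probe sketch in Go (the `/-/ping` path is assumed from taosAdapter's monitoring interface and should be verified against your version; requires `net/http` and `log`):

```go
// Treat anything other than HTTP 200 as "do not route traffic here":
// taosAdapter answers non-200 once the pause thresholds are exceeded.
resp, err := http.Get("http://localhost:6041/-/ping")
if err != nil || resp.StatusCode != http.StatusOK {
	log.Println("taosAdapter unavailable, removing node from rotation")
}
```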
|
||||
|
||||
## taosAdapter Monitoring Metrics
|
||||
|
||||
|
@ -326,7 +325,7 @@ You can also adjust the level of the taosAdapter log output by setting the `--lo
|
|||
|
||||
## How to migrate from older TDengine versions to taosAdapter
|
||||
|
||||
In TDengine server 2.2.x.x or earlier, the TDengine server process (taosd) contains an embedded HTTP service. As mentioned earlier, taosAdapter is a standalone software managed using `systemd` and has its process ID. And there are some configuration parameters and behaviors that are different between the two. See the following table for details.
|
||||
In TDengine server 2.2.x.x or earlier, the TDengine server process (taosd) contains an embedded HTTP service. As mentioned earlier, taosAdapter is a standalone software managed using `systemd` and has its own process ID. There are some configuration parameters and behaviors that are different between the two. See the following table for details.
|
||||
|
||||
| **#** | **embedded httpd** | **taosAdapter** | **comment** |
|
||||
| ----- | ------------------- | ------------------------------------ | ----------------- |
|
||||
|
|
|
@ -7,7 +7,7 @@ description: "taosBenchmark (once called taosdemo ) is a tool for testing the pe
|
|||
|
||||
## Introduction
|
||||
|
||||
taosBenchmark (formerly taosdemo ) is a tool for testing the performance of TDengine products. taosBenchmark can test the performance of TDengine's insert, query, and subscription functions and simulate large amounts of data generated by many devices. taosBenchmark can flexibly control the number and type of databases, supertables, tag columns, number and type of data columns, and sub-tables, and types of databases, super tables, the number and types of data columns, the number of sub-tables, the amount of data per sub-table, the time interval for inserting data, the number of working threads, whether and how to insert disordered data, and so on. The installer provides taosdemo as a soft link to taosBenchmark for compatibility with past users.
|
||||
taosBenchmark (formerly taosdemo) is a tool for testing the performance of TDengine products. taosBenchmark can test the performance of TDengine's insert, query, and subscription functions and simulate large amounts of data generated by many devices. taosBenchmark can flexibly control the number and types of databases, super tables, tag columns, data columns, and sub-tables, the amount of data per sub-table, the time interval for inserting data, the number of working threads, whether and how to insert disordered data, and so on. The installer provides taosdemo as a soft link to taosBenchmark for compatibility and for the convenience of past users.
|
||||
|
||||
## Installation
|
||||
|
||||
|
@ -21,7 +21,7 @@ There are two ways to install taosBenchmark:
|
|||
|
||||
### Configuration and running methods
|
||||
|
||||
taosBenchmark supports two configuration methods: [Command-line arguments](#Command-line arguments in detailed) and [JSON configuration file](#Configuration file arguments in detailed). These two methods are mutually exclusive, and with only one command-line parameter, users can use `-f <json file>` to specify a configuration file when using a configuration file. When running taosBenchmark with command-line arguments and controlling its behavior, users should use other parameters for configuration rather than `-f` parameter. In addition, taosBenchmark offers a special way of running without parameters.
|
||||
taosBenchmark supports two configuration methods: [Command-line arguments](#Command-line arguments in detailed) and [JSON configuration file](#Configuration file arguments in detailed). These two methods are mutually exclusive. Users can use `-f <json file>` to specify a configuration file. When running taosBenchmark with command-line arguments to control its behavior, users should use other parameters for configuration, but not the `-f` parameter. In addition, taosBenchmark offers a special way of running without parameters.
|
||||
|
||||
taosBenchmark supports complete performance testing of TDengine. taosBenchmark supports the TDengine functions in three categories: write, query, and subscribe. These three functions are mutually exclusive, and users can select only one of them each time taosBenchmark runs. It is important to note that the type of functionality to be tested is not configurable when using the command-line configuration method, which can only test writing performance. To test the query and subscription performance of TDengine, you must use the configuration file method and specify the function type to test via the parameter `filetype` in the configuration file.
|
||||
|
||||
|
@ -35,7 +35,7 @@ Execute the following commands to quickly experience taosBenchmark's default con
|
|||
taosBenchmark
|
||||
```
|
||||
|
||||
When run without parameters, taosBenchmark connects to the TDengine cluster specified in `/etc/taos` by default and creates a database named test in TDengine, a super table named `meters` under the test database, and 10,000 tables under the super table with 10,000 records written to each table. Note that if there is already a test database, this table is not used. Note that if there is already a test database, this command will delete it first and create a new test database.
|
||||
When run without parameters, taosBenchmark connects to the TDengine cluster specified in `/etc/taos` by default and creates a database named `test`, a super table named `meters` under the test database, and 10,000 tables under the super table with 10,000 records written to each table. Note that if there is already a database named "test" this command will delete it first and create a new database.
|
||||
|
||||
### Run with command-line configuration parameters
|
||||
|
||||
|
@ -45,7 +45,7 @@ The `-f <json file>` argument cannot be used when running taosBenchmark with com
|
|||
taosBenchmark -I stmt -n 200 -t 100
|
||||
```
|
||||
|
||||
The above command, `taosBenchmark` will create a database named `test`, create a super table `meters` in it, create 100 sub-tables in the super table and insert 200 records for each sub-table using parameter binding.
|
||||
Using the above command, `taosBenchmark` will create a database named `test`, create a super table `meters` in it, create 100 sub-tables in the super table and insert 200 records for each sub-table using parameter binding.
|
||||
|
||||
### Run with the configuration file
|
||||
|
||||
|
@ -95,10 +95,10 @@ taosBenchmark -f <json file>
|
|||
## Command-line argument in detailed
|
||||
|
||||
- **-f/--file <json file\>** :
|
||||
specify the configuration file to use. This file includes All parameters. And users should not use this parameter with other parameters on the command-line. There is no default value.
|
||||
specify the configuration file to use. This file includes all parameters. Users should not use this parameter with other parameters on the command line. There is no default value.
|
||||
|
||||
- **-c/--config-dir <dir\>** :
|
||||
specify the directory where the TDengine cluster configuration file. the default path is `/etc/taos`.
|
||||
specify the directory where the TDengine cluster configuration file is located. The default path is `/etc/taos`.
|
||||
|
||||
- **-h/--host <host\>** :
|
||||
Specify the FQDN of the TDengine server to connect to. The default value is localhost.
|
||||
|
@ -272,13 +272,13 @@ The parameters for creating super tables are configured in `super_tables` in the
|
|||
|
||||
- **child_table_prefix** : The prefix of the child table name, mandatory configuration item, no default value.
|
||||
|
||||
- **escape_character**: specify the super table and child table names containing escape characters. By default is "no". The value can be "yes" or "no".
|
||||
- **escape_character**: specify the super table and child table names containing escape characters. The value can be "yes" or "no". The default is "no".
|
||||
|
||||
- **auto_create_table**: only available when insert_mode is taosc, rest, or stmt, and childtable_exists is "no". "yes" means taosBenchmark will automatically create non-existent tables when inserting data; "no" means that taosBenchmark will create all tables before inserting.
|
||||
|
||||
- **batch_create_tbl_num** : the number of tables per batch when creating sub-tables, default is 10. Note: the actual number of batches may not be the same as this value when the executed SQL statement is larger than the maximum length supported, it will be automatically truncated and re-executed to continue creating.
|
||||
- **batch_create_tbl_num** : the number of tables per batch when creating sub-tables, default is 10. Note: the actual number of batches may not be the same as this value. If the executed SQL statement is larger than the maximum length supported, it will be automatically truncated and re-executed to continue creating.
|
||||
|
||||
- **data_source**: specify the source of data-generating. Default is taosBenchmark randomly generated. Users can configure it as "rand" and "sample". When "sample" is used, taosBenchmark will use the data in the file specified by the `sample_file` parameter.
|
||||
- **data_source**: specify the source of the generated data. The default is data randomly generated by taosBenchmark. Users can configure it as "rand" or "sample". When "sample" is used, taosBenchmark will use the data in the file specified by the `sample_file` parameter.
|
||||
|
||||
- **insert_mode**: insertion mode with options taosc, rest, stmt, sml, sml-rest, corresponding to normal write, restful interface write, parameter binding interface write, schemaless interface write, restful schemaless interface write (provided by taosAdapter). The default value is taosc.
|
||||
|
||||
|
@ -300,15 +300,15 @@ The parameters for creating super tables are configured in `super_tables` in the
|
|||
|
||||
- **partial_col_num**: If this value is a positive number n, only the first n columns are written to. This takes effect only when insert_mode is taosc or rest. If n is 0, all columns are written.
|
||||
|
||||
- **disorder_ratio** : Specifies the percentage probability of disordered data in the value range [0,50]. The default is 0, which means there is no disorder data.
|
||||
- **disorder_ratio** : Specifies the percentage probability of disordered (i.e. out-of-order) data in the value range [0,50]. The default is 0, which means there is no disorder data.
|
||||
|
||||
- **disorder_range** : Specifies the timestamp fallback range for the disordered data. The generated disorder timestamp is the timestamp that should be used in the non-disorder case minus a random value in this range. Valid only if the percentage of disordered data specified by `-O/--disorder` is greater than 0.
|
||||
- **disorder_range** : Specifies the timestamp fallback range for the disordered data. The disordered timestamp is generated by subtracting a random value in this range, from the timestamp that would be used in the non-disorder case. Valid only if the percentage of disordered data specified by `-O/--disorder` is greater than 0.
|
||||
|
||||
- **timestamp_step**: The timestamp step for inserting data in each child table, in units consistent with the `precision` of the database, the default value is 1.
|
||||
- **timestamp_step**: The timestamp step for inserting data in each child table, in units consistent with the `precision` of the database. For example, if the `precision` is milliseconds, the timestamp step will be in milliseconds. The default value is 1.
|
||||
|
||||
- **start_timestamp** : The timestamp start value of each sub-table, the default value is now.
|
||||
|
||||
- **sample_format**: The type of the sample data file, now only "csv" is supported.
|
||||
- **sample_format**: The type of the sample data file; for now only "csv" is supported.
|
||||
|
||||
- **sample_file**: Specify a CSV format file as the data source. It only works when data_source is "sample". If the number of rows in the CSV file is less than or equal to prepared_rand, taosBenchmark will read the CSV file data cyclically until the row count equals prepared_rand; otherwise, taosBenchmark will read only the first prepared_rand rows. The number of distinct rows used from the file is therefore the smaller of the two.
|
||||
|
||||
|
@ -341,7 +341,7 @@ The configuration parameters for specifying super table tag columns and data col
|
|||
|
||||
- **create_table_thread_count** : The number of threads to build the table, default is 8.
|
||||
|
||||
- **connection_pool_size** : The number of pre-established connections to the TDengine server. If not configured, it is the same number of threads specified.
|
||||
- **connection_pool_size** : The number of pre-established connections to the TDengine server. If not configured, it is the same as the number of threads specified.
|
||||
|
||||
- **result_file** : The path to the result output file, the default value is ./output.txt.
|
||||
|
||||
|
|
|
@ -1,25 +1,25 @@
|
|||
---
|
||||
title: taosdump
|
||||
description: "taosdump is a tool application that supports backing up data from a running TDengine cluster and restoring the backed up data to the same or another running TDengine cluster."
|
||||
description: "taosdump is a tool that supports backing up data from a running TDengine cluster and restoring the backed up data to the same, or another running TDengine cluster."
|
||||
---
|
||||
|
||||
## Introduction
|
||||
|
||||
taosdump is a tool application that supports backing up data from a running TDengine cluster and restoring the backed up data to the same or another running TDengine cluster.
|
||||
taosdump is a tool that supports backing up data from a running TDengine cluster and restoring the backed up data to the same, or another running TDengine cluster.
|
||||
|
||||
taosdump can back up a database, a super table, or a normal table as a logical data unit or backup data records in the database, super tables, and normal tables. When using taosdump, you can specify the directory path for data backup. If you do not specify a directory, taosdump will back up the data to the current directory by default.
|
||||
|
||||
Suppose the specified location already has data files. In that case, taosdump will prompt the user and exit immediately to avoid data overwriting which means that the same path can only be used for one backup.
|
||||
Please be careful if you see a prompt for this.
|
||||
If the specified location already has data files, taosdump will prompt the user and exit immediately to avoid data overwriting. This means that the same path can only be used for one backup.
|
||||
|
||||
Please be careful if you see a prompt for this and please ensure that you follow best practices and relevant SOPs for data integrity, backup and data security.
|
||||
|
||||
taosdump is a logical backup tool and should not be used to back up any raw data, environment settings,
|
||||
Users should not use taosdump to back up raw data, environment settings, hardware information, server configuration, or cluster topology. taosdump uses [Apache AVRO](https://avro.apache.org/) as the data file format to store backup data.
|
||||
|
||||
## Installation
|
||||
|
||||
There are two ways to install taosdump:
|
||||
|
||||
- Install the taosTools official installer. Please find taosTools from [All download links](https://www.taosdata.com/all-downloads) page and download and install it.
|
||||
- Install the taosTools official installer. Please find taosTools from [All download links](https://www.tdengine.com/all-downloads) page and download and install it.
|
||||
|
||||
- Compile taos-tools separately and install it. Please refer to the [taos-tools](https://github.com/taosdata/taos-tools) repository for details.
|
||||
|
||||
|
@ -28,14 +28,14 @@ There are two ways to install taosdump:
|
|||
### taosdump backup data
|
||||
|
||||
1. backing up all databases: specify the `-A` or `--all-databases` parameter.
|
||||
2. backup multiple specified databases: use `-D db1,db2,... ` parameters; 3.
|
||||
2. backup multiple specified databases: use `-D db1,db2,... ` parameters;
|
||||
3. back up some super or normal tables in the specified database: use `dbname stbname1 stbname2 tbname1 tbname2 ...` parameters. Note that the first parameter of this input sequence is the database name, and only one database is supported. The second and subsequent parameters are the names of super or normal tables in that database, separated by spaces.
|
||||
4. back up the system log database: TDengine clusters usually contain a system database named `log`. The data in this database is generated by TDengine's own operation, and taosdump will not back up the log database by default. If users need to back up the log database, they can use the `-a` or `--allow-sys` command-line parameter.
|
||||
5. Loose mode backup: taosdump version 1.4.1 onwards provides `-n` and `-L` parameters for backing up data without using escape characters and "loose" mode, which can reduce the number of backups if table names, column names, tag names do not use This can reduce the backup data time and backup data footprint if table names, column names, and tag names do not use `escape character`. If you are unsure about using `-n` and `-L` conditions, please use the default parameters for "strict" mode backup. See the [official documentation](/taos-sql/escape) for a description of escaped characters.
|
||||
5. Loose mode backup: taosdump version 1.4.1 onwards provides `-n` and `-L` parameters for backing up data without using escape characters and "loose" mode, which can reduce the number of backups if table names, column names, tag names do not use escape characters. This can also reduce the backup data time and backup data footprint. If you are unsure about using `-n` and `-L` conditions, please use the default parameters for "strict" mode backup. See the [official documentation](/taos-sql/escape) for a description of escaped characters.
|
||||
|
||||
:::tip
|
||||
- taosdump versions after 1.4.1 provide the `-I` argument for parsing Avro file schema and data. If users specify `-s`, taosdump will parse the schema only.
|
||||
- Backups after taosdump 1.4.2 use the batch count specified by the `-B` parameter. The default value is 16384. If, in some environments, low network speed or disk performance causes "Error actual dump ... batch ..." can be tried by challenging the `-B` parameter to a smaller value.
|
||||
- Backups after taosdump 1.4.2 use the batch count specified by the `-B` parameter. The default value is 16384. If, in some environments, low network speed or disk performance causes "Error actual dump ... batch ...", then try changing the `-B` parameter to a smaller value.
|
||||
|
||||
:::
|
||||
|
||||
|
@ -44,7 +44,7 @@ There are two ways to install taosdump:
|
|||
Restore the data file in the specified path: use the `-i` parameter plus the path to the data file. You should not use the same directory to back up different data sets, and you should not back up the same data set multiple times in the same path. Otherwise, the backup data will either be overwritten or backed up multiple times.
|
||||
|
||||
:::tip
|
||||
taosdump internally uses TDengine stmt binding API for writing recovery data and currently uses 16384 as one write batch for better data recovery performance. If there are more columns in the backup data, it may cause a "WAL size exceeds limit" error. You can try to adjust to a smaller value by using the `-B` parameter.
|
||||
taosdump internally uses TDengine stmt binding API for writing recovery data with a default batch size of 16384 for better data recovery performance. If there are more columns in the backup data, it may cause a "WAL size exceeds limit" error. You can try to adjust the batch size to a smaller value by using the `-B` parameter.
|
||||
|
||||
:::
|
||||
|
||||
|
@ -59,7 +59,7 @@ Usage: taosdump [OPTION...] dbname [tbname ...]
|
|||
or: taosdump [OPTION...] -i inpath
|
||||
or: taosdump [OPTION...] -o outpath
|
||||
|
||||
-h, --host=HOST Server host dumping data from. Default is
|
||||
-h, --host=HOST Server host from which to dump data. Default is
|
||||
localhost.
|
||||
-p, --password User password to connect to server. Default is
|
||||
taosdata.
|
||||
|
@ -72,10 +72,10 @@ Usage: taosdump [OPTION...] dbname [tbname ...]
|
|||
-r, --resultFile=RESULTFILE DumpOut/In Result file path and name.
|
||||
-a, --allow-sys Allow to dump system database
|
||||
-A, --all-databases Dump all databases.
|
||||
-D, --databases=DATABASES Dump inputted databases. Use comma to separate
|
||||
databases' name.
|
||||
-D, --databases=DATABASES Dump listed databases. Use comma to separate
|
||||
database names.
|
||||
-N, --without-property Dump database without its properties.
|
||||
-s, --schemaonly Only dump tables' schema.
|
||||
-s, --schemaonly Only dump table schemas.
|
||||
-y, --answer-yes Input yes for prompt. It will skip data file
|
||||
checking!
|
||||
-d, --avro-codec=snappy Choose an avro codec among null, deflate, snappy,
|
||||
|
@ -98,7 +98,7 @@ Usage: taosdump [OPTION...] dbname [tbname ...]
|
|||
and try. The workable value is related to the
|
||||
length of the row and type of table schema.
|
||||
-I, --inspect inspect avro file content and print on screen
|
||||
-L, --loose-mode Using loose mode if the table name and column name
|
||||
-L, --loose-mode Use loose mode if the table name and column name
|
||||
use letter and number only. Default is NOT.
|
||||
-n, --no-escape No escape char '`'. Default is using it.
|
||||
-T, --thread-num=THREAD_NUM Number of thread for dump in file. Default is
|
||||
|
|
|
@ -5,11 +5,11 @@ sidebar_label: TDinsight
|
|||
|
||||
TDinsight is a solution for monitoring TDengine using the builtin native monitoring database and [Grafana].
|
||||
|
||||
After TDengine starts, it will automatically create a monitoring database `log`. TDengine will automatically write many metrics in specific intervals into the `log` database. The metrics may include the server's CPU, memory, hard disk space, network bandwidth, number of requests, disk read/write speed, slow queries, other information like important system operations (user login, database creation, database deletion, etc.), and error alarms. With [Grafana] and [TDengine Data Source Plugin](https://github.com/taosdata/grafanaplugin/releases), TDinsight can visualize cluster status, node information, insertion and query requests, resource usage, etc., and also vnode, dnode, and mnode status, and exception alerts. Developers monitoring TDengine cluster operation status in real-time can be very convinient. This article will guide users to install the Grafana server, automatically install the TDengine data source plug-in, and deploy the TDinsight visualization panel through `TDinsight.sh` installation script.
|
||||
After TDengine starts, it will automatically create a monitoring database `log`. TDengine will automatically write many metrics in specific intervals into the `log` database. The metrics may include the server's CPU, memory, hard disk space, network bandwidth, number of requests, disk read/write speed, slow queries, other information like important system operations (user login, database creation, database deletion, etc.), and error alarms. With [Grafana] and [TDengine Data Source Plugin](https://github.com/taosdata/grafanaplugin/releases), TDinsight can visualize cluster status, node information, insertion and query requests, resource usage, vnode, dnode, and mnode status, exception alerts and many other metrics. This is very convenient for developers who want to monitor TDengine cluster status in real-time. This article will guide users to install the Grafana server, automatically install the TDengine data source plug-in, and deploy the TDinsight visualization panel using the `TDinsight.sh` installation script.
|
||||
|
||||
## System Requirements
|
||||
|
||||
To deploy TDinsight, a single-node TDengine server or a multi-nodes TDengine cluster and a [Grafana] server are required. This dashboard requires TDengine 2.3.3.0 and above, with the `log` database enabled (`monitor = 1`).
|
||||
To deploy TDinsight, a single-node TDengine server or a multi-node TDengine cluster and a [Grafana] server are required. This dashboard requires TDengine 2.3.3.0 and above, with the `log` database enabled (`monitor = 1`).
|
||||
|
||||
## Installing Grafana
|
||||
|
||||
|
@ -17,7 +17,7 @@ We recommend using the latest [Grafana] version 7 or 8 here. You can install Gra
|
|||
|
||||
### Installing Grafana on Debian or Ubuntu
|
||||
|
||||
For Debian or Ubuntu operating systems, we recommend the Grafana image repository and Use the following command to install from scratch.
|
||||
For Debian or Ubuntu operating systems, we recommend using the Grafana package repository and the following commands to install from scratch.
|
||||
|
||||
```bash
|
||||
sudo apt-get install -y apt-transport-https
|
||||
|
@ -61,7 +61,7 @@ sudo yum install \
|
|||
|
||||
## Automated deployment of TDinsight
|
||||
|
||||
We provide an installation script [`TDinsight.sh`](https://github.com/taosdata/grafanaplugin/releases/latest/download/TDinsight.sh) script to allow users to configure the installation automatically and quickly.
|
||||
We provide an installation script [`TDinsight.sh`](https://github.com/taosdata/grafanaplugin/releases/latest/download/TDinsight.sh) to allow users to configure the installation automatically and quickly.
|
||||
|
||||
You can download the script via `wget` or other tools:
|
||||
|
||||
|
@ -71,7 +71,7 @@ chmod +x TDinsight.sh
|
|||
./TDinsight.sh
|
||||
```
|
||||
|
||||
This script will automatically download the latest [Grafana TDengine data source plugin](https://github.com/taosdata/grafanaplugin/releases/latest) and [TDinsight dashboard](https://grafana.com/grafana/dashboards/15167) with configurable parameters from the command-line options to the [Grafana Provisioning](https://grafana.com/docs/grafana/latest/administration/provisioning/) configuration file to automate deployment and updates, etc. With the alert setting options provided by this script, you can also get built-in support for AliCloud SMS alert notifications.
|
||||
This script will automatically download the latest [Grafana TDengine data source plugin](https://github.com/taosdata/grafanaplugin/releases/latest) and [TDinsight dashboard](https://grafana.com/grafana/dashboards/15167), write configurable parameters taken from the command-line options into the [Grafana Provisioning](https://grafana.com/docs/grafana/latest/administration/provisioning/) configuration file, and automate deployment and updates. With the alert setting options provided by this script, you can also get built-in support for AliCloud SMS alert notifications.
|
||||
|
||||
Assume you use TDengine and Grafana's default services on the same host. Run `./TDinsight.sh` and open the Grafana browser window to see the TDinsight dashboard.
|
||||
|
||||
|
@ -233,33 +233,33 @@ The default username/password is `admin`. Grafana will require a password change
|
|||
|
||||
Point to the **Configurations** -> **Data Sources** menu, and click the **Add data source** button.
|
||||
|
||||

|
||||

|
||||
|
||||
Search for and select **TDengine**.
|
||||
|
||||

|
||||

|
||||
|
||||
Configure the TDengine datasource.
|
||||
|
||||

|
||||

|
||||
|
||||
Save and test. It will report 'TDengine Data source is working' under normal circumstances.
|
||||
|
||||

|
||||

|
||||
|
||||
### Importing dashboards
|
||||
|
||||
Point to **+** / **Create** - **import** (or `/dashboard/import` url).
|
||||
|
||||

|
||||

|
||||
|
||||
Type the dashboard ID `15167` in the **Import via grafana.com** location and **Load**.
|
||||
|
||||

|
||||

|
||||
|
||||
Once the import is complete, the full page view of TDinsight is shown below.
|
||||
|
||||

|
||||

|
||||
|
||||
## TDinsight dashboard details
|
||||
|
||||
|
@ -269,7 +269,7 @@ Details of the metrics are as follows.
|
|||
|
||||
### Cluster Status
|
||||
|
||||

|
||||

|
||||
|
||||
This section contains the current information and status of the cluster, and the alert information is also here (from left to right, top to bottom).
|
||||
|
||||
|
@ -289,7 +289,7 @@ This section contains the current information and status of the cluster, the ale
|
|||
|
||||
### DNodes Status
|
||||
|
||||

|
||||

|
||||
|
||||
- **DNodes Status**: simple table view of `show dnodes`.
|
||||
- **DNodes Lifetime**: the time elapsed since the dnode was created.
|
||||
|
@ -298,14 +298,14 @@ This section contains the current information and status of the cluster, the ale
|
|||
|
||||
### MNode Overview
|
||||
|
||||

|
||||

|
||||
|
||||
1. **MNodes Status**: a simple table view of `show mnodes`. 2.
|
||||
1. **MNodes Status**: a simple table view of `show mnodes`.
|
||||
2. **MNodes Number**: similar to `DNodes Number`, the number of MNodes changes.
|
||||
|
||||
### Request
|
||||
|
||||

|
||||

|
||||
|
||||
1. **Requests Rate (Inserts per Second)**: average number of inserts per second.
|
||||
2. **Requests (Selects)**: number of query requests and their rate of change (count per second).
|
||||
|
@ -313,46 +313,46 @@ This section contains the current information and status of the cluster, the ale
|
|||
|
||||
### Database
|
||||
|
||||

|
||||

|
||||
|
||||
Database usage, repeated for each value of the variable `$database` i.e. multiple rows per database.
|
||||
|
||||
1. **STables**: number of super tables. 2.
|
||||
2. **Total Tables**: number of all tables. 3.
|
||||
3. **Sub Tables**: the number of all super table sub-tables. 4.
|
||||
1. **STables**: number of super tables.
|
||||
2. **Total Tables**: number of all tables.
|
||||
3. **Sub Tables**: the number of all super table subtables.
|
||||
4. **Tables**: graph of all normal table numbers over time.
|
||||
5. **Tables Number Foreach VGroups**: The number of tables contained in each VGroups.
|
||||
|
||||
### DNode Resource Usage
|
||||
|
||||

|
||||

|
||||
|
||||
Data node resource usage display, with multiple rows repeated for the variable `$fqdn`, i.e., one for each data node. It includes:
|
||||
|
||||
1. **Uptime**: the time elapsed since the dnode was created.
|
||||
2. **Has MNodes?**: whether the current dnode is a mnode. 3.
|
||||
3. **CPU Cores**: the number of CPU cores. 4.
|
||||
4. **VNodes Number**: the number of VNodes in the current dnode. 5.
|
||||
5. **VNodes Masters**: the number of vnodes in the master role. 6.
|
||||
2. **Has MNodes?**: whether the current dnode is a mnode.
|
||||
3. **CPU Cores**: the number of CPU cores.
|
||||
4. **VNodes Number**: the number of VNodes in the current dnode.
|
||||
5. **VNodes Masters**: the number of vnodes in the master role.
|
||||
6. **Current CPU Usage of taosd**: CPU usage rate of taosd processes.
|
||||
7. **Current Memory Usage of taosd**: memory usage of taosd processes.
|
||||
8. **Disk Used**: The total disk usage percentage of the taosd data directory.
|
||||
9. **CPU Usage**: Process and system CPU usage. 10.
|
||||
9. **CPU Usage**: Process and system CPU usage.
|
||||
10. **RAM Usage**: Time series view of RAM usage metrics.
|
||||
11. **Disk Used**: Disks used at each level of multi-level storage (default is level0).
|
||||
12. **Disk Increasing Rate per Minute**: Percentage increase or decrease in disk usage per minute.
|
||||
13. **Disk IO**: Disk IO rate. 14.
|
||||
13. **Disk IO**: Disk IO rate.
|
||||
14. **Net IO**: Network IO, the aggregate network IO rate excluding the local network.
|
||||
|
||||
### Login History
|
||||
|
||||

|
||||

|
||||
|
||||
Currently, only the number of logins per minute is reported.
|
||||
|
||||
### Monitoring taosAdapter
|
||||
|
||||

|
||||

|
||||
|
||||
Supports monitoring taosAdapter request statistics and status details, including:
|
||||
|
||||
|
@ -376,7 +376,7 @@ TDinsight installed via the `TDinsight.sh` script can be cleaned up using the co
|
|||
To completely uninstall TDinsight during a manual installation, you need to clean up the following.
|
||||
|
||||
1. the TDinsight Dashboard in Grafana.
|
||||
2. the Data Source in Grafana. 3.
|
||||
2. the Data Source in Grafana.
|
||||
3. remove the `tdengine-datasource` plugin from the plugin installation directory.
|
||||
|
||||
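For step 3, assuming Grafana's default plugin directory (the same path used in the Grafana chapter of this document), the removal could look like:

```bash
# Remove the tdengine-datasource plugin, then restart Grafana
sudo rm -rf /var/lib/grafana/plugins/tdengine-datasource
```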
## Integrated Docker Example
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
---
|
||||
title: TDengine Command Line (CLI)
|
||||
sidebar_label: TDengine CLI
|
||||
title: TDengine Command Line Interface (CLI)
|
||||
sidebar_label: Command Line Interface
|
||||
description: Instructions and tips for using the TDengine CLI
|
||||
---
|
||||
|
||||
The TDengine command-line application (hereafter referred to as `TDengine CLI`) is the most simplest way for users to manipulate and interact with TDengine instances.
|
||||
The TDengine command-line interface (hereafter referred to as `TDengine CLI`) is the simplest way for users to manipulate and interact with TDengine instances.
|
||||
|
||||
## Installation
|
||||
|
||||
If executed on the TDengine server-side, there is no need for additional installation steps to install TDengine CLI as it is already included and installed automatically. To run TDengine CLI on the environment which no TDengine server running, the TDengine client installation package needs to be installed first. For details, please refer to [connector](/reference/connector/).
|
||||
If executed on the TDengine server-side, there is no need for additional installation steps to install TDengine CLI as it is already included and installed automatically. To run TDengine CLI in an environment where no TDengine server is running, the TDengine client installation package needs to be installed first. For details, please refer to [connector](/reference/connector/).
|
||||
|
||||
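If the client package is installed on a different machine, the CLI can be pointed at a remote server. A minimal sketch, assuming a server reachable at the placeholder FQDN `tdengine.example.com` and the default port:

```bash
# -h: server FQDN, -P: server port, -u: user, -p: prompt for password
taos -h tdengine.example.com -P 6030 -u root -p
```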
## Execution
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ The TDengine image starts with the HTTP service activated by default, using the
|
|||
docker run -d --name tdengine -p 6041:6041 tdengine/tdengine
|
||||
```
|
||||
|
||||
The above command starts a container named "tdengine" and maps the HTTP service end 6041 to the host port 6041. You can verify that the HTTP service provided in this container is available using the following command.
|
||||
The above command starts a container named "tdengine" and maps the HTTP service port 6041 to the host port 6041. You can verify that the HTTP service provided in this container is available using the following command.
|
||||
|
||||
```shell
|
||||
curl -u root:taosdata -d "show databases" localhost:6041/rest/sql
|
||||
|
@ -34,7 +34,7 @@ taos> show databases;
|
|||
Query OK, 1 row(s) in set (0.002843s)
|
||||
```
|
||||
|
||||
The TDengine server running in the container uses the container's hostname to establish a connection. Using TDengine CLI or various connectors (such as JDBC-JNI) to access the TDengine inside the container from outside the container is more complicated. So the above is the simplest way to access the TDengine service in the container and is suitable for some simple scenarios. Please refer to the next section if you want to access the TDengine service in the container from containerized using TDengine CLI or various connectors in some complex scenarios.
|
||||
The TDengine server running in the container uses the container's hostname to establish a connection. Using TDengine CLI or various connectors (such as JDBC-JNI) to access the TDengine inside the container from outside the container is more complicated. So the above is the simplest way to access the TDengine service in the container and is suitable for some simple scenarios. Please refer to the next section if you want to access the TDengine service in the container from outside the container using TDengine CLI or various connectors for complex scenarios.
|
||||
|
||||
## Start TDengine on the host network
|
||||
|
||||
|
@ -42,7 +42,7 @@ The TDengine server running in the container uses the container's hostname to es
|
|||
docker run -d --name tdengine --network host tdengine/tdengine
|
||||
```
|
||||
|
||||
The above command starts TDengine on the host network and uses the host's FQDN to establish a connection instead of the container's hostname. It works too, like using `systemctl` to start TDengine on the host. If the TDengine client is already installed on the host, you can access it directly with the following command.
|
||||
The above command starts TDengine on the host network and uses the host's FQDN to establish a connection instead of the container's hostname. It is the equivalent of using `systemctl` to start TDengine on the host. If the TDengine client is already installed on the host, you can access it directly with the following command.
|
||||
|
||||
```shell
|
||||
$ taos
|
||||
|
@ -315,13 +315,13 @@ password: taosdata
|
|||
taoslog-td2:
|
||||
```
|
||||
|
||||
:::note
|
||||
:::note
|
||||
- The `VERSION` environment variable is used to set the tdengine image tag
|
||||
- `TAOS_FIRST_EP` must be set on the newly created instance so that it can join the TDengine cluster; if there is a high availability requirement, `TAOS_SECOND_EP` needs to be used at the same time
|
||||
- `TAOS_REPLICA` is used to set the default number of database replicas. Its value range is [1,3]
|
||||
We recommend setting with `TAOS_ARBITRATOR` to use arbitrator in a two-nodes environment.
|
||||
:::
|
||||
|
||||
We recommend setting it with `TAOS_ARBITRATOR` to use arbitrator in a two-nodes environment.
|
||||
|
||||
:::
|
||||
|
||||
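Equivalently, the same variables can be passed with `docker run -e` when not using docker-compose. A sketch, where the node names `td-1`/`td-2` and the network name are placeholders, and `TAOS_FQDN` is assumed to be supported by the image:

```bash
# Start a second node that joins the cluster via the first endpoint
docker run -d --name td-2 --network inter \
  -e TAOS_FQDN=td-2 \
  -e TAOS_FIRST_EP=td-1 \
  -e TAOS_SECOND_EP=td-2 \
  -e TAOS_REPLICA=2 \
  tdengine/tdengine
```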
2. Start the cluster
|
||||
|
||||
|
@ -382,7 +382,7 @@ password: taosdata
|
|||
Suppose you want to deploy multiple taosAdapters to improve throughput and provide high availability. In that case, the recommended configuration method uses a reverse proxy such as Nginx to offer a unified access entry. For specific configuration methods, please refer to the official documentation of Nginx. Here is an example:
|
||||
|
||||
```docker
|
||||
ersion: "3"
|
||||
version: "3"
|
||||
|
||||
networks:
|
||||
inter:
|
||||
|
|
|
@ -65,7 +65,7 @@ taos --dump-config
|
|||
| ------------- | ------------------------------------------------------------------------ |
|
||||
| Applicable | Server Only |
|
||||
| Meaning | The FQDN of the host where `taosd` will be started. It can also be an IP address |
|
||||
| Default Value | The first hostname configured for the hos |
|
||||
| Default Value | The first hostname configured for the host |
|
||||
| Note | It should be within 96 bytes |
|
||||
|
||||
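A quick way to verify the value before starting `taosd` is to check what the host reports and whether it resolves; for example:

```bash
# The configured FQDN should match this output and resolve from all nodes
hostname -f
ping -c 1 "$(hostname -f)"
```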
### serverPort
|
||||
|
@ -78,7 +78,7 @@ taos --dump-config
|
|||
| Note | REST service is provided by `taosd` before 2.4.0.0 but by `taosAdapter` after 2.4.0.0; the default port of the REST service is 6041 |
|
||||
|
||||
:::note
|
||||
TDengine uses continuous 13 ports, both TCP and TCP, from the port specified by `serverPort`. These ports need to be kept as open if firewall is enabled. Below table describes the ports used by TDengine in details.
|
||||
TDengine uses 13 continuous ports, both TCP and UDP, starting with the port specified by `serverPort`. You should ensure, in your firewall rules, that these ports are kept open. Below table describes the ports used by TDengine in details.
|
||||
|
||||
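With the default `serverPort` of 6030, the 13 ports are 6030-6042. A hedged example for firewalld (adjust the range if `serverPort` differs):

```bash
# Open the default TDengine port range for both TCP and UDP
sudo firewall-cmd --permanent --add-port=6030-6042/tcp
sudo firewall-cmd --permanent --add-port=6030-6042/udp
sudo firewall-cmd --reload
```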
:::
|
||||
|
||||
|
@ -182,8 +182,8 @@ TDengine uses continuous 13 ports, both TCP and TCP, from the port specified by
|
|||
| ------------- | -------------------------------------------- |
|
||||
| Applicable | Server Only |
|
||||
| Meaning | The maximum number of distinct rows returned |
|
||||
| Value Range | [100,000 - 100, 000, 000] |
|
||||
| Default Value | 100, 000 |
|
||||
| Value Range | [100,000 - 100,000,000] |
|
||||
| Default Value | 100,000 |
|
||||
| Note | After version 2.3.0.0 |
|
||||
|
||||
## Locale Parameters
|
||||
|
@ -197,7 +197,7 @@ TDengine uses continuous 13 ports, both TCP and TCP, from the port specified by
|
|||
| Default Value | TimeZone configured in the host |
|
||||
|
||||
:::info
|
||||
To handle the data insertion and data query from multiple timezones, Unix Timestamp is used and stored TDengine. The timestamp generated from any timezones at same time is same in Unix timestamp. To make sure the time on client side can be converted to Unix timestamp correctly, the timezone must be set properly.
|
||||
To handle data insertion and queries from multiple timezones, Unix timestamps are used and stored in TDengine. A timestamp generated at the same moment in any timezone is the same Unix timestamp. To make sure the time on the client side can be converted to a Unix timestamp correctly, the timezone must be set properly.
|
||||
|
||||
On Linux systems, TDengine clients automatically obtain the timezone from the host. Alternatively, the timezone can be configured explicitly in the configuration file `taos.cfg` like below.
|
||||
|
||||
|
@ -209,7 +209,7 @@ timezone Asia/Shanghai
|
|||
|
||||
The above examples are all proper configurations for the timezone of UTC+8. On Windows systems, however, `timezone Asia/Shanghai` is not supported; it must be set as `timezone UTC-8`.
|
||||
|
||||
The setting for timezone impacts the strings not in Unix timestamp, keywords or functions related to date/time, for example
|
||||
The setting for timezone impacts strings that are not in Unix timestamp format and keywords or functions related to date/time. For example:
|
||||
|
||||
```sql
|
||||
SELECT count(*) FROM table_name WHERE TS<'2019-04-11 12:01:08';
|
||||
|
@ -227,7 +227,7 @@ If the timezone is UTC, it's equal to
|
|||
SELECT count(*) FROM table_name WHERE TS<1554984068000;
|
||||
```
|
||||
|
||||
To avoid the problems of using time strings, Unix timestamp can be used directly. Furthermore, time strings with timezone can be used in SQL statement, for example "2013-04-12T15:52:01.123+08:00" in RFC3339 format or "2013-04-12T15:52:01.123+0800" in ISO-8601 format, they are not influenced by timezone setting when converted to Unix timestamp.
|
||||
To avoid the problems of using time strings, Unix timestamp can be used directly. Furthermore, time strings with timezone can be used in SQL statements. For example "2013-04-12T15:52:01.123+08:00" in RFC3339 format or "2013-04-12T15:52:01.123+0800" in ISO-8601 format are not influenced by timezone setting when converted to Unix timestamp.
|
||||
|
||||
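As an illustration, the query below uses an RFC3339 string with an explicit offset, so its result does not depend on the client timezone; the table `test.meters` is a placeholder:

```bash
# The explicit +08:00 offset pins the timestamp regardless of client timezone
taos -s "SELECT count(*) FROM test.meters WHERE ts < '2013-04-12T15:52:01.123+08:00';"
```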
:::
|
||||
|
||||
|
@ -240,11 +240,11 @@ To avoid the problems of using time strings, Unix timestamp can be used directly
|
|||
| Default Value | Locale configured in host |
|
||||
|
||||
:::info
|
||||
A specific type "nchar" is provided in TDengine to store non-ASCII characters such as Chinese, Japanese, Korean. The characters to be stored in nchar type are firstly encoded in UCS4-LE before sending to server side. To store non-ASCII characters correctly, the encoding format of the client side needs to be set properly.
|
||||
A specific type "nchar" is provided in TDengine to store non-ASCII characters such as Chinese, Japanese, and Korean. The characters to be stored in nchar type are firstly encoded in UCS4-LE before sending to server side. To store non-ASCII characters correctly, the encoding format of the client side needs to be set properly.
|
||||
|
||||
The characters input on the client side are encoded using the default system encoding, which is UTF-8 on Linux; GB18030 or GBK on some Chinese-language systems; POSIX in Docker; and CP936 on Windows in Chinese. The encoding of the operating system in use must be set correctly so that the characters in nchar type can be converted to UCS4-LE.
|
||||
|
||||
The locale definition standard on Linux is: <Language\>\_<Region\>.<charset\>, for example, in "zh_CN.UTF-8", "zh" means Chinese, "CN" means China mainland, "UTF-8" means charset. On Linux andMac OSX, the charset can be set by locale in the system. On Windows system another configuration parameter `charset` must be used to configure charset because the locale used on Windows is not POSIX standard. Of course, `charset` can also be used on Linux to specify the charset.
|
||||
The locale definition standard on Linux is: <Language\>\_<Region\>.<charset\>, for example, in "zh_CN.UTF-8", "zh" means Chinese, "CN" means China mainland, "UTF-8" means charset. On Linux and Mac OSX, the charset can be set by locale in the system. On Windows system another configuration parameter `charset` must be used to configure charset because the locale used on Windows is not POSIX standard. Of course, `charset` can also be used on Linux to specify the charset.
|
||||
|
||||
:::
|
||||
|
||||
|
@ -263,7 +263,7 @@ On Linux, if `charset` is not set in `taos.cfg`, when `taos` is started, the cha
|
|||
locale zh_CN.UTF-8
|
||||
```
|
||||
|
||||
Besides, on Linux system, if the charset contained in `locale` is not consistent with that set by `charset`, the one who comes later in the configuration file is used.
|
||||
On a Linux system, if the charset contained in `locale` is not consistent with that set by `charset`, the later setting in the configuration file takes precedence.
|
||||
|
||||
```title="Effective charset is GBK"
|
||||
locale zh_CN.UTF-8
|
||||
|
@ -778,8 +778,8 @@ To prevent system resource from being exhausted by multiple concurrent streams,
|
|||
## HTTP Parameters
|
||||
|
||||
:::note
|
||||
HTTP server had been provided by `taosd` prior to version 2.4.0.0, now is provided by `taosAdapter` after version 2.4.0.0.
|
||||
The parameters described in this section are only application in versions prior to 2.4.0.0. If you are using any version from 2.4.0.0, please refer to [taosAdapter]](/reference/taosadapter/).
|
||||
HTTP service was provided by `taosd` prior to version 2.4.0.0 and is provided by `taosAdapter` after version 2.4.0.0.
|
||||
The parameters described in this section are only applicable to versions prior to 2.4.0.0. If you are using version 2.4.0.0 or above, please refer to [taosAdapter](/reference/taosadapter/).
|
||||
|
||||
:::
|
||||
|
||||
|
|
|
@ -32,7 +32,7 @@ All executable files of TDengine are in the _/usr/local/taos/bin_ directory by d
|
|||
- _taosd-dump-cfg.gdb_: script to facilitate debugging taosd with gdb.
|
||||
|
||||
:::note
|
||||
taosdump after version 2.4.0.0 require taosTools as a standalone installation. A few version taosBenchmark is include in taosTools too.
|
||||
taosdump after version 2.4.0.0 requires taosTools as a standalone installation. A new version of taosBenchmark is included in taosTools too.
|
||||
:::
|
||||
|
||||
:::tip
|
||||
|
|
|
@ -1,19 +1,19 @@
|
|||
---
|
||||
title: Schemaless Writing
|
||||
description: "The Schemaless write method eliminates the need to create super tables/sub tables in advance and automatically creates the storage structure corresponding to the data as it is written to the interface."
|
||||
description: "The Schemaless write method eliminates the need to create super tables/sub tables in advance and automatically creates the storage structure corresponding to the data, as it is written to the interface."
|
||||
---
|
||||
|
||||
In IoT applications, many data items are often collected for intelligent control, business analysis, device monitoring, etc. Due to the version upgrade of the application logic, or the hardware adjustment of the device itself, the data collection items may change more frequently. To facilitate the data logging work in such cases, TDengine starting from version 2.2.0.0, it provides a series of interfaces to the schemaless writing method, which eliminates the need to create super tables/sub tables in advance and automatically creates the storage structure corresponding to the data as the data is written to the interface. And when necessary, Schemaless writing will automatically add the required columns to ensure that the data written by the user is stored correctly.
|
||||
In IoT applications, data is collected for many purposes such as intelligent control, business analysis, device monitoring and so on. Due to changes in business or functional requirements or changes in device hardware, the application logic and even the data collected may change. To provide the flexibility needed in such cases and in a rapidly changing IoT landscape, TDengine, starting from version 2.2.0.0, provides a series of interfaces for the schemaless writing method. These interfaces eliminate the need to create super tables and subtables in advance by automatically creating the storage structure corresponding to the data as the data is written to the interface. When necessary, schemaless writing will automatically add the required columns to ensure that the data written by the user is stored correctly.
|
||||
|
||||
The schemaless writing method creates super tables and their corresponding sub-tables completely indistinguishable from the super tables and sub-tables created directly via SQL. You can write data directly to them via SQL statements. Note that the names of tables created by schemaless writing are based on fixed mapping rules for tag values, so they are not explicitly ideographic and lack readability.
|
||||
The schemaless writing method creates super tables and their corresponding subtables. These are completely indistinguishable from the super tables and subtables created directly via SQL. You can write data directly to them via SQL statements. Note that the names of tables created by schemaless writing are based on fixed mapping rules for tag values, so they are not explicitly ideographic and they lack readability.
|
||||
|
||||
## Schemaless Writing Line Protocol
|
||||
|
||||
TDengine's schemaless writing line protocol supports to be compatible with InfluxDB's Line Protocol, OpenTSDB's telnet line protocol, and OpenTSDB's JSON format protocol. However, when using these three protocols, you need to specify in the API the standard of the parsing protocol to be used for the input content.
|
||||
TDengine's schemaless writing line protocol supports InfluxDB's Line Protocol, OpenTSDB's telnet line protocol, and OpenTSDB's JSON format protocol. However, when using these three protocols, you need to specify in the API the standard of the parsing protocol to be used for the input content.
|
||||
|
||||
For the standard writing protocols of InfluxDB and OpenTSDB, please refer to the documentation of each protocol. The following describes TDengine's extensions, based first on InfluxDB's line protocol; they allow users to control the (super table) schema at a finer granularity.
|
||||
|
||||
With the following formatting conventions, Schemaless writing uses a single string to express a data row (multiple rows can be passed into the writing API at once to enable bulk writing).
|
||||
With the following formatting conventions, schemaless writing uses a single string to express a data row (multiple rows can be passed into the writing API at once to enable bulk writing).
|
||||
|
||||
```json
|
||||
measurement,tag_set field_set timestamp
|
||||
|
@ -23,7 +23,7 @@ where :
|
|||
|
||||
- measurement will be used as the data table name. It will be separated from tag_set by a comma.
|
||||
- tag_set will be used as tag data in the format `<tag_key>=<tag_value>,<tag_key>=<tag_value>`, i.e. multiple tags' data can be separated by a comma. It is separated from field_set by a space.
|
||||
- field_set will be used as normal column data in the format of `<field_key>=<field_value>,<field_key>=<field_value>`, again using a comma to separate multiple normal columns of data. It is separated from the timestamp by space.
|
||||
- field_set will be used as normal column data in the format of `<field_key>=<field_value>,<field_key>=<field_value>`, again using a comma to separate multiple normal columns of data. It is separated from the timestamp by a space.
|
||||
- The timestamp is the primary key corresponding to the data in this row.
|
||||
|
||||
All data in tag_set is automatically converted to the NCHAR data type and does not require double quotes (").
|
||||
|
@ -32,7 +32,7 @@ In the schemaless writing data line protocol, each data item in the field_set ne
|
|||
|
||||
- If there are English double quotes on both sides, it indicates the BINARY(32) type. For example, `"abc"`.
|
||||
- If there are double quotes on both sides and an L prefix, it means NCHAR(32) type. For example, `L"error message"`.
|
||||
- Spaces, equal signs (=), commas (,), and double quotes (") need to be escaped with a backslash (\) in front. (All refer to the ASCII character)
|
||||
- Spaces, equal signs (=), commas (,), and double quotes (") need to be escaped with a backslash (\\) in front. (All refer to the ASCII character)
|
||||
- Numeric types are distinguished by their suffix.
|
||||
|
||||
| **Serial number** | **Postfix** | **Mapping type** | **Size (bytes)** |
|
||||
|
@ -58,26 +58,25 @@ Note that if the wrong case is used when describing the data type suffix, or if
|
|||
|
||||
Schemaless writes process row data according to the following principles.
|
||||
|
||||
1. You can use the following rules to generate the sub-table names: first, combine the measurement name and the key and value of the label into the next string:
|
||||
1. You can use the following rules to generate the subtable names: first, combine the measurement name and the key and value of the label into the next string:
|
||||
|
||||
```json
|
||||
"measurement,tag_key1=tag_value1,tag_key2=tag_value2"
|
||||
```
|
||||
|
||||
Note that tag_key1 and tag_key2 do not follow the original order in which the user entered the tags; they are sorted in ascending order by tag name. Therefore, tag_key1 is not necessarily the first tag entered in the line protocol. A rough sketch of this naming rule appears below, after this list.
|
||||
The string's MD5 hash value "md5_val" is calculated after the ranking is completed. The calculation result is then combined with the string to generate the table name: "t_md5_val". "t*" is a fixed prefix that every table generated by this mapping relationship has. 2.
|
||||
The string's MD5 hash value "md5_val" is calculated after the ranking is completed. The calculation result is then combined with the string to generate the table name: "t_md5_val". "t*" is a fixed prefix that every table generated by this mapping relationship has.
|
||||
|
||||
2. If the super table obtained by parsing the line protocol does not exist, this super table is created.
|
||||
If the sub-table obtained by the parse line protocol does not exist, Schemaless creates the sub-table according to the sub-table name determined in steps 1 or 2. 4.
|
||||
3. If the subtable obtained by parsing the line protocol does not exist, schemaless writing creates the subtable according to the subtable name determined in step 1 or 2.
|
||||
4. If the specified tag or regular column in the data row does not exist, the corresponding tag or regular column is added to the super table (only incremental).
|
||||
5. If there are some tag columns or regular columns in the super table that are not specified to take values in a data row, then the values of these columns are set to NULL.
|
||||
6. For BINARY or NCHAR columns, if the length of the value provided in a data row exceeds the column type limit, the maximum length of characters allowed to be stored in the column is automatically increased (only incremented and not decremented) to ensure complete preservation of the data.
|
||||
7. If the specified data sub-table already exists, and the specified tag column takes a value different from the saved value this time, the value in the latest data row overwrites the old tag column take value.
|
||||
7. If the specified data subtable already exists, and the specified tag column takes a value different from the saved value this time, the value in the latest data row overwrites the old tag column value.
|
||||
8. Errors encountered throughout the processing will interrupt the writing process and return an error code.
|
||||
|
||||
:::tip
|
||||
All processing logic of schemaless will still follow TDengine's underlying restrictions on data structures, such as the total length of each row of data cannot exceed
|
||||
16k bytes. See [TAOS SQL Boundary Limits](/taos-sql/limit) for specific constraints in this area.
|
||||
All processing logic of schemaless will still follow TDengine's underlying restrictions on data structures, such as the total length of each row of data cannot exceed 48k bytes. See [TAOS SQL Boundary Limits](/taos-sql/limit) for specific constraints in this area.
|
||||
:::
|
||||
|
||||
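As a rough sketch of the subtable-naming rule described in step 1, the name can be approximated on the command line. Note that the exact byte-level input TDengine hashes internally may differ, so this is illustrative only:

```bash
# Hash the "measurement,key=value,..." string (tags sorted by key)
# and prefix the MD5 with "t_" to form the subtable name
printf '%s' 'st,t1=3,t2=4,t3=t3' | md5sum | awk '{print "t_" $1}'
```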
## Time resolution recognition
|
||||
|
@ -87,7 +86,7 @@ Three specified modes are supported in the schemaless writing process, as follow
|
|||
| **Serial** | **Value** | **Description** |
|
||||
| -------- | ------------------- | ------------------------------- |
|
||||
| 1 | SML_LINE_PROTOCOL | InfluxDB Line Protocol |
|
||||
| 2 | SML_TELNET_PROTOCOL | OpenTSDB Text Line Protocol | | 2 | SML_TELNET_PROTOCOL | OpenTSDB Text Line Protocol
|
||||
| 2 | SML_TELNET_PROTOCOL | OpenTSDB Text Line Protocol |
|
||||
| 3 | SML_JSON_PROTOCOL | JSON protocol format |
|
||||
|
||||
In the SML_LINE_PROTOCOL parsing mode, the user is required to specify the time resolution of the input timestamp. The available time resolutions are shown in the following table.
|
||||
|
@ -106,8 +105,11 @@ In SML_TELNET_PROTOCOL and SML_JSON_PROTOCOL modes, the time precision is determ
|
|||
|
||||
## Data schema mapping rules
|
||||
|
||||
This section describes how data for line protocols are mapped to data with a schema. The data measurement in each line protocol is mapped to
|
||||
The tag name in tag_set is the name of the tag in the data schema, and the name in field_set is the column's name. The following data is used as an example to illustrate the mapping rules.
|
||||
This section describes how data for line protocols are mapped to data with a schema. The data measurement in each line protocol is mapped as follows:
|
||||
- The tag name in tag_set is the name of the tag in the data schema
|
||||
- The name in field_set is the column's name.
|
||||
|
||||
The following data is used as an example to illustrate the mapping rules.
|
||||
|
||||
```json
|
||||
st,t1=3,t2=4,t3=t3 c1=3i64,c3="passit",c2=false,c4=4f64 1626006833639000000
|
||||
|
@ -139,7 +141,7 @@ st,t1=3,t2=4,t3=t3 c1=3i64,c5="pass" 1626006833639000000
|
|||
st,t1=3,t2=4,t3=t3 c1=3i64,c5="passit" 1626006833640000000
|
||||
```
|
||||
|
||||
The first line of the line protocol parsing will declare column c5 is a BINARY(4) field, the second line data write will extract column c5 is still a BINARY column. Still, its width is 6, then you need to increase the width of the BINARY field to be able to accommodate the new string.
|
||||
The first line of the line protocol parsing will declare column c5 is a BINARY(4) field. The second line data write will parse column c5 as a BINARY column. But in the second line, c5's width is 6 so you need to increase the width of the BINARY field to be able to accommodate the new string.
|
||||
|
||||
```json
|
||||
st,t1=3,t2=4,t3=t3 c1=3i64 1626006833639000000
|
||||
|
|
|
@ -25,7 +25,7 @@ The default database name written by taosAdapter is `collectd`. You can also mod
|
|||
#collectd
|
||||
collectd uses a plugin mechanism to write the collected monitoring data to different data storage software in various forms. TDengine supports both the direct collection plugin and the write_tsdb plugin.
|
||||
|
||||
#### is configured to receive data from the direct collection plugin
|
||||
#### Configure the direct collection plugin
|
||||
|
||||
Modify the relevant configuration items in the collectd configuration file (default location /etc/collectd/collectd.conf).
|
||||
|
||||
|
@ -62,7 +62,7 @@ LoadPlugin write_tsdb
|
|||
</Plugin>
|
||||
```
|
||||
|
||||
Where <taosAdapter's host\> fills in the server's domain name or IP address running taosAdapter. <port for collectd write_tsdb plugin\> Fill in the data that taosAdapter uses to receive the collectd write_tsdb plugin (default is 6047).
|
||||
Where <taosAdapter's host\> is the domain name or IP address of the server running taosAdapter, and <port for collectd write_tsdb plugin\> is the port that taosAdapter uses to receive collectd write_tsdb plugin data (default is 6047).
|
||||
|
||||
```text
|
||||
LoadPlugin write_tsdb
|
||||
|
|
|
@ -17,7 +17,7 @@ password = "taosdata"
|
|||
...
|
||||
```
|
||||
|
||||
The taosAdapter writes to the database with the default name `tcollector`. You can also modify the taosAdapter configuration file dbs entry to specify a different name. user and password fill in the actual TDengine configuration values. After changing the configuration file, you need to restart the taosAdapter.
|
||||
The taosAdapter writes to the database with the default name `tcollector`. You can also modify the taosAdapter configuration file dbs entry to specify a different name. Fill in the actual user and password for TDengine. After changing the configuration file, you need to restart the taosAdapter.
|
||||
|
||||
- You can also enable taosAdapter to receive tcollector data by using the taosAdapter command-line parameters or setting environment variables.
|
||||
|
||||
|
@ -25,7 +25,7 @@ The taosAdapter writes to the database with the default name `tcollector`. You c
|
|||
|
||||
To use TCollector, you need to download its [source code](https://github.com/OpenTSDB/tcollector). Its configuration items are in its source code. Note: TCollector differs significantly from version to version, so here is an example of the latest code for the current master branch (git commit: 37ae920).
|
||||
|
||||
Modify the contents of the `collectors/etc/config.py` and `tcollector.py` files. Change the address of the OpenTSDB host to the domain name or IP address of the server where taosAdapter is deployed, and change the port to the port that taosAdapter supports TCollector on (default is 6049).
|
||||
Modify the contents of the `collectors/etc/config.py` and `tcollector.py` files. Change the address of the OpenTSDB host to the domain name or IP address of the server where taosAdapter is deployed, and change the port to the port on which taosAdapter supports TCollector (default is 6049).
|
||||
|
||||
Example of git diff output of source code changes.
|
||||
|
||||
|
|
|
@ -2,11 +2,11 @@
|
|||
title: Reference
|
||||
---
|
||||
|
||||
The reference guide is the detailed introduction to TDengine, various TDengine's connectors in different languages, and the tools that come with it.
|
||||
The reference guide is a detailed introduction to TDengine including various TDengine connectors in different languages, and the tools that come with TDengine.
|
||||
|
||||
```mdx-code-block
|
||||
import DocCardList from '@theme/DocCardList';
|
||||
import {useCurrentSidebarCategory} from '@docusaurus/theme-common';
|
||||
|
||||
<DocCardList items={useCurrentSidebarCategory().items}/>
|
||||
```
|
||||
```
|
||||
|
|
|
@ -3,13 +3,14 @@ sidebar_label: Grafana
|
|||
title: Grafana
|
||||
---
|
||||
|
||||
TDengine can be quickly integrated with the open-source data visualization system [Grafana](https://www.grafana.com/) to build a data monitoring and alerting system. The whole process does not require any code development. And you can visualize the contents of the data tables in TDengine on a DashBoard.
|
||||
TDengine can be quickly integrated with the open-source data visualization system [Grafana](https://www.grafana.com/) to build a data monitoring and alerting system. The whole process does not require any code development. And you can visualize the contents of the data tables in TDengine on a dashboard.
|
||||
|
||||
You can learn more about using the TDengine plugin on [GitHub](https://github.com/taosdata/grafanaplugin/blob/master/README.md).
|
||||
|
||||
## Prerequisites
|
||||
|
||||
In order for Grafana to add the TDengine data source successfully, the following preparations are required:
|
||||
|
||||
1. The TDengine cluster is deployed and functioning properly
|
||||
2. taosAdapter is installed and running properly. Please refer to the taosAdapter manual for details.
|
||||
|
||||
|
@ -19,21 +20,22 @@ TDengine currently supports Grafana versions 7.0 and above. Users can go to the
|
|||
|
||||
## Configuring Grafana
|
||||
|
||||
You can download The Grafana plugin for TDengine from <https://github.com/taosdata/grafanaplugin/releases/latest>. The current latest version is 3.1.4.
|
||||
|
||||
Recommend using the [``grafana-cli`` command-line tool](https://grafana.com/docs/grafana/latest/administration/cli/) for plugin installation.
|
||||
|
||||
``bash
|
||||
sudo -u grafana grafana-cli \
|
||||
--pluginUrl https://github.com/taosdata/grafanaplugin/releases/download/v3.1.4/tdengine-datasource-3.1.4.zip \
|
||||
plugins install tdengine-datasource
|
||||
```
|
||||
|
||||
Or download it locally and extract it to the Grafana plugin directory.
|
||||
Follow the installation steps in [Grafana](https://grafana.com/grafana/plugins/tdengine-datasource/?tab=installation) with the [``grafana-cli`` command-line tool](https://grafana.com/docs/grafana/latest/administration/cli/) for plugin installation.
|
||||
|
||||
```bash
|
||||
GF_VERSION=3.1.4
|
||||
grafana-cli plugins install tdengine-datasource
|
||||
# with sudo
|
||||
sudo -u grafana grafana-cli plugins install tdengine-datasource
|
||||
```
|
||||
|
||||
Alternatively, you can manually download the .zip file from [GitHub](https://github.com/taosdata/grafanaplugin/releases/tag/latest) or [Grafana](https://grafana.com/grafana/plugins/tdengine-datasource/?tab=installation) and unpack it into your grafana plugins directory.
|
||||
|
||||
```bash
|
||||
GF_VERSION=3.2.2
|
||||
# from GitHub
|
||||
wget https://github.com/taosdata/grafanaplugin/releases/download/v$GF_VERSION/tdengine-datasource-$GF_VERSION.zip
|
||||
# from Grafana
|
||||
wget -O tdengine-datasource-$GF_VERSION.zip https://grafana.com/api/plugins/tdengine-datasource/versions/$GF_VERSION/download
|
||||
```
|
||||
|
||||
Taking CentOS 7.2 as an example, extract the plugin package to the /var/lib/grafana/plugins directory and restart Grafana.
|
||||
|
@ -42,18 +44,10 @@ Take CentOS 7.2 for example, extract the plugin package to /var/lib/grafana/plug
|
|||
sudo unzip tdengine-datasource-$GF_VERSION.zip -d /var/lib/grafana/plugins/
|
||||
```
|
||||
|
||||
Grafana versions 7.3+ / 8.x do signature checks on plugins, so you also need to add the following line to the grafana.ini file to use the plugin correctly.
|
||||
|
||||
```ini
|
||||
[plugins]
|
||||
allow_loading_unsigned_plugins = tdengine-datasource
|
||||
```
|
||||
|
||||
The TDengine plugin can be automatically installed and set up using the following environment variable settings in a Docker environment.
|
||||
If Grafana is running in a Docker environment, the TDengine plugin can be automatically installed and set up using the following environment variable settings:
|
||||
|
||||
```bash
|
||||
GF_INSTALL_PLUGINS=https://github.com/taosdata/grafanaplugin/releases/download/v3.1.4/tdengine-datasource-3.1.4.zip;tdengine- datasource
|
||||
GF_PLUGINS_ALLOW_LOADING_UNSIGNED_PLUGINS=tdengine-datasource
|
||||
GF_INSTALL_PLUGINS=tdengine-datasource
|
||||
```
|
||||
|
||||
## Using Grafana
|
||||
|
@ -62,39 +56,39 @@ GF_PLUGINS_ALLOW_LOADING_UNSIGNED_PLUGINS=tdengine-datasource
|
|||
|
||||
Users can log in to the Grafana server (username/password: admin/admin) directly through the URL `http://localhost:3000` and add a datasource through `Configuration -> Data Sources` on the left side, as shown in the following figure.
|
||||
|
||||

|
||||

|
||||
|
||||
Click `Add data source` to enter the Add data source page, and enter TDengine in the query box to add it, as shown in the following figure.
|
||||
|
||||

|
||||

|
||||
|
||||
Enter the datasource configuration page, and follow the default prompts to modify the corresponding configuration.
|
||||
|
||||

|
||||

|
||||
|
||||
- Host: IP address of the server where the TDengine cluster component providing the REST service runs (offered by taosd before 2.4 and by taosAdapter since 2.4), together with the port number of the TDengine REST service (6041). By default, use `http://localhost:6041`.
|
||||
- User: TDengine user name.
|
||||
- Password: TDengine user password.
|
||||
|
||||
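The datasource fields above can also be set programmatically through Grafana's HTTP API instead of the UI. A minimal sketch, assuming default Grafana credentials and that the plugin reads the user and password from `secureJsonData` (the field names are assumptions; check the plugin README):

```bash
# Create the TDengine datasource via the Grafana API
curl -u admin:admin -H 'Content-Type: application/json' \
  -X POST http://localhost:3000/api/datasources -d '{
    "name": "TDengine",
    "type": "tdengine-datasource",
    "access": "proxy",
    "url": "http://localhost:6041",
    "secureJsonData": {"user": "root", "password": "taosdata"}
  }'
```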
Click `Save & Test` to test. Follows are a success.
|
||||
Click `Save & Test` to test. You should see a success message if the test worked.
|
||||
|
||||

|
||||

|
||||
|
||||
### Create Dashboard
|
||||
|
||||
Go back to the main interface to create the Dashboard, click Add Query to enter the panel query page:
|
||||
Go back to the main interface to create a dashboard and click Add Query to enter the panel query page:
|
||||
|
||||

|
||||

|
||||
|
||||
As shown above, select the `TDengine` data source in the `Query` section and enter the corresponding SQL in the query box below to run the query.
|
||||
|
||||
- INPUT SQL: enter the statement to be queried (the result set of the SQL statement should be two columns and multiple rows), for example: `select avg(mem_system) from log.dn where ts >= $from and ts < $to interval($interval)`, where, from, to and interval are built-in variables of the TDengine plugin, indicating the range and time interval of queries fetched from the Grafana plugin panel. In addition to the built-in variables, ` custom template variables are also supported.
|
||||
- INPUT SQL: enter the statement to be queried (the result set of the SQL statement should be two columns and multiple rows), for example: `select avg(mem_system) from log.dn where ts >= $from and ts < $to interval($interval)`, where, from, to and interval are built-in variables of the TDengine plugin, indicating the range and time interval of queries fetched from the Grafana plugin panel. In addition to the built-in variables, custom template variables are also supported.
|
||||
- ALIAS BY: This allows you to set the current query alias.
|
||||
- GENERATE SQL: Clicking this button will automatically replace the corresponding variables and generate the final executed statement.
|
||||
|
||||
Follow the default prompt to query the average system memory usage for the specified interval on the server where the current TDengine deployment is located, as shown below.
|
||||
|
||||

|
||||

|
||||
|
||||
> For more information on how to use Grafana to create the appropriate monitoring interface and for more details on using Grafana, refer to the official Grafana [documentation](https://grafana.com/docs/).
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@ title: Telegraf writing
|
|||
|
||||
import Telegraf from "../14-reference/_telegraf.mdx"
|
||||
|
||||
Telegraf is a viral metrics collection open-source software. Telegraf can collect the operation information of various components without writing any scripts to collect regularly, reducing the difficulty of data acquisition.
|
||||
Telegraf is a viral, open-source, metrics collection software. Telegraf can collect the operation information of various components without having to write any scripts to collect regularly, reducing the difficulty of data acquisition.
|
||||
|
||||
Telegraf's data can be written to TDengine by simply adding the output configuration of Telegraf to the URL corresponding to taosAdapter and modifying several configuration items. The presence of Telegraf data in TDengine can take advantage of TDengine's efficient storage query performance and clustering capabilities for time-series data.
|
||||
|
||||
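A minimal sketch of such an output section, assuming taosAdapter's InfluxDB-compatible endpoint on port 6041 and a target database named `telegraf` (both placeholders to adapt):

```bash
# Append an HTTP output pointing Telegraf at taosAdapter
cat >> /etc/telegraf/telegraf.conf <<'EOF'
[[outputs.http]]
  url = "http://127.0.0.1:6041/influxdb/v1/write?db=telegraf"
  method = "POST"
  username = "root"
  password = "taosdata"
  data_format = "influx"
EOF
```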
|
|
|
@ -6,7 +6,7 @@ title: collectd writing
|
|||
import CollectD from "../14-reference/_collectd.mdx"
|
||||
|
||||
|
||||
collectd is a daemon used to collect system performance metric data. collectd provides various storage mechanisms to store different values. It periodically counts system performance statistics number while the system is running and storing information. You can use this information to help identify current system performance bottlenecks and predict future system load.
|
||||
collectd is a daemon used to collect system performance metric data. collectd provides various storage mechanisms to store different values. It periodically counts system performance statistics while the system is running and storing information. You can use this information to help identify current system performance bottlenecks and predict future system load.
|
||||
|
||||
You can write the data collected by collectd to TDengine by simply modifying the configuration of collectd to the domain name (or IP address) and corresponding port of the server running taosAdapter. It can take full advantage of TDengine's efficient storage query performance and clustering capability for time-series data.
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ import StatsD from "../14-reference/_statsd.mdx"
|
|||
|
||||
StatsD is a simple daemon for aggregating application metrics, which has evolved rapidly in recent years into a unified protocol for collecting application performance metrics.
|
||||
|
||||
You can write StatsD data to TDengine by simply modifying in the configuration file of StatsD with the domain name (or IP address) of the server running taosAdapter and the corresponding port. It can take full advantage of TDengine's efficient storage query performance and clustering capabilities for time-series data.
|
||||
You can write StatsD data to TDengine by simply modifying the configuration file of StatsD with the domain name (or IP address) of the server running taosAdapter and the corresponding port. It can take full advantage of TDengine's efficient storage query performance and clustering capabilities for time-series data.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@ title: icinga2 writing
|
|||
|
||||
import Icinga2 from "../14-reference/_icinga2.mdx"
|
||||
|
||||
icinga2 is an open-source software monitoring host and network initially developed from the Nagios network monitoring application. Currently, icinga2 is distributed under the GNU GPL v2 license.
|
||||
icinga2 is an open-source, host and network monitoring software initially developed from the Nagios network monitoring application. Currently, icinga2 is distributed under the GNU GPL v2 license.
|
||||
|
||||
You can write the data collected by icinga2 to TDengine by simply modifying the icinga2 configuration to point to the taosAdapter server and the corresponding port, taking advantage of TDengine's efficient storage and query performance and clustering capabilities for time-series data.
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@ sidebar_label: EMQX Broker
|
|||
title: EMQX Broker writing
|
||||
---
|
||||
|
||||
MQTT is a popular IoT data transfer protocol, [EMQX](https://github.com/emqx/emqx) is an open-source MQTT Broker software, without any code, only need to use "rules" in EMQX Dashboard to do simple configuration. You can write MQTT data directly to TDengine. EMQX supports saving data to TDengine by sending it to web services and provides a native TDengine driver for direct saving in the Enterprise Edition. Please refer to the [EMQX official documentation](https://www.emqx.io/docs/en/v4.4/rule/rule-engine.html) for details on how to use it. tdengine).
|
||||
MQTT is a popular IoT data transfer protocol. [EMQX](https://github.com/emqx/emqx) is an open-source MQTT Broker software. You can write MQTT data directly to TDengine without any code; you only need to set up "rules" in the EMQX Dashboard to create a simple configuration. EMQX supports saving data to TDengine by sending data to a web service, and provides a native TDengine driver for direct saving in the Enterprise Edition. Please refer to the [EMQX official documentation](https://www.emqx.io/docs/en/v4.4/rule/rule-engine.html) for details on how to use it.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
|
@ -44,25 +44,25 @@ Since the configuration interface of EMQX differs from version to version, here
|
|||
|
||||
Use your browser to open the URL `http://IP:18083` and log in to the EMQX Dashboard. The initial username is `admin` and the password is `public`.
|
||||
|
||||

|
||||

|
||||
|
||||
### Creating Rule
|
||||
|
||||
Select "Rule" in the "Rule Engine" on the left and click the "Create" button: !
|
||||
|
||||

|
||||

|
||||
|
||||
### Edit SQL fields
|
||||
|
||||

|
||||

|
||||
|
||||
### Add "action handler"
|
||||
|
||||

|
||||

|
||||
|
||||
### Add "Resource"
|
||||
|
||||

|
||||

|
||||
|
||||
Select "Data to Web Service" and click the "New Resource" button.
|
||||
|
||||
|
@ -70,13 +70,13 @@ Select "Data to Web Service" and click the "New Resource" button.
|
|||
|
||||
Select "Data to Web Service" and fill in the request URL as the address and port of the server running taosAdapter (default is 6041). Leave the other properties at their default values.
|
||||
|
||||

|
||||

|
||||
|
||||
### Edit "action"
|
||||
|
||||
Edit the resource configuration to add the key/value pair for Authorization. Please refer to the [TDengine REST API documentation](https://docs.taosdata.com/reference/rest-api/) for details on authorization. Enter the rule engine replacement template in the message body.
|
||||
|
||||

|
||||

|
||||
|
||||
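For the default credentials `root:taosdata`, the value for the Authorization header can be derived as follows (a standard HTTP Basic token, as described in the REST API documentation):

```bash
# Produces cm9vdDp0YW9zZGF0YQ==, used as "Authorization: Basic <token>"
echo -n 'root:taosdata' | base64
```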
## Compose program to mock data
|
||||
|
||||
|
@ -163,7 +163,7 @@ Edit the resource configuration to add the key/value pairing for Authorization.
|
|||
|
||||
Note: `CLIENT_NUM` in the code can be set to a smaller value at the beginning of the test, in case the hardware cannot handle a larger number of concurrent clients.
|
||||
|
||||

|
||||

|
||||
|
||||
## Execute tests to simulate sending MQTT data
|
||||
|
||||
|
@ -172,19 +172,19 @@ npm install mqtt mockjs --save ---registry=https://registry.npm.taobao.org
|
|||
node mock.js
|
||||
```
|
||||
|
||||

|
||||

|
||||
|
||||
## Verify that EMQX is receiving data
|
||||
|
||||
Refresh the EMQX Dashboard rules engine interface to see how many records were received correctly:
|
||||
|
||||

|
||||

|
||||
|
||||
## Verify that data is written to TDengine
|
||||
|
||||
Use the TDengine CLI program to log in and query the appropriate databases and tables to verify that the data is being written to TDengine correctly:
|
||||
|
||||

|
||||

|
||||
|
||||
Please refer to the [TDengine official documentation](https://docs.taosdata.com/) for more details on how to use TDengine.
|
||||
Please refer to the [EMQX official documentation](https://www.emqx.io/docs/en/v4.4/rule/rule-engine.html) for details on how to use EMQX.
|
||||
|
|
|
@ -7,17 +7,17 @@ TDengine Kafka Connector contains two plugins: TDengine Source Connector and TDe
|
|||
|
||||
## What is Kafka Connect?
|
||||
|
||||
Kafka Connect is a component of Apache Kafka that enables other systems, such as databases, cloud services, file systems, etc., to connect to Kafka easily. Data can flow from other software to Kafka via Kafka Connect and Kafka to other systems via Kafka Connect. Plugins that read data from other software are called Source Connectors, and plugins that write data to other software are called Sink Connectors. Neither Source Connector nor Sink Connector will directly connect to Kafka Broker, and Source Connector transfers data to Kafka Connect. Sink Connector receives data from Kafka Connect.
|
||||
Kafka Connect is a component of [Apache Kafka](https://kafka.apache.org/) that enables other systems, such as databases, cloud services, file systems, etc., to connect to Kafka easily. Data can flow from other software to Kafka via Kafka Connect and Kafka to other systems via Kafka Connect. Plugins that read data from other software are called Source Connectors, and plugins that write data to other software are called Sink Connectors. Neither Source Connector nor Sink Connector will directly connect to Kafka Broker, and Source Connector transfers data to Kafka Connect. Sink Connector receives data from Kafka Connect.
|
||||
|
||||

|
||||

|
||||
|
||||
TDengine Source Connector is used to read data from TDengine in real-time and send it to Kafka Connect. Users can use the TDengine Sink Connector to receive data from Kafka Connect and write it to TDengine.
|
||||
|
||||

|
||||

|
||||
|
||||
## What is Confluent?
|
||||
|
||||
Confluent adds many extensions to Kafka. include:
|
||||
[Confluent](https://www.confluent.io/) adds many extensions to Kafka. These include:
|
||||
|
||||
1. Schema Registry
|
||||
2. REST Proxy
|
||||
|
@ -26,7 +26,7 @@ Confluent adds many extensions to Kafka. include:
|
|||
5. GUI for managing and monitoring Kafka - Confluent Control Center
|
||||
|
||||
Some of these extensions are available in the community version of Confluent. Some are only available in the enterprise version.
|
||||

|
||||

|
||||
|
||||
Confluent Enterprise Edition provides the `confluent` command-line tool to manage various components.
|
||||
|
||||
|
@ -79,10 +79,10 @@ Development: false
|
|||
git clone https://github.com/taosdata/kafka-connect-tdengine.git
|
||||
cd kafka-connect-tdengine
|
||||
mvn clean package
|
||||
unzip -d $CONFLUENT_HOME/share/confluent-hub-components/ target/components/packages/taosdata-kafka-connect-tdengine-0.1.0.zip
|
||||
unzip -d $CONFLUENT_HOME/share/java/ target/components/packages/taosdata-kafka-connect-tdengine-*.zip
|
||||
```
|
||||
|
||||
The above script first clones the project source code and then compiles and packages it with Maven. After the package is complete, the zip package of the plugin is generated in the `target/components/packages/` directory. Unzip this zip package to the path where the plugin is installed. The path to install the plugin is in the configuration file `$CONFLUENT_HOME/etc/kafka/connect-standalone.properties`. The default path is `$CONFLUENT_HOME/share/confluent-hub-components/`.
|
||||
The above script first clones the project source code and then compiles and packages it with Maven. After packaging is complete, the zip package of the plugin is generated in the `target/components/packages/` directory. Unzip this zip package to the plugin path. We used `$CONFLUENT_HOME/share/java/` above because it is a built-in plugin path.
|
||||
|
||||
### Install with confluent-hub
|
||||
|
||||
|
@ -96,7 +96,7 @@ confluent local services start
|
|||
```
|
||||
|
||||
:::note
|
||||
Be sure to install the plugin before starting Confluent. Otherwise, there will be a class not found error. The log of Kafka Connect (default path: /tmp/confluent.xxxx/connect/logs/connect.log) will output the successfully installed plugin, which users can use to determine whether the plugin is installed successfully.
|
||||
Be sure to install the plugin before starting Confluent. Otherwise, Kafka Connect will fail to discover the plugins.
|
||||
:::
|
||||
|
||||
:::tip
|
||||
|
@ -123,6 +123,59 @@ Control Center is [UP]
|
|||
To clear data, execute `rm -rf /tmp/confluent.106668`.
|
||||
:::
|
||||
|
||||
### Check Confluent Services Status
|
||||
|
||||
Use the command below to check the status of all services:
|
||||
|
||||
```
|
||||
confluent local services status
|
||||
```
|
||||
|
||||
The expected output is:
|
||||
```
|
||||
Connect is [UP]
|
||||
Control Center is [UP]
|
||||
Kafka is [UP]
|
||||
Kafka REST is [UP]
|
||||
ksqlDB Server is [UP]
|
||||
Schema Registry is [UP]
|
||||
ZooKeeper is [UP]
|
||||
```
|
||||
|
||||
### Check Successfully Loaded Plugin
|
||||
|
||||
After Kafka Connect has completely started, you can use the command below to check whether our plugins are installed successfully:
|
||||
```
|
||||
confluent local services connect plugin list
|
||||
```
|
||||
|
||||
The output should contain `TDengineSinkConnector` and `TDengineSourceConnector`, as below:
|
||||
|
||||
```
|
||||
Available Connect Plugins:
|
||||
[
|
||||
{
|
||||
"class": "com.taosdata.kafka.connect.sink.TDengineSinkConnector",
|
||||
"type": "sink",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"class": "com.taosdata.kafka.connect.source.TDengineSourceConnector",
|
||||
"type": "source",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
......
|
||||
```
|
||||
|
||||
If not, please check the log file of Kafka Connect. To view the log file path, please execute:
|
||||
|
||||
```
|
||||
echo `cat /tmp/confluent.current`/connect/connect.stdout
|
||||
```
|
||||
It should produce a path like: `/tmp/confluent.104086/connect/connect.stdout`
|
||||
|
||||
Besides the log file `connect.stdout`, there is a file named `connect.properties`. At the end of this file you can see the effective `plugin.path`, which is a series of paths joined by commas. If Kafka Connect did not find our plugins, it is probably because the installed path is not included in `plugin.path`.
|
||||
|
||||
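A quick, hedged way to print that effective setting:

```bash
# Show the plugin.path Kafka Connect is actually using
grep 'plugin.path' "$(cat /tmp/confluent.current)/connect/connect.properties"
```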
## The use of TDengine Sink Connector
|
||||
|
||||
The role of the TDengine Sink Connector is to synchronize the data of the specified topic to TDengine. Users do not need to create databases and super tables in advance. The name of the target database can be specified manually (see the configuration parameter connection.database), or it can be generated according to specific rules (see the configuration parameter connection.database.prefix).
|
||||
|
@ -142,7 +195,7 @@ vi sink-demo.properties
|
|||
The content of sink-demo.properties is as follows:
|
||||
|
||||
```ini title="sink-demo.properties"
|
||||
name=tdengine-sink-demo
|
||||
name=TDengineSinkConnector
|
||||
connector.class=com.taosdata.kafka.connect.sink.TDengineSinkConnector
|
||||
tasks.max=1
|
||||
topics=meters
|
||||
|
@ -151,6 +204,7 @@ connection.user=root
|
|||
connection.password=taosdata
|
||||
connection.database=power
|
||||
db.schemaless=line
|
||||
data.precision=ns
|
||||
key.converter=org.apache.kafka.connect.storage.StringConverter
|
||||
value.converter=org.apache.kafka.connect.storage.StringConverter
|
||||
```
|
||||
|
@ -177,6 +231,7 @@ If the above command is executed successfully, the output is as follows:
|
|||
"connection.url": "jdbc:TAOS://127.0.0.1:6030",
|
||||
"connection.user": "root",
|
||||
"connector.class": "com.taosdata.kafka.connect.sink.TDengineSinkConnector",
|
||||
"data.precision": "ns",
|
||||
"db.schemaless": "line",
|
||||
"key.converter": "org.apache.kafka.connect.storage.StringConverter",
|
||||
"tasks.max": "1",
|
||||
|
@ -221,14 +276,14 @@ Database changed.
|
|||
taos> select * from meters;
|
||||
ts | current | voltage | phase | groupid | location |
|
||||
===============================================================================================================================================================
|
||||
2022-03-28 09:56:51.249000000 | 11.800000000 | 221.000000000 | 0.280000000 | 2 | California.LoSangeles |
|
||||
2022-03-28 09:56:51.250000000 | 13.400000000 | 223.000000000 | 0.290000000 | 2 | California.LoSangeles |
|
||||
2022-03-28 09:56:51.249000000 | 10.800000000 | 223.000000000 | 0.290000000 | 3 | California.LoSangeles |
|
||||
2022-03-28 09:56:51.250000000 | 11.300000000 | 221.000000000 | 0.350000000 | 3 | California.LoSangeles |
|
||||
2022-03-28 09:56:51.249000000 | 11.800000000 | 221.000000000 | 0.280000000 | 2 | California.LosAngeles |
|
||||
2022-03-28 09:56:51.250000000 | 13.400000000 | 223.000000000 | 0.290000000 | 2 | California.LosAngeles |
|
||||
2022-03-28 09:56:51.249000000 | 10.800000000 | 223.000000000 | 0.290000000 | 3 | California.LosAngeles |
|
||||
2022-03-28 09:56:51.250000000 | 11.300000000 | 221.000000000 | 0.350000000 | 3 | California.LosAngeles |
|
||||
Query OK, 4 row(s) in set (0.004208s)
|
||||
```
|
||||
|
||||
If you see the above data, the synchronization is successful. If not, check the logs of Kafka Connect. For detailed description of configuration parameters, see [Configuration Reference](#Configuration Reference).
|
||||
If you see the above data, the synchronization is successful. If not, check the logs of Kafka Connect. For detailed description of configuration parameters, see [Configuration Reference](#configuration-reference).
|
||||
|
||||
## The use of TDengine Source Connector
|
||||
|
||||
|
@ -356,6 +411,7 @@ The following configuration items apply to TDengine Sink Connector and TDengine
|
|||
4. `max.retries`: The maximum number of retries when an error occurs. Defaults to 1.
|
||||
5. `retry.backoff.ms`: The time interval for retry when sending an error. The unit is milliseconds. The default is 3000.
|
||||
6. `db.schemaless`: Data format; could be one of `line`, `json`, and `telnet`, representing the InfluxDB line protocol format, OpenTSDB JSON format, and OpenTSDB Telnet line protocol format respectively.
|
||||
7. `data.precision`: The time precision when using InfluxDB line protocol format data; could be one of `ms`, `us` and `ns`. The default is `ns`.
|
||||
|
||||
### TDengine Source Connector specific configuration
|
||||
|
||||
|
@ -366,7 +422,13 @@ The following configuration items apply to TDengine Sink Connector and TDengine
|
|||
5. `fetch.max.rows`: The maximum number of rows retrieved in one fetch from the database. Default is 100.
6. `out.format`: The data format. The value could be `line` or `json`: `line` represents the InfluxDB Line protocol format, and `json` represents the OpenTSDB JSON format. Default is `line`.

## feedback
## Other notes

1. To install the plugin to a customized location, refer to https://docs.confluent.io/home/connect/self-managed/install.html#install-connector-manually.
2. To use Kafka Connect without Confluent, refer to https://kafka.apache.org/documentation/#connect.

## Feedback

https://github.com/taosdata/kafka-connect-tdengine/issues
@ -5,38 +5,38 @@ title: Architecture
## Cluster and Primary Logic Unit

The design of TDengine is based on the assumption that any hardware or software system is not 100% reliable and that no single node can provide sufficient computing and storage resources to process massive data. Therefore, TDengine has been designed in a distributed and high-reliability architecture since day one of the development, so that hardware failure or software failure of any single even multiple servers will not affect the availability and reliability of the system. At the same time, through node virtualization and automatic load-balancing technology, TDengine can make the most efficient use of computing and storage resources in heterogeneous clusters to reduce hardware resources significantly.
The design of TDengine is based on the assumption that any hardware or software system is not 100% reliable and that no single node can provide sufficient computing and storage resources to process massive data. Therefore, since day one, TDengine has been designed as a natively distributed system, with high-reliability architecture. Hardware failure or software failure of a single, or even multiple servers will not affect the availability and reliability of the system. At the same time, through node virtualization and automatic load-balancing technology, TDengine can make the most efficient use of computing and storage resources in heterogeneous clusters to reduce hardware resource needs significantly.

### Primary Logic Unit

Logical structure diagram of TDengine distributed architecture as following:
Logical structure diagram of TDengine's distributed architecture is as follows:

![TDengine Database architecture diagram](./structure.webp)
![TDengine Database architecture diagram](./structure.png)
<center> Figure 1: TDengine architecture diagram </center>

A complete TDengine system runs on one or more physical nodes. Logically, it includes data node (dnode), TDengine client driver (TAOSC) and application (app). There are one or more data nodes in the system, which form a cluster. The application interacts with the TDengine cluster through TAOSC's API. The following is a brief introduction to each logical unit.

**Physical node (pnode)**: A pnode is a computer that runs independently and has its own computing, storage and network capabilities. It can be a physical machine, virtual machine, or Docker container with an OS installed. The physical node is identified by its configured FQDN (Fully Qualified Domain Name). TDengine relies entirely on FQDN for network communication. If you don't know about FQDN, please check [wikipedia](https://en.wikipedia.org/wiki/Fully_qualified_domain_name).

**Data node (dnode):** A dnode is a running instance of the TDengine server-side execution code taosd on a physical node. A working system must have at least one data node. A dnode contains zero to multiple logical virtual nodes (VNODE), zero or at most one logical management node (mnode). The unique identification of a dnode in the system is determined by the instance's End Point (EP). EP is a combination of FQDN (Fully Qualified Domain Name) of the physical node where the dnode is located and the network port number (Port) configured by the system. By configuring different ports, a physical node (a physical machine, virtual machine or container) can run multiple instances or have multiple data nodes.
**Data node (dnode):** A dnode is a running instance of the TDengine server-side execution code taosd on a physical node (pnode). A working system must have at least one data node. A dnode contains zero to multiple logical virtual nodes (VNODE) and zero or at most one logical management node (mnode). The unique identification of a dnode in the system is determined by the instance's End Point (EP). EP is a combination of FQDN (Fully Qualified Domain Name) of the physical node where the dnode is located and the network port number (Port) configured by the system. By configuring different ports, a physical node (a physical machine, virtual machine or container) can run multiple instances or have multiple data nodes.

**Virtual node (vnode)**: To better support data sharding, load balancing and prevent data from overheating or skewing, data nodes are virtualized into multiple virtual nodes (vnode, V2, V3, V4, etc. in the figure). Each vnode is a relatively independent work unit, which is the basic unit of time-series data storage and has independent running threads, memory space and persistent storage path. A vnode contains a certain number of tables (data collection points). When a new table is created, the system checks whether a new vnode needs to be created. The number of vnodes that can be created on a data node depends on the hardware capacities of the physical node where the data node is located. A vnode belongs to only one DB, but a DB can have multiple vnodes. In addition to the stored time-series data, a vnode also stores the schema and tag values of the included tables. A virtual node is uniquely identified in the system by the EP of the data node and the VGroup ID to which it belongs and is created and managed by the management node.
**Virtual node (vnode)**: To better support data sharding, load balancing and prevent data from overheating or skewing, data nodes are virtualized into multiple virtual nodes (vnode, V2, V3, V4, etc. in the figure). Each vnode is a relatively independent work unit, which is the basic unit of time-series data storage and has independent running threads, memory space and persistent storage path. A vnode contains a certain number of tables (data collection points). When a new table is created, the system checks whether a new vnode needs to be created. The number of vnodes that can be created on a data node depends on the capacity of the hardware of the physical node where the data node is located. A vnode belongs to only one DB, but a DB can have multiple vnodes. In addition to the stored time-series data, a vnode also stores the schema and tag values of the included tables. A virtual node is uniquely identified in the system by the EP of the data node and the VGroup ID to which it belongs and is created and managed by the management node.

**Management node (mnode)**: A virtual logical unit responsible for monitoring and maintaining the running status of all data nodes and load balancing among nodes (M in the figure). At the same time, the management node is also responsible for the storage and management of metadata (including users, databases, tables, static tags, etc.), so it is also called Meta Node. Multiple (up to 5) mnodes can be configured in a TDengine cluster, and they are automatically constructed into a virtual management node group (M0, M1, M2 in the figure). The master/slave mechanism is adopted for the mnode group and the data synchronization is carried out in a strongly consistent way. Any data update operation can only be executed on the master. The creation of mnode cluster is completed automatically by the system without manual intervention. There is at most one mnode on each dnode, which is uniquely identified by the EP of the data node to which it belongs. Each dnode automatically obtains the EP of the dnode where all mnodes in the whole cluster are located through internal messaging interaction.
**Management node (mnode)**: A virtual logical unit responsible for monitoring and maintaining the running status of all data nodes and load balancing among nodes (M in the figure). At the same time, the management node is also responsible for the storage and management of metadata (including users, databases, tables, static tags, etc.), so it is also called Meta Node. Multiple (up to 5) mnodes can be configured in a TDengine cluster, and they are automatically constructed into a virtual management node group (M0, M1, M2 in the figure). The master/slave mechanism is adopted for the mnode group and the data synchronization is carried out in a strongly consistent way. Any data update operation can only be executed on the master. The creation of mnode cluster is completed automatically by the system without manual intervention. There is at most one mnode on each dnode, which is uniquely identified by the EP of the data node to which it belongs. Each dnode automatically obtains the EP of the dnode where all mnodes in the whole cluster are located, through internal messaging interaction.

**Virtual node group (VGroup)**: Vnodes on different data nodes can form a virtual node group to ensure the high availability of the system. The virtual node group is managed in a master/slave mechanism. Write operations can only be performed on the master vnode, and then replicated to slave vnodes, thus ensuring that one single replica of data is copied on multiple physical nodes. The number of virtual nodes in a vgroup equals the number of data replicas. If the number of replicas of a DB is N, the system must have at least N data nodes. The number of replicas can be specified by the parameter `“replica”` when creating DB, and the default is 1. Using the multi-replication feature of TDengine, the same high data reliability can be achieved without the need for expensive storage devices such as disk arrays. Virtual node group is created and managed by the management node, and the management node assigns a system unique ID, aka VGroup ID. If two virtual nodes have the same vnode group ID, means that they belong to the same group and the data is backed up to each other. The number of virtual nodes in a virtual node group can be dynamically changed, allowing only one, that is, no data replication. VGroup ID is never changed. Even if a virtual node group is deleted, its ID will not be reused.
**Virtual node group (VGroup)**: Vnodes on different data nodes can form a virtual node group to ensure the high availability of the system. The virtual node group is managed in a master/slave mechanism. Write operations can only be performed on the master vnode, and then replicated to slave vnodes, thus ensuring that one single replica of data is copied on multiple physical nodes. The number of virtual nodes in a vgroup equals the number of data replicas. If the number of replicas of a DB is N, the system must have at least N data nodes. The number of replicas can be specified by the parameter `“replica”` when creating a DB, and the default is 1. Using the multi-replication feature of TDengine, the same high data reliability can be achieved without the need for expensive storage devices such as disk arrays. Virtual node groups are created and managed by the management node, and the management node assigns a system unique ID, aka VGroup ID. If two virtual nodes have the same vnode group ID, it means that they belong to the same group and the data is backed up to each other. The number of virtual nodes in a virtual node group can be dynamically changed, and can be as low as one, that is, no data replication. VGroup ID is never changed. Even if a virtual node group is deleted, its ID will not be reused.
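
As an illustrative sketch of how the replica count maps to vgroup size, the `replica` parameter is given at database creation time (the database name below is a placeholder):

```sql
-- A database with 3 replicas: each vgroup created for its tables
-- contains 3 vnodes, placed on 3 different dnodes.
CREATE DATABASE demo REPLICA 3;
```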

**TAOSC**: TAOSC is the driver provided by TDengine to applications, which is responsible for dealing with the interaction between application and cluster, and provides the native interface of C/C++ language, which is embedded in JDBC, C #, Python, Go, Node.js language connection libraries. Applications interact with the whole cluster through TAOSC instead of directly connecting to data nodes in the cluster. This module is responsible for obtaining and caching metadata; forwarding requests for insertion, query, etc. to the correct data node; when returning the results to the application, TAOSC also needs to be responsible for the final level of aggregation, sorting, filtering and other operations. For JDBC, C/C++/C #/Python/Go/Node.js interfaces, this module runs on the physical node where the application is located. At the same time, in order to support the fully distributed RESTful interface, TAOSC has a running instance on each dnode of TDengine cluster.
**TAOSC**: TAOSC is the driver provided by TDengine to applications. It is responsible for dealing with the interaction between application and cluster, and provides the native interface for the C/C++ language. It is also embedded in the JDBC, C#, Python, Go, Node.js language connection libraries. Applications interact with the whole cluster through TAOSC instead of directly connecting to data nodes in the cluster. This module is responsible for obtaining and caching metadata; forwarding requests for insertion, query, etc. to the correct data node; when returning the results to the application, TAOSC also needs to be responsible for the final level of aggregation, sorting, filtering and other operations. For JDBC, C/C++/C#/Python/Go/Node.js interfaces, this module runs on the physical node where the application is located. At the same time, in order to support the fully distributed RESTful interface, TAOSC has a running instance on each dnode of TDengine cluster.

### Node Communication

**Communication mode**: The communication among each data node of TDengine system, and among the client driver and each data node is carried out through TCP/UDP. Considering an IoT scenario, the data writing packets are generally not large, so TDengine uses UDP in addition to TCP for transmission, because UDP is more efficient and is not limited by the number of connections. TDengine implements its own timeout, retransmission, confirmation and other mechanisms to ensure reliable transmission of UDP. For packets with a data volume of less than 15K, UDP is adopted for transmission, and TCP is automatically adopted for transmission of packets with a data volume of more than 15K or query operations. At the same time, TDengine will automatically compress/decompress the data, digital sign/authenticate the data according to the configuration and data packet. For data replication among data nodes, only TCP is used for data transportation.
**Communication mode**: The communication among each data node of TDengine system, and among the client driver and each data node is carried out through TCP/UDP. Considering an IoT scenario, the data writing packets are generally not large, so TDengine uses UDP in addition to TCP for transmission, because UDP is more efficient and is not limited by the number of connections. TDengine implements its own timeout, retransmission, confirmation and other mechanisms to ensure reliable transmission of UDP. For packets with a data volume of less than 15K, UDP is adopted for transmission, and TCP is automatically adopted for transmission of packets with a data volume of more than 15K or query operations. At the same time, TDengine will automatically compress/decompress the data, digitally sign/authenticate the data according to the configuration and data packet. For data replication among data nodes, only TCP is used for data transportation.

**FQDN configuration:** A data node has one or more FQDNs, which can be specified in the system configuration file taos.cfg with the parameter “fqdn”. If it is not specified, the system will automatically use the hostname of the computer as its FQDN. If the node is not configured with FQDN, you can directly set the configuration parameter “fqdn” of the node to its IP address. However, IP is not recommended because the IP address may change, and once it changes, the cluster will not work properly. The EP (End Point) of a data node consists of FQDN + Port. With FQDN, it is necessary to ensure the DNS service is running, or hosts files on nodes are configured properly.

**Port configuration**: The external port of a data node is determined by the system configuration parameter “serverPort” in TDengine, and the port for internal communication of the cluster is serverPort+5. The data replication operation among data nodes in the cluster also occupies a TCP port, which is serverPort+10. In order to support multithreading and efficient processing of UDP data, each internal and external UDP connection needs to occupy 5 consecutive ports. Therefore, the total port range of a data node will be serverPort to serverPort + 10, for a total of 11 TCP/UDP ports. For example, with the default serverPort of 6030, ports 6030 through 6040 must be kept open. To run the system, make sure that the firewall keeps these ports open. Each data node can be configured with a different serverPort.

**Cluster external connection**: TDengine cluster can accommodate one single, multiple or even thousands of data nodes. The application only needs to initiate a connection to any data node in the cluster. The network parameter required for connection is the End Point (FQDN plus configured port number) of a data node. When starting the application taos through CLI, the FQDN of the data node can be specified through the option `-h`, and the configured port number can be specified through `-p`. If the port is not configured, the system configuration parameter “serverPort” of TDengine will be adopted.
**Cluster external connection**: TDengine cluster can accommodate a single, multiple or even thousands of data nodes. The application only needs to initiate a connection to any data node in the cluster. The network parameter required for connection is the End Point (FQDN plus configured port number) of a data node. When starting the application taos through CLI, the FQDN of the data node can be specified through the option `-h`, and the configured port number can be specified through `-p`. If the port is not configured, the system configuration parameter “serverPort” of TDengine will be adopted.

**Inter-cluster communication**: Data nodes connect with each other through TCP/UDP. When a data node starts, it will obtain the EP information of the dnode where the mnode is located, and then establish a connection with the mnode in the system to exchange information. There are three steps to obtain EP information of the mnode:
@ -44,31 +44,33 @@ A complete TDengine system runs on one or more physical nodes. Logically, it inc
2. Check the system configuration file taos.cfg to obtain node configuration parameters “firstEp” and “secondEp” (the node specified by these two parameters can be a normal node without mnode; in this case, the node will try to redirect to the mnode when connected). If these two configuration parameters do not exist in taos.cfg, or are invalid, skip to the third step;
3. Set your own EP as a mnode EP and run it independently. After obtaining the mnode EP list, the data node initiates the connection. It will successfully join the working cluster after connection. If not successful, it will try the next item in the mnode EP list. If all attempts are made, but the connection still fails, sleep for a few seconds before trying again.

**The choice of MNODE**: TDengine logically has a management node, but there is no separated execution code. The server-side only has a set of execution code taosd. So which data node will be the management node? This is determined automatically by the system without any manual intervention. The principle is as follows: when a data node starts, it will check its End Point and compare it with the obtained mnode EP List. If its EP exists in it, the data node shall start the mnode module and become a mnode. If your own EP is not in the mnode EP List, the mnode module will not start. During the system operation, due to load balancing, downtime and other reasons, mnode may migrate to the new dnode, while totally transparent without manual intervention. The modification of configuration parameters is the decision made by mnode itself according to resources usage.
**The choice of MNODE**: TDengine logically has a management node, but there is no separate execution code. The server-side only has one set of execution code, taosd. So which data node will be the management node? This is determined automatically by the system without any manual intervention. The principle is as follows: when a data node starts, it will check its End Point and compare it with the obtained mnode EP List. If its EP exists in it, the data node shall start the mnode module and become a mnode. If its own EP is not in the mnode EP List, the mnode module will not start. During the system operation, due to load balancing, downtime and other reasons, mnode may migrate to a new dnode, totally transparently and without manual intervention. The modification of configuration parameters is the decision made by mnode itself according to resource usage.

**Add new data nodes:** After the system has a data node, it has become a working system. There are two steps to add a new node into the cluster. Step1: Connect to the existing working data node using TDengine CLI, and then add the End Point of the new data node with the command "create dnode"; Step 2: In the system configuration parameter file taos.cfg of the new data node, set the “firstEp” and “secondEp” parameters to the EP of any two data nodes in the existing cluster. Please refer to the detailed user tutorial for detailed steps. In this way, the cluster will be established step by step.
**Add new data nodes:** After the system has a data node, it has become a working system. There are two steps to add a new node into the cluster.
- Step 1: Connect to the existing working data node using TDengine CLI, and then add the End Point of the new data node with the command "create dnode" (see the SQL sketch after this list)
- Step 2: In the system configuration parameter file taos.cfg of the new data node, set the “firstEp” and “secondEp” parameters to the EP of any two data nodes in the existing cluster. Please refer to the user tutorial for detailed steps. In this way, the cluster will be established step by step.
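
A minimal sketch of step 1 from the TDengine CLI (the End Point below is a placeholder, not a real node):

```sql
-- Run on any dnode already in the cluster; registers the new
-- data node by its End Point (FQDN:port).
CREATE DNODE "h2.taosdata.com:6030";
```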

**Redirection**: No matter about dnode or TAOSC, the connection to the mnode shall be initiated first, but the mnode is automatically created and maintained by the system, so the user does not know which dnode is running the mnode. TDengine only requires a connection to any working dnode in the system. Because any running dnode maintains the currently running mnode EP List, when receiving a connecting request from the newly started dnode or TAOSC, if it’s not a mnode by self, it will reply to the mnode EP List back. After receiving this list, TAOSC or the newly started dnode will try to establish the connection again. When the mnode EP List changes, each data node quickly obtains the latest list and notifies TAOSC through messaging interaction among nodes.
**Redirection**: Regardless of dnode or TAOSC, the connection to the mnode is initiated first. The mnode is automatically created and maintained by the system, so the user does not know which dnode is running the mnode. TDengine only requires a connection to any working dnode in the system. Because any running dnode maintains the currently running mnode EP List, when receiving a connecting request from the newly started dnode or TAOSC, if it’s not an mnode itself, it will reply with the mnode EP List. After receiving this list, TAOSC or the newly started dnode will try to establish the connection again. When the mnode EP List changes, each data node quickly obtains the latest list and notifies TAOSC through messaging interaction among nodes.

### A Typical Data Writing Process

To explain the relationship between vnode, mnode, TAOSC and application and their respective roles, the following is an analysis of a typical data writing process.

![typical process of TDengine Database](./message.webp)
![typical process of TDengine Database](./message.png)
<center> Figure 2: Typical process of TDengine </center>

1. Application initiates a request to insert data through JDBC, ODBC, or other APIs.
2. TAOSC checks if meta data existing for the table in the cache. If so, go straight to Step 4. If not, TAOSC sends a get meta-data request to mnode.
2. TAOSC checks the cache to see if meta data exists for the table. If it does, it goes straight to Step 4. If not, TAOSC sends a get meta-data request to mnode.
3. Mnode returns the meta-data of the table to TAOSC. Meta-data contains the schema of the table, and also the vgroup information to which the table belongs (the vnode ID and the End Point of the dnode where the table belongs. If the number of replicas is N, there will be N groups of End Points). If TAOSC does not receive a response from the mnode for a long time, and there are multiple mnodes, TAOSC will send a request to the next mnode.
4. TAOSC initiates an insert request to master vnode.
5. After vnode inserts the data, it gives a reply to TAOSC, indicating that the insertion is successful. If TAOSC doesn't get a response from vnode for a long time, TAOSC will treat this node as offline. In this case, if there are multiple replicas of the inserted database, TAOSC will issue an insert request to the next vnode in vgroup.
6. TAOSC notifies APP that writing is successful.

For Step 2 and 3, when TAOSC starts, it does not know the End Point of mnode, so it will directly initiate a request to the configured serving End Point of the cluster. If the dnode that receives the request does not have a mnode configured, it will inform the mnode EP list in a reply message, so that TAOSC will re-issue a request to obtain meta-data to the EP of another new mnode.
For Step 2 and 3, when TAOSC starts, it does not know the End Point of mnode, so it will directly initiate a request to the configured serving End Point of the cluster. If the dnode that receives the request does not have a mnode configured, it will reply with the mnode EP list, so that TAOSC will re-issue a request to obtain meta-data to the EP of another mnode.

For Step 4 and 5, without caching, TAOSC can't recognize the master in the virtual node group, so assumes that the first vnode is the master and sends a request to it. If this vnode is not the master, it will reply to the actual master as a new target where TAOSC shall send a request to. Once the reply of successful insertion is obtained, TAOSC will cache the information of master node.
For Step 4 and 5, without caching, TAOSC can't recognize the master in the virtual node group, so it assumes that the first vnode is the master and sends a request to it. If this vnode is not the master, it will reply with the actual master as the new target to which TAOSC shall send the request. Once a response of successful insertion is obtained, TAOSC will cache the information of the master node.

The above is the process of inserting data, and the processes of querying and computing are the same. TAOSC encapsulates and hides all these complicated processes, and it is transparent to applications.
The above describes the process of inserting data. The processes of querying and computing are the same. TAOSC encapsulates and hides all these complicated processes, and it is transparent to applications.
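
From the application side, the entire flow above is triggered by a single INSERT statement; a minimal sketch against the `meters` example shown earlier in this document (the sub-table name `d1001` is illustrative):

```sql
-- One write request: TAOSC fetches and caches the table's meta data
-- on first use, then forwards the row to the master vnode.
INSERT INTO d1001 VALUES (NOW, 10.2, 219, 0.32);
```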

Through the TAOSC caching mechanism, mnode needs to be accessed only when a table is accessed for the first time, so mnode will not become a system bottleneck. However, because schema and vgroup may change (such as load balancing), TAOSC will interact with mnode regularly to automatically update the cache.
@ -76,24 +78,24 @@ Through TAOSC caching mechanism, mnode needs to be accessed only when a table is
### Storage Model

The data stored by TDengine include collected time-series data, metadata related to database and tables, tag data, etc. These data are specifically divided into three parts:
The data stored by TDengine includes collected time-series data, metadata related to database and tables, tag data, etc. All of the data is specifically divided into three parts:

- Time-series data: stored in vnode and composed of data, head and last files. The amount of data is large and query amount depends on the application scenario. Out-of-order writing is allowed, but delete operation is not supported for the time being, and update operation is only allowed when database “update” parameter is set to 1. By adopting the model with **one table for each data collection point**, the data of a given time period is continuously stored, and the writing against one single table is a simple appending operation. Multiple records can be read at one time, thus ensuring the insert and query operation of a single data collection point with the best performance.
- Tag data: meta files stored in vnode. Four standard operations of create, read, update and delete are supported. The amount of data is not large. If there are N tables, there are N records, so all can be stored in memory. To make tag filtering efficient, TDengine supports multi-core and multi-threaded concurrent queries. As long as the computing resources are sufficient, even in face of millions of tables, the tag filtering results will return in milliseconds.
- Metadata: stored in mnode, including system node, user, DB, Table Schema and other information. Four standard operations of create, delete, update and read are supported. The amount of these data are not large and can be stored in memory, moreover, the query amount is not large because of the client cache. Therefore, TDengine uses centralized storage management, however, there will be no performance bottleneck.
- Time-series data: stored in vnode and composed of data, head and last files. The amount of data is large and query amount depends on the application scenario. Out-of-order writing is allowed, but delete operation is not supported for the time being, and update operation is only allowed when database “update” parameter is set to 1. By adopting the model with **one table for each data collection point**, the data of a given time period is continuously stored, and the writing against one single table is a simple appending operation. Multiple records can be read at one time, thus ensuring the best performance for both insert and query operations of a single data collection point.
- Tag data: meta files stored in vnode. Four standard operations of create, read, update and delete are supported. The amount of data is not large. If there are N tables, there are N records, so all can be stored in memory. To make tag filtering efficient, TDengine supports multi-core and multi-threaded concurrent queries. As long as the computing resources are sufficient, even with millions of tables, the tag filtering results will return in milliseconds.
- Metadata: stored in mnode and includes system node, user, DB, table schema and other information. Four standard operations of create, delete, update and read are supported. The amount of this data is not large and can be stored in memory. Moreover, the number of queries is not large because of client cache. Even though TDengine uses centralized storage management, because of the architecture, there is no performance bottleneck.

Compared with the typical NoSQL storage model, TDengine stores tag data and time-series data completely separately, which has two major advantages:
Compared with the typical NoSQL storage model, TDengine stores tag data and time-series data completely separately. This has two major advantages:

- Reduce the redundancy of tag data storage significantly: general NoSQL database or time-series database adopts K-V storage, in which Key includes a timestamp, a device ID and various tags. Each record carries these duplicated tags, so storage space is wasted. Moreover, if the application needs to add, modify or delete tags on historical data, it has to traverse the data and rewrite them again, which is extremely expensive to operate.
- Aggregate data efficiently between multiple tables: when aggregating data between multiple tables, it first finds out the tables which satisfy the filtering conditions, and then find out the corresponding data blocks of these tables to greatly reduce the data sets to be scanned, thus greatly improving the aggregation efficiency. Moreover, tag data is managed and maintained in a full-memory structure, and tag data queries in tens of millions can return in milliseconds.
- Reduces the redundancy of tag data storage significantly. General NoSQL database or time-series database adopts K-V (key-value) storage, in which the key includes a timestamp, a device ID and various tags. Each record carries these duplicated tags, so storage space is wasted. Moreover, if the application needs to add, modify or delete tags on historical data, it has to traverse the data and rewrite them again, which is an extremely expensive operation.
- Aggregate data efficiently between multiple tables: when aggregating data between multiple tables, it first finds the tables which satisfy the filtering conditions, and then finds the corresponding data blocks of these tables. This greatly reduces the data sets to be scanned, which in turn improves the aggregation efficiency. Moreover, tag data is managed and maintained in a full-memory structure, and tag data queries in tens of millions can return in milliseconds.

### Data Sharding

For large-scale data management, to achieve scale-out, it is generally necessary to adopt the Partitioning or Sharding strategy. TDengine implements data sharding via vnode, and time-series data partitioning via one data file for a time range.
For large-scale data management, to achieve scale-out, it is generally necessary to adopt a Partitioning or Sharding strategy. TDengine implements data sharding via vnode, and time-series data partitioning via one data file for a time range.

VNode (Virtual Data Node) is responsible for providing writing, query and computing functions for collected time-series data. To facilitate load balancing, data recovery and support heterogeneous environments, TDengine splits a data node into multiple vnodes according to its computing and storage resources. The management of these vnodes is done automatically by TDengine and is completely transparent to the application.

For a single data collection point, regardless of the amount of data, a vnode (or vnode group, if the number of replicas is greater than 1) has enough computing resource and storage resource to process (if a 16-byte record is generated per second, the original data generated in one year will be less than 0.5 G), so TDengine stores all the data of a table (a data collection point) in one vnode instead of distributing the data to two or more dnodes. Moreover, a vnode can store data from multiple data collection points (tables), and the upper limit of the tables’ quantity for a vnode is one million. By design, all tables in a vnode belong to the same DB. On a data node, unless specially configured, the number of vnodes owned by a DB will not exceed the number of system cores.
For a single data collection point, regardless of the amount of data, a vnode (or vnode group, if the number of replicas is greater than 1) has enough computing resource and storage resource to process (if a 16-byte record is generated per second, the original data generated in one year will be less than 0.5 G). So TDengine stores all the data of a table (a data collection point) in one vnode instead of distributing the data to two or more dnodes. Moreover, a vnode can store data from multiple data collection points (tables), and the upper limit of the tables’ quantity for a vnode is one million. By design, all tables in a vnode belong to the same DB. On a data node, unless specially configured, the number of vnodes owned by a DB will not exceed the number of system cores.

When creating a DB, the system does not allocate resources immediately. However, when creating a table, the system will check if there is an allocated vnode with free tablespace. If so, the table will be created in the vacant vnode immediately. If not, the system will create a new vnode on a dnode from the cluster according to the current workload, and then create the table in it. If there are multiple replicas of a DB, the system does not create only one vnode, but a vgroup (virtual data node group). The system has no limit on the number of vnodes, which is just limited by the computing and storage resources of physical nodes.

@ -101,43 +103,43 @@ The meta data of each table (including schema, tags, etc.) is also stored in vno

### Data Partitioning

In addition to vnode sharding, TDengine partitions the time-series data by time range. Each data file contains only one time range of time-series data, and the length of the time range is determined by DB's configuration parameter `“days”`. This method of partitioning by time rang is also convenient to efficiently implement the data retention policy. As long as the data file exceeds the specified number of days (system configuration parameter `“keep”`), it will be automatically deleted. Moreover, different time ranges can be stored in different paths and storage media, so as to facilitate the tiered-storage. Cold/hot data can be stored in different storage media to reduce the storage cost.
In addition to vnode sharding, TDengine partitions the time-series data by time range. Each data file contains only one time range of time-series data, and the length of the time range is determined by the database configuration parameter `“days”`. This method of partitioning by time range is also convenient to efficiently implement data retention policies. As long as the data file exceeds the specified number of days (system configuration parameter `“keep”`), it will be automatically deleted. Moreover, different time ranges can be stored in different paths and storage media, so as to facilitate tiered-storage. Cold/hot data can be stored in different storage media to significantly reduce storage costs.
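
As an illustrative sketch, both parameters are set when a database is created (name and values are examples only):

```sql
-- Each data file holds 10 days of data; files older than 365 days
-- are removed automatically by the retention policy.
CREATE DATABASE power DAYS 10 KEEP 365;
```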

In general, **TDengine splits big data by vnode and time range in two dimensions** to manage the data efficiently with horizontal scalability.

### Load Balancing

Each dnode regularly reports its status (including hard disk space, memory size, CPU, network, number of virtual nodes, etc.) to the mnode (virtual management node), so mnode knows the status of the entire cluster. Based on the overall status, when the mnode finds a dnode is overloaded, it will migrate one or more vnodes to other dnodes. During the process, TDengine services keep running and the data insertion, query and computing operations are not affected.
Each dnode regularly reports its status (including hard disk space, memory size, CPU, network, number of virtual nodes, etc.) to the mnode (virtual management node) so that the mnode knows the status of the entire cluster. Based on the overall status, when the mnode finds a dnode is overloaded, it will migrate one or more vnodes to other dnodes. During the process, TDengine services keep running and the data insertion, query and computing operations are not affected.

If the mnode has not received the dnode status for a period of time, the dnode will be treated as offline. When offline lasts a certain period of time (configured by parameter `“offlineThreshold”`), the dnode will be forcibly removed from the cluster by mnode. If the number of replicas of vnodes on this dnode is greater than one, the system will automatically create new replicas on other dnodes to ensure the replica number. If there are other mnodes on this dnode and the number of mnodes replicas is greater than one, the system will automatically create new mnodes on other dnodes to ensure the replica number.
If the mnode has not received the dnode status for a period of time, the dnode will be treated as offline. If the dnode stays offline beyond the time configured by parameter `“offlineThreshold”`, the dnode will be forcibly removed from the cluster by mnode. If the number of replicas of vnodes on this dnode is greater than one, the system will automatically create new replicas on other dnodes to ensure the replica number. If there are other mnodes on this dnode and the number of mnode replicas is greater than one, the system will automatically create new mnodes on other dnodes to ensure the replica number.

When new data nodes are added to the cluster, with new computing and storage resources are added, the system will automatically start the load balancing process.
When new data nodes are added to the cluster, with new computing and storage resources, the system will automatically start the load balancing process.

The load balancing process does not require any manual intervention, and it is transparent to the application. **Note: load balancing is controlled by the parameter “balance”, which turns automatic load balancing on or off.**

## Data Writing and Replication Process

If a database has N replicas, thus a virtual node group has N virtual nodes, but only one as Master and all others are slaves. When the application writes a new record to system, only the Master vnode can accept the writing request. If a slave vnode receives a writing request, the system will notifies TAOSC to redirect.
If a database has N replicas, a virtual node group has N virtual nodes. But only one is the Master and all others are slaves. When the application writes a new record to the system, only the Master vnode can accept the writing request. If a slave vnode receives a writing request, the system will notify TAOSC to redirect.

### Master vnode Writing Process

The Master vnode writing process is as follows:

![TDengine Database Master Writing Process](./write_master.webp)
![TDengine Database Master Writing Process](./write_master.png)
<center> Figure 3: TDengine Master writing process </center>

1. Master vnode receives the application data insertion request, verifies, and moves to the next step;
2. If the system configuration parameter `“walLevel”` is greater than 0, vnode will write the original request packet into the database log file WAL. If walLevel is set to 2 and fsync is set to 0, TDengine will write WAL data to disk immediately to ensure that even if the system goes down, all data can be recovered from the database log file (see the sketch after this list);
3. If there are multiple replicas, vnode will forward the data packet to slave vnodes in the same virtual node group, and the forwarded packet carries a version number along with the data;
4. Write into memory and add the record to “skip list”;
5. Master vnode returns a confirmation message to the application, indicating a successful writing.
5. Master vnode returns a confirmation message to the application, indicating a successful write.
6. If any of Step 2, 3 or 4 fails, the error will be returned directly to the application.
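
A hedged sketch of the WAL settings referenced in step 2, expressed as database options (assuming the 2.x `WAL` and `FSYNC` options; values are illustrative):

```sql
-- WAL level 2 with an fsync period of 0: every write is flushed to
-- the log file immediately, so it survives a crash of the dnode.
CREATE DATABASE demo WAL 2 FSYNC 0;
```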

### Slave vnode Writing Process

For a slave vnode, the write process is as follows:

![TDengine Database Slave Writing Process](./write_slave.webp)
![TDengine Database Slave Writing Process](./write_slave.png)
<center> Figure 4: TDengine Slave Writing Process </center>

1. Slave vnode receives a data insertion request forwarded by Master vnode;
@ -146,19 +148,19 @@ For a slave vnode, the write process as follows:

Compared with Master vnode, slave vnode has no forwarding or reply confirmation step, which means two steps fewer. But writing into memory and WAL is exactly the same.

### Remote Disaster Recovery and IDC Migration
### Remote Disaster Recovery and IDC (Internet Data Center) Migration

As above Master and Slave processes discussed, TDengine adopts asynchronous replication for data synchronization. This method can greatly improve the writing performance, with no obvious impact from network delay. By configuring IDC and rack number for each physical node, it can be ensured that for a virtual node group, virtual nodes are composed of physical nodes from different IDC and different racks, thus implementing remote disaster recovery without other tools.
As discussed above, TDengine writes using Master and Slave processes. TDengine adopts asynchronous replication for data synchronization. This method can greatly improve write performance, with no obvious impact from network delay. By configuring IDC and rack number for each physical node, it can be ensured that for a virtual node group, virtual nodes are composed of physical nodes from different IDC and different racks, thus implementing remote disaster recovery without other tools.

On the other hand, TDengine supports dynamic modification of the replicas number. Once the number of replicas increases, the newly added virtual nodes will immediately enter the data synchronization process. After synchronization completed, added virtual nodes can provide services. In the synchronization process, master and other synchronized virtual nodes keep serving. With this feature, TDengine can provide IDC migration without service interruption. It is only necessary to add new physical nodes to the existing IDC cluster, and then remove old physical nodes after the data synchronization is completed.
On the other hand, TDengine supports dynamic modification of the replica number. Once the number of replicas increases, the newly added virtual nodes will immediately enter the data synchronization process. After synchronization is complete, added virtual nodes can provide services. In the synchronization process, master and other synchronized virtual nodes keep serving. With this feature, TDengine can provide IDC migration without service interruption. It is only necessary to add new physical nodes to the existing IDC cluster, and then remove old physical nodes after the data synchronization is completed.

However, the asynchronous replication has a tiny time window where data can be lost. The specific scenario is as follows:
However, the asynchronous replication has a very low probability scenario where data may be lost. The specific scenario is as follows:

1. Master vnode has finished its 5-step operations, confirmed the success of writing to APP, and then went down;
1. Master vnode has finished its 5-step operations, confirmed the success of writing to APP, and then goes down;
2. Slave vnode receives the write request, but fails during processing before writing to the log in Step 2;
3. Slave vnode will become the new master, thus losing one record.

In theory, for asynchronous replication, there is no guarantee to prevent data loss. However, this window is extremely small, only if mater and slave fail at the same time, and just confirm the successful write to the application before.
In theory, for asynchronous replication, there is no guarantee to prevent data loss. However, this is an extremely low probability scenario as described above.

Note: Remote disaster recovery and no-downtime IDC migration are only supported by Enterprise Edition. **Hint: This function is not available yet**
@ -171,43 +173,43 @@ When a vnode starts, the roles (master, slave) are uncertain, and the data is in
1. If there’s only one replica, it’s always master
2. When all replicas are online, the one with latest version is master
3. If over half of the virtual nodes are online and one of them is a slave, it will automatically become master
4. For 2 and 3, if multiple virtual nodes meet the requirement, the first vnode in virtual node group list will be selected as master
4. For 2 and 3, if multiple virtual nodes meet the requirement, the first vnode in virtual node group list will be selected as master.

### Synchronous Replication

For scenarios with strong data consistency requirements, asynchronous data replication is not applicable, because there is a small probability of data loss. So, TDengine provides a synchronous replication mechanism for users. When creating a database, in addition to specifying the number of replicas, the user also needs to specify a new parameter “quorum”. If quorum is greater than one, it means that every time the Master forwards a message to the replica, it needs to wait for “quorum-1” confirmation replies before informing the application that data has been successfully written to the slave. If “quorum-1” confirmation replies are not received within a certain period of time, the master vnode will return an error to the application.

With synchronous replication, performance of system will decrease and latency will increase. Because metadata needs strong consistent, the default for data synchronization between mnodes is synchronous replication.
With synchronous replication, performance of system will decrease and latency will increase. Because metadata needs strong consistency, the default for data synchronization between mnodes is synchronous replication.
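
As an illustrative sketch, both knobs are set when the database is created (name and values are examples only):

```sql
-- 3 replicas per vgroup; the master waits for quorum-1 = 1 slave
-- confirmation before acknowledging each write to the application.
CREATE DATABASE demo REPLICA 3 QUORUM 2;
```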

## Caching and Persistence

### Caching

TDengine adopts a time-driven cache management strategy (First-In-First-Out, FIFO), also known as a Write-driven Cache Management Mechanism. This strategy is different from the read-driven data caching mode (Least-Recent-Used, LRU), which directly put the most recently written data in the system buffer. When the buffer reaches a threshold, the earliest data are written to disk in batches. Generally speaking, for the use of IoT data, users are most concerned about the newly generated data, that is, the current status. TDengine takes full advantage of this feature to put the most recently arrived (current state) data in the buffer.
TDengine adopts a time-driven cache management strategy (First-In-First-Out, FIFO), also known as a Write-driven Cache Management Mechanism. This strategy is different from the read-driven data caching mode (Least-Recent-Used, LRU), which directly puts the most recently written data in the system buffer. When the buffer reaches a threshold, the earliest data are written to disk in batches. Generally speaking, for the use of IoT data, users are most concerned about the most recently generated data, that is, the current status. TDengine takes full advantage of this feature to put the most recently arrived (current state) data in the buffer.

TDengine provides millisecond-level data collecting capability to users through query functions. Putting the recently arrived data directly in the buffer can respond to users' analysis query for the latest piece or batch of data more quickly, and provide faster database query response capability as a whole. In this sense, **TDengine can be used as a data cache by setting appropriate configuration parameters without deploying Redis or other additional cache systems**, which can effectively simplify the system architecture and reduce the operation costs. It should be noted that after the TDengine is restarted, the buffer of the system will be emptied, the previously cached data will be written to disk in batches, and the previously cached data will not be reloaded into the buffer as so in a proprietary key-value cache system.
TDengine provides millisecond-level data collecting capability to users through query functions. Putting the recently arrived data directly in the buffer can respond to users' analysis query for the latest piece or batch of data more quickly, and provide faster database query response capability as a whole. In this sense, **TDengine can be used as a data cache by setting appropriate configuration parameters without deploying Redis or other additional cache systems**. This can effectively simplify the system architecture and reduce operational costs. It should be noted that after TDengine is restarted, the buffer of the system will be emptied, the previously cached data will be written to disk in batches, and the previously cached data will not be reloaded into the buffer. In this sense, TDengine's cache differs from proprietary key-value cache systems.

Each vnode has its own independent memory, and it is composed of multiple memory blocks of fixed size, and different vnodes are completely isolated. When writing data, similar to the writing of logs, data is sequentially added to memory, but each vnode maintains its own skip list for quick search. When more than one third of the memory blocks are used, the disk writing operation will start, and the subsequent writing operation is carried out in a new memory block. By this design, one third of the memory blocks in a vnode keep the latest data, so as to achieve the purpose of caching and quick search. The number of memory blocks of a vnode is determined by the configuration parameter “blocks”, and the size of memory blocks is determined by the configuration parameter “cache”.
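
The two parameters map directly to database options; an illustrative sketch (values are examples, not recommendations):

```sql
-- 6 memory blocks of 16 MB each per vnode; roughly one third of
-- them always hold the most recently written (cached) data.
CREATE DATABASE demo CACHE 16 BLOCKS 6;
```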

### Persistent Storage

TDengine uses a data-driven method to write the data from buffer into hard disk for persistent storage. When the cached data in vnode reaches a certain volume, TDengine will also pull up the disk-writing thread to write the cached data into persistent storage in order not to block subsequent data writing. TDengine will open a new database log file when the data is written, and delete the old database log file after written successfully to avoid unlimited log growth.
TDengine uses a data-driven method to write the data from buffer into hard disk for persistent storage. When the cached data in vnode reaches a certain volume, TDengine will pull up the disk-writing thread to write the cached data into persistent storage so that subsequent data writing is not blocked. TDengine will open a new database log file when the data is written, and delete the old database log file after successful persistence, to avoid unlimited log growth.

To make full use of the characteristics of time-series data, TDengine splits the data stored in persistent storage by a vnode into multiple files, each file only saves data for a fixed number of days, which is determined by the system configuration parameter `“days”`. By so, for the given start and end date of a query, you can locate the data files to open immediately without any index, thus greatly speeding up reading operations.
To make full use of the characteristics of time-series data, TDengine splits the data stored in persistent storage by a vnode into multiple files, each file only saves data for a fixed number of days, which is determined by the system configuration parameter `“days”`. Thus for given start and end dates of a query, you can locate the data files to open immediately without any index. This greatly speeds up read operations.

For time-series data, there is generally a retention policy, which is determined by the system configuration parameter `“keep”`. Data files exceeding this set number of days will be automatically deleted by the system to free up storage space.

Given “days” and “keep” parameters, the total number of data files in a vnode is: keep/days. The total number of data files should not be too large or too small. 10 to 100 is appropriate. Based on this principle, a reasonable value for days can be set. For example, with keep set to 365 and days set to 10, a vnode holds about 37 data files. In the current version, parameter “keep” can be modified, but parameter “days” cannot be modified once it is set.
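
Because only `keep` can be changed after creation, extending retention is a one-line statement; an illustrative sketch:

```sql
-- Raise the retention of an existing database from 365 to 730 days;
-- "days" itself cannot be altered once the database exists.
ALTER DATABASE demo KEEP 730;
```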
In each data file, the data of a table is stored in blocks. A table can have one or more data file blocks. In a file block, data is stored in columns, occupying a continuous storage space, thus greatly improving the reading speed. The size of a file block is determined by the system parameter `“maxRows”` (the maximum number of records per block), and the default value is 4096. This value should not be too large or too small. If it is too large, locating data for queries will take longer; if it is too small, the index of the data block is too large, and the compression efficiency will be low, with slower reading speed.

Each data file (with a .data postfix) has a corresponding index file (with a .head postfix). The index file has summary information of a data block for each table, recording the offset of each data block in the data file, the start and end time of the data, and other information which allows the system to locate the required data very quickly. Each data file also has a corresponding last file (with a .last postfix), which is designed to prevent data block fragmentation when data is written to disk. If the number of written records from a table does not reach the system configuration parameter `“minRows”` (minimum number of records per block), they will be stored in the last file first. At the next write operation to the disk, the newly written records will be merged with the records in the last file and then written into the data file.

When data is written to disk, the system decides whether to compress the data based on the system configuration parameter `“comp”`. TDengine provides three compression options: no compression, one-stage compression and two-stage compression, corresponding to comp values of 0, 1 and 2 respectively. One-stage compression is carried out according to the type of data. Compression algorithms include delta-delta coding, simple 8B method, zig-zag coding, LZ4 and other algorithms. Two-stage compression applies a general compression algorithm on top of one-stage compression, which gives a higher compression ratio.
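These block-level parameters can likewise be set when a database is created (a sketch; the name `demo2` and the values are illustrative):

```mysql
-- up to 4096 rows per block; at least 100 rows accumulated
-- before a block is written to the .data file; two-stage compression
CREATE DATABASE demo2 MAXROWS 4096 MINROWS 100 COMP 2;
```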
### Tiered Storage
By default, TDengine saves all data in the /var/lib/taos directory, and the data files of each vnode are saved in a different directory under this directory. In order to expand the storage space, minimize the bottleneck of file reading and improve the data throughput rate, TDengine can configure the system parameter `“dataDir”` to allow multiple mounted hard disks to be used by the system at the same time. In addition, TDengine also provides the function of tiered data storage, i.e. storage on different storage media according to the timestamps of data files. For example, the latest data is stored on SSD, data older than a week is stored on local hard disk, and data older than four weeks is stored on a network storage device. This reduces storage costs and ensures efficient data access. The movement of data on different storage media is automatically done by the system and is completely transparent to applications. Tiered storage of data is also configured through the system parameter `“dataDir”`.

The format of dataDir is as follows:

```
dataDir data_path [tier_level]
```

Where data_path is the folder path of the mount point and tier_level is the storage tier of the media. The higher the storage tier, the older the data files stored on it. Multiple hard disks can be mounted at the same storage tier, and data files on the same storage tier are distributed across all hard disks within the tier. TDengine supports up to 3 tiers of storage, so tier_level values are 0, 1, and 2. When configuring dataDir, exactly one mount path must be given without a tier_level; it is called the special mount disk (path). This mount path defaults to level 0 storage media and contains special file links, which must not be removed; otherwise the written data will be irreversibly damaged.

Suppose there is a physical node with six mountable hard disks /mnt/disk1, /mnt/disk2, …, /mnt/disk6, where disk1 and disk2 need to be designated as level 0 storage media, disk3 and disk4 are level 1 storage media, and disk5 and disk6 are level 2 storage media. disk1 is the special mount disk; you can configure it in /etc/taos/taos.cfg as follows:

```
dataDir /mnt/disk1/taos
dataDir /mnt/disk2/taos 0
dataDir /mnt/disk3/taos 1
dataDir /mnt/disk4/taos 1
dataDir /mnt/disk5/taos 2
dataDir /mnt/disk6/taos 2
```

Note: Tiered Storage is only supported in Enterprise Edition
## Data Query
TDengine provides a variety of query processing functions for tables and STables. In addition to common aggregation queries, TDengine also provides window queries and statistical aggregation functions for time-series data. Query processing in TDengine requires the collaboration of the client, vnode and mnode.

### Single Table Query

The parsing and verification of SQL statements are completed on the client side. An SQL statement is parsed into an Abstract Syntax Tree (AST), which is then checksummed. Metadata information (table metadata) for the table specified in the query is then requested from the management node (mnode).

According to the End Point information in the metadata, the query request is serialized and sent to the data node (dnode) where the table is located. After receiving the query, the dnode identifies the virtual node (vnode) pointed to and forwards the message to the query execution queue of the vnode. The query execution thread of the vnode establishes the basic query execution environment, immediately returns the query request, and starts executing the query at the same time.
### Aggregation by Time Axis, Downsampling, Interpolation

Time-series data is different from ordinary data in that each record has a timestamp. Aggregating data by timestamp on the time axis is therefore an important feature that distinguishes time-series databases from common databases. It is similar to the window query of stream computing engines.

The keyword `interval` is introduced into TDengine to split the time axis into fixed-length time windows. The data is aggregated based on these time windows, and the data within each window range is aggregated as needed. For example:

```mysql
select count(*) from d1001 interval(1h);
```

For the data collected by device D1001, the number of records per hour is counted.
### Multi-table Aggregation Query

TDengine creates a separate table for each data collection point, but in practical applications, it is often necessary to aggregate data from different data collection points. In order to perform aggregation operations efficiently, TDengine introduces the concept of STable (super table). A STable is used to represent a specific type of data collection point. It is a table set containing multiple tables. The schema of each table in the set is the same, but each table has its own static tags. There can be multiple tags, which can be added, deleted and modified at any time. Applications can aggregate or statistically operate on all or a subset of tables under a STable by specifying tag filters. This greatly simplifies the development of applications. The process is shown in the following figure:



<center> Figure 5: Diagram of multi-table aggregation query </center>

1. Application sends a query condition to the system;
2. TAOSC sends the STable name to Meta Node (management node);
3. Management node sends the vnode list owned by the STable back to TAOSC;
4. TAOSC sends the computing request together with tag filters to multiple data nodes corresponding to these vnodes;
5. Each vnode first finds the set of tables within its own node that meet the tag filters from memory, then scans the stored time-series data, completes corresponding aggregation calculations, and returns the result to TAOSC;
6. TAOSC finally aggregates the results returned by multiple data nodes and sends them back to the application.

Since TDengine stores tag data and time-series data separately in vnode, by filtering tag data in memory, the set of tables that need to participate in the aggregation operation is first found, which reduces the volume of data to be scanned and improves aggregation speed. At the same time, because the data is distributed in multiple vnodes/dnodes, the aggregation operation is carried out concurrently in multiple vnodes, which further improves the aggregation speed. Aggregation functions for ordinary tables and most operations are applicable to STables. The syntax is exactly the same. Please see TAOS SQL for details. A sketch of such a query is shown below.
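Assuming a STable `meters` with a numeric column `current` and a tag `location` (hypothetical names, in the spirit of the single-table examples above), a tag-filtered aggregation might look like this:

```mysql
-- aggregate over all tables of the STable whose tag matches the filter
SELECT AVG(current) FROM meters
WHERE location = 'California.SanFrancisco'
INTERVAL(1h);
```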
### Precomputation

In order to effectively improve the performance of query processing, based on the immutable nature of IoT data, statistical information of the data stored in a data block is recorded in the header of the data block, including max value, min value, and sum. We call it a precomputing unit. If the query processing involves all the data of a whole data block, the pre-calculated results are used directly, and there is no need to read the data block contents at all. Since the amount of pre-calculated data is much smaller than the actual size of the data blocks stored on disk, for query processing with disk IO as the bottleneck, the use of pre-calculated results can greatly reduce the read IO pressure and accelerate the query process. The precomputation mechanism is similar to the BRIN (Block Range Index) of PostgreSQL.
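For instance, whole-block aggregates such as the following can be served largely from the precomputed per-block max/min/sum (a sketch; `d1001` and `current` follow the earlier single-table example):

```mysql
-- answered mostly from per-block precomputed statistics,
-- without reading the block contents
SELECT MAX(current), MIN(current), SUM(current) FROM d1001;
```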
# Quickly Build IT DevOps Visualization System with TDengine + Telegraf + Grafana

## Background

TDengine is a big data platform designed and optimized for IoT (Internet of Things), Vehicle Telemetry, Industrial Internet, IT DevOps and other applications. Since it was open-sourced in July 2019, it has won the favor of a large number of time-series data developers with its innovative data modeling design, convenient installation, easy-to-use programming interface, and powerful data writing and query performance.

IT DevOps metric data are usually time sensitive, for example:

- System resource metrics: CPU, memory, IO, bandwidth, etc.
- Software system metrics: health status, number of connections, number of requests, number of timeouts, number of errors, response time, service type, and other business-related metrics.

Current mainstream IT DevOps systems usually include a data collection module, a data persistence module, and a visualization module; Telegraf and Grafana are among the most popular data collection and visualization modules, respectively. The data persistence module is available in a wide range of options, with OpenTSDB or InfluxDB being the most popular. TDengine, as an emerging time-series big data platform, has the advantages of high performance, high reliability, easy management and easy maintenance.

This article introduces how to quickly build a TDengine + Telegraf + Grafana based IT DevOps visualization system without writing even a single line of code, simply by modifying a few lines in configuration files. The architecture is as follows.


## Installation steps

Log in to the Grafana interface using a web browser at `IP:3000`, with the system's initial username and password being `admin/admin`.

Click on the gear icon on the left and select `Plugins`; you should find the TDengine data source plugin icon.

Click on the plus icon on the left and select `Import`, download the dashboard JSON file from `https://github.com/taosdata/grafanaplugin/blob/master/examples/telegraf/grafana/dashboards/telegraf-dashboard-v0.1.0.json`, and import it. You will then see the dashboard in the following screen.



## Wrap-up

The above demonstrates how to quickly build an IT DevOps visualization system. Thanks to the new schemaless protocol parsing feature in TDengine version 2.4.0.0 and its ability to integrate easily with a large software ecosystem, users can build an efficient and easy-to-use IT DevOps visualization system in just a few minutes.

Please refer to the official documentation and product implementation cases for other features.
# Quickly build an IT DevOps visualization system using TDengine + collectd/StatsD + Grafana

## Background

TDengine is a big data platform designed and optimized for IoT (Internet of Things), Vehicle Telemetry, Industrial Internet, IT DevOps and other applications. Since it was open-sourced in July 2019, it has won the favor of a large number of time-series data developers with its innovative data modeling design, convenient installation, easy-to-use programming interface, and powerful data writing and query performance.

IT DevOps metric data are usually time sensitive, for example:

- System resource metrics: CPU, memory, IO, bandwidth, etc.
- Software system metrics: health status, number of connections, number of requests, number of timeouts, number of errors, response time, service type, and other business-related metrics.

The current mainstream IT DevOps visualization systems usually contain a data collection module, a data persistence module, and a visual display module. collectd/StatsD, as an old-fashioned open source data collection tool, has a wide user base. However, collectd/StatsD has limited functionality, and often needs to be combined with Telegraf, Grafana, and a time-series database to build a complete monitoring system.

The new version of TDengine supports multiple data protocols and can accept data from collectd and StatsD directly, and provides Grafana dashboards for graphical display.

This article introduces how to quickly build an IT DevOps visualization system based on TDengine + collectd/StatsD + Grafana without writing even a single line of code, simply by modifying a few lines in configuration files. The architecture is shown in the following figure.


## Installation Steps

Click on the gear icon on the left and select `Plugins`; you should find the TDengine data source plugin icon.

#### Importing the collectd dashboard

Download the dashboard JSON file from `https://github.com/taosdata/grafanaplugin/blob/master/examples/collectd/grafana/dashboards/collect-metrics-with-tdengine-v0.1.0.json`, click the plus icon on the left and select `Import`, and follow the interface prompts to import the JSON file. After that, you can see the dashboard in the following screen.


#### Importing the StatsD dashboard

Download the dashboard JSON file from `https://github.com/taosdata/grafanaplugin/blob/master/examples/statsd/dashboards/statsd-with-tdengine-v0.1.0.json`. Click on the plus icon on the left and select `Import`, and follow the interface prompts to import the JSON file. You will then see the dashboard in the following screen.



## Wrap-up

TDengine, as an emerging time-series big data platform, has the advantages of high performance, high reliability, easy management and easy maintenance. Thanks to the new schemaless protocol parsing feature in TDengine version 2.4.0.0 and its ability to integrate easily with a large software ecosystem, users can build an efficient and easy-to-use IT DevOps visualization system, or adapt an existing system, in just a few minutes.

For more on TDengine's powerful data writing and querying performance and other features, please refer to the official documentation and successful product implementation cases.