Merge branch '3.0' of github.com:taosdata/TDengine into test/chr/TD-14699

This commit is contained in:
tomchon 2022-05-31 15:47:47 +08:00
commit c0ecf2c1b2
102 changed files with 1945 additions and 717 deletions

View File

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.16)
cmake_minimum_required(VERSION 3.0)
project(
TDengine

View File

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.16)
cmake_minimum_required(VERSION 3.0)
set(CMAKE_VERBOSE_MAKEFILE OFF)

View File

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.16)
cmake_minimum_required(VERSION 3.0)
MESSAGE("Current system is ${CMAKE_SYSTEM_NAME}")

View File

@ -52,7 +52,7 @@ INSERT INTO d1001 VALUES (1538548685000, 10.3, 219, 0.31) (1538548695000, 12.6,
:::info
- 要提高写入效率,需要批量写入。一批写入的记录条数越多,插入效率就越高。但一条记录不能超过 16K一条 SQL 语句总长度不能超过 1M 。
- 要提高写入效率,需要批量写入。一批写入的记录条数越多,插入效率就越高。但一条记录不能超过 48K,一条 SQL 语句总长度不能超过 1M 。
- TDengine 支持多线程同时写入,要进一步提高写入速度,一个客户端需要打开 20 个以上的线程同时写。但线程数达到一定数量后,无法再提高,甚至还会下降,因为线程频繁切换,带来额外开销。
:::

View File

@ -4,6 +4,8 @@ title: 支持的数据类型
description: "TDengine 支持的数据类型: 时间戳、浮点型、JSON 类型等"
---
## 时间戳
使用 TDengine最重要的是时间戳。创建并插入记录、查询历史记录的时候均需要指定时间戳。时间戳有如下规则
- 时间格式为 `YYYY-MM-DD HH:mm:ss.MS`,默认时间分辨率为毫秒。比如:`2017-08-12 18:25:58.128`
@ -12,39 +14,59 @@ description: "TDengine 支持的数据类型: 时间戳、浮点型、JSON 类
- Epoch Time时间戳也可以是一个长整数表示从格林威治时间 1970-01-01 00:00:00.000 (UTC/GMT) 开始的毫秒数(相应地,如果所在 Database 的时间精度设置为“微秒”,则长整型格式的时间戳含义也就对应于从格林威治时间 1970-01-01 00:00:00.000 (UTC/GMT) 开始的微秒数;纳秒精度逻辑类似。)
- 时间可以加减,比如 now-2h表明查询时刻向前推 2 个小时(最近 2 小时)。数字后面的时间单位可以是 b(纳秒)、u(微秒)、a(毫秒)、s(秒)、m(分)、h(小时)、d(天)、w(周)。 比如 `select * from t1 where ts > now-2w and ts <= now-1w`表示查询两周前整整一周的数据。在指定降采样操作down sampling的时间窗口interval时间单位还可以使用 n (自然月) 和 y (自然年)。
TDengine 缺省的时间戳精度是毫秒,但通过在 `CREATE DATABASE` 时传递的 PRECISION 参数也可以支持微秒和纳秒。(从 2.1.5.0 版本开始支持纳秒精度)
TDengine 缺省的时间戳精度是毫秒,但通过在 `CREATE DATABASE` 时传递的 PRECISION 参数也可以支持微秒和纳秒。
```sql
CREATE DATABASE db_name PRECISION 'ns';
```
## 数据类型
在 TDengine 中,普通表的数据模型中可使用以下 10 种数据类型。
在 TDengine 中,普通表的数据模型中可使用以下数据类型。
| # | **类型** | **Bytes** | **说明** |
| --- | :-------: | --------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| 1 | TIMESTAMP | 8 | 时间戳。缺省精度毫秒,可支持微秒和纳秒。从格林威治时间 1970-01-01 00:00:00.000 (UTC/GMT) 开始,计时不能早于该时间。(从 2.0.18.0 版本开始,已经去除了这一时间范围限制)(从 2.1.5.0 版本开始支持纳秒精度) |
| 2 | INT | 4 | 整型,范围 [-2^31+1, 2^31-1], -2^31 用作 NULL |
| 3 | BIGINT | 8 | 长整型,范围 [-2^63+1, 2^63-1], -2^63 用作 NULL |
| 4 | FLOAT | 4 | 浮点型,有效位数 6-7范围 [-3.4E38, 3.4E38] |
| 5 | DOUBLE | 8 | 双精度浮点型,有效位数 15-16范围 [-1.7E308, 1.7E308] |
| 6 | BINARY | 自定义 | 记录单字节字符串,建议只用于处理 ASCII 可见字符,中文等多字节字符需使用 nchar。理论上最长可以有 16374 字节。binary 仅支持字符串输入,字符串两端需使用单引号引用。使用时须指定大小,如 binary(20) 定义了最长为 20 个单字节字符的字符串,每个字符占 1 byte 的存储空间,总共固定占用 20 bytes 的空间,此时如果用户字符串超出 20 字节将会报错。对于字符串内的单引号,可以用转义字符反斜线加单引号来表示,即 `\`。 |
| 7 | SMALLINT | 2 | 短整型, 范围 [-32767, 32767], -32768 用作 NULL |
| 8 | TINYINT | 1 | 单字节整型,范围 [-127, 127], -128 用作 NULL |
| 9 | BOOL | 1 | 布尔型,{true, false} |
| 10 | NCHAR | 自定义 | 记录包含多字节字符在内的字符串,如中文字符。每个 nchar 字符占用 4 bytes 的存储空间。字符串两端使用单引号引用,字符串内的单引号需用转义字符 `\`。nchar 使用时须指定字符串大小,类型为 nchar(10) 的列表示此列的字符串最多存储 10 个 nchar 字符,会固定占用 40 bytes 的空间。如果用户字符串长度超出声明长度,将会报错。 |
| 11 | JSON | | json 数据类型, 只有 tag 可以是 json 格式 |
| 1 | TIMESTAMP | 8 | 时间戳。缺省精度毫秒,可支持微秒和纳秒,详细说明见上节。 |
| 2 | INT | 4 | 整型,范围 [-2^31, 2^31-1] |
| 3 | INT UNSIGNED| 4| 无符号整数,[0, 2^32-1] |
| 4 | BIGINT | 8 | 长整型,范围 [-2^63, 2^63-1] |
| 5 | BIGINT UNSIGNED | 8 | 无符号长整型,范围 [0, 2^64-1] |
| 6 | FLOAT | 4 | 浮点型,有效位数 6-7范围 [-3.4E38, 3.4E38] |
| 7 | DOUBLE | 8 | 双精度浮点型,有效位数 15-16范围 [-1.7E308, 1.7E308] |
| 8 | BINARY | 自定义 | 记录单字节字符串,建议只用于处理 ASCII 可见字符,中文等多字节字符需使用 nchar。 |
| 9 | SMALLINT | 2 | 短整型, 范围 [-32768, 32767] |
| 10 | SMALLINT UNSIGNED | 2| 无符号短整型,范围 [0, 65535] |
| 11 | TINYINT | 1 | 单字节整型,范围 [-128, 127] |
| 12 | TINYINT UNSIGNED | 1 | 无符号单字节整型,范围 [0, 255] |
| 13 | BOOL | 1 | 布尔型,{true, false} |
| 14 | NCHAR | 自定义 | 记录包含多字节字符在内的字符串,如中文字符。每个 nchar 字符占用 4 bytes 的存储空间。字符串两端使用单引号引用,字符串内的单引号需用转义字符 `\'`。nchar 使用时须指定字符串大小,类型为 nchar(10) 的列表示此列的字符串最多存储 10 个 nchar 字符,会固定占用 40 bytes 的空间。如果用户字符串长度超出声明长度,将会报错。 |
| 15 | JSON | | json 数据类型, 只有 tag 可以是 json 格式 |
| 16 | VARCHAR | 自定义 | BINARY类型的别名 |
:::tip
TDengine 对 SQL 语句中的英文字符不区分大小写,自动转化为小写执行。因此用户大小写敏感的字符串及密码,需要使用单引号将字符串引起来。
:::
:::note
虽然 BINARY 类型在底层存储上支持字节型的二进制字符,但不同编程语言对二进制数据的处理方式并不保证一致,因此建议在 BINARY 类型中只存储 ASCII 可见字符,而避免存储不可见字符。多字节的数据,例如中文字符,则需要使用 NCHAR 类型进行保存。如果强行使用 BINARY 类型保存中文字符,虽然有时也能正常读写,但并不带有字符集信息,很容易出现数据乱码甚至数据损坏等情况。
- TDengine 对 SQL 语句中的英文字符不区分大小写,自动转化为小写执行。因此用户大小写敏感的字符串及密码,需要使用单引号将字符串引起来。
- 虽然 BINARY 类型在底层存储上支持字节型的二进制字符,但不同编程语言对二进制数据的处理方式并不保证一致,因此建议在 BINARY 类型中只存储 ASCII 可见字符,而避免存储不可见字符。多字节的数据,例如中文字符,则需要使用 NCHAR 类型进行保存。如果强行使用 BINARY 类型保存中文字符,虽然有时也能正常读写,但并不带有字符集信息,很容易出现数据乱码甚至数据损坏等情况。
- BINARY 类型理论上最长可以有 16374 字节。binary 仅支持字符串输入,字符串两端需使用单引号引用。使用时须指定大小,如 binary(20) 定义了最长为 20 个单字节字符的字符串,每个字符占 1 byte 的存储空间,总共固定占用 20 bytes 的空间,此时如果用户字符串超出 20 字节将会报错。对于字符串内的单引号,可以用转义字符反斜线加单引号来表示,即 `\'`。
- SQL 语句中的数值类型将依据是否存在小数点或使用科学计数法表示来判断数值类型是否为整型或者浮点型因此在使用时要注意相应类型越界的情况。例如9999999999999999999 会认为超过长整型的上边界而溢出,而 9999999999999999999.0 会被认为是有效的浮点数。
:::
## 常量
TDengine支持多个类型的常量细节如下表
| # | **语法** | **类型** | **说明** |
| --- | :-------: | --------- | -------------------------------------- |
| 1 | [{+ \| -}]123 | BIGINT | 整型数值的字面量的类型均为BIGINT。如果用户输入超过了BIGINT的表示范围TDengine 按BIGINT对数值进行截断。|
| 2 | 123.45 | DOUBLE | 浮点数值的字面量的类型均为DOUBLE。TDengine依据是否存在小数点或使用科学计数法表示来判断数值类型是否为整型或者浮点型。|
| 3 | 1.2E3 | DOUBLE | 科学计数法的字面量的类型为DOUBLE。|
| 4 | 'abc' | BINARY | 单引号括住的内容为字符串字面值,其类型为BINARY,BINARY的size为实际的字符个数。对于字符串内的单引号,可以用转义字符反斜线加单引号来表示,即 \'。|
| 5 | "abc" | BINARY | 双引号括住的内容为字符串字面值,其类型为BINARY,BINARY的size为实际的字符个数。对于字符串内的双引号,可以用转义字符反斜线加双引号来表示,即 \"。 |
| 6 | TIMESTAMP {'literal' \| "literal"} | TIMESTAMP | TIMESTAMP关键字表示后面的字符串字面量需要被解释为TIMESTAMP类型。字符串需要满足YYYY-MM-DD HH:mm:ss.MS格式其时间分辨率为当前数据库的时间分辨率。 |
| 7 | {TRUE \| FALSE} | BOOL | 布尔类型字面量。 |
| 8 | {'' \| "" \| '\t' \| "\t" \| ' ' \| " " \| NULL } | -- | 空值字面量。可以用于任意类型。|
:::note
SQL 语句中的数值类型将依据是否存在小数点或使用科学计数法表示来判断数值类型是否为整型或者浮点型因此在使用时要注意相应类型越界的情况。例如9999999999999999999 会认为超过长整型的上边界而溢出,而 9999999999999999999.0 会被认为是有效的浮点数。
- TDengine依据是否存在小数点或使用科学计数法表示来判断数值类型是否为整型或者浮点型,因此在使用时要注意相应类型越界的情况。例如,9999999999999999999会认为超过长整型的上边界而溢出,而9999999999999999999.0会被认为是有效的浮点数。
:::

View File

@ -12,7 +12,7 @@ CREATE TABLE [IF NOT EXISTS] tb_name (timestamp_field_name TIMESTAMP, field1_nam
1. 表的第一个字段必须是 TIMESTAMP并且系统自动将其设为主键
2. 表名最大长度为 192
3. 表的每行长度不能超过 16k 个字符;(注意:每个 BINARY/NCHAR 类型的列还会额外占用 2 个字节的存储位置)
3. 表的每行长度不能超过 48KB;(注意:每个 BINARY/NCHAR 类型的列还会额外占用 2 个字节的存储位置)
4. 子表名只能由字母、数字和下划线组成,且不能以数字开头,不区分大小写
5. 使用数据类型 binary 或 nchar需指定其最长的字节数如 binary(20),表示 20 字节;
6. 为了兼容支持更多形式的表名TDengine 引入新的转义符 "\`",可以让表名与关键词不冲突,同时不受限于上述表名称合法性约束检查。但是同样具有长度限制要求。使用转义字符以后,不再对转义字符中的内容进行大小写统一。

View File

@ -86,7 +86,7 @@ ALTER STABLE stb_name MODIFY COLUMN field_name data_type(length);
ALTER STABLE stb_name ADD TAG new_tag_name tag_type;
```
为 STable 增加一个新的标签,并指定新标签的类型。标签总数不能超过 128 个,总长度不超过 16k 个字符
为 STable 增加一个新的标签,并指定新标签的类型。标签总数不能超过 128 个,总长度不超过 16KB
### 删除标签

View File

@ -261,6 +261,92 @@ taos> select hyperloglog(dbig) from shll;
Query OK, 1 row(s) in set (0.008388s)
```
### HISTOGRAM
```
SELECT HISTOGRAM(field_name, bin_type, bin_description, normalized) FROM tb_name [WHERE clause];
```
**功能说明**:统计数据按照用户指定区间的分布。
**返回结果类型**:如归一化参数 normalized 设置为 1,返回结果为双精度浮点类型 DOUBLE,否则为长整型 INT64。
**应用字段**:数值型字段。
**支持的版本**2.6.0.0 及以后的版本。
**适用于**: 表和超级表。
**说明**
1. bin_type 用户指定的分桶类型, 有效输入类型为"user_input", "linear_bin", "log_bin"。
2. bin_description 描述如何生成分桶区间,针对三种桶类型,分别为以下描述格式(均为 JSON 格式字符串)
- "user_input": "[1, 3, 5, 7]"
用户指定 bin 的具体数值。
- "linear_bin": "{"start": 0.0, "width": 5.0, "count": 5, "infinity": true}"
"start" 表示数据起始点,"width" 表示每次 bin 偏移量, "count" 为 bin 的总数,"infinity" 表示是否添加(-inf, inf)作为区间起点跟终点,
生成区间为[-inf, 0.0, 5.0, 10.0, 15.0, 20.0, +inf]。
- "log_bin": "{"start":1.0, "factor": 2.0, "count": 5, "infinity": true}"
"start" 表示数据起始点,"factor" 表示按指数递增的因子,"count" 为 bin 的总数,"infinity" 表示是否添加(-inf, inf)作为区间起点跟终点,
生成区间为[-inf, 1.0, 2.0, 4.0, 8.0, 16.0, +inf]。
3. normalized 是否将返回结果归一化到 0~1 之间 。有效输入为 0 和 1。
**示例**
```mysql
taos> SELECT HISTOGRAM(voltage, "user_input", "[1,3,5,7]", 1) FROM meters;
histogram(voltage, "user_input", "[1,3,5,7]", 1) |
=======================================================
{"lower_bin":1, "upper_bin":3, "count":0.333333} |
{"lower_bin":3, "upper_bin":5, "count":0.333333} |
{"lower_bin":5, "upper_bin":7, "count":0.333333} |
Query OK, 3 row(s) in set (0.004273s)
taos> SELECT HISTOGRAM(voltage, 'linear_bin', '{"start": 1, "width": 3, "count": 3, "infinity": false}', 0) FROM meters;
histogram(voltage, 'linear_bin', '{"start": 1, "width": 3, " |
===================================================================
{"lower_bin":1, "upper_bin":4, "count":3} |
{"lower_bin":4, "upper_bin":7, "count":3} |
{"lower_bin":7, "upper_bin":10, "count":3} |
Query OK, 3 row(s) in set (0.004887s)
taos> SELECT HISTOGRAM(voltage, 'log_bin', '{"start": 1, "factor": 3, "count": 3, "infinity": true}', 0) FROM meters;
histogram(voltage, 'log_bin', '{"start": 1, "factor": 3, "count" |
===================================================================
{"lower_bin":-inf, "upper_bin":1, "count":3} |
{"lower_bin":1, "upper_bin":3, "count":2} |
{"lower_bin":3, "upper_bin":9, "count":6} |
{"lower_bin":9, "upper_bin":27, "count":3} |
{"lower_bin":27, "upper_bin":inf, "count":1} |
```
### ELAPSED
```mysql
SELECT ELAPSED(field_name[, time_unit]) FROM { tb_name | stb_name } [WHERE clause] [INTERVAL(interval [, offset]) [SLIDING sliding]];
```
**功能说明**elapsed函数表达了统计周期内连续的时间长度和twa函数配合使用可以计算统计曲线下的面积。在通过INTERVAL子句指定窗口的情况下统计在给定时间范围内的每个窗口内有数据覆盖的时间范围如果没有INTERVAL子句则返回整个给定时间范围内的有数据覆盖的时间范围。注意ELAPSED返回的并不是时间范围的绝对值而是绝对值除以time_unit所得到的单位个数。
**返回结果类型**:Double
**应用字段**:Timestamp类型
**支持的版本**:2.6.0.0 及以后的版本。
**适用于**: 表,超级表,嵌套查询的外层查询
**说明**
- field_name参数只能是表的第一列即timestamp主键列。
- 按time_unit参数指定的时间单位返回最小是数据库的时间分辨率。time_unit参数未指定时以数据库的时间分辨率为时间单位。
- 可以和interval组合使用返回每个时间窗口的时间戳差值。需要特别注意的是除第一个时间窗口和最后一个时间窗口外中间窗口的时间戳差值均为窗口长度。
- order by asc/desc不影响差值的计算结果。
- 对于超级表需要和group by tbname子句组合使用不可以直接使用。
- 对于普通表不支持和group by子句组合使用。
- 对于嵌套查询仅当内层查询会输出隐式时间戳列时有效。例如select elapsed(ts) from (select diff(value) from sub1)语句diff函数会让内层查询输出隐式时间戳列此为主键列可以用于elapsed函数的第一个参数。相反例如select elapsed(ts) from (select * from sub1) 语句ts列输出到外层时已经没有了主键列的含义无法使用elapsed函数。此外elapsed函数作为一个与时间线强依赖的函数形如select elapsed(ts) from (select diff(value) from st group by tbname)尽管会返回一条计算结果,但并无实际意义,这种用法后续也将被限制。
- 不支持与leastsquares、diff、derivative、top、bottom、last_row、interp等函数混合使用。
## 选择函数
在使用所有的选择函数的时候,可以同时指定输出 ts 列或标签列(包括 tbname这样就可以方便地知道被选出的值是源于哪个数据行的。
@ -698,7 +784,7 @@ SELECT INTERP(field_name) FROM { tb_name | stb_name } WHERE ts='timestamp' [FILL
SELECT TAIL(field_name, k, offset_val) FROM {tb_name | stb_name} [WHERE clause];
```
**功能说明**:返回跳过最后 offset_value 个,然后取连续 k 个记录,不忽略 NULL 值。offset_val 可以不输入。此时返回最后的 k 个记录。当有 offset_val 输入的情况下,该函数功能等效于 `order by ts desc LIMIT k OFFSET offset_val`
**功能说明**:返回跳过最后 offset_val 个,然后取连续 k 个记录,不忽略 NULL 值。offset_val 可以不输入。此时返回最后的 k 个记录。当有 offset_val 输入的情况下,该函数功能等效于 `order by ts desc LIMIT k OFFSET offset_val`
**参数范围**k: [1,100] offset_val: [0,100]。

View File

@ -7,9 +7,9 @@ title: 边界限制
- 数据库名最大长度为 32。
- 表名最大长度为 192不包括数据库名前缀和分隔符
- 每行数据最大长度 16k 个字符, 从 2.1.7.0 版本开始,每行数据最大长度 48k 个字符(注意:数据行内每个 BINARY/NCHAR 类型的列还会额外占用 2 个字节的存储位置)。
- 每行数据最大长度 48KB (注意:数据行内每个 BINARY/NCHAR 类型的列还会额外占用 2 个字节的存储位置)。
- 列名最大长度为 64,最多允许 4096 列,最少需要 2 列,第一列必须是时间戳。注:从 2.1.7.0 版本(不含)以前最多允许 1024 列
- 标签名最大长度为 64最多允许 128 个,至少要有 1 个标签,一个表中标签值的总长度不超过 16k 个字符
- 标签名最大长度为 64最多允许 128 个,至少要有 1 个标签,一个表中标签值的总长度不超过 16KB
- SQL 语句最大长度 1048576 个字符,也可通过客户端配置参数 maxSQLLength 修改,取值范围 65480 ~ 1048576。
- SELECT 语句的查询结果,最多允许返回 4096 列(语句中的函数调用可能也会占用一些列空间),超限时需要显式指定较少的返回数据列,以避免语句执行报错。注: 2.1.7.0 版本(不含)之前为最多允许 1024 列
- 库的数目,超级表的数目、表的数目,系统不做限制,仅受系统资源限制。

View File

@ -23,17 +23,17 @@ title: TDengine 参数限制与保留关键字
去掉了 `` ‘“`\ `` (单双引号、撇号、反斜杠、空格)
- 数据库名:不能包含“.”以及特殊字符,不能超过 32 个字符
- 表名:不能包含“.”以及特殊字符,与所属数据库名一起,不能超过 192 个字符,每行数据最大长度 16k 个字符
- 表的列名:不能包含特殊字符,不能超过 64 个字
- 表名:不能包含“.”以及特殊字符,与所属数据库名一起,不能超过 192 个字节 ,每行数据最大长度 48KB
- 表的列名:不能包含特殊字符,不能超过 64 个字节
- 数据库名、表名、列名,都不能以数字开头,合法的可用字符集是“英文字符、数字和下划线”
- 表的列数:不能超过 1024 列,最少需要 2 列,第一列必须是时间戳(从 2.1.7.0 版本开始,改为最多支持 4096 列)
- 记录的最大长度:包括时间戳 8 byte不能超过 16KB每个 BINARY/NCHAR 类型的列还会额外占用 2 个 byte 的存储位置)
- 单条 SQL 语句默认最大字符串长度1048576 byte但可通过系统配置参数 maxSQLLength 修改,取值范围 65480 ~ 1048576 byte
- 记录的最大长度:包括时间戳 8 字节,不能超过 48KB每个 BINARY/NCHAR 类型的列还会额外占用 2 个 字节 的存储位置)
- 单条 SQL 语句默认最大字符串长度1048576 字节,但可通过系统配置参数 maxSQLLength 修改,取值范围 65480 ~ 1048576 字节
- 数据库副本数:不能超过 3
- 用户名:不能超过 23 个 byte
- 用户密码:不能超过 15 个 byte
- 用户名:不能超过 23 个 字节
- 用户密码:不能超过 15 个 字节
- 标签(Tags)数量:不能超过 128 个,可以 0 个
- 标签的总长度:不能超过 16K byte
- 标签的总长度:不能超过 16KB
- 记录条数:仅受存储空间限制
- 表的个数:仅受节点个数限制
- 库的个数:仅受节点个数限制
@ -85,3 +85,44 @@ title: TDengine 参数限制与保留关键字
| CONNECTIONS | HAVING | NOT | SOFFSET | VNODES |
| CONNS | ID | NOTNULL | STABLE | WAL |
| COPY | IF | NOW | STABLES | WHERE |
| _C0 | _QSTART | _QSTOP | _QDURATION | _WSTART |
| _WSTOP | _WDURATION |
## 特殊说明
### TBNAME
`TBNAME` 可以视为超级表中一个特殊的标签,代表子表的表名。
获取一个超级表所有的子表名及相关的标签信息:
```mysql
SELECT TBNAME, location FROM meters;
```
统计超级表下辖子表数量:
```mysql
SELECT COUNT(TBNAME) FROM meters;
```
以上两个查询均只支持在WHERE条件子句中添加针对标签TAGS的过滤条件。例如
```mysql
taos> SELECT TBNAME, location FROM meters;
tbname | location |
==================================================================
d1004 | California.SanFrancisco |
d1003 | California.SanFrancisco |
d1002 | California.LosAngeles |
d1001 | California.LosAngeles |
Query OK, 4 row(s) in set (0.000881s)
taos> SELECT COUNT(tbname) FROM meters WHERE groupId > 2;
count(tbname) |
========================
2 |
Query OK, 1 row(s) in set (0.001091s)
```
### _QSTART/_QSTOP/_QDURATION
表示查询过滤窗口的起始,结束以及持续时间 (从2.6.0.0版本开始支持)
### _WSTART/_WSTOP/_WDURATION
窗口切分聚合查询(例如 interval/session window/state window中表示每个切分窗口的起始结束以及持续时间从 2.6.0.0 版本开始支持)
### _c0
表示表或超级表的第一列

View File

@ -14,7 +14,6 @@ import NodeInfluxLine from "../../07-develop/03-insert-data/_js_line.mdx";
import NodeOpenTSDBTelnet from "../../07-develop/03-insert-data/_js_opts_telnet.mdx";
import NodeOpenTSDBJson from "../../07-develop/03-insert-data/_js_opts_json.mdx";
import NodeQuery from "../../07-develop/04-query-data/_js.mdx";
import NodeAsyncQuery from "../../07-develop/04-query-data/_js_async.mdx";
`td2.0-connector` 和 `td2.0-rest-connector` 是 TDengine 的官方 Node.js 语言连接器。Node.js 开发人员可以通过它开发可以存取 TDengine 集群数据的应用软件。
@ -189,14 +188,8 @@ let cursor = conn.cursor();
### 查询数据
#### 同步查询
<NodeQuery />
#### 异步查询
<NodeAsyncQuery />
## 更多示例程序
| 示例程序 | 示例程序描述 |

View File

@ -82,7 +82,7 @@ st,t1=3,t2=4,t3=t3 c1=3i64,c3="passit",c2=false,c4=4f64 1626006833639000000
:::tip
无模式所有的处理逻辑,仍会遵循 TDengine 对数据结构的底层限制,例如每行数据的总长度不能超过
16k 字节。这方面的具体限制约束请参见 [TAOS SQL 边界限制](/taos-sql/limit)
48KB。这方面的具体限制约束请参见 [TAOS SQL 边界限制](/taos-sql/limit)
:::

View File

@ -7,7 +7,7 @@ TDengine Kafka Connector 包含两个插件: TDengine Source Connector 和 TDeng
## 什么是 Kafka Connect
Kafka Connect 是 Apache Kafka 的一个组件,用于使其它系统,比如数据库、云服务、文件系统等能方便地连接到 Kafka。数据既可以通过 Kafka Connect 从其它系统流向 Kafka, 也可以通过 Kafka Connect 从 Kafka 流向其它系统。从其它系统读数据的插件称为 Source Connector, 写数据到其它系统的插件称为 Sink Connector。Source Connector 和 Sink Connector 都不会直接连接 Kafka BrokerSource Connector 把数据转交给 Kafka Connect。Sink Connector 从 Kafka Connect 接收数据。
Kafka Connect 是 [Apache Kafka](https://kafka.apache.org/) 的一个组件,用于使其它系统,比如数据库、云服务、文件系统等能方便地连接到 Kafka。数据既可以通过 Kafka Connect 从其它系统流向 Kafka, 也可以通过 Kafka Connect 从 Kafka 流向其它系统。从其它系统读数据的插件称为 Source Connector, 写数据到其它系统的插件称为 Sink Connector。Source Connector 和 Sink Connector 都不会直接连接 Kafka Broker,Source Connector 把数据转交给 Kafka Connect。Sink Connector 从 Kafka Connect 接收数据。
![TDengine Database Kafka Connector -- Kafka Connect structure](kafka/Kafka_Connect.webp)
@ -17,7 +17,7 @@ TDengine Source Connector 用于把数据实时地从 TDengine 读出来发送
## 什么是 Confluent
Confluent 在 Kafka 的基础上增加很多扩展功能。包括:
[Confluent](https://www.confluent.io/) 在 Kafka 的基础上增加很多扩展功能。包括:
1. Schema Registry
2. REST 代理
@ -81,10 +81,10 @@ Development: false
git clone https://github.com/taosdata/kafka-connect-tdengine.git
cd kafka-connect-tdengine
mvn clean package
unzip -d $CONFLUENT_HOME/share/confluent-hub-components/ target/components/packages/taosdata-kafka-connect-tdengine-0.1.0.zip
unzip -d $CONFLUENT_HOME/share/java/ target/components/packages/taosdata-kafka-connect-tdengine-*.zip
```
以上脚本先 clone 项目源码,然后用 Maven 编译打包。打包完成后在 `target/components/packages/` 目录生成了插件的 zip 包。把这个 zip 包解压到安装插件的路径即可。安装插件的路径在配置文件 `$CONFLUENT_HOME/etc/kafka/connect-standalone.properties` 中。默认的路径为 `$CONFLUENT_HOME/share/confluent-hub-components/`。
以上脚本先 clone 项目源码,然后用 Maven 编译打包。打包完成后在 `target/components/packages/` 目录生成了插件的 zip 包。把这个 zip 包解压到安装插件的路径即可。上面的示例中使用了内置的插件安装路径: `$CONFLUENT_HOME/share/java/`。
### 用 confluent-hub 安装
@ -98,7 +98,7 @@ confluent local services start
```
:::note
一定要先安装插件再启动 Confluent, 否则会出现找不到类的错误。Kafka Connect 的日志(默认路径: /tmp/confluent.xxxx/connect/logs/connect.log中会输出成功安装的插件据此可判断插件是否安装成功
一定要先安装插件再启动 Confluent, 否则加载插件会失败
:::
:::tip
@ -125,6 +125,61 @@ Control Center is [UP]
清空数据可执行 `rm -rf /tmp/confluent.106668`
:::
### 验证各个组件是否启动成功
输入命令:
```
confluent local services status
```
如果各组件都启动成功,会得到如下输出:
```
Connect is [UP]
Control Center is [UP]
Kafka is [UP]
Kafka REST is [UP]
ksqlDB Server is [UP]
Schema Registry is [UP]
ZooKeeper is [UP]
```
### 验证插件是否安装成功
在 Kafka Connect 组件完全启动后,可用以下命令列出成功加载的插件:
```
confluent local services connect plugin list
```
如果成功安装,会输出如下:
```txt {4,9}
Available Connect Plugins:
[
{
"class": "com.taosdata.kafka.connect.sink.TDengineSinkConnector",
"type": "sink",
"version": "1.0.0"
},
{
"class": "com.taosdata.kafka.connect.source.TDengineSourceConnector",
"type": "source",
"version": "1.0.0"
},
......
```
如果插件安装失败,请检查 Kafka Connect 的启动日志是否有异常信息,用以下命令输出日志路径:
```
echo `cat /tmp/confluent.current`/connect/connect.stdout
```
该命令的输出类似: `/tmp/confluent.104086/connect/connect.stdout`
与日志文件 `connect.stdout` 同一目录,还有一个文件名为: `connect.properties`。在这个文件的末尾,可以看到最终生效的 `plugin.path` 它是一系列用逗号分割的路径。如果插件安装失败,很可能是因为实际的安装路径不包含在 `plugin.path` 中。
## TDengine Sink Connector 的使用
TDengine Sink Connector 的作用是同步指定 topic 的数据到 TDengine。用户无需提前创建数据库和超级表。可手动指定目标数据库的名字(见配置参数 connection.database), 也可按一定规则生成(见配置参数 connection.database.prefix)。
@ -144,7 +199,7 @@ vi sink-demo.properties
sink-demo.properties 内容如下:
```ini title="sink-demo.properties"
name=tdengine-sink-demo
name=TDengineSinkConnector
connector.class=com.taosdata.kafka.connect.sink.TDengineSinkConnector
tasks.max=1
topics=meters
@ -153,6 +208,7 @@ connection.user=root
connection.password=taosdata
connection.database=power
db.schemaless=line
data.precision=ns
key.converter=org.apache.kafka.connect.storage.StringConverter
value.converter=org.apache.kafka.connect.storage.StringConverter
```
@ -179,6 +235,7 @@ confluent local services connect connector load TDengineSinkConnector --config .
"connection.url": "jdbc:TAOS://127.0.0.1:6030",
"connection.user": "root",
"connector.class": "com.taosdata.kafka.connect.sink.TDengineSinkConnector",
"data.precision": "ns",
"db.schemaless": "line",
"key.converter": "org.apache.kafka.connect.storage.StringConverter",
"tasks.max": "1",
@ -223,10 +280,10 @@ Database changed.
taos> select * from meters;
ts | current | voltage | phase | groupid | location |
===============================================================================================================================================================
2022-03-28 09:56:51.249000000 | 11.800000000 | 221.000000000 | 0.280000000 | 2 | California.LosAngeles |
2022-03-28 09:56:51.250000000 | 13.400000000 | 223.000000000 | 0.290000000 | 2 | California.LosAngeles |
2022-03-28 09:56:51.249000000 | 10.800000000 | 223.000000000 | 0.290000000 | 3 | California.LosAngeles |
2022-03-28 09:56:51.250000000 | 11.300000000 | 221.000000000 | 0.350000000 | 3 | California.LosAngeles |
2022-03-28 09:56:51.249000000 | 11.800000000 | 221.000000000 | 0.280000000 | 2 | California.LosAngeles |
2022-03-28 09:56:51.250000000 | 13.400000000 | 223.000000000 | 0.290000000 | 2 | California.LosAngeles |
2022-03-28 09:56:51.249000000 | 10.800000000 | 223.000000000 | 0.290000000 | 3 | California.LosAngeles |
2022-03-28 09:56:51.250000000 | 11.300000000 | 221.000000000 | 0.350000000 | 3 | California.LosAngeles |
Query OK, 4 row(s) in set (0.004208s)
```
@ -356,21 +413,33 @@ confluent local services connect connector unload TDengineSourceConnector
2. `connection.database.prefix` 当 connection.database 为 null 时, 目标数据库的前缀。可以包含占位符 '${topic}'。 比如 kafka_${topic}, 对于主题 'orders' 将写入数据库 'kafka_orders'。 默认 null。当为 null 时,目标数据库的名字和主题的名字是一致的。
3. `batch.size`: 分批写入每批记录数。当 Sink Connector 一次接收到的数据大于这个值时将分批写入。
4. `max.retries`: 发生错误时的最大重试次数。默认为 1。
5. `retry.backoff.ms`: 发送错误时重试的时间间隔。单位毫秒,默认 3000。
6. `db.schemaless`: 数据格式,必须指定为: line、json、telnet 中的一个。分别代表 InfluxDB 行协议格式、 OpenTSDB JSON 格式、 OpenTSDB Telnet 行协议格式。
5. `retry.backoff.ms`: 发生错误时重试的时间间隔。单位毫秒,默认为 3000。
6. `db.schemaless`: 数据格式,可选值为:
1. line :代表 InfluxDB 行协议格式
2. json : 代表 OpenTSDB JSON 格式
3. telnet :代表 OpenTSDB Telnet 行协议格式
7. `data.precision`: 使用 InfluxDB 行协议格式时,时间戳的精度。可选值为:
1. ms 表示毫秒
2. us 表示微秒
3. ns 表示纳秒。默认为纳秒。
### TDengine Source Connector 特有的配置
1. `connection.database`: 源数据库名称,无缺省值。
2. `topic.prefix` 数据导入 kafka 后 topic 名称前缀。 使用 `topic.prefix` + `connection.database` 名称作为完整 topic 名。默认为空字符串 ""。
3. `timestamp.initial`: 数据同步起始时间。格式为'yyyy-MM-dd HH:mm:ss'。默认 "1970-01-01 00:00:00"。
4. `poll.interval.ms`: 拉取数据间隔,单位为 ms。默认 1000。
3. `timestamp.initial`: 数据同步起始时间。格式为'yyyy-MM-dd HH:mm:ss'。默认 "1970-01-01 00:00:00"。
4. `poll.interval.ms`: 拉取数据间隔,单位为 ms。默认 1000。
5. `fetch.max.rows` : 检索数据库时最大检索条数。 默认为 100。
6. `out.format`: 数据格式。取值 line 或 json。line 表示 InfluxDB Line 协议格式, json 表示 OpenTSDB JSON 格式。默认 line。
6. `out.format`: 数据格式。取值 line 或 json。line 表示 InfluxDB Line 协议格式, json 表示 OpenTSDB JSON 格式。默认为 line。
## 其他说明
1. 插件的安装位置可以自定义请参考官方文档https://docs.confluent.io/home/connect/self-managed/install.html#install-connector-manually。
2. 本教程的示例程序使用了 Confluent 平台,但是 TDengine Kafka Connector 本身同样适用于独立安装的 Kafka, 且配置方法相同。关于如何在独立安装的 Kafka 环境使用 Kafka Connect 插件, 请参考官方文档: https://kafka.apache.org/documentation/#connect。
## 问题反馈
https://github.com/taosdata/kafka-connect-tdengine/issues
无论遇到任何问题,都欢迎在本项目的 Github 仓库反馈: https://github.com/taosdata/kafka-connect-tdengine/issues
## 参考

View File

@ -222,21 +222,9 @@ TDengine 中时间戳的时区总是由客户端进行处理,而与服务端
### 23. TDengine 2.0 都会用到哪些网络端口?
在 TDengine 2.0 版本中,会用到以下这些网络端口(以默认端口 6030 为前提进行说明,如果修改了配置文件中的设置,那么这里列举的端口都会随之出现变化),管理员可以参考这里的信息调整防火墙设置:
使用到的网络端口请看文档:[serverport](/reference/config/#serverport)
| 协议 | 默认端口 | 用途说明 | 修改方法 |
| :--- | :-------- | :---------------------------------- | :------------------------------- |
| TCP | 6030 | 客户端与服务端之间通讯。 | 由配置文件设置 serverPort 决定。 |
| TCP | 6035 | 多节点集群的节点间通讯。 | 随 serverPort 端口变化。 |
| TCP | 6040 | 多节点集群的节点间数据同步。 | 随 serverPort 端口变化。 |
| TCP | 6041 | 客户端与服务端之间的 RESTful 通讯。 | 随 serverPort 端口变化。2.4.0.0 及以上版本由 taosAdapter 配置。 |
| TCP | 6042 | Arbitrator 的服务端口。 | 随 Arbitrator 启动参数设置变化。 |
| TCP | 6043 | TaosKeeper 监控服务端口。 | 随 TaosKeeper 启动参数设置变化。 |
| TCP | 6044 | 支持 StatsD 的数据接入端口。 | 随 taosAdapter 启动参数设置变化( 2.4.0.0 及以上版本)。 |
| UDP | 6045 | 支持 collectd 数据接入端口。 | 随 taosAdapter 启动参数设置变化( 2.4.0.0 及以上版本)。 |
| TCP | 6060 | 企业版内 Monitor 服务的网络端口。 | |
| UDP | 6030-6034 | 客户端与服务端之间通讯。 | 随 serverPort 端口变化。 |
| UDP | 6035-6039 | 多节点集群的节点间通讯。 | 随 serverPort 端口变化。 |
需要注意,文档上列举的端口号都是以默认端口 6030 为前提进行说明,如果修改了配置文件中的设置,那么列举的端口都会随之出现变化,管理员可以参考上述的信息调整防火墙设置。
### 24. 为什么 RESTful 接口无响应、Grafana 无法添加 TDengine 为数据源、TDengineGUI 选了 6041 端口还是无法连接成功??

View File

@ -12,6 +12,6 @@ Between two major release versions, some beta versions may be delivered for user
For the details please refer to [Install and Uninstall](/operation/pkg-install).
To see the details of versions, please refer to [Download List](https://www.taosdata.com/all-downloads) and [Release Notes](https://github.com/taosdata/TDengine/releases).
To see the details of versions, please refer to [Download List](https://tdengine.com/all-downloads) and [Release Notes](https://github.com/taosdata/TDengine/releases).

View File

@ -52,7 +52,7 @@ For more details about `INSERT` please refer to [INSERT](/taos-sql/insert).
:::info
- Inserting in batches can improve performance. Normally, the higher the batch size, the better the performance. Please note that a single row can't exceed 16K bytes and each SQL statement can't exceed 1MB.
- Inserting in batches can improve performance. Normally, the higher the batch size, the better the performance. Please note that a single row can't exceed 48K bytes and each SQL statement can't exceed 1MB.
- Inserting with multiple threads can also improve performance. However, depending on the system resources on the application side and the server side, when the number of inserting threads grows beyond a specific point the performance may drop instead of improving. The proper number of threads needs to be tested in a specific environment to find the best number.
:::

View File

@ -3,6 +3,8 @@ title: Data Types
description: "TDengine supports a variety of data types including timestamp, float, JSON and many others."
---
## TIMESTAMP
When using TDengine to store and query data, the most important part of the data is timestamp. Timestamp must be specified when creating and inserting data rows. Timestamp must follow the rules below:
- The format must be `YYYY-MM-DD HH:mm:ss.MS`, the default time precision is millisecond (ms), for example `2017-08-12 18:25:58.128`
@ -17,33 +19,51 @@ Time precision in TDengine can be set by the `PRECISION` parameter when executin
CREATE DATABASE db_name PRECISION 'ns';
```
## Data Types
In TDengine, the data types below can be used when specifying a column or tag.
| # | **type** | **Bytes** | **Description** |
| --- | :-------: | --------- | ------------------------- |
| 1 | TIMESTAMP | 8 | Default precision is millisecond, microsecond and nanosecond are also supported |
| 2 | INT | 4 | Integer, the value range is [-2^31+1, 2^31-1], while -2^31 is treated as NULL |
| 3 | BIGINT | 8 | Long integer, the value range is [-2^63+1, 2^63-1], while -2^63 is treated as NULL |
| 4 | FLOAT | 4 | Floating point number, the effective number of digits is 6-7, the value range is [-3.4E38, 3.4E38] |
| 5 | DOUBLE | 8 | Double precision floating point number, the effective number of digits is 15-16, the value range is [-1.7E308, 1.7E308] |
| 6 | BINARY | User Defined | Single-byte string for ASCII visible characters. Length must be specified when defining a column or tag of binary type. The string length can be up to 16374 bytes. The string value must be quoted with single quotes. The literal single quote inside the string must be preceded with back slash like `\'` |
| 7 | SMALLINT | 2 | Short integer, the value range is [-32767, 32767], while -32768 is treated as NULL |
| 8 | TINYINT | 1 | Single-byte integer, the value range is [-127, 127], while -128 is treated as NULL |
| 9 | BOOL | 1 | Bool, the value range is {true, false} |
| 10 | NCHAR | User Defined| Multi-Byte string that can include multi byte characters like Chinese characters. Each character of NCHAR type consumes 4 bytes storage. The string value should be quoted with single quotes. Literal single quote inside the string must be preceded with backslash, like `\`. The length must be specified when defining a column or tag of NCHAR type, for example nchar(10) means it can store at most 10 characters of nchar type and will consume fixed storage of 40 bytes. An error will be reported if the string value exceeds the length defined. |
| 11 | JSON | | JSON type can only be used on tags. A tag of json type is excluded with any other tags of any other type |
:::tip
TDengine is case insensitive and treats any characters in the sql command as lower case by default, case sensitive strings must be quoted with single quotes.
:::
| 2 | INT | 4 | Integer, the value range is [-2^31, 2^31-1] |
| 3 |INT UNSIGNED|4 | Unsigned integer, the value range is [0, 2^32-1] |
| 4 | BIGINT | 8 | Long integer, the value range is [-2^63, 2^63-1] |
| 5 | BIGINT UNSIGNED | 8 | Unsigned long integer, the value range is [0, 2^64-1] |
| 6 | FLOAT | 4 | Floating point number, the effective number of digits is 6-7, the value range is [-3.4E38, 3.4E38] |
| 7 | DOUBLE | 8 | Double precision floating point number, the effective number of digits is 15-16, the value range is [-1.7E308, 1.7E308] |
| 8 | BINARY | User Defined | Single-byte string for ASCII visible characters. Length must be specified when defining a column or tag of binary type. The string length can be up to 16374 bytes. The string value must be quoted with single quotes. The literal single quote inside the string must be preceded with back slash like `\'` |
| 9 | SMALLINT | 2 | Short integer, the value range is [-32768, 32767] |
| 10 | SMALLINT UNSIGNED | 2 | Unsigned short integer, the value range is [0, 65535] |
| 11 | TINYINT | 1 | Single-byte integer, the value range is [-128, 127] |
| 12 | TINYINT UNSIGNED | 1 | Unsigned single-byte integer, the value range is [0, 255] |
| 13 | BOOL | 1 | Bool, the value range is {true, false} |
| 14 | NCHAR | User Defined| Multi-Byte string that can include multi byte characters like Chinese characters. Each character of NCHAR type consumes 4 bytes storage. The string value should be quoted with single quotes. Literal single quote inside the string must be preceded with backslash, like `\'`. The length must be specified when defining a column or tag of NCHAR type, for example nchar(10) means it can store at most 10 characters of nchar type and will consume fixed storage of 40 bytes. An error will be reported if the string value exceeds the length defined. |
| 15 | JSON | | JSON type can only be used on tags. A tag of JSON type cannot be combined with tags of any other type |
| 16 | VARCHAR | User Defined| Alias of BINARY type |
:::note
Only ASCII visible characters are suggested to be used in a column or tag of BINARY type. Multi-byte characters must be stored in NCHAR type.
- TDengine is case insensitive and treats any characters in the sql command as lower case by default, case sensitive strings must be quoted with single quotes.
- Only ASCII visible characters are suggested to be used in a column or tag of BINARY type. Multi-byte characters must be stored in NCHAR type.
- Numeric values in SQL statements will be determined as integer or float type according to whether there is decimal point or whether scientific notation is used, so attention must be paid to avoid overflow. For example, 9999999999999999999 will be considered as overflow because it exceeds the upper limit of long integer, but 9999999999999999999.0 will be considered as a legal float number.
:::
## Constants
TDengine supports constants of multiple data types.
| # | **Syntax** | **Type** | **Description** |
| --- | :-------: | --------- | -------------------------------------- |
| 1 | [{+ \| -}]123 | BIGINT | Numeric constants are treated as BIGINT type. The value will be truncated if it exceeds the range of BIGINT type. |
| 2 | 123.45 | DOUBLE | Floating number constants are treated as DOUBLE type. TDengine determines whether it's a floating number based on if decimal point or scientific notation is used. |
| 3 | 1.2E3 | DOUBLE | Constants in scientific notation are treated as DOUBLE type. |
| 4 | 'abc' | BINARY | String constants enclosed by single quotes are treated as BINARY type. Its size is determined as the actual length. Single quote itself can be included by preceding backslash, i.e. `\'`, in a string constant. |
| 5 | "abc" | BINARY | String constants enclosed by double quotes are treated as BINARY type. Its size is determined as the actual length. Double quote itself can be included by preceding backslash, i.e. `\"`, in a string constant. |
| 6 | TIMESTAMP {'literal' \| "literal"} | TIMESTAMP | A string constant following `TIMESTAMP` keyword is treated as TIMESTAMP type. The string should be in the format of "YYYY-MM-DD HH:mm:ss.MS". Its time precision is same as that of the current database being used. |
| 7 | {TRUE \| FALSE} | BOOL | BOOL type constant. |
| 8 | {'' \| "" \| '\t' \| "\t" \| ' ' \| " " \| NULL } | -- | NULL constant, it can be used for any type.|
:::note
Numeric values in SQL statements will be determined as integer or float type according to whether there is decimal point or whether scientific notation is used, so attention must be paid to avoid overflow. For example, 9999999999999999999 will be considered as overflow because it exceeds the upper limit of long integer, but 9999999999999999999.0 will be considered as a legal float number.
- TDengine determines whether it's a floating number based on if decimal point or scientific notation is used. So whether the value is determined as overflow depends on both the value and the determined type. For example, 9999999999999999999 is determined as overflow because it exceeds the upper limit of BIGINT type, while 9999999999999999999.0 is considered as a valid floating number because it is within the range of DOUBLE type.
:::

View File

@ -14,7 +14,7 @@ CREATE TABLE [IF NOT EXISTS] tb_name (timestamp_field_name TIMESTAMP, field1_nam
1. The first column of a table MUST be of type TIMESTAMP. It is automatically set as the primary key.
2. The maximum length of the table name is 192 bytes.
3. The maximum length of each row is 16k bytes, please note that the extra 2 bytes used by each BINARY/NCHAR column are also counted.
3. The maximum length of each row is 48k bytes, please note that the extra 2 bytes used by each BINARY/NCHAR column are also counted.
4. The name of the subtable can only consist of characters from the English alphabet, digits and underscore. Table names can't start with a digit. Table names are case insensitive.
5. The maximum length in bytes must be specified when using BINARY or NCHAR types.
6. Escape character "\`" can be used to avoid the conflict between table names and reserved keywords, above rules will be bypassed when using escape character on table names, but the upper limit for the name length is still valid. The table names specified using escape character are case sensitive. Only ASCII visible characters can be used with escape character.

View File

@ -259,6 +259,100 @@ taos> select hyperloglog(dbig) from shll;
Query OK, 1 row(s) in set (0.008388s)
```
### HISTOGRAM
```
SELECT HISTOGRAM(field_name, bin_type, bin_description, normalized) FROM tb_name [WHERE clause];
```
**Description**: Returns count of data points in user-specified ranges.
**Return value type**: Double or INT64, depending on the `normalized` parameter setting.
**Applicable column type**: Numerical types.
**Applicable versions**: Since version 2.6.0.0.
**Applicable table types**: table, STable
**Explanations**
1. bin_type: parameter to indicate the bucket type, valid inputs are: "user_input", "linear_bin", "log_bin".
2. bin_description: parameter to describe how to generate buckets, can be in the following JSON formats for each bin_type respectively:
- "user_input": "[1, 3, 5, 7]": User specified bin values.
- "linear_bin": "{"start": 0.0, "width": 5.0, "count": 5, "infinity": true}"
"start" - bin starting point.
"width" - bin offset.
"count" - number of bins generated.
"infinity" - whether to add (-inf, inf) as start/end point in generated set of bins.
The above "linear_bin" descriptor generates a set of bins: [-inf, 0.0, 5.0, 10.0, 15.0, 20.0, +inf].
- "log_bin": "{"start":1.0, "factor": 2.0, "count": 5, "infinity": true}"
"start" - bin starting point.
"factor" - exponential factor of bin offset.
"count" - number of bins generated.
"infinity" - whether to add (-inf, inf) as start/end point in generated range of bins.
The above "log_bin" descriptor generates a set of bins:[-inf, 1.0, 2.0, 4.0, 8.0, 16.0, +inf].
3. normalized: setting to 1/0 to turn on/off result normalization.
**Example**
```mysql
taos> SELECT HISTOGRAM(voltage, "user_input", "[1,3,5,7]", 1) FROM meters;
histogram(voltage, "user_input", "[1,3,5,7]", 1) |
=======================================================
{"lower_bin":1, "upper_bin":3, "count":0.333333} |
{"lower_bin":3, "upper_bin":5, "count":0.333333} |
{"lower_bin":5, "upper_bin":7, "count":0.333333} |
Query OK, 3 row(s) in set (0.004273s)
taos> SELECT HISTOGRAM(voltage, 'linear_bin', '{"start": 1, "width": 3, "count": 3, "infinity": false}', 0) FROM meters;
histogram(voltage, 'linear_bin', '{"start": 1, "width": 3, " |
===================================================================
{"lower_bin":1, "upper_bin":4, "count":3} |
{"lower_bin":4, "upper_bin":7, "count":3} |
{"lower_bin":7, "upper_bin":10, "count":3} |
Query OK, 3 row(s) in set (0.004887s)
taos> SELECT HISTOGRAM(voltage, 'log_bin', '{"start": 1, "factor": 3, "count": 3, "infinity": true}', 0) FROM meters;
histogram(voltage, 'log_bin', '{"start": 1, "factor": 3, "count" |
===================================================================
{"lower_bin":-inf, "upper_bin":1, "count":3} |
{"lower_bin":1, "upper_bin":3, "count":2} |
{"lower_bin":3, "upper_bin":9, "count":6} |
{"lower_bin":9, "upper_bin":27, "count":3} |
{"lower_bin":27, "upper_bin":inf, "count":1} |
```
### ELAPSED
```mysql
SELECT ELAPSED(field_name[, time_unit]) FROM { tb_name | stb_name } [WHERE clause] [INTERVAL(interval [, offset]) [SLIDING sliding]];
```
**Description**: `elapsed` function can be used to calculate the continuous time length in which there is valid data. If it's used with `INTERVAL` clause, the returned result is the calculated time length within each time window. If it's used without `INTERVAL` clause, the returned result is the calculated time length within the specified time range. Please note that the return value of `elapsed` is the number of `time_unit` in the calculated time length.
**Return value type**: Double
**Applicable Column type**: Timestamp
**Applicable versions**: Since version 2.6.0.0
**Applicable tables**: table, STable, outer in nested query
**Explanations**
- `field_name` parameter can only be the first column of a table, i.e. timestamp primary key.
- The minimum value of `time_unit` is the time precision of the database. If `time_unit` is not specified, the time precision of the database is used as the default time unit.
- It can be used with `INTERVAL` to get the valid time length of each time window. Please note that the return value is same as the time window for all time windows except for the first and the last time window.
- `order by asc/desc` has no effect on the result.
- `group by tbname` must be used together when `elapsed` is used against a STable.
- `group by` must NOT be used together when `elapsed` is used against a table or sub table.
- When used in nested query, it's only applicable when the inner query outputs an implicit timestamp column as the primary key. For example, `select elapsed(ts) from (select diff(value) from sub1)` is legal usage while `select elapsed(ts) from (select * from sub1)` is not.
- It can't be used with `leastsquares`, `diff`, `derivative`, `top`, `bottom`, `last_row`, `interp`.
## Selection Functions
When any select function is used, timestamp column or tag columns including `tbname` can be specified to show which rows the selected values come from.

View File

@ -46,3 +46,44 @@ There are about 200 keywords reserved by TDengine, they can't be used as the nam
| CONNECTIONS | HAVING | NOT | SOFFSET | VNODES |
| CONNS | ID | NOTNULL | STable | WAL |
| COPY | IF | NOW | STableS | WHERE |
| _C0 | _QSTART | _QSTOP | _QDURATION | _WSTART |
| _WSTOP | _WDURATION |
## Explanations
### TBNAME
`TBNAME` can be considered as a special tag, which represents the name of the subtable, in STable.
Get the table name and tag values of all subtables in a STable.
```mysql
SELECT TBNAME, location FROM meters;
```
Count the number of subtables in a STable.
```mysql
SELECT COUNT(TBNAME) FROM meters;
```
Only filter on TAGS can be used in WHERE clause in the above two query statements.
```mysql
taos> SELECT TBNAME, location FROM meters;
tbname | location |
==================================================================
d1004 | California.SanFrancisco |
d1003 | California.SanFrancisco |
d1002 | California.LosAngeles |
d1001 | California.LosAngeles |
Query OK, 4 row(s) in set (0.000881s)
taos> SELECT COUNT(tbname) FROM meters WHERE groupId > 2;
count(tbname) |
========================
2 |
Query OK, 1 row(s) in set (0.001091s)
```
### _QSTART/_QSTOP/_QDURATION
The start, stop and duration of a query time window (Since version 2.6.0.0).
### _WSTART/_WSTOP/_WDURATION
The start, stop and duration of aggregate query by time window, like interval, session window, state window (Since version 2.6.0.0).
### _c0
The first column of a table or STable.

View File

@ -4,7 +4,7 @@ sidebar_label: C/C++
title: C/C++ Connector
---
C/C++ developers can use TDengine's client driver and the C/C++ connector, to develop their applications to connect to TDengine clusters for data writing, querying, and other functions. To use it, you need to include the TDengine header file _taos.h_, which lists the function prototypes of the provided APIs; the application also needs to link to the corresponding dynamic libraries on the platform where it is located.
C/C++ developers can use TDengine's client driver and the C/C++ connector, to develop their applications to connect to TDengine clusters for data writing, querying, and other functions. To use the C/C++ connector you must include the TDengine header file _taos.h_, which lists the function prototypes of the provided APIs. The application also needs to link to the corresponding dynamic libraries on the platform where it is located.
```c
#include <taos.h>
@ -26,7 +26,7 @@ Please refer to [list of supported platforms](/reference/connector#supported-pla
## Supported versions
The version number of the TDengine client driver and the version number of the TDengine server require one-to-one correspondence and recommend using the same version of client driver as what the TDengine server version is. Although a lower version of the client driver is compatible to work with a higher version of the server, if the first three version numbers are the same (i.e., only the fourth version number is different), but it is not recommended. It is strongly discouraged to use a higher version of the client driver to access a lower version of the TDengine server.
The version number of the TDengine client driver and the version number of the TDengine server should be the same. A lower version of the client driver is compatible with a higher version of the server, if the first three version numbers are the same (i.e., only the fourth version number is different). For example, if the client version is x.y.z.1 and the server version is x.y.z.2, the client and server are compatible. But in general we do not recommend using a lower client version with a newer server version. It is also strongly discouraged to use a higher version of the client driver to access a lower version of the TDengine server.
## Installation steps
@ -55,7 +55,7 @@ In the above example code, `taos_connect()` establishes a connection to port 603
:::note
- If not specified, when the return value of the API is an integer, _0_ means success, the others are error codes representing the reason for failure, and when the return value is a pointer, _NULL_ means failure.
- If not specified, when the return value of the API is an integer, _0_ means success. All others are error codes representing the reason for failure. When the return value is a pointer, _NULL_ means failure.
- All error codes and their corresponding causes are described in the `taoserror.h` file.
:::
@ -140,13 +140,12 @@ The base API is used to do things like create database connections and provide a
- `void taos_cleanup()`
Clean up the runtime environment and should be called before the application exits.
Cleans up the runtime environment and should be called before the application exits.
- ` int taos_options(TSDB_OPTION option, const void * arg, ...) `
Set client options, currently supports region setting (`TSDB_OPTION_LOCALE`), character set
(`TSDB_OPTION_CHARSET`), time zone
(`TSDB_OPTION_TIMEZONE`), configuration file path (`TSDB_OPTION_CONFIGDIR`) . The region setting, character set, and time zone default to the current settings of the operating system.
(`TSDB_OPTION_CHARSET`), time zone (`TSDB_OPTION_TIMEZONE`), configuration file path (`TSDB_OPTION_CONFIGDIR`). The region setting, character set, and time zone default to the current settings of the operating system.
- `char *taos_get_client_info()`
@ -159,7 +158,7 @@ The base API is used to do things like create database connections and provide a
- host: FQDN of any node in the TDengine cluster
- user: user name
- pass: password
- db: database name, if the user does not provide, it can also be connected correctly, the user can create a new database through this connection, if the user provides the database name, it means that the database user has already created, the default use of the database
- db: the database name. Even if the user does not provide this, the connection will still work correctly. The user can create a new database through this connection. If the user provides the database name, it means that the database has already been created and the connection can be used for regular operations on the database.
- port: the port the taosd program is listening on
NULL indicates a failure. The application needs to save the returned parameters for subsequent use.
@ -187,7 +186,7 @@ The APIs described in this subsection are all synchronous interfaces. After bein
- `TAOS_RES* taos_query(TAOS *taos, const char *sql)`
Executes an SQL command, either a DQL, DML, or DDL statement. The `taos` parameter is a handle obtained with `taos_connect()`. You can't tell if the result failed by whether the return value is `NULL`, but by parsing the error code in the result set with the `taos_errno()` function.
Executes an SQL command, either a DQL, DML, or DDL statement. The `taos` parameter is a handle obtained with `taos_connect()`. If the return value is `NULL` this does not necessarily indicate a failure. You can get the error code, if any, by parsing the error code in the result set with the `taos_errno()` function.
- `int taos_result_precision(TAOS_RES *res)`
@ -231,7 +230,7 @@ typedef struct taosField {
- ` void taos_free_result(TAOS_RES *res)`
Frees the query result set and the associated resources. Be sure to call this API to free the resources after the query is completed. Otherwise, it may lead to a memory leak in the application. However, note that the application will crash if you call a function like `taos_consume()` to get the query results after freeing the resources.
Frees the query result set and the associated resources. Be sure to call this API to free the resources after the query is completed. Failing to call this may lead to a memory leak in the application. However, note that the application will crash if you call a function like `taos_consume()` to get the query results after freeing the resources.
- `char *taos_errstr(TAOS_RES *res)`
@ -242,7 +241,7 @@ typedef struct taosField {
Get the reason for the last API call failure. The return value is the error code.
:::note
TDengine version 2.0 and above recommends that each thread of a database application create a separate connection or a connection pool based on threads. It is not recommended to pass the connection (TAOS\*) structure to different threads for shared use in the application. Queries, writes, etc., issued based on TAOS structures are multi-thread safe, but state quantities such as "USE statement" may interfere between threads. In addition, the C connector can dynamically create new database-oriented connections on demand (this procedure is not visible to the user), and it is recommended that `taos_close()` be called only at the final exit of the program to close the connection.
TDengine version 2.0 and above recommends that each thread of a database application create a separate connection or a connection pool based on threads. It is not recommended to pass the connection (TAOS\*) structure to different threads for shared use in the application. Queries, writes, and other operations issued that are based on TAOS structures are multi-thread safe, but state quantities such as the "USE statement" may interfere between threads. In addition, the C connector can dynamically create new database-oriented connections on demand (this procedure is not visible to the user), and it is recommended that `taos_close()` be called only at the final exit of the program to close the connection.
:::
@ -274,12 +273,12 @@ All TDengine's asynchronous APIs use a non-blocking call pattern. Applications c
### Parameter Binding API
In addition to direct calls to `taos_query()` to perform queries, TDengine also provides a set of `bind` APIs that supports parameter binding, similar in style to MySQL, and currently only supports using a question mark `? ` to represent the parameter to be bound.
In addition to direct calls to `taos_query()` to perform queries, TDengine also provides a set of `bind` APIs that supports parameter binding, similar in style to MySQL. TDengine currently only supports using a question mark `? ` to represent the parameter to be bound.
Starting with versions 2.1.1.0 and 2.1.2.0, TDengine has significantly improved the bind APIs to support for data writing (INSERT) scenarios. This avoids the resource consumption of SQL syntax parsing when writing data through the parameter binding interface, thus significantly improving write performance in most cases. A typical operation, in this case, is as follows.
Starting with versions 2.1.1.0 and 2.1.2.0, TDengine has significantly improved the bind APIs to support data writing (INSERT) scenarios. This avoids the resource consumption of SQL syntax parsing when writing data through the parameter binding interface, thus significantly improving write performance in most cases. A typical operation, in this case, is as follows.
1. call `taos_stmt_init()` to create the parameter binding object.
2. call `taos_stmt_prepare()` to parse the INSERT statement. 3.
2. call `taos_stmt_prepare()` to parse the INSERT statement.
3. call `taos_stmt_set_tbname()` to set the table name if it is reserved in the INSERT statement but not the TAGS.
4. call `taos_stmt_set_tbname_tags()` to set the table name and TAGS values if the table name and TAGS are reserved in the INSERT statement (for example, if the INSERT statement takes an automatic table build).
5. call `taos_stmt_bind_param_batch()` to set the value of VALUES in multiple columns, or call `taos_stmt_bind_param()` to set the value of VALUES in a single row.
@ -383,7 +382,7 @@ In addition to writing data using the SQL method or the parameter binding API, w
**return value**
TAOS_RES structure, application can get error message by using `taos_errstr()` and also error code by using `taos_errno()`.
In some cases, the returned TAOS_RES is `NULL`, and it is still possible to call `taos_errno()` to safely get the error code information.
The returned TAOS_RES needs to be freed by the caller. Otherwise, a memory leak will occur.
The returned TAOS_RES needs to be freed by the caller in order to avoid memory leaks.
**Description**
The protocol type is enumerated and contains the following three formats.
@ -416,13 +415,13 @@ The Subscription API currently supports subscribing to one or more tables and co
This function is responsible for starting the subscription service, returning the subscription object on success and `NULL` on failure, with the following parameters.
- taos: the database connection that has been established
- restart: if the subscription already exists, whether to restart or continue the previous subscription
- topic: the topic of the subscription (i.e., the name). This parameter is the unique identifier of the subscription
- sql: the query statement of the subscription, this statement can only be _select_ statement, only the original data should be queried, only the data can be queried in time order
- fp: the callback function when the query result is received (the function prototype will be introduced later), only used when called asynchronously. This parameter should be passed `NULL` when called synchronously
- param: additional parameter when calling the callback function, the system API will pass it to the callback function as it is, without any processing
- interval: polling period in milliseconds. The callback function will be called periodically according to this parameter when called asynchronously. not recommended to set this parameter too small To avoid impact on system performance when called synchronously. If the interval between two calls to `taos_consume()` is less than this period, the API will block until the interval exceeds this period.
- taos: the database connection that has been established.
- restart: if the subscription already exists, whether to restart or continue the previous subscription.
- topic: the topic of the subscription (i.e., the name). This parameter is the unique identifier of the subscription.
- sql: the query statement of the subscription which can only be a _select_ statement. Only the original data should be queried, and data can only be queried in temporal order.
- fp: the callback function invoked when the query result is received; it is only used when called asynchronously. This parameter should be passed `NULL` when called synchronously. The function prototype is described below.
- param: additional parameter when calling the callback function. The system API will pass it to the callback function as is, without any processing.
- interval: polling period in milliseconds. The callback function will be called periodically according to this parameter when called asynchronously. The interval should not be too small to avoid impact on system performance when called synchronously. If the interval between two calls to `taos_consume()` is less than this period, the API will block until the interval exceeds this period.
- ` typedef void (*TAOS_SUBSCRIBE_CALLBACK)(TAOS_SUB* tsub, TAOS_RES *res, void* param, int code)`

View File

@ -179,9 +179,9 @@ namespace TDengineExample
1. "Unable to establish connection", "Unable to resolve FQDN"
Usually, it cause by the FQDN configuration is incorrect, you can refer to [How to understand TDengine's FQDN (Chinese)](https://www.taosdata.com/blog/2021/07/29/2741.html) to solve it. 2.
Usually, it's caused by an incorrect FQDN configuration. Please refer to this section in the [FAQ](https://docs.tdengine.com/2.4/train-faq/faq/#2-how-to-handle-unable-to-establish-connection) to troubleshoot.
Unhandled exception. System.DllNotFoundException: Unable to load DLL 'taos' or one of its dependencies: The specified module cannot be found.
2. Unhandled exception. System.DllNotFoundException: Unable to load DLL 'taos' or one of its dependencies: The specified module cannot be found.
This is usually because the program did not find the dependent client driver. The solution on Windows is to copy `C:\TDengine\driver\taos.dll` to the `C:\Windows\System32\` directory; on Linux, create the following soft link: `ln -s /usr/local/taos/driver/libtaos.so.x.x.x.x /usr/lib/libtaos.so`.

View File

@ -14,7 +14,6 @@ import NodeInfluxLine from "../../07-develop/03-insert-data/_js_line.mdx";
import NodeOpenTSDBTelnet from "../../07-develop/03-insert-data/_js_opts_telnet.mdx";
import NodeOpenTSDBJson from "../../07-develop/03-insert-data/_js_opts_json.mdx";
import NodeQuery from "../../07-develop/04-query-data/_js.mdx";
import NodeAsyncQuery from "../../07-develop/04-query-data/_js_async.mdx";
`td2.0-connector` and `td2.0-rest-connector` are the official Node.js language connectors for TDengine. Node.js developers can develop applications to access TDengine instance data.
@ -189,14 +188,8 @@ let cursor = conn.cursor();
### Query data
#### Synchronous queries
<NodeQuery />
#### asynchronous query
<NodeAsyncQuery />
## More Sample Programs
| Sample Programs | Sample Program Description |
@ -232,7 +225,7 @@ See [video tutorial](https://www.taosdata.com/blog/2020/11/11/1957.html) for the
2. "Unable to establish connection", "Unable to resolve FQDN"
Usually, root cause is the FQDN is not configured correctly. You can refer to [How to understand TDengine's FQDN (In Chinese)](https://www.taosdata.com/blog/2021/07/29/2741.html).
Usually, the root cause is an incorrect FQDN configuration. You can refer to this section in the [FAQ](https://docs.tdengine.com/2.4/train-faq/faq/#2-how-to-handle-unable-to-establish-connection) to troubleshoot.
## Important Updates

View File

@ -30,7 +30,7 @@ taosAdapter provides the following features.
### Install taosAdapter
taosAdapter has been part of TDengine server software since TDengine v2.4.0.0. If you use the TDengine server, you don't need additional steps to install taosAdapter. You can download taosAdapter from [TDengine official website](https://tdengine.com/all-downloads/) to download the TDengine server installation package (taosAdapter is included in v2.4.0.0 and later version). If you need to deploy taosAdapter separately on another server other than the TDengine server, you should install the full TDengine on that server to install taosAdapter. If you need to build taosAdapter from source code, you can refer to the [Building taosAdapter]( https://github.com/taosdata/taosadapter/blob/develop/BUILD.md) documentation.
taosAdapter has been part of TDengine server software since TDengine v2.4.0.0. If you use the TDengine server, you don't need additional steps to install taosAdapter. You can download taosAdapter from [TDengine official website](https://tdengine.com/all-downloads/) to download the TDengine server installation package (taosAdapter is included in v2.4.0.0 and later version). If you need to deploy taosAdapter separately on another server other than the TDengine server, you should install the full TDengine server package on that server to install taosAdapter. If you need to build taosAdapter from source code, you can refer to the [Building taosAdapter]( https://github.com/taosdata/taosadapter/blob/develop/BUILD.md) documentation.
### Start/Stop taosAdapter
@ -38,7 +38,7 @@ On Linux systems, the taosAdapter service is managed by `systemd` by default. Yo
### Remove taosAdapter
Use the command `rmtaos` to remove the TDengine server software if you use tar.gz package or use package management command like rpm or apt to remove the TDengine server, including taosAdapter.
Use the command `rmtaos` to remove the TDengine server software if you use tar.gz package. If you installed using a .deb or .rpm package, use the corresponding command, for your package manager, like apt or rpm to remove the TDengine server, including taosAdapter.
### Upgrade taosAdapter
@ -240,7 +240,7 @@ node_export is an exporter of hardware and OS metrics exposed by the \*NIX kerne
## Memory usage optimization methods
taosAdapter will monitor its memory usage during operation and adjust it with two thresholds. Valid values range from -1 to 100 integers in percent of the system's physical memory.
taosAdapter will monitor its memory usage during operation and adjust it with two thresholds. Valid values are integers between 1 and 100, and represent a percentage of the system's physical memory.
- pauseQueryMemoryThreshold
- pauseAllMemoryThreshold
@ -276,7 +276,7 @@ Corresponding configuration parameter
monitor.pauseQueryMemoryThreshold memory threshold for no more queries Environment variable `TAOS_MONITOR_PAUSE_QUERY_MEMORY_THRESHOLD` (default 70)
```
You can adjust it according to the specific application scenario and operation strategy, and it is recommended to use operation monitoring software to monitor system memory status timely. The load balancer can also check the taosAdapter running status through this interface.
You should adjust this parameter based on your specific application scenario and operation strategy. We recommend using monitoring software to monitor system memory status. The load balancer can also check the taosAdapter running status through this interface.
## taosAdapter Monitoring Metrics
@ -325,7 +325,7 @@ You can also adjust the level of the taosAdapter log output by setting the `--lo
## How to migrate from older TDengine versions to taosAdapter
In TDengine server 2.2.x.x or earlier, the TDengine server process (taosd) contains an embedded HTTP service. As mentioned earlier, taosAdapter is a standalone software managed using `systemd` and has its process ID. And there are some configuration parameters and behaviors that are different between the two. See the following table for details.
In TDengine server 2.2.x.x or earlier, the TDengine server process (taosd) contains an embedded HTTP service. As mentioned earlier, taosAdapter is a standalone software managed using `systemd` and has its own process ID. There are some configuration parameters and behaviors that are different between the two. See the following table for details.
| **#** | **embedded httpd** | **taosAdapter** | **comment** |
| ----- | ------------------- | ------------------------------------ | ------------------------------------------------------------------ ------------------------------------------------------------------------ |

View File

@ -7,7 +7,7 @@ description: "taosBenchmark (once called taosdemo ) is a tool for testing the pe
## Introduction
taosBenchmark (formerly taosdemo ) is a tool for testing the performance of TDengine products. taosBenchmark can test the performance of TDengine's insert, query, and subscription functions and simulate large amounts of data generated by many devices. taosBenchmark can flexibly control the number and type of databases, supertables, tag columns, number and type of data columns, and sub-tables, and types of databases, super tables, the number and types of data columns, the number of sub-tables, the amount of data per sub-table, the time interval for inserting data, the number of working threads, whether and how to insert disordered data, and so on. The installer provides taosdemo as a soft link to taosBenchmark for compatibility with past users.
taosBenchmark (formerly taosdemo) is a tool for testing the performance of TDengine products. taosBenchmark can test the performance of TDengine's insert, query, and subscription functions and simulate large amounts of data generated by many devices. taosBenchmark can flexibly control the number and types of databases, super tables, tag columns, and data columns, the number of sub-tables, the amount of data per sub-table, the time interval for inserting data, the number of working threads, whether and how to insert disordered data, and so on. The installer provides taosdemo as a soft link to taosBenchmark for compatibility and for the convenience of past users.
## Installation
@ -21,7 +21,7 @@ There are two ways to install taosBenchmark:
### Configuration and running methods
taosBenchmark supports two configuration methods: [Command-line arguments](#Command-line arguments in detailed) and [JSON configuration file](#Configuration file arguments in detailed). These two methods are mutually exclusive, and with only one command-line parameter, users can use `-f <json file>` to specify a configuration file when using a configuration file. When running taosBenchmark with command-line arguments and controlling its behavior, users should use other parameters for configuration rather than `-f` parameter. In addition, taosBenchmark offers a special way of running without parameters.
taosBenchmark supports two configuration methods: [Command-line arguments](#Command-line arguments in detailed) and [JSON configuration file](#Configuration file arguments in detailed). These two methods are mutually exclusive. Users can use `-f <json file>` to specify a configuration file. When running taosBenchmark with command-line arguments to control its behavior, users should use other parameters for configuration, but not the `-f` parameter. In addition, taosBenchmark offers a special way of running without parameters.
taosBenchmark supports complete performance testing of TDengine. taosBenchmark supports the TDengine functions in three categories: write, query, and subscribe. These three functions are mutually exclusive, and users can select only one of them each time taosBenchmark runs. It is important to note that the type of functionality to be tested is not configurable when using the command-line configuration method, which can only test writing performance. To test the query and subscription performance of the TDengine, you must use the configuration file method and specify the function type to test via the parameter `filetype` in the configuration file.
@ -35,7 +35,7 @@ Execute the following commands to quickly experience taosBenchmark's default con
taosBenchmark
```
When run without parameters, taosBenchmark connects to the TDengine cluster specified in `/etc/taos` by default and creates a database named test in TDengine, a super table named `meters` under the test database, and 10,000 tables under the super table with 10,000 records written to each table. Note that if there is already a test database, this table is not used. Note that if there is already a test database, this command will delete it first and create a new test database.
When run without parameters, taosBenchmark connects to the TDengine cluster specified in `/etc/taos` by default and creates a database named `test`, a super table named `meters` under the test database, and 10,000 tables under the super table with 10,000 records written to each table. Note that if there is already a database named "test" this command will delete it first and create a new database.
### Run with command-line configuration parameters
@ -45,7 +45,7 @@ The `-f <json file>` argument cannot be used when running taosBenchmark with com
taosBenchmark -I stmt -n 200 -t 100
```
The above command, `taosBenchmark` will create a database named `test`, create a super table `meters` in it, create 100 sub-tables in the super table and insert 200 records for each sub-table using parameter binding.
Using the above command, `taosBenchmark` will create a database named `test`, create a super table `meters` in it, create 100 sub-tables in the super table and insert 200 records for each sub-table using parameter binding.
### Run with the configuration file
@ -95,10 +95,10 @@ taosBenchmark -f <json file>
## Command-line argument in detailed
- **-f/--file <json file\>** :
specify the configuration file to use. This file includes All parameters. And users should not use this parameter with other parameters on the command-line. There is no default value.
specify the configuration file to use. This file includes all parameters. Users should not use this parameter with other parameters on the command-line. There is no default value.
- **-c/--config-dir <dir\>** :
specify the directory where the TDengine cluster configuration file. the default path is `/etc/taos`.
specify the directory where the TDengine cluster configuration file is located. The default path is `/etc/taos`.
- **-h/--host <host\>** :
Specify the FQDN of the TDengine server to connect to. The default value is localhost.
@ -272,13 +272,13 @@ The parameters for creating super tables are configured in `super_tables` in the
- **child_table_prefix** : The prefix of the child table name, mandatory configuration item, no default value.
- **escape_character**: specify the super table and child table names containing escape characters. By default is "no". The value can be "yes" or "no".
- **escape_character**: specify the super table and child table names containing escape characters. The value can be "yes" or "no". The default is "no".
- **auto_create_table**: only takes effect when insert_mode is taosc, rest, or stmt, and childtable_exists is "no". "yes" means taosBenchmark will automatically create non-existent tables when inserting data; "no" means that taosBenchmark will create all tables before inserting.
- **batch_create_tbl_num** : the number of tables per batch when creating sub-tables, default is 10. Note: the actual number of batches may not be the same as this value when the executed SQL statement is larger than the maximum length supported, it will be automatically truncated and re-executed to continue creating.
- **batch_create_tbl_num** : the number of tables per batch when creating sub-tables, default is 10. Note: the actual number of batches may not be the same as this value. If the executed SQL statement is larger than the maximum length supported, it will be automatically truncated and re-executed to continue creating.
- **data_source**: specify the source of data-generating. Default is taosBenchmark randomly generated. Users can configure it as "rand" and "sample". When "sample" is used, taosBenchmark will use the data in the file specified by the `sample_file` parameter.
- **data_source**: specify the source of data-generation. Default is taosBenchmark randomly generated. Users can configure it as "rand" and "sample". When "sample" is used, taosBenchmark will use the data in the file specified by the `sample_file` parameter.
- **insert_mode**: insertion mode with options taosc, rest, stmt, sml, sml-rest, corresponding to normal write, restful interface write, parameter binding interface write, schemaless interface write, restful schemaless interface write (provided by taosAdapter). The default value is taosc.
@ -300,15 +300,15 @@ The parameters for creating super tables are configured in `super_tables` in the
- **partial_col_num**: If this value is a positive number n, only the first n columns are written to, only if insert_mode is taosc and rest, or all columns if n is 0.
- **disorder_ratio** : Specifies the percentage probability of disordered data in the value range [0,50]. The default is 0, which means there is no disorder data.
- **disorder_ratio** : Specifies the percentage probability of disordered (i.e. out-of-order) data in the value range [0,50]. The default is 0, which means there is no disorder data.
- **disorder_range** : Specifies the timestamp fallback range for the disordered data. The generated disorder timestamp is the timestamp that should be used in the non-disorder case minus a random value in this range. Valid only if the percentage of disordered data specified by `-O/--disorder` is greater than 0.
- **disorder_range** : Specifies the timestamp fallback range for the disordered data. The disordered timestamp is generated by subtracting a random value in this range, from the timestamp that would be used in the non-disorder case. Valid only if the percentage of disordered data specified by `-O/--disorder` is greater than 0.
- **timestamp_step**: The timestamp step for inserting data in each child table, in units consistent with the `precision` of the database, the default value is 1.
- **timestamp_step**: The timestamp step for inserting data in each child table, in units consistent with the `precision` of the database. For example, if the `precision` is milliseconds, the timestamp step will be in milliseconds. The default value is 1.
- **start_timestamp** : The timestamp start value of each sub-table, the default value is now.
- **sample_format**: The type of the sample data file, now only "csv" is supported.
- **sample_format**: The type of the sample data file; for now only "csv" is supported.
- **sample_file**: Specify a CSV format file as the data source. It only works when data_source is "sample". If the number of rows in the CSV file is less than or equal to prepared_rand, then taosBenchmark will read the CSV file data cyclically until it is the same as prepared_rand; otherwise, taosBenchmark will read only the rows with the number of prepared_rand. The final number of rows of data generated is the smaller of the two.
@ -341,7 +341,7 @@ The configuration parameters for specifying super table tag columns and data col
- **create_table_thread_count** : The number of threads to build the table, default is 8.
- **connection_pool_size** : The number of pre-established connections to the TDengine server. If not configured, it is the same number of threads specified.
- **connection_pool_size** : The number of pre-established connections to the TDengine server. If not configured, it is the same as the number of threads specified.
- **result_file** : The path to the result output file, the default value is ./output.txt.

View File

@ -1,16 +1,17 @@
---
title: taosdump
description: "taosdump is a tool application that supports backing up data from a running TDengine cluster and restoring the backed up data to the same or another running TDengine cluster."
description: "taosdump is a tool that supports backing up data from a running TDengine cluster and restoring the backed up data to the same, or another running TDengine cluster."
---
## Introduction
taosdump is a tool application that supports backing up data from a running TDengine cluster and restoring the backed up data to the same or another running TDengine cluster.
taosdump is a tool that supports backing up data from a running TDengine cluster and restoring the backed up data to the same, or another running TDengine cluster.
taosdump can back up a database, a super table, or a normal table as a logical data unit or backup data records in the database, super tables, and normal tables. When using taosdump, you can specify the directory path for data backup. If you do not specify a directory, taosdump will back up the data to the current directory by default.
Suppose the specified location already has data files. In that case, taosdump will prompt the user and exit immediately to avoid data overwriting which means that the same path can only be used for one backup.
Please be careful if you see a prompt for this.
If the specified location already has data files, taosdump will prompt the user and exit immediately to avoid data overwriting. This means that the same path can only be used for one backup.
Please be careful if you see a prompt for this and please ensure that you follow best practices and relevant SOPs for data integrity, backup and data security.
Users should not use taosdump to back up raw data, environment settings, hardware information, server configuration, or cluster topology. taosdump uses [Apache AVRO](https://avro.apache.org/) as the data file format to store backup data.
@ -30,7 +31,7 @@ There are two ways to install taosdump:
2. backup multiple specified databases: use `-D db1,db2,... ` parameters;
3. back up some super or normal tables in the specified database: use `-dbname stbname1 stbname2 tbname1 tbname2 ... ` parameters. Note that the first parameter of this input sequence is the database name, and only one database is supported. The second and subsequent parameters are the names of super or normal tables in that database, separated by spaces.
4. back up the system log database: TDengine clusters usually contain a system database named `log`. The data in this database is the data that TDengine runs itself, and the taosdump will not back up the log database by default. If users need to back up the log database, users can use the `-a` or `-allow-sys` command-line parameter.
5. Loose mode backup: taosdump version 1.4.1 onwards provides `-n` and `-L` parameters for backing up data without using escape characters and "loose" mode, which can reduce the number of backups if table names, column names, tag names do not use This can reduce the backup data time and backup data footprint if table names, column names, and tag names do not use `escape character`. If you are unsure about using `-n` and `-L` conditions, please use the default parameters for "strict" mode backup. See the [official documentation](/taos-sql/escape) for a description of escaped characters.
5. Loose mode backup: taosdump version 1.4.1 onwards provides `-n` and `-L` parameters for backing up data without using escape characters and "loose" mode, which can reduce the number of backups if table names, column names, tag names do not use escape characters. This can also reduce the backup data time and backup data footprint. If you are unsure about using `-n` and `-L` conditions, please use the default parameters for "strict" mode backup. See the [official documentation](/taos-sql/escape) for a description of escaped characters.
:::tip
- taosdump versions after 1.4.1 provide the `-I` argument for parsing Avro file schema and data. If users specify `-s` then taosdump will parse the schema only.
@ -58,7 +59,7 @@ Usage: taosdump [OPTION...] dbname [tbname ...]
or: taosdump [OPTION...] -i inpath
or: taosdump [OPTION...] -o outpath
-h, --host=HOST Server host dumping data from. Default is
-h, --host=HOST Server host from which to dump data. Default is
localhost.
-p, --password User password to connect to server. Default is
taosdata.
@ -71,10 +72,10 @@ Usage: taosdump [OPTION...] dbname [tbname ...]
-r, --resultFile=RESULTFILE DumpOut/In Result file path and name.
-a, --allow-sys Allow to dump system database
-A, --all-databases Dump all databases.
-D, --databases=DATABASES Dump inputted databases. Use comma to separate
databases' name.
-D, --databases=DATABASES Dump listed databases. Use comma to separate
database names.
-N, --without-property Dump database without its properties.
-s, --schemaonly Only dump tables' schema.
-s, --schemaonly Only dump table schemas.
-y, --answer-yes Input yes for prompt. It will skip data file
checking!
-d, --avro-codec=snappy Choose an avro codec among null, deflate, snappy,
@ -97,7 +98,7 @@ Usage: taosdump [OPTION...] dbname [tbname ...]
and try. The workable value is related to the
length of the row and type of table schema.
-I, --inspect inspect avro file content and print on screen
-L, --loose-mode Using loose mode if the table name and column name
-L, --loose-mode Use loose mode if the table name and column name
use letter and number only. Default is NOT.
-n, --no-escape No escape char '`'. Default is using it.
-T, --thread-num=THREAD_NUM Number of thread for dump in file. Default is

View File

@ -5,11 +5,11 @@ sidebar_label: TDinsight
TDinsight is a solution for monitoring TDengine using the builtin native monitoring database and [Grafana].
After TDengine starts, it will automatically create a monitoring database `log`. TDengine will automatically write many metrics in specific intervals into the `log` database. The metrics may include the server's CPU, memory, hard disk space, network bandwidth, number of requests, disk read/write speed, slow queries, other information like important system operations (user login, database creation, database deletion, etc.), and error alarms. With [Grafana] and [TDengine Data Source Plugin](https://github.com/taosdata/grafanaplugin/releases), TDinsight can visualize cluster status, node information, insertion and query requests, resource usage, etc., and also vnode, dnode, and mnode status, and exception alerts. Developers monitoring TDengine cluster operation status in real-time can be very convinient. This article will guide users to install the Grafana server, automatically install the TDengine data source plug-in, and deploy the TDinsight visualization panel through `TDinsight.sh` installation script.
After TDengine starts, it will automatically create a monitoring database `log`. TDengine will automatically write many metrics in specific intervals into the `log` database. The metrics may include the server's CPU, memory, hard disk space, network bandwidth, number of requests, disk read/write speed, slow queries, other information like important system operations (user login, database creation, database deletion, etc.), and error alarms. With [Grafana] and [TDengine Data Source Plugin](https://github.com/taosdata/grafanaplugin/releases), TDinsight can visualize cluster status, node information, insertion and query requests, resource usage, vnode, dnode, and mnode status, exception alerts and many other metrics. This is very convenient for developers who want to monitor TDengine cluster status in real-time. This article will guide users to install the Grafana server, automatically install the TDengine data source plug-in, and deploy the TDinsight visualization panel using the `TDinsight.sh` installation script.
## System Requirements
To deploy TDinsight, a single-node TDengine server or a multi-nodes TDengine cluster and a [Grafana] server are required. This dashboard requires TDengine 2.3.3.0 and above, with the `log` database enabled (`monitor = 1`).
To deploy TDinsight, a single-node TDengine server or a multi-node TDengine cluster and a [Grafana] server are required. This dashboard requires TDengine 2.3.3.0 and above, with the `log` database enabled (`monitor = 1`).
## Installing Grafana
@ -17,7 +17,7 @@ We recommend using the latest [Grafana] version 7 or 8 here. You can install Gra
### Installing Grafana on Debian or Ubuntu
For Debian or Ubuntu operating systems, we recommend the Grafana image repository and Use the following command to install from scratch.
For Debian or Ubuntu operating systems, we recommend the Grafana image repository and using the following command to install from scratch.
```bash
sudo apt-get install -y apt-transport-https
@ -71,7 +71,7 @@ chmod +x TDinsight.sh
./TDinsight.sh
```
This script will automatically download the latest [Grafana TDengine data source plugin](https://github.com/taosdata/grafanaplugin/releases/latest) and [TDinsight dashboard](https://grafana.com/grafana/dashboards/15167) with configurable parameters from the command-line options to the [Grafana Provisioning](https://grafana.com/docs/grafana/latest/administration/provisioning/) configuration file to automate deployment and updates, etc. With the alert setting options provided by this script, you can also get built-in support for AliCloud SMS alert notifications.
This script will automatically download the latest [Grafana TDengine data source plugin](https://github.com/taosdata/grafanaplugin/releases/latest) and [TDinsight dashboard](https://grafana.com/grafana/dashboards/15167) with configurable parameters for command-line options to the [Grafana Provisioning](https://grafana.com/docs/grafana/latest/administration/provisioning/) configuration file to automate deployment and updates, etc. With the alert setting options provided by this script, you can also get built-in support for AliCloud SMS alert notifications.
Assume you use TDengine and Grafana's default services on the same host. Run `./TDinsight.sh` and open the Grafana browser window to see the TDinsight dashboard.

View File

@ -13,7 +13,7 @@ The TDengine image starts with the HTTP service activated by default, using the
docker run -d --name tdengine -p 6041:6041 tdengine/tdengine
```
The above command starts a container named "tdengine" and maps the HTTP service end 6041 to the host port 6041. You can verify that the HTTP service provided in this container is available using the following command.
The above command starts a container named "tdengine" and maps the HTTP service port 6041 to the host port 6041. You can verify that the HTTP service provided in this container is available using the following command.
```shell
curl -u root:taosdata -d "show databases" localhost:6041/rest/sql
@ -34,7 +34,7 @@ taos> show databases;
Query OK, 1 row(s) in set (0.002843s)
```
The TDengine server running in the container uses the container's hostname to establish a connection. Using TDengine CLI or various connectors (such as JDBC-JNI) to access the TDengine inside the container from outside the container is more complicated. So the above is the simplest way to access the TDengine service in the container and is suitable for some simple scenarios. Please refer to the next section if you want to access the TDengine service in the container from containerized using TDengine CLI or various connectors in some complex scenarios.
The TDengine server running in the container uses the container's hostname to establish a connection. Using TDengine CLI or various connectors (such as JDBC-JNI) to access the TDengine inside the container from outside the container is more complicated. So the above is the simplest way to access the TDengine service in the container and is suitable for some simple scenarios. Please refer to the next section if you want to access the TDengine service in the container from outside the container using TDengine CLI or various connectors for complex scenarios.
## Start TDengine on the host network
@ -42,7 +42,7 @@ The TDengine server running in the container uses the container's hostname to es
docker run -d --name tdengine --network host tdengine/tdengine
```
The above command starts TDengine on the host network and uses the host's FQDN to establish a connection instead of the container's hostname. It works too, like using `systemctl` to start TDengine on the host. If the TDengine client is already installed on the host, you can access it directly with the following command.
The above command starts TDengine on the host network and uses the host's FQDN to establish a connection instead of the container's hostname. It is the equivalent of using `systemctl` to start TDengine on the host. If the TDengine client is already installed on the host, you can access it directly with the following command.
```shell
$ taos
@ -382,7 +382,7 @@ password: taosdata
Suppose you want to deploy multiple taosAdapters to improve throughput and provide high availability. In that case, the recommended configuration method uses a reverse proxy such as Nginx to offer a unified access entry. For specific configuration methods, please refer to the official documentation of Nginx. Here is an example:
```docker
ersion: "3"
version: "3"
networks:
inter:

View File

@ -78,7 +78,7 @@ taos --dump-config
| Note | REST service is provided by `taosd` before 2.4.0.0 but by `taosAdapter` after 2.4.0.0, the default port of REST service is 6041 |
:::note
TDengine uses continuous 13 ports, both TCP and UDP, from the port specified by `serverPort`. These ports need to be kept open if firewall is enabled. Below table describes the ports used by TDengine in details.
TDengine uses 13 contiguous ports, both TCP and UDP, starting with the port specified by `serverPort`. You should ensure, in your firewall rules, that these ports are kept open. The table below describes the ports used by TDengine in detail.
:::
@ -197,7 +197,7 @@ TDengine uses continuous 13 ports, both TCP and UDP, from the port specified by
| Default Value | TimeZone configured in the host |
:::info
To handle the data insertion and data query from multiple timezones, Unix Timestamp is used and stored TDengine. The timestamp generated from any timezones at same time is same in Unix timestamp. To make sure the time on client side can be converted to Unix timestamp correctly, the timezone must be set properly.
To handle the data insertion and data query from multiple timezones, Unix Timestamp is used and stored in TDengine. The timestamp generated from any timezone at the same time is the same in Unix timestamp. To make sure the time on client side can be converted to Unix timestamp correctly, the timezone must be set properly.
On Linux system, TDengine clients automatically obtain timezone from the host. Alternatively, the timezone can be configured explicitly in configuration file `taos.cfg` like below.
@ -209,7 +209,7 @@ timezone Asia/Shanghai
The above examples are all proper configuration for the timezone of UTC+8. On Windows system, however, `timezone Asia/Shanghai` is not supported, it must be set as `timezone UTC-8`.
The setting for timezone impacts the strings not in Unix timestamp, keywords or functions related to date/time, for example
The setting for timezone impacts strings that are not in Unix timestamp format and keywords or functions related to date/time. For example:
```sql
SELECT count(*) FROM table_name WHERE TS<'2019-04-11 12:01:08';
@ -227,7 +227,7 @@ If the timezone is UTC, it's equal to
SELECT count(*) FROM table_name WHERE TS<1554984068000;
```
To avoid the problems of using time strings, Unix timestamp can be used directly. Furthermore, time strings with timezone can be used in SQL statement, for example "2013-04-12T15:52:01.123+08:00" in RFC3339 format or "2013-04-12T15:52:01.123+0800" in ISO-8601 format, they are not influenced by timezone setting when converted to Unix timestamp.
To avoid the problems of using time strings, Unix timestamp can be used directly. Furthermore, time strings with timezone can be used in SQL statements. For example "2013-04-12T15:52:01.123+08:00" in RFC3339 format or "2013-04-12T15:52:01.123+0800" in ISO-8601 format are not influenced by timezone setting when converted to Unix timestamp.
:::
@ -244,7 +244,7 @@ A specific type "nchar" is provided in TDengine to store non-ASCII characters su
The characters input on the client side are encoded using the default system encoding, which is UTF-8 on Linux, or GB18030 or GBK on some systems in Chinese, POSIX in docker, CP936 on Windows in Chinese. The encoding of the operating system in use must be set correctly so that the characters in nchar type can be converted to UCS4-LE.
The locale definition standard on Linux is: <Language\>\_<Region\>.<charset\>, for example, in "zh_CN.UTF-8", "zh" means Chinese, "CN" means China mainland, "UTF-8" means charset. On Linux andMac OSX, the charset can be set by locale in the system. On Windows system another configuration parameter `charset` must be used to configure charset because the locale used on Windows is not POSIX standard. Of course, `charset` can also be used on Linux to specify the charset.
The locale definition standard on Linux is: <Language\>\_<Region\>.<charset\>, for example, in "zh_CN.UTF-8", "zh" means Chinese, "CN" means China mainland, "UTF-8" means charset. On Linux and Mac OSX, the charset can be set by locale in the system. On Windows system another configuration parameter `charset` must be used to configure charset because the locale used on Windows is not POSIX standard. Of course, `charset` can also be used on Linux to specify the charset.
:::
@ -263,7 +263,7 @@ On Linux, if `charset` is not set in `taos.cfg`, when `taos` is started, the cha
locale zh_CN.UTF-8
```
Besides, on Linux system, if the charset contained in `locale` is not consistent with that set by `charset`, the one who comes later in the configuration file is used.
On a Linux system, if the charset contained in `locale` is not consistent with that set by `charset`, the later setting in the configuration file takes precedence.
```title="Effective charset is GBK"
locale zh_CN.UTF-8
@ -778,7 +778,7 @@ To prevent system resource from being exhausted by multiple concurrent streams,
## HTTP Parameters
:::note
HTTP server had been provided by `taosd` prior to version 2.4.0.0, now is provided by `taosAdapter` after version 2.4.0.0.
HTTP service was provided by `taosd` prior to version 2.4.0.0 and is provided by `taosAdapter` after version 2.4.0.0.
The parameters described in this section are only applicable in versions prior to 2.4.0.0. If you are using any version from 2.4.0.0, please refer to [taosAdapter](/reference/taosadapter/).
:::

View File

@ -1,11 +1,11 @@
---
title: Schemaless Writing
description: "The Schemaless write method eliminates the need to create super tables/sub tables in advance and automatically creates the storage structure corresponding to the data as it is written to the interface."
description: "The Schemaless write method eliminates the need to create super tables/sub tables in advance and automatically creates the storage structure corresponding to the data, as it is written to the interface."
---
In IoT applications, many data items are often collected for intelligent control, business analysis, device monitoring, etc. Due to the version upgrades of the application logic, or the hardware adjustment of the devices themselves, the data collection items may change frequently. To facilitate the data logging work in such cases, TDengine starting from version 2.2.0.0 provides a series of interfaces to the schemaless writing method, which eliminate the need to create super tables and subtables in advance by automatically creating the storage structure corresponding to the data as the data is written to the interface. And when necessary, schemaless writing will automatically add the required columns to ensure that the data written by the user is stored correctly.
In IoT applications, data is collected for many purposes such as intelligent control, business analysis, device monitoring and so on. Due to changes in business or functional requirements or changes in device hardware, the application logic and even the data collected may change. To provide the flexibility needed in such cases and in a rapidly changing IoT landscape, TDengine starting from version 2.2.0.0, provides a series of interfaces for the schemaless writing method. These interfaces eliminate the need to create super tables and subtables in advance by automatically creating the storage structure corresponding to the data as the data is written to the interface. When necessary, schemaless writing will automatically add the required columns to ensure that the data written by the user is stored correctly.
The schemaless writing method creates super tables and their corresponding subtables completely indistinguishable from the super tables and subtables created directly via SQL. You can write data directly to them via SQL statements. Note that the names of tables created by schemaless writing are based on fixed mapping rules for tag values, so they are not explicitly ideographic and lack readability.
The schemaless writing method creates super tables and their corresponding subtables. These are completely indistinguishable from the super tables and subtables created directly via SQL. You can write data directly to them via SQL statements. Note that the names of tables created by schemaless writing are based on fixed mapping rules for tag values, so they are not explicitly ideographic and they lack readability.
## Schemaless Writing Line Protocol
@ -76,8 +76,7 @@ If the subtable obtained by the parse line protocol does not exist, Schemaless c
8. Errors encountered throughout the processing will interrupt the writing process and return an error code.
:::tip
All processing logic of schemaless will still follow TDengine's underlying restrictions on data structures, such as the total length of each row of data cannot exceed
16k bytes. See [TAOS SQL Boundary Limits](/taos-sql/limit) for specific constraints in this area.
All processing logic of schemaless will still follow TDengine's underlying restrictions on data structures, such as the total length of each row of data cannot exceed 48k bytes. See [TAOS SQL Boundary Limits](/taos-sql/limit) for specific constraints in this area.
:::
## Time resolution recognition
@ -87,7 +86,7 @@ Three specified modes are supported in the schemaless writing process, as follow
| **Serial** | **Value** | **Description** |
| -------- | ------------------- | ------------------------------- |
| 1 | SML_LINE_PROTOCOL | InfluxDB Line Protocol |
| 2 | SML_TELNET_PROTOCOL | OpenTSDB Text Line Protocol | | 2 | SML_TELNET_PROTOCOL | OpenTSDB Text Line Protocol
| 2 | SML_TELNET_PROTOCOL | OpenTSDB Text Line Protocol |
| 3 | SML_JSON_PROTOCOL | JSON protocol format |
In the SML_LINE_PROTOCOL parsing mode, the user is required to specify the time resolution of the input timestamp. The available time resolutions are shown in the following table.
@ -106,8 +105,11 @@ In SML_TELNET_PROTOCOL and SML_JSON_PROTOCOL modes, the time precision is determ
## Data schema mapping rules
This section describes how data for line protocols are mapped to data with a schema. The data measurement in each line protocol is mapped to
The tag name in tag_set is the name of the tag in the data schema, and the name in field_set is the column's name. The following data is used as an example to illustrate the mapping rules.
This section describes how data for line protocols are mapped to data with a schema. The data measurement in each line protocol is mapped as follows:
- The tag name in tag_set is the name of the tag in the data schema
- The name in field_set is the column's name.
The following data is used as an example to illustrate the mapping rules.
```json
st,t1=3,t2=4,t3=t3 c1=3i64,c3="passit",c2=false,c4=4f64 1626006833639000000
@ -139,7 +141,7 @@ st,t1=3,t2=4,t3=t3 c1=3i64,c5="pass" 1626006833639000000
st,t1=3,t2=4,t3=t3 c1=3i64,c5="passit" 1626006833640000000
```
The first line of the line protocol parsing will declare column c5 is a BINARY(4) field, the second line data write will extract column c5 is still a BINARY column. Still, its width is 6, then you need to increase the width of the BINARY field to be able to accommodate the new string.
The first line of the line protocol parsing will declare column c5 is a BINARY(4) field. The second line data write will parse column c5 as a BINARY column. But in the second line, c5's width is 6 so you need to increase the width of the BINARY field to be able to accommodate the new string.
```json
st,t1=3,t2=4,t3=t3 c1=3i64 1626006833639000000

View File

@ -25,7 +25,7 @@ The default database name written by taosAdapter is `collectd`. You can also mod
#collectd
collectd uses a plugin mechanism to write the collected monitoring data to different data storage software in various forms. TDengine supports both direct collection plugins and write_tsdb plugins.
#### is configured to receive data from the direct collection plugin
#### Configure the direct collection plugin
Modify the relevant configuration items in the collectd configuration file (default location /etc/collectd/collectd.conf).
@ -62,7 +62,7 @@ LoadPlugin write_tsdb
</Plugin>
```
Where <taosAdapter's host\> fills in the server's domain name or IP address running taosAdapter. <port for collectd write_tsdb plugin\> Fill in the data that taosAdapter uses to receive the collectd write_tsdb plugin (default is 6047).
Where <taosAdapter's host\> is the domain name or IP address of the server running taosAdapter, and <port for collectd write_tsdb plugin\> is the port that taosAdapter uses to receive data from the collectd write_tsdb plugin (default is 6047).
```text
LoadPlugin write_tsdb

View File

@ -17,7 +17,7 @@ password = "taosdata"
...
```
The taosAdapter writes to the database with the default name `tcollector`. You can also modify the taosAdapter configuration file dbs entry to specify a different name. user and password fill in the actual TDengine configuration values. After changing the configuration file, you need to restart the taosAdapter.
The taosAdapter writes to the database with the default name `tcollector`. You can also modify the taosAdapter configuration file dbs entry to specify a different name. Fill in the actual user and password for TDengine. After changing the configuration file, you need to restart the taosAdapter.
- You can also enable taosAdapter to receive tcollector data by using the taosAdapter command-line parameters or setting environment variables.
@ -25,7 +25,7 @@ The taosAdapter writes to the database with the default name `tcollector`. You c
To use TCollector, you need to download its [source code](https://github.com/OpenTSDB/tcollector). Its configuration items are in its source code. Note: TCollector differs significantly from version to version, so here is an example of the latest code for the current master branch (git commit: 37ae920).
Modify the contents of the `collectors/etc/config.py` and `tcollector.py` files. Change the address of the OpenTSDB host to the domain name or IP address of the server where taosAdapter is deployed, and change the port to the port that taosAdapter supports TCollector on (default is 6049).
Modify the contents of the `collectors/etc/config.py` and `tcollector.py` files. Change the address of the OpenTSDB host to the domain name or IP address of the server where taosAdapter is deployed, and change the port to the port on which taosAdapter supports TCollector (default is 6049).
Example of git diff output of source code changes.

View File

@ -3,13 +3,13 @@ sidebar_label: Grafana
title: Grafana
---
TDengine can be quickly integrated with the open-source data visualization system [Grafana](https://www.grafana.com/) to build a data monitoring and alerting system. The whole process does not require any code development. And you can visualize the contents of the data tables in TDengine on a DashBoard.
TDengine can be quickly integrated with the open-source data visualization system [Grafana](https://www.grafana.com/) to build a data monitoring and alerting system. The whole process does not require any code development. And you can visualize the contents of the data tables in TDengine on a dashboard.
You can learn more about using the TDengine plugin on [GitHub](https://github.com/taosdata/grafanaplugin/blob/master/README.md).
## Prerequisites
In order for Grafana to add the TDengine data source successfully, the following preparations are required:
In order for Grafana to add the TDengine data source successfully, the following preparation is required:
1. The TDengine cluster is deployed and functioning properly
2. taosAdapter is installed and running properly. Please refer to the taosAdapter manual for details.
@ -36,7 +36,7 @@ GF_VERSION=3.1.4
wget https://github.com/taosdata/grafanaplugin/releases/download/v$GF_VERSION/tdengine-datasource-$GF_VERSION.zip
```
Take CentOS 7.2 for example, extract the plugin package to /var/lib/grafana/plugins directory, and restart grafana.
On CentOS 7.2, for example, extract the plugin package to the /var/lib/grafana/plugins directory and restart Grafana.
```bash
sudo unzip tdengine-datasource-$GF_VERSION.zip -d /var/lib/grafana/plugins/
@ -76,13 +76,13 @@ Enter the datasource configuration page, and follow the default prompts to modif
- User: TDengine user name.
- Password: TDengine user password.
Click `Save & Test` to test. Follows are a success.
Click `Save & Test` to test. You should see a success message if the test worked.
![TDengine Database TDinsight plugin add database 4](./grafana/add_datasource4.webp)
### Create Dashboard
Go back to the main interface to create the Dashboard, click Add Query to enter the panel query page:
Go back to the main interface to create a dashboard and click Add Query to enter the panel query page:
![TDengine Database TDinsight plugin create dashboard 1](./grafana/create_dashboard1.webp)

View File

@ -5,7 +5,7 @@ title: Telegraf writing
import Telegraf from "../14-reference/_telegraf.mdx"
Telegraf is a viral metrics collection open-source software. Telegraf can collect the operation information of various components without writing any scripts to collect regularly, reducing the difficulty of data acquisition.
Telegraf is a popular, open-source metrics collection software. Telegraf can collect the operation information of various components without having to write any scripts to collect regularly, reducing the difficulty of data acquisition.
Telegraf's data can be written to TDengine by simply adding the output configuration of Telegraf to the URL corresponding to taosAdapter and modifying several configuration items. The presence of Telegraf data in TDengine can take advantage of TDengine's efficient storage query performance and clustering capabilities for time-series data.

View File

@ -6,7 +6,7 @@ title: collectd writing
import CollectD from "../14-reference/_collectd.mdx"
collectd is a daemon used to collect system performance metric data. collectd provides various storage mechanisms to store different values. It periodically counts system performance statistics number while the system is running and storing information. You can use this information to help identify current system performance bottlenecks and predict future system load.
collectd is a daemon used to collect system performance metric data. collectd provides various storage mechanisms to store different values. It periodically counts system performance statistics while the system is running and storing information. You can use this information to help identify current system performance bottlenecks and predict future system load.
You can write the data collected by collectd to TDengine by simply modifying the configuration of collectd to the domain name (or IP address) and corresponding port of the server running taosAdapter. It can take full advantage of TDengine's efficient storage query performance and clustering capability for time-series data.

View File

@ -7,7 +7,7 @@ import StatsD from "../14-reference/_statsd.mdx"
StatsD is a simple daemon for aggregating application metrics, which has evolved rapidly in recent years into a unified protocol for collecting application performance metrics.
You can write StatsD data to TDengine by simply modifying in the configuration file of StatsD with the domain name (or IP address) of the server running taosAdapter and the corresponding port. It can take full advantage of TDengine's efficient storage query performance and clustering capabilities for time-series data.
You can write StatsD data to TDengine by simply modifying the configuration file of StatsD with the domain name (or IP address) of the server running taosAdapter and the corresponding port. It can take full advantage of TDengine's efficient storage query performance and clustering capabilities for time-series data.
## Prerequisites

View File

@ -5,7 +5,7 @@ title: icinga2 writing
import Icinga2 from "../14-reference/_icinga2.mdx"
icinga2 is an open-source software monitoring host and network initially developed from the Nagios network monitoring application. Currently, icinga2 is distributed under the GNU GPL v2 license.
icinga2 is an open-source, host and network monitoring software initially developed from the Nagios network monitoring application. Currently, icinga2 is distributed under the GNU GPL v2 license.
You can write the data collected by icinga2 to TDengine by simply modifying the icinga2 configuration to point to the taosAdapter server and the corresponding port, taking advantage of TDengine's efficient storage and query performance and clustering capabilities for time-series data.

View File

@ -3,7 +3,7 @@ sidebar_label: EMQX Broker
title: EMQX Broker writing
---
MQTT is a popular IoT data transfer protocol, [EMQX](https://github.com/emqx/emqx) is an open-source MQTT Broker software, you can write MQTT data directly to TDengine without any code, you only need to use "rules" in EMQX Dashboard to create a simple configuration. EMQX supports saving data to TDengine by sending it to web services and provides a native TDengine driver for direct saving in the Enterprise Edition. Please refer to the [EMQX official documentation](https://www.emqx.io/docs/en/v4.4/rule/rule-engine.html) for details on how to use it.).
MQTT is a popular IoT data transfer protocol. [EMQX](https://github.com/emqx/emqx) is an open-source MQTT Broker software. You can write MQTT data directly to TDengine without any code. You only need to set up "rules" in EMQX Dashboard to create a simple configuration. EMQX supports saving data to TDengine by sending data to a web service and provides a native TDengine driver for direct saving in the Enterprise Edition. Please refer to the [EMQX official documentation](https://www.emqx.io/docs/en/v4.4/rule/rule-engine.html) for details on how to use it.
## Prerequisites

View File

@ -7,7 +7,7 @@ TDengine Kafka Connector contains two plugins: TDengine Source Connector and TDe
## What is Kafka Connect?
Kafka Connect is a component of Apache Kafka that enables other systems, such as databases, cloud services, file systems, etc., to connect to Kafka easily. Data can flow from other software to Kafka via Kafka Connect and Kafka to other systems via Kafka Connect. Plugins that read data from other software are called Source Connectors, and plugins that write data to other software are called Sink Connectors. Neither Source Connector nor Sink Connector will directly connect to Kafka Broker, and Source Connector transfers data to Kafka Connect. Sink Connector receives data from Kafka Connect.
Kafka Connect is a component of [Apache Kafka](https://kafka.apache.org/) that enables other systems, such as databases, cloud services, file systems, etc., to connect to Kafka easily. Data can flow from other software to Kafka via Kafka Connect and from Kafka to other systems via Kafka Connect. Plugins that read data from other software are called Source Connectors, and plugins that write data to other software are called Sink Connectors. Neither Source Connector nor Sink Connector connects directly to Kafka Broker: Source Connector transfers data to Kafka Connect, and Sink Connector receives data from Kafka Connect.
![TDengine Database Kafka Connector -- Kafka Connect](kafka/Kafka_Connect.webp)
@ -17,7 +17,7 @@ TDengine Source Connector is used to read data from TDengine in real-time and se
## What is Confluent?
Confluent adds many extensions to Kafka. include:
[Confluent](https://www.confluent.io/) adds many extensions to Kafka, including:
1. Schema Registry
2. REST Proxy
@ -79,10 +79,10 @@ Development: false
git clone https://github.com/taosdata/kafka-connect-tdengine.git
cd kafka-connect-tdengine
mvn clean package
unzip -d $CONFLUENT_HOME/share/confluent-hub-components/ target/components/packages/taosdata-kafka-connect-tdengine-0.1.0.zip
unzip -d $CONFLUENT_HOME/share/java/ target/components/packages/taosdata-kafka-connect-tdengine-*.zip
```
The above script first clones the project source code and then compiles and packages it with Maven. After the package is complete, the zip package of the plugin is generated in the `target/components/packages/` directory. Unzip this zip package to the path where the plugin is installed. The path to install the plugin is in the configuration file `$CONFLUENT_HOME/etc/kafka/connect-standalone.properties`. The default path is `$CONFLUENT_HOME/share/confluent-hub-components/`.
The above script first clones the project source code and then compiles and packages it with Maven. After the package is complete, the zip package of the plugin is generated in the `target/components/packages/` directory. Unzip this zip package to the plugin path. We used `$CONFLUENT_HOME/share/java/` above because it's a built-in plugin path.
### Install with confluent-hub
@ -96,7 +96,7 @@ confluent local services start
```
:::note
Be sure to install the plugin before starting Confluent. Otherwise, there will be a class not found error. The log of Kafka Connect (default path: /tmp/confluent.xxxx/connect/logs/connect.log) will output the successfully installed plugin, which users can use to determine whether the plugin is installed successfully.
Be sure to install the plugin before starting Confluent. Otherwise, Kafka Connect will fail to discover the plugins.
:::
:::tip
@ -123,6 +123,59 @@ Control Center is [UP]
To clear data, execute `rm -rf /tmp/confluent.106668`.
:::
### Check Confluent Services Status
Use the command below to check the status of all services:
```
confluent local services status
```
The expected output is:
```
Connect is [UP]
Control Center is [UP]
Kafka is [UP]
Kafka REST is [UP]
ksqlDB Server is [UP]
Schema Registry is [UP]
ZooKeeper is [UP]
```
### Check Successfully Loaded Plugin
After Kafka Connect has completely started, you can use the command below to check whether the plugins were installed successfully:
```
confluent local services connect plugin list
```
The output should contain `TDengineSinkConnector` and `TDengineSourceConnector` as shown below:
```
Available Connect Plugins:
[
{
"class": "com.taosdata.kafka.connect.sink.TDengineSinkConnector",
"type": "sink",
"version": "1.0.0"
},
{
"class": "com.taosdata.kafka.connect.source.TDengineSourceConnector",
"type": "source",
"version": "1.0.0"
},
......
```
If not, please check the log file of Kafka Connect. To view the log file path, please execute:
```
echo `cat /tmp/confluent.current`/connect/connect.stdout
```
It should produce a path like: `/tmp/confluent.104086/connect/connect.stdout`
Besides the log file `connect.stdout`, there is a file named `connect.properties`. At the end of this file you can see the effective `plugin.path`, which is a series of paths joined by commas. If Kafka Connect did not find the plugins, it's probably because the installed path is not included in `plugin.path`.
## The use of TDengine Sink Connector
The role of the TDengine Sink Connector is to synchronize the data of the specified topic to TDengine. Users do not need to create databases and super tables in advance. The name of the target database can be specified manually (see the configuration parameter connection.database), or it can be generated according to specific rules (see the configuration parameter connection.database.prefix).
@ -142,7 +195,7 @@ vi sink-demo.properties
The content of sink-demo.properties is as follows:
```ini title="sink-demo.properties"
name=tdengine-sink-demo
name=TDengineSinkConnector
connector.class=com.taosdata.kafka.connect.sink.TDengineSinkConnector
tasks.max=1
topics=meters
@ -151,6 +204,7 @@ connection.user=root
connection.password=taosdata
connection.database=power
db.schemaless=line
data.precision=ns
key.converter=org.apache.kafka.connect.storage.StringConverter
value.converter=org.apache.kafka.connect.storage.StringConverter
```
@ -177,6 +231,7 @@ If the above command is executed successfully, the output is as follows:
"connection.url": "jdbc:TAOS://127.0.0.1:6030",
"connection.user": "root",
"connector.class": "com.taosdata.kafka.connect.sink.TDengineSinkConnector",
"data.precision": "ns",
"db.schemaless": "line",
"key.converter": "org.apache.kafka.connect.storage.StringConverter",
"tasks.max": "1",
@ -221,10 +276,10 @@ Database changed.
taos> select * from meters;
ts | current | voltage | phase | groupid | location |
===============================================================================================================================================================
2022-03-28 09:56:51.249000000 | 11.800000000 | 221.000000000 | 0.280000000 | 2 | California.LoSangeles |
2022-03-28 09:56:51.250000000 | 13.400000000 | 223.000000000 | 0.290000000 | 2 | California.LoSangeles |
2022-03-28 09:56:51.249000000 | 10.800000000 | 223.000000000 | 0.290000000 | 3 | California.LoSangeles |
2022-03-28 09:56:51.250000000 | 11.300000000 | 221.000000000 | 0.350000000 | 3 | California.LoSangeles |
2022-03-28 09:56:51.249000000 | 11.800000000 | 221.000000000 | 0.280000000 | 2 | California.LosAngeles |
2022-03-28 09:56:51.250000000 | 13.400000000 | 223.000000000 | 0.290000000 | 2 | California.LosAngeles |
2022-03-28 09:56:51.249000000 | 10.800000000 | 223.000000000 | 0.290000000 | 3 | California.LosAngeles |
2022-03-28 09:56:51.250000000 | 11.300000000 | 221.000000000 | 0.350000000 | 3 | California.LosAngeles |
Query OK, 4 row(s) in set (0.004208s)
```
@ -356,6 +411,7 @@ The following configuration items apply to TDengine Sink Connector and TDengine
4. `max.retries`: The maximum number of retries when an error occurs. Defaults to 1.
5. `retry.backoff.ms`: The time interval between retries when an error occurs. The unit is milliseconds. The default is 3000.
6. `db.schemaless`: Data format, could be one of `line`, `json`, and `telnet`. Represent InfluxDB line protocol format, OpenTSDB JSON format, and OpenTSDB Telnet line protocol format.
7. `data.precision`: The time precision used when writing data in InfluxDB line protocol format. It can be one of `ms`, `us` and `ns`. The default is `ns`.
### TDengine Source Connector specific configuration
@ -366,7 +422,13 @@ The following configuration items apply to TDengine Sink Connector and TDengine
5. `fetch.max.rows`: The maximum number of rows retrieved when retrieving the database. Default is 100.
6. `out.format`: The data format. The value could be line or json. The line represents the InfluxDB Line protocol format, and json represents the OpenTSDB JSON format. Default is `line`.
## feedback
## Other notes
1. To install the plugin to a customized location, refer to https://docs.confluent.io/home/connect/self-managed/install.html#install-connector-manually.
2. To use Kafka Connect without Confluent, refer to https://kafka.apache.org/documentation/#connect.
## Feedback
https://github.com/taosdata/kafka-connect-tdengine/issues

View File

@ -5,11 +5,11 @@ title: Architecture
## Cluster and Primary Logic Unit
The design of TDengine is based on the assumption that any hardware or software system is not 100% reliable and that no single node can provide sufficient computing and storage resources to process massive data. Therefore, TDengine has been designed in a distributed and high-reliability architecture since day one of the development, so that hardware failure or software failure of any single even multiple servers will not affect the availability and reliability of the system. At the same time, through node virtualization and automatic load-balancing technology, TDengine can make the most efficient use of computing and storage resources in heterogeneous clusters to reduce hardware resources significantly.
The design of TDengine is based on the assumption that no hardware or software system is 100% reliable and that no single node can provide sufficient computing and storage resources to process massive data. Therefore, since day one, TDengine has been designed as a natively distributed system with a high-reliability architecture. Hardware failure or software failure of a single server, or even multiple servers, will not affect the availability and reliability of the system. At the same time, through node virtualization and automatic load-balancing technology, TDengine can make the most efficient use of computing and storage resources in heterogeneous clusters to significantly reduce hardware resource needs.
### Primary Logic Unit
Logical structure diagram of TDengine distributed architecture as following:
Logical structure diagram of TDengine's distributed architecture is as follows:
![TDengine Database architecture diagram](structure.webp)
<center> Figure 1: TDengine architecture diagram </center>
@ -18,25 +18,25 @@ A complete TDengine system runs on one or more physical nodes. Logically, it inc
**Physical node (pnode)**: A pnode is a computer that runs independently and has its own computing, storage and network capabilities. It can be a physical machine, virtual machine, or Docker container installed with OS. The physical node is identified by its configured FQDN (Fully Qualified Domain Name). TDengine relies entirely on FQDN for network communication. If you don't know about FQDN, please check [wikipedia](https://en.wikipedia.org/wiki/Fully_qualified_domain_name).
**Data node (dnode):** A dnode is a running instance of the TDengine server-side execution code taosd on a physical node. A working system must have at least one data node. A dnode contains zero to multiple logical virtual nodes (VNODE), zero or at most one logical management node (mnode). The unique identification of a dnode in the system is determined by the instance's End Point (EP). EP is a combination of FQDN (Fully Qualified Domain Name) of the physical node where the dnode is located and the network port number (Port) configured by the system. By configuring different ports, a physical node (a physical machine, virtual machine or container) can run multiple instances or have multiple data nodes.
**Data node (dnode):** A dnode is a running instance of the TDengine server-side execution code taosd on a physical node (pnode). A working system must have at least one data node. A dnode contains zero to multiple logical virtual nodes (VNODE) and zero or at most one logical management node (mnode). The unique identification of a dnode in the system is determined by the instance's End Point (EP). EP is a combination of FQDN (Fully Qualified Domain Name) of the physical node where the dnode is located and the network port number (Port) configured by the system. By configuring different ports, a physical node (a physical machine, virtual machine or container) can run multiple instances or have multiple data nodes.
**Virtual node (vnode)**: To better support data sharding, load balancing and prevent data from overheating or skewing, data nodes are virtualized into multiple virtual nodes (vnode, V2, V3, V4, etc. in the figure). Each vnode is a relatively independent work unit, which is the basic unit of time-series data storage and has independent running threads, memory space and persistent storage path. A vnode contains a certain number of tables (data collection points). When a new table is created, the system checks whether a new vnode needs to be created. The number of vnodes that can be created on a data node depends on the hardware capacities of the physical node where the data node is located. A vnode belongs to only one DB, but a DB can have multiple vnodes. In addition to the stored time-series data, a vnode also stores the schema and tag values of the included tables. A virtual node is uniquely identified in the system by the EP of the data node and the VGroup ID to which it belongs and is created and managed by the management node.
**Virtual node (vnode)**: To better support data sharding, load balancing and prevent data from overheating or skewing, data nodes are virtualized into multiple virtual nodes (vnode, V2, V3, V4, etc. in the figure). Each vnode is a relatively independent work unit, which is the basic unit of time-series data storage and has independent running threads, memory space and persistent storage path. A vnode contains a certain number of tables (data collection points). When a new table is created, the system checks whether a new vnode needs to be created. The number of vnodes that can be created on a data node depends on the capacity of the hardware of the physical node where the data node is located. A vnode belongs to only one DB, but a DB can have multiple vnodes. In addition to the stored time-series data, a vnode also stores the schema and tag values of the included tables. A virtual node is uniquely identified in the system by the EP of the data node and the VGroup ID to which it belongs and is created and managed by the management node.
**Management node (mnode)**: A virtual logical unit responsible for monitoring and maintaining the running status of all data nodes and load balancing among nodes (M in the figure). At the same time, the management node is also responsible for the storage and management of metadata (including users, databases, tables, static tags, etc.), so it is also called Meta Node. Multiple (up to 5) mnodes can be configured in a TDengine cluster, and they are automatically constructed into a virtual management node group (M0, M1, M2 in the figure). The master/slave mechanism is adopted for the mnode group and the data synchronization is carried out in a strongly consistent way. Any data update operation can only be executed on the master. The creation of mnode cluster is completed automatically by the system without manual intervention. There is at most one mnode on each dnode, which is uniquely identified by the EP of the data node to which it belongs. Each dnode automatically obtains the EP of the dnode where all mnodes in the whole cluster are located through internal messaging interaction.
**Management node (mnode)**: A virtual logical unit responsible for monitoring and maintaining the running status of all data nodes and load balancing among nodes (M in the figure). At the same time, the management node is also responsible for the storage and management of metadata (including users, databases, tables, static tags, etc.), so it is also called Meta Node. Multiple (up to 5) mnodes can be configured in a TDengine cluster, and they are automatically constructed into a virtual management node group (M0, M1, M2 in the figure). The master/slave mechanism is adopted for the mnode group and the data synchronization is carried out in a strongly consistent way. Any data update operation can only be executed on the master. The creation of mnode cluster is completed automatically by the system without manual intervention. There is at most one mnode on each dnode, which is uniquely identified by the EP of the data node to which it belongs. Each dnode automatically obtains the EP of the dnode where all mnodes in the whole cluster are located, through internal messaging interaction.
**Virtual node group (VGroup)**: Vnodes on different data nodes can form a virtual node group to ensure the high availability of the system. The virtual node group is managed in a master/slave mechanism. Write operations can only be performed on the master vnode, and then replicated to slave vnodes, thus ensuring that one single replica of data is copied on multiple physical nodes. The number of virtual nodes in a vgroup equals the number of data replicas. If the number of replicas of a DB is N, the system must have at least N data nodes. The number of replicas can be specified by the parameter `“replica”` when creating DB, and the default is 1. Using the multi-replication feature of TDengine, the same high data reliability can be achieved without the need for expensive storage devices such as disk arrays. Virtual node group is created and managed by the management node, and the management node assigns a system unique ID, aka VGroup ID. If two virtual nodes have the same vnode group ID, means that they belong to the same group and the data is backed up to each other. The number of virtual nodes in a virtual node group can be dynamically changed, allowing only one, that is, no data replication. VGroup ID is never changed. Even if a virtual node group is deleted, its ID will not be reused.
**Virtual node group (VGroup)**: Vnodes on different data nodes can form a virtual node group to ensure the high availability of the system. The virtual node group is managed in a master/slave mechanism. Write operations can only be performed on the master vnode, and then replicated to slave vnodes, thus ensuring that one single replica of data is copied on multiple physical nodes. The number of virtual nodes in a vgroup equals the number of data replicas. If the number of replicas of a DB is N, the system must have at least N data nodes. The number of replicas can be specified by the parameter `“replica”` when creating a DB, and the default is 1. Using the multi-replication feature of TDengine, the same high data reliability can be achieved without the need for expensive storage devices such as disk arrays. Virtual node groups are created and managed by the management node, and the management node assigns a system unique ID, aka VGroup ID. If two virtual nodes have the same vnode group ID, it means that they belong to the same group and the data is backed up to each other. The number of virtual nodes in a virtual node group can be dynamically changed, allowing only one, that is, no data replication. VGroup ID is never changed. Even if a virtual node group is deleted, its ID will not be reused.
**TAOSC**: TAOSC is the driver provided by TDengine to applications, which is responsible for dealing with the interaction between application and cluster, and provides the native interface of C/C++ language, which is embedded in JDBC, C #, Python, Go, Node.js language connection libraries. Applications interact with the whole cluster through TAOSC instead of directly connecting to data nodes in the cluster. This module is responsible for obtaining and caching metadata; forwarding requests for insertion, query, etc. to the correct data node; when returning the results to the application, TAOSC also needs to be responsible for the final level of aggregation, sorting, filtering and other operations. For JDBC, C/C++/C #/Python/Go/Node.js interfaces, this module runs on the physical node where the application is located. At the same time, in order to support the fully distributed RESTful interface, TAOSC has a running instance on each dnode of TDengine cluster.
**TAOSC**: TAOSC is the driver provided by TDengine to applications. It is responsible for dealing with the interaction between application and cluster, and provides the native interface for the C/C++ language. It is also embedded in the JDBC, C#, Python, Go, and Node.js language connection libraries. Applications interact with the whole cluster through TAOSC instead of directly connecting to data nodes in the cluster. This module is responsible for obtaining and caching metadata; forwarding requests for insertion, query, etc. to the correct data node; when returning the results to the application, TAOSC also needs to be responsible for the final level of aggregation, sorting, filtering and other operations. For JDBC, C/C++/C#/Python/Go/Node.js interfaces, this module runs on the physical node where the application is located. At the same time, in order to support the fully distributed RESTful interface, TAOSC has a running instance on each dnode of the TDengine cluster.
### Node Communication
**Communication mode**: The communication among each data node of TDengine system, and among the client driver and each data node is carried out through TCP/UDP. Considering an IoT scenario, the data writing packets are generally not large, so TDengine uses UDP in addition to TCP for transmission, because UDP is more efficient and is not limited by the number of connections. TDengine implements its own timeout, retransmission, confirmation and other mechanisms to ensure reliable transmission of UDP. For packets with a data volume of less than 15K, UDP is adopted for transmission, and TCP is automatically adopted for transmission of packets with a data volume of more than 15K or query operations. At the same time, TDengine will automatically compress/decompress the data, digital sign/authenticate the data according to the configuration and data packet. For data replication among data nodes, only TCP is used for data transportation.
**Communication mode**: The communication among each data node of TDengine system, and among the client driver and each data node is carried out through TCP/UDP. Considering an IoT scenario, the data writing packets are generally not large, so TDengine uses UDP in addition to TCP for transmission, because UDP is more efficient and is not limited by the number of connections. TDengine implements its own timeout, retransmission, confirmation and other mechanisms to ensure reliable transmission of UDP. For packets with a data volume of less than 15K, UDP is adopted for transmission, and TCP is automatically adopted for transmission of packets with a data volume of more than 15K or query operations. At the same time, TDengine will automatically compress/decompress the data, digitally sign/authenticate the data according to the configuration and data packet. For data replication among data nodes, only TCP is used for data transportation.
**FQDN configuration:** A data node has one or more FQDNs, which can be specified in the system configuration file taos.cfg with the parameter “fqdn”. If it is not specified, the system will automatically use the hostname of the computer as its FQDN. If the node is not configured with FQDN, you can directly set the configuration parameter “fqdn” of the node to its IP address. However, IP is not recommended because IP address may be changed, and once it changes, the cluster will not work properly. The EP (End Point) of a data node consists of FQDN + Port. With FQDN, it is necessary to ensure the DNS service is running, or hosts files on nodes are configured properly.
**Port configuration**: The external port of a data node is determined by the system configuration parameter “serverPort” in TDengine, and the port for internal communication of cluster is serverPort+5. The data replication operation among data nodes in the cluster also occupies a TCP port, which is serverPort+10. In order to support multithreading and efficient processing of UDP data, each internal and external UDP connection needs to occupy 5 consecutive ports. Therefore, the total port range of a data node will be serverPort to serverPort + 10, for a total of 11 TCP/UDP ports. To run the system, make sure that the firewall keeps these ports open. Each data node can be configured with a different serverPort.
**Cluster external connection**: TDengine cluster can accommodate one single, multiple or even thousands of data nodes. The application only needs to initiate a connection to any data node in the cluster. The network parameter required for connection is the End Point (FQDN plus configured port number) of a data node. When starting the application taos through CLI, the FQDN of the data node can be specified through the option `-h`, and the configured port number can be specified through `-p`. If the port is not configured, the system configuration parameter “serverPort” of TDengine will be adopted.
**Cluster external connection**: TDengine cluster can accommodate a single, multiple or even thousands of data nodes. The application only needs to initiate a connection to any data node in the cluster. The network parameter required for connection is the End Point (FQDN plus configured port number) of a data node. When starting the application taos through CLI, the FQDN of the data node can be specified through the option `-h`, and the configured port number can be specified through `-p`. If the port is not configured, the system configuration parameter “serverPort” of TDengine will be adopted.
**Inter-cluster communication**: Data nodes connect with each other through TCP/UDP. When a data node starts, it will obtain the EP information of the dnode where the mnode is located, and then establish a connection with the mnode in the system to exchange information. There are three steps to obtain EP information of the mnode:
@ -44,11 +44,13 @@ A complete TDengine system runs on one or more physical nodes. Logically, it inc
2. Check the system configuration file taos.cfg to obtain node configuration parameters “firstEp” and “secondEp” (the node specified by these two parameters can be a normal node without mnode; in this case, the node will try to redirect to the mnode node when connected). If these two configuration parameters do not exist in taos.cfg, or are invalid, skip to the third step;
3. Set your own EP as a mnode EP and run it independently. After obtaining the mnode EP list, the data node initiates the connection. It will successfully join the working cluster after connection. If not successful, it will try the next item in the mnode EP list. If all attempts are made, but the connection still fails, sleep for a few seconds before trying again.
**The choice of MNODE**: TDengine logically has a management node, but there is no separated execution code. The server-side only has a set of execution code taosd. So which data node will be the management node? This is determined automatically by the system without any manual intervention. The principle is as follows: when a data node starts, it will check its End Point and compare it with the obtained mnode EP List. If its EP exists in it, the data node shall start the mnode module and become a mnode. If your own EP is not in the mnode EP List, the mnode module will not start. During the system operation, due to load balancing, downtime and other reasons, mnode may migrate to the new dnode, while totally transparent without manual intervention. The modification of configuration parameters is the decision made by mnode itself according to resources usage.
**The choice of MNODE**: TDengine logically has a management node, but there is no separate execution code. The server-side only has one set of execution code, taosd. So which data node will be the management node? This is determined automatically by the system without any manual intervention. The principle is as follows: when a data node starts, it will check its End Point and compare it with the obtained mnode EP List. If its EP exists in it, the data node shall start the mnode module and become a mnode. If your own EP is not in the mnode EP List, the mnode module will not start. During the system operation, due to load balancing, downtime and other reasons, mnode may migrate to the new dnode, totally transparently and without manual intervention. The modification of configuration parameters is the decision made by mnode itself according to resources usage.
**Add new data nodes:** After the system has a data node, it has become a working system. There are two steps to add a new node into the cluster. Step1: Connect to the existing working data node using TDengine CLI, and then add the End Point of the new data node with the command "create dnode"; Step 2: In the system configuration parameter file taos.cfg of the new data node, set the “firstEp” and “secondEp” parameters to the EP of any two data nodes in the existing cluster. Please refer to the detailed user tutorial for detailed steps. In this way, the cluster will be established step by step.
**Add new data nodes:** After the system has a data node, it has become a working system. There are two steps to add a new node into the cluster.
- Step 1: Connect to the existing working data node using TDengine CLI, and then add the End Point of the new data node with the command "create dnode".
- Step 2: In the system configuration parameter file taos.cfg of the new data node, set the “firstEp” and “secondEp” parameters to the EP of any two data nodes in the existing cluster. Please refer to the user tutorial for detailed steps. In this way, the cluster will be established step by step.
**Redirection**: No matter about dnode or TAOSC, the connection to the mnode shall be initiated first, but the mnode is automatically created and maintained by the system, so the user does not know which dnode is running the mnode. TDengine only requires a connection to any working dnode in the system. Because any running dnode maintains the currently running mnode EP List, when receiving a connecting request from the newly started dnode or TAOSC, if its not a mnode by self, it will reply to the mnode EP List back. After receiving this list, TAOSC or the newly started dnode will try to establish the connection again. When the mnode EP List changes, each data node quickly obtains the latest list and notifies TAOSC through messaging interaction among nodes.
**Redirection**: Regardless of dnode or TAOSC, the connection to the mnode is initiated first. The mnode is automatically created and maintained by the system, so the user does not know which dnode is running the mnode. TDengine only requires a connection to any working dnode in the system. Because any running dnode maintains the currently running mnode EP List, when receiving a connection request from a newly started dnode or TAOSC, if it's not an mnode itself, it will reply with the mnode EP List. After receiving this list, TAOSC or the newly started dnode will try to establish the connection again. When the mnode EP List changes, each data node quickly obtains the latest list and notifies TAOSC through messaging interaction among nodes.
### A Typical Data Writing Process
@ -58,17 +60,17 @@ To explain the relationship between vnode, mnode, TAOSC and application and thei
<center> Figure 2: Typical process of TDengine </center>
1. Application initiates a request to insert data through JDBC, ODBC, or other APIs.
2. TAOSC checks if meta data existing for the table in the cache. If so, go straight to Step 4. If not, TAOSC sends a get meta-data request to mnode.
2. TAOSC checks the cache to see if meta data exists for the table. If it does, it goes straight to Step 4. If not, TAOSC sends a get meta-data request to mnode.
3. Mnode returns the meta-data of the table to TAOSC. Meta-data contains the schema of the table, and also the vgroup information to which the table belongs (the vnode ID and the End Point of the dnode where the table belongs. If the number of replicas is N, there will be N groups of End Points). If TAOSC does not receive a response from the mnode for a long time, and there are multiple mnodes, TAOSC will send a request to the next mnode.
4. TAOSC initiates an insert request to master vnode.
5. After vnode inserts the data, it gives a reply to TAOSC, indicating that the insertion is successful. If TAOSC doesn't get a response from vnode for a long time, TAOSC will treat this node as offline. In this case, if there are multiple replicas of the inserted database, TAOSC will issue an insert request to the next vnode in vgroup.
6. TAOSC notifies APP that writing is successful.
For Step 2 and 3, when TAOSC starts, it does not know the End Point of mnode, so it will directly initiate a request to the configured serving End Point of the cluster. If the dnode that receives the request does not have a mnode configured, it will inform the mnode EP list in a reply message, so that TAOSC will re-issue a request to obtain meta-data to the EP of another new mnode.
For Step 2 and 3, when TAOSC starts, it does not know the End Point of mnode, so it will directly initiate a request to the configured serving End Point of the cluster. If the dnode that receives the request does not have a mnode configured, it will reply with the mnode EP list, so that TAOSC will re-issue a request to obtain meta-data to the EP of another mnode.
For Step 4 and 5, without caching, TAOSC can't recognize the master in the virtual node group, so assumes that the first vnode is the master and sends a request to it. If this vnode is not the master, it will reply to the actual master as a new target where TAOSC shall send a request to. Once the reply of successful insertion is obtained, TAOSC will cache the information of master node.
For Step 4 and 5, without caching, TAOSC can't recognize the master in the virtual node group, so it assumes that the first vnode is the master and sends a request to it. If this vnode is not the master, it will reply with the actual master as a new target to which TAOSC shall send a request. Once a response of successful insertion is obtained, TAOSC will cache the information of the master node.
The above is the process of inserting data, and the processes of querying and computing are the same. TAOSC encapsulates and hides all these complicated processes, and it is transparent to applications.
The above describes the process of inserting data. The processes of querying and computing are the same. TAOSC encapsulates and hides all these complicated processes, and it is transparent to applications.
Through TAOSC caching mechanism, mnode needs to be accessed only when a table is accessed for the first time, so mnode will not become a system bottleneck. However, because schema and vgroup may change (such as load balancing), TAOSC will interact with mnode regularly to automatically update the cache.
@ -76,24 +78,24 @@ Through TAOSC caching mechanism, mnode needs to be accessed only when a table is
### Storage Model
The data stored by TDengine include collected time-series data, metadata related to database and tables, tag data, etc. These data are specifically divided into three parts:
The data stored by TDengine includes collected time-series data, metadata related to database and tables, tag data, etc. All of the data is specifically divided into three parts:
- Time-series data: stored in vnode and composed of data, head and last files. The amount of data is large and query amount depends on the application scenario. Out-of-order writing is allowed, but delete operation is not supported for the time being, and update operation is only allowed when database “update” parameter is set to 1. By adopting the model with **one table for each data collection point**, the data of a given time period is continuously stored, and the writing against one single table is a simple appending operation. Multiple records can be read at one time, thus ensuring the insert and query operation of a single data collection point with the best performance.
- Tag data: meta files stored in vnode. Four standard operations of create, read, update and delete are supported. The amount of data is not large. If there are N tables, there are N records, so all can be stored in memory. To make tag filtering efficient, TDengine supports multi-core and multi-threaded concurrent queries. As long as the computing resources are sufficient, even in face of millions of tables, the tag filtering results will return in milliseconds.
- Metadata: stored in mnode, including system node, user, DB, Table Schema and other information. Four standard operations of create, delete, update and read are supported. The amount of these data are not large and can be stored in memory, moreover, the query amount is not large because of the client cache. Therefore, TDengine uses centralized storage management, however, there will be no performance bottleneck.
- Time-series data: stored in vnode and composed of data, head and last files. The amount of data is large and query amount depends on the application scenario. Out-of-order writing is allowed, but delete operation is not supported for the time being, and update operation is only allowed when database “update” parameter is set to 1. By adopting the model with **one table for each data collection point**, the data of a given time period is continuously stored, and the writing against one single table is a simple appending operation. Multiple records can be read at one time, thus ensuring the best performance for both insert and query operations of a single data collection point.
- Tag data: meta files stored in vnode. Four standard operations of create, read, update and delete are supported. The amount of data is not large. If there are N tables, there are N records, so all can be stored in memory. To make tag filtering efficient, TDengine supports multi-core and multi-threaded concurrent queries. As long as the computing resources are sufficient, even with millions of tables, the tag filtering results will return in milliseconds.
- Metadata: stored in mnode and includes system node, user, DB, table schema and other information. Four standard operations of create, delete, update and read are supported. The amount of this data is not large and can be stored in memory. Moreover, the number of queries is not large because of client cache. Even though TDengine uses centralized storage management, because of the architecture, there is no performance bottleneck.
Compared with the typical NoSQL storage model, TDengine stores tag data and time-series data completely separately, which has two major advantages:
Compared with the typical NoSQL storage model, TDengine stores tag data and time-series data completely separately. This has two major advantages:
- Reduce the redundancy of tag data storage significantly: general NoSQL database or time-series database adopts K-V storage, in which Key includes a timestamp, a device ID and various tags. Each record carries these duplicated tags, so storage space is wasted. Moreover, if the application needs to add, modify or delete tags on historical data, it has to traverse the data and rewrite them again, which is extremely expensive to operate.
- Aggregate data efficiently between multiple tables: when aggregating data between multiple tables, it first finds out the tables which satisfy the filtering conditions, and then find out the corresponding data blocks of these tables to greatly reduce the data sets to be scanned, thus greatly improving the aggregation efficiency. Moreover, tag data is managed and maintained in a full-memory structure, and tag data queries in tens of millions can return in milliseconds.
- Reduces the redundancy of tag data storage significantly. General NoSQL database or time-series database adopts K-V (key-value) storage, in which the key includes a timestamp, a device ID and various tags. Each record carries these duplicated tags, so storage space is wasted. Moreover, if the application needs to add, modify or delete tags on historical data, it has to traverse the data and rewrite them again, which is an extremely expensive operation.
- Aggregates data efficiently between multiple tables. When aggregating data between multiple tables, it first finds the tables which satisfy the filtering conditions, and then finds the corresponding data blocks of these tables. This greatly reduces the data sets to be scanned, which in turn improves the aggregation efficiency. Moreover, tag data is managed and maintained in a full-memory structure, and tag data queries in tens of millions can return in milliseconds.
### Data Sharding
For large-scale data management, to achieve scale-out, it is generally necessary to adopt the Partitioning or Sharding strategy. TDengine implements data sharding via vnode, and time-series data partitioning via one data file for a time range.
For large-scale data management, to achieve scale-out, it is generally necessary to adopt a Partitioning or Sharding strategy. TDengine implements data sharding via vnode, and time-series data partitioning via one data file for a time range.
VNode (Virtual Data Node) is responsible for providing writing, query and computing functions for collected time-series data. To facilitate load balancing, data recovery and support heterogeneous environments, TDengine splits a data node into multiple vnodes according to its computing and storage resources. The management of these vnodes is done automatically by TDengine and is completely transparent to the application.
For a single data collection point, regardless of the amount of data, a vnode (or vnode group, if the number of replicas is greater than 1) has enough computing resource and storage resource to process (if a 16-byte record is generated per second, the original data generated in one year will be less than 0.5 G), so TDengine stores all the data of a table (a data collection point) in one vnode instead of distributing the data to two or more dnodes. Moreover, a vnode can store data from multiple data collection points (tables), and the upper limit of the tables quantity for a vnode is one million. By design, all tables in a vnode belong to the same DB. On a data node, unless specially configured, the number of vnodes owned by a DB will not exceed the number of system cores.
For a single data collection point, regardless of the amount of data, a vnode (or vnode group, if the number of replicas is greater than 1) has enough computing resource and storage resource to process (if a 16-byte record is generated per second, the original data generated in one year will be less than 0.5 G). So TDengine stores all the data of a table (a data collection point) in one vnode instead of distributing the data to two or more dnodes. Moreover, a vnode can store data from multiple data collection points (tables), and the upper limit of the tables quantity for a vnode is one million. By design, all tables in a vnode belong to the same DB. On a data node, unless specially configured, the number of vnodes owned by a DB will not exceed the number of system cores.
When creating a DB, the system does not allocate resources immediately. However, when creating a table, the system will check if there is an allocated vnode with free tablespace. If so, the table will be created in the vacant vnode immediately. If not, the system will create a new vnode on a dnode from the cluster according to the current workload, and then a table. If there are multiple replicas of a DB, the system does not create only one vnode, but a vgroup (virtual data node group). The system has no limit on the number of vnodes, which is just limited by the computing and storage resources of physical nodes.
@ -101,23 +103,23 @@ The meta data of each table (including schema, tags, etc.) is also stored in vno
### Data Partitioning
In addition to vnode sharding, TDengine partitions the time-series data by time range. Each data file contains only one time range of time-series data, and the length of the time range is determined by DB's configuration parameter `“days”`. This method of partitioning by time rang is also convenient to efficiently implement the data retention policy. As long as the data file exceeds the specified number of days (system configuration parameter `“keep”`), it will be automatically deleted. Moreover, different time ranges can be stored in different paths and storage media, so as to facilitate the tiered-storage. Cold/hot data can be stored in different storage media to reduce the storage cost.
In addition to vnode sharding, TDengine partitions the time-series data by time range. Each data file contains only one time range of time-series data, and the length of the time range is determined by the database configuration parameter `“days”`. This method of partitioning by time range is also convenient to efficiently implement data retention policies. As long as the data file exceeds the specified number of days (system configuration parameter `“keep”`), it will be automatically deleted. Moreover, different time ranges can be stored in different paths and storage media, so as to facilitate tiered-storage. Cold/hot data can be stored in different storage media to significantly reduce storage costs.
In general, **TDengine splits big data by vnode and time range in two dimensions** to manage the data efficiently with horizontal scalability.
### Load Balancing
Each dnode regularly reports its status (including hard disk space, memory size, CPU, network, number of virtual nodes, etc.) to the mnode (virtual management node), so mnode knows the status of the entire cluster. Based on the overall status, when the mnode finds a dnode is overloaded, it will migrate one or more vnodes to other dnodes. During the process, TDengine services keep running and the data insertion, query and computing operations are not affected.
Each dnode regularly reports its status (including hard disk space, memory size, CPU, network, number of virtual nodes, etc.) to the mnode (virtual management node) so that the mnode knows the status of the entire cluster. Based on the overall status, when the mnode finds a dnode is overloaded, it will migrate one or more vnodes to other dnodes. During the process, TDengine services keep running and the data insertion, query and computing operations are not affected.
If the mnode has not received the dnode status for a period of time, the dnode will be treated as offline. When offline lasts a certain period of time (configured by parameter `“offlineThreshold”`), the dnode will be forcibly removed from the cluster by mnode. If the number of replicas of vnodes on this dnode is greater than one, the system will automatically create new replicas on other dnodes to ensure the replica number. If there are other mnodes on this dnode and the number of mnodes replicas is greater than one, the system will automatically create new mnodes on other dnodes to ensure the replica number.
If the mnode has not received the dnode status for a period of time, the dnode will be treated as offline. If the dnode stays offline beyond the time configured by parameter `“offlineThreshold”`, the dnode will be forcibly removed from the cluster by mnode. If the number of replicas of vnodes on this dnode is greater than one, the system will automatically create new replicas on other dnodes to ensure the replica number. If there are other mnodes on this dnode and the number of mnodes replicas is greater than one, the system will automatically create new mnodes on other dnodes to ensure the replica number.
When new data nodes are added to the cluster, with new computing and storage resources are added, the system will automatically start the load balancing process.
When new data nodes are added to the cluster, with new computing and storage resources, the system will automatically start the load balancing process.
The load balancing process does not require any manual intervention, and it is transparent to the application. **Note: load balancing is controlled by parameter “balance”, which determines to turn on/off automatic load balancing.**
## Data Writing and Replication Process
If a database has N replicas, thus a virtual node group has N virtual nodes, but only one as Master and all others are slaves. When the application writes a new record to system, only the Master vnode can accept the writing request. If a slave vnode receives a writing request, the system will notifies TAOSC to redirect.
If a database has N replicas, a virtual node group has N virtual nodes, but only one is the Master and all others are slaves. When the application writes a new record to the system, only the Master vnode can accept the writing request. If a slave vnode receives a writing request, the system will notify TAOSC to redirect.
### Master vnode Writing Process
@ -130,7 +132,7 @@ Master Vnode uses a writing process as follows:
2. If the system configuration parameter `“walLevel”` is greater than 0, vnode will write the original request packet into database log file WAL. If walLevel is set to 2 and fsync is set to 0, TDengine will make WAL data written immediately to ensure that even if the system goes down, all data can be recovered from database log file;
3. If there are multiple replicas, vnode will forward data packet to slave vnodes in the same virtual node group, and the forwarded packet has a version number with data;
4. Write into memory and add the record to “skip list”;
5. Master vnode returns a confirmation message to the application, indicating a successful writing.
5. Master vnode returns a confirmation message to the application, indicating a successful write.
6. If any of Step 2, 3 or 4 fails, the error will directly return to the application.
### Slave vnode Writing Process
@ -146,19 +148,19 @@ For a slave vnode, the write process as follows:
Compared with Master vnode, slave vnode has no forwarding or reply confirmation step, which means two fewer steps. But writing into memory and WAL is exactly the same.
### Remote Disaster Recovery and IDC Migration
### Remote Disaster Recovery and IDC (Internet Data Center) Migration
As above Master and Slave processes discussed, TDengine adopts asynchronous replication for data synchronization. This method can greatly improve the writing performance, with no obvious impact from network delay. By configuring IDC and rack number for each physical node, it can be ensured that for a virtual node group, virtual nodes are composed of physical nodes from different IDC and different racks, thus implementing remote disaster recovery without other tools.
As discussed above, TDengine writes using Master and Slave processes. TDengine adopts asynchronous replication for data synchronization. This method can greatly improve write performance, with no obvious impact from network delay. By configuring IDC and rack number for each physical node, it can be ensured that for a virtual node group, virtual nodes are composed of physical nodes from different IDC and different racks, thus implementing remote disaster recovery without other tools.
On the other hand, TDengine supports dynamic modification of the replicas number. Once the number of replicas increases, the newly added virtual nodes will immediately enter the data synchronization process. After synchronization completed, added virtual nodes can provide services. In the synchronization process, master and other synchronized virtual nodes keep serving. With this feature, TDengine can provide IDC migration without service interruption. It is only necessary to add new physical nodes to the existing IDC cluster, and then remove old physical nodes after the data synchronization is completed.
On the other hand, TDengine supports dynamic modification of the replica number. Once the number of replicas increases, the newly added virtual nodes will immediately enter the data synchronization process. After synchronization is complete, added virtual nodes can provide services. In the synchronization process, master and other synchronized virtual nodes keep serving. With this feature, TDengine can provide IDC migration without service interruption. It is only necessary to add new physical nodes to the existing IDC cluster, and then remove old physical nodes after the data synchronization is completed.
However, the asynchronous replication has a tiny time window where data can be lost. The specific scenario is as follows:
However, the asynchronous replication has a very low probability scenario where data may be lost. The specific scenario is as follows:
1. Master vnode has finished its 5-step operations, confirmed the success of writing to APP, and then went down;
1. Master vnode finishes its 5-step operation, confirms the success of writing to APP, and then goes down;
2. Slave vnode receives the write request, then processing fails before writing to the log in Step 2;
3. Slave vnode will become the new master, thus losing one record.
In theory, for asynchronous replication, there is no guarantee to prevent data loss. However, this window is extremely small, only if mater and slave fail at the same time, and just confirm the successful write to the application before.
In theory, for asynchronous replication, there is no guarantee to prevent data loss. However, this is an extremely low probability scenario as described above.
Note: Remote disaster recovery and no-downtime IDC migration are only supported by Enterprise Edition. **Hint: This function is not available yet**
@ -171,43 +173,43 @@ When a vnode starts, the roles (master, slave) are uncertain, and the data is in
1. If there's only one replica, it's always master
2. When all replicas are online, the one with latest version is master
3. Over half of online nodes are virtual nodes, and some virtual node is slave, it will automatically become master
4. For 2 and 3, if multiple virtual nodes meet the requirement, the first vnode in virtual node group list will be selected as master
4. For 2 and 3, if multiple virtual nodes meet the requirement, the first vnode in virtual node group list will be selected as master.
### Synchronous Replication
For scenarios with strong data consistency requirements, asynchronous data replication is not applicable, because there is a small probability of data loss. So, TDengine provides a synchronous replication mechanism for users. When creating a database, in addition to specifying the number of replicas, user also needs to specify a new parameter “quorum”. If quorum is greater than one, it means that every time the Master forwards a message to the replica, it needs to wait for “quorum-1” reply confirms before informing the application that data has been successfully written in slave. If “quorum-1” reply confirms are not received within a certain period of time, the master vnode will return an error to the application.
With synchronous replication, performance of system will decrease and latency will increase. Because metadata needs strong consistent, the default for data synchronization between mnodes is synchronous replication.
With synchronous replication, performance of system will decrease and latency will increase. Because metadata needs strong consistency, the default for data synchronization between mnodes is synchronous replication.
## Caching and Persistence
### Caching
TDengine adopts a time-driven cache management strategy (First-In-First-Out, FIFO), also known as a Write-driven Cache Management Mechanism. This strategy is different from the read-driven data caching mode (Least-Recent-Used, LRU), which directly put the most recently written data in the system buffer. When the buffer reaches a threshold, the earliest data are written to disk in batches. Generally speaking, for the use of IoT data, users are most concerned about the newly generated data, that is, the current status. TDengine takes full advantage of this feature to put the most recently arrived (current state) data in the buffer.
TDengine adopts a time-driven cache management strategy (First-In-First-Out, FIFO), also known as a Write-driven Cache Management Mechanism. This strategy is different from the read-driven data caching mode (Least-Recently-Used, LRU): it directly puts the most recently written data in the system buffer. When the buffer reaches a threshold, the earliest data are written to disk in batches. Generally speaking, for the use of IoT data, users are most concerned about the most recently generated data, that is, the current status. TDengine takes full advantage of this feature to put the most recently arrived (current state) data in the buffer.
TDengine provides millisecond-level data collecting capability to users through query functions. Putting the recently arrived data directly in the buffer can respond to users' analysis query for the latest piece or batch of data more quickly, and provide faster database query response capability as a whole. In this sense, **TDengine can be used as a data cache by setting appropriate configuration parameters without deploying Redis or other additional cache systems**, which can effectively simplify the system architecture and reduce the operation costs. It should be noted that after the TDengine is restarted, the buffer of the system will be emptied, the previously cached data will be written to disk in batches, and the previously cached data will not be reloaded into the buffer as so in a proprietary key-value cache system.
TDengine provides millisecond-level data collecting capability to users through query functions. Putting the recently arrived data directly in the buffer can respond to users' analysis query for the latest piece or batch of data more quickly, and provide faster database query response capability as a whole. In this sense, **TDengine can be used as a data cache by setting appropriate configuration parameters without deploying Redis or other additional cache systems**. This can effectively simplify the system architecture and reduce operational costs. It should be noted that after TDengine is restarted, the buffer of the system will be emptied, the previously cached data will be written to disk in batches, and the previously cached data will not be reloaded into the buffer. In this sense, TDengine's cache differs from proprietary key-value cache systems.
Each vnode has its own independent memory, and it is composed of multiple memory blocks of fixed size, and different vnodes are completely isolated. When writing data, similar to the writing of logs, data is sequentially added to memory, but each vnode maintains its own skip list for quick search. When more than one third of the memory block are used, the disk writing operation will start, and the subsequent writing operation is carried out in a new memory block. By this design, one third of the memory blocks in a vnode keep the latest data, so as to achieve the purpose of caching and quick search. The number of memory blocks of a vnode is determined by the configuration parameter “blocks”, and the size of memory blocks is determined by the configuration parameter “cache”.
### Persistent Storage
TDengine uses a data-driven method to write the data from buffer into hard disk for persistent storage. When the cached data in vnode reaches a certain volume, TDengine will also pull up the disk-writing thread to write the cached data into persistent storage in order not to block subsequent data writing. TDengine will open a new database log file when the data is written, and delete the old database log file after written successfully to avoid unlimited log growth.
TDengine uses a data-driven method to write the data from buffer into hard disk for persistent storage. When the cached data in vnode reaches a certain volume, TDengine will pull up the disk-writing thread to write the cached data into persistent storage so that subsequent data writing is not blocked. TDengine will open a new database log file when the data is written, and delete the old database log file after successful persistence, to avoid unlimited log growth.
To make full use of the characteristics of time-series data, TDengine splits the data stored in persistent storage by a vnode into multiple files, each file only saves data for a fixed number of days, which is determined by the system configuration parameter `“days”`. By so, for the given start and end date of a query, you can locate the data files to open immediately without any index, thus greatly speeding up reading operations.
To make full use of the characteristics of time-series data, TDengine splits the data stored in persistent storage by a vnode into multiple files, each file only saves data for a fixed number of days, which is determined by the system configuration parameter `“days”`. Thus for given start and end dates of a query, you can locate the data files to open immediately without any index. This greatly speeds up read operations.
For time-series data, there is generally a retention policy, which is determined by the system configuration parameter `“keep”`. Data files exceeding this set number of days will be automatically deleted by the system to free up storage space.
Given “days” and “keep” parameters, the total number of data files in a vnode is: keep/days. The total number of data files should not be too large or too small. 10 to 100 is appropriate. Based on this principle, reasonable days can be set. In the current version, parameter “keep” can be modified, but parameter “days” cannot be modified once it is set.
In each data file, the data of a table is stored by blocks. A table can have one or more data file blocks. In a file block, data is stored in columns, occupying a continuous storage space, thus greatly improving the reading speed. The size of file block is determined by the system parameter `“maxRows”` (the maximum number of records per block), and the default value is 4096. This value should not be too large or too small. If it is too large, the data locating in search will cost longer; if too small, the index of data block is too large, and the compression efficiency will be low with slower reading speed.
In each data file, the data of a table is stored in blocks. A table can have one or more data file blocks. In a file block, data is stored in columns, occupying a continuous storage space, thus greatly improving the reading speed. The size of file block is determined by the system parameter `“maxRows”` (the maximum number of records per block), and the default value is 4096. This value should not be too large or too small. If it is too large, data location for queries will take a longer time. If it is too small, the index of data block is too large, and the compression efficiency will be low with slower reading speed.
Each data file (with a .data postfix) has a corresponding index file (with a .head postfix). The index file has summary information of a data block for each table, recording the offset of each data block in the data file, start and end time of data and other information, so as to lead system quickly locate the data to be found. Each data file also has a corresponding last file (with a .last postfix), which is designed to prevent data block fragmentation when written in disk. If the number of written records from a table does not reach the system configuration parameter `“minRows”` (minimum number of records per block), it will be stored in the last file first. When write to disk next time, the newly written records will be merged with the records in last file and then written into data file.
Each data file (with a .data postfix) has a corresponding index file (with a .head postfix). The index file has summary information of a data block for each table, recording the offset of each data block in the data file, start and end time of data and other information which allows the system to locate the data to be found very quickly. Each data file also has a corresponding last file (with a .last postfix), which is designed to prevent data block fragmentation when written in disk. If the number of written records from a table does not reach the system configuration parameter `“minRows”` (minimum number of records per block), it will be stored in the last file first. At the next write operation to the disk, the newly written records will be merged with the records in last file and then written into data file.
When data is written to disk, it is decided whether to compress the data according to system configuration parameter `“comp”`. TDengine provides three compression options: no compression, one-stage compression and two-stage compression, corresponding to comp values of 0, 1 and 2 respectively. One-stage compression is carried out according to the type of data. Compression algorithms include delta-delta coding, simple 8B method, zig-zag coding, LZ4 and other algorithms. Two-stage compression is based on one-stage compression and compressed by general compression algorithm, which has higher compression ratio.
When data is written to disk, the system decides whether to compress the data based on the system configuration parameter `“comp”`. TDengine provides three compression options: no compression, one-stage compression and two-stage compression, corresponding to comp values of 0, 1 and 2 respectively. One-stage compression is carried out according to the type of data. Compression algorithms include delta-delta coding, simple 8B method, zig-zag coding, LZ4 and other algorithms. Two-stage compression is based on one-stage compression and compressed by general compression algorithm, which has higher compression ratio.
### Tiered Storage
By default, TDengine saves all data in /var/lib/taos directory, and the data files of each vnode are saved in a different directory under this directory. In order to expand the storage space, minimize the bottleneck of file reading and improve the data throughput rate, TDengine can configure the system parameter “dataDir” to allow multiple mounted hard disks to be used by system at the same time. In addition, TDengine also provides the function of tiered data storage, i.e. storage on different storage media according to the time stamps of data files. For example, the latest data is stored on SSD, the data for more than one week is stored on local hard disk, and the data for more than four weeks is stored on network storage device, thus reducing the storage cost and ensuring efficient data access. The movement of data on different storage media is automatically done by the system and completely transparent to applications. Tiered storage of data is also configured through the system parameter “dataDir”.
By default, TDengine saves all data in /var/lib/taos directory, and the data files of each vnode are saved in a different directory under this directory. In order to expand the storage space, minimize the bottleneck of file reading and improve the data throughput rate, TDengine can configure the system parameter “dataDir” to allow multiple mounted hard disks to be used by system at the same time. In addition, TDengine also provides the function of tiered data storage, i.e. storage on different storage media according to the time stamps of data files. For example, the latest data is stored on SSD, the data older than a week is stored on local hard disk, and data older than four weeks is stored on network storage device. This reduces storage costs and ensures efficient data access. The movement of data on different storage media is automatically done by the system and is completely transparent to applications. Tiered storage of data is also configured through the system parameter “dataDir”.
dataDir format is as follows:
```
@ -216,7 +218,7 @@ dataDir data_path [tier_level]
Where data_path is the folder path of mount point and tier_level is the media storage-tier. The higher the storage tier, the older the data files stored on it. Multiple hard disks can be mounted at the same storage-tier, and data files on the same storage-tier are distributed on all hard disks within the tier. TDengine supports up to 3 tiers of storage, so tier_level values are 0, 1, and 2. When configuring dataDir, there must be only one mount path without specifying tier_level, which is called special mount disk (path). The mount path defaults to level 0 storage media and contains special file links, which cannot be removed, otherwise it will have a devastating impact on the written data.
Suppose a physical node with six mountable hard disks/mnt/disk1,/mnt/disk2, …,/mnt/disk6, where disk1 and disk2 need to be designated as level 0 storage media, disk3 and disk4 are level 1 storage media, and disk5 and disk6 are level 2 storage media. Disk1 is a special mount disk, you can configure it in/etc/taos/taos.cfg as follows:
Suppose there is a physical node with six mountable hard disks /mnt/disk1, /mnt/disk2, …, /mnt/disk6, where disk1 and disk2 need to be designated as level 0 storage media, disk3 and disk4 are level 1 storage media, and disk5 and disk6 are level 2 storage media. Disk1 is a special mount disk. You can configure it in /etc/taos/taos.cfg as follows:
```
dataDir /mnt/disk1/taos
@ -233,11 +235,11 @@ Note: Tiered Storage is only supported in Enterprise Edition
## Data Query
TDengine provides a variety of query processing functions for tables and STables. In addition to common aggregation queries, TDengine also provides window queries and statistical aggregation functions for time-series data. The query processing of TDengine needs the collaboration of client, vnode and mnode.
TDengine provides a variety of query processing functions for tables and STables. In addition to common aggregation queries, TDengine also provides window queries and statistical aggregation functions for time-series data. Query processing in TDengine needs the collaboration of client, vnode and mnode.
### Single Table Query
The parsing and verification of SQL statements are completed on the client side. SQL statements are parsed and generate an Abstract Syntax Tree (AST), which is then checksummed. Then request metadata information (table metadata) for the table specified in the query from management node (mnode).
The parsing and verification of SQL statements are completed on the client side. SQL statements are parsed and generate an Abstract Syntax Tree (AST), which is then checksummed. Then metadata information (table metadata) for the table specified in the query is requested from the management node (mnode).
According to the End Point information in metadata information, the query request is serialized and sent to the data node (dnode) where the table is located. After receiving the query, the dnode identifies the virtual node (vnode) pointed to and forwards the message to the query execution queue of the vnode. The query execution thread of vnode establishes the basic query execution environment, immediately returns the query request and starts executing the query at the same time.
@ -245,9 +247,9 @@ When client obtains query result, the worker thread in query execution queue of
### Aggregation by Time Axis, Downsampling, Interpolation
The remarkable feature that time-series data is different from ordinary data is that each record has a timestamp, so aggregating data with timestamps on the time axis is an important and distinct feature from common databases. From this point of view, it is similar to the window query of stream computing engine.
Time-series data is different from ordinary data in that each record has a timestamp. So aggregating data by timestamps on the time axis is an important and distinct feature of time-series databases which is different from that of common databases. It is similar to the window query of stream computing engines.
The keyword `interval` is introduced into TDengine to split fixed length time windows on time axis, and the data are aggregated based on time windows, and the data within window range are aggregated as needed. For example:
The keyword `interval` is introduced into TDengine to split fixed length time windows on the time axis. The data is aggregated based on time windows, and the data within time window ranges is aggregated as needed. For example:
```mysql
select count(*) from d1001 interval(1h);
@ -265,7 +267,7 @@ For the data collected by device D1001, the number of records per hour is counte
### Multi-table Aggregation Query
TDengine creates a separate table for each data collection point, but in practical applications, it is often necessary to aggregate data from different data collection points. In order to perform aggregation operations efficiently, TDengine introduces the concept of STable. STable is used to represent a specific type of data collection point. It is a table set containing multiple tables. The schema of each table in the set is the same, but each table has its own static tag. The tags can be multiple and be added, deleted and modified at any time. Applications can aggregate or statistically operate all or a subset of tables under a STABLE by specifying tag filters, thus greatly simplifying the development of applications. The process is shown in the following figure:
TDengine creates a separate table for each data collection point, but in practical applications, it is often necessary to aggregate data from different data collection points. In order to perform aggregation operations efficiently, TDengine introduces the concept of STable (super table). STable is used to represent a specific type of data collection point. It is a table set containing multiple tables. The schema of each table in the set is the same, but each table has its own static tag. There can be multiple tags which can be added, deleted and modified at any time. Applications can aggregate or statistically operate on all or a subset of tables under a STable by specifying tag filters. This greatly simplifies the development of applications. The process is shown in the following figure:
![TDengine Database Diagram of multi-table aggregation query](multi_tables.webp)
<center> Figure 5: Diagram of multi-table aggregation query </center>
@ -274,12 +276,12 @@ TDengine creates a separate table for each data collection point, but in practic
2. TAOSC sends the STable name to Meta Node(management node);
3. Management node sends the vnode list owned by the STable back to TAOSC;
4. TAOSC sends the computing request together with tag filters to multiple data nodes corresponding to these vnodes;
5. Each vnode first finds out the set of tables within its own node that meet the tag filters from memory, then scans the stored time-series data, completes corresponding aggregation calculations, and returns result to TAOSC;
5. Each vnode first finds the set of tables within its own node that meet the tag filters from memory, then scans the stored time-series data, completes corresponding aggregation calculations, and returns result to TAOSC;
6. TAOSC finally aggregates the results returned by multiple data nodes and sends them back to the application.
Since TDengine stores tag data and time-series data separately in vnode, by filtering tag data in memory, the set of tables that need to participate in aggregation operation is first found, which greatly reduces the volume of data scanned and improves aggregation speed. At the same time, because the data is distributed in multiple vnodes/dnodes, the aggregation operation is carried out concurrently in multiple vnodes, which further improves the aggregation speed. Aggregation functions for ordinary tables and most operations are applicable to STables. The syntax is exactly the same. Please see TAOS SQL for details.
Since TDengine stores tag data and time-series data separately in vnode, by filtering tag data in memory, the set of tables that need to participate in aggregation operation is first found, which reduces the volume of data to be scanned and improves aggregation speed. At the same time, because the data is distributed in multiple vnodes/dnodes, the aggregation operation is carried out concurrently in multiple vnodes, which further improves the aggregation speed. Aggregation functions for ordinary tables and most operations are applicable to STables. The syntax is exactly the same. Please see TAOS SQL for details.
### Precomputation
In order to effectively improve the performance of query processing, based-on the unchangeable feature of IoT data, statistical information of data stored in data block is recorded in the head of data block, including max value, min value, and sum. We call it a precomputing unit. If the query processing involves all the data of a whole data block, the pre-calculated results are directly used, and no need to read the data block contents at all. Since the amount of pre-calculated data is much smaller than the actual size of data block stored on disk, for query processing with disk IO as bottleneck, the use of pre-calculated results can greatly reduce the pressure of reading IO and accelerate the query process. The precomputation mechanism is similar to the index BRIN (Block Range Index) of PostgreSQL.
In order to effectively improve the performance of query processing, based on the immutable nature of IoT data, statistical information of data stored in data block is recorded in the head of data block, including max value, min value, and sum. We call it a precomputing unit. If the query processing involves all the data of a whole data block, the pre-calculated results are directly used, and there is no need to read the data block contents at all. Since the amount of pre-calculated data is much smaller than the actual size of data block stored on disk, for query processing with disk IO as bottleneck, the use of pre-calculated results can greatly reduce the pressure of reading IO and accelerate the query process. The precomputation mechanism is similar to the BRIN (Block Range Index) of PostgreSQL.

View File

@ -5,16 +5,16 @@ title: Quickly Build IT DevOps Visualization System with TDengine + Telegraf + G
## Background
TDengine is a big data platform designed and optimized for IoT (Internet of Things), Vehicle Telematics, Industrial Internet, IT DevOps, etc. by TAOSData. Since it opened its source code in July 2019, it has won the favor of a large number of time-series data developers with its innovative data modeling design, convenient installation, easy-to-use programming interface, and powerful data writing and query performance.
TDengine is a big data platform designed and optimized for IoT (Internet of Things), Vehicle Telemetry, Industrial Internet, IT DevOps and other applications. Since it was open-sourced in July 2019, it has won the favor of a large number of time-series data developers with its innovative data modeling design, convenient installation, easy-to-use programming interface, and powerful data writing and query performance.
IT DevOps metric data usually are time sensitive, for example:
- System resource metrics: CPU, memory, IO, bandwidth, etc.
- Software system metrics: health status, number of connections, number of requests, number of timeouts, number of errors, response time, service type, and other business-related metrics.
Current mainstream IT DevOps system usually include a data collection module, a data persistent module, and a visualization module; Telegraf and Grafana are one of the most popular data collection modules and visualization modules, respectively. The data persistent module is available in a wide range of options, with OpenTSDB or InfluxDB being the most popular. TDengine, as an emerging time-series big data platform, has the advantages of high performance, high reliability, easy management and easy maintenance.
Current mainstream IT DevOps systems usually include a data collection module, a data persistence module, and a visualization module; Telegraf and Grafana are among the most popular data collection modules and visualization modules, respectively. The data persistence module is available in a wide range of options, with OpenTSDB or InfluxDB being the most popular. TDengine, as an emerging time-series big data platform, has the advantages of high performance, high reliability, easy management and easy maintenance.
This article introduces how to quickly build a TDengine + Telegraf + Grafana based IT DevOps visualization system without writing even a single line of code and by simply modifying a few lines of configuration files. The architecture is as follows.
This article introduces how to quickly build a TDengine + Telegraf + Grafana based IT DevOps visualization system without writing even a single line of code and by simply modifying a few lines in configuration files. The architecture is as follows.
![TDengine Database IT-DevOps-Solutions-Telegraf](./IT-DevOps-Solutions-Telegraf.webp)
@ -79,5 +79,5 @@ Click on the plus icon on the left and select `Import` to get the data from `htt
## Wrap-up
The above demonstrates how to quickly build a IT DevOps visualization system. Thanks to the new schemaless protocol parsing feature in TDengine version 2.4.0.0 and the powerful ecological software adaptation capability, users can build an efficient and easy-to-use IT DevOps visualization system in just a few minutes.
The above demonstrates how to quickly build an IT DevOps visualization system. Thanks to the new schemaless protocol parsing feature in TDengine version 2.4.0.0 and the ability to integrate easily with a large software ecosystem, users can build an efficient and easy-to-use IT DevOps visualization system in just a few minutes.
Please refer to the official documentation and product implementation cases for other features.

View File

@ -5,17 +5,17 @@ title: Quickly build an IT DevOps visualization system using TDengine + collectd
## Background
TDengine is a big data platform designed and optimized for IoT (Internet of Things), Vehicle Telematics, Industrial Internet, IT DevOps, etc. by TAOSData. Since it opened its source code in July 2019, it has won the favor of a large number of time-series data developers with its innovative data modeling design, convenient installation, easy-to-use programming interface, and powerful data writing and query performance.
TDengine is a big data platform designed and optimized for IoT (Internet of Things), Vehicle Telemetry, Industrial Internet, IT DevOps and other applications. Since it was open-sourced in July 2019, it has won the favor of a large number of time-series data developers with its innovative data modeling design, convenient installation, easy-to-use programming interface, and powerful data writing and query performance.
IT DevOps metric data usually are time sensitive, for example:
- System resource metrics: CPU, memory, IO, bandwidth, etc.
- Software system metrics: health status, number of connections, number of requests, number of timeouts, number of errors, response time, service type, and other business-related metrics.
The current mainstream IT DevOps visualization system usually contains a data collection module, a data persistent module, and a visual display module. collectd/StatsD, as an old-fashion open source data collection tool, has a wide user base. However, collectd/StatsD has limited functionality, and often needs to be combined with Telegraf, Grafana, and a time-series database to build a complete monitoring system.
The current mainstream IT DevOps visualization system usually contains a data collection module, a data persistence module, and a visual display module. collectd/StatsD, as an old-fashioned open source data collection tool, has a wide user base. However, collectd/StatsD has limited functionality, and often needs to be combined with Telegraf, Grafana, and a time-series database to build a complete monitoring system.
The new version of TDengine supports multiple data protocols and can accept data from collectd and StatsD directly, and provides Grafana dashboard for graphical display.
This article introduces how to quickly build an IT DevOps visualization system based on TDengine + collectd / StatsD + Grafana without writing even a single line of code but by simply modifying a few lines of configuration files. The architecture is shown in the following figure.
This article introduces how to quickly build an IT DevOps visualization system based on TDengine + collectd / StatsD + Grafana without writing even a single line of code but by simply modifying a few lines in configuration files. The architecture is shown in the following figure.
![TDengine Database IT-DevOps-Solutions-Collectd-StatsD](./IT-DevOps-Solutions-Collectd-StatsD.webp)
@ -99,6 +99,6 @@ Download the dashboard json from `https://github.com/taosdata/grafanaplugin/blob
## Wrap-up
TDengine, as an emerging time-series big data platform, has the advantages of high performance, high reliability, easy management and easy maintenance. Thanks to the new schemaless protocol parsing function in TDengine version 2.4.0.0 and the powerful ecological software adaptation capability, users can build an efficient and easy-to-use IT DevOps visualization system or adapt to an existing system in just a few minutes.
TDengine, as an emerging time-series big data platform, has the advantages of high performance, high reliability, easy management and easy maintenance. Thanks to the new schemaless protocol parsing feature in TDengine version 2.4.0.0 and its ability to integrate easily with a large software ecosystem, users can build an efficient and easy-to-use IT DevOps visualization system, or adapt an existing system, in just a few minutes.
For TDengine's powerful data writing and querying performance and other features, please refer to the official documentation and successful product implementation cases.

View File

@ -3,10 +3,9 @@ sidebar_label: OpenTSDB Migration to TDengine
title: Best Practices for Migrating OpenTSDB Applications to TDengine
---
As a distributed, scalable, HBase-based distributed time-series database software, thanks to its first-mover advantage, OpenTSDB has been introduced and widely used in DevOps by people. However, using new technologies like cloud computing, microservices, and containerization technology with rapid development. Enterprise-level services are becoming more and more diverse. The architecture is becoming more complex.
As a distributed, scalable time-series database platform based on HBase, and thanks to its first-mover advantage, OpenTSDB is widely used for monitoring in DevOps. However, as new technologies like cloud computing, microservices, and containerization have developed rapidly, enterprise-level services are becoming more and more diverse and the architecture is becoming more complex.
From this situation, it increasingly plagues to use of OpenTSDB as a DevOps backend storage for monitoring by performance issues and delayed feature upgrades. The resulting increase in application deployment costs and reduced operational efficiency.
These problems are becoming increasingly severe as the system scales up.
As a result, as a DevOps backend for monitoring, OpenTSDB is plagued by performance issues and delayed feature upgrades. This has resulted in increased application deployment costs and reduced operational efficiency. These problems become increasingly severe as the system tries to scale up.
To meet the fast-growing IoT big data market and technical needs, TAOSData developed an innovative big-data processing product, **TDengine**.
@ -14,14 +13,14 @@ After learning the advantages of many traditional relational databases and NoSQL
Compared with OpenTSDB, TDengine has the following distinctive features.
- Performance of data writing and querying far exceeds that of OpenTSDB.
- Efficient compression mechanism for time-series data, which compresses less than 1/5 of the storage space on disk.
- The installation and deployment are straightforward. A single installation package can complete the installation and deployment and does not rely on other third-party software. The entire installation and deployment process in a few seconds;
- The built-in functions cover all of OpenTSDB's query functions. And support more time-series data query functions, scalar functions, and aggregation functions. And support advanced query functions such as multiple time-window aggregations, join query, expression operation, multiple group aggregation, user-defined sorting, and user-defined functions. Adopting SQL-like syntax rules is more straightforward and has no learning cost.
- Data writing and querying performance far exceeds that of OpenTSDB.
- Efficient compression mechanism for time-series data, which compresses to less than 1/5 of the storage space on disk.
- The installation and deployment are straightforward. A single installation package can complete the installation and deployment and does not rely on other third-party software. The entire installation and deployment process takes a few seconds.
- The built-in functions cover all of OpenTSDB's query functions and TDengine supports more time-series data query functions, scalar functions, and aggregation functions. TDengine also supports advanced query functions such as multiple time-window aggregations, join query, expression operation, multiple group aggregation, user-defined sorting, and user-defined functions. With a SQL-like query language, querying is more straightforward and has no learning cost.
- Supports up to 128 tags, with a total tag length of 16 KB.
- In addition to the REST interface, it also provides interfaces to Java, Python, C, Rust, Go, C# and other languages. It supports a variety of enterprise-class standard connector protocols such as JDBC.
If we migrate the applications originally running on OpenTSDB to TDengine, we will effectively reduce the compute and storage resource consumption and the number of deployed servers. And will also significantly reduce the operation and maintenance costs, making operation and maintenance management more straightforward and more accessible, and considerably reducing the total cost of ownership. Like OpenTSDB, TDengine has also been open-sourced, including the stand-alone version and the cluster version source code. So there is no need to be concerned about the vendor-lock problem.
Migrating applications originally running on OpenTSDB to TDengine effectively reduces compute and storage resource consumption and the number of deployed servers. It also significantly reduces operation and maintenance costs, makes operation and maintenance management more straightforward and more accessible, and considerably reduces the total cost of ownership. Like OpenTSDB, TDengine has also been open-sourced. Both the stand-alone version and the cluster version are open-sourced, so there is no need to be concerned about vendor lock-in.
We will explain how to migrate OpenTSDB applications to TDengine quickly, securely, and reliably without coding, using the most typical DevOps scenarios. Subsequent chapters will go into more depth to facilitate migration for non-DevOps systems.
@ -34,7 +33,7 @@ The following figure (Figure 1) shows the system's overall architecture for a ty
**Figure 1. Typical architecture in a DevOps scenario**
![TDengine Database IT-DevOps-Solutions-Immigrate-OpenTSDB-Arch](./IT-DevOps-Solutions-Immigrate-OpenTSDB-Arch.webp "Figure 1. Typical architecture in a DevOps scenario")
In this application scenario, there are Agent tools deployed in the application environment to collect machine metrics, network metrics, and application metrics. Data collectors to aggregate information collected by agents, systems for persistent data storage and management, and tools for monitoring data visualization (e.g., Grafana, etc.).
In this application scenario, there are Agent tools deployed in the application environment to collect machine metrics, network metrics, and application metrics. There are also data collectors to aggregate information collected by agents, systems for persistent data storage and management, and tools for data visualization (e.g., Grafana, etc.).
The agents deployed in the application nodes are responsible for providing operational metrics from different sources to collectd/StatsD. collectd/StatsD is responsible for pushing the aggregated data to the OpenTSDB cluster system, and the data is then visualized using the visualization kanban board software, Grafana.
@ -44,15 +43,15 @@ The agents deployed in the application nodes are responsible for providing opera
First of all, please install TDengine. Download the latest stable version of TDengine from the official website and install it. For help with using various installation packages, please refer to the blog ["Installation and Uninstallation of TDengine Multiple Installation Packages"](https://www.taosdata.com/blog/2019/08/09/566.html).
Note that once the installation is complete, do not start the `taosd` service immediately, but after properly configuring the parameters.
Note that once the installation is complete, do not start the `taosd` service before properly configuring the parameters.
- **Adjusting the data collector configuration**
TDengine version 2.4 and later versions include `taosAdapter`. taosAdapter is a stateless, rapidly elastic, and scalable component. taosAdapter supports InfluxDB's Line Protocol and OpenTSDB's telnet/JSON writing protocol specification, providing rich data access capabilities, effectively saving user migration costs and reducing the difficulty of user migration.
Users can flexibly deploy taosAdapter instances according to their requirements to rapidly improve the throughput of data writes in conjunction with the needs of scenarios and provide guarantees for data writes in different application scenarios.
Users can flexibly deploy taosAdapter instances, based on their requirements, to improve data writing throughput and provide guarantees for data writes in different application scenarios.
Through taosAdapter, users can directly push the data collected by `collectd` or `StatsD` to TDengine to achieve seamless migration of application scenarios, which is very easy and convenient. taosAdapter also supports Telegraf, Icinga, TCollector, and node_exporter data. For more details, please refer to [taosAdapter](/reference/taosadapter/).
Through taosAdapter, users can directly write the data collected by `collectd` or `StatsD` to TDengine to achieve easy, convenient and seamless migration in application scenarios. taosAdapter also supports Telegraf, Icinga, TCollector, and node_exporter data. For more details, please refer to [taosAdapter](/reference/taosadapter/).
If using collectd, modify the configuration file in its default location `/etc/collectd/collectd.conf` to point to the IP address and port of the node where taosAdapter is deployed. For example, assuming the taosAdapter IP address is 192.168.1.130 and port 6046, configure it as follows.
@ -66,56 +65,55 @@ LoadPlugin write_tsdb
</Plugin>
```
You can use collectd and push the data to taosAdapter utilizing the push to OpenTSDB plugin. taosAdapter will call the API to write the data to TDengine, thus completing the writing of the data. If you are using StatsD, adjust the profile information accordingly.
You can use collectd and push the data to taosAdapter utilizing the write_tsdb plugin. taosAdapter will call the API to write the data to TDengine. If you are using StatsD, adjust the profile information accordingly.
- **Tuning the Dashboard system**
After writing the data to TDengine properly, you can adapt Grafana to visualize the data written to TDengine. To obtain and use the Grafana plugin provided by TDengine, please refer to [Links to other tools](/third-party/grafana).
After writing the data to TDengine, you can configure Grafana to visualize the data written to TDengine. To obtain and use the Grafana plugin provided by TDengine, please refer to [Links to other tools](/third-party/grafana).
TDengine provides two sets of Dashboard templates by default, and users only need to import the templates from the Grafana directory into Grafana to activate their use.
**Figure 2. Importing Grafana Templates**
![TDengine Database IT-DevOps-Solutions-Immigrate-OpenTSDB-Dashboard](./IT-DevOps-Solutions-Immigrate-OpenTSDB-Dashboard.webp "Figure 2. Importing a Grafana Template")
After the above steps, you completed the migration to replace OpenTSDB with TDengine. You can see that the whole process is straightforward, there is no need to write any code, and only some configuration files need to be adjusted to meet the migration work.
With the above steps completed, you have finished replacing OpenTSDB with TDengine. You can see that the whole process is straightforward, there is no need to write any code, and only some configuration files need to be changed.
### 3. Post-migration architecture
After completing the migration, the figure below (Figure 3) shows the system's overall architecture. The whole process of the acquisition side, the data writing, and the monitoring and presentation side are all kept stable, except for a few configuration adjustments, which do not involve any critical changes or alterations. OpenTSDB to TDengine migration action, using TDengine more powerful processing power and query performance.
After completing the migration, the figure below (Figure 3) shows the system's overall architecture. The whole process of the acquisition side, the data writing, and the monitoring and presentation side are all kept stable. There are a few configuration adjustments, which do not involve any critical changes or alterations. Migrating to TDengine from OpenTSDB leads to powerful processing power and query performance.
In most DevOps scenarios, if you have a small OpenTSDB cluster (3 or fewer nodes) for providing the storage layer of DevOps and rely on OpenTSDB to give a data persistence layer and query capabilities, you can safely replace OpenTSDB with TDengine. TDengine will save more compute and storage resources. With the same compute resource allocation, a single TDengine can meet the service capacity provided by 3 to 5 OpenTSDB nodes. If the scale is more prominent, then TDengine clustering is required.
Suppose your application is particularly complex, or the application domain is not a DevOps scenario. You can continue reading subsequent chapters for a more comprehensive and in-depth look at the advanced topics of migrating an OpenTSDB application to TDengine.
In most DevOps scenarios, if you have a small OpenTSDB cluster (3 or fewer nodes) which provides storage and data persistence layer in addition to query capability, you can safely replace OpenTSDB with TDengine. TDengine will save compute and storage resources. With the same compute resource allocation, a single TDengine can meet the service capacity provided by 3 to 5 OpenTSDB nodes. TDengine clustering may be required depending on the scale of the application.
**Figure 3. System architecture after migration**
![TDengine Database IT-DevOps-Solutions-Immigrate-TDengine-Arch](./IT-DevOps-Solutions-Immigrate-TDengine-Arch.webp "Figure 3. System architecture after migration completion")
The following chapters provide a more comprehensive and in-depth look at the advanced topics of migrating an OpenTSDB application to TDengine. This will be useful if your application is particularly complex or is not a DevOps application.
## Migration evaluation and strategy for other scenarios
### 1. Differences between TDengine and OpenTSDB
This chapter describes the differences between OpenTSDB and TDengine at the system functionality level. After reading this chapter, you can fully evaluate whether you can migrate some complex OpenTSDB-based applications to TDengine, and what you should pay attention to after migration.
TDengine currently only supports Grafana for visual kanban rendering, so if your application uses front-end kanban boards other than Grafana (e.g., [TSDash](https://github.com/facebook/tsdash), [Status Wolf](https://github.com/box/StatusWolf), etc.). You cannot directly migrate those front-end kanbans to TDengine, and the front-end kanban will need to be ported to Grafana to work correctly.
TDengine currently only supports Grafana for visual kanban rendering, so if your application uses front-end kanban boards other than Grafana (e.g., [TSDash](https://github.com/facebook/tsdash), [Status Wolf](https://github.com/box/StatusWolf), etc.) you cannot directly migrate those front-end kanbans to TDengine. The front-end kanban will need to be ported to Grafana to work correctly.
TDengine version 2.3.0.x only supports collectd and StatsD as data collection aggregation software but will provide more data collection aggregation software in the future. If you use other data aggregators on the collection side, your application needs to be ported to these two data aggregation systems to write data correctly.
TDengine version 2.3.0.x only supports collectd and StatsD as data collection and aggregation software, but future versions will provide support for more data collection and aggregation software. If you use other data aggregators on the collection side, your application needs to be ported to these two data aggregation systems to write data correctly.
In addition to the two data aggregator software protocols mentioned above, TDengine also supports writing data directly via InfluxDB's line protocol and OpenTSDB's data writing protocol, JSON format. You can rewrite the logic on the data push side to write data using the line protocols supported by TDengine.
In addition, if your application uses the following features of OpenTSDB, you need to understand the following considerations before migrating your application to TDengine.
In addition, if your application uses the following features of OpenTSDB, you need to take into account the following considerations before migrating your application to TDengine.
1. `/api/stats`: If your application uses this feature to monitor the service status of OpenTSDB, and you have built the relevant logic to link the processing in your application, then this part of the status reading and fetching logic needs to be re-adapted to TDengine. TDengine provides a new mechanism for handling cluster state monitoring to meet the monitoring and maintenance needs of your application.
2. `/api/tree`: If you rely on this feature of OpenTSDB for the hierarchical organization and maintenance of timelines, you cannot migrate it directly to TDengine, which uses a database -> super table -> sub-table hierarchy to organize and maintain timelines, with all timelines belonging to the same super table in the same system hierarchy, but it is possible to simulate a logical multi-level structure of the application through the unique construction of different tag values.
3. `Rollup And PreAggregates`: The use of Rollup and PreAggregates requires the application to decide where to access the Rollup results and, in some scenarios, to access the actual results. The opacity of this structure makes the application processing logic extraordinarily complex and not portable at all. We think this strategy is a compromise when the time-series database does not.
TDengine does not support automatic downsampling of multiple timelines and preaggregation (for a range of periods) for the time being. Still, thanks to its high-performance query processing logic can provide very high-performance query responses without relying on Rollup and preaggregation (for a range of periods), making your application query processing logic much more straightforward.
The logic is much simpler.
4. `Rate`: TDengine provides two functions to calculate the rate of change of values, namely `Derivative` (the result is consistent with the Derivative behavior of InfluxDB) and `IRate` (the result is compatible with the IRate function in Prometheus). However, the results of these two functions are slightly different from Rate, but the functions are more powerful overall. In addition, TDengine supports all the calculation functions provided by OpenTSDB, and TDengine's query functions are much more potent than those supported by OpenTSDB, which can significantly simplify the processing logic of your application.
2. `/api/tree`: If you rely on this feature of OpenTSDB for the hierarchical organization and maintenance of timelines, you cannot migrate it directly to TDengine, which uses a database -> super table -> sub-table hierarchy to organize and maintain timelines, with all timelines belonging to the same super table in the same system hierarchy. But it is possible to simulate a logical multi-level structure of the application through the unique construction of different tag values.
3. `Rollup And PreAggregates`: The use of Rollup and PreAggregates requires the application to decide where to access the Rollup results and, in some scenarios, to access the actual results. The opacity of this structure makes the application processing logic extraordinarily complex and not portable at all.
While TDengine does not currently support automatic downsampling of multiple timelines and preaggregation (for a range of periods), thanks to its high-performance query processing logic, it can provide very high-performance query responses without relying on Rollup and preaggregation (for a range of periods). This makes your application query processing logic straightforward and simple.
4. `Rate`: TDengine provides two functions to calculate the rate of change of values, namely `Derivative` (the result is consistent with the Derivative behavior of InfluxDB) and `IRate` (the result is compatible with the IRate function in Prometheus). However, the results of these two functions are slightly different from that of Rate. But the TDengine functions are more powerful. In addition, TDengine supports all the calculation functions provided by OpenTSDB. TDengine's query functions are much more powerful than those supported by OpenTSDB, which can significantly simplify the processing logic of your application.
Through the above introduction, I believe you should be able to understand the changes brought about by the migration of OpenTSDB to TDengine. And this information will also help you correctly determine whether you would migrate your application to TDengine to experience the powerful and convenient time-series data processing capability provided by TDengine.
With the above introduction, we believe you should be able to understand the changes brought about by the migration of OpenTSDB to TDengine. And this information will also help you correctly determine whether you should migrate your application to TDengine to experience the powerful and convenient time-series data processing capability provided by TDengine.
### 2. Migration strategy suggestion
First, the OpenTSDB-based system migration involves data schema design, system scale estimation, and data write end transformation, data streaming, and application adaptation; after that, the two systems will run in parallel for a while and then migrate the historical data to TDengine. Of course, if your application has some functions that strongly depend on the above OpenTSDB features and you do not want to stop using them, you can migrate the historical data to TDengine.
You can consider keeping the original OpenTSDB system running while starting TDengine to provide the primary services.
OpenTSDB-based system migration involves data schema design, system scale estimation, data write transformation, data streaming, and application changes. The two systems should run in parallel for a while, and then the historical data should be migrated to TDengine.
If your application has some functions that strongly depend on the above OpenTSDB features and you do not want to stop using them, you can also consider keeping the original OpenTSDB system running while using TDengine to provide the primary services.
## Data model design
@ -129,16 +127,19 @@ Let us now assume a DevOps scenario where we use collectd to collect the underly
| 2 | swap | value | double | host | swap_type | swap_type_instance | source | n/a |
| 3 | disk | value | double | host | disk_point | disk_instance | disk_type | source |
TDengine requires the data stored to have a data schema, i.e., you need to create a super table and specify the schema of the super table before writing the data. For data schema creation, you have two ways to do this: 1) Take advantage of TDengine's native data writing support for OpenTSDB by calling the TDengine API to write (text line or JSON format)
and automate the creation of single-value models. This approach does not require significant adjustments to the data writing application, nor does it require converting the written data format.
TDengine requires the data stored to have a data schema, i.e., you need to create a super table and specify the schema of the super table before writing the data. For data schema creation, you have two ways to do this:
1) Take advantage of TDengine's native data writing support for OpenTSDB by calling the TDengine API to write (text line or JSON format) and automate the creation of single-value models. This approach does not require significant adjustments to the data writing application, nor does it require converting the written data format.
At the C level, TDengine provides the `taos_schemaless_insert()` function to write data in OpenTSDB format directly (in earlier versions this function was named `taos_insert_lines()`). Please refer to the sample code `schemaless.c` in the installation package directory as reference.
(2) based on a complete understanding of TDengine's data model, to establish the mapping relationship between OpenTSDB and TDengine's data model adjustment manually. Considering that OpenTSDB is a single-value mapping model, recommended using the single-value model in TDengine. TDengine can support both multi-value and single-value models.
2) Based on a thorough understanding of TDengine's data model, establish a mapping between OpenTSDB and TDengine's data model. Considering that OpenTSDB is a single-value mapping model, we recommend using the single-value model in TDengine for simplicity. But keep in mind that TDengine supports both multi-value and single-value models.
- **Single-valued model**.
The steps are as follows: use the name of the metrics as the name of the TDengine super table, which build with two basic data columns - timestamp and value, and the label of the super table is equivalent to the label information of the metrics, and the number of labels is equal to the number of labels of the metrics. The names of sub-tables are named with fixed rules: `metric + '_' + tags1_value + '_' + tag2_value + '_' + tag3_value ...` as the sub-table name.
The steps are as follows:
- Use the name of the metrics as the name of the TDengine super table
- Build with two basic data columns - timestamp and value. The label of the super table is equivalent to the label information of the metrics, and the number of labels is equal to the number of labels of the metrics.
- Sub-tables are named using a fixed rule: `metric + '_' + tags1_value + '_' + tag2_value + '_' + tag3_value ...`.
Create 3 super tables in TDengine.
@ -158,13 +159,13 @@ The final system will have about 340 sub-tables and three super-tables. Note tha
- **Multi-value model**
Suppose you want to take advantage of TDengine's multi-value modeling capabilities. In that case, you need first to meet the requirements that different collection quantities have the same collection frequency and can reach the **data write side simultaneously via a message queue**, thus ensuring writing multiple metrics at once using SQL statements. The metric's name is used as the name of the super table to create a multi-column model of data that has the same collection frequency and can arrive simultaneously. The names of the sub-tables are named using a fixed rule. Each of the above metrics contains only one measurement value, so converting it into a multi-value model is impossible.
Ideally you should take advantage of TDengine's multi-value modeling capabilities. In that case, you first need to meet the requirement that different collection quantities have the same collection frequency and can reach the **data write side simultaneously via a message queue**, thus ensuring writing multiple metrics at once, using SQL statements. The metric's name is used as the name of the super table to create a multi-column model of data that has the same collection frequency and can arrive simultaneously. The sub-tables are named using a fixed rule. Each of the above metrics contains only one measurement value, so converting it into a multi-value model is impossible.
## Data triage and application adaptation
Subscribe data from the message queue and start the adapted writer to write the data.
Subscribe to the message queue and start writing data to TDengine.
After writing the data starts for a while, you can use SQL statements to check whether the amount of data written meets the expected writing requirements. Use the following SQL statement to count the amount of data.
After data has been written for a while, you can use SQL statements to check whether the amount of data written meets the expected writing requirements. Use the following SQL statement to count the amount of data.
```sql
select count(*) from memory
@ -184,7 +185,7 @@ To facilitate historical data migration, we provide a plug-in for the data synch
For the specific usage of DataX and how to use DataX to write data to TDengine, please refer to [DataX-based TDengine Data Migration Tool](https://www.taosdata.com/blog/2021/10/26/3156.html).
After migrating via DataX, we found that we can significantly improve the efficiency of migrating historical data by starting multiple processes and migrating numerous metrics simultaneously. The following are some records of the migration process. I wish to use these for application migration as a reference.
After migrating via DataX, we found that we can significantly improve the efficiency of migrating historical data by starting multiple processes and migrating numerous metrics simultaneously. The following are some records of the migration process. We provide these as a reference for application migration.
| Number of datax instances (number of concurrent processes) | Migration record speed (pieces/second) |
| ----------------------------- | --------------------- |
@ -202,13 +203,13 @@ Suppose you need to use the multi-value model for data writing. In that case, yo
Manual migration of data requires attention to the following two issues:
1) When storing the exported data on the disk, the disk needs to have enough storage space to accommodate the exported data files fully. Adopting the partial import mode to avoid the shortage of disk file storage after the total amount of data is exported. Preferentially export the timelines belonging to the same super table. Then the exported data files are imported into the TDengine system.
1) When storing the exported data on the disk, the disk needs to have enough storage space to accommodate the exported data files fully. To avoid running out of disk space, you can adopt a partial import mode in which you preferentially export the timelines belonging to the same super table and then only those files are imported into TDengine.
2) Under the full load of the system, if there are enough remaining computing and IO resources, establish a multi-threaded importing to maximize the efficiency of data migration. Considering the vast load that data parsing brings to the CPU, it is necessary to control the maximum number of parallel tasks to avoid the overall overload of the system triggered by importing historical data.
2) Under the full load of the system, if there are enough remaining computing and IO resources, establish a multi-threaded import to maximize the efficiency of data migration. Considering the vast load that data parsing brings to the CPU, it is necessary to control the maximum number of parallel tasks to avoid overloading the system when importing historical data.
Due to the ease of operation of TDengine itself, there is no need to perform index maintenance and data format change processing in the entire process. The whole process only needs to be executed sequentially.
When wholly importing the historical data into TDengine, the two systems run simultaneously and then switch the query request to TDengine to achieve seamless application switching.
While importing historical data into TDengine, the two systems should run simultaneously. Once all the data is migrated, switch the query request to TDengine to achieve seamless application switching.
## Appendix 1: OpenTSDB query function correspondence table
@ -222,12 +223,12 @@ Example:
SELECT avg(val) FROM (SELECT first(val) FROM super_table WHERE ts >= startTime and ts <= endTime INTERVAL(20s) Fill(linear)) INTERVAL(20s)
```
Remark:
Remarks:
1. The value in Interval needs to be the same as the interval value in the outer query.
2. The interpolation processing in TDengine needs to use subqueries to assist in the completion. As shown above, it is enough to specify the interpolation type in the inner query. Since the interpolation of the values in OpenTSDB uses linear interpolation, use fill( in the interpolation clause. linear) to declare the interpolation type. The following functions with the exact interpolation calculation requirements are processed by this method.
3. The parameter 20s in Interval indicates that the inner query will generate results according to a time window of 20 seconds. In an actual query, it needs to adjust to the time interval between different records. It ensures that producing interpolation results equivalent to the original data.
4. Due to the particular interpolation strategy and mechanism of OpenTSDB, the method of the first interpolation and then calculation in the aggregate query (Aggregate) makes the calculation results impossible to be utterly consistent with TDengine. But in the case of downsampling (Downsample), TDengine and OpenTSDB can obtain consistent results (since OpenTSDB performs aggregation and downsampling queries).
2. Interpolation processing in TDengine uses subqueries to assist in completion. As shown above, it is enough to specify the interpolation type in the inner query. Since OpenTSDB uses linear interpolation, use `fill(linear)` to declare the interpolation type in TDengine. Some of the functions mentioned below have exactly the same interpolation calculation requirements.
3. The parameter 20s in Interval indicates that the inner query will generate results according to a time window of 20 seconds. In an actual query, it needs to adjust to the time interval between different records. It ensures that interpolation results are equivalent to the original data.
4. Due to the particular interpolation strategy and mechanism of OpenTSDB, i.e. interpolation followed by aggregate calculation, it is impossible for the results to be completely consistent with those of TDengine. But in the case of downsampling (Downsample), TDengine and OpenTSDB can obtain consistent results (since OpenTSDB performs aggregation and downsampling queries).
### Count
@ -261,7 +262,7 @@ Select apercentile(col1, 50, “t-digest”) from table_name
Remark:
1. During the approximate query processing, OpenTSDB uses the t-digest algorithm by default, so in order to obtain the same calculation result, the algorithm used needs to be specified in the `apercentile()` function. TDengine can support two different approximation processing algorithms, declared by "default" and "t-digest" respectively.
1. When calculating estimated percentiles, OpenTSDB uses the t-digest algorithm by default. In order to obtain the same calculation results in TDengine, the algorithm used needs to be specified in the `apercentile()` function. TDengine supports two different percentile calculation algorithms, named "default" and "t-digest" respectively.
### First
@ -379,35 +380,34 @@ We still use the hypothetical environment from Chapter 4. There are three measur
### Storage resource estimation
Assuming that the number of sensor devices that generate data and need to be stored is `n`, the frequency of data generation is `t` per second, and the length of each record is `L` bytes, the scale of data generated per day is `n * t * L` bytes. Assuming the compression ratio is `C`, the daily data size is `(n * t * L)/C` bytes. The storage resources are estimated to accommodate the data scale for 1.5 years. In the production environment, the compression ratio C of TDengine is generally between 5 and 7.
With additional 20% redundancy, you can calculate the required storage resources:
With additional 20% redundancy, you can calculate the required storage resources:
```matlab
(n * t * L) * (365 * 1.5) * (1+20%)/C
````
Combined with the above calculation formula, bring the parameters into the formula, and the raw data scale generated every year is 11.8TB without considering the label information. Note that since tag information is associated with each timeline in TDengine, not every record. The scale of the amount of data to be recorded is somewhat reduced relative to the generated data, and this part of label data can be ignored as a whole. Assuming a compression ratio of 5, the size of the retained data ends up being 2.56 TB.
Substituting in the above formula, the raw data generated every year is 11.8TB without considering the label information. Note that tag information is associated with each timeline in TDengine, not every record. The amount of data to be recorded is somewhat reduced relative to the generated data, and label data can be ignored as a whole. Assuming a compression ratio of 5, the size of the retained data ends up being 2.56 TB.
### Storage Device Selection Considerations
The hard disk should be capable of better random read performance. Considering using an SSD as much as possible is a better choice. A disk with better random read performance is a great help to improve the system's query performance and improve the query response performance as a whole system. To obtain better query performance, the performance index of the single-threaded random read IOPS of the hard disk device should not be lower than 1000, and it is better to reach 5000 IOPS or more. Recommend to use `fio` utility software to evaluate the running performance (please refer to Appendix 1 for specific usage) for the random IO read of the current device to confirm whether it can meet the requirements of random read of large files.
A disk with better random read performance, such as an SSD, improves the system's query performance and improves the query response performance of the whole system. To obtain better query performance, the performance index of the single-threaded random read IOPS of the hard disk device should not be lower than 1000, and it is better to reach 5000 IOPS or more. We recommend using `fio` utility software to evaluate the running performance (please refer to Appendix 1 for specific usage) for the random IO read of the current device to confirm whether it can meet the requirements of random read of large files.
Hard disk writing performance has little effect on TDengine. The TDengine writing process adopts the append write mode, so as long as it has good sequential write performance, both SAS hard disks and SSDs in the general sense can well meet TDengine's requirements for disk write performance.
### Computational resource estimates
Due to the particularity of IoT data, after the frequency of data generation is consistent, the writing process of TDengine maintains a relatively fixed amount of resource consumption (computing and storage). According to the [TDengine Operation and Maintenance Guide](/operation/) description, the system consumes less than 1 CPU core at 22,000 writes per second.
Due to the characteristics of IoT data, when the frequency of data generation is consistent, the writing process of TDengine maintains a relatively fixed amount of resource consumption (computing and storage). According to the [TDengine Operation and Maintenance Guide](/operation/) description, the system consumes less than 1 CPU core at 22,000 writes per second.
In estimating the CPU resources consumed by the query, assuming that the application requires the database to provide 10,000 QPS, the CPU time consumed by each query is about 1 ms. The query provided by each core per second is 1,000 QPS, which satisfies 10,000 QPS. The query request requires at least 10 cores. For the system as a whole system to have less than 50% CPU load, the entire cluster needs twice as many as 10 cores or 20 cores.
In estimating the CPU resources consumed by queries, assume that the application requires the database to provide 10,000 QPS and that the CPU time consumed by each query is about 1 ms. Each core can then serve about 1,000 queries per second, so satisfying 10,000 QPS requires at least 10 cores. For the system as a whole to have less than 50% CPU load, the entire cluster needs twice as many cores, i.e. 20 cores.
### Memory resource estimation
The database allocates 16MB\*3 buffer memory for each Vnode by default. If the cluster system includes 22 CPU cores, TDengine will create 22 Vnodes (virtual nodes) by default. Each Vnode contains 1000 tables, which can accommodate all the tables. Then it takes about 1.5 hours to write a block, which triggers the drop, and no adjustment is required. A total of 22 Vnodes require about 1GB of memory cache. Considering the memory needed for the query, assuming that the memory overhead of each query is about 50MB, the memory required for 500 queries concurrently is about 25GB.
The database allocates 16MB\*3 buffer memory for each Vnode by default. If the cluster system includes 22 CPU cores, TDengine will create 22 Vnodes (virtual nodes) by default. Each Vnode contains 1000 tables, which is more than enough to accommodate all the tables in our hypothetical scenario. Then it takes about 1.5 hours to write a block, which triggers persistence to disk without requiring any adjustment. A total of 22 Vnodes require about 1GB of memory cache. Considering the memory needed for the query, assuming that the memory overhead of each query is about 50MB, the memory required for 500 queries concurrently is about 25GB.
In summary, using a single 16-core 32GB machine or a cluster of 2 8-core 16GB machines is enough.
## Appendix 3: Cluster Deployment and Startup
TDengine provides a wealth of help documents to explain many aspects of cluster installation and deployment. Here is the list of corresponding document for your reference.
TDengine provides a wealth of help documents to explain many aspects of cluster installation and deployment. Here is the list of documents for your reference.
### Cluster Deployment
@ -421,7 +421,7 @@ To ensure that the system can obtain the necessary information for regular opera
FQDN, firstEp, secondEP, dataDir, logDir, tmpDir, serverPort. For the specific meaning and setting requirements of each parameter, please refer to the document "[TDengine Cluster Installation and Management](/cluster/)"
Follow the same steps to set parameters on the nodes that need running, start the taosd service, and then add Dnodes to the cluster.
Follow the same steps to set parameters on the other nodes, start the taosd service, and then add Dnodes to the cluster.
Finally, start `taos` and execute the `show dnodes` command. If you can see all the nodes that have joined the cluster, the cluster building process was successfully completed. For specific operation procedures and precautions, please refer to the document "[TDengine Cluster Installation and Management](/cluster/)".

View File

@ -5,38 +5,38 @@ title: Frequently Asked Questions
## Submit an Issue
If the tips in FAQ don't help much, please submit an issue on [GitHub](https://github.com/taosdata/TDengine) to describe your problem description, including TDengine version, hardware and OS information, the steps to reproduce the problem, etc. It would be very helpful if you package the contents in `/var/log/taos` and `/etc/taos` and upload. These two are the default directories used by TDengine, if they have been changed in your configuration, please use according to the actual configuration. It's recommended to firstly set `debugFlag` to 135 in `taos.cfg`, restart `taosd`, then reproduce the problem and collect logs. If you don't want to restart, an alternative way of setting `debugFlag` is executing `alter dnode <dnode_id> debugFlag 135` command in TDengine CLI `taos`. During normal running, however, please make sure `debugFlag` is set to 131.
If the tips in FAQ don't help much, please submit an issue on [GitHub](https://github.com/taosdata/TDengine) to describe your problem. In your description please include the TDengine version, hardware and OS information, the steps to reproduce the problem and any other relevant information. It would be very helpful if you can package the contents in `/var/log/taos` and `/etc/taos` and upload them. These two are the default directories used by TDengine. If you have changed the default directories in your configuration, please package the files in your configured directories. We recommend setting `debugFlag` to 135 in `taos.cfg`, restarting `taosd`, then reproducing the problem and collecting the logs. If you don't want to restart, an alternative way of setting `debugFlag` is executing `alter dnode <dnode_id> debugFlag 135` command in TDengine CLI `taos`. During normal running, however, please make sure `debugFlag` is set to 131.
## Frequently Asked Questions
### 1. How to upgrade to TDengine 2.0 from older version?
version 2.x is not compatible with version 1.x regarding configuration file and data file, please do following before upgrading:
Version 2.x is not compatible with version 1.x. With regard to the configuration and data files, please perform the following steps before upgrading. Please follow data integrity, security, backup and other relevant SOPs and best practices before removing/deleting any data.
1. Delete configuration files: `sudo rm -rf /etc/taos/taos.cfg`
1. Delete configuration files: `sudo rm -rf /etc/taos/taos.cfg`
2. Delete log files: `sudo rm -rf /var/log/taos/`
3. Delete data files if the data doesn't need to be kept: `sudo rm -rf /var/lib/taos/`
4. Install latests 2.x version
5. If the data needs to be kept and migrated to newer version, please contact professional service of TDengine for assistance
4. Install latest 2.x version
5. If the data needs to be kept and migrated to newer version, please contact professional service at TDengine for assistance.
### 2. How to handle "Unable to establish connection"
When the client is unable to connect to the server, you can try following ways to find out why.
When the client is unable to connect to the server, you can try the following ways to troubleshoot and resolve the problem.
1. Check the network
- Check if the hosts where the client and server are running can be accessible to each other, for example by `ping` command.
- Check if the TCP/UDP on port 6030-6042 are open for access if firewall is enabled. It's better to firstly disable firewall for diagnostics.
- Check if the FQDN and serverPort are configured correctly in `taos.cfg` used by the server side
- Check if the `firstEp` is set properly in the `taos.cfg` used by the client side
- Check if the hosts where the client and server are running are accessible to each other, for example by `ping` command.
- Check if TCP/UDP ports 6030-6042 are open for access if a firewall is enabled. If possible, disable the firewall for diagnostics, but please ensure that you are following security and other relevant protocols.
- Check if the FQDN and serverPort are configured correctly in `taos.cfg` used by the server side.
- Check if the `firstEp` is set properly in the `taos.cfg` used by the client side.
2. Make sure the client version and server version are the same.
3. On server side, check the running status of `taosd` by executing `systemctl status taosd` . If your server is started using another way instead of `systemctl`, use the proper method to check whether the server process is running normally.
4. If using connector of Python, Java, Go, Rust, C#, node.JS on Linux to connect toe the server, please make sure `libtaos.so` is in directory `/usr/local/taos/driver` and `/usr/local/taos/driver` is in system lib search environment variable `LD_LIBRARY_PATH`.
4. If using connector of Python, Java, Go, Rust, C#, node.JS on Linux to connect to the server, please make sure `libtaos.so` is in directory `/usr/local/taos/driver` and `/usr/local/taos/driver` is in system lib search environment variable `LD_LIBRARY_PATH`.
5. If using connector on Windows, please make sure `C:\TDengine\driver\taos.dll` is in your system lib search path, it's suggested to put `taos.dll` under `C:\Windows\System32`.
5. If using connector on Windows, please make sure `C:\TDengine\driver\taos.dll` is in your system lib search path. We recommend putting `taos.dll` under `C:\Windows\System32`.
6. Some advanced network diagnostics tools
@ -45,7 +45,7 @@ When the client is unable to connect to the server, you can try following ways t
Check whether a TCP port on server side is open: `nc -l {port}`
Check whether a TCP port on client side is open: `nc {hostIP} {port}`
- On Windows system `Net-TestConnection -ComputerName {fqdn} -Port {port}` on PowerShell can be used to check whether the port on serer side is open for access.
- On Windows system `Test-NetConnection -ComputerName {fqdn} -Port {port}` on PowerShell can be used to check whether the port on server side is open for access.
7. TDengine CLI `taos` can also be used to check network, please refer to [TDengine CLI](/reference/taos-shell).

View File

@ -3,15 +3,15 @@ sidebar_label: TDengine in Docker
title: Deploy TDengine in Docker
---
Even though it's not recommended to deploy TDengine using docker in production system, docker is still very useful in development environment, especially when your host is not Linux. From version 2.0.14.0, the official image of TDengine can support X86-64, X86, arm64, and rm32 .
We do not recommend deploying TDengine using Docker in a production system. However, Docker is still very useful in a development environment, especially when your host is not Linux. From version 2.0.14.0, the official image of TDengine can support X86-64, X86, arm64, and arm32.
In this chapter a simple step by step guide of using TDengine in docker is introduced.
In this chapter we introduce a simple step-by-step guide to using TDengine in Docker.
## Install Docker
The installation of docker please refer to [Get Docker](https://docs.docker.com/get-docker/).
To install Docker please refer to [Get Docker](https://docs.docker.com/get-docker/).
After docker is installed, you can check whether Docker is installed properly by displaying Docker version.
After Docker is installed, you can check whether Docker is installed properly by displaying Docker version.
```bash
$ docker -v
@ -27,7 +27,7 @@ $ docker run -d -p 6030-6049:6030-6049 -p 6030-6049:6030-6049/udp tdengine/tdeng
526aa188da767ae94b244226a2b2eec2b5f17dd8eff592893d9ec0cd0f3a1ccd
```
In the above command, a docker container is started to run TDengine server, the port range 6030-6049 of the container is mapped to host port range 6030-6049. If port range 6030-6049 has been occupied on the host, please change to an available host port range. Regarding the requirements about ports on the host, please refer to [Port Configuration](/reference/config/#serverport).
In the above command, a docker container is started to run TDengine server. The port range 6030-6049 of the container is mapped to host port range 6030-6049. If port range 6030-6049 has been occupied on the host, please change to an available host port range. For port requirements on the host, please refer to [Port Configuration](/reference/config/#serverport).
- **docker run**: Launch a docker container
- **-d**: the container will run in background mode
@ -95,7 +95,7 @@ In TDengine CLI, SQL commands can be executed to create/drop databases, tables,
### Access TDengine from host
If `-p` used to map ports properly between host and container, it's also able to access TDengine in container from the host as long as `firstEp` is configured correctly for the client on host.
If option `-p` is used to map ports properly between host and container, TDengine in the container can also be accessed from the host, as long as `firstEp` is configured correctly for the client on the host.
```
$ taos
@ -271,7 +271,7 @@ Below is an example output:
### Access TDengine from 3rd party tools
A lot of 3rd party tools can be used to write data into TDengine through `taosAdapter` , for details please refer to [3rd party tools](/third-party/).
A lot of 3rd party tools can be used to write data into TDengine through `taosAdapter`. For details, please refer to [3rd party tools](/third-party/).
Accessing a TDengine server inside a container from a 3rd party tool is no different, as long as the end point is specified correctly. The end point should be the FQDN and the mapped port of the host.

View File

@ -55,9 +55,9 @@ int32_t qParseSql(SParseContext* pCxt, SQuery** pQuery);
bool qIsInsertSql(const char* pStr, size_t length);
// for async mode
int32_t qSyntaxParseSql(SParseContext* pCxt, SQuery** pQuery, struct SCatalogReq* pCatalogReq);
int32_t qSemanticAnalysisSql(SParseContext* pCxt, const struct SCatalogReq* pCatalogReq,
const struct SMetaData* pMetaData, SQuery* pQuery);
int32_t qParseSqlSyntax(SParseContext* pCxt, SQuery** pQuery, struct SCatalogReq* pCatalogReq);
int32_t qAnalyseSqlSemantic(SParseContext* pCxt, const struct SCatalogReq* pCatalogReq,
const struct SMetaData* pMetaData, SQuery* pQuery);
void qDestroyQuery(SQuery* pQueryNode);

View File

@ -368,7 +368,11 @@ typedef enum ELogicConditionType {
#define PRIMARYKEY_TIMESTAMP_COL_ID 1
#define COL_REACH_END(colId, maxColId) ((colId) > (maxColId))
#ifdef WINDOWS
#define TSDB_MAX_RPC_THREADS 4 // windows pipe only support 4 connections.
#else
#define TSDB_MAX_RPC_THREADS 5
#endif
#define TSDB_QUERY_TYPE_NON_TYPE 0x00u // none type
#define TSDB_QUERY_TYPE_FREE_RESOURCE 0x01u // free qhandle at vnode

View File

@ -1,32 +1,25 @@
FROM ubuntu:18.04
WORKDIR /root
ARG pkgFile
ARG dirName
ARG cpuType
RUN echo ${pkgFile} && echo ${dirName}
COPY ${pkgFile} /root/
RUN tar -zxf ${pkgFile}
WORKDIR /root/
RUN cd /root/${dirName}/ && /bin/bash install.sh -e no && cd /root
RUN rm /root/${pkgFile}
RUN rm -rf /root/${dirName}
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get clean && apt-get update && apt-get install -y locales tzdata netcat && locale-gen en_US.UTF-8
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/lib" \
LC_CTYPE=en_US.UTF-8 \
LANG=en_US.UTF-8 \
LC_ALL=en_US.UTF-8
COPY ./bin/* /usr/bin/
ENV TINI_VERSION v0.19.0
RUN bash -c 'echo -e "Downloading tini-${cpuType} ..."'
ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-${cpuType} /tini
RUN chmod +x /tini
ENTRYPOINT ["/tini", "--", "/usr/bin/entrypoint.sh"]
CMD ["taosd"]
VOLUME [ "/var/lib/taos", "/var/log/taos", "/corefile" ]
# Builds an image that installs the package archive given by ${pkgFile} and
# runs `taosd` by default, with tini as the entrypoint wrapper.
FROM ubuntu:18.04
WORKDIR /root
# Build arguments: the package archive name, the directory it unpacks to, and
# the CPU type suffix used to select the tini release binary.
ARG pkgFile
ARG dirName
ARG cpuType
RUN echo ${pkgFile} && echo ${dirName}
COPY ${pkgFile} /root/
# tini is fetched from its GitHub release page; it is made executable in the RUN step below.
ENV TINI_VERSION v0.19.0
ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-${cpuType} /tini
# Keep apt-get from prompting during the package installation below.
ENV DEBIAN_FRONTEND=noninteractive
WORKDIR /root/
# Single RUN layer: unpack and install the package, remove the archive and the
# unpacked directory, install locales/tzdata/netcat, generate the en_US.UTF-8
# locale, clear apt caches, and mark /tini executable.
RUN tar -zxf ${pkgFile} && cd /root/${dirName}/ && /bin/bash install.sh -e no && cd /root && rm /root/${pkgFile} && rm -rf /root/${dirName} && apt-get update && apt-get install -y locales tzdata netcat && locale-gen en_US.UTF-8 && apt-get clean && rm -rf /var/lib/apt/lists/ && chmod +x /tini
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/lib" \
LC_CTYPE=en_US.UTF-8 \
LANG=en_US.UTF-8 \
LC_ALL=en_US.UTF-8
# Helper scripts from ./bin are copied into /usr/bin; the entrypoint below expects entrypoint.sh there.
COPY ./bin/* /usr/bin/
ENTRYPOINT ["/tini", "--", "/usr/bin/entrypoint.sh"]
CMD ["taosd"]
VOLUME [ "/var/lib/taos", "/var/log/taos", "/corefile" ]

View File

@ -11,39 +11,22 @@ DISABLE_ADAPTER=${TAOS_DISABLE_ADAPTER:-0}
unset TAOS_DISABLE_ADAPTER
# to get mnodeEpSet from data dir
DATA_DIR=${TAOS_DATA_DIR:-/var/lib/taos}
DATA_DIR=$(taosd -C|grep -E 'dataDir.*(\S+)' -o |head -n1|sed 's/dataDir *//')
DATA_DIR=${DATA_DIR:-/var/lib/taos}
# append env to custom taos.cfg
CFG_DIR=/tmp/taos
CFG_FILE=$CFG_DIR/taos.cfg
mkdir -p $CFG_DIR >/dev/null 2>&1
[ -f /etc/taos/taos.cfg ] && cat /etc/taos/taos.cfg | grep -E -v "^#|^\s*$" >$CFG_FILE
env-to-cfg >>$CFG_FILE
FQDN=$(cat $CFG_FILE | grep -E -v "^#|^$" | grep fqdn | tail -n1 | sed -E 's/.*fqdn\s+//')
FQDN=$(taosd -C|grep -E 'fqdn.*(\S+)' -o |head -n1|sed 's/fqdn *//')
# ensure the fqdn is resolved as localhost
grep "$FQDN" /etc/hosts >/dev/null || echo "127.0.0.1 $FQDN" >>/etc/hosts
FIRSET_EP=$(taosd -C|grep -E 'firstEp.*(\S+)' -o |head -n1|sed 's/firstEp *//')
# parse first ep host and port
FIRST_EP_HOST=${TAOS_FIRST_EP%:*}
FIRST_EP_PORT=${TAOS_FIRST_EP#*:}
FIRST_EP_HOST=${FIRSET_EP%:*}
FIRST_EP_PORT=${FIRSET_EP#*:}
# in case of custom server port
SERVER_PORT=$(cat $CFG_FILE | grep -E -v "^#|^$" | grep serverPort | tail -n1 | sed -E 's/.*serverPort\s+//')
SERVER_PORT=$(taosd -C|grep -E 'serverPort.*(\S+)' -o |head -n1|sed 's/serverPort *//')
SERVER_PORT=${SERVER_PORT:-6030}
# for other binaries like interpreters
if echo $1 | grep -E "taosd$" - >/dev/null; then
true # will run taosd
else
cp -f $CFG_FILE /etc/taos/taos.cfg || true
$@
exit $?
fi
set +e
ulimit -c unlimited
# set core files pattern, maybe failed
@ -62,22 +45,23 @@ fi
# if has mnode ep set or the host is first ep or not for cluster, just start.
if [ -f "$DATA_DIR/dnode/mnodeEpSet.json" ] ||
[ "$TAOS_FQDN" = "$FIRST_EP_HOST" ]; then
$@ -c $CFG_DIR
$@
# others will first wait the first ep ready.
else
if [ "$TAOS_FIRST_EP" = "" ]; then
echo "run TDengine with single node."
$@ -c $CFG_DIR
$@
exit $?
fi
while true; do
es=0
taos -h $FIRST_EP_HOST -P $FIRST_EP_PORT -n startup >/dev/null || es=$?
if [ "$es" -eq 0 ]; then
es=$(taos -h $FIRST_EP_HOST -P $FIRST_EP_PORT --check)
echo ${es}
if [ "${es%%:*}" -eq 2 ]; then
echo "execute create dnode"
taos -h $FIRST_EP_HOST -P $FIRST_EP_PORT -s "create dnode \"$FQDN:$SERVER_PORT\";"
break
fi
sleep 1s
done
$@ -c $CFG_DIR
$@
fi

View File

@ -0,0 +1,8 @@
#!/bin/sh
# Runs `taos --check` and maps its output to an exit status.
# NOTE(review): presumably used as a container health probe — confirm against the image/compose configuration.
es=$(taos --check)
# Keep only the text before the first ':'; it is compared as an integer below.
code=${es%%:*}
# Exit 0 when the code is neither 0 nor 4.
# NOTE(review): the meaning of the individual codes is not visible here — confirm against `taos --check`.
if [ "$code" -ne "0" ] && [ "$code" -ne "4" ]; then
exit 0
fi
# Reached when the code is 0 or 4, or when it is not an integer (the `[` test
# then fails): print the check output and exit 1.
echo $es
exit 1

View File

@ -605,6 +605,10 @@ static int32_t tdAppendKvRowToDataCol(STSRow *pRow, STSchema *pSchema, SDataCols
* @param pCols
*/
int32_t tdAppendSTSRowToDataCol(STSRow *pRow, STSchema *pSchema, SDataCols *pCols, bool isMerge) {
#ifdef TD_DEBUG_PRINT_TSDB_LOAD_DCOLS
printf("%s:%d ts: %" PRIi64 " sver:%d maxCols:%" PRIi16 " nCols:%" PRIi16 ", nRows:%d\n", __func__, __LINE__,
TD_ROW_KEY(pRow), TD_ROW_SVER(pRow), pCols->maxCols, pCols->numOfCols, pCols->numOfRows);
#endif
if (TD_IS_TP_ROW(pRow)) {
return tdAppendTpRowToDataCol(pRow, pSchema, pCols, isMerge);
} else if (TD_IS_KV_ROW(pRow)) {

View File

@ -53,6 +53,11 @@ typedef enum {
MND_AUTH_MAX
} EAuthOp;
// Discriminates the kind of a transaction step.
// NOTE(review): presumably LOG refers to an sdb raw-log step and ACTION to a
// message action step — confirm against the code that reads this value.
typedef enum {
TRN_STEP_LOG = 1,
TRN_STEP_ACTION = 2,
} ETrnStep;
typedef enum {
TRN_STAGE_PREPARE = 0,
TRN_STAGE_REDO_LOG = 1,
@ -468,7 +473,7 @@ typedef struct {
char* ast;
char* physicalPlan;
SSchemaWrapper schema;
int32_t refConsumerCnt;
// int32_t refConsumerCnt;
} SMqTopicObj;
typedef struct {

View File

@ -22,6 +22,13 @@
extern "C" {
#endif
// Identifiers for transaction start/stop callbacks; values of this type are
// passed as the startFunc/stopFunc arguments of mndTransSetCb().
// NOTE(review): TRANS_STOP_FUNC_TEST_MQ_REB carries a `TEST_` infix that its
// start counterpart TRANS_START_FUNC_MQ_REB does not; callers reference it as
// spelled, so a rename would have to update them too.
typedef enum {
TRANS_START_FUNC_TEST = 1,
TRANS_STOP_FUNC_TEST = 2,
TRANS_START_FUNC_MQ_REB = 3,
TRANS_STOP_FUNC_TEST_MQ_REB = 4,
} ETrnFunc;
typedef struct {
SEpSet epSet;
tmsg_t msgType;
@ -33,12 +40,17 @@ typedef struct {
void *pCont;
} STransAction;
typedef enum {
TEST_TRANS_START_FUNC = 1,
TEST_TRANS_STOP_FUNC = 2,
MQ_REB_TRANS_START_FUNC = 3,
MQ_REB_TRANS_STOP_FUNC = 4,
} ETrnFuncType;
// Log entry of a transaction: a thin wrapper around one raw sdb record.
// NOTE(review): ownership of pRaw is not visible here — confirm who frees it.
typedef struct {
SSdbRaw *pRaw;
} STransLog;
// One step of a transaction, holding redo/undo variants of both an action and a log.
// NOTE(review): presumably stepType selects whether the action members or the
// log members are meaningful for a given step — confirm against the users.
typedef struct {
ETrnStep stepType;
STransAction redoAction;
STransAction undoAction;
STransLog redoLog;
STransLog undoLog;
} STransStep;
typedef void (*TransCbFp)(SMnode *pMnode, void *param, int32_t paramLen);
@ -55,7 +67,7 @@ int32_t mndTransAppendCommitlog(STrans *pTrans, SSdbRaw *pRaw);
int32_t mndTransAppendRedoAction(STrans *pTrans, STransAction *pAction);
int32_t mndTransAppendUndoAction(STrans *pTrans, STransAction *pAction);
void mndTransSetRpcRsp(STrans *pTrans, void *pCont, int32_t contLen);
void mndTransSetCb(STrans *pTrans, ETrnFuncType startFunc, ETrnFuncType stopFunc, void *param, int32_t paramLen);
void mndTransSetCb(STrans *pTrans, ETrnFunc startFunc, ETrnFunc stopFunc, void *param, int32_t paramLen);
void mndTransSetDbInfo(STrans *pTrans, SDbObj *pDb);
void mndTransSetExecOneByOne(STrans *pTrans);

View File

@ -414,6 +414,7 @@ static int32_t mndProcessSubscribeReq(SRpcMsg *pMsg) {
goto SUBSCRIBE_OVER;
}
#if 0
// ref topic to prevent drop
// TODO make topic complete
SMqTopicObj topicObj = {0};
@ -422,6 +423,7 @@ static int32_t mndProcessSubscribeReq(SRpcMsg *pMsg) {
mInfo("subscribe topic %s by consumer %ld cgroup %s, refcnt %d", pTopic->name, consumerId, cgroup,
topicObj.refConsumerCnt);
if (mndSetTopicCommitLogs(pMnode, pTrans, &topicObj) != 0) goto SUBSCRIBE_OVER;
#endif
mndReleaseTopic(pMnode, pTopic);
}

View File

@ -1044,9 +1044,9 @@ static int32_t mndDropDb(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb) {
if (mndSetDropDbRedoLogs(pMnode, pTrans, pDb) != 0) goto _OVER;
if (mndSetDropDbCommitLogs(pMnode, pTrans, pDb) != 0) goto _OVER;
/*if (mndDropOffsetByDB(pMnode, pTrans, pDb) != 0) goto _OVER;*/
/*if (mndDropSubByDB(pMnode, pTrans, pDb) != 0) goto _OVER;*/
/*if (mndDropTopicByDB(pMnode, pTrans, pDb) != 0) goto _OVER;*/
if (mndDropOffsetByDB(pMnode, pTrans, pDb) != 0) goto _OVER;
if (mndDropSubByDB(pMnode, pTrans, pDb) != 0) goto _OVER;
if (mndDropTopicByDB(pMnode, pTrans, pDb) != 0) goto _OVER;
if (mndSetDropDbRedoActions(pMnode, pTrans, pDb) != 0) goto _OVER;
SUserObj *pUser = mndAcquireUser(pMnode, pDb->createUser);

View File

@ -21,6 +21,7 @@
#include "mndMnode.h"
#include "mndShow.h"
#include "mndStb.h"
#include "mndTopic.h"
#include "mndTrans.h"
#include "mndUser.h"
#include "mndVgroup.h"
@ -188,7 +189,15 @@ static int32_t mndProcessCommitOffsetReq(SRpcMsg *pMsg) {
bool create = false;
SMqOffsetObj *pOffsetObj = mndAcquireOffset(pMnode, key);
if (pOffsetObj == NULL) {
SMqTopicObj *pTopic = mndAcquireTopic(pMnode, pOffset->topicName);
if (pTopic == NULL) {
terrno = TSDB_CODE_MND_TOPIC_NOT_EXIST;
mError("submit offset to topic %s failed since %s", pOffset->topicName, terrstr());
continue;
}
pOffsetObj = taosMemoryMalloc(sizeof(SMqOffsetObj));
pOffsetObj->dbUid = pTopic->dbUid;
mndReleaseTopic(pMnode, pTopic);
memcpy(pOffsetObj->key, key, TSDB_PARTITION_KEY_LEN);
create = true;
}

View File

@ -286,7 +286,7 @@ int32_t mndScheduleStream(SMnode* pMnode, STrans* pTrans, SStreamObj* pStream) {
pStream->tasks = taosArrayInit(totLevel, sizeof(void*));
bool hasExtraSink = false;
if (totLevel == 2) {
if (totLevel == 2 || strcmp(pStream->sourceDb, pStream->targetDb) != 0) {
SArray* taskOneLevel = taosArrayInit(0, sizeof(void*));
taosArrayPush(pStream->tasks, &taskOneLevel);
// add extra sink
@ -407,7 +407,7 @@ int32_t mndScheduleStream(SMnode* pMnode, STrans* pTrans, SStreamObj* pStream) {
/*pTask->dispatchMsgType = TDMT_VND_TASK_WRITE_EXEC;*/
pTask->dispatchMsgType = TDMT_VND_TASK_DISPATCH;
SDbObj* pDb = mndAcquireDb(pMnode, pStream->sourceDb);
SDbObj* pDb = mndAcquireDb(pMnode, pStream->targetDb);
ASSERT(pDb);
if (mndExtractDbInfo(pMnode, pDb, &pTask->shuffleDispatcher.dbInfo, NULL) < 0) {
sdbRelease(pSdb, pDb);

View File

@ -393,6 +393,15 @@ static int32_t mndCreateStream(SMnode *pMnode, SRpcMsg *pReq, SCMCreateStreamReq
streamObj.trigger = pCreate->triggerType;
streamObj.waterMark = pCreate->watermark;
if (streamObj.targetSTbName[0]) {
pDb = mndAcquireDbByStb(pMnode, streamObj.targetSTbName);
if (pDb == NULL) {
terrno = TSDB_CODE_MND_DB_NOT_SELECTED;
return -1;
}
tstrncpy(streamObj.targetDb, pDb->name, TSDB_DB_FNAME_LEN);
}
STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_TYPE_CREATE_STREAM, pReq);
if (pTrans == NULL) {
mError("stream:%s, failed to create since %s", pCreate->name, terrstr());

View File

@ -157,6 +157,7 @@ static int32_t mndPersistSubChangeVgReq(SMnode *pMnode, STrans *pTrans, const SM
int32_t vgId = pRebVg->pVgEp->vgId;
SVgObj *pVgObj = mndAcquireVgroup(pMnode, vgId);
if (pVgObj == NULL) {
ASSERT(0);
taosMemoryFree(buf);
return -1;
}
@ -451,6 +452,7 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu
taosArrayPush(pConsumerNew->rebNewTopics, &topic);
mndReleaseConsumer(pMnode, pConsumerOld);
if (mndSetConsumerCommitLogs(pMnode, pTrans, pConsumerNew) != 0) {
ASSERT(0);
goto REB_FAIL;
}
}
@ -469,9 +471,11 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu
taosArrayPush(pConsumerNew->rebRemovedTopics, &topic);
mndReleaseConsumer(pMnode, pConsumerOld);
if (mndSetConsumerCommitLogs(pMnode, pTrans, pConsumerNew) != 0) {
ASSERT(0);
goto REB_FAIL;
}
}
#if 0
if (consumerNum) {
char topic[TSDB_TOPIC_FNAME_LEN];
char cgroup[TSDB_CGROUP_LEN];
@ -486,17 +490,24 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu
pTopic->refConsumerCnt = topicObj.refConsumerCnt;
mInfo("subscribe topic %s unref %d consumer cgroup %s, refcnt %d", pTopic->name, consumerNum, cgroup,
topicObj.refConsumerCnt);
if (mndSetTopicCommitLogs(pMnode, pTrans, &topicObj) != 0) goto REB_FAIL;
if (mndSetTopicCommitLogs(pMnode, pTrans, &topicObj) != 0) {
ASSERT(0);
goto REB_FAIL;
}
}
}
#endif
// 4. TODO commit log: modification log
// 5. set cb
mndTransSetCb(pTrans, MQ_REB_TRANS_START_FUNC, MQ_REB_TRANS_STOP_FUNC, NULL, 0);
mndTransSetCb(pTrans, TRANS_START_FUNC_MQ_REB, TRANS_STOP_FUNC_TEST_MQ_REB, NULL, 0);
// 6. execution
if (mndTransPrepare(pMnode, pTrans) != 0) goto REB_FAIL;
if (mndTransPrepare(pMnode, pTrans) != 0) {
ASSERT(0);
goto REB_FAIL;
}
mndTransDrop(pTrans);
return 0;

View File

@ -15,6 +15,7 @@
#include "mndTopic.h"
#include "mndAuth.h"
#include "mndConsumer.h"
#include "mndDb.h"
#include "mndDnode.h"
#include "mndMnode.h"
@ -121,7 +122,7 @@ SSdbRaw *mndTopicActionEncode(SMqTopicObj *pTopic) {
SDB_SET_BINARY(pRaw, dataPos, swBuf, schemaLen, TOPIC_ENCODE_OVER);
}
SDB_SET_INT32(pRaw, dataPos, pTopic->refConsumerCnt, TOPIC_ENCODE_OVER);
/*SDB_SET_INT32(pRaw, dataPos, pTopic->refConsumerCnt, TOPIC_ENCODE_OVER);*/
SDB_SET_RESERVE(pRaw, dataPos, MND_TOPIC_RESERVE_SIZE, TOPIC_ENCODE_OVER);
SDB_SET_DATALEN(pRaw, dataPos, TOPIC_ENCODE_OVER);
@ -221,7 +222,7 @@ SSdbRow *mndTopicActionDecode(SSdbRaw *pRaw) {
pTopic->schema.pSchema = NULL;
}
SDB_GET_INT32(pRaw, dataPos, &pTopic->refConsumerCnt, TOPIC_DECODE_OVER);
/*SDB_GET_INT32(pRaw, dataPos, &pTopic->refConsumerCnt, TOPIC_DECODE_OVER);*/
SDB_GET_RESERVE(pRaw, dataPos, MND_TOPIC_RESERVE_SIZE, TOPIC_DECODE_OVER);
@ -253,7 +254,7 @@ static int32_t mndTopicActionUpdate(SSdb *pSdb, SMqTopicObj *pOldTopic, SMqTopic
atomic_exchange_64(&pOldTopic->updateTime, pNewTopic->updateTime);
atomic_exchange_32(&pOldTopic->version, pNewTopic->version);
atomic_store_32(&pOldTopic->refConsumerCnt, pNewTopic->refConsumerCnt);
/*atomic_store_32(&pOldTopic->refConsumerCnt, pNewTopic->refConsumerCnt);*/
/*taosWLockLatch(&pOldTopic->lock);*/
@ -327,7 +328,7 @@ static int32_t mndCreateTopic(SMnode *pMnode, SRpcMsg *pReq, SCMCreateTopicReq *
topicObj.version = 1;
topicObj.sql = strdup(pCreate->sql);
topicObj.sqlLen = strlen(pCreate->sql) + 1;
topicObj.refConsumerCnt = 0;
/*topicObj.refConsumerCnt = 0;*/
if (pCreate->ast && pCreate->ast[0]) {
topicObj.ast = strdup(pCreate->ast);
@ -492,8 +493,8 @@ static int32_t mndDropTopic(SMnode *pMnode, STrans *pTrans, SRpcMsg *pReq, SMqTo
}
static int32_t mndProcessDropTopicReq(SRpcMsg *pReq) {
SMnode *pMnode = pReq->info.node;
/*SSdb *pSdb = pMnode->pSdb;*/
SMnode *pMnode = pReq->info.node;
SSdb *pSdb = pMnode->pSdb;
SMDropTopicReq dropReq = {0};
if (tDeserializeSMDropTopicReq(pReq->pCont, pReq->contLen, &dropReq) != 0) {
@ -513,12 +514,36 @@ static int32_t mndProcessDropTopicReq(SRpcMsg *pReq) {
}
}
// Refuse to drop the topic while any consumer still lists it in assignedTopics.
// Consumers whose status is MQ_CONSUMER_STATUS__LOST_REBD are ignored.
// Each row returned by sdbFetch is released on every path out of an iteration.
void *pIter = NULL;
SMqConsumerObj *pConsumer;
while (1) {
  pIter = sdbFetch(pSdb, SDB_CONSUMER, pIter, (void **)&pConsumer);
  if (pIter == NULL) break;
  if (pConsumer->status == MQ_CONSUMER_STATUS__LOST_REBD) {
    // A skipped consumer must be released like any other fetched row;
    // a bare `continue` here would leave it held.
    sdbRelease(pSdb, pConsumer);
    continue;
  }
  int32_t sz = taosArrayGetSize(pConsumer->assignedTopics);
  for (int32_t i = 0; i < sz; i++) {
    char *name = taosArrayGetP(pConsumer->assignedTopics, i);
    if (strcmp(name, pTopic->name) == 0) {
      // Log while the consumer is still held: its fields must not be read after release.
      mError("topic:%s, failed to drop since subscribed by consumer %ld from cgroup %s", dropReq.name,
             pConsumer->consumerId, pConsumer->cgroup);
      // NOTE(review): pIter is still live on this early exit; confirm whether the sdb
      // iterator needs an explicit cancel call before returning.
      mndReleaseConsumer(pMnode, pConsumer);
      mndReleaseTopic(pMnode, pTopic);
      terrno = TSDB_CODE_MND_TOPIC_SUBSCRIBED;
      return -1;
    }
  }
  sdbRelease(pSdb, pConsumer);
}
#if 0
if (pTopic->refConsumerCnt != 0) {
mndReleaseTopic(pMnode, pTopic);
terrno = TSDB_CODE_MND_TOPIC_SUBSCRIBED;
mError("topic:%s, failed to drop since %s", dropReq.name, terrstr());
return -1;
}
#endif
STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_TYPE_DROP_TOPIC, pReq);
if (pTrans == NULL) {

View File

@ -464,15 +464,15 @@ static void mndTransTestStopFunc(SMnode *pMnode, void *param, int32_t paramLen)
mInfo("test trans stop, param:%s, len:%d", (char *)param, paramLen);
}
static TransCbFp mndTransGetCbFp(ETrnFuncType ftype) {
static TransCbFp mndTransGetCbFp(ETrnFunc ftype) {
switch (ftype) {
case TEST_TRANS_START_FUNC:
case TRANS_START_FUNC_TEST:
return mndTransTestStartFunc;
case TEST_TRANS_STOP_FUNC:
case TRANS_STOP_FUNC_TEST:
return mndTransTestStopFunc;
case MQ_REB_TRANS_START_FUNC:
case TRANS_START_FUNC_MQ_REB:
return mndRebCntInc;
case MQ_REB_TRANS_STOP_FUNC:
case TRANS_STOP_FUNC_TEST_MQ_REB:
return mndRebCntDec;
default:
return NULL;
@ -657,7 +657,7 @@ void mndTransSetRpcRsp(STrans *pTrans, void *pCont, int32_t contLen) {
pTrans->rpcRspLen = contLen;
}
void mndTransSetCb(STrans *pTrans, ETrnFuncType startFunc, ETrnFuncType stopFunc, void *param, int32_t paramLen) {
void mndTransSetCb(STrans *pTrans, ETrnFunc startFunc, ETrnFunc stopFunc, void *param, int32_t paramLen) {
pTrans->startFunc = startFunc;
pTrans->stopFunc = stopFunc;
pTrans->param = param;

View File

@ -5,7 +5,9 @@ target_link_libraries(
PUBLIC sut
)
add_test(
NAME dbTest
COMMAND dbTest
)
# Register dbTest with CTest only when TD_WINDOWS does not evaluate to true.
if(NOT TD_WINDOWS)
  add_test(NAME dbTest COMMAND dbTest)
endif()

View File

@ -5,7 +5,9 @@ target_link_libraries(
PUBLIC sut
)
add_test(
NAME smaTest
COMMAND smaTest
)
# Register smaTest with CTest only when TD_WINDOWS does not evaluate to true.
if(NOT TD_WINDOWS)
  add_test(NAME smaTest COMMAND smaTest)
endif()

View File

@ -5,7 +5,9 @@ target_link_libraries(
PUBLIC sut
)
add_test(
NAME stbTest
COMMAND stbTest
)
# Register stbTest with CTest only when TD_WINDOWS does not evaluate to true.
if(NOT TD_WINDOWS)
  add_test(NAME stbTest COMMAND stbTest)
endif()

View File

@ -123,7 +123,7 @@ class MndTestTrans2 : public ::testing::Test {
sdbSetRawStatus(pUndoRaw, SDB_STATUS_DROPPED);
char *param = strdup("====> test log <=====");
mndTransSetCb(pTrans, TEST_TRANS_START_FUNC, TEST_TRANS_STOP_FUNC, param, strlen(param) + 1);
mndTransSetCb(pTrans, TRANS_START_FUNC_TEST, TRANS_STOP_FUNC_TEST, param, strlen(param) + 1);
if (pDb != NULL) {
mndTransSetDbInfo(pTrans, pDb);
@ -156,7 +156,7 @@ class MndTestTrans2 : public ::testing::Test {
sdbSetRawStatus(pUndoRaw, SDB_STATUS_DROPPED);
char *param = strdup("====> test action <=====");
mndTransSetCb(pTrans, TEST_TRANS_START_FUNC, TEST_TRANS_STOP_FUNC, param, strlen(param) + 1);
mndTransSetCb(pTrans, TRANS_START_FUNC_TEST, TRANS_STOP_FUNC_TEST, param, strlen(param) + 1);
{
STransAction action = {0};
@ -228,7 +228,7 @@ class MndTestTrans2 : public ::testing::Test {
sdbSetRawStatus(pUndoRaw, SDB_STATUS_DROPPED);
char *param = strdup("====> test log <=====");
mndTransSetCb(pTrans, TEST_TRANS_START_FUNC, TEST_TRANS_STOP_FUNC, param, strlen(param) + 1);
mndTransSetCb(pTrans, TRANS_START_FUNC_TEST, TRANS_STOP_FUNC_TEST, param, strlen(param) + 1);
int32_t code = mndTransPrepare(pMnode, pTrans);
mndTransDrop(pTrans);

View File

@ -79,7 +79,8 @@ struct STsdb {
// Table descriptor: the table's ids plus the schema pointers handed out by
// tsdbGetTableSchemaImpl.
struct STable {
uint64_t tid;
uint64_t uid;
STSchema *pSchema;
STSchema *pSchema; // latest schema
STSchema *pCacheSchema; // cached schema for the most recently requested version that differs from pSchema
};
#define TABLE_TID(t) (t)->tid
@ -181,12 +182,15 @@ int tsdbUnlockRepo(STsdb *pTsdb);
static FORCE_INLINE STSchema *tsdbGetTableSchemaImpl(STsdb *pTsdb, STable *pTable, bool lock, bool copy,
int32_t version) {
if ((version != -1) && (schemaVersion(pTable->pSchema) != version)) {
taosMemoryFreeClear(pTable->pSchema);
pTable->pSchema = metaGetTbTSchema(REPO_META(pTsdb), pTable->uid, version);
if ((version < 0) || (schemaVersion(pTable->pSchema) == version)) {
return pTable->pSchema;
}
return pTable->pSchema;
if (!pTable->pCacheSchema || (schemaVersion(pTable->pCacheSchema) != version)) {
taosMemoryFreeClear(pTable->pCacheSchema);
pTable->pCacheSchema = metaGetTbTSchema(REPO_META(pTsdb), pTable->uid, version);
}
return pTable->pCacheSchema;
}
// tsdbMemTable.h

View File

@ -300,7 +300,7 @@ STSchema *metaGetTbTSchema(SMeta *pMeta, tb_uid_t uid, int32_t sver) {
pSW = metaGetTableSchema(pMeta, quid, sver, 0);
if (!pSW) return NULL;
tdInitTSchemaBuilder(&sb, sver);
tdInitTSchemaBuilder(&sb, pSW->version);
for (int i = 0; i < pSW->nCols; i++) {
pSchema = pSW->pSchema + i;
tdAddColToSchema(&sb, pSchema->type, pSchema->flags, pSchema->colId, pSchema->bytes);

View File

@ -441,7 +441,7 @@ static int32_t tdExecuteRSma(SSma *pSma, const void *pMsg, int32_t inputType, tb
if (inputType == STREAM_DATA_TYPE_SUBMIT_BLOCK) {
// TODO: use the proper schema instead of 0, and cache STSchema in cache
STSchema *pTSchema = metaGetTbTSchema(SMA_META(pSma), suid, 1);
STSchema *pTSchema = metaGetTbTSchema(SMA_META(pSma), suid, -1);
if (!pTSchema) {
terrno = TSDB_CODE_TDB_IVD_TB_SCHEMA_VERSION;
return TSDB_CODE_FAILED;

View File

@ -466,7 +466,7 @@ static int tsdbCreateCommitIters(SCommitH *pCommith) {
pTbData = (STbData *)pNode->pData;
pCommitIter = pCommith->iters + i;
pTSchema = metaGetTbTSchema(REPO_META(pRepo), pTbData->uid, -1); // TODO: schema version
pTSchema = metaGetTbTSchema(REPO_META(pRepo), pTbData->uid, -1);
if (pTSchema) {
pCommitIter->pIter = tSkipListCreateIter(pTbData->pData);
@ -475,7 +475,8 @@ static int tsdbCreateCommitIters(SCommitH *pCommith) {
pCommitIter->pTable = (STable *)taosMemoryMalloc(sizeof(STable));
pCommitIter->pTable->uid = pTbData->uid;
pCommitIter->pTable->tid = pTbData->uid;
pCommitIter->pTable->pSchema = pTSchema; // metaGetTbTSchema(REPO_META(pRepo), pTbData->uid, 0);
pCommitIter->pTable->pSchema = pTSchema;
pCommitIter->pTable->pCacheSchema = NULL;
}
}
tSkipListDestroyIter(pSlIter);
@ -490,6 +491,7 @@ static void tsdbDestroyCommitIters(SCommitH *pCommith) {
tSkipListDestroyIter(pCommith->iters[i].pIter);
if (pCommith->iters[i].pTable) {
tdFreeSchema(pCommith->iters[i].pTable->pSchema);
tdFreeSchema(pCommith->iters[i].pTable->pCacheSchema);
taosMemoryFreeClear(pCommith->iters[i].pTable);
}
}
@ -914,7 +916,7 @@ static int tsdbMoveBlkIdx(SCommitH *pCommith, SBlockIdx *pIdx) {
while (bidx < nBlocks) {
if (!pTSchema && !tsdbCommitIsSameFile(pCommith, bidx)) {
// Set commit table
pTSchema = metaGetTbTSchema(REPO_META(pTsdb), pIdx->uid, 1); // TODO: schema version
pTSchema = metaGetTbTSchema(REPO_META(pTsdb), pIdx->uid, -1); // TODO: schema version
if (!pTSchema) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;

View File

@ -1395,7 +1395,7 @@ static int32_t handleDataMergeIfNeeded(STsdbReadHandle* pTsdbReadHandle, SBlock*
}
if (pTsdbReadHandle->outputCapacity >= binfo.rows) {
ASSERT(cur->blockCompleted);
ASSERT(cur->blockCompleted || cur->mixBlock);
}
if (cur->rows == binfo.rows) {

View File

@ -2767,7 +2767,7 @@ static SSDataBlock* concurrentlyLoadRemoteDataImpl(SOperatorInfo* pOperator, SEx
code = setSDataBlockFromFetchRsp(pExchangeInfo->pResult, pLoadInfo, pTableRsp->numOfRows, pTableRsp->data,
pTableRsp->compLen, pTableRsp->numOfCols, startTs, &pDataInfo->totalRows, NULL);
if (code != 0) {
taosMemoryFreeClear(pDataInfo->pRsp);
taosMemoryFreeClear(pDataInfo->pRsp);
goto _error;
}
@ -2788,7 +2788,7 @@ static SSDataBlock* concurrentlyLoadRemoteDataImpl(SOperatorInfo* pOperator, SEx
pDataInfo->status = EX_SOURCE_DATA_NOT_READY;
code = doSendFetchDataRequest(pExchangeInfo, pTaskInfo, i);
if (code != TSDB_CODE_SUCCESS) {
taosMemoryFreeClear(pDataInfo->pRsp);
taosMemoryFreeClear(pDataInfo->pRsp);
goto _error;
}
}
@ -2895,7 +2895,7 @@ static SSDataBlock* seqLoadRemoteData(SOperatorInfo* pOperator) {
pDataInfo->totalRows, pLoadInfo->totalRows);
pDataInfo->status = EX_SOURCE_DATA_EXHAUSTED;
pExchangeInfo->current += 1;
pExchangeInfo->current += 1;
taosMemoryFreeClear(pDataInfo->pRsp);
continue;
}
@ -2922,7 +2922,7 @@ static SSDataBlock* seqLoadRemoteData(SOperatorInfo* pOperator) {
}
pOperator->resultInfo.totalRows += pRes->info.rows;
taosMemoryFreeClear(pDataInfo->pRsp);
taosMemoryFreeClear(pDataInfo->pRsp);
return pExchangeInfo->pResult;
}
}
@ -3384,7 +3384,7 @@ int32_t getTableScanInfo(SOperatorInfo* pOperator, int32_t* order, int32_t* scan
// todo add more information about exchange operation
int32_t type = pOperator->operatorType;
if (type == QUERY_NODE_PHYSICAL_PLAN_EXCHANGE || type == QUERY_NODE_PHYSICAL_PLAN_SYSTABLE_SCAN ||
type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) {
type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN || type == QUERY_NODE_PHYSICAL_PLAN_TAG_SCAN) {
*order = TSDB_ORDER_ASC;
*scanFlag = MAIN_SCAN;
return TSDB_CODE_SUCCESS;
@ -3499,14 +3499,15 @@ static SSDataBlock* getAggregateResult(SOperatorInfo* pOperator) {
}
int32_t aggEncodeResultRow(SOperatorInfo* pOperator, char** result, int32_t* length) {
if(result == NULL || length == NULL){
if (result == NULL || length == NULL) {
return TSDB_CODE_TSC_INVALID_INPUT;
}
SOptrBasicInfo* pInfo = (SOptrBasicInfo*)(pOperator->info);
SAggSupporter* pSup = (SAggSupporter*)POINTER_SHIFT(pOperator->info, sizeof(SOptrBasicInfo));
int32_t size = taosHashGetSize(pSup->pResultRowHashTable);
size_t keyLen = sizeof(uint64_t) * 2; // estimate the key length
int32_t totalSize = sizeof(int32_t) + sizeof(int32_t) + size * (sizeof(int32_t) + keyLen + sizeof(int32_t) + pSup->resultRowSize);
SAggSupporter* pSup = (SAggSupporter*)POINTER_SHIFT(pOperator->info, sizeof(SOptrBasicInfo));
int32_t size = taosHashGetSize(pSup->pResultRowHashTable);
size_t keyLen = sizeof(uint64_t) * 2; // estimate the key length
int32_t totalSize =
sizeof(int32_t) + sizeof(int32_t) + size * (sizeof(int32_t) + keyLen + sizeof(int32_t) + pSup->resultRowSize);
*result = (char*)taosMemoryCalloc(1, totalSize);
if (*result == NULL) {
@ -3568,11 +3569,11 @@ int32_t aggEncodeResultRow(SOperatorInfo* pOperator, char** result, int32_t* len
}
int32_t aggDecodeResultRow(SOperatorInfo* pOperator, char* result) {
if(result == NULL){
if (result == NULL) {
return TSDB_CODE_TSC_INVALID_INPUT;
}
SOptrBasicInfo* pInfo = (SOptrBasicInfo*)(pOperator->info);
SAggSupporter* pSup = (SAggSupporter*)POINTER_SHIFT(pOperator->info, sizeof(SOptrBasicInfo));
SAggSupporter* pSup = (SAggSupporter*)POINTER_SHIFT(pOperator->info, sizeof(SOptrBasicInfo));
// int32_t size = taosHashGetSize(pSup->pResultRowHashTable);
int32_t length = *(int32_t*)(result);
@ -4512,8 +4513,8 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo
} else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN == type) {
SScanPhysiNode* pScanPhyNode = (SScanPhysiNode*)pPhyNode; // simple child table.
STableScanPhysiNode* pTableScanNode = (STableScanPhysiNode*)pPhyNode;
STimeWindowAggSupp twSup = {.waterMark = pTableScanNode->watermark,
.calTrigger = pTableScanNode->triggerType, .maxTs = INT64_MIN};
STimeWindowAggSupp twSup = {
.waterMark = pTableScanNode->watermark, .calTrigger = pTableScanNode->triggerType, .maxTs = INT64_MIN};
tsdbReaderT pDataReader = NULL;
if (pHandle->vnode) {
pDataReader = doCreateDataReader(pTableScanNode, pHandle, pTableListInfo, (uint64_t)queryId, taskId, pTagCond);
@ -4527,9 +4528,9 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo
} else {
qDebug("%s pDataReader is not NULL", GET_TASKID(pTaskInfo));
}
SArray* tableIdList = extractTableIdList(pTableListInfo);
SOperatorInfo* pOperator = createStreamScanOperatorInfo(pDataReader, pHandle,
tableIdList, pTableScanNode, pTaskInfo, &twSup, pTableScanNode->tsColId);
SArray* tableIdList = extractTableIdList(pTableListInfo);
SOperatorInfo* pOperator = createStreamScanOperatorInfo(pDataReader, pHandle, tableIdList, pTableScanNode,
pTaskInfo, &twSup, pTableScanNode->tsColId);
taosArrayDestroy(tableIdList);
return pOperator;
@ -4996,25 +4997,25 @@ _error:
return NULL;
}
int32_t encodeOperator(SOperatorInfo* ops, char** result, int32_t *length){
int32_t encodeOperator(SOperatorInfo* ops, char** result, int32_t* length) {
int32_t code = TDB_CODE_SUCCESS;
char *pCurrent = NULL;
char* pCurrent = NULL;
int32_t currLength = 0;
if(ops->fpSet.encodeResultRow){
if(result == NULL || length == NULL){
if (ops->fpSet.encodeResultRow) {
if (result == NULL || length == NULL) {
return TSDB_CODE_TSC_INVALID_INPUT;
}
code = ops->fpSet.encodeResultRow(ops, &pCurrent, &currLength);
if(code != TDB_CODE_SUCCESS){
if(*result != NULL){
if (code != TDB_CODE_SUCCESS) {
if (*result != NULL) {
taosMemoryFree(*result);
*result = NULL;
}
return code;
}
if(*result == NULL){
if (*result == NULL) {
*result = (char*)taosMemoryCalloc(1, currLength + sizeof(int32_t));
if (*result == NULL) {
taosMemoryFree(pCurrent);
@ -5022,9 +5023,9 @@ int32_t encodeOperator(SOperatorInfo* ops, char** result, int32_t *length){
}
memcpy(*result + sizeof(int32_t), pCurrent, currLength);
*(int32_t*)(*result) = currLength + sizeof(int32_t);
}else{
} else {
int32_t sizePre = *(int32_t*)(*result);
char* tmp = (char*)taosMemoryRealloc(*result, sizePre + currLength);
char* tmp = (char*)taosMemoryRealloc(*result, sizePre + currLength);
if (tmp == NULL) {
taosMemoryFree(pCurrent);
taosMemoryFree(*result);
@ -5041,33 +5042,33 @@ int32_t encodeOperator(SOperatorInfo* ops, char** result, int32_t *length){
for (int32_t i = 0; i < ops->numOfDownstream; ++i) {
code = encodeOperator(ops->pDownstream[i], result, length);
if(code != TDB_CODE_SUCCESS){
if (code != TDB_CODE_SUCCESS) {
return code;
}
}
return TDB_CODE_SUCCESS;
}
int32_t decodeOperator(SOperatorInfo* ops, char* result, int32_t length){
int32_t decodeOperator(SOperatorInfo* ops, char* result, int32_t length) {
int32_t code = TDB_CODE_SUCCESS;
if(ops->fpSet.decodeResultRow){
if(result == NULL){
if (ops->fpSet.decodeResultRow) {
if (result == NULL) {
return TSDB_CODE_TSC_INVALID_INPUT;
}
ASSERT(length == *(int32_t*)result);
char* data = result + sizeof(int32_t);
code = ops->fpSet.decodeResultRow(ops, data);
if(code != TDB_CODE_SUCCESS){
if (code != TDB_CODE_SUCCESS) {
return code;
}
int32_t totalLength = *(int32_t*)result;
int32_t dataLength = *(int32_t*)data;
if(totalLength == dataLength + sizeof(int32_t)) { // the last data
if (totalLength == dataLength + sizeof(int32_t)) { // the last data
result = NULL;
length = 0;
}else{
} else {
result += dataLength;
*(int32_t*)(result) = totalLength - dataLength;
length = totalLength - dataLength;
@ -5076,7 +5077,7 @@ int32_t decodeOperator(SOperatorInfo* ops, char* result, int32_t length){
for (int32_t i = 0; i < ops->numOfDownstream; ++i) {
code = decodeOperator(ops->pDownstream[i], result, length);
if(code != TDB_CODE_SUCCESS){
if (code != TDB_CODE_SUCCESS) {
return code;
}
}

View File

@ -445,6 +445,11 @@ static int32_t translateStateCount(SFunctionNode* pFunc, char* pErrBuf, int32_t
}
// param0
SNode* pParaNode0 = nodesListGetNode(pFunc->pParameterList, 0);
if (QUERY_NODE_COLUMN != nodeType(pParaNode0)) {
return buildFuncErrMsg(pErrBuf, len, TSDB_CODE_FUNC_FUNTION_ERROR,
"The input parameter of STATECOUNT function can only be column");
}
uint8_t colType = ((SExprNode*)nodesListGetNode(pFunc->pParameterList, 0))->resType.type;
if (!IS_NUMERIC_TYPE(colType)) {
return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName);
@ -480,6 +485,11 @@ static int32_t translateStateDuration(SFunctionNode* pFunc, char* pErrBuf, int32
}
// param0
SNode* pParaNode0 = nodesListGetNode(pFunc->pParameterList, 0);
if (QUERY_NODE_COLUMN != nodeType(pParaNode0)) {
return buildFuncErrMsg(pErrBuf, len, TSDB_CODE_FUNC_FUNTION_ERROR,
"The input parameter of STATEDURATION function can only be column");
}
uint8_t colType = ((SExprNode*)nodesListGetNode(pFunc->pParameterList, 0))->resType.type;
if (!IS_NUMERIC_TYPE(colType)) {
return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName);
@ -693,7 +703,7 @@ static int32_t translateFirstLast(SFunctionNode* pFunc, char* pErrBuf, int32_t l
static int32_t translateUnique(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
if (1 != LIST_LENGTH(pFunc->pParameterList)) {
return TSDB_CODE_SUCCESS;
return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName);
}
SNode* pPara = nodesListGetNode(pFunc->pParameterList, 0);
@ -1181,7 +1191,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
.finalizeFunc = functionFinalize
},
{
.name = "state_count",
.name = "statecount",
.type = FUNCTION_TYPE_STATE_COUNT,
.classification = FUNC_MGT_INDEFINITE_ROWS_FUNC,
.translateFunc = translateStateCount,
@ -1191,7 +1201,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
.finalizeFunc = NULL
},
{
.name = "state_duration",
.name = "stateduration",
.type = FUNCTION_TYPE_STATE_DURATION,
.classification = FUNC_MGT_INDEFINITE_ROWS_FUNC | FUNC_MGT_TIMELINE_FUNC,
.translateFunc = translateStateDuration,

View File

@ -3776,6 +3776,7 @@ static void tailAssignResult(STailItem* pItem, char *data, int32_t colBytes, TSK
if (isNull) {
pItem->isNull = true;
} else {
pItem->isNull = false;
memcpy(pItem->data, data, colBytes);
}
}

View File

@ -24,6 +24,7 @@ extern "C" {
#include "parUtil.h"
#include "parser.h"
int32_t parseInsertSyntax(SParseContext* pContext, SQuery** pQuery);
int32_t parseInsertSql(SParseContext* pContext, SQuery** pQuery);
int32_t parse(SParseContext* pParseCxt, SQuery** pQuery);
int32_t collectMetaKey(SParseContext* pParseCxt, SQuery* pQuery);

View File

@ -65,12 +65,15 @@ int32_t trimString(const char* src, int32_t len, char* dst, int32_t dlen);
int32_t buildCatalogReq(const SParseMetaCache* pMetaCache, SCatalogReq* pCatalogReq);
int32_t putMetaDataToCache(const SCatalogReq* pCatalogReq, const SMetaData* pMetaData, SParseMetaCache* pMetaCache);
int32_t reserveTableMetaInCache(int32_t acctId, const char* pDb, const char* pTable, SParseMetaCache* pMetaCache);
int32_t reserveTableMetaInCacheExt(const SName* pName, SParseMetaCache* pMetaCache);
int32_t reserveDbVgInfoInCache(int32_t acctId, const char* pDb, SParseMetaCache* pMetaCache);
int32_t reserveTableVgroupInCache(int32_t acctId, const char* pDb, const char* pTable, SParseMetaCache* pMetaCache);
int32_t reserveTableVgroupInCacheExt(const SName* pName, SParseMetaCache* pMetaCache);
int32_t reserveDbVgVersionInCache(int32_t acctId, const char* pDb, SParseMetaCache* pMetaCache);
int32_t reserveDbCfgInCache(int32_t acctId, const char* pDb, SParseMetaCache* pMetaCache);
int32_t reserveUserAuthInCache(int32_t acctId, const char* pUser, const char* pDb, AUTH_TYPE type,
SParseMetaCache* pMetaCache);
int32_t reserveUserAuthInCacheExt(const char* pUser, const SName* pName, AUTH_TYPE type, SParseMetaCache* pMetaCache);
int32_t reserveUdfInCache(const char* pFunc, SParseMetaCache* pMetaCache);
int32_t getTableMetaFromCache(SParseMetaCache* pMetaCache, const SName* pName, STableMeta** pMeta);
int32_t getDbVgInfoFromCache(SParseMetaCache* pMetaCache, const char* pDbFName, SArray** pVgInfo);
@ -78,7 +81,7 @@ int32_t getTableVgroupFromCache(SParseMetaCache* pMetaCache, const SName* pName,
int32_t getDbVgVersionFromCache(SParseMetaCache* pMetaCache, const char* pDbFName, int32_t* pVersion, int64_t* pDbId,
int32_t* pTableNum);
int32_t getDbCfgFromCache(SParseMetaCache* pMetaCache, const char* pDbFName, SDbCfgInfo* pInfo);
int32_t getUserAuthFromCache(SParseMetaCache* pMetaCache, const char* pUser, const char* pDb, AUTH_TYPE type,
int32_t getUserAuthFromCache(SParseMetaCache* pMetaCache, const char* pUser, const char* pDbFName, AUTH_TYPE type,
bool* pPass);
int32_t getUdfInfoFromCache(SParseMetaCache* pMetaCache, const char* pFunc, SFuncInfo* pInfo);

View File

@ -333,68 +333,22 @@ static int32_t collectMetaKeyFromQuery(SCollectMetaKeyCxt* pCxt, SNode* pStmt) {
return collectMetaKeyFromSetOperator(pCxt, (SSetOperator*)pStmt);
case QUERY_NODE_SELECT_STMT:
return collectMetaKeyFromSelect(pCxt, (SSelectStmt*)pStmt);
case QUERY_NODE_VNODE_MODIF_STMT:
case QUERY_NODE_CREATE_DATABASE_STMT:
case QUERY_NODE_DROP_DATABASE_STMT:
case QUERY_NODE_ALTER_DATABASE_STMT:
break;
case QUERY_NODE_CREATE_TABLE_STMT:
return collectMetaKeyFromCreateTable(pCxt, (SCreateTableStmt*)pStmt);
case QUERY_NODE_CREATE_SUBTABLE_CLAUSE:
break;
case QUERY_NODE_CREATE_MULTI_TABLE_STMT:
return collectMetaKeyFromCreateMultiTable(pCxt, (SCreateMultiTableStmt*)pStmt);
case QUERY_NODE_DROP_TABLE_CLAUSE:
case QUERY_NODE_DROP_TABLE_STMT:
case QUERY_NODE_DROP_SUPER_TABLE_STMT:
break;
case QUERY_NODE_ALTER_TABLE_STMT:
return collectMetaKeyFromAlterTable(pCxt, (SAlterTableStmt*)pStmt);
case QUERY_NODE_CREATE_USER_STMT:
case QUERY_NODE_ALTER_USER_STMT:
case QUERY_NODE_DROP_USER_STMT:
break;
case QUERY_NODE_USE_DATABASE_STMT:
return collectMetaKeyFromUseDatabase(pCxt, (SUseDatabaseStmt*)pStmt);
case QUERY_NODE_CREATE_DNODE_STMT:
case QUERY_NODE_DROP_DNODE_STMT:
case QUERY_NODE_ALTER_DNODE_STMT:
break;
case QUERY_NODE_CREATE_INDEX_STMT:
return collectMetaKeyFromCreateIndex(pCxt, (SCreateIndexStmt*)pStmt);
case QUERY_NODE_DROP_INDEX_STMT:
case QUERY_NODE_CREATE_QNODE_STMT:
case QUERY_NODE_DROP_QNODE_STMT:
case QUERY_NODE_CREATE_BNODE_STMT:
case QUERY_NODE_DROP_BNODE_STMT:
case QUERY_NODE_CREATE_SNODE_STMT:
case QUERY_NODE_DROP_SNODE_STMT:
case QUERY_NODE_CREATE_MNODE_STMT:
case QUERY_NODE_DROP_MNODE_STMT:
break;
case QUERY_NODE_CREATE_TOPIC_STMT:
return collectMetaKeyFromCreateTopic(pCxt, (SCreateTopicStmt*)pStmt);
case QUERY_NODE_DROP_TOPIC_STMT:
case QUERY_NODE_DROP_CGROUP_STMT:
case QUERY_NODE_ALTER_LOCAL_STMT:
break;
case QUERY_NODE_EXPLAIN_STMT:
return collectMetaKeyFromExplain(pCxt, (SExplainStmt*)pStmt);
case QUERY_NODE_DESCRIBE_STMT:
case QUERY_NODE_RESET_QUERY_CACHE_STMT:
case QUERY_NODE_COMPACT_STMT:
case QUERY_NODE_CREATE_FUNCTION_STMT:
case QUERY_NODE_DROP_FUNCTION_STMT:
break;
case QUERY_NODE_CREATE_STREAM_STMT:
return collectMetaKeyFromCreateStream(pCxt, (SCreateStreamStmt*)pStmt);
case QUERY_NODE_DROP_STREAM_STMT:
case QUERY_NODE_MERGE_VGROUP_STMT:
case QUERY_NODE_REDISTRIBUTE_VGROUP_STMT:
case QUERY_NODE_SPLIT_VGROUP_STMT:
case QUERY_NODE_SYNCDB_STMT:
case QUERY_NODE_GRANT_STMT:
case QUERY_NODE_REVOKE_STMT:
case QUERY_NODE_SHOW_DNODES_STMT:
return collectMetaKeyFromShowDnodes(pCxt, (SShowStmt*)pStmt);
case QUERY_NODE_SHOW_MNODES_STMT:
@ -407,8 +361,6 @@ static int32_t collectMetaKeyFromQuery(SCollectMetaKeyCxt* pCxt, SNode* pStmt) {
return collectMetaKeyFromShowSnodes(pCxt, (SShowStmt*)pStmt);
case QUERY_NODE_SHOW_BNODES_STMT:
return collectMetaKeyFromShowBnodes(pCxt, (SShowStmt*)pStmt);
case QUERY_NODE_SHOW_CLUSTER_STMT:
break;
case QUERY_NODE_SHOW_DATABASES_STMT:
return collectMetaKeyFromShowDatabases(pCxt, (SShowStmt*)pStmt);
case QUERY_NODE_SHOW_FUNCTIONS_STMT:
@ -429,25 +381,8 @@ static int32_t collectMetaKeyFromQuery(SCollectMetaKeyCxt* pCxt, SNode* pStmt) {
return collectMetaKeyFromShowVgroups(pCxt, (SShowStmt*)pStmt);
case QUERY_NODE_SHOW_TOPICS_STMT:
return collectMetaKeyFromShowTopics(pCxt, (SShowStmt*)pStmt);
case QUERY_NODE_SHOW_CONSUMERS_STMT:
case QUERY_NODE_SHOW_SUBSCRIBES_STMT:
case QUERY_NODE_SHOW_SMAS_STMT:
case QUERY_NODE_SHOW_CONFIGS_STMT:
case QUERY_NODE_SHOW_CONNECTIONS_STMT:
case QUERY_NODE_SHOW_QUERIES_STMT:
case QUERY_NODE_SHOW_VNODES_STMT:
case QUERY_NODE_SHOW_APPS_STMT:
case QUERY_NODE_SHOW_SCORES_STMT:
case QUERY_NODE_SHOW_VARIABLE_STMT:
case QUERY_NODE_SHOW_CREATE_DATABASE_STMT:
case QUERY_NODE_SHOW_CREATE_TABLE_STMT:
case QUERY_NODE_SHOW_CREATE_STABLE_STMT:
break;
case QUERY_NODE_SHOW_TRANSACTIONS_STMT:
return collectMetaKeyFromShowTransactions(pCxt, (SShowStmt*)pStmt);
case QUERY_NODE_KILL_CONNECTION_STMT:
case QUERY_NODE_KILL_QUERY_STMT:
case QUERY_NODE_KILL_TRANSACTION_STMT:
default:
break;
}

View File

@ -64,6 +64,7 @@ typedef struct SInsertParseContext {
int32_t totalNum;
SVnodeModifOpStmt* pOutput;
SStmtCallback* pStmtCb;
SParseMetaCache* pMetaCache;
} SInsertParseContext;
typedef int32_t (*_row_append_fn_t)(SMsgBuf* pMsgBuf, const void* value, int32_t len, void* param);
@ -92,15 +93,15 @@ typedef struct SMemParam {
} \
} while (0)
static int32_t skipInsertInto(SInsertParseContext* pCxt) {
static int32_t skipInsertInto(char** pSql, SMsgBuf* pMsg) {
SToken sToken;
NEXT_TOKEN(pCxt->pSql, sToken);
NEXT_TOKEN(*pSql, sToken);
if (TK_INSERT != sToken.type) {
return buildSyntaxErrMsg(&pCxt->msg, "keyword INSERT is expected", sToken.z);
return buildSyntaxErrMsg(pMsg, "keyword INSERT is expected", sToken.z);
}
NEXT_TOKEN(pCxt->pSql, sToken);
NEXT_TOKEN(*pSql, sToken);
if (TK_INTO != sToken.type) {
return buildSyntaxErrMsg(&pCxt->msg, "keyword INTO is expected", sToken.z);
return buildSyntaxErrMsg(pMsg, "keyword INTO is expected", sToken.z);
}
return TSDB_CODE_SUCCESS;
}
@ -212,7 +213,7 @@ static int32_t createSName(SName* pName, SToken* pTableName, int32_t acctId, con
return buildInvalidOperationMsg(pMsgBuf, msg4);
}
char tbname[TSDB_TABLE_FNAME_LEN] = {0};
char tbname[TSDB_TABLE_FNAME_LEN] = {0};
strncpy(tbname, p + 1, tbLen);
/*tbLen = */ strdequote(tbname);
@ -250,25 +251,46 @@ static int32_t createSName(SName* pName, SToken* pTableName, int32_t acctId, con
return code;
}
static int32_t getTableMetaImpl(SInsertParseContext* pCxt, SName* name, char* dbFname, bool isStb) {
// Looks up AUTH_TYPE_WRITE permission on pDbFname for the user stored in the parse
// context and writes the verdict to *pPass. The meta cache is consulted when the
// insert context carries one; otherwise the catalog is queried. The callee's
// return code is passed through unchanged.
static int32_t checkAuth(SInsertParseContext* pCxt, char* pDbFname, bool* pPass) {
  SParseContext* pParseCxt = pCxt->pComCxt;
  if (NULL == pCxt->pMetaCache) {
    return catalogChkAuth(pParseCxt->pCatalog, pParseCxt->pTransporter, &pParseCxt->mgmtEpSet, pParseCxt->pUser,
                          pDbFname, AUTH_TYPE_WRITE, pPass);
  }
  return getUserAuthFromCache(pCxt->pMetaCache, pParseCxt->pUser, pDbFname, AUTH_TYPE_WRITE, pPass);
}
// Fetches the table meta for pTbName into *pTableMeta. A meta cache on the insert
// context takes precedence; without one the catalog is queried, using the
// super-table lookup when isStb is true and the plain table lookup otherwise.
// The callee's return code is passed through unchanged.
static int32_t getTableSchema(SInsertParseContext* pCxt, SName* pTbName, bool isStb, STableMeta** pTableMeta) {
  if (NULL != pCxt->pMetaCache) {
    return getTableMetaFromCache(pCxt->pMetaCache, pTbName, pTableMeta);
  }

  SParseContext* pParseCxt = pCxt->pComCxt;
  return isStb ? catalogGetSTableMeta(pParseCxt->pCatalog, pParseCxt->pTransporter, &pParseCxt->mgmtEpSet, pTbName,
                                      pTableMeta)
               : catalogGetTableMeta(pParseCxt->pCatalog, pParseCxt->pTransporter, &pParseCxt->mgmtEpSet, pTbName,
                                     pTableMeta);
}
// Resolves the vgroup for pTbName into *pVg. A meta cache on the insert context
// takes precedence; without one the catalog's hash-vgroup lookup is used.
// The callee's return code is passed through unchanged.
static int32_t getTableVgroup(SInsertParseContext* pCxt, SName* pTbName, SVgroupInfo* pVg) {
  if (NULL == pCxt->pMetaCache) {
    SParseContext* pParseCxt = pCxt->pComCxt;
    return catalogGetTableHashVgroup(pParseCxt->pCatalog, pParseCxt->pTransporter, &pParseCxt->mgmtEpSet, pTbName,
                                     pVg);
  }
  return getTableVgroupFromCache(pCxt->pMetaCache, pTbName, pVg);
}
static int32_t getTableMetaImpl(SInsertParseContext* pCxt, SName* name, char* dbFname, bool isStb) {
bool pass = false;
CHECK_CODE(catalogChkAuth(pBasicCtx->pCatalog, pBasicCtx->pTransporter, &pBasicCtx->mgmtEpSet, pBasicCtx->pUser,
dbFname, AUTH_TYPE_WRITE, &pass));
CHECK_CODE(checkAuth(pCxt, dbFname, &pass));
if (!pass) {
return TSDB_CODE_PAR_PERMISSION_DENIED;
}
if (isStb) {
CHECK_CODE(catalogGetSTableMeta(pBasicCtx->pCatalog, pBasicCtx->pTransporter, &pBasicCtx->mgmtEpSet, name,
&pCxt->pTableMeta));
} else {
CHECK_CODE(catalogGetTableMeta(pBasicCtx->pCatalog, pBasicCtx->pTransporter, &pBasicCtx->mgmtEpSet, name,
&pCxt->pTableMeta));
ASSERT(pCxt->pTableMeta->tableInfo.rowSize > 0);
CHECK_CODE(getTableSchema(pCxt, name, isStb, &pCxt->pTableMeta));
if (!isStb) {
SVgroupInfo vg;
CHECK_CODE(
catalogGetTableHashVgroup(pBasicCtx->pCatalog, pBasicCtx->pTransporter, &pBasicCtx->mgmtEpSet, name, &vg));
CHECK_CODE(getTableVgroup(pCxt, name, &vg));
CHECK_CODE(taosHashPut(pCxt->pVgroupsHashObj, (const char*)&vg.vgId, sizeof(vg.vgId), (char*)&vg, sizeof(vg)));
}
return TSDB_CODE_SUCCESS;
@ -777,7 +799,7 @@ static int32_t KvRowAppend(SMsgBuf* pMsgBuf, const void* value, int32_t len, voi
if (errno == E2BIG) {
return generateSyntaxErrMsg(pMsgBuf, TSDB_CODE_PAR_VALUE_TOO_LONG, pa->schema->name);
}
char buf[512] = {0};
snprintf(buf, tListLen(buf), " taosMbsToUcs4 error:%s", strerror(errno));
return buildSyntaxErrMsg(pMsgBuf, buf, value);
@ -857,10 +879,8 @@ static int32_t cloneTableMeta(STableMeta* pSrc, STableMeta** pDst) {
static int32_t storeTableMeta(SInsertParseContext* pCxt, SHashObj* pHash, SName* pTableName, const char* pName,
int32_t len, STableMeta* pMeta) {
SVgroupInfo vg;
SParseContext* pBasicCtx = pCxt->pComCxt;
CHECK_CODE(
catalogGetTableHashVgroup(pBasicCtx->pCatalog, pBasicCtx->pTransporter, &pBasicCtx->mgmtEpSet, pTableName, &vg));
SVgroupInfo vg;
CHECK_CODE(getTableVgroup(pCxt, pTableName, &vg));
CHECK_CODE(taosHashPut(pCxt->pVgroupsHashObj, (const char*)&vg.vgId, sizeof(vg.vgId), (char*)&vg, sizeof(vg)));
pMeta->uid = 0;
@ -1082,9 +1102,9 @@ static void destroyInsertParseContext(SInsertParseContext* pCxt) {
// VALUES (field1_value, ...) [(field1_value2, ...) ...] | FILE csv_file_path
// [...];
static int32_t parseInsertBody(SInsertParseContext* pCxt) {
int32_t tbNum = 0;
char tbFName[TSDB_TABLE_FNAME_LEN];
bool autoCreateTbl = false;
int32_t tbNum = 0;
char tbFName[TSDB_TABLE_FNAME_LEN];
bool autoCreateTbl = false;
// for each table
while (1) {
@ -1186,8 +1206,8 @@ static int32_t parseInsertBody(SInsertParseContext* pCxt) {
return TSDB_CODE_TSC_OUT_OF_MEMORY;
}
memcpy(tags, &pCxt->tags, sizeof(pCxt->tags));
(*pCxt->pStmtCb->setInfoFn)(pCxt->pStmtCb->pStmt, pCxt->pTableMeta, tags, tbFName, autoCreateTbl, pCxt->pVgroupsHashObj,
pCxt->pTableBlockHashObj);
(*pCxt->pStmtCb->setInfoFn)(pCxt->pStmtCb->pStmt, pCxt->pTableMeta, tags, tbFName, autoCreateTbl,
pCxt->pVgroupsHashObj, pCxt->pTableBlockHashObj);
memset(&pCxt->tags, 0, sizeof(pCxt->tags));
pCxt->pVgroupsHashObj = NULL;
@ -1245,12 +1265,11 @@ int32_t parseInsertSql(SParseContext* pContext, SQuery** pQuery) {
if (NULL == *pQuery) {
return TSDB_CODE_OUT_OF_MEMORY;
}
(*pQuery)->execMode = QUERY_EXEC_MODE_SCHEDULE;
(*pQuery)->haveResultSet = false;
(*pQuery)->msgType = TDMT_VND_SUBMIT;
(*pQuery)->pRoot = (SNode*)context.pOutput;
}
(*pQuery)->execMode = QUERY_EXEC_MODE_SCHEDULE;
(*pQuery)->haveResultSet = false;
(*pQuery)->msgType = TDMT_VND_SUBMIT;
(*pQuery)->pRoot = (SNode*)context.pOutput;
if (NULL == (*pQuery)->pTableList) {
(*pQuery)->pTableList = taosArrayInit(taosHashGetSize(context.pTableNameHashObj), sizeof(SName));
@ -1261,7 +1280,7 @@ int32_t parseInsertSql(SParseContext* pContext, SQuery** pQuery) {
context.pOutput->payloadType = PAYLOAD_TYPE_KV;
int32_t code = skipInsertInto(&context);
int32_t code = skipInsertInto(&context.pSql, &context.msg);
if (TSDB_CODE_SUCCESS == code) {
code = parseInsertBody(&context);
}
@ -1276,6 +1295,171 @@ int32_t parseInsertSql(SParseContext* pContext, SQuery** pQuery) {
return code;
}
// State carried through the syntax-only pass over an INSERT statement (built in parseInsertSyntax).
typedef struct SInsertParseSyntaxCxt {
// Caller's parse context; acctId, db and pUser are read from it when collecting meta keys.
SParseContext* pComCxt;
// Current read position in the SQL text; the skip*/parse* helpers pass it to the token macros.
char* pSql;
// Error-message buffer, initialised from the caller's pMsg / msgLen.
SMsgBuf msg;
// Cache in which the auth / table-meta / table-vgroup keys are reserved (see collectTableMetaKey).
SParseMetaCache* pMetaCache;
} SInsertParseSyntaxCxt;
// Skip forward to the ')' that closes a list whose opening '(' has already been
// consumed by the caller.
//
// Nested parentheses are tracked, so an item such as `now()` or any other
// parenthesised expression inside the list no longer terminates the skip at its
// own ')'. Returns a ") expected" syntax error if the SQL text ends before the
// list is closed.
static int32_t skipParentheses(SInsertParseSyntaxCxt* pCxt) {
  SToken  sToken;
  int32_t depth = 1;  // the caller already consumed the opening '('
  while (1) {
    NEXT_TOKEN(pCxt->pSql, sToken);
    if (0 == sToken.n) {
      // ran out of input while still inside the list
      return buildSyntaxErrMsg(&pCxt->msg, ") expected", NULL);
    }
    if (TK_NK_LP == sToken.type) {
      ++depth;
    } else if (TK_NK_RP == sToken.type && 0 == --depth) {
      break;
    }
  }
  return TSDB_CODE_SUCCESS;
}
// Skip a bound column-name list; the opening '(' has already been consumed.
static int32_t skipBoundColumns(SInsertParseSyntaxCxt* pCxt) {
  int32_t skipCode = skipParentheses(pCxt);
  return skipCode;
}
// pSql -> (field1_value, ...) [(field1_value2, ...) ...]
// Skip every consecutive parenthesised row; at least one row is required.
static int32_t skipValuesClause(SInsertParseSyntaxCxt* pCxt) {
  int32_t rowCount = 0;
  for (;;) {
    SToken  sToken;
    int32_t index = 0;
    // Peek at the next token without moving pSql, so a non-'(' token is left for the caller.
    NEXT_TOKEN_KEEP_SQL(pCxt->pSql, sToken, index);
    if (TK_NK_LP != sToken.type) {
      break;
    }
    pCxt->pSql += index;
    CHECK_CODE(skipParentheses(pCxt));
    rowCount++;
  }
  return (rowCount > 0) ? TSDB_CODE_SUCCESS : buildSyntaxErrMsg(&pCxt->msg, "no any data points", NULL);
}
// Skip a tag-value list; the opening '(' has already been consumed.
static int32_t skipTagsClause(SInsertParseSyntaxCxt* pCxt) {
  int32_t skipCode = skipParentheses(pCxt);
  return skipCode;
}
// pSql -> [(tag1_name, ...)] TAGS (tag1_value, ...)
// Skip the remainder of a USING clause: an optional tag-name list, the TAGS
// keyword and the mandatory tag-value list.
static int32_t skipUsingClause(SInsertParseSyntaxCxt* pCxt) {
  SToken token;
  NEXT_TOKEN(pCxt->pSql, token);
  // optional bound tag names
  if (TK_NK_LP == token.type) {
    CHECK_CODE(skipBoundColumns(pCxt));
    NEXT_TOKEN(pCxt->pSql, token);
  }
  if (TK_TAGS != token.type) {
    return buildSyntaxErrMsg(&pCxt->msg, "TAGS is expected", token.z);
  }
  // pSql -> (tag1_value, ...)
  NEXT_TOKEN(pCxt->pSql, token);
  if (TK_NK_LP != token.type) {
    return buildSyntaxErrMsg(&pCxt->msg, "( is expected", token.z);
  }
  return skipTagsClause(pCxt);
}
// Build the table name from pTbToken and reserve, in the meta cache, the three
// lookups needed for it: write permission, table meta and table vgroup.
static int32_t collectTableMetaKey(SInsertParseSyntaxCxt* pCxt, SToken* pTbToken) {
  SParseContext*   pParseCxt = pCxt->pComCxt;
  SParseMetaCache* pCache = pCxt->pMetaCache;
  SName            tbName;
  CHECK_CODE(createSName(&tbName, pTbToken, pParseCxt->acctId, pParseCxt->db, &pCxt->msg));
  CHECK_CODE(reserveUserAuthInCacheExt(pParseCxt->pUser, &tbName, AUTH_TYPE_WRITE, pCache));
  CHECK_CODE(reserveTableMetaInCacheExt(&tbName, pCache));
  return reserveTableVgroupInCacheExt(&tbName, pCache);
}
// Syntax-only walk over the body of an INSERT statement (everything after INSERT INTO).
// For each table section it reserves the meta keys of one table via collectTableMetaKey
// and skips the USING clause, bound columns and VALUES rows / FILE path without
// evaluating them. Returns an error code on the first syntax problem, or when the
// statement contains no table section at all.
static int32_t parseInsertBodySyntax(SInsertParseSyntaxCxt* pCxt) {
// Set once a VALUES or FILE section has been accepted; only consulted at end of input.
bool hasData = false;
// for each table
while (1) {
SToken sToken;
// pSql -> tb_name ...
NEXT_TOKEN(pCxt->pSql, sToken);
// no data in the sql string anymore.
if (sToken.n == 0) {
// A zero-length token that still has a type while input remains is reported as an invalid character.
if (sToken.type && pCxt->pSql[0]) {
return buildSyntaxErrMsg(&pCxt->msg, "invalid charactor in SQL", sToken.z);
}
if (!hasData) {
return buildInvalidOperationMsg(&pCxt->msg, "no data in sql");
}
break;
}
// NOTE(review): every path below either sets hasData back to true and continues, or returns,
// so this reset never reaches the end-of-input check as false after the first section.
hasData = false;
SToken tbnameToken = sToken;
NEXT_TOKEN(pCxt->pSql, sToken);
// USING clause
if (TK_USING == sToken.type) {
// The token after USING (not tbnameToken) is the one whose meta keys are reserved here.
NEXT_TOKEN(pCxt->pSql, sToken);
CHECK_CODE(collectTableMetaKey(pCxt, &sToken));
CHECK_CODE(skipUsingClause(pCxt));
NEXT_TOKEN(pCxt->pSql, sToken);
} else {
CHECK_CODE(collectTableMetaKey(pCxt, &tbnameToken));
}
if (TK_NK_LP == sToken.type) {
// pSql -> field1_name, ...)
CHECK_CODE(skipBoundColumns(pCxt));
NEXT_TOKEN(pCxt->pSql, sToken);
}
if (TK_VALUES == sToken.type) {
// pSql -> (field1_value, ...) [(field1_value2, ...) ...]
CHECK_CODE(skipValuesClause(pCxt));
hasData = true;
continue;
}
// FILE csv_file_path
if (TK_FILE == sToken.type) {
// pSql -> csv_file_path
NEXT_TOKEN(pCxt->pSql, sToken);
// The path must be a non-empty string or identifier token.
if (0 == sToken.n || (TK_NK_STRING != sToken.type && TK_NK_ID != sToken.type)) {
return buildSyntaxErrMsg(&pCxt->msg, "file path is required following keyword FILE", sToken.z);
}
hasData = true;
continue;
}
// Anything other than VALUES or FILE at this point is a syntax error.
return buildSyntaxErrMsg(&pCxt->msg, "keyword VALUES or FILE is expected", sToken.z);
}
return TSDB_CODE_SUCCESS;
}
// Syntax-only pass over an INSERT statement.
//
// Skips the INSERT INTO prefix, walks the body with parseInsertBodySyntax to
// reserve the required meta keys, and on success allocates *pQuery and hands
// the populated meta cache over to it.
//
// Returns TSDB_CODE_SUCCESS, TSDB_CODE_OUT_OF_MEMORY, or the error reported by
// the syntax walk. On any failure the meta cache allocated here is released
// instead of being leaked.
int32_t parseInsertSyntax(SParseContext* pContext, SQuery** pQuery) {
  SInsertParseSyntaxCxt context = {.pComCxt = pContext,
                                   .pSql = (char*)pContext->pSql,
                                   .msg = {.buf = pContext->pMsg, .len = pContext->msgLen},
                                   .pMetaCache = taosMemoryCalloc(1, sizeof(SParseMetaCache))};
  if (NULL == context.pMetaCache) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }
  int32_t code = skipInsertInto(&context.pSql, &context.msg);
  if (TSDB_CODE_SUCCESS == code) {
    code = parseInsertBodySyntax(&context);
  }
  if (TSDB_CODE_SUCCESS == code) {
    *pQuery = taosMemoryCalloc(1, sizeof(SQuery));
    if (NULL == *pQuery) {
      code = TSDB_CODE_OUT_OF_MEMORY;
    } else {
      // Ownership moves to the query; context.pMetaCache receives the query's
      // zero-initialised (NULL) pointer in exchange.
      TSWAP((*pQuery)->pMetaCache, context.pMetaCache);
    }
  }
  if (NULL != context.pMetaCache) {
    // Still owned here, i.e. a failure path. Only the three tables that
    // collectTableMetaKey can populate are released.
    // NOTE(review): if SParseMetaCache gains a dedicated destructor, call it here instead.
    if (NULL != context.pMetaCache->pUserAuth) {
      taosHashCleanup(context.pMetaCache->pUserAuth);
    }
    if (NULL != context.pMetaCache->pTableMeta) {
      taosHashCleanup(context.pMetaCache->pTableMeta);
    }
    if (NULL != context.pMetaCache->pTableVgroup) {
      taosHashCleanup(context.pMetaCache->pTableVgroup);
    }
    taosMemoryFree(context.pMetaCache);
  }
  return code;
}
int32_t qCreateSName(SName* pName, const char* pTableName, int32_t acctId, char* dbName, char* msgBuf,
int32_t msgBufLen) {
SMsgBuf msg = {.buf = msgBuf, .len = msgBufLen};

View File

@ -752,18 +752,30 @@ static bool isMultiResFunc(SNode* pNode) {
return (QUERY_NODE_COLUMN == nodeType(pParam) ? 0 == strcmp(((SColumnNode*)pParam)->colName, "*") : false);
}
static EDealRes translateUnaryOperator(STranslateContext* pCxt, SOperatorNode* pOp) {
// Run scalarCalculateConstants on *pOp and, on success, replace *pOp with the
// node it produced. On failure *pOp is left untouched and the error code is returned.
static int32_t rewriteNegativeOperator(SNode** pOp) {
  SNode*  pFolded = NULL;
  int32_t code = scalarCalculateConstants(*pOp, &pFolded);
  if (TSDB_CODE_SUCCESS != code) {
    return code;
  }
  *pOp = pFolded;
  return code;
}
static EDealRes translateUnaryOperator(STranslateContext* pCxt, SOperatorNode** pOpRef) {
SOperatorNode* pOp = *pOpRef;
if (OP_TYPE_MINUS == pOp->opType) {
if (!IS_MATHABLE_TYPE(((SExprNode*)(pOp->pLeft))->resType.type)) {
return generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, ((SExprNode*)(pOp->pLeft))->aliasName);
}
pOp->node.resType.type = TSDB_DATA_TYPE_DOUBLE;
pOp->node.resType.bytes = tDataTypes[TSDB_DATA_TYPE_DOUBLE].bytes;
pCxt->errCode = rewriteNegativeOperator((SNode**)pOpRef);
} else {
pOp->node.resType.type = TSDB_DATA_TYPE_BOOL;
pOp->node.resType.bytes = tDataTypes[TSDB_DATA_TYPE_BOOL].bytes;
}
return DEAL_RES_CONTINUE;
return TSDB_CODE_SUCCESS == pCxt->errCode ? DEAL_RES_CONTINUE : DEAL_RES_ERROR;
}
static EDealRes translateArithmeticOperator(STranslateContext* pCxt, SOperatorNode* pOp) {
@ -824,7 +836,9 @@ static EDealRes translateJsonOperator(STranslateContext* pCxt, SOperatorNode* pO
return DEAL_RES_CONTINUE;
}
static EDealRes translateOperator(STranslateContext* pCxt, SOperatorNode* pOp) {
static EDealRes translateOperator(STranslateContext* pCxt, SOperatorNode** pOpRef) {
SOperatorNode* pOp = *pOpRef;
if (isMultiResFunc(pOp->pLeft)) {
return generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, ((SExprNode*)(pOp->pLeft))->aliasName);
}
@ -833,7 +847,7 @@ static EDealRes translateOperator(STranslateContext* pCxt, SOperatorNode* pOp) {
}
if (nodesIsUnaryOp(pOp)) {
return translateUnaryOperator(pCxt, pOp);
return translateUnaryOperator(pCxt, pOpRef);
} else if (nodesIsArithmeticOp(pOp)) {
return translateArithmeticOperator(pCxt, pOp);
} else if (nodesIsComparisonOp(pOp)) {
@ -992,7 +1006,7 @@ static EDealRes doTranslateExpr(SNode** pNode, void* pContext) {
case QUERY_NODE_VALUE:
return translateValue(pCxt, (SValueNode*)*pNode);
case QUERY_NODE_OPERATOR:
return translateOperator(pCxt, (SOperatorNode*)*pNode);
return translateOperator(pCxt, (SOperatorNode**)pNode);
case QUERY_NODE_FUNCTION:
return translateFunction(pCxt, (SFunctionNode*)*pNode);
case QUERY_NODE_LOGIC_CONDITION:
@ -1891,9 +1905,9 @@ static int32_t translatePartitionBy(STranslateContext* pCxt, SNodeList* pPartiti
return translateExprList(pCxt, pPartitionByList);
}
static int32_t translateWhere(STranslateContext* pCxt, SNode* pWhere) {
static int32_t translateWhere(STranslateContext* pCxt, SNode** pWhere) {
pCxt->currClause = SQL_CLAUSE_WHERE;
return translateExpr(pCxt, &pWhere);
return translateExpr(pCxt, pWhere);
}
static int32_t translateFrom(STranslateContext* pCxt, SSelectStmt* pSelect) {
@ -1964,7 +1978,7 @@ static int32_t translateSelect(STranslateContext* pCxt, SSelectStmt* pSelect) {
pCxt->pCurrStmt = pSelect;
int32_t code = translateFrom(pCxt, pSelect);
if (TSDB_CODE_SUCCESS == code) {
code = translateWhere(pCxt, pSelect->pWhere);
code = translateWhere(pCxt, &pSelect->pWhere);
}
if (TSDB_CODE_SUCCESS == code) {
code = translatePartitionBy(pCxt, pSelect->pPartitionByList);

View File

@ -671,22 +671,32 @@ int32_t putMetaDataToCache(const SCatalogReq* pCatalogReq, const SMetaData* pMet
return code;
}
static int32_t reserveTableReqInCache(int32_t acctId, const char* pDb, const char* pTable, SHashObj** pTables) {
// Record the key pTbFName (len bytes) in the hash *pTables, creating the hash on
// first use. The stored value is only a pointer-sized placeholder.
static int32_t reserveTableReqInCacheImpl(const char* pTbFName, int32_t len, SHashObj** pTables) {
  SHashObj* pHash = *pTables;
  if (NULL == pHash) {
    pHash = taosHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK);
    if (NULL == pHash) {
      return TSDB_CODE_OUT_OF_MEMORY;
    }
    *pTables = pHash;
  }
  return taosHashPut(pHash, pTbFName, len, &pTables, POINTER_BYTES);
}
static int32_t reserveTableReqInCache(int32_t acctId, const char* pDb, const char* pTable, SHashObj** pTables) {
char fullName[TSDB_TABLE_FNAME_LEN];
int32_t len = snprintf(fullName, sizeof(fullName), "%d.%s.%s", acctId, pDb, pTable);
return taosHashPut(*pTables, fullName, len, &pTables, POINTER_BYTES);
return reserveTableReqInCacheImpl(fullName, len, pTables);
}
// Reserve a table-meta request for acctId.pDb.pTable in the cache's pTableMeta table.
int32_t reserveTableMetaInCache(int32_t acctId, const char* pDb, const char* pTable, SParseMetaCache* pMetaCache) {
  int32_t code = reserveTableReqInCache(acctId, pDb, pTable, &pMetaCache->pTableMeta);
  return code;
}
// Same as reserveTableMetaInCache, but the key is taken from an SName: its full
// name is extracted and reserved in the cache's pTableMeta table.
int32_t reserveTableMetaInCacheExt(const SName* pName, SParseMetaCache* pMetaCache) {
  char tbFName[TSDB_TABLE_FNAME_LEN];
  tNameExtractFullName(pName, tbFName);
  int32_t nameLen = (int32_t)strlen(tbFName);
  return reserveTableReqInCacheImpl(tbFName, nameLen, &pMetaCache->pTableMeta);
}
int32_t getTableMetaFromCache(SParseMetaCache* pMetaCache, const SName* pName, STableMeta** pMeta) {
char fullName[TSDB_TABLE_FNAME_LEN];
tNameExtractFullName(pName, fullName);
@ -736,6 +746,12 @@ int32_t reserveTableVgroupInCache(int32_t acctId, const char* pDb, const char* p
return reserveTableReqInCache(acctId, pDb, pTable, &pMetaCache->pTableVgroup);
}
// Reserve a table-vgroup request keyed by the full name extracted from pName,
// in the cache's pTableVgroup table.
int32_t reserveTableVgroupInCacheExt(const SName* pName, SParseMetaCache* pMetaCache) {
  char tbFName[TSDB_TABLE_FNAME_LEN];
  tNameExtractFullName(pName, tbFName);
  int32_t nameLen = (int32_t)strlen(tbFName);
  return reserveTableReqInCacheImpl(tbFName, nameLen, &pMetaCache->pTableVgroup);
}
int32_t getTableVgroupFromCache(SParseMetaCache* pMetaCache, const SName* pName, SVgroupInfo* pVgroup) {
char fullName[TSDB_TABLE_FNAME_LEN];
tNameExtractFullName(pName, fullName);
@ -776,18 +792,30 @@ int32_t getDbCfgFromCache(SParseMetaCache* pMetaCache, const char* pDbFName, SDb
return TSDB_CODE_SUCCESS;
}
int32_t reserveUserAuthInCache(int32_t acctId, const char* pUser, const char* pDb, AUTH_TYPE type,
SParseMetaCache* pMetaCache) {
// Record the auth key pKey (len bytes) in pMetaCache->pUserAuth, creating the
// hash on first use. The stored value is a `false` placeholder.
static int32_t reserveUserAuthInCacheImpl(const char* pKey, int32_t len, SParseMetaCache* pMetaCache) {
  if (NULL == pMetaCache->pUserAuth) {
    pMetaCache->pUserAuth = taosHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK);
  }
  if (NULL == pMetaCache->pUserAuth) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }
  bool notYetChecked = false;
  return taosHashPut(pMetaCache->pUserAuth, pKey, len, &notYetChecked, sizeof(notYetChecked));
}
int32_t reserveUserAuthInCache(int32_t acctId, const char* pUser, const char* pDb, AUTH_TYPE type,
SParseMetaCache* pMetaCache) {
char key[USER_AUTH_KEY_MAX_LEN] = {0};
int32_t len = userAuthToString(acctId, pUser, pDb, type, key);
bool pass = false;
return taosHashPut(pMetaCache->pUserAuth, key, len, &pass, sizeof(pass));
return reserveUserAuthInCacheImpl(key, len, pMetaCache);
}
// Reserve an auth check for pUser on the database that pName belongs to: the
// full db name is taken from pName, turned into an auth key with
// userAuthToStringExt, and recorded in the cache.
int32_t reserveUserAuthInCacheExt(const char* pUser, const SName* pName, AUTH_TYPE type, SParseMetaCache* pMetaCache) {
  char dbFName[TSDB_DB_FNAME_LEN] = {0};
  char authKey[USER_AUTH_KEY_MAX_LEN] = {0};
  tNameGetFullDbName(pName, dbFName);
  int32_t keyLen = userAuthToStringExt(pUser, dbFName, type, authKey);
  return reserveUserAuthInCacheImpl(authKey, keyLen, pMetaCache);
}
int32_t getUserAuthFromCache(SParseMetaCache* pMetaCache, const char* pUser, const char* pDbFName, AUTH_TYPE type,

View File

@ -34,7 +34,7 @@ bool qIsInsertSql(const char* pStr, size_t length) {
} while (1);
}
static int32_t semanticAnalysis(SParseContext* pCxt, SQuery* pQuery) {
static int32_t analyseSemantic(SParseContext* pCxt, SQuery* pQuery) {
int32_t code = authenticate(pCxt, pQuery);
if (TSDB_CODE_SUCCESS == code && pQuery->placeholderNum > 0) {
@ -54,12 +54,12 @@ static int32_t semanticAnalysis(SParseContext* pCxt, SQuery* pQuery) {
static int32_t parseSqlIntoAst(SParseContext* pCxt, SQuery** pQuery) {
int32_t code = parse(pCxt, pQuery);
if (TSDB_CODE_SUCCESS == code) {
code = semanticAnalysis(pCxt, *pQuery);
code = analyseSemantic(pCxt, *pQuery);
}
return code;
}
static int32_t syntaxParseSql(SParseContext* pCxt, SQuery** pQuery) {
static int32_t parseSqlSyntax(SParseContext* pCxt, SQuery** pQuery) {
int32_t code = parse(pCxt, pQuery);
if (TSDB_CODE_SUCCESS == code) {
code = collectMetaKey(pCxt, *pQuery);
@ -192,12 +192,12 @@ int32_t qParseSql(SParseContext* pCxt, SQuery** pQuery) {
return code;
}
int32_t qSyntaxParseSql(SParseContext* pCxt, SQuery** pQuery, struct SCatalogReq* pCatalogReq) {
int32_t qParseSqlSyntax(SParseContext* pCxt, SQuery** pQuery, struct SCatalogReq* pCatalogReq) {
int32_t code = TSDB_CODE_SUCCESS;
if (qIsInsertSql(pCxt->pSql, pCxt->sqlLen)) {
// todo insert sql
code = parseInsertSyntax(pCxt, pQuery);
} else {
code = syntaxParseSql(pCxt, pQuery);
code = parseSqlSyntax(pCxt, pQuery);
}
if (TSDB_CODE_SUCCESS == code) {
code = buildCatalogReq((*pQuery)->pMetaCache, pCatalogReq);
@ -206,13 +206,13 @@ int32_t qSyntaxParseSql(SParseContext* pCxt, SQuery** pQuery, struct SCatalogReq
return code;
}
int32_t qSemanticAnalysisSql(SParseContext* pCxt, const struct SCatalogReq* pCatalogReq,
const struct SMetaData* pMetaData, SQuery* pQuery) {
int32_t qAnalyseSqlSemantic(SParseContext* pCxt, const struct SCatalogReq* pCatalogReq,
const struct SMetaData* pMetaData, SQuery* pQuery) {
int32_t code = putMetaDataToCache(pCatalogReq, pMetaData, pQuery->pMetaCache);
if (NULL == pQuery->pRoot) {
// todo insert sql
return parseInsertSql(pCxt, &pQuery);
}
return semanticAnalysis(pCxt, pQuery);
return analyseSemantic(pCxt, pQuery);
}
void qDestroyQuery(SQuery* pQueryNode) { nodesDestroyNode(pQueryNode); }

View File

@ -26,9 +26,7 @@ if(${BUILD_WINGETOPT})
target_link_libraries(parserTest PUBLIC wingetopt)
endif()
if(NOT TD_WINDOWS)
add_test(
NAME parserTest
COMMAND parserTest
)
endif(NOT TD_WINDOWS)
add_test(
NAME parserTest
COMMAND parserTest
)

View File

@ -242,6 +242,8 @@ class MockCatalogServiceImpl {
info->outputType = outputType;
info->outputLen = outputLen;
info->bufSize = bufSize;
info->pCode = nullptr;
info->pComment = nullptr;
udf_.insert(std::make_pair(func, info));
}

View File

@ -15,6 +15,7 @@
#include <gtest/gtest.h>
#include "mockCatalogService.h"
#include "os.h"
#include "parInt.h"
@ -57,6 +58,38 @@ class InsertTest : public Test {
return code_;
}
// Drive the two-phase insert parse: syntax pass, catalog request build, mock
// metadata fetch, cache fill, then the full insert parse. Each stage stores its
// result in code_; on failure the stage name, code and message buffer are printed
// and the code is returned immediately. Returns code_ of the last stage run.
int32_t runAsync() {
// Phase 1: syntax-only pass; on success res_ holds a query carrying the meta cache.
code_ = parseInsertSyntax(&cxt_, &res_);
if (code_ != TSDB_CODE_SUCCESS) {
cout << "parseInsertSyntax code:" << toString(code_) << ", msg:" << errMagBuf_ << endl;
return code_;
}
// Turn the keys reserved in the meta cache into a catalog request.
SCatalogReq catalogReq = {0};
code_ = buildCatalogReq(res_->pMetaCache, &catalogReq);
if (code_ != TSDB_CODE_SUCCESS) {
cout << "buildCatalogReq code:" << toString(code_) << ", msg:" << errMagBuf_ << endl;
return code_;
}
// Answer the request from the mock catalog; its result is not checked here.
SMetaData metaData = {0};
g_mockCatalogService->catalogGetAllMeta(&catalogReq, &metaData);
code_ = putMetaDataToCache(&catalogReq, &metaData, res_->pMetaCache);
if (code_ != TSDB_CODE_SUCCESS) {
cout << "putMetaDataToCache code:" << toString(code_) << ", msg:" << errMagBuf_ << endl;
return code_;
}
// Phase 2: full insert parse, passed the same res_ produced by phase 1.
code_ = parseInsertSql(&cxt_, &res_);
if (code_ != TSDB_CODE_SUCCESS) {
cout << "parseInsertSql code:" << toString(code_) << ", msg:" << errMagBuf_ << endl;
return code_;
}
return code_;
}
void dumpReslut() {
SVnodeModifOpStmt* pStmt = getVnodeModifStmt(res_);
size_t num = taosArrayGetSize(pStmt->pDataBlocks);
@ -125,7 +158,7 @@ class InsertTest : public Test {
SQuery* res_;
};
// INSERT INTO tb_name VALUES (field1_value, ...)
// INSERT INTO tb_name [(field1_name, ...)] VALUES (field1_value, ...)
TEST_F(InsertTest, singleTableSingleRowTest) {
setDatabase("root", "test");
@ -133,6 +166,17 @@ TEST_F(InsertTest, singleTableSingleRowTest) {
ASSERT_EQ(run(), TSDB_CODE_SUCCESS);
dumpReslut();
checkReslut(1, 1);
bind("insert into t1 (ts, c1, c2, c3, c4, c5) values (now, 1, 'beijing', 3, 4, 5)");
ASSERT_EQ(run(), TSDB_CODE_SUCCESS);
bind("insert into t1 values (now, 1, 'beijing', 3, 4, 5)");
ASSERT_EQ(runAsync(), TSDB_CODE_SUCCESS);
dumpReslut();
checkReslut(1, 1);
bind("insert into t1 (ts, c1, c2, c3, c4, c5) values (now, 1, 'beijing', 3, 4, 5)");
ASSERT_EQ(runAsync(), TSDB_CODE_SUCCESS);
}
// INSERT INTO tb_name VALUES (field1_value, ...)(field1_value, ...)
@ -140,11 +184,16 @@ TEST_F(InsertTest, singleTableMultiRowTest) {
setDatabase("root", "test");
bind(
"insert into t1 values (now, 1, 'beijing', 3, 4, 5)(now+1s, 2, 'shanghai', 6, 7, 8)(now+2s, 3, 'guangzhou', 9, "
"10, 11)");
"insert into t1 values (now, 1, 'beijing', 3, 4, 5)(now+1s, 2, 'shanghai', 6, 7, 8)"
"(now+2s, 3, 'guangzhou', 9, 10, 11)");
ASSERT_EQ(run(), TSDB_CODE_SUCCESS);
dumpReslut();
checkReslut(1, 3);
bind(
"insert into t1 values (now, 1, 'beijing', 3, 4, 5)(now+1s, 2, 'shanghai', 6, 7, 8)"
"(now+2s, 3, 'guangzhou', 9, 10, 11)");
ASSERT_EQ(runAsync(), TSDB_CODE_SUCCESS);
}
// INSERT INTO tb1_name VALUES (field1_value, ...) tb2_name VALUES (field1_value, ...)
@ -155,6 +204,9 @@ TEST_F(InsertTest, multiTableSingleRowTest) {
ASSERT_EQ(run(), TSDB_CODE_SUCCESS);
dumpReslut();
checkReslut(2, 1);
bind("insert into st1s1 values (now, 1, \"beijing\") st1s2 values (now, 10, \"131028\")");
ASSERT_EQ(runAsync(), TSDB_CODE_SUCCESS);
}
// INSERT INTO tb1_name VALUES (field1_value, ...) tb2_name VALUES (field1_value, ...)
@ -167,6 +219,11 @@ TEST_F(InsertTest, multiTableMultiRowTest) {
ASSERT_EQ(run(), TSDB_CODE_SUCCESS);
dumpReslut();
checkReslut(2, 3, 2);
bind(
"insert into st1s1 values (now, 1, \"beijing\")(now+1s, 2, \"shanghai\")(now+2s, 3, \"guangzhou\")"
" st1s2 values (now, 10, \"131028\")(now+1s, 20, \"132028\")");
ASSERT_EQ(runAsync(), TSDB_CODE_SUCCESS);
}
// INSERT INTO
@ -181,6 +238,21 @@ TEST_F(InsertTest, autoCreateTableTest) {
ASSERT_EQ(run(), TSDB_CODE_SUCCESS);
dumpReslut();
checkReslut(1, 3);
bind(
"insert into st1s1 using st1 (tag1, tag2) tags(1, 'wxy') values (now, 1, \"beijing\")"
"(now+1s, 2, \"shanghai\")(now+2s, 3, \"guangzhou\")");
ASSERT_EQ(run(), TSDB_CODE_SUCCESS);
bind(
"insert into st1s1 using st1 tags(1, 'wxy') values (now, 1, \"beijing\")(now+1s, 2, \"shanghai\")(now+2s, 3, "
"\"guangzhou\")");
ASSERT_EQ(runAsync(), TSDB_CODE_SUCCESS);
bind(
"insert into st1s1 using st1 (tag1, tag2) tags(1, 'wxy') values (now, 1, \"beijing\")"
"(now+1s, 2, \"shanghai\")(now+2s, 3, \"guangzhou\")");
ASSERT_EQ(runAsync(), TSDB_CODE_SUCCESS);
}
TEST_F(InsertTest, toleranceTest) {
@ -190,4 +262,9 @@ TEST_F(InsertTest, toleranceTest) {
ASSERT_NE(run(), TSDB_CODE_SUCCESS);
bind("insert into t");
ASSERT_NE(run(), TSDB_CODE_SUCCESS);
bind("insert into");
ASSERT_NE(runAsync(), TSDB_CODE_SUCCESS);
bind("insert into t");
ASSERT_NE(runAsync(), TSDB_CODE_SUCCESS);
}

View File

@ -44,6 +44,8 @@ TEST_F(ParserSelectTest, constant) {
"timestamp '2022-02-09 17:30:20', true, false, 15s FROM t1");
run("SELECT 123 + 45 FROM t1 WHERE 2 - 1");
run("SELECT * FROM t1 WHERE -2");
}
TEST_F(ParserSelectTest, expression) {
@ -76,6 +78,12 @@ TEST_F(ParserSelectTest, pseudoColumnSemanticCheck) {
run("SELECT TBNAME FROM (SELECT * FROM st1s1)", TSDB_CODE_PAR_INVALID_TBNAME, PARSER_STAGE_TRANSLATE);
}
TEST_F(ParserSelectTest, aggFunc) {
useDb("root", "test");
run("SELECT LEASTSQUARES(c1, -1, 1) FROM t1");
}
TEST_F(ParserSelectTest, multiResFunc) {
useDb("root", "test");

View File

@ -27,6 +27,14 @@ TEST_F(PlanSuperTableTest, pseudoCol) {
run("SELECT TBNAME, tag1, tag2 FROM st1");
}
TEST_F(PlanSuperTableTest, pseudoColOnChildTable) {
useDb("root", "test");
run("SELECT TBNAME FROM st1s1");
run("SELECT TBNAME, tag1, tag2 FROM st1s1");
}
TEST_F(PlanSuperTableTest, orderBy) {
useDb("root", "test");

View File

@ -17,9 +17,7 @@ TARGET_INCLUDE_DIRECTORIES(
PUBLIC "${TD_SOURCE_DIR}/source/libs/parser/inc"
PRIVATE "${TD_SOURCE_DIR}/source/libs/scalar/inc"
)
if(NOT TD_WINDOWS)
add_test(
NAME scalarTest
COMMAND scalarTest
)
endif(NOT TD_WINDOWS)
add_test(
NAME scalarTest
COMMAND scalarTest
)

View File

@ -2498,7 +2498,7 @@ TEST(ScalarFunctionTest, tanFunction_column) {
code = tanFunction(pInput, 1, pOutput);
ASSERT_EQ(code, TSDB_CODE_SUCCESS);
for (int32_t i = 0; i < rowNum; ++i) {
ASSERT_EQ(*((double *)colDataGetData(pOutput->columnData, i)), result[i]);
ASSERT_NEAR(*((double *)colDataGetData(pOutput->columnData, i)), result[i], 1e-15);
PRINTF("tiny_int after TAN:%f\n", *((double *)colDataGetData(pOutput->columnData, i)));
}
scltDestroyDataBlock(pInput);
@ -2517,7 +2517,7 @@ TEST(ScalarFunctionTest, tanFunction_column) {
code = tanFunction(pInput, 1, pOutput);
ASSERT_EQ(code, TSDB_CODE_SUCCESS);
for (int32_t i = 0; i < rowNum; ++i) {
ASSERT_EQ(*((double *)colDataGetData(pOutput->columnData, i)), result[i]);
ASSERT_NEAR(*((double *)colDataGetData(pOutput->columnData, i)), result[i], 1e-15);
PRINTF("float after TAN:%f\n", *((double *)colDataGetData(pOutput->columnData, i)));
}

View File

@ -111,12 +111,10 @@ target_link_libraries (pushServer
)
if(NOT TD_WINDOWS)
add_test(
NAME transUT
COMMAND transUT
)
endif(NOT TD_WINDOWS)
add_test(
NAME transUT
COMMAND transUT
)
add_test(
NAME transUtilUt
COMMAND transportTest

View File

@ -252,6 +252,7 @@ endi
print ======== step8
sql alter table tb add column h binary(10)
sql select * from tb
sql describe tb
if $data00 != ts then
return -1
@ -304,7 +305,7 @@ endi
if $data80 != h then
return -1
endi
if $data81 != BINARY then
if $data81 != VARCHAR then
return -1
endi
if $data82 != 10 then
@ -371,7 +372,7 @@ endi
if $data80 != h then
return -1
endi
if $data81 != BINARY then
if $data81 != VARCHAR then
return -1
endi
if $data82 != 10 then
@ -447,7 +448,7 @@ endi
if $data70 != h then
return -1
endi
if $data71 != BINARY then
if $data71 != VARCHAR then
return -1
endi
if $data72 != 10 then
@ -496,7 +497,7 @@ endi
if $data60 != h then
return -1
endi
if $data61 != BINARY then
if $data61 != VARCHAR then
return -1
endi
if $data62 != 10 then
@ -539,7 +540,7 @@ endi
if $data50 != h then
return -1
endi
if $data51 != BINARY then
if $data51 != VARCHAR then
return -1
endi
if $data52 != 10 then
@ -576,7 +577,7 @@ endi
if $data40 != h then
return -1
endi
if $data41 != BINARY then
if $data41 != VARCHAR then
return -1
endi
if $data42 != 10 then
@ -607,7 +608,7 @@ endi
if $data30 != h then
return -1
endi
if $data31 != BINARY then
if $data31 != VARCHAR then
return -1
endi
if $data32 != 10 then
@ -632,7 +633,7 @@ endi
if $data20 != h then
return -1
endi
if $data21 != BINARY then
if $data21 != VARCHAR then
return -1
endi
if $data22 != 10 then

View File

@ -104,6 +104,10 @@
./test.sh -f tsim/stable/tag_modify.sim
./test.sh -f tsim/stable/tag_rename.sim
./test.sh -f tsim/stable/alter_comment.sim
./test.sh -f tsim/stable/alter_count.sim
./test.sh -f tsim/stable/alter_insert1.sim
./test.sh -f tsim/stable/alter_insert2.sim
./test.sh -f tsim/stable/alter_import.sim
# --- for multi process mode
./test.sh -f tsim/user/basic1.sim -m

View File

@ -29,14 +29,14 @@ if $data00 != 3 then
endi
print ========= step3
sql import into tb values(now-23d, -23, 0)
sql import into tb values(now-21d, -21, 0)
sql insert into tb values(now-23d, -23, 0)
sql insert into tb values(now-21d, -21, 0)
sql select count(b) from tb
if $data00 != 5 then
return -1
endi
sql import into tb values(now-29d, -29, 0)
sql insert into tb values(now-29d, -29, 0)
sql select count(b) from tb
if $data00 != 6 then
return -1

View File

@ -347,7 +347,7 @@ endi
if $data80 != h then
return -1
endi
if $data81 != BINARY then
if $data81 != VARCHAR then
return -1
endi
if $data82 != 10 then
@ -363,9 +363,8 @@ endi
print ======== step9
print ======== step10
system sh/exec.sh -n dnode1 -s stop -x SIGINT
sleep 3000
system sh/exec.sh -n dnode1 -s start
sleep 3000
sql connect
sql use d2
sql describe tb
@ -420,7 +419,7 @@ endi
if $data80 != h then
return -1
endi
if $data81 != BINARY then
if $data81 != VARCHAR then
return -1
endi
if $data82 != 10 then
@ -502,7 +501,7 @@ endi
if $data70 != h then
return -1
endi
if $data71 != BINARY then
if $data71 != VARCHAR then
return -1
endi
if $data72 != 10 then
@ -557,7 +556,7 @@ endi
if $data60 != h then
return -1
endi
if $data61 != BINARY then
if $data61 != VARCHAR then
return -1
endi
if $data62 != 10 then
@ -606,7 +605,7 @@ endi
if $data50 != h then
return -1
endi
if $data51 != BINARY then
if $data51 != VARCHAR then
return -1
endi
if $data52 != 10 then
@ -649,7 +648,7 @@ endi
if $data40 != h then
return -1
endi
if $data41 != BINARY then
if $data41 != VARCHAR then
return -1
endi
if $data42 != 10 then
@ -686,7 +685,7 @@ endi
if $data30 != h then
return -1
endi
if $data31 != BINARY then
if $data31 != VARCHAR then
return -1
endi
if $data32 != 10 then
@ -717,7 +716,7 @@ endi
if $data20 != h then
return -1
endi
if $data21 != BINARY then
if $data21 != VARCHAR then
return -1
endi
if $data22 != 10 then
@ -758,7 +757,7 @@ endi
print ======= over
sql drop database d2
sql show databases
if $rows != 0 then
if $rows != 2 then
return -1
endi

View File

@ -79,28 +79,31 @@ system sh/exec.sh -n dnode1 -s stop -x SIGINT
system sh/exec.sh -n dnode1 -s start
sql connect
sql select * from db.ctb
sql select * from db.stb
print $data[0][0] $data[0][1] $data[0][2] $data[0][3] $data[0][4] $data[0][5] $data[0][6]
print $data[1][0] $data[1][1] $data[1][2] $data[1][3] $data[1][4] $data[1][5] $data[1][6]
if $rows != 2 then
return -1
endi
#if $data[0][1] != 1 then
# return -1
#endi
#if $data[0][2] != 1234 then
# return -1
#endi
#if $data[0][3] != 101 then
# return -1
#endi
#if $data[1][1] != 1 then
# return -1
#endi
#if $data[1][2] != 12345 then
# return -1
#endi
#if $data[1][3] != 101 then
# return -1
#endi
if $data[0][1] != 1 then
return -1
endi
if $data[0][2] != 1234 then
return -1
endi
if $data[0][3] != 101 then
return -1
endi
if $data[1][1] != 1 then
return -1
endi
if $data[1][2] != 12345 then
return -1
endi
if $data[1][3] != 101 then
return -1
endi
system sh/exec.sh -n dnode1 -s stop -x SIGINT

View File

@ -49,10 +49,9 @@ if $data00 != $totalNum then
return -1
endi
sleep 1000
system sh/exec.sh -n dnode1 -s stop -x SIGINT
sleep 1000
system sh/exec.sh -n dnode1 -s start
sql connect
sql use $db
sql show vgroups

View File

@ -93,9 +93,6 @@ $i = 2
$tb = $tbPrefix . $i
sql insert into $tb values (now + 1m , 1 )
print sleep 2000
sleep 2000
print =============== step6
# sql select * from $mt

View File

@ -0,0 +1,457 @@
from math import floor
from random import randint, random
from numpy import equal
import taos
import sys
import datetime
import inspect
from util.log import *
from util.sql import *
from util.cases import *
class TDTestCase:
updatecfgDict = {'debugFlag': 143 ,"cDebugFlag":143,"uDebugFlag":143 ,"rpcDebugFlag":143 , "tmrDebugFlag":143 ,
"jniDebugFlag":143 ,"simDebugFlag":143,"dDebugFlag":143, "dDebugFlag":143,"vDebugFlag":143,"mDebugFlag":143,"qDebugFlag":143,
"wDebugFlag":143,"sDebugFlag":143,"tsdbDebugFlag":143,"tqDebugFlag":143 ,"fsDebugFlag":143 ,"fnDebugFlag":143}
# Log the path of this test script and initialise tdSql with a cursor taken from conn.
# logSql is accepted but not used in this method.
def init(self, conn, logSql):
tdLog.debug(f"start to excute {__file__}")
tdSql.init(conn.cursor())
# Create the fixture schema and rows: super table stb1 (one int tag), plain table t1,
# child tables ct1..ct4 of stb1, then rows in ct1, ct4 and t1 including all-NULL rows.
def prepare_datas(self):
# Super table with ten data columns and tag t1.
tdSql.execute(
'''create table stb1
(ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp)
tags (t1 int)
'''
)
# Plain table with the same columns.
tdSql.execute(
'''
create table t1
(ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp)
'''
)
# Child tables ct1..ct4, tagged 1..4.
for i in range(4):
tdSql.execute(f'create table ct{i+1} using stb1 tags ( {i+1} )')
# Nine rows each: ct1 spaced 10 seconds apart, ct4 spaced 90 days apart, both going back from now.
for i in range(9):
tdSql.execute(
f"insert into ct1 values ( now()-{i*10}s, {1*i}, {11111*i}, {111*i}, {11*i}, {1.11*i}, {11.11*i}, {i%2}, 'binary{i}', 'nchar{i}', now()+{1*i}a )"
)
tdSql.execute(
f"insert into ct4 values ( now()-{i*90}d, {1*i}, {11111*i}, {111*i}, {11*i}, {1.11*i}, {11.11*i}, {i%2}, 'binary{i}', 'nchar{i}', now()+{1*i}a )"
)
# Extra ct1 rows: one all-zero row, and future-dated rows with negative values and single NULLs.
tdSql.execute("insert into ct1 values (now()-45s, 0, 0, 0, 0, 0, 0, 0, 'binary0', 'nchar0', now()+8a )")
tdSql.execute("insert into ct1 values (now()+10s, 9, -99999, -999, -99, -9.99, -99.99, 1, 'binary9', 'nchar9', now()+9a )")
tdSql.execute("insert into ct1 values (now()+15s, 9, -99999, -999, -99, -9.99, NULL, 1, 'binary9', 'nchar9', now()+9a )")
tdSql.execute("insert into ct1 values (now()+20s, 9, -99999, -999, NULL, -9.99, -99.99, 1, 'binary9', 'nchar9', now()+9a )")
# Three all-NULL rows in ct4.
tdSql.execute("insert into ct4 values (now()-810d, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ) ")
tdSql.execute("insert into ct4 values (now()-400d, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ) ")
tdSql.execute("insert into ct4 values (now()+90d, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ) ")
# Twelve rows in t1 with fixed timestamps, inserted in a single multi-row statement.
tdSql.execute(
f'''insert into t1 values
( '2020-04-21 01:01:01.000', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL )
( '2020-10-21 01:01:01.000', 1, 11111, 111, 11, 1.11, 11.11, 1, "binary1", "nchar1", now()+1a )
( '2020-12-31 01:01:01.000', 2, 22222, 222, 22, 2.22, 22.22, 0, "binary2", "nchar2", now()+2a )
( '2021-01-01 01:01:06.000', 3, 33333, 333, 33, 3.33, 33.33, 0, "binary3", "nchar3", now()+3a )
( '2021-05-07 01:01:10.000', 4, 44444, 444, 44, 4.44, 44.44, 1, "binary4", "nchar4", now()+4a )
( '2021-07-21 01:01:01.000', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL )
( '2021-09-30 01:01:16.000', 5, 55555, 555, 55, 5.55, 55.55, 0, "binary5", "nchar5", now()+5a )
( '2022-02-01 01:01:20.000', 6, 66666, 666, 66, 6.66, 66.66, 1, "binary6", "nchar6", now()+6a )
( '2022-10-28 01:01:26.000', 7, 00000, 000, 00, 0.00, 00.00, 1, "binary7", "nchar7", "1970-01-01 08:00:00.000" )
( '2022-12-01 01:01:30.000', 8, -88888, -888, -88, -8.88, -88.88, 0, "binary8", "nchar8", "1969-01-01 01:00:00.000" )
( '2022-12-31 01:01:36.000', 9, -99999999999999999, -999, -99, -9.99, -999999999999999999999.99, 1, "binary9", "nchar9", "1900-01-01 00:00:00.000" )
( '2023-02-21 01:01:01.000', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL )
'''
)
def test_errors(self):
    """Run every malformed or unsupported ``unique`` statement through ``tdSql.error``.

    The statements cover a missing argument list, wrong argument counts,
    literal / NULL / boolean arguments, broken syntax, and ``unique`` mixed
    with other columns or functions in the same select list.  They are
    issued first against ``t1`` and then against ``stb1 partition by tbname``.
    ``tdSql.error`` is presumably the framework helper that passes only when
    the statement is rejected -- confirm against the test utilities.
    """
    # Statements issued against t1.
    regular_table_cases = (
        "select unique from t1",
        "select unique(123--123)==1 from t1",
        "select unique(123,123) from t1",
        "select unique(c1,ts) from t1",
        "select unique(c1,c1,ts) from t1",
        "select unique(c1) as 'd1' from t1",
        "select unique(c1 ,c2 ) from t1",
        "select unique(c1 ,NULL) from t1",
        "select unique(,) from t1;",
        "select unique(floor(c1) ab from t1)",
        "select unique(c1) as int from t1",
        "select unique('c1') from t1",
        "select unique(NULL) from t1",
        "select unique('') from t1",
        "select unique(c%) from t1",
        "select unique(t1) from t1",
        "select unique(True) from t1",
        "select unique(c1) , count(c1) from t1",
        "select unique(c1) , avg(c1) from t1",
        "select unique(c1) , min(c1) from t1",
        "select unique(c1) , spread(c1) from t1",
        "select unique(c1) , diff(c1) from t1",
        "select unique(c1) , abs(c1) from t1",
        "select unique(c1) , c1 from t1",
    )
    # Statements issued against stb1, partitioned by table name.
    super_table_cases = (
        "select unique from stb1 partition by tbname",
        "select unique(123--123)==1 from stb1 partition by tbname",
        "select unique(123) from stb1 partition by tbname",
        "select unique(c1,ts) from stb1 partition by tbname",
        "select unique(c1,c1,ts) from stb1 partition by tbname",
        "select unique(c1) as 'd1' from stb1 partition by tbname",
        "select unique(c1 ,c2 ) from stb1 partition by tbname",
        "select unique(c1 ,NULL) from stb1 partition by tbname",
        "select unique(,) from stb1 partition by tbname;",
        "select unique(floor(c1) ab from stb1 partition by tbname)",
        "select unique(c1) as int from stb1 partition by tbname",
        "select unique('c1') from stb1 partition by tbname",
        "select unique(NULL) from stb1 partition by tbname",
        "select unique('') from stb1 partition by tbname",
        "select unique(c%) from stb1 partition by tbname",
        # Disabled in the original list (presumably because t1 is a tag of
        # stb1 and the statement is therefore accepted -- TODO confirm):
        # "select unique(t1) from stb1 partition by tbname",
        "select unique(True) from stb1 partition by tbname",
        "select unique(c1) , count(c1) from stb1 partition by tbname",
        "select unique(c1) , avg(c1) from stb1 partition by tbname",
        "select unique(c1) , min(c1) from stb1 partition by tbname",
        "select unique(c1) , spread(c1) from stb1 partition by tbname",
        "select unique(c1) , diff(c1) from stb1 partition by tbname",
        "select unique(c1) , abs(c1) from stb1 partition by tbname",
        "select unique(c1) , c1 from stb1 partition by tbname",
    )
    for bad_sql in regular_table_cases + super_table_cases:
        tdSql.error(bad_sql)
def support_types(self):
    """Issue ``unique`` over every column type via ``tdSql.query``.

    Two passes are made:

    1. columns ``ts``, ``c7``, ``c8``, ``c9`` on ``t1``, ``ct1``, ``ct3``,
       ``ct4`` and on ``stb1 partition by tbname``; each statement is logged
       after it has been issued;
    2. columns ``c1`` .. ``c6`` on ``t1``, ``ct1``, ``ct3`` and
       ``stb1 partition by tbname``, followed by two aliased forms of
       ``unique(c6)``; these are issued without logging.

    No result values are checked here; the method only relies on
    ``tdSql.query`` accepting each statement.
    """
    partitioned_stable = "stb1 partition by tbname"

    # Pass 1: timestamp column plus c7/c8/c9, table-major order.
    non_numeric_cols = ("ts", "c7", "c8", "c9")
    non_numeric_sources = ("t1", "ct1", "ct3", "ct4", partitioned_stable)
    for source in non_numeric_sources:
        for column in non_numeric_cols:
            statement = f"select unique({column}) from {source}"
            tdSql.query(statement)
            tdLog.info("support type ok , sql is : %s"%statement)

    # Pass 2: c1..c6, table-major order (ct4 is not part of this pass).
    numeric_cols = ("c1", "c2", "c3", "c4", "c5", "c6")
    numeric_sources = ("t1", "ct1", "ct3", partitioned_stable)
    numeric_statements = [
        f"select unique({column}) from {source}"
        for source in numeric_sources
        for column in numeric_cols
    ]
    # Aliased result column, with and without the "as" keyword.
    numeric_statements.append("select unique(c6) as alisb from stb1 partition by tbname")
    numeric_statements.append("select unique(c6) alisb from stb1 partition by tbname")

    for statement in numeric_statements:
        tdSql.query(statement)
def check_unique_table(self , unique_sql):
    """Cross-check a ``unique(col)`` query against a Python-side de-duplication.

    The plain query is derived from ``unique_sql`` by stripping the
    ``unique(`` prefix and every ``)`` character, e.g.
    ``select unique(c1) from ct1`` becomes ``select c1 from ct1``.  Both
    statements are run, the first column of each result is collected, and
    the plain values are reduced to their first occurrences in order.  The
    check passes when that reduced list equals the ``unique`` result;
    otherwise ``tdLog.exit`` is called.

    :param unique_sql: a statement of the form ``select unique(<col>) from <table>``;
        because every ``)`` is removed, it must not contain other parentheses.
    """
    plain_sql = unique_sql.replace("unique(","").replace(")","")

    # Collect the unique() values before the next query replaces the result.
    tdSql.query(unique_sql)
    reported_values = [row[0] for row in tdSql.queryResult]

    tdSql.query(plain_sql)
    raw_values = [row[0] for row in tdSql.queryResult]

    # Keep only the first occurrence of each value, preserving row order.
    expected_values = []
    for value in raw_values:
        if value not in expected_values:
            expected_values.append(value)

    if expected_values == reported_values:
        tdLog.info(" unique query check pass , unique sql is: %s" %unique_sql)
    else:
        tdLog.exit(" unique query check fail , unique sql is: %s " %unique_sql)
def basic_unique_function(self):
    """Exercise ``unique`` in the main query shapes and check the results.

    Covered in order: baseline row counts, the empty table ``ct3``, an
    automatic per-column comparison on ``t1`` (via ``check_unique_table``),
    rejected combinations with tags / columns / other functions, ``where``
    filters, ``union all``, joins on freshly created tables, nested queries
    and ``group by`` rejection.

    Relies on the tables ``t1``, ``ct1``, ``ct3``, ``ct4`` and ``stb1``
    already existing (``run`` calls ``prepare_datas`` first) and creates
    ``st1``/``st2`` with their child tables in database ``db`` for the join
    checks.
    """
    # basic query
    tdSql.query("select c1 from ct3")
    tdSql.checkRows(0)
    tdSql.query("select c1 from t1")
    tdSql.checkRows(12)
    tdSql.query("select c1 from stb1")
    tdSql.checkRows(25)

    # used for empty table , ct3 is empty
    # Every column is verified, including c6, whose row-count check was
    # missing from the original sequence.
    for col in ("c1", "c2", "c3", "c4", "c5", "c6"):
        tdSql.query(f"select unique({col}) from ct3")
        tdSql.checkRows(0)

    # will support _rowts mix with
    # tdSql.query("select unique(c6),_rowts from ct3")

    # auto check for t1 table
    # used for regular table
    tdSql.query("select unique(c1) from t1")

    # "desc t1" yields one row per column; the first field is the column name.
    tdSql.query("desc t1")
    col_lists = [col_row[0] for col_row in tdSql.queryResult]
    for col in col_lists:
        self.check_unique_table(f"select unique({col}) from t1")

    # unique with super tags
    tdSql.query("select unique(c1) from ct1")
    tdSql.checkRows(10)

    tdSql.query("select unique(c1) from ct4")
    tdSql.checkRows(10)

    tdSql.error("select unique(c1),tbname from ct1")
    tdSql.error("select unique(c1),t1 from ct1")

    # unique with common col
    tdSql.error("select unique(c1) ,ts from ct1")
    tdSql.error("select unique(c1) ,c1 from ct1")

    # unique with scalar function
    tdSql.error("select unique(c1) ,abs(c1) from ct1")
    tdSql.error("select unique(c1) , unique(c2) from ct1")
    tdSql.error("select unique(c1) , abs(c2)+2 from ct1")

    # unique with aggregate function
    tdSql.error("select unique(c1) ,sum(c1) from ct1")
    tdSql.error("select unique(c1) ,max(c1) from ct1")
    tdSql.error("select unique(c1) ,csum(c1) from ct1")
    tdSql.error("select unique(c1) ,count(c1) from ct1")

    # unique with filter where
    tdSql.query("select unique(c1) from ct4 where c1 is null")
    tdSql.checkData(0, 0, None)

    tdSql.query("select unique(c1) from ct4 where c1 >2 ")
    tdSql.checkData(0, 0, 8)
    tdSql.checkData(1, 0, 7)
    tdSql.checkData(2, 0, 6)
    tdSql.checkData(5, 0, 3)

    tdSql.query("select unique(c1) from ct4 where c2 between 0 and 99999")
    # Rows 0..8 are expected to hold 8, 7, ..., 0 respectively.
    for row_index, expected in enumerate(range(8, -1, -1)):
        tdSql.checkData(row_index, 0, expected)

    # unique with union all
    tdSql.query("select unique(c1) from ct4 union all select c1 from ct1")
    tdSql.checkRows(23)
    tdSql.query("select unique(c1) from ct4 union all select distinct(c1) from ct4")
    tdSql.checkRows(20)
    tdSql.query("select unique(c2) from ct4 union all select abs(c2)/2 from ct4")
    tdSql.checkRows(22)

    # unique with join
    # prepare join datas with same ts
    tdSql.execute(" use db ")

    tdSql.execute(" create stable st1 (ts timestamp , num int) tags(ind int)")
    tdSql.execute(" create table tb1 using st1 tags(1)")
    tdSql.execute(" create table tb2 using st1 tags(2)")

    tdSql.execute(" create stable st2 (ts timestamp , num int) tags(ind int)")
    tdSql.execute(" create table ttb1 using st2 tags(1)")
    tdSql.execute(" create table ttb2 using st2 tags(2)")

    start_ts = 1622369635000 # 2021-05-30 18:13:55

    # Ten rows, one second apart, with identical timestamps in all four tables.
    for i in range(10):
        ts_value = start_ts+i*1000
        for join_table in ("tb1", "tb2", "ttb1", "ttb2"):
            tdSql.execute(f" insert into {join_table} values({ts_value} , {i})")

    tdSql.query("select unique(tb2.num) from tb1, tb2 where tb1.ts=tb2.ts ")
    tdSql.checkRows(10)
    tdSql.checkData(0,0,0)
    tdSql.checkData(1,0,1)
    tdSql.checkData(2,0,2)
    tdSql.checkData(9,0,9)

    tdSql.query("select unique(tb2.num) from tb1, tb2 where tb1.ts=tb2.ts union all select unique(tb1.num) from tb1, tb2 where tb1.ts=tb2.ts ")
    tdSql.checkRows(20)
    tdSql.checkData(0,0,0)
    tdSql.checkData(1,0,1)
    tdSql.checkData(2,0,2)
    tdSql.checkData(9,0,9)

    # nest query
    # tdSql.query("select unique(c1) from (select c1 from ct1)")
    tdSql.query("select c1 from (select unique(c1) c1 from ct4)")
    tdSql.checkRows(10)
    tdSql.checkData(0, 0, None)
    tdSql.checkData(1, 0, 8)
    tdSql.checkData(9, 0, 0)

    tdSql.query("select sum(c1) from (select unique(c1) c1 from ct1)")
    tdSql.checkRows(1)
    tdSql.checkData(0, 0, 45)

    tdSql.query("select sum(c1) from (select distinct(c1) c1 from ct1) union all select sum(c1) from (select unique(c1) c1 from ct1)")
    tdSql.checkRows(2)
    tdSql.checkData(0, 0, 45)
    tdSql.checkData(1, 0, 45)

    tdSql.query("select 1-abs(c1) from (select unique(c1) c1 from ct4)")
    tdSql.checkRows(10)
    tdSql.checkData(0, 0, None)
    tdSql.checkData(1, 0, -7.000000000)

    # bug for stable
    #partition by tbname
    # tdSql.query(" select unique(c1) from stb1 partition by tbname ")
    # tdSql.checkRows(21)

    # tdSql.query(" select unique(c1) from stb1 partition by tbname ")
    # tdSql.checkRows(21)

    # group by
    tdSql.error("select unique(c1) from ct1 group by c1")
    tdSql.error("select unique(c1) from ct1 group by tbname")

    # super table
def check_boundary_values(self):
    """Check ``unique`` on rows holding values near the numeric type limits.

    Recreates database ``bound_test`` with super table ``stb_bound`` and
    child table ``sub1_bound``, inserts five rows whose c1..c6 values sit at
    or near the limits of int / bigint / smallint / tinyint / float /
    double, passes one row with larger integer values to ``tdSql.error``,
    then checks that ``unique(c2)`` returns five rows and that the first one
    is 9223372036854775807.
    """
    setup_statements = (
        "drop database if exists bound_test",
        "create database if not exists bound_test",
        "use bound_test",
        "create table stb_bound (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(32),c9 nchar(32), c10 timestamp) tags (t1 int);",
        'create table sub1_bound using stb_bound tags ( 1 )',
    )
    for setup_sql in setup_statements:
        tdSql.execute(setup_sql)

    # NOTE(review): four of these rows use a bare now() as the row timestamp;
    # the five-row expectation below presumably relies on each statement
    # landing on a distinct timestamp -- confirm.
    accepted_rows = (
        "insert into sub1_bound values ( now()-1s, 2147483647, 9223372036854775807, 32767, 127, 3.40E+38, 1.7e+308, True, 'binary_tb1', 'nchar_tb1', now() )",
        "insert into sub1_bound values ( now(), 2147483646, 9223372036854775806, 32766, 126, 3.40E+38, 1.7e+308, True, 'binary_tb1', 'nchar_tb1', now() )",
        "insert into sub1_bound values ( now(), -2147483646, -9223372036854775806, -32766, -126, -3.40E+38, -1.7e+308, True, 'binary_tb1', 'nchar_tb1', now() )",
        "insert into sub1_bound values ( now(), 2147483643, 9223372036854775803, 32763, 123, 3.39E+38, 1.69e+308, True, 'binary_tb1', 'nchar_tb1', now() )",
        "insert into sub1_bound values ( now(), -2147483643, -9223372036854775803, -32763, -123, -3.39E+38, -1.69e+308, True, 'binary_tb1', 'nchar_tb1', now() )",
    )
    for insert_sql in accepted_rows:
        tdSql.execute(insert_sql)

    # Each integer value here is one above the largest value inserted above.
    tdSql.error(
        "insert into sub1_bound values ( now()+1s, 2147483648, 9223372036854775808, 32768, 128, 3.40E+38, 1.7e+308, True, 'binary_tb1', 'nchar_tb1', now() )"
    )

    tdSql.query("select unique(c2) from sub1_bound")
    tdSql.checkRows(5)
    tdSql.checkData(0,0,9223372036854775807)
def run(self):  # sourcery skip: extract-duplicate-method, remove-redundant-fstring
    """Drive the whole ``unique`` test case, one banner-logged step at a time.

    Order matters: ``tdSql.prepare()`` and ``prepare_datas()`` come first
    because the later steps query the tables they set up.
    """
    tdSql.prepare()

    tdLog.printNoPrefix("==========step1:create table ==============")
    self.prepare_datas()

    tdLog.printNoPrefix("==========step2:test errors ==============")
    self.test_errors()

    tdLog.printNoPrefix("==========step3:support types ============")
    self.support_types()

    # The step 4/5 banners previously said "floor" (left over from another
    # test); they now name the function this file actually tests.
    tdLog.printNoPrefix("==========step4: unique basic query ============")
    self.basic_unique_function()

    tdLog.printNoPrefix("==========step5: unique boundary query ============")
    self.check_boundary_values()
def stop(self):
    """Close the ``tdSql`` helper and log that this test file finished successfully."""
    tdSql.close()
    success_message = "%s successfully executed" % __file__
    tdLog.success(success_message)
# Register this test file with the case registry for Linux and Windows,
# handing each registration its own fresh TDTestCase instance.
for register_case in (tdCases.addLinux, tdCases.addWindows):
    register_case(__file__, TDTestCase())

View File

@ -382,6 +382,7 @@ class TDTestCase:
tdLog.info("act consume rows: %d, expect consume rows: %d"%(totalConsumeRows, expectrowcnt))
tdLog.exit("tmq consume rows error!")
time.sleep(15)
tdSql.query("drop topic %s"%topicName1)
tdLog.printNoPrefix("======== test case 10 end ...... ")
@ -453,6 +454,7 @@ class TDTestCase:
tdLog.info("act consume rows: %d, expect consume rows: %d"%(totalConsumeRows, expectrowcnt))
tdLog.exit("tmq consume rows error!")
time.sleep(15)
tdSql.query("drop topic %s"%topicName1)
tdLog.printNoPrefix("======== test case 11 end ...... ")

View File

@ -81,6 +81,7 @@ python3 ./test.py -f 2-query/mavg.py
python3 ./test.py -f 2-query/diff.py
python3 ./test.py -f 2-query/sample.py
python3 ./test.py -f 2-query/function_diff.py
python3 ./test.py -f 2-query/unique.py
python3 ./test.py -f 7-tmq/basic5.py
python3 ./test.py -f 7-tmq/subscribeDb.py
@ -92,4 +93,3 @@ python3 ./test.py -f 7-tmq/subscribeStb1.py
python3 ./test.py -f 7-tmq/subscribeStb2.py
python3 ./test.py -f 7-tmq/subscribeStb3.py
python3 ./test.py -f 7-tmq/subscribeStb4.py
python3 ./test.py -f 7-tmq/subscribeStb2.py

Some files were not shown because too many files have changed in this diff Show More