Merge branch '3.0' into enh/TD-21161-3.0

This commit is contained in:
kailixu 2023-04-12 18:30:40 +08:00
commit d3f7717d62
4 changed files with 40 additions and 25 deletions

View File

@ -2,7 +2,7 @@
# taos-tools # taos-tools
ExternalProject_Add(taos-tools ExternalProject_Add(taos-tools
GIT_REPOSITORY https://github.com/taosdata/taos-tools.git GIT_REPOSITORY https://github.com/taosdata/taos-tools.git
GIT_TAG 149ac34 GIT_TAG 0681d8b
SOURCE_DIR "${TD_SOURCE_DIR}/tools/taos-tools" SOURCE_DIR "${TD_SOURCE_DIR}/tools/taos-tools"
BINARY_DIR "" BINARY_DIR ""
#BUILD_IN_SOURCE TRUE #BUILD_IN_SOURCE TRUE

View File

@ -3,13 +3,11 @@ title: Schemaless Writing
description: This document describes how to use the schemaless write component of TDengine. description: This document describes how to use the schemaless write component of TDengine.
--- ---
In IoT applications, data is collected for many purposes such as intelligent control, business analysis, device monitoring and so on. Due to changes in business or functional requirements or changes in device hardware, the application logic and even the data collected may change. Schemaless writing automatically creates storage structures for your data as it is being written to TDengine, so that you do not need to create supertables in advance. When necessary, schemaless writing In IoT applications, data is collected for many purposes such as intelligent control, business analysis, device monitoring and so on. Due to changes in business or functional requirements or changes in device hardware, the application logic and even the data collected may change. Schemaless writing automatically creates storage structures for your data as it is being written to TDengine, so that you do not need to create supertables in advance. When necessary, schemaless writing will automatically add the required columns to ensure that the data written by the user is stored correctly.
will automatically add the required columns to ensure that the data written by the user is stored correctly.
The schemaless writing method creates super tables and their corresponding subtables. These are completely indistinguishable from the super tables and subtables created directly via SQL. You can write data directly to them via SQL statements. Note that the names of tables created by schemaless writing are based on fixed mapping rules for tag values, so they are not explicitly ideographic and they lack readability. The schemaless writing method creates super tables and their corresponding subtables. These are completely indistinguishable from the super tables and subtables created directly via SQL. You can write data directly to them via SQL statements. Note that the names of tables created by schemaless writing are based on fixed mapping rules for tag values, so they are not explicitly ideographic and they lack readability.
Tips: Note: Schemaless writing creates tables automatically. Creating tables manually is not supported with schemaless writing.
The schemaless write will automatically create a table. You do not need to create a table manually, or an unknown error may occur.
## Schemaless Writing Line Protocol ## Schemaless Writing Line Protocol
@ -50,8 +48,7 @@ In the schemaless writing data line protocol, each data item in the field_set ne
- `t`, `T`, `true`, `True`, `TRUE`, `f`, `F`, `false`, and `False` will be handled directly as BOOL types. - `t`, `T`, `true`, `True`, `TRUE`, `f`, `F`, `false`, and `False` will be handled directly as BOOL types.
For example, the following data rows write c1 column as 3 (BIGINT), c2 column as false (BOOL), c3 column For example, the following string indicates that the one row of data is written to the st supertable with the t1 tag as "3" (NCHAR), the t2 tag as "4" (NCHAR), and the t3 tag as "t3" (NCHAR); the c1 column is 3 (BIGINT), the c2 column is false (BOOL), the c3 column is "passit" (BINARY), the c4 column is 4 (DOUBLE), and the primary key timestamp is 1626006833639000000.
as "passit" (BINARY), c4 column as 4 (DOUBLE), and the primary key timestamp as 1626006833639000000 to child table with the t1 label as "3" (NCHAR), the t2 label as "4" (NCHAR), and the t3 label as "t3" (NCHAR) and the super table named `st`.
```json ```json
st,t1=3,t2=4,t3=t3 c1=3i64,c3="passit",c2=false,c4=4f64 1626006833639000000 st,t1=3,t2=4,t3=t3 c1=3i64,c3="passit",c2=false,c4=4f64 1626006833639000000
@ -69,23 +66,31 @@ Schemaless writes process row data according to the following principles.
"measurement,tag_key1=tag_value1,tag_key2=tag_value2" "measurement,tag_key1=tag_value1,tag_key2=tag_value2"
``` ```
:::tip
Note that tag_key1, tag_key2 are not the original order of the tags entered by the user but the result of using the tag names in ascending order of the strings. Therefore, tag_key1 is not the first tag entered in the line protocol. Note that tag_key1, tag_key2 are not the original order of the tags entered by the user but the result of using the tag names in ascending order of the strings. Therefore, tag_key1 is not the first tag entered in the line protocol.
The string's MD5 hash value "md5_val" is calculated after the ranking is completed. The calculation result is then combined with the string to generate the table name: "t_md5_val". "t_" is a fixed prefix that every table generated by this mapping relationship has. The string's MD5 hash value "md5_val" is calculated after the ranking is completed. The calculation result is then combined with the string to generate the table name: "t_md5_val". "t\_" is a fixed prefix that every table generated by this mapping relationship has.
:::
You can configure smlChildTableName in taos.cfg to specify table names, for example, `smlChildTableName=tname`. You can insert `st,tname=cpul,t1=4 c1=3 1626006833639000000` and the cpu1 table will be automatically created. Note that if multiple rows have the same tname but different tag_set values, the tag_set of the first row is used to create the table and the others are ignored. You can configure smlChildTableName in taos.cfg to specify table names, for example, `smlChildTableName=tname`. You can insert `st,tname=cpul,t1=4 c1=3 1626006833639000000` and the cpu1 table will be automatically created. Note that if multiple rows have the same tname but different tag_set values, the tag_set of the first row is used to create the table and the others are ignored.
2. If the super table obtained by parsing the line protocol does not exist, this super table is created. 2. If the super table obtained by parsing the line protocol does not exist, this super table is created.
**Important:** Manually creating supertables for schemaless writing is not supported. Schemaless writing creates appropriate supertables automatically.
3. If the subtable obtained by the parse line protocol does not exist, Schemaless creates the sub-table according to the subtable name determined in steps 1 or 2. 3. If the subtable obtained by the parse line protocol does not exist, Schemaless creates the sub-table according to the subtable name determined in steps 1 or 2.
4. If the specified tag or regular column in the data row does not exist, the corresponding tag or regular column is added to the super table (only incremental). 4. If the specified tag or regular column in the data row does not exist, the corresponding tag or regular column is added to the super table (only incremental).
5. If there are some tag columns or regular columns in the super table that are not specified to take values in a data row, then the values of these columns are set to
NULL. 5. If there are some tag columns or regular columns in the super table that are not specified to take values in a data row, then the values of these columns are set to NULL.
6. For BINARY or NCHAR columns, if the length of the value provided in a data row exceeds the column type limit, the maximum length of characters allowed to be stored in the column is automatically increased (only incremented and not decremented) to ensure complete preservation of the data. 6. For BINARY or NCHAR columns, if the length of the value provided in a data row exceeds the column type limit, the maximum length of characters allowed to be stored in the column is automatically increased (only incremented and not decremented) to ensure complete preservation of the data.
7. Errors encountered throughout the processing will interrupt the writing process and return an error code. 7. Errors encountered throughout the processing will interrupt the writing process and return an error code.
8. It is assumed that the order of field_set in a supertable is consistent, meaning that the first record contains all fields and subsequent records store fields in the same order. If the order is not consistent, set smlDataFormat in taos.cfg to false. Otherwise, data will be written out of order and a database error will occur.(smlDataFormat in taos.cfg default to false after version of 3.0.1.3, discarded since 3.0.3.0)
8. It is assumed that the order of field_set in a supertable is consistent, meaning that the first record contains all fields and subsequent records store fields in the same order. If the order is not consistent, set smlDataFormat in taos.cfg to false. Otherwise, data will be written out of order and a database error will occur.
Note: TDengine 3.0.3.0 and later automatically detect whether order is consistent. This parameter is no longer used.
:::tip :::tip
All processing logic of schemaless will still follow TDengine's underlying restrictions on data structures, such as the total length of each row of data cannot exceed All processing logic of schemaless will still follow TDengine's underlying restrictions on data structures, such as the total length of each row of data cannot exceed 48 KB and the total length of a tag value cannot exceed 16 KB. See [TDengine SQL Boundary Limits](/taos-sql/limit) for specific constraints in this area.
48KB, and the total length of tag value cannot exceed 16KB. See [TDengine SQL Boundary Limits](/taos-sql/limit) for specific constraints in this area.
::: :::
## Time resolution recognition ## Time resolution recognition
@ -114,8 +119,7 @@ In OpenTSDB file and JSON protocol modes, the precision of the timestamp is dete
## Data Model Mapping ## Data Model Mapping
This section describes how data in line protocol is mapped to a schema. The data measurement in each line is mapped to a This section describes how data in InfluxDB line protocol is mapped to a schema. The data measurement in each line is mapped to a supertable name. The tag name in tag_set is the tag name in the schema, and the name in field_set is the column name in the schema. The following example shows how data is mapped:
supertable name. The tag name in tag_set is the tag name in the schema, and the name in field_set is the column name in the schema. The following example shows how data is mapped:
```json ```json
st,t1=3,t2=4,t3=t3 c1=3i64,c3="passit",c2=false,c4=4f64 1626006833639000000 st,t1=3,t2=4,t3=t3 c1=3i64,c3="passit",c2=false,c4=4f64 1626006833639000000
@ -160,7 +164,7 @@ The preceding data includes a new entry, c6, with type binary(6). When this occu
TDengine guarantees the idempotency of data writes. This means that you can repeatedly call the API to perform write operations with bad data. However, TDengine does not guarantee the atomicity of multi-row writes. In a multi-row write, some data may be written successfully and other data unsuccessfully. TDengine guarantees the idempotency of data writes. This means that you can repeatedly call the API to perform write operations with bad data. However, TDengine does not guarantee the atomicity of multi-row writes. In a multi-row write, some data may be written successfully and other data unsuccessfully.
##: Error Codes ## Error Codes
The TSDB_CODE_TSC_LINE_SYNTAX_ERROR indicates an error in the schemaless writing component. The TSDB_CODE_TSC_LINE_SYNTAX_ERROR indicates an error in the schemaless writing component.
This error occurs when writing text. For other errors, schemaless writing uses the standard TDengine error codes This error occurs when writing text. For other errors, schemaless writing uses the standard TDengine error codes

View File

@ -12,6 +12,9 @@ CREATE STREAM [IF NOT EXISTS] stream_name [stream_options] INTO stb_name[(field1
stream_options: { stream_options: {
TRIGGER [AT_ONCE | WINDOW_CLOSE | MAX_DELAY time] TRIGGER [AT_ONCE | WINDOW_CLOSE | MAX_DELAY time]
WATERMARK time WATERMARK time
IGNORE EXPIRED [0|1]
DELETE_MARK time
FILL_HISTORY [0|1]
} }
``` ```
@ -202,3 +205,11 @@ PARTITION 子句中,为 concat("tag-", tbname)定义了一个别名cc, 对应
会对TAG信息进行如下检查 会对TAG信息进行如下检查
1.检查tag的schema信息是否匹配对于不匹配的则自动进行数据类型转换当前只有数据长度大于4096byte时才报错其余场景都能进行类型转换。 1.检查tag的schema信息是否匹配对于不匹配的则自动进行数据类型转换当前只有数据长度大于4096byte时才报错其余场景都能进行类型转换。
2.检查tag的个数是否相同如果不同需要显示的指定超级表与subquery的tag的对应关系否则报错如果相同可以指定对应关系也可以不指定不指定则按位置顺序对应。 2.检查tag的个数是否相同如果不同需要显示的指定超级表与subquery的tag的对应关系否则报错如果相同可以指定对应关系也可以不指定不指定则按位置顺序对应。
## 清理中间状态
```
DELETE_MARK time
```
DELETE_MARK用于删除缓存的窗口状态也就是删除流计算的中间结果。如果不设置默认值是10年
T = 最新事件时间 - DELETE_MARK

View File

@ -816,7 +816,7 @@ JNIEXPORT jint JNICALL Java_com_taosdata_jdbc_TSDBJNIConnector_setBindTableNameI
(*env)->ReleaseStringUTFChars(env, jname, name); (*env)->ReleaseStringUTFChars(env, jname, name);
jniError("bindTableName jobj:%p, conn:%p, code: 0x%x", jobj, tsconn, code); jniError("bindTableName jobj:%p, conn:%p, code: 0x%x", jobj, tsconn, code);
return JNI_TDENGINE_ERROR; return code;
} }
jniDebug("jobj:%p, conn:%p, set stmt bind table name:%s", jobj, tsconn, name); jniDebug("jobj:%p, conn:%p, set stmt bind table name:%s", jobj, tsconn, name);
@ -891,7 +891,7 @@ JNIEXPORT jint JNICALL Java_com_taosdata_jdbc_TSDBJNIConnector_setTableNameTagsI
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
jniError("tableNameTags jobj:%p, conn:%p, code: 0x%x", jobj, tsconn, code); jniError("tableNameTags jobj:%p, conn:%p, code: 0x%x", jobj, tsconn, code);
return JNI_TDENGINE_ERROR; return code;
} }
return JNI_SUCCESS; return JNI_SUCCESS;
} }
@ -957,7 +957,7 @@ JNIEXPORT jlong JNICALL Java_com_taosdata_jdbc_TSDBJNIConnector_bindColDataImp(
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
jniError("bindColData jobj:%p, conn:%p, code: 0x%x", jobj, tscon, code); jniError("bindColData jobj:%p, conn:%p, code: 0x%x", jobj, tscon, code);
return JNI_TDENGINE_ERROR; return code;
} }
return JNI_SUCCESS; return JNI_SUCCESS;
@ -980,7 +980,7 @@ JNIEXPORT jint JNICALL Java_com_taosdata_jdbc_TSDBJNIConnector_addBatchImp(JNIEn
int32_t code = taos_stmt_add_batch(pStmt); int32_t code = taos_stmt_add_batch(pStmt);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
jniError("add batch jobj:%p, conn:%p, code: 0x%x", jobj, tscon, code); jniError("add batch jobj:%p, conn:%p, code: 0x%x", jobj, tscon, code);
return JNI_TDENGINE_ERROR; return code;
} }
jniDebug("jobj:%p, conn:%p, stmt closed", jobj, tscon); jniDebug("jobj:%p, conn:%p, stmt closed", jobj, tscon);
@ -1004,7 +1004,7 @@ JNIEXPORT jint JNICALL Java_com_taosdata_jdbc_TSDBJNIConnector_executeBatchImp(J
int32_t code = taos_stmt_execute(pStmt); int32_t code = taos_stmt_execute(pStmt);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
jniError("excute batch jobj:%p, conn:%p, code: 0x%x", jobj, tscon, code); jniError("excute batch jobj:%p, conn:%p, code: 0x%x", jobj, tscon, code);
return JNI_TDENGINE_ERROR; return code;
} }
jniDebug("jobj:%p, conn:%p, batch execute", jobj, tscon); jniDebug("jobj:%p, conn:%p, batch execute", jobj, tscon);
@ -1028,7 +1028,7 @@ JNIEXPORT jint JNICALL Java_com_taosdata_jdbc_TSDBJNIConnector_closeStmt(JNIEnv
int32_t code = taos_stmt_close(pStmt); int32_t code = taos_stmt_close(pStmt);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
jniError("close stmt jobj:%p, conn:%p, code: 0x%x", jobj, tscon, code); jniError("close stmt jobj:%p, conn:%p, code: 0x%x", jobj, tscon, code);
return JNI_TDENGINE_ERROR; return code;
} }
jniDebug("jobj:%p, conn:%p, stmt closed", jobj, tscon); jniDebug("jobj:%p, conn:%p, stmt closed", jobj, tscon);