Merge branch 'develop' into docs/Update-Latest-Feature

This commit is contained in:
Elias Soong 2021-09-09 18:49:02 +08:00
commit 53f8e057a6
68 changed files with 7512 additions and 3006 deletions

2
Jenkinsfile vendored
View File

@ -239,7 +239,7 @@ pipeline {
sh '''
cd ${WKC}/tests/examples/C#/taosdemo
mcs -out:taosdemo *.cs > /dev/null 2>&1
echo '' |./taosdemo
echo '' |./taosdemo -c /etc/taos
'''
sh '''
cd ${WKC}/tests/gotest

View File

@ -133,8 +133,10 @@ IF (TD_LINUX)
IF (TD_MEMORY_SANITIZER)
SET(DEBUG_FLAGS "-fsanitize=address -fsanitize=undefined -fno-sanitize-recover=all -fsanitize=float-divide-by-zero -fsanitize=float-cast-overflow -fno-sanitize=null -fno-sanitize=alignment -static-libasan -O0 -g3 -DDEBUG")
MESSAGE(STATUS "memory sanitizer detected as true")
ELSE ()
SET(DEBUG_FLAGS "-O0 -g3 -DDEBUG")
MESSAGE(STATUS "memory sanitizer detected as false")
ENDIF ()
SET(RELEASE_FLAGS "-O3 -Wno-error")

View File

@ -86,7 +86,7 @@ ENDIF ()
MESSAGE(STATUS "============= compile version parameter information start ============= ")
MESSAGE(STATUS "ver number:" ${TD_VER_NUMBER})
MESSAGE(STATUS "compatible ver number:" ${TD_VER_COMPATIBLE})
MESSAGE(STATUS "communit commit id:" ${TD_VER_GIT})
MESSAGE(STATUS "community commit id:" ${TD_VER_GIT})
MESSAGE(STATUS "internal commit id:" ${TD_VER_GIT_INTERNAL})
MESSAGE(STATUS "build date:" ${TD_VER_DATE})
MESSAGE(STATUS "ver type:" ${TD_VER_VERTYPE})

View File

@ -64,7 +64,10 @@ TDengine提供了丰富的应用程序开发接口其中包括C/C++、Java、
编辑taos.cfg文件(默认路径/etc/taos/taos.cfg)将firstEP修改为TDengine服务器的End Point例如h1.taos.com:6030
**提示: 如本机没有部署TDengine服务仅安装了应用驱动则taos.cfg中仅需配置firstEP无需配置FQDN。**
**提示: **
1. **如本机没有部署TDengine服务仅安装了应用驱动则taos.cfg中仅需配置firstEP无需配置FQDN。**
2. **为防止与服务器端连接时出现“unable to resolve FQDN”错误建议确认客户端的hosts文件已经配置正确的FQDN值。**
**Windows x64/x86**
@ -96,7 +99,7 @@ TDengine提供了丰富的应用程序开发接口其中包括C/C++、Java、
**提示:**
1. **如利用FQDN连接服务器必须确认本机网络环境DNS已配置好或在hosts文件中添加FQDN寻址记录如编辑C:\Windows\system32\drivers\etc\hosts添加如下的记录`192.168.1.99 h1.taos.com` **
2**卸载运行unins000.exe可卸载TDengine应用驱动。**
2. **卸载运行unins000.exe可卸载TDengine应用驱动。**
### 安装验证

View File

@ -119,9 +119,14 @@ taos>
上面已经介绍如何从零开始搭建集群。集群组建完后,还可以随时添加新的数据节点进行扩容,或删除数据节点,并检查集群当前状态。
**提示:**
- 以下所有执行命令的操作需要先登陆进TDengine系统必要时请使用root权限。
### 添加数据节点
执行CLI程序taos使用root账号登录进系统执行
执行CLI程序taos执行
```
CREATE DNODE "fqdn:port";
@ -131,7 +136,7 @@ CREATE DNODE "fqdn:port";
### 删除数据节点
执行CLI程序taos使用root账号登录进TDengine系统执行:
执行CLI程序taos执行
```mysql
DROP DNODE "fqdn:port | dnodeID";
@ -153,7 +158,7 @@ DROP DNODE "fqdn:port | dnodeID";
手动将某个vnode迁移到指定的dnode。
执行CLI程序taos使用root账号登录进TDengine系统执行:
执行CLI程序taos执行
```mysql
ALTER DNODE <source-dnodeId> BALANCE "VNODE:<vgId>-DNODE:<dest-dnodeId>";
@ -169,7 +174,7 @@ ALTER DNODE <source-dnodeId> BALANCE "VNODE:<vgId>-DNODE:<dest-dnodeId>";
### 查看数据节点
执行CLI程序taos使用root账号登录进TDengine系统执行:
执行CLI程序taos执行
```mysql
SHOW DNODES;
```
@ -180,8 +185,9 @@ SHOW DNODES;
为充分利用多核技术并提供scalability数据需要分片处理。因此TDengine会将一个DB的数据切分成多份存放在多个vnode里。这些vnode可能分布在多个数据节点dnode里这样就实现了水平扩展。一个vnode仅仅属于一个DB但一个DB可以有多个vnode。vnode的是mnode根据当前系统资源的情况自动进行分配的无需任何人工干预。
执行CLI程序taos使用root账号登录进TDengine系统执行:
执行CLI程序taos执行
```mysql
USE SOME_DATABASE;
SHOW VGROUPS;
```

View File

@ -1,6 +1,6 @@
# TDengine Documentation
TDengine is a highly efficient platform to store, query, and analyze time-series data. It is specially designed and optimized for IoT, Internet of Vehicles, Industrial IoT, IT Infrastructure and Application Monitoring, etc. It works like a relational database, such as MySQL, but you are strongly encouraged to read through the following documentation before you experience it, especially the Data Model and Data Modeling sections. In addition to this document, you should also download and read our technology white paper. For the older TDengine version 1.6 documentation, please click here.
TDengine is a highly efficient platform to store, query, and analyze time-series data. It is specially designed and optimized for IoT, Internet of Vehicles, Industrial IoT, IT Infrastructure and Application Monitoring, etc. It works like a relational database, such as MySQL, but you are strongly encouraged to read through the following documentation before you experience it, especially the Data Modeling sections. In addition to this document, you should also download and read the technology white paper. For the older TDengine version 1.6 documentation, please click [here](https://www.taosdata.com/en/documentation16/).
## [TDengine Introduction](/evaluation)
@ -10,28 +10,41 @@ TDengine is a highly efficient platform to store, query, and analyze time-series
## [Getting Started](/getting-started)
* [Quickly Install](/getting-started#install): install via source code/package / Docker within seconds
- [Easy to Launch](/getting-started#start): start / stop TDengine with systemctl
- [Command-line](/getting-started#console) : an easy way to access TDengine server
- [Experience Lightning Speed](/getting-started#demo): running a demo, inserting/querying data to experience faster speed
- [List of Supported Platforms](/getting-started#platforms): a list of platforms supported by TDengine server and client
- [Deploy to Kubernetes](https://taosdata.github.io/TDengine-Operator/en/index.html)a detailed guide for TDengine deployment in Kubernetes environment
* [Quick Install](/getting-started#install): install via source code/package / Docker within seconds
* [Quick Launch](/getting-started#start): start / stop TDengine quickly with systemctl
* [Command-line](/getting-started#console) : an easy way to access TDengine server
* [Experience Lightning Speed](/getting-started#demo): running a demo, inserting/querying data to experience faster speed
* [List of Supported Platforms](/getting-started#platforms): a list of platforms supported by TDengine server and client
* [Deploy to Kubernetes](https://taosdata.github.io/TDengine-Operator/en/index.html)a detailed guide for TDengine deployment in Kubernetes environment
## [Overall Architecture](/architecture)
- [Data Model](/architecture#model): relational database model, but one table for one device with static tags
- [Cluster and Primary Logical Unit](/architecture#cluster): Take advantage of NoSQL, support scale-out and high-reliability
- [Storage Model and Data Partitioning/Sharding](/architecture#sharding): tag data will be separated from time-series data, segmented by vnode and time
- [Data Writing and Replication Process](/architecture#replication): records received are written to WAL, cached, with acknowledgement is sent back to client, while supporting multi-replicas
- [Data Model](/architecture#model): relational database model, but one table for one data collection point with static tags
- [Cluster and Primary Logical Unit](/architecture#cluster): Take advantage of NoSQL architecture, high availability and horizontal scalability
- [Storage Model and Data Partitioning/Sharding](/architecture#sharding): tag data is separated from time-series data, sharded by vnodes and partitioned by time
- [Data Writing and Replication Process](/architecture#replication): records received are written to WAL, cached, with acknowledgement sent back to client, while supporting data replications
- [Caching and Persistence](/architecture#persistence): latest records are cached in memory, but are written in columnar format with an ultra-high compression ratio
- [Data Query](/architecture#query): support various functions, time-axis aggregation, interpolation, and multi-table aggregation
- [Data Query](/architecture#query): support various SQL functions, downsampling, interpolation, and multi-table aggregation
## [Data Modeling](/model)
- [Create a Database](/model#create-db): create a database for all data collection points with similar features
- [Create a Database](/model#create-db): create a database for all data collection points with similar data characteristics
- [Create a Super Table(STable)](/model#create-stable): create a STable for all data collection points with the same type
- [Create a Table](/model#create-table): use STable as the template, to create a table for each data collecting point
- [Create a Table](/model#create-table): use STable as the template to create a table for each data collecting point
## [Efficient Data Ingestion](/insert)
- [Data Writing via SQL](/insert#sql): write one or multiple records into one or multiple tables via SQL insert command
- [Data Writing via Prometheus](/insert#prometheus): Configure Prometheus to write data directly without any code
- [Data Writing via Telegraf](/insert#telegraf): Configure Telegraf to write collected data directly without any code
- [Data Writing via EMQ X](/insert#emq): Configure EMQ X to write MQTT data directly without any code
- [Data Writing via HiveMQ Broker](/insert#hivemq): Configure HiveMQ to write MQTT data directly without any code
## [Efficient Data Querying](/queries)
- [Major Features](/queries#queries): support various standard query functions, setting filter conditions, and querying per time segment
- [Multi-table Aggregation](/queries#aggregation): use STable and set tag filter conditions to perform efficient aggregation
- [Downsampling](/queries#sampling): aggregate data in successive time windows, support interpolation
## [TAOS SQL](/taos-sql)
@ -40,27 +53,13 @@ TDengine is a highly efficient platform to store, query, and analyze time-series
- [Table Management](/taos-sql#table): add, drop, check, alter tables
- [STable Management](/taos-sql#super-table): add, drop, check, alter STables
- [Tag Management](/taos-sql#tags): add, drop, alter tags
- [Inserting Records](/taos-sql#insert): support to write single/multiple items per table, multiple items across tables, and support to write historical data
- [Inserting Records](/taos-sql#insert): write single/multiple records a table, multiple records across tables, and historical data
- [Data Query](/taos-sql#select): support time segment, value filtering, sorting, manual paging of query results, etc
- [SQL Function](/taos-sql#functions): support various aggregation functions, selection functions, and calculation functions, such as avg, min, diff, etc
- [Time Dimensions Aggregation](/taos-sql#aggregation): aggregate and reduce the dimension after cutting table data by time segment
- [Cutting and Aggregation](/taos-sql#aggregation): aggregate and reduce the dimension after cutting table data by time segment
- [Boundary Restrictions](/taos-sql#limitation): restrictions for the library, table, SQL, and others
- [Error Code](/taos-sql/error-code): TDengine 2.0 error codes and corresponding decimal codes
## [Efficient Data Ingestion](/insert)
- [SQL Ingestion](/insert#sql): write one or multiple records into one or multiple tables via SQL insert command
- [Prometheus Ingestion](/insert#prometheus): Configure Prometheus to write data directly without any code
- [Telegraf Ingestion](/insert#telegraf): Configure Telegraf to write collected data directly without any code
- [EMQ X Broker](/insert#emq): Configure EMQ X to write MQTT data directly without any code
- [HiveMQ Broker](/insert#hivemq): Configure HiveMQ to write MQTT data directly without any code
## [Efficient Data Querying](/queries)
- [Main Query Features](/queries#queries): support various standard functions, setting filter conditions, and querying per time segment
- [Multi-table Aggregation Query](/queries#aggregation): use STable and set tag filter conditions to perform efficient aggregation queries
- [Downsampling to Query Value](/queries#sampling): aggregate data in successive time windows, support interpolation
## [Advanced Features](/advanced-features)
- [Continuous Query](/advanced-features#continuous-query): Based on sliding windows, the data stream is automatically queried and calculated at regular intervals
@ -88,12 +87,12 @@ TDengine is a highly efficient platform to store, query, and analyze time-series
## [Installation and Management of TDengine Cluster](/cluster)
- [Preparation](/cluster#prepare): important considerations before deploying TDengine for production usage
- [Create Your First Node](/cluster#node-one): simple to follow the quick setup
- [Preparation](/cluster#prepare): important steps before deploying TDengine for production usage
- [Create the First Node](/cluster#node-one): just follow the steps in quick start
- [Create Subsequent Nodes](/cluster#node-other): configure taos.cfg for new nodes to add more to the existing cluster
- [Node Management](/cluster#management): add, delete, and check nodes in the cluster
- [High-availability of Vnode](/cluster#high-availability): implement high-availability of Vnode through multi-replicas
- [Mnode Management](/cluster#mnode): automatic system creation without any manual intervention
- [High-availability of Vnode](/cluster#high-availability): implement high-availability of Vnode through replicas
- [Mnode Management](/cluster#mnode): mnodes are created automatically without any manual intervention
- [Load Balancing](/cluster#load-balancing): automatically performed once the number of nodes or load changes
- [Offline Node Processing](/cluster#offline): any node that offline for more than a certain period will be removed from the cluster
- [Arbitrator](/cluster#arbitrator): used in the case of an even number of replicas to prevent split-brain
@ -108,27 +107,14 @@ TDengine is a highly efficient platform to store, query, and analyze time-series
- [Export Data](/administrator#export): export data either from TDengine shell or from the taosdump tool
- [System Monitor](/administrator#status): monitor the system connections, queries, streaming calculation, logs, and events
- [File Directory Structure](/administrator#directories): directories where TDengine data files and configuration files located
- [Parameter Restrictions and Reserved Keywords](/administrator#keywords): TDengines list of parameter restrictions and reserved keywords
## TDengine Technical Design
- [System Module]: taosd functions and modules partitioning
- [Data Replication]: support real-time synchronous/asynchronous replication, to ensure high-availability of the system
- [Technical Blog](https://www.taosdata.com/cn/blog/?categories=3): More technical analysis and architecture design articles
## Common Tools
- [TDengine sample import tools](https://www.taosdata.com/blog/2020/01/18/1166.html)
- [TDengine performance comparison test tools](https://www.taosdata.com/blog/2020/01/18/1166.html)
- [Use TDengine visually through IDEA Database Management Tool](https://www.taosdata.com/blog/2020/08/27/1767.html)
- [Parameter Limitss and Reserved Keywords](/administrator#keywords): TDengines list of parameter limits and reserved keywords
## Performance: TDengine vs Others
- [Performance: TDengine vs InfluxDB with InfluxDBs open-source performance testing tool](https://www.taosdata.com/blog/2020/01/13/1105.html)
- [Performance: TDengine vs OpenTSDB](https://www.taosdata.com/blog/2019/08/21/621.html)
- [Performance: TDengine vs Cassandra](https://www.taosdata.com/blog/2019/08/14/573.html)
- [Performance: TDengine vs InfluxDB](https://www.taosdata.com/blog/2019/07/19/419.html)
- [Performance Test Reports of TDengine vs InfluxDB/OpenTSDB/Cassandra/MySQL/ClickHouse](https://www.taosdata.com/downloads/TDengine_Testing_Report_cn.pdf)
- [Performance: TDengine vs OpenTSDB](https://www.taosdata.com/blog/2019/09/12/710.html)
- [Performance: TDengine vs Cassandra](https://www.taosdata.com/blog/2019/09/12/708.html)
- [Performance: TDengine vs InfluxDB](https://www.taosdata.com/blog/2019/09/12/706.html)
- [Performance Test Reports of TDengine vs InfluxDB/OpenTSDB/Cassandra/MySQL/ClickHouse](https://www.taosdata.com/downloads/TDengine_Testing_Report_en.pdf)
## More on IoT Big Data
@ -136,7 +122,8 @@ TDengine is a highly efficient platform to store, query, and analyze time-series
- [Features and Functions of IoT Big Data platforms](https://www.taosdata.com/blog/2019/07/29/542.html)
- [Why dont General Big Data Platforms Fit IoT Scenarios?](https://www.taosdata.com/blog/2019/07/09/why-does-the-general-big-data-platform-not-fit-iot-data-processing/)
- [Why TDengine is the best choice for IoT, Internet of Vehicles, and Industry Internet Big Data platforms?](https://www.taosdata.com/blog/2019/07/09/why-tdengine-is-the-best-choice-for-iot-big-data-processing/)
- [Technical Blog](https://www.taosdata.com/cn/blog/?categories=3): More technical analysis and architecture design articles
## FAQ
- [FAQ: Common questions and answers](/faq)
- [FAQ: Common questions and answers](/faq)

View File

@ -2,18 +2,18 @@
## <a class="anchor" id="intro"></a> About TDengine
TDengine is an innovative Big Data processing product launched by Taos Data in the face of the fast-growing Internet of Things (IoT) Big Data market and technical challenges. It does not rely on any third-party software, nor does it optimize or package any open-source database or stream computing product. Instead, it is a product independently developed after absorbing the advantages of many traditional relational databases, NoSQL databases, stream computing engines, message queues, and other software. TDengine has its own unique Big Data processing advantages in time-series space.
TDengine is an innovative Big Data processing product launched by TAOS Data in the face of the fast-growing Internet of Things (IoT) Big Data market and technical challenges. It does not rely on any third-party software, nor does it optimize or package any open-source database or stream computing product. Instead, it is a product independently developed after absorbing the advantages of many traditional relational databases, NoSQL databases, stream computing engines, message queues, and other software. TDengine has its own unique Big Data processing advantages in time-series space.
One of the modules of TDengine is the time-series database. However, in addition to this, to reduce the complexity of research and development and the difficulty of system operation, TDengine also provides functions such as caching, message queuing, subscription, stream computing, etc. TDengine provides a full-stack technical solution for the processing of IoT and Industrial Internet BigData. It is an efficient and easy-to-use IoT Big Data platform. Compared with typical Big Data platforms such as Hadoop, TDengine has the following distinct characteristics:
- **Performance improvement over 10 times**: An innovative data storage structure is defined, with each single core can process at least 20,000 requests per second, insert millions of data points, and read more than 10 million data points, which is more than 10 times faster than other existing general database.
- **Reduce the cost of hardware or cloud services to 1/5**: Due to its ultra-performance, TDengines computing resources consumption is less than 1/5 of other common Big Data solutions; through columnar storage and advanced compression algorithms, the storage consumption is less than 1/10 of other general databases.
- **Full-stack time-series data processing engine**: Integrate database, message queue, cache, stream computing, and other functions, and the applications do not need to integrate with software such as Kafka/Redis/HBase/Spark/HDFS, thus greatly reducing the complexity cost of application development and maintenance.
- **Powerful analysis functions**: Data from ten years ago or one second ago, can all be queried based on a specified time range. Data can be aggregated on a timeline or multiple devices. Ad-hoc queries can be made at any time through Shell, Python, R, and MATLAB.
- **Seamless connection with third-party tools**: Integration with Telegraf, Grafana, EMQ, HiveMQ, Prometheus, MATLAB, R, etc. without even one single line of code. OPC, Hadoop, Spark, etc. will be supported in the future, and more BI tools will be seamlessly connected to.
- **Highly Available and Horizontal Scalable **: With the distributed architecture and consistency algorithm, via multi-replication and clustering features, TDengine ensures high availability and horizontal scalability to support the mission-critical applications.
- **Zero operation cost & zero learning cost**: Installing clusters is simple and quick, with real-time backup built-in, and no need to split libraries or tables. Similar to standard SQL, TDengine can support RESTful, Python/Java/C/C++/C#/Go/Node.js, and similar to MySQL with zero learning cost.
- **Core is Open Sourced:** Except some auxiliary features, the core of TDengine is open sourced. Enterprise won't be locked by the database anymore. Ecosystem is more strong, product is more stable, and developer communities are more active.
With TDengine, the total cost of ownership of typical IoT, Internet of Vehicles, and Industrial Internet Big Data platforms can be greatly reduced. However, it should be pointed out that due to making full use of the characteristics of IoT time-series data, TDengine cannot be used to process general data from web crawlers, microblogs, WeChat, e-commerce, ERP, CRM, and other sources.
With TDengine, the total cost of ownership of typical IoT, Internet of Vehicles, and Industrial Internet Big Data platforms can be greatly reduced. However, since it makes full use of the characteristics of IoT time-series data, TDengine cannot be used to process general data from web crawlers, microblogs, WeChat, e-commerce, ERP, CRM, and other sources.
![TDengine Technology Ecosystem](page://images/eco_system.png)
@ -62,4 +62,4 @@ From the perspective of data sources, designers can analyze the applicability of
| ------------------------------------------------- | ------------------ | ----------------------- | ------------------- | ------------------------------------------------------------ |
| Require system with high-reliability | | | √ | TDengine has a very robust and reliable system architecture to implement simple and convenient daily operation with streamlined experiences for operators, thus human errors and accidents are eliminated to the greatest extent. |
| Require controllable operation learning cost | | | √ | As above. |
| Require abundant talent supply | √ | | | As a new-generation product, its still difficult to find talents with TDengine experiences from market. However, the learning cost is low. As the vendor, we also provide extensive operation training and counselling services. |
| Require abundant talent supply | √ | | | As a new-generation product, its still difficult to find talents with TDengine experiences from market. However, the learning cost is low. As the vendor, we also provide extensive operation training and counselling services. |

View File

@ -2,7 +2,7 @@
## <a class="anchor" id="install"></a>Quick Install
TDengine software consists of 3 components: server, client, and alarm module. At the moment, TDengine server only runs on Linux (Windows, mac OS and more OS supports will come soon), but client can run on either Windows or Linux. TDengine client can be installed and run on Windows or Linux. Applications based-on any OSes can all connect to server taosd via a RESTful interface. About CPU, TDengine supports X64/ARM64/MIPS64/Alpha64, and ARM32、RISC-V, other more CPU architectures will be supported soon. You can set up and install TDengine server either from the [source code](https://www.taosdata.com/en/getting-started/#Install-from-Source) or the [packages](https://www.taosdata.com/en/getting-started/#Install-from-Package).
TDengine software consists of 3 parts: server, client, and alarm module. At the moment, TDengine server only runs on Linux (Windows, mac OS and more OS supports will come soon), but client can run on either Windows or Linux. TDengine client can be installed and run on Windows or Linux. Applications based-on any OSes can all connect to server taosd via a RESTful interface. About CPU, TDengine supports X64/ARM64/MIPS64/Alpha64, and ARM32、RISC-V, other more CPU architectures will be supported soon. You can set up and install TDengine server either from the [source code](https://www.taosdata.com/en/getting-started/#Install-from-Source) or the [packages](https://www.taosdata.com/en/getting-started/#Install-from-Package).
### <a class="anchor" id="source-install"></a>Install from Source
@ -16,7 +16,7 @@ Please refer to the detailed operation in [Quickly experience TDengine through D
### <a class="anchor" id="package-install"></a>Install from Package
Its extremely easy to install for TDengine, which takes only a few seconds from downloaded to successful installed. The server installation package includes clients and connectors. We provide 3 installation packages, which you can choose according to actual needs:
Three different packages for TDengine server are provided, please pick up the one you like. (Lite packages only have execution files and connector of C/C++, but standard packages support connectors of nearly all programming languages.) Beta version has more features, but we suggest you to install stable version for production or testing.
Click [here](https://www.taosdata.com/en/getting-started/#Install-from-Package) to download the install package.
@ -131,7 +131,7 @@ After starting the TDengine server, you can execute the command `taosdemo` in th
$ taosdemo
```
Using this command, a STable named `meters` will be created in the database `test` There are 10k tables under this stable, named from `t0` to `t9999`. In each table there are 100k rows of records, each row with columns `f1`, `f2` and `f3`. The timestamp is from "2017-07-14 10:40:00 000" to "2017-07-14 10:41:39 999". Each table also has tags `areaid` and `loc`: `areaid` is set from 1 to 10, `loc` is set to "beijing" or "shanghai".
Using this command, a STable named `meters` will be created in the database `test`. There are 10k tables under this STable, named from `t0` to `t9999`. In each table there are 100k rows of records, each row with columns `f1`, `f2` and `f3`. The timestamp is from "2017-07-14 10:40:00 000" to "2017-07-14 10:41:39 999". Each table also has tags `areaid` and `loc`: `areaid` is set from 1 to 10, `loc` is set to "beijing" or "shanghai".
It takes about 10 minutes to execute this command. Once finished, 1 billion rows of records will be inserted.
@ -201,7 +201,7 @@ Note: ● has been verified by official tests; ○ has been verified by unoffici
List of platforms supported by TDengine client and connectors
At the moment, TDengine connectors can support a wide range of platforms, including hardware platforms such as X64/X86/ARM64/ARM32/MIPS/Alpha, and development environments such as Linux/Win64/Win32.
At the moment, TDengine connectors can support a wide range of platforms, including hardware platforms such as X64/X86/ARM64/ARM32/MIPS/Alpha, and operating system such as Linux/Win64/Win32.
Comparison matrix as following:
@ -218,4 +218,4 @@ Comparison matrix as following:
Note: ● has been verified by official tests; ○ has been verified by unofficial tests.
Please visit [Connectors](https://www.taosdata.com/en/documentation/connector) section for more detailed information.
Please visit Connectors section for more detailed information.

View File

@ -4,7 +4,7 @@
### A Typical IoT Scenario
In typical IoT, Internet of Vehicles and Operation Monitoring scenarios, there are often many different types of data collecting devices that collect one or more different physical metrics. However, for the collection devices of the same type, there are often many specific collection devices distributed in places. BigData processing system aims to collect all kinds of data, and then calculate and analyze them. For the same kind of devices, the data collected are very regular. Taking smart meters as an example, assuming that each smart meter collects three metrics of current, voltage and phase, the collected data are similar to the following table:
In typical industry IoT, Internet of Vehicles and Operation Monitoring scenarios, there are often many different types of data collecting devices that collect one or more different physical metrics. However, for the data collection devices of the same type, there are often many specific collection devices distributed in places. Big Data processing system aims to collect all kinds of data, then store and analyze them. For the same kind of devices, the data collected are very structured. Taking smart meters as an example, assuming that each smart meter collects three metrics of current, voltage and phase, the collected data are similar to the following table:
<figure><table>
<thead><tr>
@ -108,48 +108,48 @@ Each data record contains the device ID, timestamp, collected metrics (current,
As the data points are a series of data points over time, the data points generated by IoT, Internet of Vehicles, and Operation Monitoring have some strong common characteristics:
1. Metrics are always structured data;
2. There are rarely delete/update operations on collected data;
3. No need for transactions of traditional databases
4. The ratio of reading is lower but write is higher than typical Internet applications;
5. data flow is uniform and can be predicted according to the number of devices and collection frequency;
1. metrics are always structured data;
2. there are rarely delete/update operations on collected data;
3. unlike traditional databases, transaction processing is not required;
4. the ratio of writing over reading is much higher than typical Internet applications;
5. data volume is stable and can be predicted according to the number of devices and sampling rate;
6. the user pays attention to the trend of data, not a specific value at a specific time;
7. there is always a data retention policy;
8. the data query is always executed in a given time range and a subset of space;
9. in addition to storage and query operations, various statistical and real-time calculation operations are also required;
9. in addition to storage and query operations, various statistical and real-time computing are also required;
10. data volume is huge, a system may generate over 10 billion data points in a day.
In light of the characteristics mentioned above, TDengine designs the storage and computing engine in a special and optimized way for time-series data, resulting in massive improvements in system efficiency.
By utilizing the above characteristics, TDengine designs the storage and computing engine in a special and optimized way for time-series data, resulting in massive improvements in system efficiency.
### Relational Database Model
Since time-series data is most likely to be structured data, TDengine adopts the traditional relational database model to process them with a shallow learning curve. You need to create a database, create tables with schema definitions, then insert data points and execute queries to explore the data. Standard SQL is used, instead of NoSQLs key-value storage.
Since time-series data is most likely to be structured data, TDengine adopts the traditional relational database model to process them with a short learning curve. You need to create a database, create tables with schema definitions, then insert data points and execute queries to explore the data. SQL like syntax is used, instead of NoSQLs key-value storage.
### One Table for One Collection Point
### One Table for One Data Collection Point
To utilize this time-series and other data features, TDengine requires the user to create a table for each collection point to store collected time-series data. For example, if there are over 10 millions smart meters, means 10 millions tables shall be created. For the table above, 4 tables shall be created for devices D1001, D1002, D1003, and D1004 to store the data collected. This design has several advantages:
To utilize this time-series and other data features, TDengine requires the user to create a table for each data collection point to store collected time-series data. For example, if there are over 10 million smart meters, it means 10 million tables shall be created. For the table above, 4 tables shall be created for devices D1001, D1002, D1003, and D1004 to store the data collected. This design has several advantages:
1. Guarantee that all data from a collection point can be saved in a continuous memory/hard disk space block by block. If queries are applied only on one point in a time range, this design will reduce the random read latency significantly, thus increase read and query speed by orders of magnitude.
2. Since the data generation process of each collection device is completely independent, means each device has its unique data source, thus writes can be carried out in a lock-free manner to greatly improve the speed.
1. Guarantee that all data from a data collection point can be saved in a continuous memory/hard disk space block by block. If queries are applied only on one data collection point in a time range, this design will reduce the random read latency significantly, thus increase read and query speed by orders of magnitude.
2. Since the data generation process of each data collection device is completely independent, and each data collection point has its unique data source, thus writes can be carried out in a lock-free manner to greatly improve the performance.
3. Write latency can be significantly reduced too as the data points generated by the same device will arrive in time order, the new data point will be simply appended to a block.
If the data of multiple devices are written into a table in the traditional way, due to the uncontrollable network delay, the timing of the data from different devices arriving at the server cannot be guaranteed, the writing operation must be protected by locks, and the data of one device cannot be guaranteed to continuously stored together. **The method of one table for each data collection point can ensure the optimal performance of insertion and query of a single data collection point to the greatest extent.**
If the data of multiple devices are traditionally written into a table, due to the uncontrollable network delay, the timing of the data from different devices arriving at the server cannot be guaranteed, the writing operation must be protected by locks, and the data of one device cannot be guaranteed to be continuously stored together. **One table for each data collection point can ensure the optimal performance of insert and query of a single data collection point to the greatest extent.**
TDengine suggests using collection point ID as the table name (like D1001 in the above table). Each point may collect one or more metrics (like the current, voltage, phase as above). Each metric has a column in the table. The data type for a column can be int, float, string and others. In addition, the first column in the table must be a timestamp. TDengine uses the time stamp as the index, and wont build the index on any metrics stored. All data will be stored in columns.
TDengine suggests using data collection point ID as the table name (like D1001 in the above table). Each point may collect one or more metrics (like the current, voltage, phase as above). Each metric has a column in the table. The data type for a column can be int, float, string and others. In addition, the first column in the table must be a timestamp. TDengine uses the time stamp as the index, and wont build the index on any metrics stored. All data will be stored in columns.
### STable: A Collection of Data Points in the Same Type
The method of one table for each point will bring a greatly increasing number of tables, which is difficult to manage. Moreover, applications often need to take aggregation operations between collection points, thus aggregation operations will become complicated. To support aggregation over multiple tables efficiently, the STable (Super Table) concept is introduced by TDengine.
The design of one table for each data collection point will require a huge number of tables, which is difficult to manage. Moreover, applications often need to take aggregation operations between data collection points, thus aggregation operations will become complicated. To support aggregation over multiple tables efficiently, the [STable(Super Table)](https://www.taosdata.com/en/documentation/super-table) concept is introduced by TDengine.
STable is an abstract collection for a type of data point. A STable contains a set of points (tables) that have the same schema or data structure, but with different static attributes (tags). To describe a STable (a combination of data collection points of a specific type), in addition to defining the table structure of the collected metrics, it is also necessary to define the schema of its tag. The data type of tags can be int, float, string, and there can be multiple tags, which can be added, deleted, or modified afterward. If the whole system has N different types of data collection points, N STables need to be established.
STable is an abstract set for a type of data collection point. A STable contains a set of data collection points (tables) that have the same schema or data structure, but with different static attributes (tags). To describe a STable (a set of data collection points of a specific type), in addition to defining the table structure of the collected metrics, it is also necessary to define the schema of its tags. The data type of tags can be int, float, string, and there can be multiple tags, which can be added, deleted, or modified afterward. If the whole system has N different types of data collection points, N STables need to be established.
In the design of TDengine, **a table is used to represent a specific data collection point, and STable is used to represent a set of data collection points of the same type**. When creating a table for a specific data collection point, the user uses the definition of STable as a template and specifies the tag value of the specific collection point (table). Compared with the traditional relational database, the table (a data collection point) has static tags, and these tags can be added, deleted, and modified afterward. **A STable contains multiple tables with the same time-series data schema but different tag values.**
When aggregating multiple data collection points with the same data type, TDEngine will first find out the tables that meet the tag filters from the STables, and then scan the time-series data of these tables to perform aggregation operation, which can greatly reduce the data sets to be scanned, thus greatly improving the performance of aggregation calculation.
When aggregating multiple data collection points with the same data type, TDengine will first find out the tables that meet the tag filter conditions from the STables, then scan the time-series data of these tables to perform aggregation operation, which can greatly reduce the data sets to be scanned, thus greatly improving the performance of data aggregation.
## <a class="anchor" id="cluster"></a> Cluster and Primary Logic Unit
The design of TDengine is based on the assumption that one single hardware or software system is unreliable and that no single computer can provide sufficient computing and storage resources to process massive data. Therefore, TDengine has been designed according to a distributed and high-reliability architecture since Day One of R&D, which supports scale-out, so that hardware failure or software failure of any single or multiple servers will not affect the availability and reliability of the system. At the same time, through node virtualization and automatic load-balancing technology, TDengine can make the most efficient use of computing and storage resources in heterogeneous clusters to reduce hardware investment.
The design of TDengine is based on the assumption that one single node or software system is unreliable and that no single node can provide sufficient computing and storage resources to process massive data. Therefore, TDengine has been designed in a distributed and high-reliability architecture since day one of the development, so that hardware failure or software failure of any single or multiple servers will not affect the availability and reliability of the system. At the same time, through node virtualization and automatic load-balancing technology, TDengine can make the most efficient use of computing and storage resources in heterogeneous clusters to reduce hardware investment.
### Primary Logic Unit
@ -162,37 +162,37 @@ Logical structure diagram of TDengine distributed architecture as following:
A complete TDengine system runs on one or more physical nodes. Logically, it includes data node (dnode), TDEngine application driver (taosc) and application (app). There are one or more data nodes in the system, which form a cluster. The application interacts with the TDengine cluster through taosc's API. The following is a brief introduction to each logical unit.
**Physical node (pnode)**: A pnode is a computer that runs independently and has its own computing, storage and network capabilities. It can be a physical machine, virtual machine or Docker container installed with OS. The physical node is identified by its configured FQDN (Fully Qualified Domain Name). TDengine relies entirely on FQDN for network communication. If you don't know about FQDN, please read the blog post "[All about FQDN of TDengine](https://www.taosdata.com/blog/2020/09/11/1824.html)".
**Physical node (pnode)**: A pnode is a computer that runs independently and has its own computing, storage and network capabilities. It can be a physical machine, virtual machine, or Docker container installed with OS. The physical node is identified by its configured FQDN (Fully Qualified Domain Name). TDengine relies entirely on FQDN for network communication. If you don't know about FQDN, please read the blog post "[All about FQDN of TDengine](https://www.taosdata.com/blog/2020/09/11/1824.html)".
**Data node (dnode):** A dnode is a running instance of the TDengine server-side execution code taosd on a physical node. A working system must have at least one data node. A dnode contains zero to multiple logical virtual nodes (VNODE), zero or at most one logical management node (mnode). The unique identification of a dnode in the system is determined by the instance's End Point (EP). EP is a combination of FQDN (Fully Qualified Domain Name) of the physical node where the dnode is located and the network port number (Port) configured by the system. By configuring different ports, a physical node (a physical machine, virtual machine or container) can run multiple instances or have multiple data nodes.
**Virtual node (vnode)**: In order to better support data sharding, load balancing and prevent data from overheating or skewing, data nodes are virtualized into multiple virtual nodes (vnode, V2, V3, V4, etc. in the figure). Each vnode is a relatively independent work unit, which is the basic unit of time-series data storage, and has independent running threads, memory space and persistent storage path. A vnode contains a certain number of tables (data collection points). When a new table is created, the system checks whether a new vnode needs to be created. The number of vnodes that can be created on a data node depends on the hardware capacities of the physical node where the data node is located. A vnode belongs to only one DB, but a DB can have multiple vnodes. In addition to the stored time-series data, a vnode also stores the schema and tag values of the included tables. A virtual node is uniquely identified in the system by the EP of the data node and the VGroup ID to which it belongs, and is created and managed by the management node.
**Virtual node (vnode)**: To better support data sharding, load balancing and prevent data from overheating or skewing, data nodes are virtualized into multiple virtual nodes (vnode, V2, V3, V4, etc. in the figure). Each vnode is a relatively independent work unit, which is the basic unit of time-series data storage and has independent running threads, memory space and persistent storage path. A vnode contains a certain number of tables (data collection points). When a new table is created, the system checks whether a new vnode needs to be created. The number of vnodes that can be created on a data node depends on the hardware capacities of the physical node where the data node is located. A vnode belongs to only one DB, but a DB can have multiple vnodes. In addition to the stored time-series data, a vnode also stores the schema and tag values of the included tables. A virtual node is uniquely identified in the system by the EP of the data node and the VGroup ID to which it belongs and is created and managed by the management node.
**Management node (mnode)**: A virtual logical unit responsible for monitoring and maintaining the running status of all data nodes and load balancing among nodes (M in figure). At the same time, the management node is also responsible for the storage and management of metadata (including users, databases, tables, static tags, etc.), so it is also called Meta Node. Multiple (up to 5) mnodes can be configured in a TDengine cluster, and they are automatically constructed into a virtual management node group (M0, M1, M2 in the figure). The master/slave mechanism is used to manage between mnodes, and the data synchronization is carried out in a strong consistent way. Any data update operation can only be done on the master. The creation of mnode cluster is completed automatically by the system without manual intervention. There is at most one mnode on each dnode, which is uniquely identified by the EP of the data node to which it belongs. Each dnode automatically obtains the EP of the dnode where all mnodes in the whole cluster are located through internal messaging interaction.
**Management node (mnode)**: A virtual logical unit responsible for monitoring and maintaining the running status of all data nodes and load balancing among nodes (M in the figure). At the same time, the management node is also responsible for the storage and management of metadata (including users, databases, tables, static tags, etc.), so it is also called Meta Node. Multiple (up to 5) mnodes can be configured in a TDengine cluster, and they are automatically constructed into a virtual management node group (M0, M1, M2 in the figure). The master/slave mechanism is adopted for the mnode group and the data synchronization is carried out in a strongly consistent way. Any data update operation can only be executed on the master. The creation of mnode cluster is completed automatically by the system without manual intervention. There is at most one mnode on each dnode, which is uniquely identified by the EP of the data node to which it belongs. Each dnode automatically obtains the EP of the dnode where all mnodes in the whole cluster are located through internal messaging interaction.
**Virtual node group (VGroup)**: Vnodes on different data nodes can form a virtual node group to ensure the high reliability of the system. The virtual node group is managed in a master/slave structure. Write operations can only be performed on the master vnode, and the system synchronizes data to the slave vnode via replication, thus ensuring that one single replica of data is copied on multiple physical nodes. The number of virtual nodes in a vgroup equals the number of data replicas. If the number of replicas of a DB is N, the system must have at least N data nodes. The number of replicas can be specified by the parameter replica when creating DB, and the default is 1. Using the multi-replica feature of TDengine, the same high data reliability can be done without the need for expensive storage devices such as disk arrays. Virtual node group is created and managed by management node, and the management node assigns a system unique ID, aka VGroup ID. If two virtual nodes has the same vnode group ID, means that they belong to the same group and the data is backed up to each other. The number of virtual nodes in a virtual node group can be dynamically changed, allowing only one, that is, no data replication. VGroup ID is never changed. Even if a virtual node group is deleted, its ID will not be reused.
**Virtual node group (VGroup)**: Vnodes on different data nodes can form a virtual node group to ensure the high availability of the system. The virtual node group is managed in a master/slave mechanism. Write operations can only be performed on the master vnode, and then replicated to slave vnodes, thus ensuring that one single replica of data is copied on multiple physical nodes. The number of virtual nodes in a vgroup equals the number of data replicas. If the number of replicas of a DB is N, the system must have at least N data nodes. The number of replicas can be specified by the parameter `replica` when creating DB, and the default is 1. Using the multi-replication feature of TDengine, the same high data reliability can be achieved without the need for expensive storage devices such as disk arrays. Virtual node group is created and managed by the management node, and the management node assigns a system unique ID, aka VGroup ID. If two virtual nodes have the same vnode group ID, means that they belong to the same group and the data is backed up to each other. The number of virtual nodes in a virtual node group can be dynamically changed, allowing only one, that is, no data replication. VGroup ID is never changed. Even if a virtual node group is deleted, its ID will not be reused.
**TAOSC**: TAOSC is the driver provided by TDengine to applications, which is responsible for dealing with the interface interaction between application and cluster, and provides the native interface of C/C++ language, which is embedded in JDBC, C #, Python, Go, Node.js language connection libraries. Applications interact with the whole cluster through taosc instead of directly connecting to data nodes in the cluster. This module is responsible for obtaining and caching metadata; forwarding requests for insertion, query, etc. to the correct data node; when returning the results to the application, taosc also need to be responsible for the final level of aggregation, sorting, filtering and other operations. For JDBC, C/C++/C #/Python/Go/Node.js interfaces, this module runs on the physical node where the application is located. At the same time, in order to support the fully distributed RESTful interface, taosc has a running instance on each dnode of TDengine cluster.
**TAOSC**: TAOSC is the driver provided by TDengine to applications, which is responsible for dealing with the interaction between application and cluster, and provides the native interface of C/C++ language, which is embedded in JDBC, C #, Python, Go, Node.js language connection libraries. Applications interact with the whole cluster through taosc instead of directly connecting to data nodes in the cluster. This module is responsible for obtaining and caching metadata; forwarding requests for insertion, query, etc. to the correct data node; when returning the results to the application, taosc also needs to be responsible for the final level of aggregation, sorting, filtering and other operations. For JDBC, C/C++/C #/Python/Go/Node.js interfaces, this module runs on the physical node where the application is located. At the same time, in order to support the fully distributed RESTful interface, taosc has a running instance on each dnode of TDengine cluster.
### Node Communication
**Communication mode**: The communication among each data node of TDengine system, and among application driver and each data node is carried out through TCP/UDP. Considering an IoT scenario, the data writing packets are generally not large, so TDengine uses UDP in addition to TCP for transmission, because UDP is more efficient and is not limited by the number of connections. TDengine implements its own timeout, retransmission, confirmation and other mechanisms to ensure reliable transmission of UDP. For packets with a data volume of less than 15K, UDP is adopted for transmission, and TCP is automatically adopted for transmission of packets with a data volume of more than 15K or query operations. At the same time, TDengine will automatically compress/decompress the data, digital sign/authenticate the data according to the configuration and data packet. For data replication among data nodes, only TCP is used for data transmission.
**Communication mode**: The communication among each data node of TDengine system, and among the application driver and each data node is carried out through TCP/UDP. Considering an IoT scenario, the data writing packets are generally not large, so TDengine uses UDP in addition to TCP for transmission, because UDP is more efficient and is not limited by the number of connections. TDengine implements its own timeout, retransmission, confirmation and other mechanisms to ensure reliable transmission of UDP. For packets with a data volume of less than 15K, UDP is adopted for transmission, and TCP is automatically adopted for transmission of packets with a data volume of more than 15K or query operations. At the same time, TDengine will automatically compress/decompress the data, digital sign/authenticate the data according to the configuration and data packet. For data replication among data nodes, only TCP is used for data transportation.
**FQDN configuration:** A data node has one or more FQDNs, which can be specified in the system configuration file taos.cfg with the parameter "fqdn". If it is not specified, the system will automatically use the hostname of the computer as its FQDN. If the node is not configured with FQDN, you can directly set the configuration parameter fqdn of the node to its IP address. However, IP is not recommended because IP address is variable, and once it changes, the cluster will not work properly. The EP (End Point) of a data node consists of FQDN + Port. With FQDN, it is necessary to ensure the normal operation of DNS service, or configure hosts files on nodes and the nodes where applications are located.
**FQDN configuration:** A data node has one or more FQDNs, which can be specified in the system configuration file taos.cfg with the parameter "fqdn". If it is not specified, the system will automatically use the hostname of the computer as its FQDN. If the node is not configured with FQDN, you can directly set the configuration parameter fqdn of the node to its IP address. However, IP is not recommended because IP address may be changed, and once it changes, the cluster will not work properly. The EP (End Point) of a data node consists of FQDN + Port. With FQDN, it is necessary to ensure the DNS service is running, or hosts files on nodes are configured properly.
**Port configuration**: The external port of a data node is determined by the system configuration parameter serverPort in TDengine, and the port for internal communication of cluster is serverPort+5. The data replication operation among data nodes in the cluster also occupies a TCP port, which is serverPort+10. In order to support multithreading and efficient processing of UDP data, each internal and external UDP connection needs to occupy 5 consecutive ports. Therefore, the total port range of a data node will be serverPort to serverPort + 10, for a total of 11 TCP/UDP ports. When using, make sure that the firewall keeps these ports open. Each data node can be configured with a different serverPort.
**Port configuration**: The external port of a data node is determined by the system configuration parameter serverPort in TDengine, and the port for internal communication of cluster is serverPort+5. The data replication operation among data nodes in the cluster also occupies a TCP port, which is serverPort+10. In order to support multithreading and efficient processing of UDP data, each internal and external UDP connection needs to occupy 5 consecutive ports. Therefore, the total port range of a data node will be serverPort to serverPort + 10, for a total of 11 TCP/UDP ports. To run the system, make sure that the firewall keeps these ports open. Each data node can be configured with a different serverPort.
**Cluster external connection**: TDengine cluster can accommodate one single, multiple or even thousands of data nodes. The application only needs to initiate a connection to any data node in the cluster. The network parameter required for connection is the End Point (FQDN plus configured port number) of a data node. When starting the application taos through CLI, the FQDN of the data node can be specified through the option-h, and the configured port number can be specified through -p. If the port is not configured, the system configuration parameter serverPort of TDengine will be adopted.
**Cluster external connection**: TDengine cluster can accommodate one single, multiple or even thousands of data nodes. The application only needs to initiate a connection to any data node in the cluster. The network parameter required for connection is the End Point (FQDN plus configured port number) of a data node. When starting the application taos through CLI, the FQDN of the data node can be specified through the option `-h`, and the configured port number can be specified through `-p`. If the port is not configured, the system configuration parameter serverPort of TDengine will be adopted.
**Inter-cluster communication**: Data nodes connect with each other through TCP/UDP. When a data node starts, it will obtain the EP information of the dnode where the mnode is located, and then establish a connection with the mnode in the system to exchange information. There are three steps to obtain EP information of the mnode: 1. Check whether the mnodeEpList file exists, if it does not exist or cannot be opened normally to obtain EP information of the mnode, skip to the second step; 2: Check the system configuration file taos.cfg to obtain node configuration parameters firstEp and secondEp (the node specified by these two parameters can be a normal node without mnode, in this case, the node will try to redirect to the mnode node when connected). If these two configuration parameters do not exist or do not exist in taos.cfg, or are invalid, skip to the third step; 3: Set your own EP as a mnode EP and run it independently. After obtaining the mnode EP list, the data node initiates the connection. It will successfully join the working cluster after connected. If not successful, it will try the next item in the mnode EP list. If all attempts are made, but the connection still fails, sleep for a few seconds before trying again.
**Inter-cluster communication**: Data nodes connect with each other through TCP/UDP. When a data node starts, it will obtain the EP information of the dnode where the mnode is located, and then establish a connection with the mnode in the system to exchange information. There are three steps to obtain EP information of the mnode: 1. Check whether the mnodeEpList file exists, if it does not exist or cannot be opened normally to obtain EP information of the mnode, skip to the second step; 2: Check the system configuration file taos.cfg to obtain node configuration parameters firstEp and secondEp (the node specified by these two parameters can be a normal node without mnode, in this case, the node will try to redirect to the mnode node when connected). If these two configuration parameters do not exist or do not exist in taos.cfg, or are invalid, skip to the third step; 3: Set your own EP as a mnode EP and run it independently. After obtaining the mnode EP list, the data node initiates the connection. It will successfully join the working cluster after connection. If not successful, it will try the next item in the mnode EP list. If all attempts are made, but the connection still fails, sleep for a few seconds before trying again.
**The choice of MNODE**: TDengine logically has a management node, but there is no separated execution code. The server side only has a set of execution code taosd. So which data node will be the management node? This is determined automatically by the system without any manual intervention. The principle is as follows: when a data node starts, it will check its End Point and compare it with the obtained mnode EP List. If its EP exists in it, the data node shall start the mnode module and become a mnode. If your own EP is not in the mnode EP List, the mnode module will not start. During the system operation, due to load balancing, downtime and other reasons, mnode may migrate to the new dnode, while totally transparent without manual intervention. The modification of configuration parameters is the decision made by mnode itself according to resources usage.
**The choice of MNODE**: TDengine logically has a management node, but there is no separated execution code. The server-side only has a set of execution code taosd. So which data node will be the management node? This is determined automatically by the system without any manual intervention. The principle is as follows: when a data node starts, it will check its End Point and compare it with the obtained mnode EP List. If its EP exists in it, the data node shall start the mnode module and become a mnode. If your own EP is not in the mnode EP List, the mnode module will not start. During the system operation, due to load balancing, downtime and other reasons, mnode may migrate to the new dnode, while totally transparent without manual intervention. The modification of configuration parameters is the decision made by mnode itself according to resources usage.
**Add new data nodes:** After the system has a data node, it has become a working system. There are two steps to add a new node into the cluster. Step1: Connect to the existing working data node using TDengine CLI, and then add the End Point of the new data node with the command "create dnode"; Step 2: In the system configuration parameter file taos.cfg of the new data node, set the firstEp and secondEp parameters to the EP of any two data nodes in the existing cluster. Please refer to the detailed user tutorial for detailed steps. In this way, the cluster will be established step by step.
**Redirection**: No matter about dnode or taosc, the connection to the mnode shall be initiated first, but the mnode is automatically created and maintained by the system, so user does not know which dnode is running the mnode. TDengine only requires a connection to any working dnode in the system. Because any running dnode maintains the currently running mnode EP List, when receiving a connecting request from the newly started dnode or taosc, if its not an mnode by self, it will reply the mnode EP List back. After receiving this list, taosc or the newly started dnode will try to establish the connection again. When the mnode EP List changes, each data node quickly obtains the latest list and notifies taosc through messaging interaction among nodes.
**Redirection**: No matter about dnode or taosc, the connection to the mnode shall be initiated first, but the mnode is automatically created and maintained by the system, so the user does not know which dnode is running the mnode. TDengine only requires a connection to any working dnode in the system. Because any running dnode maintains the currently running mnode EP List, when receiving a connecting request from the newly started dnode or taosc, if its not a mnode by self, it will reply to the mnode EP List back. After receiving this list, taosc or the newly started dnode will try to establish the connection again. When the mnode EP List changes, each data node quickly obtains the latest list and notifies taosc through messaging interaction among nodes.
### A Typical Messaging Process
### A Typical Data Writinfg Process
To explain the relationship between vnode, mnode, taosc and application and their respective roles, the following is an analysis of a typical data writing process.
@ -200,62 +200,62 @@ To explain the relationship between vnode, mnode, taosc and application and thei
<center> Picture 2 typical process of TDengine </center>
1. Application initiates a request to insert data through JDBC, ODBC, or other APIs.
2. Cache be checked by taosc that if meta data existing for the table. If so, go straight to Step 4. If not, taosc sends a get meta-data request to mnode.
2. taosc checks if meta data existing for the table in the cache. If so, go straight to Step 4. If not, taosc sends a get meta-data request to mnode.
3. Mnode returns the meta-data of the table to taosc. Meta-data contains the schema of the table, and also the vgroup information to which the table belongs (the vnode ID and the End Point of the dnode where the table belongs. If the number of replicas is N, there will be N groups of End Points). If taosc does not receive a response from the mnode for a long time, and there are multiple mnodes, taosc will send a request to the next mnode.
4. Taosc initiates an insert request to master vnode.
5. After vnode inserts the data, it gives a reply to taosc, indicating that the insertion is successful. If taosc doesn't get a response from vnode for a long time, taosc will judge the node as offline. In this case, if there are multiple replicas of the inserted database, taosc will issue an insert request to the next vnode in vgroup.
5. After vnode inserts the data, it gives a reply to taosc, indicating that the insertion is successful. If taosc doesn't get a response from vnode for a long time, taosc will treat this node as offline. In this case, if there are multiple replicas of the inserted database, taosc will issue an insert request to the next vnode in vgroup.
6. Taosc notifies APP that writing is successful.
For Step 2 and 3, when taosc starts, it does not know the End Point of mnode, so it will directly initiate a request to the externally serving End Point of the configured cluster. If the dnode that received the request does not have an mnode configured, it will inform the mnode EP list in a reply message, so that taosc will re-issue a request to obtain meta-data to the EP of another new mnode.
For Step 2 and 3, when taosc starts, it does not know the End Point of mnode, so it will directly initiate a request to the configured serving End Point of the cluster. If the dnode that receives the request does not have a mnode configured, it will inform the mnode EP list in a reply message, so that taosc will re-issue a request to obtain meta-data to the EP of another new mnode.
For Step 4 and 5, without caching, taosc can't recognize the master in the virtual node group, so assumes that the first vnodeID is the master and send a request to it. If the requested vnode is not the master, it will reply the actual master as a new target taosc makes a request to. Once the reply of successful insertion is obtained, taosc will cache the information of master node.
For Step 4 and 5, without caching, taosc can't recognize the master in the virtual node group, so assumes that the first vnode is the master and sends a request to it. If this vnode is not the master, it will reply to the actual master as a new target where taosc shall send a request to. Once the reply of successful insertion is obtained, taosc will cache the information of master node.
The above is the process of inserting data, and the processes of querying and calculating are completely consistent. Taosc encapsulates and shields all these complicated processes, and has no perception and no special treatment for applications.
The above is the process of inserting data, and the processes of querying and computing are the same. Taosc encapsulates and hides all these complicated processes, and it is transparent to applications.
Through taosc caching mechanism, mnode needs to be accessed only when a table is operated for the first time, so mnode will not become a system bottleneck. However, because schema and vgroup may change (such as load balancing), taosc will interact with mnode regularly to automatically update the cache.
Through taosc caching mechanism, mnode needs to be accessed only when a table is accessed for the first time, so mnode will not become a system bottleneck. However, because schema and vgroup may change (such as load balancing), taosc will interact with mnode regularly to automatically update the cache.
## <a class="anchor" id="sharding"></a> Storage Model and Data Partitioning/Sharding
### Storage Model
The data stored by TDengine include collected time-series data, metadata related to libraries and tables, tag data, etc. These data are specifically divided into three parts:
The data stored by TDengine include collected time-series data, metadata related to database and tables, tag data, etc. These data are specifically divided into three parts:
- Time-series data: stored in vnode and composed of data, head and last files. The amount of data is large and query amount depends on the application scenario. Out-of-order writing is allowed, but delete operation is not supported for the time being, and update operation is only allowed when update parameter is set to 1. By adopting the model with one table for each collection point, the data of a given time period is continuously stored, and the writing against one single table is a simple add operation. Multiple records can be read at one time, thus ensuring the insert and query operation of a single collection point with best performance.
- Tag data: meta files stored in vnode support four standard operations of add, delete, modify and check. The amount of data is not large. If there are N tables, there are N records, so all can be stored in memory. If there are many tag filtering operations, queries will be very frequent and TDengine supports multi-core and multi-threaded concurrent queries. As long as the computing resources are sufficient, even in face of millions of tables, the filtering results will return in milliseconds.
- Metadata: stored in mnode, including system node, user, DB, Table Schema and other information. Four standard operations of add, delete, modify and query are supported. The amount of these data are not large and can be stored in memory, moreover the query amount is not large because of the client cache. Therefore, TDengine uses centralized storage management, however, there will be no performance bottleneck.
- Time-series data: stored in vnode and composed of data, head and last files. The amount of data is large and query amount depends on the application scenario. Out-of-order writing is allowed, but delete operation is not supported for the time being, and update operation is only allowed when database update parameter is set to 1. By adopting the model with one table for each data collection point, the data of a given time period is continuously stored, and the writing against one single table is a simple appending operation. Multiple records can be read at one time, thus ensuring the insert and query operation of a single data collection point with the best performance.
- Tag data: meta files stored in vnode. Four standard operations of create, read, update and delete are supported. The amount of data is not large. If there are N tables, there are N records, so all can be stored in memory. To make tag filtering efficient, TDengine supports multi-core and multi-threaded concurrent queries. As long as the computing resources are sufficient, even in face of millions of tables, the tag filtering results will return in milliseconds.
- Metadata: stored in mnode, including system node, user, DB, Table Schema and other information. Four standard operations of create, delete, update and read are supported. The amount of these data are not large and can be stored in memory, moreover, the query amount is not large because of the client cache. Therefore, TDengine uses centralized storage management, however, there will be no performance bottleneck.
Compared with the typical NoSQL storage model, TDengine stores tag data and time-series data completely separately, which has two major advantages:
- Greatly reduce the redundancy of tag data storage: general NoSQL database or time-series database adopts K-V storage, in which Key includes timestamp, device ID and various tags. Each record carries these duplicates, so wasting storage space. Moreover, if the application needs to add, modify or delete tags on historical data, it has to traverse the data and rewrite again, which is extremely expensive to operate.
- Realize extremely efficient aggregation query between multiple tables: when doing aggregation query between multiple tables, it firstly finds out the tag filtered tables, and then find out the corresponding data blocks of these tables to greatly reduce the data sets to be scanned, thus greatly improving the query efficiency. Moreover, tag data is managed and maintained in a full-memory structure, and tag data queries in tens of millions can return in milliseconds.
- Greatly reduce the redundancy of tag data storage: general NoSQL database or time-series database adopts K-V storage, in which Key includes a timestamp, a device ID and various tags. Each record carries these duplicated tags, so storage space is wasted. Moreover, if the application needs to add, modify or delete tags on historical data, it has to traverse the data and rewrite them again, which is extremely expensive to operate.
- Aggregate data efficiently between multiple tables: when aggregating data between multiple tables, it first finds out the tables which satisfy the filtering conditions, and then find out the corresponding data blocks of these tables to greatly reduce the data sets to be scanned, thus greatly improving the aggregation efficiency. Moreover, tag data is managed and maintained in a full-memory structure, and tag data queries in tens of millions can return in milliseconds.
### Data Sharding
For large-scale data management, to achieve scale-out, it is generally necessary to adopt the a Partitioning strategy as Sharding. TDengine implements data sharding via vnode, and time-series data partitioning via one data file for each time range.
For large-scale data management, to achieve scale-out, it is generally necessary to adopt the Partitioning or Sharding strategy. TDengine implements data sharding via vnode, and time-series data partitioning via one data file for each time range.
VNode (Virtual Data Node) is responsible for providing writing, query and calculation functions for collected time-series data. To facilitate load balancing, data recovery and support heterogeneous environments, TDengine splits a data node into multiple vnodes according to its computing and storage resources. The management of these vnodes is done automatically by TDengine and completely transparent to the application.
VNode (Virtual Data Node) is responsible for providing writing, query and computing functions for collected time-series data. To facilitate load balancing, data recovery and support heterogeneous environments, TDengine splits a data node into multiple vnodes according to its computing and storage resources. The management of these vnodes is done automatically by TDengine and is completely transparent to the application.
For a single data collection point, regardless of the amount of data, a vnode (or vnode group, if the number of replicas is greater than 1) has enough computing resource and storage resource to process (if a 16-byte record is generated per second, the original data generated in one year will be less than 0.5 G), so TDengine stores all the data of a table (a data collection point) in one vnode instead of distributing the data to two or more dnodes. Moreover, a vnode can store data from multiple data collection points (tables), and the upper limit of the tables quantity for a vnode is one million. By design, all tables in a vnode belong to the same DB. On a data node, unless specially configured, the number of vnodes owned by a DB will not exceed the number of system cores.
When creating a DB, the system does not allocate resources immediately. However, when creating a table, the system will check if there is an allocated vnode with free tablespace. If so, the table will be created in the vacant vnode immediately. If not, the system will create a new vnode on a dnode from the cluster according to the current workload, and then a table. If there are multiple replicas of a DB, the system does not create only one vnode, but a vgroup (virtual data node group). The system has no limit on the number of vnodes, which is just limited by the computing and storage resources of physical nodes.
The meda data of each table (including schema, tags, etc.) is also stored in vnode instead of centralized storage in mnode. In fact, this means sharding of meta data, which is convenient for efficient and parallel tag filtering operations.
The meta data of each table (including schema, tags, etc.) is also stored in vnode instead of centralized storage in mnode. In fact, this means sharding of meta data, which is good for efficient and parallel tag filtering operations.
### Data Partitioning
In addition to vnode sharding, TDengine partitions the time-series data by time range. Each data file contains only one time range of time-series data, and the length of the time range is determined by DB's configuration parameter “days”. This method of partitioning by time rang is also convenient to efficiently implement the data retention strategy. As long as the data file exceeds the specified number of days (system configuration parameter keep), it will be automatically deleted. Moreover, different time ranges can be stored in different paths and storage media, so as to facilitate the cold/hot management of big data and realize tiered-storage.
In addition to vnode sharding, TDengine partitions the time-series data by time range. Each data file contains only one time range of time-series data, and the length of the time range is determined by DB's configuration parameter `days`. This method of partitioning by time rang is also convenient to efficiently implement the data retention policy. As long as the data file exceeds the specified number of days (system configuration parameter `keep`), it will be automatically deleted. Moreover, different time ranges can be stored in different paths and storage media, so as to facilitate the tiered-storage. Cold/hot data can be stored in different storage meida to reduce the storage cost.
In general, **TDengine splits big data by vnode and time as two dimensions**, which is convenient for parallel and efficient management with scale-out.
In general, **TDengine splits big data by vnode and time range in two dimensions** to manage the data efficiently with horizontal scalability.
### Load Balancing
Each dnode regularly reports its status (including hard disk space, memory size, CPU, network, number of virtual nodes, etc.) to the mnode (virtual management node) for declaring the status of the entire cluster. Based on the overall state, when an mnode finds an overloaded dnode, it will migrate one or more vnodes to other dnodes. In the process, external services keep running and the data insertion, query and calculation operations are not affected.
Each dnode regularly reports its status (including hard disk space, memory size, CPU, network, number of virtual nodes, etc.) to the mnode (virtual management node), so mnode knows the status of the entire cluster. Based on the overall status, when the mnode finds a dnode is overloaded, it will migrate one or more vnodes to other dnodes. During the process, TDengine services keep running and the data insertion, query and computing operations are not affected.
If the mnode has not received the dnode status for a period of time, the dnode will be judged as offline. When offline lasts a certain period of time (the duration is determined by the configuration parameter offlineThreshold), the dnode will be forcibly removed from the cluster by mnode. If the number of replicas of vnodes on this dnode is greater than one, the system will automatically create new replicas on other dnodes to ensure the replica number. If there are other mnodes on this dnode and the number of mnodes replicas is greater than one, the system will automatically create new mnodes on other dnodes to ensure t the replica number.
If the mnode has not received the dnode status for a period of time, the dnode will be treated as offline. When offline lasts a certain period of time (configured by parameter `offlineThreshold`), the dnode will be forcibly removed from the cluster by mnode. If the number of replicas of vnodes on this dnode is greater than one, the system will automatically create new replicas on other dnodes to ensure the replica number. If there are other mnodes on this dnode and the number of mnodes replicas is greater than one, the system will automatically create new mnodes on other dnodes to ensure the replica number.
When new data nodes are added to the cluster, with new computing and storage are added, the system will automatically start the load balancing process.
When new data nodes are added to the cluster, with new computing and storage resources are added, the system will automatically start the load balancing process.
The load balancing process does not require any manual intervention without application restarted. It will automatically connect new nodes with completely transparence. **Note: load balancing is controlled by parameter “balance”, which determines to turn on/off automatic load balancing.**
The load balancing process does not require any manual intervention, and it is transparent to the application. **Note: load balancing is controlled by parameter “balance”, which determines to turn on/off automatic load balancing.**
## <a class="anchor" id="replication"></a> Data Writing and Replication Process
@ -267,8 +267,8 @@ Master Vnode uses a writing process as follows:
Figure 3: TDengine Master writing process
1. Master vnode receives the application data insertion request, verifies, and to next step;
2. If the system configuration parameter “walLevel” is greater than 0, vnode will write the original request packet into database log file WAL. If walLevel is set to 2 and fsync is set to 0, TDengine will make WAL data written immediately to ensure that even system goes down, all data can be recovered from database log file;
1. Master vnode receives the application data insertion request, verifies, and moves to next step;
2. If the system configuration parameter `walLevel` is greater than 0, vnode will write the original request packet into database log file WAL. If walLevel is set to 2 and fsync is set to 0, TDengine will make WAL data written immediately to ensure that even system goes down, all data can be recovered from database log file;
3. If there are multiple replicas, vnode will forward data packet to slave vnodes in the same virtual node group, and the forwarded packet has a version number with data;
4. Write into memory and add the record to “skip list”;
5. Master vnode returns a confirmation message to the application, indicating a successful writing.
@ -282,30 +282,30 @@ For a slave vnode, the write process as follows:
<center> Picture 3 TDengine Slave Writing Process </center>
1. Slave vnode receives a data insertion request forwarded by Master vnode.
2. If the system configuration parameter “walLevel” is greater than 0, vnode will write the original request packet into database log file WAL. If walLevel is set to 2 and fsync is set to 0, TDengine will make WAL data written immediately to ensure that even system goes down, all data can be recovered from database log file;
2. If the system configuration parameter `walLevel` is greater than 0, vnode will write the original request packet into database log file WAL. If walLevel is set to 2 and fsync is set to 0, TDengine will make WAL data written immediately to ensure that even system goes down, all data can be recovered from database log file;
3. Write into memory and add the record to “skip list”;
Compared with Master vnode, slave vnode has no forwarding or reply confirmation step, means two steps less. But writing into memory is exactly the same as WAL.
Compared with Master vnode, slave vnode has no forwarding or reply confirmation step, means two steps less. But writing into memory and WAL is exactly the same.
### Remote Disaster Recovery and IDC Migration
As above Master and Slave processes discussed, TDengine adopts asynchronous replication for data synchronization. This method can greatly improve the writing performance, with not obvious impact from network delay. By configuring IDC and rack number for each physical node, it can be ensured that for a virtual node group, virtual nodes are composed of physical nodes from different IDC and different racks, thus implementing remote disaster recovery without other tools.
As above Master and Slave processes discussed, TDengine adopts asynchronous replication for data synchronization. This method can greatly improve the writing performance, with no obvious impact from network delay. By configuring IDC and rack number for each physical node, it can be ensured that for a virtual node group, virtual nodes are composed of physical nodes from different IDC and different racks, thus implementing remote disaster recovery without other tools.
On the other hand, TDengine supports dynamic modification of the replicas number. Once the number of replicas increases, the newly added virtual nodes will immediately enter the data synchronization process. After synchronization completed, added virtual nodes can provide services. In the synchronization process, master and other synchronized virtual nodes keep serving. With this feature, TDengine can realize IDC room migration without service interruption. It is only necessary to add new physical nodes to the existing IDC cluster, and then remove old physical nodes after the data synchronization is completed.
On the other hand, TDengine supports dynamic modification of the replicas number. Once the number of replicas increases, the newly added virtual nodes will immediately enter the data synchronization process. After synchronization completed, added virtual nodes can provide services. In the synchronization process, master and other synchronized virtual nodes keep serving. With this feature, TDengine can provide IDC migration without service interruption. It is only necessary to add new physical nodes to the existing IDC cluster, and then remove old physical nodes after the data synchronization is completed.
However, this asynchronous replication method has a tiny time window of written data lost. The specific scenario is as follows:
However, the asynchronous replication has a tiny time window where data can be lost. The specific scenario is as follows:
1. Master vnode has completed its 5-step operations, confirmed the success of writing to APP, and then went down;
1. Master vnode has finished its 5-step operations, confirmed the success of writing to APP, and then went down;
2. Slave vnode receives the write request, then processing fails before writing to the log in Step 2;
3. Slave vnode will become the new master, thus losing one record
In theory, as long as in asynchronous replication, there is no guarantee for no losing. However, this window is extremely small, only if mater and slave fail at the same time, and just confirm the successful write to the application before.
In theory, for asynchronous replication, there is no guarantee to prevent data loss. However, this window is extremely small, only if mater and slave fail at the same time, and just confirm the successful write to the application before.
Note: Remote disaster recovery and no-downtime IDC migration are only supported by Enterprise Edition. **Hint: This function is not available yet**
### Master/slave Selection
Vnode maintains a Version number. When memory data is persisted, the version number will also be persisted. For each data update operation, whether it is collecting time-series data or metadata, this version number will be increased by one.
Vnode maintains a version number. When memory data is persisted, the version number will also be persisted. For each data update operation, whether it is time-series data or metadata, this version number will be increased by one.
When a vnode starts, the roles (master, slave) are uncertain, and the data is in an unsynchronized state. Its necessary to establish TCP connections with other nodes in the virtual node group and exchange status, including version and its own roles. Through the exchange, the system implements a master-selection process. The rules are as follows:
@ -318,7 +318,7 @@ See [TDengine 2.0 Data Replication Module Design](https://www.taosdata.com/cn/do
### Synchronous Replication
For scenarios with higher data consistency requirements, asynchronous data replication is not applicable, because there is some small probability of data loss. So, TDengine provides a synchronous replication mechanism for users. When creating a database, in addition to specifying the number of replicas, user also needs to specify a new parameter “quorum”. If quorum is greater than one, it means that every time the Master forwards a message to the replica, it needs to wait for “quorum-1” reply confirms before informing the application that data has been successfully written in slave. If “quorum-1” reply confirms are not received within a certain period of time, the master vnode will return an error to the application.
For scenarios with strong data consistency requirements, asynchronous data replication is not applicable, because there is a small probability of data loss. So, TDengine provides a synchronous replication mechanism for users. When creating a database, in addition to specifying the number of replicas, user also needs to specify a new parameter “quorum”. If quorum is greater than one, it means that every time the Master forwards a message to the replica, it needs to wait for “quorum-1” reply confirms before informing the application that data has been successfully written in slave. If “quorum-1” reply confirms are not received within a certain period of time, the master vnode will return an error to the application.
With synchronous replication, performance of system will decrease and latency will increase. Because metadata needs strong consistent, the default for data synchronization between mnodes is synchronous replication.
@ -336,17 +336,17 @@ Each vnode has its own independent memory, and it is composed of multiple memory
TDengine uses a data-driven method to write the data from buffer into hard disk for persistent storage. When the cached data in vnode reaches a certain volume, TDengine will also pull up the disk-writing thread to write the cached data into persistent storage in order not to block subsequent data writing. TDengine will open a new database log file when the data is written, and delete the old database log file after written successfully to avoid unlimited log growth.
To make full use of the characteristics of time-series data, TDengine splits the data stored in persistent storage by a vnode into multiple files, each file only saves data for a fixed number of days, which is determined by the system configuration parameter “days”. By so, for the given start and end date of a query, you can locate the data files to open immediately without any index, thus greatly speeding up reading operations.
To make full use of the characteristics of time-series data, TDengine splits the data stored in persistent storage by a vnode into multiple files, each file only saves data for a fixed number of days, which is determined by the system configuration parameter `days`. By so, for the given start and end date of a query, you can locate the data files to open immediately without any index, thus greatly speeding up reading operations.
For collected data, there is generally a retention period, which is determined by the system configuration parameter “keep”. Data files exceeding this set number of days will be automatically deleted by the system to free up storage space.
For time-series data, there is generally a retention policy, which is determined by the system configuration parameter `keep`. Data files exceeding this set number of days will be automatically deleted by the system to free up storage space.
Given “days” and “keep” parameters, the total number of data files in a vnode is: keep/days. The total number of data files should not be too large or too small. 10 to 100 is appropriate. Based on this principle, reasonable days can be set. In the current version, parameter “keep” can be modified, but parameter “days” cannot be modified once it is set.
In each data file, the data of a table is stored by blocks. A table can have one or more data file blocks. In a file block, data is stored in columns, occupying a continuous storage space, thus greatly improving the reading speed. The size of file block is determined by the system parameter “maxRows” (the maximum number of records per block), and the default value is 4096. This value should not be too large or too small. If it is too large, the data locating in search will cost longer; if too small, the index of data block is too large, and the compression efficiency will be low with slower reading speed.
In each data file, the data of a table is stored by blocks. A table can have one or more data file blocks. In a file block, data is stored in columns, occupying a continuous storage space, thus greatly improving the reading speed. The size of file block is determined by the system parameter `maxRows` (the maximum number of records per block), and the default value is 4096. This value should not be too large or too small. If it is too large, the data locating in search will cost longer; if too small, the index of data block is too large, and the compression efficiency will be low with slower reading speed.
Each data file (with a .data postfix) has a corresponding index file (with a .head postfix). The index file has summary information of a data block for each table, recording the offset of each data block in the data file, start and end time of data and other information, so as to lead system quickly locate the data to be found. Each data file also has a corresponding last file (with a .last postfix), which is designed to prevent data block fragmentation when written in disk. If the number of written records from a table does not reach the system configuration parameter “minRows” (minimum number of records per block), it will be stored in the last file first. When write to disk next time, the newly written records will be merged with the records in last file and then written into data file.
Each data file (with a .data postfix) has a corresponding index file (with a .head postfix). The index file has summary information of a data block for each table, recording the offset of each data block in the data file, start and end time of data and other information, so as to lead system quickly locate the data to be found. Each data file also has a corresponding last file (with a .last postfix), which is designed to prevent data block fragmentation when written in disk. If the number of written records from a table does not reach the system configuration parameter `minRows` (minimum number of records per block), it will be stored in the last file first. When write to disk next time, the newly written records will be merged with the records in last file and then written into data file.
When data is written to disk, it is decided whether to compress the data according to system configuration parameter “comp”. TDengine provides three compression options: no compression, one-stage compression and two-stage compression, corresponding to comp values of 0, 1 and 2 respectively. One-stage compression is carried out according to the type of data. Compression algorithms include delta-delta coding, simple 8B method, zig-zag coding, LZ4 and other algorithms. Two-stage compression is based on one-stage compression and compressed by general compression algorithm, which has higher compression ratio.
When data is written to disk, it is decided whether to compress the data according to system configuration parameter `comp`. TDengine provides three compression options: no compression, one-stage compression and two-stage compression, corresponding to comp values of 0, 1 and 2 respectively. One-stage compression is carried out according to the type of data. Compression algorithms include delta-delta coding, simple 8B method, zig-zag coding, LZ4 and other algorithms. Two-stage compression is based on one-stage compression and compressed by general compression algorithm, which has higher compression ratio.
### Tiered Storage
@ -393,17 +393,15 @@ When client obtains query result, the worker thread in query execution queue of
### Aggregation by Time Axis, Downsampling, Interpolation
The remarkable feature that time-series data is different from ordinary data is that each record has a timestamp, so aggregating data with timestamps on the time axis is an important and unique function from common databases. From this point of view, it is similar to the window query of stream computing engine.
The remarkable feature that time-series data is different from ordinary data is that each record has a timestamp, so aggregating data with timestamps on the time axis is an important and distinct feature from common databases. From this point of view, it is similar to the window query of stream computing engine.
The keyword “interval” is introduced into TDengine to split fixed length time windows on time axis, and the data are aggregated according to time windows, and the data within window range are aggregated as needed. For example:
The keyword `interval` is introduced into TDengine to split fixed length time windows on time axis, and the data are aggregated based on time windows, and the data within window range are aggregated as needed. For example:
```mysql
select count(*) from d1001 interval(1h);
```
According to the data collected by device D1001, the number of records stored per hour is returned by a 1-hour time window.
For the data collected by device D1001, the number of records stored per hour is returned by a 1-hour time window.
In application scenarios where query results need to be obtained continuously, if there is data missing in a given time interval, the data results in this interval will also be lost. TDengine provides a strategy to interpolate the results of timeline aggregation calculation. The results of time axis aggregation can be interpolated by using keyword Fill. For example:
@ -411,11 +409,11 @@ In application scenarios where query results need to be obtained continuously, i
select count(*) from d1001 interval(1h) fill(prev);
```
According to the data collected by device D1001, the number of records per hour is counted. If there is no data in a certain hour, statistical data of the previous hour is returned. TDengine provides forward interpolation (prev), linear interpolation (linear), NULL value populating (NULL), and specific value populating (value).
For the data collected by device D1001, the number of records per hour is counted. If there is no data in a certain hour, statistical data of the previous hour is returned. TDengine provides forward interpolation (prev), linear interpolation (linear), NULL value populating (NULL), and specific value populating (value).
### Multi-table Aggregation Query
TDengine creates a separate table for each data collection point, but in practical applications, it is often necessary to aggregate data from different collection points. In order to perform aggregation operations efficiently, TDengine introduces the concept of STable. STable is used to represent a specific type of data collection point. It is a table set containing multiple tables. The schema of each table in the set is completely consistent, but each table has its own static tag. The tags can be multiple and be added, deleted and modified at any time. Applications can aggregate or statistically operate all or a subset of tables under a STABLE by specifying tag filters, thus greatly simplifying the development of applications. The process is shown in the following figure:
TDengine creates a separate table for each data collection point, but in practical applications, it is often necessary to aggregate data from different data collection points. In order to perform aggregation operations efficiently, TDengine introduces the concept of STable. STable is used to represent a specific type of data collection point. It is a table set containing multiple tables. The schema of each table in the set is the same, but each table has its own static tag. The tags can be multiple and be added, deleted and modified at any time. Applications can aggregate or statistically operate all or a subset of tables under a STABLE by specifying tag filters, thus greatly simplifying the development of applications. The process is shown in the following figure:
![Diagram of multi-table aggregation query](page://images/architecture/multi_tables.png)
<center> Picture 4 Diagram of multi-table aggregation query </center>
@ -427,8 +425,9 @@ TDengine creates a separate table for each data collection point, but in practic
5. Each vnode first finds out the set of tables within its own node that meet the tag filters from memory, then scans the stored time-series data, completes corresponding aggregation calculations, and returns result to taosc;
6. taosc finally aggregates the results returned by multiple data nodes and send them back to application.
Since TDengine stores tag data and time-series data separately in vnode, by filtering tag data in memory, the set of tables that need to participate in aggregation operation is first found, which greatly reduces the volume of data scanned and improves aggregation calculation speed. At the same time, because the data is distributed in multiple vnodes/dnodes, the aggregation calculation operation is carried out concurrently in multiple vnodes, which further improves the aggregation speed. Aggregation functions for ordinary tables and most operations are applicable to STables. The syntax is exactly the same. Please see TAOS SQL for details.
Since TDengine stores tag data and time-series data separately in vnode, by filtering tag data in memory, the set of tables that need to participate in aggregation operation is first found, which greatly reduces the volume of data scanned and improves aggregation speed. At the same time, because the data is distributed in multiple vnodes/dnodes, the aggregation operation is carried out concurrently in multiple vnodes, which further improves the aggregation speed. Aggregation functions for ordinary tables and most operations are applicable to STables. The syntax is exactly the same. Please see TAOS SQL for details.
### Precomputation
In order to effectively improve the performance of query processing, based-on the unchangeable feature of IoT data, statistical information of data stored in data block is recorded in the head of data block, including max value, min value, and sum. We call it a precomputing unit. If the query processing involves all the data of a whole data block, the pre-calculated results are directly used, and no need to read the data block contents at all. Since the amount of pre-calculated data is much smaller than the actual size of data block stored on disk, for query processing with disk IO as bottleneck, the use of pre-calculated results can greatly reduce the pressure of reading IO and accelerate the query process. The precomputation mechanism is similar to the index BRIN (Block Range Index) of PostgreSQL.

View File

@ -2,17 +2,15 @@
TDengine adopts a relational data model, so we need to build the "database" and "table". Therefore, for a specific application scenario, it is necessary to consider the design of the database, STable and ordinary table. This section does not discuss detailed syntax rules, but only concepts.
Please watch the [video tutorial](https://www.taosdata.com/blog/2020/11/11/1945.html) for data modeling.
## <a class="anchor" id="create-db"></a> Create a Database
Different types of data collection points often have different data characteristics, including frequency of data collecting, length of data retention time, number of replicas, size of data blocks, whether to update data or not, and so on. To ensure TDengine working with great efficiency in various scenarios, TDengine suggests creating tables with different data characteristics in different databases, because each database can be configured with different storage strategies. When creating a database, in addition to SQL standard options, the application can also specify a variety of parameters such as retention duration, number of replicas, number of memory blocks, time accuracy, max and min number of records in a file block, whether it is compressed or not, and number of days a data file will be overwritten. For example:
Different types of data collection points often have different data characteristics, including data sampling rate, length of data retention time, number of replicas, size of data blocks, whether to update data or not, and so on. To ensure TDengine working with great efficiency in various scenarios, TDengine suggests creating tables with different data characteristics in different databases, because each database can be configured with different storage strategies. When creating a database, in addition to SQL standard options, the application can also specify a variety of parameters such as retention duration, number of replicas, number of memory blocks, time resolution, max and min number of records in a file block, whether it is compressed or not, and number of days covered by a data file. For example:
```mysql
CREATE DATABASE power KEEP 365 DAYS 10 BLOCKS 6 UPDATE 1;
```
The above statement will create a database named “power”. The data of this database will be kept for 365 days (it will be automatically deleted 365 days later), one data file created per 10 days, and the number of memory blocks is 4 for data updating. For detailed syntax and parameters, please refer to [Data Management section of TAOS SQL](https://www.taosdata.com/en/documentation/taos-sql#management).
The above statement will create a database named “power”. The data of this database will be kept for 365 days (data will be automatically deleted 365 days later), one data file will be created per 10 days, the number of memory blocks is 4, and data updating is allowed. For detailed syntax and parameters, please refer to [Data Management section of TAOS SQL](https://www.taosdata.com/en/documentation/taos-sql#management).
After the database created, please use SQL command USE to switch to the new database, for example:
@ -20,13 +18,12 @@ After the database created, please use SQL command USE to switch to the new data
USE power;
```
Replace the database operating in the current connection with “power”, otherwise, before operating on a specific table, you need to use "database name. table name" to specify the name of database to use.
Specify the database operating in the current connection with “power”, otherwise, before operating on a specific table, you need to use "database-name.table-name" to specify the name of database to use.
**Note:**
- Any table or STable belongs to a database. Before creating a table, a database must be created first.
- Tables in two different databases cannot be JOIN.
- You need to specify a timestamp when creating and inserting records, or querying history records.
## <a class="anchor" id="create-stable"></a> Create a STable
@ -38,11 +35,11 @@ CREATE STABLE meters (ts timestamp, current float, voltage int, phase float) TAG
**Note:** The STABLE keyword in this instruction needs to be written as TABLE in versions before 2.0.15.
Just like creating an ordinary table, you need to provide the table name (meters in the example) and the table structure Schema, that is, the definition of data columns. The first column must be a timestamp (ts in the example), the other columns are the physical metrics collected (current, volume, phase in the example), and the data types can be int, float, string, etc. In addition, you need to provide the schema of the tag (location, groupId in the example), and the data types of the tag can be int, float, string and so on. Static attributes of collection points can often be used as tags, such as geographic location of collection points, device model, device group ID, administrator ID, etc. The schema of the tag can be added, deleted and modified afterwards. Please refer to the [STable Management section of TAOS SQL](https://www.taosdata.com/cn/documentation/taos-sql#super-table) for specific definitions and details.
Just like creating an ordinary table, you need to provide the table name (meters in the example) and the table structure Schema, that is, the definition of data columns. The first column must be a timestamp (ts in the example), the other columns are the physical metrics collected (current, volume, phase in the example), and the data types can be int, float, string, etc. In addition, you need to provide the schema of the tag (location, groupId in the example), and the data types of the tag can be int, float, string and so on. Static attributes of data collection points can often be used as tags, such as geographic location of collection points, device model, device group ID, administrator ID, etc. The schema of the tags can be added, deleted and modified afterwards. Please refer to the [STable Management section of TAOS SQL](https://www.taosdata.com/cn/documentation/taos-sql#super-table) for specific definitions and details.
Each type of data collection point needs an established STable, so an IoT system often has multiple STables. For the power grid, we need to build a STable for smart meters, transformers, buses, switches, etc. For IoT, a device may have multiple data collection points (for example, a fan for wind-driven generator, some collection points capture parameters such as current and voltage, and some capture environmental parameters such as temperature, humidity and wind direction). In this case, multiple STables need to be established for corresponding types of devices. All collected physical metrics contained in one and the same STable must be collected at the same time (with a consistent timestamp).
A STable must be created for each type of data collection point, so an IoT system often has multiple STables. For the power grid, we need to build a STable for smart meters, a STable for transformers, a STable for buses, a STable for switches, etc. For IoT, a device may have multiple data collection points (for example, a fan for wind-driven generator, one data collection point captures metrics such as current and voltage, and one data collection point captures environmental parameters such as temperature, humidity and wind direction). In this case, multiple STables need to be established for corresponding types of devices. All metrics contained in a STable must be collected at the same time (with the same timestamp).
A STable allows up to 1024 columns. If the number of physical metrics collected at a collection point exceeds 1024, multiple STables need to be built to process them. A system can have multiple DBs, and a DB can have one or more STables.
A STable allows up to 1024 columns. If the number of metrics collected at a data collection point exceeds 1024, multiple STables need to be built to process them. A system can have multiple DBs, and a DB can have one or more STables.
## <a class="anchor" id="create-table"></a> Create a Table
@ -54,22 +51,23 @@ CREATE TABLE d1001 USING meters TAGS ("Beijing.Chaoyang", 2);
Where d1001 is the table name, meters is the name of the STable, followed by the specific tag value of tag Location as "Beijing.Chaoyang", and the specific tag value of tag groupId 2. Although the tag value needs to be specified when creating the table, it can be modified afterwards. Please refer to the [Table Management section of TAOS SQL](https://www.taosdata.com/en/documentation/taos-sql#table) for details.
**Note: ** At present, TDengine does not technically restrict the use of a STable of a database (dbA) as a template to create a sub-table of another database (dbB). This usage will be prohibited later, and it is not recommended to use this method to create a table.
**Note: ** At present, TDengine does not technically restrict the use of a STable of a database (dbA) as a template to create a sub-table of another database (dbB). This usage will be prohibited later, and it is not recommended to use this way to create a table.
TDengine suggests to use the globally unique ID of data collection point as a table name (such as device serial number). However, in some scenarios, there is no unique ID, and multiple IDs can be combined into a unique ID. It is not recommended to use a unique ID as tag value.
**Automatic table creating** : In some special scenarios, user is not sure whether the table of a certain data collection point exists when writing data. In this case, the non-existent table can be created by using automatic table building syntax when writing data. If the table already exists, no new table will be created. For example:
**Automatic table creating** : In some special scenarios, user is not sure whether the table of a certain data collection point exists when writing data. In this case, the non-existent table can be created by using automatic table creating syntax when writing data. If the table already exists, no new table will be created. For example:
```mysql
INSERT INTO d1001 USING METERS TAGS ("Beijng.Chaoyang", 2) VALUES (now, 10.2, 219, 0.32);
```
The SQL statement above inserts records (now, 10.2, 219, 0.32) into table d1001. If table d1001 has not been created yet, the STable meters is used as the template to automatically create it, and the tag value "Beijing.Chaoyang", 2 is marked at the same time.
The SQL statement above inserts records (now, 10.2, 219, 0.32) into table d1001. If table d1001 has not been created yet, the STable meters is used as the template to create it automatically, and the tag value "Beijing.Chaoyang", 2 is set at the same time.
For detailed syntax of automatic table building, please refer to the "[Automatic Table Creation When Inserting Records](https://www.taosdata.com/en/documentation/taos-sql#auto_create_table)" section.
## Multi-column Model vs Single-column Model
TDengine supports multi-column model. As long as physical metrics are collected simultaneously by a data collection point (with a consistent timestamp), these metrics can be placed in a STable as different columns. However, there is also an extreme design, a single-column model, in which each collected physical metric is set up separately, so each type of physical metrics is set up separately with a STable. For example, create 3 Stables, one each for current, voltage and phase.
TDengine supports multi-column model. As long as metrics are collected simultaneously by a data collection point (with the same timestamp), these metrics can be placed in a STable as different columns. However, there is also an extreme design, a single-column model, in which a STable is created for each metric. For smart meter example, we need to create 3 Stables, one for current, one for voltage and one for phase.
TDengine recommends using multi-column model as much as possible because of higher insertion and storage efficiency. However, for some scenarios, types of collected metrics often change. In this case, if multi-column model is adopted, the schema definition of STable needs to be modified frequently and the application becomes complicated. To avoid that, single-column model is recommended.
TDengine recommends using multi-column model as much as possible because of higher insertion and storage efficiency. However, for some scenarios, types of collected metrics often change. In this case, if multi-column model is adopted, the structure definition of STable needs to be frequently modified so make the application complicated. To avoid that, single-column model is recommended.

View File

@ -1,22 +1,22 @@
# Efficient Data Writing
TDengine supports multiple interfaces to write data, including SQL, Prometheus, Telegraf, EMQ MQTT Broker, HiveMQ Broker, CSV file, etc. Kafka, OPC and other interfaces will be provided in the future. Data can be inserted in a single piece or in batches, data from one or multiple data collection points can be inserted at the same time. TDengine supports multi-thread insertion, nonsequential data insertion, and also historical data insertion.
TDengine supports multiple ways to write data, including SQL, Prometheus, Telegraf, EMQ MQTT Broker, HiveMQ Broker, CSV file, etc. Kafka, OPC and other interfaces will be provided in the future. Data can be inserted in one single record or in batches, data from one or multiple data collection points can be inserted at the same time. TDengine supports multi-thread insertion, out-of-order data insertion, and also historical data insertion.
## <a class="anchor" id="sql"></a> SQL Writing
## <a class="anchor" id="sql"></a> Data Writing via SQL
Applications insert data by executing SQL insert statements through C/C++, JDBC, GO, or Python Connector, and users can manually enter SQL insert statements to insert data through TAOS Shell. For example, the following insert writes a record to table d1001:
Applications insert data by executing SQL insert statements through C/C++, JDBC, GO, C#, or Python Connector, and users can manually enter SQL insert statements to insert data through TAOS Shell. For example, the following insert writes a record to table d1001:
```mysql
INSERT INTO d1001 VALUES (1538548685000, 10.3, 219, 0.31);
```
TDengine supports writing multiple records at a time. For example, the following command writes two records to table d1001:
TDengine supports writing multiple records in a single statement. For example, the following command writes two records to table d1001:
```mysql
INSERT INTO d1001 VALUES (1538548684000, 10.2, 220, 0.23) (1538548696650, 10.3, 218, 0.25);
```
TDengine also supports writing data to multiple tables at a time. For example, the following command writes two records to d1001 and one record to d1002:
TDengine also supports writing data to multiple tables in a single statement. For example, the following command writes two records to d1001 and one record to d1002:
```mysql
INSERT INTO d1001 VALUES (1538548685000, 10.3, 219, 0.31) (1538548695000, 12.6, 218, 0.33) d1002 VALUES (1538548696800, 12.3, 221, 0.31);
@ -26,22 +26,22 @@ For the SQL INSERT Grammar, please refer to [Taos SQL insert](https://www.taosd
**Tips:**
- To improve writing efficiency, batch writing is required. The more records written in a batch, the higher the insertion efficiency. However, a record cannot exceed 16K, and the total length of an SQL statement cannot exceed 64K (it can be configured by parameter maxSQLLength, and the maximum can be configured to 1M).
- TDengine supports multi-thread parallel writing. To further improve writing speed, a client needs to open more than 20 threads to write parallelly. However, after the number of threads reaches a certain threshold, it cannot be increased or even become decreased, because too much frequent thread switching brings extra overhead.
- For a same table, if the timestamp of a newly inserted record already exists, (no database was created using UPDATE 1) the new record will be discarded as default, that is, the timestamp must be unique in a table. If an application automatically generates records, it is very likely that the generated timestamps will be the same, so the number of records successfully inserted will be smaller than the number of records the application try to insert. If you use UPDATE 1 option when creating a database, inserting a new record with the same timestamp will overwrite the original record.
- To improve writing efficiency, batch writing is required. The more records written in a batch, the higher the insertion efficiency. However, a record size cannot exceed 16K, and the total length of an SQL statement cannot exceed 64K (it can be configured by parameter maxSQLLength, and the maximum can be configured to 1M).
- TDengine supports multi-thread parallel writing. To further improve writing speed, a client needs to open more than 20 threads to write parallelly. However, after the number of threads reaches a certain threshold, it cannot be increased or even become decreased, because too much thread switching brings extra overhead.
- For the same table, if the timestamp of a newly inserted record already exists, the new record will be discarded as default (database option update = 0), that is, the timestamp must be unique in a table. If an application automatically generates records, it is very likely that the generated timestamps will be the same, so the number of records successfully inserted will be smaller than the number of records the application try to insert. If you use UPDATE 1 option when creating a database, inserting a new record with the same timestamp will overwrite the original record.
- The timestamp of written data must be greater than the current time minus the time of configuration parameter keep. If keep is configured for 3650 days, data older than 3650 days cannot be written. The timestamp for writing data cannot be greater than the current time plus configuration parameter days. If days is configured to 2, data 2 days later than the current time cannot be written.
## <a class="anchor" id="prometheus"></a> Direct Writing of Prometheus
## <a class="anchor" id="prometheus"></a> Data Writing via Prometheus
As a graduate project of Cloud Native Computing Foundation, [Prometheus](https://www.prometheus.io/) is widely used in the field of performance monitoring and K8S performance monitoring. TDengine provides a simple tool [Bailongma](https://github.com/taosdata/Bailongma), which only needs to be simply configured in Prometheus without any code, and can directly write the data collected by Prometheus into TDengine, then automatically create databases and related table entries in TDengine according to rules. Blog post [Use Docker Container to Quickly Build a Devops Monitoring Demo](https://www.taosdata.com/blog/2020/02/03/1189.html), which is an example of using bailongma to write Prometheus and Telegraf data into TDengine.
### Compile blm_prometheus From Source
Users need to download the source code of [Bailongma](https://github.com/taosdata/Bailongma) from github, then compile and generate an executable file using Golang language compiler. Before you start compiling, you need to complete following prepares:
Users need to download the source code of [Bailongma](https://github.com/taosdata/Bailongma) from github, then compile and generate an executable file using Golang language compiler. Before you start compiling, you need to prepare:
- A server running Linux OS
- Golang version 1.10 and higher installed
- An appropriated TDengine version. Because the client dynamic link library of TDengine is used, it is necessary to install the same version of TDengine as the server-side; for example, if the server version is TDengine 2.0. 0, ensure install the same version on the linux server where bailongma is located (can be on the same server as TDengine, or on a different server)
- Since the client dynamic link library of TDengine is used, it is necessary to install the same version of TDengine as the server-side. For example, if the server version is TDengine 2.0. 0, ensure install the same version on the linux server where bailongma is located (can be on the same server as TDengine, or on a different server)
Bailongma project has a folder, blm_prometheus, which holds the prometheus writing API. The compiling process is as follows:
@ -134,7 +134,7 @@ The format of generated data by Prometheus is as follows:
}
```
Where apiserver_request_latencies_bucket is the name of the time-series data collected by prometheus, and the tag of the time-series data is in the following {}. blm_prometheus automatically creates a STable in TDengine with the name of the time series data, and converts the tag in {} into the tag value of TDengine, with Timestamp as the timestamp and value as the value of the time-series data. Therefore, in the client of TDEngine, you can check whether this data was successfully written through the following instruction.
Where apiserver_request_latencies_bucket is the name of the time-series data collected by prometheus, and the tag of the time-series data is in the following {}. blm_prometheus automatically creates a STable in TDengine with the name of the time series data, and converts the tag in {} into the tag value of TDengine, with Timestamp as the timestamp and value as the value of the time-series data. Therefore, in the client of TDengine, you can check whether this data was successfully written through the following instruction.
```mysql
use prometheus;
@ -144,7 +144,7 @@ select * from apiserver_request_latencies_bucket;
## <a class="anchor" id="telegraf"></a> Direct Writing of Telegraf
## <a class="anchor" id="telegraf"></a> Data Writing via Telegraf
[Telegraf](https://www.influxdata.com/time-series-platform/telegraf/) is a popular open source tool for IT operation data collection. TDengine provides a simple tool [Bailongma](https://github.com/taosdata/Bailongma), which only needs to be simply configured in Telegraf without any code, and can directly write the data collected by Telegraf into TDengine, then automatically create databases and related table entries in TDengine according to rules. Blog post [Use Docker Container to Quickly Build a Devops Monitoring Demo](https://www.taosdata.com/blog/2020/02/03/1189.html), which is an example of using bailongma to write Prometheus and Telegraf data into TDengine.
@ -271,12 +271,12 @@ select * from cpu;
MQTT is a popular data transmission protocol in the IoT. TDengine can easily access the data received by MQTT Broker and write it to TDengine.
## <a class="anchor" id="emq"></a> Direct Writing of EMQ Broker
## <a class="anchor" id="emq"></a> Data Writing via EMQ Broker
[EMQ](https://github.com/emqx/emqx) is an open source MQTT Broker software, with no need of coding, only to use "rules" in EMQ Dashboard for simple configuration, and MQTT data can be directly written into TDengine. EMQ X supports storing data to the TDengine by sending it to a Web service, and also provides a native TDengine driver on Enterprise Edition for direct data store. Please refer to [EMQ official documents](https://docs.emqx.io/broker/latest/cn/rule/rule-example.html#%E4%BF%9D%E5%AD%98%E6%95%B0%E6%8D%AE%E5%88%B0-tdengine) for more details.
## <a class="anchor" id="hivemq"></a> Direct Writing of HiveMQ Broker
## <a class="anchor" id="hivemq"></a> Data Writing via HiveMQ Broker
[HiveMQ](https://www.hivemq.com/) is an MQTT agent that provides Free Personal and Enterprise Edition versions. It is mainly used for enterprises, emerging machine-to-machine(M2M) communication and internal transmission to meet scalability, easy management and security features. HiveMQ provides an open source plug-in development kit. You can store data to TDengine via HiveMQ extension-TDengine. Refer to the [HiveMQ extension-TDengine documentation](https://github.com/huskar-t/hivemq-tdengine-extension/blob/b62a26ecc164a310104df57691691b237e091c89/README.md) for more details.
[HiveMQ](https://www.hivemq.com/) is an MQTT agent that provides Free Personal and Enterprise Edition versions. It is mainly used for enterprises, emerging machine-to-machine(M2M) communication and internal transmission to meet scalability, easy management and security features. HiveMQ provides an open source plug-in development kit. You can store data to TDengine via HiveMQ extension-TDengine. Refer to the [HiveMQ extension-TDengine documentation](https://github.com/huskar-t/hivemq-tdengine-extension/blob/b62a26ecc164a310104df57691691b237e091c89/README.md) for more details.

View File

@ -28,7 +28,7 @@ For specific query syntax, please see the [Data Query section of TAOS SQL](https
## <a class="anchor" id="aggregation"></a> Multi-table Aggregation Query
In an IoT scenario, there are often multiple data collection points in a same type. TDengine uses the concept of STable to describe a certain type of data collection point, and an ordinary table to describe a specific data collection point. At the same time, TDengine uses tags to describe the statical attributes of data collection points. A given data collection point has a specific tag value. By specifying the filters of tags, TDengine provides an efficient method to aggregate and query the sub-tables of STables (data collection points of a certain type). Aggregation functions and most operations on ordinary tables are applicable to STables, and the syntax is exactly the same.
In an IoT scenario, there are often multiple data collection points in a same type. TDengine uses the concept of STable to describe a certain type of data collection point, and an ordinary table to describe a specific data collection point. At the same time, TDengine uses tags to describe the static attributes of data collection points. A given data collection point has a specific tag value. By specifying the filters of tags, TDengine provides an efficient method to aggregate and query the sub-tables of STables (data collection points of a certain type). Aggregation functions and most operations on ordinary tables are applicable to STables, and the syntax is exactly the same.
**Example 1**: In TAOS Shell, look up the average voltages collected by all smart meters in Beijing and group them by location
@ -55,7 +55,7 @@ TDengine only allows aggregation queries between tables belonging to a same STab
## <a class="anchor" id="sampling"></a> Down Sampling Query, Interpolation
In a scenario of IoT, it is often necessary to aggregate the collected data by intervals through down sampling. TDengine provides a simple keyword interval, which makes query operations according to time windows extremely simple. For example, the current values collected by smart meter d1001 are summed every 10 seconds.
In a scenario of IoT, it is often necessary to aggregate the collected data by intervals through down sampling. TDengine provides a simple keyword `interval`, which makes query operations according to time windows extremely simple. For example, the current values collected by smart meter d1001 are summed every 10 seconds.
```mysql
taos> SELECT sum(current) FROM d1001 INTERVAL(10s);
@ -94,6 +94,6 @@ taos> SELECT SUM(current) FROM meters INTERVAL(1s, 500a);
Query OK, 5 row(s) in set (0.001521s)
```
In a scenario of IoT, it is difficult to synchronize the time stamp of collected data at each point, but many analysis algorithms (such as FFT) need to align the collected data strictly at equal intervals of time. In many systems, its required to write their own programs to process, but the down sampling operation of TDengine can be easily solved. If there is no collected data in an interval, TDengine also provides interpolation calculation function.
In IoT scenario, it is difficult to synchronize the time stamp of collected data at each point, but many analysis algorithms (such as FFT) need to align the collected data strictly at equal intervals of time. In many systems, its required to write their own programs to process, but the down sampling operation of TDengine can be used to solve the problem easily. If there is no collected data in an interval, TDengine also provides interpolation calculation function.
For details of syntax rules, please refer to the [Time-dimension Aggregation section of TAOS SQL](https://www.taosdata.com/en/documentation/taos-sql#aggregation).

View File

@ -9,8 +9,8 @@ Continuous query of TDengine adopts time-driven mode, which can be defined direc
The continuous query provided by TDengine differs from the time window calculation in ordinary stream computing in the following ways:
- Unlike the real-time feedback calculated results of stream computing, continuous query only starts calculation after the time window is closed. For example, if the time period is 1 day, the results of that day will only be generated after 23:59:59.
- If a history record is written to the time interval that has been calculated, the continuous query will not recalculate and will not push the results to the user again. For the mode of writing back to TDengine, the existing calculated results will not be updated.
- Using the mode of continuous query pushing results, the server does not cache the client's calculation status, nor does it provide Exactly-Once semantic guarantee. If the user's application side crashed, the continuous query pulled up again would only recalculate the latest complete time window from the time pulled up again. If writeback mode is used, TDengine can ensure the validity and continuity of data writeback.
- If a history record is written to the time interval that has been calculated, the continuous query will not re-calculate and will not push the new results to the user again.
- TDengine server does not cache or save the client's status, nor does it provide Exactly-Once semantic guarantee. If the application crashes, the continuous query will be pull up again and starting time must be provided by the application.
### How to use continuous query
@ -29,7 +29,7 @@ We already know that the average voltage of these meters can be counted with one
select avg(voltage) from meters interval(1m) sliding(30s);
```
Every time this statement is executed, all data will be recalculated. If you need to execute every 30 seconds to incrementally calculate the data of the latest minute, you can improve the above statement as following, using a different `startTime` each time and executing it regularly:
Every time this statement is executed, all data will be re-calculated. If you need to execute every 30 seconds to incrementally calculate the data of the latest minute, you can improve the above statement as following, using a different `startTime` each time and executing it regularly:
```sql
select avg(voltage) from meters where ts > {startTime} interval(1m) sliding(30s);
@ -65,7 +65,7 @@ It should be noted that now in the above example refers to the time when continu
### Manage the Continuous Query
Users can view all continuous queries running in the system through the show streams command in the console, and can kill the corresponding continuous queries through the kill stream command. Subsequent versions will provide more finer-grained and convenient continuous query management commands.
Users can view all continuous queries running in the system through the `show streams` command in the console, and can kill the corresponding continuous queries through the `kill stream` command. Subsequent versions will provide more finer-grained and convenient continuous query management commands.
## <a class="anchor" id="subscribe"></a> Publisher/Subscriber
@ -101,7 +101,7 @@ Another method is to query the STable. In this way, no matter how many meters th
select * from meters where ts > {last_timestamp} and current > 10;
```
However, how to choose `last_timestamp` has become a new problem. Because, on the one hand, the time of data generation (the data timestamp) and the time of data storage are generally not the same, and sometimes the deviation is still very large; On the other hand, the time when the data of different meters arrive at TDengine will also vary. Therefore, if we use the timestamp of the data from the slowest meter as `last_timestamp` in the query, we may repeatedly read the data of other meters; If the timestamp of the fastest meter is used, the data of other meters may be missed.
However, how to choose `last_timestamp` has become a new problem. Because, on the one hand, the time of data generation (the data timestamp) and the time of data writing are generally not the same, and sometimes the deviation is still very large; On the other hand, the time when the data of different meters arrive at TDengine will also vary. Therefore, if we use the timestamp of the data from the slowest meter as `last_timestamp` in the query, we may repeatedly read the data of other meters; If the timestamp of the fastest meter is used, the data of other meters may be missed.
The subscription function of TDengine provides a thorough solution to the above problem.
@ -357,4 +357,4 @@ This SQL statement will obtain the last recorded voltage value of all smart mete
In scenarios of TDengine, alarm monitoring is a common requirement. Conceptually, it requires the program to filter out data that meet certain conditions from the data of the latest period of time, and calculate a result according to a defined formula based on these data. When the result meets certain conditions and lasts for a certain period of time, it will notify the user in some form.
In order to meet the needs of users for alarm monitoring, TDengine provides this function in the form of an independent module. For its installation and use, please refer to the blog [How to Use TDengine for Alarm Monitoring](https://www.taosdata.com/blog/2020/04/14/1438.html).
In order to meet the needs of users for alarm monitoring, TDengine provides this function in the form of an independent module. For its installation and use, please refer to the blog [How to Use TDengine for Alarm Monitoring](https://www.taosdata.com/blog/2020/04/14/1438.html).

View File

@ -66,7 +66,11 @@ Run install_client.sh to install.
Edit the taos.cfg file (default path/etc/taos/taos.cfg) and change firstEP to End Point of the TDengine server, for example: [h1.taos.com](http://h1.taos.com/):6030.
**Tip: If no TDengine service deployed in this machine, but only the application driver is installed, only firstEP needs to be configured in taos.cfg, and FQDN does not.**
**Tip: **
**1. If no TDengine service deployed in this machine, but only the application driver is installed, only firstEP needs to be configured in taos.cfg, and FQDN does not.**
**2. To prevent “unable to resolve FQDN” error when connecting to the server, ensure that the hosts file of the client has the correct FQDN value.**
**Windows x64/x86**
@ -128,7 +132,7 @@ taos>
**Windows (x64/x86) environment:**
Under cmd, enter the c:\ tdengine directory and directly execute taos.exe, and you should be able to connect to tdengine service normally and jump to taos shell interface. For example:
Under cmd, enter the c:\TDengine directory and directly execute taos.exe, and you should be able to connect to tdengine service normally and jump to taos shell interface. For example:
```mysql
C:\TDengine>taos
@ -179,7 +183,7 @@ Clean up the running environment and call this API before the application exits.
- `int taos_options(TSDB_OPTION option, const void * arg, ...)`
Set client options, currently only time zone setting (`_TSDB_OPTIONTIMEZONE`) and encoding setting (`_TSDB_OPTIONLOCALE`) are supported. The time zone and encoding default to the current operating system settings.
Set client options, currently only time zone setting (_TSDB_OPTIONTIMEZONE) and encoding setting (_TSDB_OPTIONLOCALE) are supported. The time zone and encoding default to the current operating system settings.
- `char *taos_get_client_info()`

View File

@ -2,7 +2,7 @@
## <a class="anchor" id="grafana"></a> Grafana
TDengine can quickly integrate with [Grafana](https://www.grafana.com/), an open source data visualization system, to build a data monitoring and alarming system. The whole process does not require any code to write. The contents of the data table in TDengine can be visually showed on DashBoard.
TDengine can be quickly integrated with [Grafana](https://www.grafana.com/), an open source data visualization system, to build a data monitoring and alarming system. The whole process does not require any code to write. The contents of the data table in TDengine can be visually showed on DashBoard.
### Install Grafana

View File

@ -1,8 +1,8 @@
# TDengine Cluster Management
Multiple TDengine servers, that is, multiple running instances of taosd, can form a cluster to ensure the highly reliable operation of TDengine and provide scale-out features. To understand cluster management in TDengine 2.0, it is necessary to understand the basic concepts of clustering. Please refer to the chapter "Overall Architecture of TDengine 2.0". And before installing the cluster, please follow the chapter ["Getting started"](https://www.taosdata.com/en/documentation/getting-started/) to install and experience the single node function.
Multiple TDengine servers, that is, multiple running instances of taosd, can form a cluster to ensure the highly reliable operation of TDengine and provide scale-out features. To understand cluster management in TDengine 2.0, it is necessary to understand the basic concepts of clustering. Please refer to the chapter "Overall Architecture of TDengine 2.0". And before installing the cluster, please follow the chapter ["Getting started"](https://www.taosdata.com/en/documentation/getting-started/) to install and experience the single node TDengine.
Each data node of the cluster is uniquely identified by End Point, which is composed of FQDN (Fully Qualified Domain Name) plus Port, such as [h1.taosdata.com](http://h1.taosdata.com/):6030. The general FQDN is the hostname of the server, which can be obtained through the Linux command `hostname -f` (how to configure FQDN, please refer to: [All about FQDN of TDengine](https://www.taosdata.com/blog/2020/09/11/1824.html)). Port is the external service port number of this data node. The default is 6030, but it can be modified by configuring the parameter serverPort in taos.cfg. A physical node may be configured with multiple hostnames, and TDengine will automatically get the first one, but it can also be specified through the configuration parameter fqdn in taos.cfg. If you are accustomed to direct IP address access, you can set the parameter fqdn to the IP address of this node.
Each data node of the cluster is uniquely identified by End Point, which is composed of FQDN (Fully Qualified Domain Name) plus Port, such as [h1.taosdata.com](http://h1.taosdata.com/):6030. The general FQDN is the hostname of the server, which can be obtained through the Linux command `hostname -f` (how to configure FQDN, please refer to: [All about FQDN of TDengine](https://www.taosdata.com/blog/2020/09/11/1824.html)). Port is the external service port number of this data node. The default is 6030, but it can be modified by configuring the parameter serverPort in taos.cfg. A physical node may be configured with multiple hostnames, and TDengine will automatically get the first one, but it can also be specified through the configuration parameter `fqdn` in taos.cfg. If you want to access via direct IP address, you can set the parameter `fqdn` to the IP address of this node.
The cluster management of TDengine is extremely simple. Except for manual intervention in adding and deleting nodes, all other tasks are completed automatically, thus minimizing the workload of operation. This chapter describes the operations of cluster management in detail.
@ -12,7 +12,7 @@ Please refer to the [video tutorial](https://www.taosdata.com/blog/2020/11/11/19
**Step 0:** Plan FQDN of all physical nodes in the cluster, and add the planned FQDN to /etc/hostname of each physical node respectively; modify the /etc/hosts of each physical node, and add the corresponding IP and FQDN of all cluster physical nodes. [If DNS is deployed, contact your network administrator to configure it on DNS]
**Step 1:** If the physical nodes have previous test data, installed with version 1. x, or installed with other versions of TDengine, please delete it first and drop all data. For specific steps, please refer to the blog "[Installation and Uninstallation of Various Packages of TDengine](https://www.taosdata.com/blog/2019/08/09/566.html)"
**Step 1:** If the physical nodes have previous test data, installed with version 1. x, or installed with other versions of TDengine, please backup all data, then delete it and drop all data. For specific steps, please refer to the blog "[Installation and Uninstallation of Various Packages of TDengine](https://www.taosdata.com/blog/2019/08/09/566.html)"
**Note 1:** Because the information of FQDN will be written into a file, if FQDN has not been configured or changed before, and TDengine has been started, be sure to clean up the previous data `rm -rf /var/lib/taos/*`on the premise of ensuring that the data is useless or backed up;
@ -136,7 +136,7 @@ Execute the CLI program taos, log in to the TDengine system using the root accou
DROP DNODE "fqdn:port";
```
Where fqdn is the FQDN of the deleted node, and port is the port number of its external server.
Where fqdn is the FQDN of the deleted node, and port is the port number.
<font color=green>**【Note】**</font>
@ -185,7 +185,7 @@ Because of the introduction of vnode, it is impossible to simply draw a conclusi
TDengine cluster is managed by mnode (a module of taosd, management node). In order to ensure the high-availability of mnode, multiple mnode replicas can be configured. The number of replicas is determined by system configuration parameter numOfMnodes, and the effective range is 1-3. In order to ensure the strong consistency of metadata, mnode replicas are duplicated synchronously.
A cluster has multiple data node dnodes, but a dnode runs at most one mnode instance. In the case of multiple dnodes, which dnode can be used as an mnode? This is automatically specified by the system according to the resource situation on the whole. User can execute the following command in the console of TDengine through the CLI program taos:
A cluster has multiple data node dnodes, but a dnode runs at most one mnode instance. In the case of multiple dnodes, which dnode can be used as an mnode? This is automatically selected by the system based on the resource on the whole. User can execute the following command in the console of TDengine through the CLI program taos:
```
SHOW MNODES;
@ -213,7 +213,7 @@ When the above three situations occur, the system will start a load computing of
If a data node is offline, the TDengine cluster will automatically detect it. There are two detailed situations:
- If the data node is offline for more than a certain period of time (configuration parameter offlineThreshold in taos.cfg controls the duration), the system will automatically delete the data node, generate system alarm information and trigger the load balancing process. If the deleted data node is online again, it will not be able to join the cluster, and the system administrator will need to add it to the cluster again.
- If the data node is offline for more than a certain period of time (configuration parameter `offlineThreshold` in taos.cfg controls the duration), the system will automatically delete the data node, generate system alarm information and trigger the load balancing process. If the deleted data node is online again, it will not be able to join the cluster, and the system administrator will need to add it to the cluster again.
- After offline, the system will automatically start the data recovery process if it goes online again within the duration of offlineThreshold. After the data is fully recovered, the node will start to work normally.
**Note:** If each data node belonging to a virtual node group (including mnode group) is in offline or unsynced state, Master can only be elected after all data nodes in the virtual node group are online and can exchange status information, and the virtual node group can serve externally. For example, the whole cluster has 3 data nodes with 3 replicas. If all 3 data nodes go down and then 2 data nodes restart, it will not work. Only when all 3 data nodes restart successfully can serve externally again.
@ -229,7 +229,7 @@ The name of the executable for Arbitrator is tarbitrator. The executable has alm
1. Click [Package Download](https://www.taosdata.com/cn/all-downloads/), and in the TDengine Arbitrator Linux section, select the appropriate version to download and install.
2. The command line parameter -p of this application can specify the port number of its external service, and the default is 6042.
2. The command line parameter -p of this application can specify the port number of its service, and the default is 6042.
3. Modify the configuration file of each taosd instance, and set parameter arbitrator to the End Point corresponding to the tarbitrator in taos.cfg. (If this parameter is configured, when the number of replicas is even, the system will automatically connect the configured Arbitrator. If the number of replicas is odd, even if the Arbitrator is configured, the system will not establish a connection.)
4. The Arbitrator configured in the configuration file will appear in the return result of instruction `SHOW DNODES`; the value of the corresponding role column will be "arb".

View File

@ -22,8 +22,8 @@ If there is plenty of memory, the configuration of Blocks can be increased so th
CPU requirements depend on the following two aspects:
- **Data insertion** TDengine single core can handle at least 10,000 insertion requests per second. Each insertion request can take multiple records, and inserting one record at a time is almost the same as inserting 10 records in computing resources consuming. Therefore, the larger the number of inserts, the higher the insertion efficiency. If an insert request has more than 200 records, a single core can insert 1 million records per second. However, the faster the insertion speed, the higher the requirement for front-end data collection, because records need to be cached and then inserted in batches.
- **Query requirements** TDengine to provide efficient queries, but the queries in each scenario vary greatly and the query frequency too, making it difficult to give objective figures. Users need to write some query statements for their own scenes to determine.
- **Data insertion**: TDengine single core can handle at least 10,000 insertion requests per second. Each insertion request can take multiple records, and inserting one record at a time is almost the same as inserting 10 records in computing resources consuming. Therefore, the larger the number of records per insert, the higher the insertion efficiency. If an insert request has more than 200 records, a single core can insert 1 million records per second. However, the faster the insertion speed, the higher the requirement for front-end data collection, because records need to be cached and then inserted in batches.
- **Query**: TDengine provides efficient queries, but the queries in each scenario vary greatly and the query frequency too, making it difficult to give objective figures. Users need to write some query statements for their own scenes to estimate.
Therefore, only for data insertion, CPU can be estimated, but the computing resources consumed by query cannot be that clear. In the actual operation, it is not recommended to make CPU utilization rate over 50%. After that, new nodes need to be added to bring more computing resources.
@ -78,7 +78,7 @@ When the nodes in TDengine cluster are deployed on different physical machines a
## <a class="anchor" id="config"></a> Server-side Configuration
The background service of TDengine system is provided by taosd, and the configuration parameters can be modified in the configuration file taos.cfg to meet the requirements of different scenarios. The default location of the configuration file is the /etc/taos directory, which can be specified by executing the parameter -c from the taosd command line. Such as taosd-c/home/user, to specify that the configuration file is located in the /home/user directory.
The background service of TDengine system is provided by taosd, and the configuration parameters can be modified in the configuration file taos.cfg to meet the requirements of different scenarios. The default location of the configuration file is the /etc/taos directory, which can be specified by executing the parameter `-c` from the taosd command line. Such as `taosd -c /home/user`, to specify that the configuration file is located in the /home/user directory.
You can also use “-C” to show the current server configuration parameters:
@ -88,14 +88,14 @@ taosd -C
Only some important configuration parameters are listed below. For more parameters, please refer to the instructions in the configuration file. Please refer to the previous chapters for detailed introduction and function of each parameter, and the default of these parameters is working and generally does not need to be set. **Note: After the configuration is modified, \*taosd service\* needs to be restarted to take effect.**
- firstEp: end point of the first dnode in the actively connected cluster when taosd starts, the default value is localhost: 6030.
- fqdn: FQDN of the data node, which defaults to the first hostname configured by the operating system. If you are accustomed to IP address access, you can set it to the IP address of the node.
- firstEp: end point of the first dnode which will be connected in the cluster when taosd starts, the default value is localhost: 6030.
- fqdn: FQDN of the data node, which defaults to the first hostname configured by the operating system. If you want to access via IP address directly, you can set it to the IP address of the node.
- serverPort: the port number of the external service after taosd started, the default value is 6030.
- httpPort: the port number used by the RESTful service to which all HTTP requests (TCP) require a query/write request. The default value is 6041.
- dataDir: the data file directory to which all data files will be written. [Default:/var/lib/taos](http://default/var/lib/taos).
- logDir: the log file directory to which the running log files of the client and server will be written. [Default:/var/log/taos](http://default/var/log/taos).
- arbitrator: the end point of the arbiter in the system; the default value is null.
- role: optional role for dnode. 0-any; it can be used as an mnode and to allocate vnodes; 1-mgmt; It can only be an mnode, but not to allocate vnodes; 2-dnode; caannot be an mnode, only vnode can be allocated
- arbitrator: the end point of the arbitrator in the system; the default value is null.
- role: optional role for dnode. 0-any; it can be used as an mnode and to allocate vnodes; 1-mgmt; It can only be an mnode, but not to allocate vnodes; 2-dnode; cannot be an mnode, only vnode can be allocated
- debugFlage: run the log switch. 131 (output error and warning logs), 135 (output error, warning, and debug logs), 143 (output error, warning, debug, and trace logs). Default value: 131 or 135 (different modules have different default values).
- numOfLogLines: the maximum number of lines allowed for a single log file. Default: 10,000,000 lines.
- logKeepDays: the maximum retention time of the log file. When it is greater than 0, the log file will be renamed to taosdlog.xxx, where xxx is the timestamp of the last modification of the log file in seconds. Default: 0 days.
@ -161,18 +161,18 @@ For example:
## <a class="anchor" id="client"></a> Client Configuration
The foreground interactive client application of TDengine system is taos and application driver, which shares the same configuration file taos.cfg with taosd. When running taos, use the parameter -c to specify the configuration file directory, such as taos-c/home/cfg, which means using the parameters in the taos.cfg configuration file under the /home/cfg/ directory. The default directory is /etc/taos. For more information on how to use taos, see the help information taos --help. This section mainly describes the parameters used by the taos client application in the configuration file taos.cfg.
The foreground interactive client application of TDengine system is taos and application driver, which shares the same configuration file taos.cfg with taosd. When running taos, use the parameter `-c` to specify the configuration file directory, such as `taos -c /home/cfg`, which means using the parameters in the taos.cfg configuration file under the /home/cfg/ directory. The default directory is /etc/taos. For more information on how to use taos, see the help information `taos --help`. This section mainly describes the parameters used by the taos client application in the configuration file taos.cfg.
**Versions after 2.0. 10.0 support the following parameters on command line to display the current client configuration parameters**
```bash
taos -C taos --dump-config
taos -C or taos --dump-config
```
Client configuration parameters:
- firstEp: end point of the first taosd instance in the actively connected cluster when taos is started, the default value is localhost: 6030.
- secondEp: when taos starts, if not impossible to connect to firstEp, it will try to connect to secondEp.
- secondEp: when taos starts, if unable to connect to firstEp, it will try to connect to secondEp.
- locale
Default value: obtained dynamically from the system. If the automatic acquisition fails, user needs to set it in the configuration file or through API
@ -493,4 +493,4 @@ At the moment, TDengine has nearly 200 internal reserved keywords, which cannot
| CONCAT | GLOB | METRICS | SET | VIEW |
| CONFIGS | GRANTS | MIN | SHOW | WAVG |
| CONFLICT | GROUP | MINUS | SLASH | WHERE |
| CONNECTION | | | | |
| CONNECTION | | | | |

View File

@ -1,8 +1,8 @@
# TAOS SQL
TDengine provides a SQL-style language, TAOS SQL, to insert or query data, and support other common tips. To finish this document, you should have some understanding about SQL.
TDengine provides a SQL-style language, TAOS SQL, to insert or query data. This document introduces TAOS SQL and supports other common tips. To read through this document, readers should have basic understanding about SQL.
TAOS SQL is the main tool for users to write and query data to TDengine. TAOS SQL provides a style and mode similar to standard SQL to facilitate users to get started quickly. Strictly speaking, TAOS SQL is not and does not attempt to provide SQL standard syntax. In addition, since TDengine does not provide deletion function for temporal structured data, the relevant function of data deletion is non-existent in TAO SQL.
TAOS SQL is the main tool for users to write and query data into/from TDengine. TAOS SQL provides a syntax style similar to standard SQL to facilitate users to get started quickly. Strictly speaking, TAOS SQL is not and does not attempt to provide SQL standard syntax. In addition, since TDengine does not provide deletion functionality for time-series data, the relevant functions of data deletion is unsupported in TAO SQL.
Lets take a look at the conventions used for syntax descriptions.
@ -127,7 +127,7 @@ Note:
ALTER DATABASE db_name CACHELAST 0;
```
CACHELAST parameter controls whether last_row of the data subtable is cached in memory. The default value is 0, and the value range is [0, 1]. Where 0 means not enabled and 1 means enabled. (supported from version 2.0. 11)
**Tips**: After all the above parameters are modified, show databases can be used to confirm whether the modification is successful.
- **Show all databases in system**
@ -138,14 +138,17 @@ Note:
## <a class="anchor" id="table"></a> Table Management
- Create a table
Note:
- **Create a table**
1. The first field must be a timestamp, and system will set it as the primary key;
2. The max length of table name is 192;
3. The length of each row of the table cannot exceed 16k characters;
4. Sub-table names can only consist of letters, numbers, and underscores, and cannot begin with numbers
5. If the data type binary or nchar is used, the maximum number of bytes should be specified, such as binary (20), which means 20 bytes;
```mysql
CREATE TABLE [IF NOT EXISTS] tb_name (timestamp_field_name TIMESTAMP, field1_name data_type1 [, field2_name data_type2 ...]);
```
Note:
1. The first field must be a timestamp, and system will set it as the primary key;
2. The max length of table name is 192;
3. The length of each row of the table cannot exceed 16k characters;
4. Sub-table names can only consist of letters, numbers, and underscores, and cannot begin with numbers
5. If the data type binary or nchar is used, the maximum number of bytes should be specified, such as binary (20), which means 20 bytes;
- **Create a table via STable**
@ -171,10 +174,10 @@ Note:
Note:
1. The method of batch creating tables requires that the data table must use STable as a template.
2. On the premise of not exceeding the length limit of SQL statements, it is suggested that the number of tables in a single statement should be controlled between 1000 and 3000, which will obtain an ideal speed of table building.
2. On the premise of not exceeding the length limit of SQL statements, it is suggested that the number of tables in a single statement should be controlled between 1000 and 3000, which will obtain an ideal speed of table creating.
- **Drop a table**
```mysql
DROP TABLE [IF EXISTS] tb_name;
```
@ -218,7 +221,7 @@ Note:
## <a class="anchor" id="super-table"></a> STable Management
Note: In 2.0. 15.0 and later versions, STABLE reserved words are supported. That is, in the instruction description later in this section, the three instructions of CREATE, DROP and ALTER need to write TABLE instead of STABLE in the old version as the reserved word.
Note: In 2.0.15.0 and later versions, STABLE reserved words are supported. That is, in the instruction description later in this section, the three instructions of CREATE, DROP and ALTER need to write TABLE instead of STABLE in the old version as the reserved word.
- **Create a STable**
@ -290,7 +293,7 @@ Note: In 2.0. 15.0 and later versions, STABLE reserved words are supported. That
Modify a tag name of STable. After modifying, all sub-tables under the STable will automatically update the new tag name.
- **Modify a tag value of sub-table**
```mysql
ALTER TABLE tb_name SET TAG tag_name=new_tag_value;
```
@ -306,7 +309,7 @@ Note: In 2.0. 15.0 and later versions, STABLE reserved words are supported. That
Insert a record into table tb_name.
- **Insert a record with data corresponding to a given column**
```mysql
INSERT INTO tb_name (field1_name, ...) VALUES (field1_value1, ...);
```
@ -320,14 +323,14 @@ Note: In 2.0. 15.0 and later versions, STABLE reserved words are supported. That
Insert multiple records into table tb_name.
- **Insert multiple records into a given column**
```mysql
INSERT INTO tb_name (field1_name, ...) VALUES (field1_value1, ...) (field1_value2, ...) ...;
```
Insert multiple records into a given column of table tb_name.
- **Insert multiple records into multiple tables**
```mysql
INSERT INTO tb1_name VALUES (field1_value1, ...) (field1_value2, ...) ...
tb2_name VALUES (field1_value1, ...) (field1_value2, ...) ...;
@ -421,7 +424,7 @@ taos> SELECT * FROM d1001;
Query OK, 3 row(s) in set (0.001165s)
```
For Stables, wildcards contain *tag columns*.
For STables, wildcards contain *tag columns*.
```mysql
taos> SELECT * FROM meters;
@ -720,7 +723,7 @@ TDengine supports aggregations over data, they are listed below:
================================================
9 | 9 |
Query OK, 1 row(s) in set (0.004475s)
taos> SELECT COUNT(*), COUNT(voltage) FROM d1001;
count(*) | count(voltage) |
================================================
@ -758,7 +761,7 @@ TDengine supports aggregations over data, they are listed below:
```
- **TWA**
```mysql
SELECT TWA(field_name) FROM tb_name WHERE clause;
```
@ -799,7 +802,7 @@ TDengine supports aggregations over data, they are listed below:
================================================================================
35.200000763 | 658 | 0.950000018 |
Query OK, 1 row(s) in set (0.000980s)
```
```
- **STDDEV**
@ -896,7 +899,7 @@ TDengine supports aggregations over data, they are listed below:
======================================
13.40000 | 223 |
Query OK, 1 row(s) in set (0.001123s)
taos> SELECT MAX(current), MAX(voltage) FROM d1001;
max(current) | max(voltage) |
======================================
@ -937,8 +940,6 @@ TDengine supports aggregations over data, they are listed below:
Query OK, 1 row(s) in set (0.001023s)
```
-
- **LAST**
```mysql
@ -972,7 +973,7 @@ TDengine supports aggregations over data, they are listed below:
```
- **TOP**
```mysql
SELECT TOP(field_name, K) FROM { tb_name | stb_name } [WHERE clause];
```
@ -1029,7 +1030,7 @@ TDengine supports aggregations over data, they are listed below:
2018-10-03 14:38:15.000 | 218 |
2018-10-03 14:38:16.650 | 218 |
Query OK, 2 row(s) in set (0.001332s)
taos> SELECT BOTTOM(current, 2) FROM d1001;
ts | bottom(current, 2) |
=================================================
@ -1092,7 +1093,7 @@ TDengine supports aggregations over data, they are listed below:
=======================
12.30000 |
Query OK, 1 row(s) in set (0.001238s)
taos> SELECT LAST_ROW(current) FROM d1002;
last_row(current) |
=======================
@ -1146,7 +1147,7 @@ TDengine supports aggregations over data, they are listed below:
============================
5.000000000 |
Query OK, 1 row(s) in set (0.001792s)
taos> SELECT SPREAD(voltage) FROM d1001;
spread(voltage) |
============================
@ -1172,7 +1173,7 @@ TDengine supports aggregations over data, they are listed below:
## <a class="anchor" id="aggregation"></a> Time-dimension Aggregation
TDengine supports aggregating by intervals. Data in a table can partitioned by intervals and aggregated to generate results. For example, a temperature sensor collects data once per second, but the average temperature needs to be queried every 10 minutes. This aggregation is suitable for down sample operation, and the syntax is as follows:
TDengine supports aggregating by intervals (time range). Data in a table can partitioned by intervals and aggregated to generate results. For example, a temperature sensor collects data once per second, but the average temperature needs to be queried every 10 minutes. This aggregation is suitable for down sample operation, and the syntax is as follows:
```mysql
SELECT function_list FROM tb_name
@ -1235,11 +1236,11 @@ SELECT AVG(current), MAX(current), LEASTSQUARES(current, start_val, step_val), P
**Restrictions on group by**
TAOS SQL supports group by operation on tags, tbnames and ordinary columns, required that only one column and whichhas less than 100,000 unique values.
TAOS SQL supports group by operation on tags, tbnames and ordinary columns, required that only one column and which has less than 100,000 unique values.
**Restrictions on join operation**
TAOS SQL supports join columns of two tables by Primary Key timestamp between them, and does not support four operations after tables aggregated for the time being.
TAOS SQL supports join columns of two tables by Primary Key timestamp between them, and does not support four arithmetic operations after tables aggregated for the time being.
**Availability of is no null**

View File

@ -102,6 +102,12 @@ elif echo $osinfo | grep -qwi "centos" ; then
elif echo $osinfo | grep -qwi "fedora" ; then
# echo "This is fedora system"
os_type=2
elif echo $osinfo | grep -qwi "Linx" ; then
# echo "This is Linx system"
os_type=1
service_mod=0
initd_mod=0
service_config_dir="/etc/systemd/system"
else
echo " osinfo: ${osinfo}"
echo " This is an officially unverified linux system,"

View File

@ -20,44 +20,33 @@ fi
# Dynamic directory
if [ "$osType" != "Darwin" ]; then
data_dir="/var/lib/taos"
log_dir="/var/log/taos"
else
data_dir="/usr/local/var/lib/taos"
log_dir="/usr/local/var/log/taos"
fi
if [ "$osType" != "Darwin" ]; then
cfg_install_dir="/etc/taos"
else
cfg_install_dir="/usr/local/etc/taos"
fi
if [ "$osType" != "Darwin" ]; then
bin_link_dir="/usr/bin"
lib_link_dir="/usr/lib"
lib64_link_dir="/usr/lib64"
inc_link_dir="/usr/include"
install_main_dir="/usr/local/taos"
bin_dir="/usr/local/taos/bin"
else
data_dir="/usr/local/var/lib/taos"
log_dir="/usr/local/var/log/taos"
cfg_install_dir="/usr/local/etc/taos"
bin_link_dir="/usr/local/bin"
lib_link_dir="/usr/local/lib"
inc_link_dir="/usr/local/include"
fi
#install main path
if [ "$osType" != "Darwin" ]; then
install_main_dir="/usr/local/taos"
else
install_main_dir="/usr/local/Cellar/tdengine/${verNumber}"
fi
# old bin dir
if [ "$osType" != "Darwin" ]; then
bin_dir="/usr/local/taos/bin"
else
bin_dir="/usr/local/Cellar/tdengine/${verNumber}/bin"
bin_dir="/usr/local/Cellar/tdengine/${verNumber}/bin"
fi
service_config_dir="/etc/systemd/system"
@ -254,7 +243,10 @@ function install_lib() {
${csudo} ln -sf ${lib64_link_dir}/libtaos.so.1 ${lib64_link_dir}/libtaos.so
fi
else
${csudo} cp -Rf ${binary_dir}/build/lib/libtaos.* ${install_main_dir}/driver && ${csudo} chmod 777 ${install_main_dir}/driver/*
${csudo} cp -Rf ${binary_dir}/build/lib/libtaos.${verNumber}.dylib ${install_main_dir}/driver && ${csudo} chmod 777 ${install_main_dir}/driver/*
${csudo} ln -sf ${install_main_dir}/driver/libtaos.* ${lib_link_dir}/libtaos.1.dylib
${csudo} ln -sf ${lib_link_dir}/libtaos.1.dylib ${lib_link_dir}/libtaos.dylib
fi
install_jemalloc

View File

@ -0,0 +1,70 @@
/*
* Copyright (c) 2021 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_TSCPARSELINE_H
#define TDENGINE_TSCPARSELINE_H
#ifdef __cplusplus
extern "C" {
#endif
typedef struct {
char* key;
uint8_t type;
int16_t length;
char* value;
} TAOS_SML_KV;
typedef struct {
char* stableName;
char* childTableName;
TAOS_SML_KV* tags;
int32_t tagNum;
// first kv must be timestamp
TAOS_SML_KV* fields;
int32_t fieldNum;
} TAOS_SML_DATA_POINT;
typedef enum {
SML_TIME_STAMP_NOW,
SML_TIME_STAMP_SECONDS,
SML_TIME_STAMP_MILLI_SECONDS,
SML_TIME_STAMP_MICRO_SECONDS,
SML_TIME_STAMP_NANO_SECONDS
} SMLTimeStampType;
typedef struct {
uint64_t id;
SHashObj* smlDataToSchema;
} SSmlLinesInfo;
int tscSmlInsert(TAOS* taos, TAOS_SML_DATA_POINT* points, int numPoint, SSmlLinesInfo* info);
bool checkDuplicateKey(char *key, SHashObj *pHash, SSmlLinesInfo* info);
int32_t isValidChildTableName(const char *pTbName, int16_t len);
bool convertSmlValueType(TAOS_SML_KV *pVal, char *value,
uint16_t len, SSmlLinesInfo* info);
int32_t convertSmlTimeStamp(TAOS_SML_KV *pVal, char *value,
uint16_t len, SSmlLinesInfo* info);
void destroySmlDataPoint(TAOS_SML_DATA_POINT* point);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_TSCPARSELINE_H

View File

@ -318,6 +318,7 @@ void doAddGroupColumnForSubquery(SQueryInfo* pQueryInfo, int32_t tagIndex, SSqlC
int16_t tscGetJoinTagColIdByUid(STagCond* pTagCond, uint64_t uid);
int16_t tscGetTagColIndexById(STableMeta* pTableMeta, int16_t colId);
int32_t doInitSubState(SSqlObj* pSql, int32_t numOfSubqueries);
void tscPrintSelNodeList(SSqlObj* pSql, int32_t subClauseIndex);

View File

@ -17,6 +17,7 @@
#include "tscLog.h"
#include "taos.h"
#include "tscParseLine.h"
typedef struct {
char sTableName[TSDB_TABLE_NAME_LEN];
@ -27,38 +28,6 @@ typedef struct {
uint8_t precision;
} SSmlSTableSchema;
typedef struct {
char* key;
uint8_t type;
int16_t length;
char* value;
} TAOS_SML_KV;
typedef struct {
char* stableName;
char* childTableName;
TAOS_SML_KV* tags;
int32_t tagNum;
// first kv must be timestamp
TAOS_SML_KV* fields;
int32_t fieldNum;
} TAOS_SML_DATA_POINT;
typedef enum {
SML_TIME_STAMP_NOW,
SML_TIME_STAMP_SECONDS,
SML_TIME_STAMP_MILLI_SECONDS,
SML_TIME_STAMP_MICRO_SECONDS,
SML_TIME_STAMP_NANO_SECONDS
} SMLTimeStampType;
typedef struct {
uint64_t id;
SHashObj* smlDataToSchema;
} SSmlLinesInfo;
//=================================================================================================
static uint64_t linesSmlHandleId = 0;
@ -1565,8 +1534,8 @@ static bool convertStrToNumber(TAOS_SML_KV *pVal, char*str, SSmlLinesInfo* info)
return true;
}
//len does not include '\0' from value.
static bool convertSmlValueType(TAOS_SML_KV *pVal, char *value,
uint16_t len, SSmlLinesInfo* info) {
bool convertSmlValueType(TAOS_SML_KV *pVal, char *value,
uint16_t len, SSmlLinesInfo* info) {
if (len <= 0) {
return false;
}
@ -1708,7 +1677,7 @@ static int32_t getTimeStampValue(char *value, uint16_t len,
if (len >= 2) {
for (int i = 0; i < len - 2; ++i) {
if(!isdigit(value[i])) {
return TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
return TSDB_CODE_TSC_INVALID_TIME_STAMP;
}
}
}
@ -1743,20 +1712,20 @@ static int32_t getTimeStampValue(char *value, uint16_t len,
break;
}
default: {
return TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
return TSDB_CODE_TSC_INVALID_TIME_STAMP;
}
}
return TSDB_CODE_SUCCESS;
}
static int32_t convertSmlTimeStamp(TAOS_SML_KV *pVal, char *value,
uint16_t len, SSmlLinesInfo* info) {
int32_t convertSmlTimeStamp(TAOS_SML_KV *pVal, char *value,
uint16_t len, SSmlLinesInfo* info) {
int32_t ret;
SMLTimeStampType type;
int64_t tsVal;
if (!isTimeStamp(value, len, &type)) {
return TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
return TSDB_CODE_TSC_INVALID_TIME_STAMP;
}
ret = getTimeStampValue(value, len, type, &tsVal);
@ -1805,7 +1774,7 @@ static int32_t parseSmlTimeStamp(TAOS_SML_KV **pTS, const char **index, SSmlLine
return ret;
}
static bool checkDuplicateKey(char *key, SHashObj *pHash, SSmlLinesInfo* info) {
bool checkDuplicateKey(char *key, SHashObj *pHash, SSmlLinesInfo* info) {
char *val = NULL;
char *cur = key;
char keyLower[TSDB_COL_NAME_LEN];
@ -1842,7 +1811,7 @@ static int32_t parseSmlKey(TAOS_SML_KV *pKV, const char **index, SHashObj *pHash
while (*cur != '\0') {
if (len > TSDB_COL_NAME_LEN) {
tscError("SML:0x%"PRIx64" Key field cannot exceeds 65 characters", info->id);
return TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
return TSDB_CODE_TSC_INVALID_COLUMN_LENGTH;
}
//unescaped '=' identifies a tag key
if (*cur == '=' && *(cur - 1) != '\\') {
@ -1902,7 +1871,7 @@ static bool parseSmlValue(TAOS_SML_KV *pKV, const char **index,
free(pKV->key);
pKV->key = NULL;
free(value);
return TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
return TSDB_CODE_TSC_INVALID_VALUE;
}
free(value);
@ -1931,7 +1900,7 @@ static int32_t parseSmlMeasurement(TAOS_SML_DATA_POINT *pSml, const char **index
tscError("SML:0x%"PRIx64" Measurement field cannot exceeds 193 characters", info->id);
free(pSml->stableName);
pSml->stableName = NULL;
return TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
return TSDB_CODE_TSC_INVALID_TABLE_ID_LENGTH;
}
//first unescaped comma or space identifies measurement
//if space detected first, meaning no tag in the input
@ -1958,7 +1927,7 @@ static int32_t parseSmlMeasurement(TAOS_SML_DATA_POINT *pSml, const char **index
}
//Table name can only contain digits(0-9),alphebet(a-z),underscore(_)
static int32_t isValidChildTableName(const char *pTbName, int16_t len) {
int32_t isValidChildTableName(const char *pTbName, int16_t len) {
const char *cur = pTbName;
for (int i = 0; i < len; ++i) {
if(!isdigit(cur[i]) && !isalpha(cur[i]) && (cur[i] != '_')) {
@ -2146,14 +2115,14 @@ int32_t tscParseLines(char* lines[], int numLines, SArray* points, SArray* faile
if (code != TSDB_CODE_SUCCESS) {
tscError("SML:0x%"PRIx64" data point line parse failed. line %d : %s", info->id, i, lines[i]);
destroySmlDataPoint(&point);
return TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
return code;
} else {
tscDebug("SML:0x%"PRIx64" data point line parse success. line %d", info->id, i);
}
taosArrayPush(points, &point);
}
return 0;
return TSDB_CODE_SUCCESS;
}
int taos_insert_lines(TAOS* taos, char* lines[], int numLines) {

View File

@ -0,0 +1,424 @@
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "hash.h"
#include "taos.h"
#include "tscUtil.h"
#include "tsclient.h"
#include "tscLog.h"
#include "tscParseLine.h"
#define MAX_TELNET_FILEDS_NUM 2
#define OTS_TIMESTAMP_COLUMN_NAME "ts"
#define OTS_METRIC_VALUE_COLUMN_NAME "value"
/* telnet style API parser */
static uint64_t HandleId = 0;
static uint64_t genUID() {
uint64_t id;
do {
id = atomic_add_fetch_64(&HandleId, 1);
} while (id == 0);
return id;
}
static int32_t parseTelnetMetric(TAOS_SML_DATA_POINT *pSml, const char **index, SSmlLinesInfo* info) {
const char *cur = *index;
uint16_t len = 0;
pSml->stableName = tcalloc(TSDB_TABLE_NAME_LEN + 1, 1); // +1 to avoid 1772 line over write
if (pSml->stableName == NULL){
return TSDB_CODE_TSC_OUT_OF_MEMORY;
}
if (isdigit(*cur)) {
tscError("OTD:0x%"PRIx64" Metric cannnot start with digit", info->id);
tfree(pSml->stableName);
return TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
}
while (*cur != '\0') {
if (len > TSDB_TABLE_NAME_LEN) {
tscError("OTD:0x%"PRIx64" Metric cannot exceeds 193 characters", info->id);
tfree(pSml->stableName);
return TSDB_CODE_TSC_INVALID_TABLE_ID_LENGTH;
}
if (*cur == ' ') {
break;
}
pSml->stableName[len] = *cur;
cur++;
len++;
}
if (len == 0 || *cur == '\0') {
tfree(pSml->stableName);
return TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
}
pSml->stableName[len] = '\0';
*index = cur + 1;
tscDebug("OTD:0x%"PRIx64" Stable name in metric:%s|len:%d", info->id, pSml->stableName, len);
return TSDB_CODE_SUCCESS;
}
static int32_t parseTelnetTimeStamp(TAOS_SML_KV **pTS, int *num_kvs, const char **index, SSmlLinesInfo* info) {
//Timestamp must be the first KV to parse
assert(*num_kvs == 0);
const char *start, *cur;
int32_t ret = TSDB_CODE_SUCCESS;
int len = 0;
char key[] = OTS_TIMESTAMP_COLUMN_NAME;
char *value = NULL;
start = cur = *index;
//allocate fields for timestamp and value
*pTS = tcalloc(MAX_TELNET_FILEDS_NUM, sizeof(TAOS_SML_KV));
while(*cur != '\0') {
if (*cur == ' ') {
break;
}
cur++;
len++;
}
if (len > 0 && *cur != '\0') {
value = tcalloc(len + 1, 1);
memcpy(value, start, len);
} else {
tfree(*pTS);
return TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
}
ret = convertSmlTimeStamp(*pTS, value, len, info);
if (ret) {
tfree(value);
tfree(*pTS);
return ret;
}
tfree(value);
(*pTS)->key = tcalloc(sizeof(key), 1);
memcpy((*pTS)->key, key, sizeof(key));
*num_kvs += 1;
*index = cur + 1;
return ret;
}
static int32_t parseTelnetMetricValue(TAOS_SML_KV **pKVs, int *num_kvs, const char **index, SSmlLinesInfo* info) {
//skip timestamp
TAOS_SML_KV *pVal = *pKVs + 1;
const char *start, *cur;
int32_t ret = TSDB_CODE_SUCCESS;
int len = 0;
char key[] = OTS_METRIC_VALUE_COLUMN_NAME;
char *value = NULL;
start = cur = *index;
while(*cur != '\0') {
if (*cur == ' ') {
break;
}
cur++;
len++;
}
if (len > 0 && *cur != '\0') {
value = tcalloc(len + 1, 1);
memcpy(value, start, len);
} else {
return TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
}
if (!convertSmlValueType(pVal, value, len, info)) {
tscError("OTD:0x%"PRIx64" Failed to convert metric value string(%s) to any type",
info->id, value);
tfree(value);
return TSDB_CODE_TSC_INVALID_VALUE;
}
tfree(value);
pVal->key = tcalloc(sizeof(key), 1);
memcpy(pVal->key, key, sizeof(key));
*num_kvs += 1;
*index = cur + 1;
return ret;
}
static int32_t parseTelnetTagKey(TAOS_SML_KV *pKV, const char **index, SHashObj *pHash, SSmlLinesInfo* info) {
const char *cur = *index;
char key[TSDB_COL_NAME_LEN + 1]; // +1 to avoid key[len] over write
uint16_t len = 0;
//key field cannot start with digit
if (isdigit(*cur)) {
tscError("OTD:0x%"PRIx64" Tag key cannnot start with digit", info->id);
return TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
}
while (*cur != '\0') {
if (len > TSDB_COL_NAME_LEN) {
tscError("OTD:0x%"PRIx64" Tag key cannot exceeds 65 characters", info->id);
return TSDB_CODE_TSC_INVALID_COLUMN_LENGTH;
}
if (*cur == '=') {
break;
}
key[len] = *cur;
cur++;
len++;
}
if (len == 0 || *cur == '\0') {
return TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
}
key[len] = '\0';
if (checkDuplicateKey(key, pHash, info)) {
return TSDB_CODE_TSC_DUP_TAG_NAMES;
}
pKV->key = tcalloc(len + 1, 1);
memcpy(pKV->key, key, len + 1);
//tscDebug("OTD:0x%"PRIx64" Key:%s|len:%d", info->id, pKV->key, len);
*index = cur + 1;
return TSDB_CODE_SUCCESS;
}
static int32_t parseTelnetTagValue(TAOS_SML_KV *pKV, const char **index,
bool *is_last_kv, SSmlLinesInfo* info) {
const char *start, *cur;
char *value = NULL;
uint16_t len = 0;
start = cur = *index;
while (1) {
// ',' or '\0' identifies a value
if (*cur == ',' || *cur == '\0') {
// '\0' indicates end of value
*is_last_kv = (*cur == '\0') ? true : false;
break;
}
cur++;
len++;
}
if (len == 0) {
tfree(pKV->key);
return TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
}
value = tcalloc(len + 1, 1);
memcpy(value, start, len);
value[len] = '\0';
if (!convertSmlValueType(pKV, value, len, info)) {
tscError("OTD:0x%"PRIx64" Failed to convert sml value string(%s) to any type",
info->id, value);
//free previous alocated key field
tfree(pKV->key);
tfree(value);
return TSDB_CODE_TSC_INVALID_VALUE;
}
tfree(value);
*index = (*cur == '\0') ? cur : cur + 1;
return TSDB_CODE_SUCCESS;
}
static int32_t parseTelnetTagKvs(TAOS_SML_KV **pKVs, int *num_kvs,
const char **index, char **childTableName,
SHashObj *pHash, SSmlLinesInfo* info) {
const char *cur = *index;
int32_t ret = TSDB_CODE_SUCCESS;
TAOS_SML_KV *pkv;
bool is_last_kv = false;
int32_t capacity = 4;
*pKVs = tcalloc(capacity, sizeof(TAOS_SML_KV));
pkv = *pKVs;
while (*cur != '\0') {
ret = parseTelnetTagKey(pkv, &cur, pHash, info);
if (ret) {
tscError("OTD:0x%"PRIx64" Unable to parse key", info->id);
return ret;
}
ret = parseTelnetTagValue(pkv, &cur, &is_last_kv, info);
if (ret) {
tscError("OTD:0x%"PRIx64" Unable to parse value", info->id);
return ret;
}
if ((strcasecmp(pkv->key, "ID") == 0) && pkv->type == TSDB_DATA_TYPE_BINARY) {
ret = isValidChildTableName(pkv->value, pkv->length);
if (ret) {
return ret;
}
*childTableName = malloc(pkv->length + 1);
memcpy(*childTableName, pkv->value, pkv->length);
(*childTableName)[pkv->length] = '\0';
tfree(pkv->key);
tfree(pkv->value);
} else {
*num_kvs += 1;
}
if (is_last_kv) {
break;
}
//reallocate addtional memory for more kvs
if ((*num_kvs + 1) > capacity) {
TAOS_SML_KV *more_kvs = NULL;
capacity *= 3; capacity /= 2;
more_kvs = realloc(*pKVs, capacity * sizeof(TAOS_SML_KV));
if (!more_kvs) {
return TSDB_CODE_TSC_OUT_OF_MEMORY;
}
*pKVs = more_kvs;
}
//move pKV points to next TAOS_SML_KV block
pkv = *pKVs + *num_kvs;
}
return ret;
}
int32_t tscParseTelnetLine(const char* line, TAOS_SML_DATA_POINT* smlData, SSmlLinesInfo* info) {
const char* index = line;
int32_t ret = TSDB_CODE_SUCCESS;
//Parse metric
ret = parseTelnetMetric(smlData, &index, info);
if (ret) {
tscError("OTD:0x%"PRIx64" Unable to parse metric", info->id);
return ret;
}
tscDebug("OTD:0x%"PRIx64" Parse metric finished", info->id);
//Parse timestamp
ret = parseTelnetTimeStamp(&smlData->fields, &smlData->fieldNum, &index, info);
if (ret) {
tscError("OTD:0x%"PRIx64" Unable to parse timestamp", info->id);
return ret;
}
tscDebug("OTD:0x%"PRIx64" Parse timestamp finished", info->id);
//Parse value
ret = parseTelnetMetricValue(&smlData->fields, &smlData->fieldNum, &index, info);
if (ret) {
tscError("OTD:0x%"PRIx64" Unable to parse metric value", info->id);
return ret;
}
tscDebug("OTD:0x%"PRIx64" Parse metric value finished", info->id);
//Parse tagKVs
SHashObj *keyHashTable = taosHashInit(128, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, false);
ret = parseTelnetTagKvs(&smlData->tags, &smlData->tagNum, &index, &smlData->childTableName, keyHashTable, info);
if (ret) {
tscError("OTD:0x%"PRIx64" Unable to parse tags", info->id);
taosHashCleanup(keyHashTable);
return ret;
}
tscDebug("OTD:0x%"PRIx64" Parse tags finished", info->id);
taosHashCleanup(keyHashTable);
return TSDB_CODE_SUCCESS;
}
int32_t tscParseTelnetLines(char* lines[], int numLines, SArray* points, SArray* failedLines, SSmlLinesInfo* info) {
for (int32_t i = 0; i < numLines; ++i) {
TAOS_SML_DATA_POINT point = {0};
int32_t code = tscParseTelnetLine(lines[i], &point, info);
if (code != TSDB_CODE_SUCCESS) {
tscError("OTD:0x%"PRIx64" data point line parse failed. line %d : %s", info->id, i, lines[i]);
destroySmlDataPoint(&point);
return code;
} else {
tscDebug("OTD:0x%"PRIx64" data point line parse success. line %d", info->id, i);
}
taosArrayPush(points, &point);
}
return TSDB_CODE_SUCCESS;
}
int taos_insert_telnet_lines(TAOS* taos, char* lines[], int numLines) {
int32_t code = 0;
SSmlLinesInfo* info = tcalloc(1, sizeof(SSmlLinesInfo));
info->id = genUID();
if (numLines <= 0 || numLines > 65536) {
tscError("OTD:0x%"PRIx64" taos_insert_telnet_lines numLines should be between 1 and 65536. numLines: %d", info->id, numLines);
tfree(info);
code = TSDB_CODE_TSC_APP_ERROR;
return code;
}
for (int i = 0; i < numLines; ++i) {
if (lines[i] == NULL) {
tscError("OTD:0x%"PRIx64" taos_insert_telnet_lines line %d is NULL", info->id, i);
tfree(info);
code = TSDB_CODE_TSC_APP_ERROR;
return code;
}
}
SArray* lpPoints = taosArrayInit(numLines, sizeof(TAOS_SML_DATA_POINT));
if (lpPoints == NULL) {
tscError("OTD:0x%"PRIx64" taos_insert_telnet_lines failed to allocate memory", info->id);
tfree(info);
return TSDB_CODE_TSC_OUT_OF_MEMORY;
}
tscDebug("OTD:0x%"PRIx64" taos_insert_telnet_lines begin inserting %d lines, first line: %s", info->id, numLines, lines[0]);
code = tscParseTelnetLines(lines, numLines, lpPoints, NULL, info);
size_t numPoints = taosArrayGetSize(lpPoints);
if (code != 0) {
goto cleanup;
}
TAOS_SML_DATA_POINT* points = TARRAY_GET_START(lpPoints);
code = tscSmlInsert(taos, points, (int)numPoints, info);
if (code != 0) {
tscError("OTD:0x%"PRIx64" taos_insert_telnet_lines error: %s", info->id, tstrerror((code)));
}
cleanup:
tscDebug("OTD:0x%"PRIx64" taos_insert_telnet_lines finish inserting %d lines. code: %d", info->id, numLines, code);
points = TARRAY_GET_START(lpPoints);
numPoints = taosArrayGetSize(lpPoints);
for (int i=0; i<numPoints; ++i) {
destroySmlDataPoint(points+i);
}
taosArrayDestroy(lpPoints);
tfree(info);
return code;
}
int taos_telnet_insert(TAOS* taos, TAOS_SML_DATA_POINT* points, int numPoint) {
SSmlLinesInfo* info = tcalloc(1, sizeof(SSmlLinesInfo));
info->id = genUID();
int code = tscSmlInsert(taos, points, numPoint, info);
tfree(info);
return code;
}

View File

@ -22,6 +22,7 @@
#include <qSqlparser.h>
#include "os.h"
#include "regex.h"
#include "qPlan.h"
#include "qSqlparser.h"
#include "qTableMeta.h"
@ -278,6 +279,8 @@ static uint8_t convertRelationalOperator(SStrToken *pToken) {
return TSDB_BINARY_OP_REMAINDER;
case TK_LIKE:
return TSDB_RELATION_LIKE;
case TK_MATCH:
return TSDB_RELATION_MATCH;
case TK_ISNULL:
return TSDB_RELATION_ISNULL;
case TK_NOTNULL:
@ -3776,6 +3779,9 @@ static int32_t doExtractColumnFilterInfo(SSqlCmd* pCmd, SQueryInfo* pQueryInfo,
case TK_LIKE:
pColumnFilter->lowerRelOptr = TSDB_RELATION_LIKE;
break;
case TK_MATCH:
pColumnFilter->lowerRelOptr = TSDB_RELATION_MATCH;
break;
case TK_ISNULL:
pColumnFilter->lowerRelOptr = TSDB_RELATION_ISNULL;
break;
@ -3839,9 +3845,15 @@ static int32_t tablenameListToString(tSqlExpr* pExpr, SStringBuilder* sb) {
return TSDB_CODE_SUCCESS;
}
static int32_t tablenameCondToString(tSqlExpr* pExpr, SStringBuilder* sb) {
taosStringBuilderAppendStringLen(sb, QUERY_COND_REL_PREFIX_LIKE, QUERY_COND_REL_PREFIX_LIKE_LEN);
taosStringBuilderAppendString(sb, pExpr->value.pz);
static int32_t tablenameCondToString(tSqlExpr* pExpr, uint32_t opToken, SStringBuilder* sb) {
assert(opToken == TK_LIKE || opToken == TK_MATCH);
if (opToken == TK_LIKE) {
taosStringBuilderAppendStringLen(sb, QUERY_COND_REL_PREFIX_LIKE, QUERY_COND_REL_PREFIX_LIKE_LEN);
taosStringBuilderAppendString(sb, pExpr->value.pz);
} else if (opToken == TK_MATCH) {
taosStringBuilderAppendStringLen(sb, QUERY_COND_REL_PREFIX_MATCH, QUERY_COND_REL_PREFIX_MATCH_LEN);
taosStringBuilderAppendString(sb, pExpr->value.pz);
}
return TSDB_CODE_SUCCESS;
}
@ -3862,7 +3874,7 @@ static int32_t checkColumnFilterInfo(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, SCol
STableMeta* pTableMeta = pTableMetaInfo->pTableMeta;
SSchema* pSchema = tscGetTableColumnSchema(pTableMeta, pIndex->columnIndex);
int32_t ret = 0;
const char* msg1 = "non binary column not support like operator";
const char* msg1 = "non binary column not support like/match operator";
const char* msg2 = "binary column not support this operator";
const char* msg3 = "bool column not support this operator";
const char* msg4 = "primary key not support this operator";
@ -3890,12 +3902,13 @@ static int32_t checkColumnFilterInfo(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, SCol
&& pExpr->tokenId != TK_ISNULL
&& pExpr->tokenId != TK_NOTNULL
&& pExpr->tokenId != TK_LIKE
&& pExpr->tokenId != TK_MATCH
&& pExpr->tokenId != TK_IN) {
ret = invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg2);
goto _err_ret;
}
} else {
if (pExpr->tokenId == TK_LIKE) {
if (pExpr->tokenId == TK_LIKE || pExpr->tokenId == TK_MATCH) {
ret = invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg1);
goto _err_ret;
}
@ -3943,12 +3956,12 @@ static int32_t getTablenameCond(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, tSqlExpr*
if (pTableCond->tokenId == TK_IN) {
ret = tablenameListToString(pRight, sb);
} else if (pTableCond->tokenId == TK_LIKE) {
} else if (pTableCond->tokenId == TK_LIKE || pTableCond->tokenId == TK_MATCH) {
if (pRight->tokenId != TK_STRING) {
return invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg1);
}
ret = tablenameCondToString(pRight, sb);
ret = tablenameCondToString(pRight, pTableCond->tokenId, sb);
}
if (ret != TSDB_CODE_SUCCESS) {
@ -4397,7 +4410,7 @@ static bool validateJoinExprNode(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, tSqlExpr
}
static bool validTableNameOptr(tSqlExpr* pExpr) {
const char nameFilterOptr[] = {TK_IN, TK_LIKE};
const char nameFilterOptr[] = {TK_IN, TK_LIKE, TK_MATCH};
for (int32_t i = 0; i < tListLen(nameFilterOptr); ++i) {
if (pExpr->tokenId == nameFilterOptr[i]) {
@ -4489,6 +4502,45 @@ static int32_t validateLikeExpr(tSqlExpr* pExpr, STableMeta* pTableMeta, int32_t
return TSDB_CODE_SUCCESS;
}
// check for match expression
static int32_t validateMatchExpr(tSqlExpr* pExpr, STableMeta* pTableMeta, int32_t index, char* msgBuf) {
const char* msg1 = "regular expression string should be less than %d characters";
const char* msg2 = "illegal column type for match";
const char* msg3 = "invalid regular expression";
tSqlExpr* pLeft = pExpr->pLeft;
tSqlExpr* pRight = pExpr->pRight;
if (pExpr->tokenId == TK_MATCH) {
if (pRight->value.nLen > tsMaxRegexStringLen) {
char tmp[64] = {0};
sprintf(tmp, msg1, tsMaxRegexStringLen);
return invalidOperationMsg(msgBuf, tmp);
}
SSchema* pSchema = tscGetTableSchema(pTableMeta);
if ((!isTablenameToken(&pLeft->columnName)) && !IS_VAR_DATA_TYPE(pSchema[index].type)) {
return invalidOperationMsg(msgBuf, msg2);
}
int errCode = 0;
regex_t regex;
char regErrBuf[256] = {0};
const char* pattern = pRight->value.pz;
int cflags = REG_EXTENDED;
if ((errCode = regcomp(&regex, pattern, cflags)) != 0) {
regerror(errCode, &regex, regErrBuf, sizeof(regErrBuf));
tscError("Failed to compile regex pattern %s. reason %s", pattern, regErrBuf);
return invalidOperationMsg(msgBuf, msg3);
}
regfree(&regex);
}
return TSDB_CODE_SUCCESS;
}
int32_t handleNeOptr(tSqlExpr** rexpr, tSqlExpr* expr) {
tSqlExpr* left = tSqlExprClone(expr);
tSqlExpr* right = expr;
@ -4540,6 +4592,12 @@ static int32_t handleExprInQueryCond(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, tSql
return code;
}
// validate the match expression
code = validateMatchExpr(*pExpr, pTableMeta, index.columnIndex, tscGetErrorMsgPayload(pCmd));
if (code != TSDB_CODE_SUCCESS) {
return code;
}
SSchema* pSchema = tscGetTableColumnSchema(pTableMeta, index.columnIndex);
if (pSchema->type == TSDB_DATA_TYPE_TIMESTAMP && index.columnIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX) { // query on time range
if (!validateJoinExprNode(pCmd, pQueryInfo, *pExpr, &index)) {
@ -4867,65 +4925,66 @@ static int32_t setTableCondForSTableQuery(SSqlCmd* pCmd, SQueryInfo* pQueryInfo,
STagCond* pTagCond = &pQueryInfo->tagCond;
pTagCond->tbnameCond.uid = pTableMetaInfo->pTableMeta->id.uid;
assert(pExpr->tokenId == TK_LIKE || pExpr->tokenId == TK_IN);
assert(pExpr->tokenId == TK_LIKE || pExpr->tokenId == TK_MATCH || pExpr->tokenId == TK_IN);
if (pExpr->tokenId == TK_LIKE) {
if (pExpr->tokenId == TK_LIKE || pExpr->tokenId == TK_MATCH) {
char* str = taosStringBuilderGetResult(sb, NULL);
pQueryInfo->tagCond.tbnameCond.cond = strdup(str);
pQueryInfo->tagCond.tbnameCond.len = (int32_t) strlen(str);
return TSDB_CODE_SUCCESS;
}
} else {
SStringBuilder sb1;
memset(&sb1, 0, sizeof(sb1));
taosStringBuilderAppendStringLen(&sb1, QUERY_COND_REL_PREFIX_IN, QUERY_COND_REL_PREFIX_IN_LEN);
SStringBuilder sb1; memset(&sb1, 0, sizeof(sb1));
taosStringBuilderAppendStringLen(&sb1, QUERY_COND_REL_PREFIX_IN, QUERY_COND_REL_PREFIX_IN_LEN);
// remove the duplicated input table names
int32_t num = 0;
char* tableNameString = taosStringBuilderGetResult(sb, NULL);
// remove the duplicated input table names
int32_t num = 0;
char* tableNameString = taosStringBuilderGetResult(sb, NULL);
char** segments = strsplit(tableNameString + QUERY_COND_REL_PREFIX_IN_LEN, TBNAME_LIST_SEP, &num);
qsort(segments, num, POINTER_BYTES, tableNameCompar);
char** segments = strsplit(tableNameString + QUERY_COND_REL_PREFIX_IN_LEN, TBNAME_LIST_SEP, &num);
qsort(segments, num, POINTER_BYTES, tableNameCompar);
int32_t j = 1;
for (int32_t i = 1; i < num; ++i) {
if (strcmp(segments[i], segments[i - 1]) != 0) {
segments[j++] = segments[i];
int32_t j = 1;
for (int32_t i = 1; i < num; ++i) {
if (strcmp(segments[i], segments[i - 1]) != 0) {
segments[j++] = segments[i];
}
}
}
num = j;
num = j;
char name[TSDB_DB_NAME_LEN] = {0};
tNameGetDbName(&pTableMetaInfo->name, name);
SStrToken dbToken = { .type = TK_STRING, .z = name, .n = (uint32_t)strlen(name) };
for (int32_t i = 0; i < num; ++i) {
if (i >= 1) {
taosStringBuilderAppendStringLen(&sb1, TBNAME_LIST_SEP, 1);
char name[TSDB_DB_NAME_LEN] = {0};
tNameGetDbName(&pTableMetaInfo->name, name);
SStrToken dbToken = {.type = TK_STRING, .z = name, .n = (uint32_t)strlen(name)};
for (int32_t i = 0; i < num; ++i) {
if (i >= 1) {
taosStringBuilderAppendStringLen(&sb1, TBNAME_LIST_SEP, 1);
}
char idBuf[TSDB_TABLE_FNAME_LEN] = {0};
int32_t xlen = (int32_t)strlen(segments[i]);
SStrToken t = {.z = segments[i], .n = xlen, .type = TK_STRING};
int32_t ret = setObjFullName(idBuf, account, &dbToken, &t, &xlen);
if (ret != TSDB_CODE_SUCCESS) {
taosStringBuilderDestroy(&sb1);
tfree(segments);
invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg);
return ret;
}
taosStringBuilderAppendString(&sb1, idBuf);
}
char idBuf[TSDB_TABLE_FNAME_LEN] = {0};
int32_t xlen = (int32_t)strlen(segments[i]);
SStrToken t = {.z = segments[i], .n = xlen, .type = TK_STRING};
char* str = taosStringBuilderGetResult(&sb1, NULL);
pQueryInfo->tagCond.tbnameCond.cond = strdup(str);
pQueryInfo->tagCond.tbnameCond.len = (int32_t)strlen(str);
int32_t ret = setObjFullName(idBuf, account, &dbToken, &t, &xlen);
if (ret != TSDB_CODE_SUCCESS) {
taosStringBuilderDestroy(&sb1);
tfree(segments);
invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg);
return ret;
}
taosStringBuilderAppendString(&sb1, idBuf);
taosStringBuilderDestroy(&sb1);
tfree(segments);
return TSDB_CODE_SUCCESS;
}
char* str = taosStringBuilderGetResult(&sb1, NULL);
pQueryInfo->tagCond.tbnameCond.cond = strdup(str);
pQueryInfo->tagCond.tbnameCond.len = (int32_t) strlen(str);
taosStringBuilderDestroy(&sb1);
tfree(segments);
return TSDB_CODE_SUCCESS;
}
int32_t mergeTimeRange(SSqlCmd* pCmd, STimeWindow* res, STimeWindow* win, int32_t optr) {
@ -8112,7 +8171,7 @@ int32_t tscGetExprFilters(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, SArray* pSelect
}
static int32_t handleExprInHavingClause(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, SArray* pSelectNodeList, tSqlExpr* pExpr, int32_t sqlOptr) {
const char* msg1 = "non binary column not support like operator";
const char* msg1 = "non binary column not support like/match operator";
const char* msg2 = "invalid operator for binary column in having clause";
const char* msg3 = "invalid operator for bool column in having clause";
@ -8164,11 +8223,12 @@ static int32_t handleExprInHavingClause(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, S
&& pExpr->tokenId != TK_ISNULL
&& pExpr->tokenId != TK_NOTNULL
&& pExpr->tokenId != TK_LIKE
&& pExpr->tokenId != TK_MATCH
) {
return invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg2);
}
} else {
if (pExpr->tokenId == TK_LIKE) {
if (pExpr->tokenId == TK_LIKE || pExpr->tokenId == TK_MATCH) {
return invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg1);
}

View File

@ -2459,11 +2459,48 @@ static void doSendQueryReqs(SSchedMsg* pSchedMsg) {
tfree(p);
}
static void doConcurrentlySendSubQueries(SSqlObj* pSql) {
SSubqueryState *pState = &pSql->subState;
// concurrently sent the query requests.
const int32_t MAX_REQUEST_PER_TASK = 8;
int32_t numOfTasks = (pState->numOfSub + MAX_REQUEST_PER_TASK - 1)/MAX_REQUEST_PER_TASK;
assert(numOfTasks >= 1);
int32_t num;
if (pState->numOfSub / numOfTasks == MAX_REQUEST_PER_TASK) {
num = MAX_REQUEST_PER_TASK;
} else {
num = pState->numOfSub / numOfTasks + 1;
}
tscDebug("0x%"PRIx64 " query will be sent by %d threads", pSql->self, numOfTasks);
for(int32_t j = 0; j < numOfTasks; ++j) {
SSchedMsg schedMsg = {0};
schedMsg.fp = doSendQueryReqs;
schedMsg.ahandle = (void*)pSql;
schedMsg.thandle = NULL;
SPair* p = calloc(1, sizeof(SPair));
p->first = j * num;
if (j == numOfTasks - 1) {
p->second = pState->numOfSub;
} else {
p->second = (j + 1) * num;
}
schedMsg.msg = p;
taosScheduleTask(tscQhandle, &schedMsg);
}
}
int32_t tscHandleMasterSTableQuery(SSqlObj *pSql) {
SSqlRes *pRes = &pSql->res;
SSqlCmd *pCmd = &pSql->cmd;
// pRes->code check only serves in launching metric sub-queries
// pRes->code check only serves in launching super table sub-queries
if (pRes->code == TSDB_CODE_TSC_QUERY_CANCELLED) {
pCmd->command = TSDB_SQL_RETRIEVE_GLOBALMERGE; // enable the abort of kill super table function.
return pRes->code;
@ -2474,22 +2511,23 @@ int32_t tscHandleMasterSTableQuery(SSqlObj *pSql) {
pRes->qId = 0x1; // hack the qhandle check
const uint32_t nBufferSize = (1u << 18u); // 256KB
const uint32_t nBufferSize = (1u << 18u); // 256KB, default buffer size
SQueryInfo *pQueryInfo = tscGetQueryInfo(pCmd);
STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0);
SSubqueryState *pState = &pSql->subState;
pState->numOfSub = 0;
if (pTableMetaInfo->pVgroupTables == NULL) {
pState->numOfSub = pTableMetaInfo->vgroupList->numOfVgroups;
} else {
pState->numOfSub = (int32_t)taosArrayGetSize(pTableMetaInfo->pVgroupTables);
int32_t numOfSub = (pTableMetaInfo->pVgroupTables == NULL) ? pTableMetaInfo->vgroupList->numOfVgroups
: (int32_t)taosArrayGetSize(pTableMetaInfo->pVgroupTables);
int32_t ret = doInitSubState(pSql, numOfSub);
if (ret != 0) {
tscAsyncResultOnError(pSql);
return ret;
}
assert(pState->numOfSub > 0);
int32_t ret = tscCreateGlobalMergerEnv(pQueryInfo, &pMemoryBuf, pSql->subState.numOfSub, &pDesc, nBufferSize, pSql->self);
ret = tscCreateGlobalMergerEnv(pQueryInfo, &pMemoryBuf, pSql->subState.numOfSub, &pDesc, nBufferSize, pSql->self);
if (ret != 0) {
pRes->code = ret;
tscAsyncResultOnError(pSql);
@ -2499,32 +2537,6 @@ int32_t tscHandleMasterSTableQuery(SSqlObj *pSql) {
}
tscDebug("0x%"PRIx64" retrieved query data from %d vnode(s)", pSql->self, pState->numOfSub);
pSql->pSubs = calloc(pState->numOfSub, POINTER_BYTES);
if (pSql->pSubs == NULL) {
tfree(pSql->pSubs);
pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
tscDestroyGlobalMergerEnv(pMemoryBuf, pDesc,pState->numOfSub);
tscAsyncResultOnError(pSql);
return ret;
}
if (pState->states == NULL) {
pState->states = calloc(pState->numOfSub, sizeof(*pState->states));
if (pState->states == NULL) {
pRes->code = TSDB_CODE_TSC_OUT_OF_MEMORY;
tscDestroyGlobalMergerEnv(pMemoryBuf, pDesc,pState->numOfSub);
tscAsyncResultOnError(pSql);
return ret;
}
pthread_mutex_init(&pState->mutex, NULL);
}
memset(pState->states, 0, sizeof(*pState->states) * pState->numOfSub);
tscDebug("0x%"PRIx64" reset all sub states to 0", pSql->self);
pRes->code = TSDB_CODE_SUCCESS;
int32_t i = 0;
@ -2545,8 +2557,8 @@ int32_t tscHandleMasterSTableQuery(SSqlObj *pSql) {
break;
}
trs->subqueryIndex = i;
trs->pParentSql = pSql;
trs->subqueryIndex = i;
trs->pParentSql = pSql;
SSqlObj *pNew = tscCreateSTableSubquery(pSql, trs, NULL);
if (pNew == NULL) {
@ -2582,39 +2594,7 @@ int32_t tscHandleMasterSTableQuery(SSqlObj *pSql) {
return pRes->code;
}
// concurrently sent the query requests.
const int32_t MAX_REQUEST_PER_TASK = 8;
int32_t numOfTasks = (pState->numOfSub + MAX_REQUEST_PER_TASK - 1)/MAX_REQUEST_PER_TASK;
assert(numOfTasks >= 1);
int32_t num;
if (pState->numOfSub / numOfTasks == MAX_REQUEST_PER_TASK) {
num = MAX_REQUEST_PER_TASK;
} else {
num = pState->numOfSub / numOfTasks + 1;
}
tscDebug("0x%"PRIx64 " query will be sent by %d threads", pSql->self, numOfTasks);
for(int32_t j = 0; j < numOfTasks; ++j) {
SSchedMsg schedMsg = {0};
schedMsg.fp = doSendQueryReqs;
schedMsg.ahandle = (void*)pSql;
schedMsg.thandle = NULL;
SPair* p = calloc(1, sizeof(SPair));
p->first = j * num;
if (j == numOfTasks - 1) {
p->second = pState->numOfSub;
} else {
p->second = (j + 1) * num;
}
schedMsg.msg = p;
taosScheduleTask(tscQhandle, &schedMsg);
}
doConcurrentlySendSubQueries(pSql);
return TSDB_CODE_SUCCESS;
}

View File

@ -122,6 +122,10 @@ int32_t tscAcquireRpc(const char *key, const char *user, const char *secretEncry
void taos_init_imp(void) {
char temp[128] = {0};
// In the APIs of other program language, taos_cleanup is not available yet.
// So, to make sure taos_cleanup will be invoked to clean up the allocated resource to suppress the valgrind warning.
atexit(taos_cleanup);
errno = TSDB_CODE_SUCCESS;
srand(taosGetTimestampSec());
@ -197,10 +201,6 @@ void taos_init_imp(void) {
tscRefId = taosOpenRef(200, tscCloseTscObj);
// In the APIs of other program language, taos_cleanup is not available yet.
// So, to make sure taos_cleanup will be invoked to clean up the allocated resource to suppress the valgrind warning.
atexit(taos_cleanup);
tscDebug("client is initialized successfully");
}

View File

@ -3944,6 +3944,21 @@ static void tscSubqueryCompleteCallback(void* param, TAOS_RES* tres, int code) {
taos_fetch_rows_a(tres, tscSubqueryRetrieveCallback, param);
}
int32_t doInitSubState(SSqlObj* pSql, int32_t numOfSubqueries) {
assert(pSql->subState.numOfSub == 0 && pSql->pSubs == NULL && pSql->subState.states == NULL);
pSql->subState.numOfSub = numOfSubqueries;
pSql->pSubs = calloc(pSql->subState.numOfSub, POINTER_BYTES);
pSql->subState.states = calloc(pSql->subState.numOfSub, sizeof(int8_t));
int32_t code = pthread_mutex_init(&pSql->subState.mutex, NULL);
if (pSql->pSubs == NULL || pSql->subState.states == NULL || code != 0) {
return TSDB_CODE_TSC_OUT_OF_MEMORY;
}
return TSDB_CODE_SUCCESS;
}
// do execute the query according to the query execution plan
void executeQuery(SSqlObj* pSql, SQueryInfo* pQueryInfo) {
int32_t code = TSDB_CODE_SUCCESS;
@ -3959,16 +3974,8 @@ void executeQuery(SSqlObj* pSql, SQueryInfo* pQueryInfo) {
}
if (taosArrayGetSize(pQueryInfo->pUpstream) > 0) { // nest query. do execute it firstly
assert(pSql->subState.numOfSub == 0);
pSql->subState.numOfSub = (int32_t) taosArrayGetSize(pQueryInfo->pUpstream);
assert(pSql->pSubs == NULL);
pSql->pSubs = calloc(pSql->subState.numOfSub, POINTER_BYTES);
assert(pSql->subState.states == NULL);
pSql->subState.states = calloc(pSql->subState.numOfSub, sizeof(int8_t));
code = pthread_mutex_init(&pSql->subState.mutex, NULL);
if (pSql->pSubs == NULL || pSql->subState.states == NULL || code != TSDB_CODE_SUCCESS) {
code = TSDB_CODE_TSC_OUT_OF_MEMORY;
code = doInitSubState(pSql, (int32_t) taosArrayGetSize(pQueryInfo->pUpstream));
if (code != TSDB_CODE_SUCCESS) {
goto _error;
}
@ -4315,7 +4322,9 @@ void tscTryQueryNextClause(SSqlObj* pSql, __async_cb_func_t fp) {
}
tfree(pSql->pSubs);
tfree(pSql->subState.states);
pSql->subState.numOfSub = 0;
pthread_mutex_destroy(&pSql->subState.mutex);
pSql->fp = fp;

View File

@ -33,9 +33,11 @@ struct SSchema;
#define QUERY_COND_REL_PREFIX_IN "IN|"
#define QUERY_COND_REL_PREFIX_LIKE "LIKE|"
#define QUERY_COND_REL_PREFIX_MATCH "MATCH|"
#define QUERY_COND_REL_PREFIX_IN_LEN 3
#define QUERY_COND_REL_PREFIX_LIKE_LEN 5
#define QUERY_COND_REL_PREFIX_MATCH_LEN 6
typedef bool (*__result_filter_fn_t)(const void *, void *);
typedef void (*__do_filter_suppl_fn_t)(void *, void *);

View File

@ -74,6 +74,7 @@ extern int8_t tsKeepOriginalColumnName;
// client
extern int32_t tsMaxSQLStringLen;
extern int32_t tsMaxWildCardsLen;
extern int32_t tsMaxRegexStringLen;
extern int8_t tsTscEnableRecordSql;
extern int32_t tsMaxNumOfOrderedResults;
extern int32_t tsMinSlidingTime;

View File

@ -430,6 +430,17 @@ tExprNode* exprTreeFromTableName(const char* tbnameCond) {
pVal->nType = TSDB_DATA_TYPE_BINARY;
pVal->nLen = (int32_t)len;
} else if (strncmp(tbnameCond, QUERY_COND_REL_PREFIX_MATCH, QUERY_COND_REL_PREFIX_MATCH_LEN) == 0) {
right->nodeType = TSQL_NODE_VALUE;
expr->_node.optr = TSDB_RELATION_MATCH;
tVariant* pVal = exception_calloc(1, sizeof(tVariant));
right->pVal = pVal;
size_t len = strlen(tbnameCond + QUERY_COND_REL_PREFIX_MATCH_LEN) + 1;
pVal->pz = exception_malloc(len);
memcpy(pVal->pz, tbnameCond + QUERY_COND_REL_PREFIX_MATCH_LEN, len);
pVal->nType = TSDB_DATA_TYPE_BINARY;
pVal->nLen = (int32_t)len;
} else if (strncmp(tbnameCond, QUERY_COND_REL_PREFIX_IN, QUERY_COND_REL_PREFIX_IN_LEN) == 0) {
right->nodeType = TSQL_NODE_VALUE;
expr->_node.optr = TSDB_RELATION_IN;

View File

@ -85,6 +85,8 @@ int32_t tsCompressColData = -1;
// client
int32_t tsMaxSQLStringLen = TSDB_MAX_ALLOWED_SQL_LEN;
int32_t tsMaxWildCardsLen = TSDB_PATTERN_STRING_DEFAULT_LEN;
int32_t tsMaxRegexStringLen = TSDB_REGEX_STRING_DEFAULT_LEN;
int8_t tsTscEnableRecordSql = 0;
// the maximum number of results for projection query on super table that are returned from

View File

@ -827,6 +827,16 @@ def taos_insert_lines(connection, lines):
if errno != 0:
raise LinesError("insert lines error", errno)
def taos_insert_telnet_lines(connection, lines):
# type: (c_void_p, list[str] | tuple(str)) -> None
num_of_lines = len(lines)
lines = (c_char_p(line.encode("utf-8")) for line in lines)
lines_type = ctypes.c_char_p * num_of_lines
p_lines = lines_type(*lines)
errno = _libtaos.taos_insert_telnet_lines(connection, p_lines, num_of_lines)
if errno != 0:
raise LinesError("insert telnet lines error", errno)
class CTaosInterface(object):
def __init__(self, config=None):

View File

@ -145,6 +145,15 @@ class TaosConnection(object):
"""
return taos_insert_lines(self._conn, lines)
def insert_telnet_lines(self, lines):
"""OpenTSDB telnet style API format support
## Example
cpu_load 1626056811855516532ns 2.0f32 id="tb1",host="host0",interface="eth0"
"""
return taos_insert_telnet_lines(self._conn, lines)
def cursor(self):
# type: () -> TaosCursor
"""Return a new Cursor object using the connection."""

View File

@ -172,6 +172,8 @@ DLL_EXPORT int taos_load_table_info(TAOS *taos, const char* tableNameList);
DLL_EXPORT int taos_insert_lines(TAOS* taos, char* lines[], int numLines);
DLL_EXPORT int taos_insert_telnet_lines(TAOS* taos, char* lines[], int numLines);
#ifdef __cplusplus
}
#endif

View File

@ -164,6 +164,8 @@ do { \
#define TSDB_RELATION_OR 12
#define TSDB_RELATION_NOT 13
#define TSDB_RELATION_MATCH 14
#define TSDB_BINARY_OP_ADD 30
#define TSDB_BINARY_OP_SUBTRACT 31
#define TSDB_BINARY_OP_MULTIPLY 32

View File

@ -106,6 +106,7 @@ int32_t* taosGetErrno();
#define TSDB_CODE_TSC_DUP_COL_NAMES TAOS_DEF_ERROR_CODE(0, 0x021D) //"duplicated column names")
#define TSDB_CODE_TSC_INVALID_TAG_LENGTH TAOS_DEF_ERROR_CODE(0, 0x021E) //"Invalid tag length")
#define TSDB_CODE_TSC_INVALID_COLUMN_LENGTH TAOS_DEF_ERROR_CODE(0, 0x021F) //"Invalid column length")
#define TSDB_CODE_TSC_DUP_TAG_NAMES TAOS_DEF_ERROR_CODE(0, 0x0220) //"duplicated tag names")
// mnode
#define TSDB_CODE_MND_MSG_NOT_PROCESSED TAOS_DEF_ERROR_CODE(0, 0x0300) //"Message not processed")

View File

@ -37,160 +37,160 @@
#define TK_NOTNULL 19
#define TK_IS 20
#define TK_LIKE 21
#define TK_GLOB 22
#define TK_BETWEEN 23
#define TK_IN 24
#define TK_GT 25
#define TK_GE 26
#define TK_LT 27
#define TK_LE 28
#define TK_BITAND 29
#define TK_BITOR 30
#define TK_LSHIFT 31
#define TK_RSHIFT 32
#define TK_PLUS 33
#define TK_MINUS 34
#define TK_DIVIDE 35
#define TK_TIMES 36
#define TK_STAR 37
#define TK_SLASH 38
#define TK_REM 39
#define TK_CONCAT 40
#define TK_UMINUS 41
#define TK_UPLUS 42
#define TK_BITNOT 43
#define TK_SHOW 44
#define TK_DATABASES 45
#define TK_TOPICS 46
#define TK_FUNCTIONS 47
#define TK_MNODES 48
#define TK_DNODES 49
#define TK_ACCOUNTS 50
#define TK_USERS 51
#define TK_MODULES 52
#define TK_QUERIES 53
#define TK_CONNECTIONS 54
#define TK_STREAMS 55
#define TK_VARIABLES 56
#define TK_SCORES 57
#define TK_GRANTS 58
#define TK_VNODES 59
#define TK_DOT 60
#define TK_CREATE 61
#define TK_TABLE 62
#define TK_STABLE 63
#define TK_DATABASE 64
#define TK_TABLES 65
#define TK_STABLES 66
#define TK_VGROUPS 67
#define TK_DROP 68
#define TK_TOPIC 69
#define TK_FUNCTION 70
#define TK_DNODE 71
#define TK_USER 72
#define TK_ACCOUNT 73
#define TK_USE 74
#define TK_DESCRIBE 75
#define TK_DESC 76
#define TK_ALTER 77
#define TK_PASS 78
#define TK_PRIVILEGE 79
#define TK_LOCAL 80
#define TK_COMPACT 81
#define TK_LP 82
#define TK_RP 83
#define TK_IF 84
#define TK_EXISTS 85
#define TK_AS 86
#define TK_OUTPUTTYPE 87
#define TK_AGGREGATE 88
#define TK_BUFSIZE 89
#define TK_PPS 90
#define TK_TSERIES 91
#define TK_DBS 92
#define TK_STORAGE 93
#define TK_QTIME 94
#define TK_CONNS 95
#define TK_STATE 96
#define TK_COMMA 97
#define TK_KEEP 98
#define TK_CACHE 99
#define TK_REPLICA 100
#define TK_QUORUM 101
#define TK_DAYS 102
#define TK_MINROWS 103
#define TK_MAXROWS 104
#define TK_BLOCKS 105
#define TK_CTIME 106
#define TK_WAL 107
#define TK_FSYNC 108
#define TK_COMP 109
#define TK_PRECISION 110
#define TK_UPDATE 111
#define TK_CACHELAST 112
#define TK_PARTITIONS 113
#define TK_UNSIGNED 114
#define TK_TAGS 115
#define TK_USING 116
#define TK_NULL 117
#define TK_NOW 118
#define TK_SELECT 119
#define TK_UNION 120
#define TK_ALL 121
#define TK_DISTINCT 122
#define TK_FROM 123
#define TK_VARIABLE 124
#define TK_INTERVAL 125
#define TK_EVERY 126
#define TK_SESSION 127
#define TK_STATE_WINDOW 128
#define TK_FILL 129
#define TK_SLIDING 130
#define TK_ORDER 131
#define TK_BY 132
#define TK_ASC 133
#define TK_GROUP 134
#define TK_HAVING 135
#define TK_LIMIT 136
#define TK_OFFSET 137
#define TK_SLIMIT 138
#define TK_SOFFSET 139
#define TK_WHERE 140
#define TK_RESET 141
#define TK_QUERY 142
#define TK_SYNCDB 143
#define TK_ADD 144
#define TK_COLUMN 145
#define TK_MODIFY 146
#define TK_TAG 147
#define TK_CHANGE 148
#define TK_SET 149
#define TK_KILL 150
#define TK_CONNECTION 151
#define TK_STREAM 152
#define TK_COLON 153
#define TK_ABORT 154
#define TK_AFTER 155
#define TK_ATTACH 156
#define TK_BEFORE 157
#define TK_BEGIN 158
#define TK_CASCADE 159
#define TK_CLUSTER 160
#define TK_CONFLICT 161
#define TK_COPY 162
#define TK_DEFERRED 163
#define TK_DELIMITERS 164
#define TK_DETACH 165
#define TK_EACH 166
#define TK_END 167
#define TK_EXPLAIN 168
#define TK_FAIL 169
#define TK_FOR 170
#define TK_IGNORE 171
#define TK_IMMEDIATE 172
#define TK_INITIALLY 173
#define TK_INSTEAD 174
#define TK_MATCH 175
#define TK_MATCH 22
#define TK_GLOB 23
#define TK_BETWEEN 24
#define TK_IN 25
#define TK_GT 26
#define TK_GE 27
#define TK_LT 28
#define TK_LE 29
#define TK_BITAND 30
#define TK_BITOR 31
#define TK_LSHIFT 32
#define TK_RSHIFT 33
#define TK_PLUS 34
#define TK_MINUS 35
#define TK_DIVIDE 36
#define TK_TIMES 37
#define TK_STAR 38
#define TK_SLASH 39
#define TK_REM 40
#define TK_CONCAT 41
#define TK_UMINUS 42
#define TK_UPLUS 43
#define TK_BITNOT 44
#define TK_SHOW 45
#define TK_DATABASES 46
#define TK_TOPICS 47
#define TK_FUNCTIONS 48
#define TK_MNODES 49
#define TK_DNODES 50
#define TK_ACCOUNTS 51
#define TK_USERS 52
#define TK_MODULES 53
#define TK_QUERIES 54
#define TK_CONNECTIONS 55
#define TK_STREAMS 56
#define TK_VARIABLES 57
#define TK_SCORES 58
#define TK_GRANTS 59
#define TK_VNODES 60
#define TK_DOT 61
#define TK_CREATE 62
#define TK_TABLE 63
#define TK_STABLE 64
#define TK_DATABASE 65
#define TK_TABLES 66
#define TK_STABLES 67
#define TK_VGROUPS 68
#define TK_DROP 69
#define TK_TOPIC 70
#define TK_FUNCTION 71
#define TK_DNODE 72
#define TK_USER 73
#define TK_ACCOUNT 74
#define TK_USE 75
#define TK_DESCRIBE 76
#define TK_DESC 77
#define TK_ALTER 78
#define TK_PASS 79
#define TK_PRIVILEGE 80
#define TK_LOCAL 81
#define TK_COMPACT 82
#define TK_LP 83
#define TK_RP 84
#define TK_IF 85
#define TK_EXISTS 86
#define TK_AS 87
#define TK_OUTPUTTYPE 88
#define TK_AGGREGATE 89
#define TK_BUFSIZE 90
#define TK_PPS 91
#define TK_TSERIES 92
#define TK_DBS 93
#define TK_STORAGE 94
#define TK_QTIME 95
#define TK_CONNS 96
#define TK_STATE 97
#define TK_COMMA 98
#define TK_KEEP 99
#define TK_CACHE 100
#define TK_REPLICA 101
#define TK_QUORUM 102
#define TK_DAYS 103
#define TK_MINROWS 104
#define TK_MAXROWS 105
#define TK_BLOCKS 106
#define TK_CTIME 107
#define TK_WAL 108
#define TK_FSYNC 109
#define TK_COMP 110
#define TK_PRECISION 111
#define TK_UPDATE 112
#define TK_CACHELAST 113
#define TK_PARTITIONS 114
#define TK_UNSIGNED 115
#define TK_TAGS 116
#define TK_USING 117
#define TK_NULL 118
#define TK_NOW 119
#define TK_SELECT 120
#define TK_UNION 121
#define TK_ALL 122
#define TK_DISTINCT 123
#define TK_FROM 124
#define TK_VARIABLE 125
#define TK_INTERVAL 126
#define TK_EVERY 127
#define TK_SESSION 128
#define TK_STATE_WINDOW 129
#define TK_FILL 130
#define TK_SLIDING 131
#define TK_ORDER 132
#define TK_BY 133
#define TK_ASC 134
#define TK_GROUP 135
#define TK_HAVING 136
#define TK_LIMIT 137
#define TK_OFFSET 138
#define TK_SLIMIT 139
#define TK_SOFFSET 140
#define TK_WHERE 141
#define TK_RESET 142
#define TK_QUERY 143
#define TK_SYNCDB 144
#define TK_ADD 145
#define TK_COLUMN 146
#define TK_MODIFY 147
#define TK_TAG 148
#define TK_CHANGE 149
#define TK_SET 150
#define TK_KILL 151
#define TK_CONNECTION 152
#define TK_STREAM 153
#define TK_COLON 154
#define TK_ABORT 155
#define TK_AFTER 156
#define TK_ATTACH 157
#define TK_BEFORE 158
#define TK_BEGIN 159
#define TK_CASCADE 160
#define TK_CLUSTER 161
#define TK_CONFLICT 162
#define TK_COPY 163
#define TK_DEFERRED 164
#define TK_DELIMITERS 165
#define TK_DETACH 166
#define TK_EACH 167
#define TK_END 168
#define TK_EXPLAIN 169
#define TK_FAIL 170
#define TK_FOR 171
#define TK_IGNORE 172
#define TK_IMMEDIATE 173
#define TK_INITIALLY 174
#define TK_INSTEAD 175
#define TK_KEY 176
#define TK_OF 177
#define TK_RAISE 178

View File

@ -98,6 +98,7 @@ void shellParseArgument(int argc, char *argv[], SShellArguments *arguments) {
tstrncpy(g_password, (char *)(argv[i] + 2), SHELL_MAX_PASSWORD_LEN);
}
arguments->password = g_password;
arguments->is_use_passwd = true;
strcpy(argv[i], "");
argc -= 1;
}

File diff suppressed because it is too large Load Diff

View File

@ -31,7 +31,6 @@ void taosCloseDll(void *handle) {
int taosSetConsoleEcho(bool on)
{
#if 0
#define ECHOFLAGS (ECHO | ECHOE | ECHOK | ECHONL)
int err;
struct termios term;
@ -52,7 +51,6 @@ int taosSetConsoleEcho(bool on)
return -1;
}
#endif
return 0;
}

View File

@ -147,6 +147,7 @@ typedef struct HttpContext {
int32_t state;
uint8_t reqType;
uint8_t parsed;
uint8_t error;
char ipstr[22];
char user[TSDB_USER_LEN]; // parsed from auth token or login message
char pass[HTTP_PASSWORD_LEN];

View File

@ -188,11 +188,12 @@ void httpCloseContextByApp(HttpContext *pContext) {
pContext->parsed = false;
bool keepAlive = true;
if (parser && parser->httpVersion == HTTP_VERSION_10 && parser->keepAlive != HTTP_KEEPALIVE_ENABLE) {
if (pContext->error == true) {
keepAlive = false;
} else if (parser && parser->httpVersion == HTTP_VERSION_10 && parser->keepAlive != HTTP_KEEPALIVE_ENABLE) {
keepAlive = false;
} else if (parser && parser->httpVersion != HTTP_VERSION_10 && parser->keepAlive == HTTP_KEEPALIVE_DISABLE) {
keepAlive = false;
} else {
}
if (keepAlive) {

View File

@ -663,7 +663,7 @@ static int32_t httpParserOnTarget(HttpParser *parser, HTTP_PARSER_STATE state, c
HttpContext *pContext = parser->pContext;
int32_t ok = 0;
do {
if (!isspace(c) && c != '\r' && c != '\n') {
if (!isspace(c)) {
if (httpAppendString(&parser->str, &c, 1)) {
httpError("context:%p, fd:%d, parser state:%d, char:[%c]%02x, oom", pContext, pContext->fd, state, c, c);
ok = -1;
@ -1157,6 +1157,10 @@ static int32_t httpParseChar(HttpParser *parser, const char c, int32_t *again) {
httpOnError(parser, HTTP_CODE_INTERNAL_SERVER_ERROR, TSDB_CODE_HTTP_PARSE_ERROR_STATE);
}
if (ok != 0) {
pContext->error = true;
}
return ok;
}

View File

@ -11,7 +11,7 @@
%left OR.
%left AND.
%right NOT.
%left EQ NE ISNULL NOTNULL IS LIKE GLOB BETWEEN IN.
%left EQ NE ISNULL NOTNULL IS LIKE MATCH GLOB BETWEEN IN.
%left GT GE LT LE.
%left BITAND BITOR LSHIFT RSHIFT.
%left PLUS MINUS.
@ -751,6 +751,9 @@ expr(A) ::= expr(X) REM expr(Y). {A = tSqlExprCreate(X, Y, TK_REM); }
// like expression
expr(A) ::= expr(X) LIKE expr(Y). {A = tSqlExprCreate(X, Y, TK_LIKE); }
// match expression
expr(A) ::= expr(X) MATCH expr(Y). {A = tSqlExprCreate(X, Y, TK_MATCH); }
//in expression
expr(A) ::= expr(X) IN LP exprlist(Y) RP. {A = tSqlExprCreate(X, (tSqlExpr*)Y, TK_IN); }

View File

@ -28,6 +28,7 @@ OptrStr gOptrStr[] = {
{TSDB_RELATION_GREATER_EQUAL, ">="},
{TSDB_RELATION_NOT_EQUAL, "!="},
{TSDB_RELATION_LIKE, "like"},
{TSDB_RELATION_MATCH, "match"},
{TSDB_RELATION_ISNULL, "is null"},
{TSDB_RELATION_NOTNULL, "not null"},
{TSDB_RELATION_IN, "in"},
@ -156,7 +157,7 @@ int8_t filterGetRangeCompFuncFromOptrs(uint8_t optr, uint8_t optr2) {
__compar_fn_t gDataCompare[] = {compareInt32Val, compareInt8Val, compareInt16Val, compareInt64Val, compareFloatVal,
compareDoubleVal, compareLenPrefixedStr, compareStrPatternComp, compareFindItemInSet, compareWStrPatternComp,
compareLenPrefixedWStr, compareUint8Val, compareUint16Val, compareUint32Val, compareUint64Val,
setCompareBytes1, setCompareBytes2, setCompareBytes4, setCompareBytes8
setCompareBytes1, setCompareBytes2, setCompareBytes4, setCompareBytes8, compareStrRegexComp,
};
int8_t filterGetCompFuncIdx(int32_t type, int32_t optr) {
@ -195,7 +196,9 @@ int8_t filterGetCompFuncIdx(int32_t type, int32_t optr) {
case TSDB_DATA_TYPE_FLOAT: comparFn = 4; break;
case TSDB_DATA_TYPE_DOUBLE: comparFn = 5; break;
case TSDB_DATA_TYPE_BINARY: {
if (optr == TSDB_RELATION_LIKE) { /* wildcard query using like operator */
if (optr == TSDB_RELATION_MATCH) {
comparFn = 19;
} else if (optr == TSDB_RELATION_LIKE) { /* wildcard query using like operator */
comparFn = 7;
} else if (optr == TSDB_RELATION_IN) {
comparFn = 8;
@ -207,7 +210,9 @@ int8_t filterGetCompFuncIdx(int32_t type, int32_t optr) {
}
case TSDB_DATA_TYPE_NCHAR: {
if (optr == TSDB_RELATION_LIKE) {
if (optr == TSDB_RELATION_MATCH) {
comparFn = 19;
} else if (optr == TSDB_RELATION_LIKE) {
comparFn = 9;
} else if (optr == TSDB_RELATION_IN) {
comparFn = 8;
@ -1871,6 +1876,9 @@ bool filterDoCompare(__compar_fn_t func, uint8_t optr, void *left, void *right)
case TSDB_RELATION_LIKE: {
return ret == 0;
}
case TSDB_RELATION_MATCH: {
return ret == 0;
}
case TSDB_RELATION_IN: {
return ret == 1;
}
@ -2641,7 +2649,7 @@ int32_t filterRmUnitByRange(SFilterInfo *info, SDataStatis *pDataStatis, int32_t
}
if (cunit->optr == TSDB_RELATION_ISNULL || cunit->optr == TSDB_RELATION_NOTNULL
|| cunit->optr == TSDB_RELATION_IN || cunit->optr == TSDB_RELATION_LIKE
|| cunit->optr == TSDB_RELATION_IN || cunit->optr == TSDB_RELATION_LIKE || cunit->optr == TSDB_RELATION_MATCH
|| cunit->optr == TSDB_RELATION_NOT_EQUAL) {
continue;
}

File diff suppressed because it is too large Load Diff

View File

@ -3709,6 +3709,9 @@ static bool tableFilterFp(const void* pNode, void* param) {
case TSDB_RELATION_LIKE: {
return ret == 0;
}
case TSDB_RELATION_MATCH: {
return ret == 0;
}
case TSDB_RELATION_IN: {
return ret == 1;
}
@ -4042,6 +4045,8 @@ static int32_t setQueryCond(tQueryInfo *queryColInfo, SQueryCond* pCond) {
pCond->start->v = queryColInfo->q;
} else if (optr == TSDB_RELATION_LIKE) {
assert(0);
} else if (optr == TSDB_RELATION_MATCH) {
assert(0);
}
return TSDB_CODE_SUCCESS;
@ -4199,7 +4204,7 @@ static void queryIndexlessColumn(SSkipList* pSkipList, tQueryInfo* pQueryInfo, S
if (pQueryInfo->sch.colId == TSDB_TBNAME_COLUMN_INDEX) {
if (pQueryInfo->optr == TSDB_RELATION_IN) {
addToResult = pQueryInfo->compare(name, pQueryInfo->q);
} else if (pQueryInfo->optr == TSDB_RELATION_LIKE) {
} else if (pQueryInfo->optr == TSDB_RELATION_LIKE || pQueryInfo->optr == TSDB_RELATION_MATCH) {
addToResult = !pQueryInfo->compare(name, pQueryInfo->q);
}
} else {
@ -4231,7 +4236,8 @@ void getTableListfromSkipList(tExprNode *pExpr, SSkipList *pSkipList, SArray *re
param->setupInfoFn(pExpr, param->pExtInfo);
tQueryInfo *pQueryInfo = pExpr->_node.info;
if (pQueryInfo->indexed && (pQueryInfo->optr != TSDB_RELATION_LIKE && pQueryInfo->optr != TSDB_RELATION_IN)) {
if (pQueryInfo->indexed && (pQueryInfo->optr != TSDB_RELATION_LIKE && pQueryInfo->optr != TSDB_RELATION_MATCH
&& pQueryInfo->optr != TSDB_RELATION_IN)) {
queryIndexedColumn(pSkipList, pQueryInfo, result);
} else {
queryIndexlessColumn(pSkipList, pQueryInfo, result, param->nodeFilterFn);

View File

@ -22,10 +22,11 @@ extern "C" {
#include "os.h"
#define TSDB_PATTERN_MATCH 0
#define TSDB_PATTERN_NOMATCH 1
#define TSDB_PATTERN_NOWILDCARDMATCH 2
#define TSDB_PATTERN_STRING_DEFAULT_LEN 100
#define TSDB_PATTERN_MATCH 0
#define TSDB_PATTERN_NOMATCH 1
#define TSDB_PATTERN_NOWILDCARDMATCH 2
#define TSDB_PATTERN_STRING_DEFAULT_LEN 100
#define TSDB_REGEX_STRING_DEFAULT_LEN 128
#define FLT_COMPAR_TOL_FACTOR 4
#define FLT_EQUAL(_x, _y) (fabs((_x) - (_y)) <= (FLT_COMPAR_TOL_FACTOR * FLT_EPSILON))
@ -82,6 +83,7 @@ int32_t compareLenPrefixedStr(const void *pLeft, const void *pRight);
int32_t compareLenPrefixedWStr(const void *pLeft, const void *pRight);
int32_t compareStrPatternComp(const void* pLeft, const void* pRight);
int32_t compareStrRegexComp(const void* pLeft, const void* pRight);
int32_t compareFindItemInSet(const void *pLeft, const void* pRight);
int32_t compareWStrPatternComp(const void* pLeft, const void* pRight);

View File

@ -112,14 +112,15 @@ void taosArrayRemoveBatch(SArray *pArray, const int32_t* pData, int32_t numOfEle
i += 1;
}
assert(i == pData[numOfElems - 1] + 1);
assert(i == pData[numOfElems - 1] + 1 && i <= size);
int32_t dstIndex = pData[numOfElems - 1] - numOfElems + 1;
int32_t srcIndex = pData[numOfElems - 1] + 1;
char* dst = TARRAY_GET_ELEM(pArray, dstIndex);
char* src = TARRAY_GET_ELEM(pArray, srcIndex);
memmove(dst, src, pArray->elemSize * (pArray->size - numOfElems));
int32_t dstIndex = pData[numOfElems - 1] - numOfElems + 1;
if (pArray->size - srcIndex > 0) {
char* dst = TARRAY_GET_ELEM(pArray, dstIndex);
char* src = TARRAY_GET_ELEM(pArray, srcIndex);
memmove(dst, src, pArray->elemSize * (pArray->size - srcIndex));
}
pArray->size -= numOfElems;
}

View File

@ -71,6 +71,8 @@ static pthread_once_t cacheThreadInit = PTHREAD_ONCE_INIT;
static pthread_mutex_t guard = PTHREAD_MUTEX_INITIALIZER;
static SArray* pCacheArrayList = NULL;
static bool stopRefreshWorker = false;
static bool refreshWorkerNormalStopped = false;
static bool refreshWorkerUnexpectedStopped = false;
static void doInitRefreshThread(void) {
pCacheArrayList = taosArrayInit(4, POINTER_BYTES);
@ -537,8 +539,10 @@ void taosCacheCleanup(SCacheObj *pCacheObj) {
pCacheObj->deleting = 1;
// wait for the refresh thread quit before destroying the cache object.
// But in the dll, the child thread will be killed before atexit takes effect.So here we only wait for 2 seconds.
for (int i = 0; i < 40&&atomic_load_8(&pCacheObj->deleting) != 0; i++) {
// But in the dll, the child thread will be killed before atexit takes effect.
while(atomic_load_8(&pCacheObj->deleting) != 0) {
if (refreshWorkerNormalStopped) break;
if (refreshWorkerUnexpectedStopped) return;
taosMsleep(50);
}
@ -641,7 +645,7 @@ void doCleanupDataCache(SCacheObj *pCacheObj) {
// todo memory leak if there are object with refcount greater than 0 in hash table?
taosHashCleanup(pCacheObj->pHashTable);
taosTrashcanEmpty(pCacheObj, false);
taosTrashcanEmpty(pCacheObj, true);
__cache_lock_destroy(pCacheObj);
@ -677,6 +681,12 @@ static void doCacheRefresh(SCacheObj* pCacheObj, int64_t time, __cache_free_fn_t
taosHashCondTraverse(pCacheObj->pHashTable, travHashTableFn, &sup);
}
void taosCacheRefreshWorkerUnexpectedStopped(void) {
if(!refreshWorkerNormalStopped) {
refreshWorkerUnexpectedStopped=true;
}
}
void* taosCacheTimedRefresh(void *handle) {
assert(pCacheArrayList != NULL);
uDebug("cache refresh thread starts");
@ -685,6 +695,7 @@ void* taosCacheTimedRefresh(void *handle) {
const int32_t SLEEP_DURATION = 500; //500 ms
int64_t count = 0;
atexit(taosCacheRefreshWorkerUnexpectedStopped);
while(1) {
taosMsleep(SLEEP_DURATION);
@ -749,6 +760,7 @@ void* taosCacheTimedRefresh(void *handle) {
pCacheArrayList = NULL;
pthread_mutex_destroy(&guard);
refreshWorkerNormalStopped=true;
uDebug("cache refresh thread quits");
return NULL;
@ -763,6 +775,6 @@ void taosCacheRefresh(SCacheObj *pCacheObj, __cache_free_fn_t fp) {
doCacheRefresh(pCacheObj, now, fp);
}
void taosStopCacheRefreshWorker() {
stopRefreshWorker = false;
void taosStopCacheRefreshWorker(void) {
stopRefreshWorker = true;
}

View File

@ -12,11 +12,17 @@
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _BSD_SOURCE
#define _GNU_SOURCE
#define _XOPEN_SOURCE
#define _DEFAULT_SOURCE
#include "tcompare.h"
#include "tulog.h"
#include "hash.h"
#include "regex.h"
#include "os.h"
#include "ttype.h"
#include "tcompare.h"
#include "hash.h"
int32_t setCompareBytes1(const void *pLeft, const void *pRight) {
return NULL != taosHashGet((SHashObj *)pRight, pLeft, 1) ? 1 : 0;
@ -344,6 +350,43 @@ int32_t compareStrPatternComp(const void* pLeft, const void* pRight) {
return (ret == TSDB_PATTERN_MATCH) ? 0 : 1;
}
int32_t compareStrRegexComp(const void* pLeft, const void* pRight) {
size_t sz = varDataLen(pRight);
char *pattern = malloc(sz + 1);
memcpy(pattern, varDataVal(pRight), varDataLen(pRight));
pattern[sz] = 0;
sz = varDataLen(pLeft);
char *str = malloc(sz + 1);
memcpy(str, varDataVal(pLeft), sz);
str[sz] = 0;
int errCode = 0;
regex_t regex;
char msgbuf[256] = {0};
int cflags = REG_EXTENDED;
if ((errCode = regcomp(&regex, pattern, cflags)) != 0) {
regerror(errCode, &regex, msgbuf, sizeof(msgbuf));
uError("Failed to compile regex pattern %s. reason %s", pattern, msgbuf);
regfree(&regex);
free(str);
free(pattern);
return 1;
}
errCode = regexec(&regex, str, 0, NULL, 0);
if (errCode != 0 && errCode != REG_NOMATCH) {
regerror(errCode, &regex, msgbuf, sizeof(msgbuf));
uDebug("Failed to match %s with pattern %s, reason %s", str, pattern, msgbuf)
}
int32_t result = (errCode == 0) ? 0 : 1;
regfree(&regex);
free(str);
free(pattern);
return result;
}
int32_t taosArrayCompareString(const void* a, const void* b) {
const char* x = *(const char**)a;
const char* y = *(const char**)b;
@ -405,7 +448,9 @@ __compar_fn_t getComparFunc(int32_t type, int32_t optr) {
case TSDB_DATA_TYPE_FLOAT: comparFn = compareFloatVal; break;
case TSDB_DATA_TYPE_DOUBLE: comparFn = compareDoubleVal; break;
case TSDB_DATA_TYPE_BINARY: {
if (optr == TSDB_RELATION_LIKE) { /* wildcard query using like operator */
if (optr == TSDB_RELATION_MATCH) {
comparFn = compareStrRegexComp;
} else if (optr == TSDB_RELATION_LIKE) { /* wildcard query using like operator */
comparFn = compareStrPatternComp;
} else if (optr == TSDB_RELATION_IN) {
comparFn = compareFindItemInSet;
@ -417,7 +462,9 @@ __compar_fn_t getComparFunc(int32_t type, int32_t optr) {
}
case TSDB_DATA_TYPE_NCHAR: {
if (optr == TSDB_RELATION_LIKE) {
if (optr == TSDB_RELATION_MATCH) {
comparFn = compareStrRegexComp;
} else if (optr == TSDB_RELATION_LIKE) {
comparFn = compareWStrPatternComp;
} else if (optr == TSDB_RELATION_IN) {
comparFn = compareFindItemInSet;

View File

@ -112,9 +112,10 @@ TAOS_DEFINE_ERROR(TSDB_CODE_TSC_EXCEED_SQL_LIMIT, "SQL statement too lon
TAOS_DEFINE_ERROR(TSDB_CODE_TSC_FILE_EMPTY, "File is empty")
TAOS_DEFINE_ERROR(TSDB_CODE_TSC_LINE_SYNTAX_ERROR, "Syntax error in Line")
TAOS_DEFINE_ERROR(TSDB_CODE_TSC_NO_META_CACHED, "No table meta cached")
TAOS_DEFINE_ERROR(TSDB_CODE_TSC_DUP_COL_NAMES, "duplicated column names")
TAOS_DEFINE_ERROR(TSDB_CODE_TSC_INVALID_TAG_LENGTH, "Invalid tag length")
TAOS_DEFINE_ERROR(TSDB_CODE_TSC_INVALID_COLUMN_LENGTH, "Invalid column length")
TAOS_DEFINE_ERROR(TSDB_CODE_TSC_DUP_COL_NAMES, "duplicated column names")
TAOS_DEFINE_ERROR(TSDB_CODE_TSC_INVALID_TAG_LENGTH, "Invalid tag length")
TAOS_DEFINE_ERROR(TSDB_CODE_TSC_INVALID_COLUMN_LENGTH, "Invalid column length")
TAOS_DEFINE_ERROR(TSDB_CODE_TSC_DUP_TAG_NAMES, "duplicated tag names")
// mnode
TAOS_DEFINE_ERROR(TSDB_CODE_MND_MSG_NOT_PROCESSED, "Message not processed")

View File

@ -53,6 +53,7 @@ static SKeyword keywordTable[] = {
{"NOTNULL", TK_NOTNULL},
{"IS", TK_IS},
{"LIKE", TK_LIKE},
{"MATCH", TK_MATCH},
{"GLOB", TK_GLOB},
{"BETWEEN", TK_BETWEEN},
{"IN", TK_IN},

View File

@ -0,0 +1,24 @@
FROM tdengine/tdengine-beta:latest
ENV DEBIAN_FRONTEND=noninteractive
ARG MIRROR=archive.ubuntu.com
RUN sed -Ei 's/\w+.ubuntu.com/'${MIRROR}'/' /etc/apt/sources.list && apt update && apt install mono-devel -y
RUN apt-get install wget -y \
&& wget https://packages.microsoft.com/config/ubuntu/18.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb \
&& dpkg -i packages-microsoft-prod.deb \
&& rm packages-microsoft-prod.deb \
&& apt-get update && apt-get install -y dotnet-sdk-5.0
COPY ./*.cs *.csproj /tmp/
WORKDIR /tmp/
RUN dotnet build -c Release && cp bin/Release/net5.0/taosdemo bin/Release/net5.0/taosdemo.* /usr/local/bin/ && rm -rf /tmp/*
FROM tdengine/tdengine-beta:latest
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install wget -y \
&& wget https://packages.microsoft.com/config/ubuntu/18.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb \
&& dpkg -i packages-microsoft-prod.deb \
&& rm packages-microsoft-prod.deb \
&& apt-get update && apt-get install -y dotnet-runtime-5.0
COPY --from=0 /usr/local/bin/taosdemo* /usr/local/bin/
CMD ["/usr/local/bin/taosdemo"]

View File

@ -1,13 +1,41 @@
# C# Taosdemo
## For Mono
install build environment
===
```sh
yum/apt install mono-complete
```
build C# version taosdemo
===
build C# version taosdemo.
```sh
mcs -out:taosdemo *.cs
./taosdemo --help
```
run C# version taosdemo
===
## For DotNet
install dotnet environment.
```sh
wget https://packages.microsoft.com/config/ubuntu/18.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb \
&& dpkg -i packages-microsoft-prod.deb \
&& rm packages-microsoft-prod.deb \
&& apt-get update && apt-get install -y dotnet-sdk-5.0
```
Build DotNet version taosdemo.
```sh
dotnet build -c Release
./bin/Release/net5.0/taosdemo --help
```
## Usage
```
Usage: mono taosdemo.exe [OPTION...]
--help Show usage.
@ -34,3 +62,4 @@ Usage: mono taosdemo.exe [OPTION...]
-v Print verbose output
-g Print debug output
-y Skip read key for continous test, default is not skip
```

View File

@ -0,0 +1,9 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net5.0</TargetFramework>
<GenerateAssemblyInfo>false</GenerateAssemblyInfo>
</PropertyGroup>
</Project>

View File

@ -1,12 +1,15 @@
// sample code to verify all TDengine API
// to compile: gcc -o apitest apitest.c -ltaos
#include "taoserror.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <taos.h>
#include <unistd.h>
static void prepare_data(TAOS* taos) {
TAOS_RES *result;
result = taos_query(taos, "drop database if exists test;");
@ -1014,6 +1017,186 @@ int32_t verify_schema_less(TAOS* taos) {
return (code);
}
void verify_telnet_insert(TAOS* taos) {
TAOS_RES *result;
result = taos_query(taos, "drop database if exists test;");
taos_free_result(result);
usleep(100000);
result = taos_query(taos, "create database db precision 'ms';");
taos_free_result(result);
usleep(100000);
(void)taos_select_db(taos, "db");
int32_t code = 0;
/* metric */
char* lines0[] = {
"stb0_0 1626006833639000000ns 4i8 host=\"host0\",interface=\"eth0\"",
"stb0_1 1626006833639000000ns 4i8 host=\"host0\",interface=\"eth0\"",
"stb0_2 1626006833639000000ns 4i8 host=\"host0\",interface=\"eth0\"",
};
code = taos_insert_telnet_lines(taos, lines0, 3);
if (code) {
printf("code: %d, %s.\n", code, tstrerror(code));
}
/* timestamp */
char* lines1[] = {
"stb1 1626006833s 1i8 host=\"host0\"",
"stb1 1626006833639000000ns 2i8 host=\"host0\"",
"stb1 1626006833640000us 3i8 host=\"host0\"",
"stb1 1626006833641123 4i8 host=\"host0\"",
"stb1 1626006833651ms 5i8 host=\"host0\"",
"stb1 0 6i8 host=\"host0\"",
};
code = taos_insert_telnet_lines(taos, lines1, 6);
if (code) {
printf("code: %d, %s.\n", code, tstrerror(code));
}
/* metric value */
//tinyin
char* lines2_0[] = {
"stb2_0 1626006833651ms -127i8 host=\"host0\"",
"stb2_0 1626006833652ms 127i8 host=\"host0\""
};
code = taos_insert_telnet_lines(taos, lines2_0, 2);
if (code) {
printf("code: %d, %s.\n", code, tstrerror(code));
}
//smallint
char* lines2_1[] = {
"stb2_1 1626006833651ms -32767i16 host=\"host0\"",
"stb2_1 1626006833652ms 32767i16 host=\"host0\""
};
code = taos_insert_telnet_lines(taos, lines2_1, 2);
if (code) {
printf("code: %d, %s.\n", code, tstrerror(code));
}
//int
char* lines2_2[] = {
"stb2_2 1626006833651ms -2147483647i32 host=\"host0\"",
"stb2_2 1626006833652ms 2147483647i32 host=\"host0\""
};
code = taos_insert_telnet_lines(taos, lines2_2, 2);
if (code) {
printf("code: %d, %s.\n", code, tstrerror(code));
}
//bigint
char* lines2_3[] = {
"stb2_3 1626006833651ms -9223372036854775807i64 host=\"host0\"",
"stb2_3 1626006833652ms 9223372036854775807i64 host=\"host0\""
};
code = taos_insert_telnet_lines(taos, lines2_3, 2);
if (code) {
printf("code: %d, %s.\n", code, tstrerror(code));
}
//float
char* lines2_4[] = {
"stb2_4 1626006833610ms 3f32 host=\"host0\"",
"stb2_4 1626006833620ms -3f32 host=\"host0\"",
"stb2_4 1626006833630ms 3.4f32 host=\"host0\"",
"stb2_4 1626006833640ms -3.4f32 host=\"host0\"",
"stb2_4 1626006833650ms 3.4E10f32 host=\"host0\"",
"stb2_4 1626006833660ms -3.4e10f32 host=\"host0\"",
"stb2_4 1626006833670ms 3.4E+2f32 host=\"host0\"",
"stb2_4 1626006833680ms -3.4e-2f32 host=\"host0\"",
"stb2_4 1626006833690ms 3.15 host=\"host0\"",
"stb2_4 1626006833700ms 3.4E38f32 host=\"host0\"",
"stb2_4 1626006833710ms -3.4E38f32 host=\"host0\""
};
code = taos_insert_telnet_lines(taos, lines2_4, 11);
if (code) {
printf("code: %d, %s.\n", code, tstrerror(code));
}
//double
char* lines2_5[] = {
"stb2_5 1626006833610ms 3f64 host=\"host0\"",
"stb2_5 1626006833620ms -3f64 host=\"host0\"",
"stb2_5 1626006833630ms 3.4f64 host=\"host0\"",
"stb2_5 1626006833640ms -3.4f64 host=\"host0\"",
"stb2_5 1626006833650ms 3.4E10f64 host=\"host0\"",
"stb2_5 1626006833660ms -3.4e10f64 host=\"host0\"",
"stb2_5 1626006833670ms 3.4E+2f64 host=\"host0\"",
"stb2_5 1626006833680ms -3.4e-2f64 host=\"host0\"",
"stb2_5 1626006833690ms 1.7E308f64 host=\"host0\"",
"stb2_5 1626006833700ms -1.7E308f64 host=\"host0\""
};
code = taos_insert_telnet_lines(taos, lines2_5, 10);
if (code) {
printf("code: %d, %s.\n", code, tstrerror(code));
}
//bool
char* lines2_6[] = {
"stb2_6 1626006833610ms t host=\"host0\"",
"stb2_6 1626006833620ms T host=\"host0\"",
"stb2_6 1626006833630ms true host=\"host0\"",
"stb2_6 1626006833640ms True host=\"host0\"",
"stb2_6 1626006833650ms TRUE host=\"host0\"",
"stb2_6 1626006833660ms f host=\"host0\"",
"stb2_6 1626006833670ms F host=\"host0\"",
"stb2_6 1626006833680ms false host=\"host0\"",
"stb2_6 1626006833690ms False host=\"host0\"",
"stb2_6 1626006833700ms FALSE host=\"host0\""
};
code = taos_insert_telnet_lines(taos, lines2_6, 10);
if (code) {
printf("code: %d, %s.\n", code, tstrerror(code));
}
//binary
char* lines2_7[] = {
"stb2_7 1626006833610ms \"binary_val.!@#$%^&*\" host=\"host0\"",
"stb2_7 1626006833620ms \"binary_val.:;,./?|+-=\" host=\"host0\"",
"stb2_7 1626006833630ms \"binary_val.()[]{}<>\" host=\"host0\""
};
code = taos_insert_telnet_lines(taos, lines2_7, 3);
if (code) {
printf("code: %d, %s.\n", code, tstrerror(code));
}
//nchar
char* lines2_8[] = {
"stb2_8 1626006833610ms L\"nchar_val数值一\" host=\"host0\"",
"stb2_8 1626006833620ms L\"nchar_val数值二\" host=\"host0\"",
};
code = taos_insert_telnet_lines(taos, lines2_8, 2);
if (code) {
printf("code: %d, %s.\n", code, tstrerror(code));
}
/* tags */
//tag value types
char* lines3_0[] = {
"stb3_0 1626006833610ms 1 t1=127i8,t2=32767i16,t3=2147483647i32,t4=9223372036854775807i64,t5=3.4E38f32,t6=1.7E308f64,t7=true,t8=\"binary_val_1\",t9=L\"标签值1\"",
"stb3_0 1626006833610ms 2 t1=-127i8,t2=-32767i16,t3=-2147483647i32,t4=-9223372036854775807i64,t5=-3.4E38f32,t6=-1.7E308f64,t7=false,t8=\"binary_val_2\",t9=L\"标签值2\""
};
code = taos_insert_telnet_lines(taos, lines3_0, 2);
if (code) {
printf("code: %d, %s.\n", code, tstrerror(code));
}
//tag ID as child table name
char* lines3_1[] = {
"stb3_1 1626006833610ms 1 id=\"child_table1\",host=\"host1\"",
"stb3_1 1626006833610ms 2 host=\"host2\",iD=\"child_table2\"",
"stb3_1 1626006833610ms 3 ID=\"child_table3\",host=\"host3\""
};
code = taos_insert_telnet_lines(taos, lines3_1, 3);
if (code) {
printf("code: %d, %s.\n", code, tstrerror(code));
}
return;
}
int main(int argc, char *argv[]) {
const char* host = "127.0.0.1";
const char* user = "root";
@ -1034,6 +1217,8 @@ int main(int argc, char *argv[]) {
printf("************ verify schema-less *************\n");
verify_schema_less(taos);
printf("************ verify telnet-insert *************\n");
verify_telnet_insert(taos);
printf("************ verify query *************\n");
verify_query(taos);
@ -1051,7 +1236,7 @@ int main(int argc, char *argv[]) {
verify_prepare2(taos);
printf("************ verify prepare3 *************\n");
verify_prepare3(taos);
printf("************ verify stream *************\n");
verify_stream(taos);
printf("done\n");

View File

@ -21,7 +21,8 @@ fi
today=`date +"%Y%m%d"`
WORK_DIR=/root/pxiao
PERFORMANCE_TEST_REPORT=$WORK_DIR/TDengine/tests/performance-report-$branch-$type-$today.log
name=`echo $branch | cut -d '/' -f2`
PERFORMANCE_TEST_REPORT=$WORK_DIR/TDinternal/community/tests/performance-report-$name-$type-$today.log
# Coloured Echoes #
function red_echo { echo -e "\033[31m$@\033[0m"; } #
@ -54,11 +55,12 @@ function stopTaosd {
}
function buildTDengine {
echoInfo "Build TDengine"
cd $WORK_DIR/TDengine
echoInfo "Build TDinternal"
cd $WORK_DIR/TDinternal
git remote update > /dev/null
git reset --hard HEAD
git fetch
git checkout $branch
REMOTE_COMMIT=`git rev-parse --short remotes/origin/$branch`
LOCAL_COMMIT=`git rev-parse --short @`
@ -69,13 +71,22 @@ function buildTDengine {
echo "repo up-to-date"
fi
cd community
git reset --hard HEAD
cd ..
echo "git submodule update --init --recursive"
git submodule update --init --recursive
git pull > /dev/null 2>&1
if [ $type = "jemalloc" ];then
echo "git submodule update --init --recursive"
git submodule update --init --recursive
fi
cd community
git remote update > /dev/null
git reset --hard HEAD
git fetch
git checkout $branch
REMOTE_COMMIT=`git rev-parse --short remotes/origin/$branch`
LOCAL_COMMIT=`git rev-parse --short @`
cd debug
cd ../debug
rm -rf *
if [ $type = "jemalloc" ];then
echo "cmake .. -DJEMALLOC_ENABLED=true > /dev/null"
@ -83,6 +94,10 @@ function buildTDengine {
else
cmake .. > /dev/null
fi
#cp $WORK_DIR/taosdemoPerformance.py $WORK_DIR/TDinternal/community/tests/pytest/tools/
#cp $WORK_DIR/insertFromCSVPerformance.py $WORK_DIR/TDinternal/community/tests/pytest/insert/
#cp $WORK_DIR/queryPerformance.py $WORK_DIR/TDinternal/community/tests/pytest/query/
rm -rf $WORK_DIR/TDinternal/community/tests/pytest/query/operator.py
make > /dev/null 2>&1
make install > /dev/null 2>&1
echo "Build TDengine on remote server"
@ -91,24 +106,24 @@ function buildTDengine {
function runQueryPerfTest {
[ -f $PERFORMANCE_TEST_REPORT ] && rm $PERFORMANCE_TEST_REPORT
nohup $WORK_DIR/TDengine/debug/build/bin/taosd -c /etc/perf/ > /dev/null 2>&1 &
nohup $WORK_DIR/TDinternal/debug/build/bin/taosd -c /etc/perf/ > /dev/null 2>&1 &
echoInfo "Wait TDengine to start"
sleep 60
echoInfo "Run Performance Test"
cd $WORK_DIR/TDengine/tests/pytest
cd $WORK_DIR/TDinternal/community/tests/pytest
python3 query/queryPerformance.py -c $LOCAL_COMMIT -b $branch -T $type | tee -a $PERFORMANCE_TEST_REPORT
python3 query/queryPerformance.py -c $LOCAL_COMMIT -b $branch -T $type -d perf2 | tee -a $PERFORMANCE_TEST_REPORT
python3 insert/insertFromCSVPerformance.py -c $LOCAL_COMMIT -b $branch -T $type | tee -a $PERFORMANCE_TEST_REPORT
echo "=========== taosdemo performance: 4 int columns, 10000 tables, 100000 recoreds per table ===========" | tee -a $PERFORMANCE_TEST_REPORT
python3 tools/taosdemoPerformance.py -c $LOCAL_COMMIT -b $branch -T $type | tee -a $PERFORMANCE_TEST_REPORT
echo "=========== taosdemo performance: 400 int columns, 400 double columns, 200 binary(128) columns, 10000 tables, 1000 recoreds per table ===========" | tee -a $PERFORMANCE_TEST_REPORT
python3 tools/taosdemoPerformance.py -c $LOCAL_COMMIT -b $branch -T $type -i 400 -D 400 -B 200 -t 10000 -r 100 | tee -a $PERFORMANCE_TEST_REPORT
echo "=========== taosdemo performance: 400 int columns, 400 double columns, 200 binary(128) columns, 10000 tables, 10 recoreds per table ===========" | tee -a $PERFORMANCE_TEST_REPORT
python3 tools/taosdemoPerformance.py -c $LOCAL_COMMIT -b $branch -T $type -i 400 -D 400 -B 200 -t 10000 -r 10 | tee -a $PERFORMANCE_TEST_REPORT
echo "=========== taosdemo performance: 1900 int columns, 1900 double columns, 200 binary(128) columns, 10000 tables, 1000 recoreds per table ===========" | tee -a $PERFORMANCE_TEST_REPORT
python3 tools/taosdemoPerformance.py -c $LOCAL_COMMIT -b $branch -T $type -i 1900 -D 1900 -B 200 -t 10000 -r 100 | tee -a $PERFORMANCE_TEST_REPORT
echo "=========== taosdemo performance: 1900 int columns, 1900 double columns, 200 binary(128) columns, 10000 tables, 10 recoreds per table ===========" | tee -a $PERFORMANCE_TEST_REPORT
python3 tools/taosdemoPerformance.py -c $LOCAL_COMMIT -b $branch -T $type -i 1900 -D 1900 -B 200 -t 10000 -r 10 | tee -a $PERFORMANCE_TEST_REPORT
}
@ -121,7 +136,7 @@ function sendReport {
sed -i 's/\x1b\[[0-9;]*m//g' $PERFORMANCE_TEST_REPORT
BODY_CONTENT=`cat $PERFORMANCE_TEST_REPORT`
echo -e "From: <support@taosdata.com>\nto: ${receiver}\nsubject: Query Performace Report ${branch} ${jemalloc} commit ID: ${LOCAL_COMMIT}\n\n${today}:\n${BODY_CONTENT}" | \
echo -e "From: <support@taosdata.com>\nto: ${receiver}\nsubject: Query Performace Report ${branch} ${type} commit ID: ${LOCAL_COMMIT}\n\n${today}:\n${BODY_CONTENT}" | \
(cat - && uuencode $PERFORMANCE_TEST_REPORT performance-test-report-$today.log) | \
/usr/sbin/ssmtp "${receiver}" && echo "Report Sent!"
}

View File

@ -15,6 +15,7 @@ import sys
from util.log import *
from util.cases import *
from util.sql import *
from math import floor
class TDTestCase:
@ -27,23 +28,22 @@ class TDTestCase:
sql = "select server_version()"
ret = tdSql.query(sql)
version = tdSql.getData(0, 0)[0:3]
expectedVersion_dev = "2.0"
expectedVersion_master = "2.1"
if(version == expectedVersion_dev or version == expectedVersion_master):
tdLog.info("sql:%s, row:%d col:%d data:%s == expect" % (sql, 0, 0, version))
version = floor(float(tdSql.getData(0, 0)[0:3]))
expectedVersion = 2
if(version == expectedVersion):
tdLog.info("sql:%s, row:%d col:%d data:%d == expect" % (sql, 0, 0, version))
else:
tdLog.exit("sql:%s, row:%d col:%d data:%s != expect:%s or %s " % (sql, 0, 0, version, expectedVersion_dev, expectedVersion_master))
tdLog.exit("sql:%s, row:%d col:%d data:%d != expect:%d " % (sql, 0, 0, version, expectedVersion))
sql = "select client_version()"
ret = tdSql.query(sql)
version = tdSql.getData(0, 0)[0:3]
expectedVersion_dev = "2.0"
expectedVersion_master = "2.1"
if(version == expectedVersion_dev or version == expectedVersion_master):
tdLog.info("sql:%s, row:%d col:%d data:%s == expect" % (sql, 0, 0, version))
version = floor(float(tdSql.getData(0, 0)[0:3]))
expectedVersion = 2
if(version == expectedVersion):
tdLog.info("sql:%s, row:%d col:%d data:%d == expect" % (sql, 0, 0, version))
else:
tdLog.exit("sql:%s, row:%d col:%d data:%s != expect:%s or %s " % (sql, 0, 0, version, expectedVersion_dev, expectedVersion_master))
tdLog.exit("sql:%s, row:%d col:%d data:%d != expect:%d " % (sql, 0, 0, version, expectedVersion))
def stop(self):

View File

@ -23,7 +23,7 @@ import string
from requests.auth import HTTPBasicAuth
func_list=['avg','count','twa','sum','stddev','leastsquares','min',
'max','first','last','top','bottom','percentile','apercentile',
'last_row','diff','spread']
'last_row','diff','spread','distinct']
condition_list=[
"where _c0 > now -10d ",
'interval(10s)',
@ -33,7 +33,7 @@ condition_list=[
'fill(null)'
]
where_list = ['_c0>now-10d',' <50','like',' is null']
where_list = ['_c0>now-10d',' <50','like',' is null','in']
class ConcurrentInquiry:
# def __init__(self,ts=1500000001000,host='127.0.0.1',user='root',password='taosdata',dbname='test',
# stb_prefix='st',subtb_prefix='t',n_Therads=10,r_Therads=10,probabilities=0.05,loop=5,
@ -152,6 +152,20 @@ class ConcurrentInquiry:
elif 'is null' in c:
conlist = ' ' + random.choice(tlist) + random.choice([' is null',' is not null'])
l.append(conlist)
elif 'in' in c:
in_list = []
temp = []
for i in range(random.randint(0,100)):
temp.append(random.randint(-10000,10000))
temp = (str(i) for i in temp)
in_list.append(temp)
temp1 = []
for i in range(random.randint(0,100)):
temp1.append("'" + ''.join(random.sample(string.ascii_letters, random.randint(0,10))) + "'")
in_list.append(temp1)
in_list.append(['NULL','NULL'])
conlist = ' ' + random.choice(tlist) + ' in (' + ','.join(random.choice(in_list)) + ')'
l.append(conlist)
else:
s_all = string.ascii_letters
conlist = ' ' + random.choice(tlist) + " like \'%" + random.choice(s_all) + "%\' "
@ -182,7 +196,14 @@ class ConcurrentInquiry:
def con_order(self,tlist,col_list,tag_list):
return 'order by '+random.choice(tlist)
def con_state_window(self,tlist,col_list,tag_list):
return 'state_window(' + random.choice(tlist + tag_list) + ')'
def con_session_window(self,tlist,col_list,tag_list):
session_window = 'session_window(' + random.choice(tlist + tag_list) + ',' + str(random.randint(0,20)) + random.choice(['a','s','d','w','n','y']) + ')'
return session_window
def gen_subquery_sql(self):
subsql ,col_num = self.gen_query_sql(1)
if col_num == 0:
@ -221,7 +242,7 @@ class ConcurrentInquiry:
else:
sql=sql+','.join(sel_col_list) #select col & func
sql = sql + ' from ('+ subsql +') '
con_func=[self.con_where,self.con_interval,self.con_limit,self.con_group,self.con_order,self.con_fill]
con_func=[self.con_where,self.con_interval,self.con_limit,self.con_group,self.con_order,self.con_fill,self.con_state_window,self.con_session_window]
sel_con=random.sample(con_func,random.randint(0,len(con_func)))
sel_con_list=[]
for i in sel_con:
@ -281,7 +302,7 @@ class ConcurrentInquiry:
sql = sql + ' from '+random.choice(self.subtb_list)+' '
else:
sql = sql + ' from '+random.choice(self.stb_list)+' '
con_func=[self.con_where,self.con_interval,self.con_limit,self.con_group,self.con_order,self.con_fill]
con_func=[self.con_where,self.con_interval,self.con_limit,self.con_group,self.con_order,self.con_fill,self.con_state_window,self.con_session_window]
sel_con=random.sample(con_func,random.randint(0,len(con_func)))
sel_con_list=[]
for i in sel_con:

View File

@ -11,45 +11,43 @@
# -*- coding: utf-8 -*-
import sys
import taos
from util.log import *
from util.cases import *
from util.sql import *
import numpy as np
class TDTestCase:
def init(self, conn, logSql):
tdLog.debug("start to execute %s" % __file__)
tdSql.init(conn.cursor())
self.rowNum = 10
self.ts = 1537146000000
def run(self):
tdSql.prepare()
tdSql.execute("create table ap1 (ts timestamp, pav float)")
tdSql.execute("insert into ap1 values ('2021-07-25 02:19:54.119', 2.90799)")
tdSql.execute("insert into ap1 values ('2021-07-25 02:19:54.317', 3.07399)")
tdSql.execute("insert into ap1 values ('2021-07-25 02:19:54.517', 0.58117)")
tdSql.execute("insert into ap1 values ('2021-07-25 02:19:54.717', 0.16150)")
tdSql.execute("insert into ap1 values ('2021-07-25 02:19:54.918', 1.47885)")
tdSql.execute("insert into ap1 values ('2021-07-25 02:19:56.569', 1.76472)")
tdSql.execute("insert into ap1 values ('2021-07-25 02:19:57.381', 2.13722)")
tdSql.execute("insert into ap1 values ('2021-07-25 02:19:57.574', 4.10256)")
tdSql.execute("insert into ap1 values ('2021-07-25 02:19:57.776', 3.55345)")
tdSql.execute("insert into ap1 values ('2021-07-25 02:19:57.976', 1.46624)")
tdSql.execute("insert into ap1 values ('2021-07-25 02:19:58.187', 0.17943)")
tdSql.execute("insert into ap1 values ('2021-07-25 02:19:58.372', 2.04101)")
tdSql.execute("insert into ap1 values ('2021-07-25 02:19:58.573', 3.20924)")
tdSql.execute("insert into ap1 values ('2021-07-25 02:19:58.768', 1.71807)")
tdSql.execute("insert into ap1 values ('2021-07-25 02:19:58.964', 4.60900)")
tdSql.execute("insert into ap1 values ('2021-07-25 02:19:59.155', 4.33907)")
tdSql.execute("insert into ap1 values ('2021-07-25 02:19:59.359', 0.76940)")
tdSql.execute("insert into ap1 values ('2021-07-25 02:19:59.553', 0.06458)")
tdSql.execute("insert into ap1 values ('2021-07-25 02:19:59.742', 4.59857)")
tdSql.execute("insert into ap1 values ('2021-07-25 02:19:59.938', 1.55081)")
tdSql.execute("create table ap2 (ts timestamp, pav float) tags (t1 float)")
tdSql.execute("create table ap2_sub1 using ap2 tags (2.90799)")
tdSql.execute("create table ap2_sub2 using ap2 tags (2.90799)")
tdSql.execute("create table ap3 (ts timestamp, pav float) tags (t1 float)")
tdSql.execute("create table ap3_sub1 using ap3 tags (2.90799)")
for tb_name in ["ap1", "ap2_sub1", "ap3_sub1"]:
tdSql.execute(f"insert into {tb_name} values ('2021-07-25 02:19:54.119', 2.90799)")
tdSql.execute(f"insert into {tb_name} values ('2021-07-25 02:19:54.317', 3.07399)")
tdSql.execute(f"insert into {tb_name} values ('2021-07-25 02:19:54.517', 0.58117)")
tdSql.execute(f"insert into {tb_name} values ('2021-07-25 02:19:54.717', 0.16150)")
tdSql.execute(f"insert into {tb_name} values ('2021-07-25 02:19:54.918', 1.47885)")
tdSql.execute(f"insert into {tb_name} values ('2021-07-25 02:19:56.569', 1.76472)")
tdSql.execute(f"insert into {tb_name} values ('2021-07-25 02:19:57.381', 2.13722)")
tdSql.execute(f"insert into {tb_name} values ('2021-07-25 02:19:57.574', 4.10256)")
tdSql.execute(f"insert into {tb_name} values ('2021-07-25 02:19:57.776', 3.55345)")
tdSql.execute(f"insert into {tb_name} values ('2021-07-25 02:19:57.976', 1.46624)")
tdSql.execute(f"insert into {tb_name} values ('2021-07-25 02:19:58.187', 0.17943)")
tdSql.execute(f"insert into {tb_name} values ('2021-07-25 02:19:58.372', 2.04101)")
tdSql.execute(f"insert into {tb_name} values ('2021-07-25 02:19:58.573', 3.20924)")
tdSql.execute(f"insert into {tb_name} values ('2021-07-25 02:19:58.768', 1.71807)")
tdSql.execute(f"insert into {tb_name} values ('2021-07-25 02:19:58.964', 4.60900)")
tdSql.execute(f"insert into {tb_name} values ('2021-07-25 02:19:59.155', 4.33907)")
tdSql.execute(f"insert into {tb_name} values ('2021-07-25 02:19:59.359', 0.76940)")
tdSql.execute(f"insert into {tb_name} values ('2021-07-25 02:19:59.553', 0.06458)")
tdSql.execute(f"insert into {tb_name} values ('2021-07-25 02:19:59.742', 4.59857)")
tdSql.execute(f"insert into {tb_name} values ('2021-07-25 02:19:59.938', 1.55081)")
tdSql.query("select interp(pav) from ap1 where ts = '2021-07-25 02:19:54' FILL (PREV)")
tdSql.checkRows(0)
@ -57,6 +55,29 @@ class TDTestCase:
tdSql.checkRows(0)
tdSql.query("select interp(pav) from ap1 where ts = '2021-07-25 02:19:54' FILL (LINEAR)")
tdSql.checkRows(0)
# check None
tdSql.query("select interp(pav) from ap1 where ts> '2021-07-25 02:19:54' and ts<'2021-07-25 02:20:00' every(1000a) FILL (None)")
tdSql.checkRows(0)
# check NULL
tdSql.query("select interp(pav) from ap1 where ts> '2021-07-25 02:19:54' and ts<'2021-07-25 02:20:00' every(1000a) FILL (NULL)")
tdSql.checkRows(6)
for i in range(5):
tdSql.checkData(i,1,None)
# checkout VALUE
tdSql.query("select interp(pav) from ap1 where ts> '2021-07-25 02:19:54' and ts<'2021-07-25 02:20:00' every(1000a) FILL (VALUE, 1)")
tdSql.checkRows(6)
for i in range(5):
tdSql.checkData(i,1,1.00000)
# check tag group by
tdSql.query("select interp(pav) from ap2 where ts>= '2021-07-25 02:19:54' and ts<'2021-07-25 02:20:00' every(1000a) FILL (VALUE,1) group by t1;")
for i in range(5):
tdSql.checkData(i,1,1.00000)
tdSql.checkData(i,2,2.90799)
# check multi ts lines
tdSql.query("select z1.ts,z1.val1,z2.val2 from (select interp(pav) val1 from ap2 where ts> '2021-07-25 02:19:54' and ts<'2021-07-25 02:20:00' every(1000a) FILL (value,1)) z1,(select interp(pav) val2 from ap3 where ts> '2021-07-25 02:19:54' and ts<'2021-07-25 02:20:00' every(1000a) FILL (value,2)) z2 where z1.ts=z2.ts ;")
for i in range(5):
tdSql.checkData(i,1,1.00000)
tdSql.checkData(i,2,2.00000)
tdSql.query("select interp(pav) from ap1 where ts> '2021-07-25 02:19:54' and ts<'2021-07-25 02:20:00' every(1000a) FILL (LINEAR)")
tdSql.checkRows(6)
tdSql.query("select interp(pav) from ap1 where ts>= '2021-07-25 02:19:54' and ts<'2021-07-25 02:20:00' every(1000a) FILL (NEXT)")

View File

@ -0,0 +1,313 @@
###################################################################
# Copyright (c) 2021 by TAOS Technologies, Inc.
# All rights reserved.
#
# This file is proprietary and confidential to TAOS Technologies.
# No part of this file may be reproduced, stored, transmitted,
# disclosed or used in any form or by any means other than as
# expressly provided by the written permission from Jianhui Tao
#
###################################################################
# -*- coding: utf-8 -*-
import sys
from util.log import *
from util.cases import *
from util.sql import *
class TDTestCase:
def init(self, conn, logSql):
tdLog.debug("start to execute %s" % __file__)
tdSql.init(conn.cursor(), logSql)
self._conn = conn
def run(self):
print("running {}".format(__file__))
tdSql.execute("drop database if exists test")
tdSql.execute("create database if not exists test precision 'us'")
tdSql.execute('use test')
### metric ###
print("============= step1 : test metric ================")
lines0 = [
"stb0_0 1626006833639000000ns 4i8 host=\"host0\",interface=\"eth0\"",
"stb0_1 1626006833639000000ns 4i8 host=\"host0\",interface=\"eth0\"",
"stb0_2 1626006833639000000ns 4i8 host=\"host0\",interface=\"eth0\"",
]
code = self._conn.insert_telnet_lines(lines0)
print("insert_telnet_lines result {}".format(code))
tdSql.query("show stables")
tdSql.checkRows(3)
tdSql.query("describe stb0_0")
tdSql.checkRows(4)
tdSql.query("describe stb0_1")
tdSql.checkRows(4)
tdSql.query("describe stb0_2")
tdSql.checkRows(4)
### timestamp ###
print("============= step2 : test timestamp ================")
lines1 = [
"stb1 1626006833s 1i8 host=\"host0\"",
"stb1 1626006833639000000ns 2i8 host=\"host0\"",
"stb1 1626006833640000us 3i8 host=\"host0\"",
"stb1 1626006833641123 4i8 host=\"host0\"",
"stb1 1626006833651ms 5i8 host=\"host0\"",
"stb1 0 6i8 host=\"host0\"",
]
code = self._conn.insert_telnet_lines(lines1)
print("insert_telnet_lines result {}".format(code))
tdSql.query("select * from stb1")
tdSql.checkRows(6)
### metric value ###
print("============= step3 : test metric value ================")
#tinyint
lines2_0 = [
"stb2_0 1626006833651ms -127i8 host=\"host0\"",
"stb2_0 1626006833652ms 127i8 host=\"host0\""
]
code = self._conn.insert_telnet_lines(lines2_0)
print("insert_telnet_lines result {}".format(code))
tdSql.query("select * from stb2_0")
tdSql.checkRows(2)
tdSql.query("describe stb2_0")
tdSql.checkRows(3)
tdSql.checkData(1, 1, "TINYINT")
#smallint
lines2_1 = [
"stb2_1 1626006833651ms -32767i16 host=\"host0\"",
"stb2_1 1626006833652ms 32767i16 host=\"host0\""
]
code = self._conn.insert_telnet_lines(lines2_1)
print("insert_telnet_lines result {}".format(code))
tdSql.query("select * from stb2_1")
tdSql.checkRows(2)
tdSql.query("describe stb2_1")
tdSql.checkRows(3)
tdSql.checkData(1, 1, "SMALLINT")
#int
lines2_2 = [
"stb2_2 1626006833651ms -2147483647i32 host=\"host0\"",
"stb2_2 1626006833652ms 2147483647i32 host=\"host0\""
]
code = self._conn.insert_telnet_lines(lines2_2)
print("insert_telnet_lines result {}".format(code))
tdSql.query("select * from stb2_2")
tdSql.checkRows(2)
tdSql.query("describe stb2_2")
tdSql.checkRows(3)
tdSql.checkData(1, 1, "INT")
#bigint
lines2_3 = [
"stb2_3 1626006833651ms -9223372036854775807i64 host=\"host0\"",
"stb2_3 1626006833652ms 9223372036854775807i64 host=\"host0\""
]
code = self._conn.insert_telnet_lines(lines2_3)
print("insert_telnet_lines result {}".format(code))
tdSql.query("select * from stb2_3")
tdSql.checkRows(2)
tdSql.query("describe stb2_3")
tdSql.checkRows(3)
tdSql.checkData(1, 1, "BIGINT")
#float
lines2_4 = [
"stb2_4 1626006833610ms 3f32 host=\"host0\"",
"stb2_4 1626006833620ms -3f32 host=\"host0\"",
"stb2_4 1626006833630ms 3.4f32 host=\"host0\"",
"stb2_4 1626006833640ms -3.4f32 host=\"host0\"",
"stb2_4 1626006833650ms 3.4E10f32 host=\"host0\"",
"stb2_4 1626006833660ms -3.4e10f32 host=\"host0\"",
"stb2_4 1626006833670ms 3.4E+2f32 host=\"host0\"",
"stb2_4 1626006833680ms -3.4e-2f32 host=\"host0\"",
"stb2_4 1626006833690ms 3.15 host=\"host0\"",
"stb2_4 1626006833700ms 3.4E38f32 host=\"host0\"",
"stb2_4 1626006833710ms -3.4E38f32 host=\"host0\""
]
code = self._conn.insert_telnet_lines(lines2_4)
print("insert_telnet_lines result {}".format(code))
tdSql.query("select * from stb2_4")
tdSql.checkRows(11)
tdSql.query("describe stb2_4")
tdSql.checkRows(3)
tdSql.checkData(1, 1, "FLOAT")
#double
lines2_5 = [
"stb2_5 1626006833610ms 3f64 host=\"host0\"",
"stb2_5 1626006833620ms -3f64 host=\"host0\"",
"stb2_5 1626006833630ms 3.4f64 host=\"host0\"",
"stb2_5 1626006833640ms -3.4f64 host=\"host0\"",
"stb2_5 1626006833650ms 3.4E10f64 host=\"host0\"",
"stb2_5 1626006833660ms -3.4e10f64 host=\"host0\"",
"stb2_5 1626006833670ms 3.4E+2f64 host=\"host0\"",
"stb2_5 1626006833680ms -3.4e-2f64 host=\"host0\"",
"stb2_5 1626006833690ms 1.7E308f64 host=\"host0\"",
"stb2_5 1626006833700ms -1.7E308f64 host=\"host0\""
]
code = self._conn.insert_telnet_lines(lines2_5)
print("insert_telnet_lines result {}".format(code))
tdSql.query("select * from stb2_5")
tdSql.checkRows(10)
tdSql.query("describe stb2_5")
tdSql.checkRows(3)
tdSql.checkData(1, 1, "DOUBLE")
#bool
lines2_6 = [
"stb2_6 1626006833610ms t host=\"host0\"",
"stb2_6 1626006833620ms T host=\"host0\"",
"stb2_6 1626006833630ms true host=\"host0\"",
"stb2_6 1626006833640ms True host=\"host0\"",
"stb2_6 1626006833650ms TRUE host=\"host0\"",
"stb2_6 1626006833660ms f host=\"host0\"",
"stb2_6 1626006833670ms F host=\"host0\"",
"stb2_6 1626006833680ms false host=\"host0\"",
"stb2_6 1626006833690ms False host=\"host0\"",
"stb2_6 1626006833700ms FALSE host=\"host0\""
]
code = self._conn.insert_telnet_lines(lines2_6)
print("insert_telnet_lines result {}".format(code))
tdSql.query("select * from stb2_6")
tdSql.checkRows(10)
tdSql.query("describe stb2_6")
tdSql.checkRows(3)
tdSql.checkData(1, 1, "BOOL")
#binary
lines2_7 = [
"stb2_7 1626006833610ms \"binary_val.!@#$%^&*\" host=\"host0\"",
"stb2_7 1626006833620ms \"binary_val.:;,./?|+-=\" host=\"host0\"",
"stb2_7 1626006833630ms \"binary_val.()[]{}<>\" host=\"host0\""
]
code = self._conn.insert_telnet_lines(lines2_7)
print("insert_telnet_lines result {}".format(code))
tdSql.query("select * from stb2_7")
tdSql.checkRows(3)
tdSql.query("describe stb2_7")
tdSql.checkRows(3)
tdSql.checkData(1, 1, "BINARY")
#nchar
lines2_8 = [
"stb2_8 1626006833610ms L\"nchar_val数值一\" host=\"host0\"",
"stb2_8 1626006833620ms L\"nchar_val数值二\" host=\"host0\""
]
code = self._conn.insert_telnet_lines(lines2_8)
print("insert_telnet_lines result {}".format(code))
tdSql.query("select * from stb2_8")
tdSql.checkRows(2)
tdSql.query("describe stb2_8")
tdSql.checkRows(3)
tdSql.checkData(1, 1, "NCHAR")
### tags ###
print("============= step3 : test tags ================")
#tag value types
lines3_0 = [
"stb3_0 1626006833610ms 1 t1=127i8,t2=32767i16,t3=2147483647i32,t4=9223372036854775807i64,t5=3.4E38f32,t6=1.7E308f64,t7=true,t8=\"binary_val_1\",t9=L\"标签值1\"",
"stb3_0 1626006833610ms 2 t1=-127i8,t2=-32767i16,t3=-2147483647i32,t4=-9223372036854775807i64,t5=-3.4E38f32,t6=-1.7E308f64,t7=false,t8=\"binary_val_2\",t9=L\"标签值2\""
]
code = self._conn.insert_telnet_lines(lines3_0)
print("insert_telnet_lines result {}".format(code))
tdSql.query("select * from stb3_0")
tdSql.checkRows(2)
tdSql.query("describe stb3_0")
tdSql.checkRows(11)
tdSql.checkData(2, 1, "TINYINT")
tdSql.checkData(2, 3, "TAG")
tdSql.checkData(3, 1, "SMALLINT")
tdSql.checkData(3, 3, "TAG")
tdSql.checkData(4, 1, "INT")
tdSql.checkData(4, 3, "TAG")
tdSql.checkData(5, 1, "BIGINT")
tdSql.checkData(5, 3, "TAG")
tdSql.checkData(6, 1, "FLOAT")
tdSql.checkData(6, 3, "TAG")
tdSql.checkData(7, 1, "DOUBLE")
tdSql.checkData(7, 3, "TAG")
tdSql.checkData(8, 1, "BOOL")
tdSql.checkData(8, 3, "TAG")
tdSql.checkData(9, 1, "BINARY")
tdSql.checkData(9, 3, "TAG")
tdSql.checkData(10, 1, "NCHAR")
tdSql.checkData(10, 3, "TAG")
#tag ID as child table name
lines3_1 = [
"stb3_1 1626006833610ms 1 id=\"child_table1\",host=\"host1\"",
"stb3_1 1626006833610ms 2 host=\"host2\",iD=\"child_table2\"",
"stb3_1 1626006833610ms 3 ID=\"child_table3\",host=\"host3\""
]
code = self._conn.insert_telnet_lines(lines3_1)
print("insert_telnet_lines result {}".format(code))
tdSql.query("select * from stb3_1")
tdSql.checkRows(3)
tdSql.query("show tables like \"child%\"")
tdSql.checkRows(3)
tdSql.checkData(0, 0, "child_table1")
def stop(self):
tdSql.close()
tdLog.success("%s successfully executed" % __file__)
tdCases.addWindows(__file__, TDTestCase())
tdCases.addLinux(__file__, TDTestCase())

File diff suppressed because it is too large Load Diff

View File

@ -13,6 +13,8 @@
import sys
import taos
import string
import random
from util.log import *
from util.cases import *
from util.sql import *
@ -23,6 +25,11 @@ class TDTestCase:
tdLog.debug("start to execute %s" % __file__)
tdSql.init(conn.cursor())
def get_random_string(self, length):
letters = string.ascii_lowercase
result_str = ''.join(random.choice(letters) for i in range(length))
return result_str
def run(self):
tdSql.prepare()
@ -186,6 +193,20 @@ class TDTestCase:
tdSql.query("select t1.ts from t0,t1 where t0.ts = t1.ts")
tdSql.checkData(0,0,'2018-10-03 14:38:05.000000')
#TD-6425 join result more than 1MB
tdSql.execute("create database test_join")
tdSql.execute("use test_join")
ts = 1538548685000
tdSql.execute("create table stb(ts timestamp, c1 nchar(200)) tags(id int, loc binary(20))")
for i in range(2):
tdSql.execute("create table tb%d using stb tags(1, 'city%d')" % (i, i))
for j in range(1000):
tdSql.execute("insert into tb%d values(%d, '%s')" % (i, ts + j, self.get_random_string(200)))
tdSql.query("select tb0.c1, tb1.c1 from tb0, tb1 where tb0.ts = tb1.ts")
tdSql.checkRows(1000)
def stop(self):
tdSql.close()
tdLog.success("%s successfully executed" % __file__)

View File

@ -26,7 +26,7 @@ class TDTestCase:
tdLog.debug("start to execute %s" % __file__)
tdSql.init(conn.cursor(), logSql)
self.numberOfTables = 10
self.numberOfTables = 8
self.numberOfRecords = 1000000
def getBuildPath(self):
@ -86,7 +86,7 @@ class TDTestCase:
while True:
print("query started")
try:
tdSql.query("select * from test.t9")
tdSql.query("select * from test.t7")
except Exception as e:
tdLog.info("select * test failed")
time.sleep(2)
@ -100,8 +100,8 @@ class TDTestCase:
print("alter table test.meters add column c10 int")
tdSql.execute("alter table test.meters add column c10 int")
print("insert into test.t9 values (now, 1, 2, 3, 4, 0)")
tdSql.execute("insert into test.t9 values (now, 1, 2, 3, 4, 0)")
print("insert into test.t7 values (now, 1, 2, 3, 4, 0)")
tdSql.execute("insert into test.t7 values (now, 1, 2, 3, 4, 0)")
def run(self):
tdSql.prepare()

View File

@ -222,3 +222,4 @@ run general/stream/metrics_replica1_vnoden.sim
run general/db/show_create_db.sim
run general/db/show_create_table.sim
run general/parser/like.sim
run general/parser/regex.sim

View File

@ -0,0 +1,62 @@
system sh/stop_dnodes.sh
system sh/deploy.sh -n dnode1 -i 1
system sh/cfg.sh -n dnode1 -c walLevel -v 1
system sh/cfg.sh -n dnode1 -c maxtablesPerVnode -v 4
system sh/exec.sh -n dnode1 -s start
sleep 100
sql connect
$db = testdb
sql drop database if exists $db
sql create database $db
sql use $db
print ======================== regular expression match test
$st_name = st
$ct1_name = ct1
$ct2_name = ct2
sql create table $st_name (ts timestamp, c1b binary(20)) tags(t1b binary(20));
sql create table $ct1_name using $st_name tags('taosdata1')
sql create table $ct2_name using $st_name tags('taosdata2')
sql create table not_match using $st_name tags('NOTMATCH')
sql select tbname from $st_name where tbname match '.*'
if $rows != 3 then
return -1
endi
sql select tbname from $st_name where tbname match '^ct[[:digit:]]'
if $rows != 2 then
return -1
endi
sql select tbname from $st_name where tbname match '.*'
if $rows !=3 then
return -1
endi
sql select tbname from $st_name where t1b match '[[:lower:]]+'
if $rows != 2 then
return -1
endi
sql insert into $ct1_name values(now, 'this is engine')
sql insert into $ct2_name values(now, 'this is app egnine')
sql select c1b from $st_name where c1b match 'engine'
if $data00 != @this is engine@ then
return -1
endi
if $rows != 1 then
return -1
endi
system sh/exec.sh -n dnode1 -s stop -x SIGINT