Merge branch '3.0' into enh/TD-21161-3.0

This commit is contained in:
kailixu 2023-04-21 16:04:09 +08:00
commit 77a8e3b213
73 changed files with 2328 additions and 1014 deletions

View File

@ -459,12 +459,17 @@ TO_JSON(str_literal)
#### TO_UNIXTIMESTAMP
```sql
TO_UNIXTIMESTAMP(expr)
TO_UNIXTIMESTAMP(expr [, return_timestamp])
return_timestamp: {
0
| 1
}
```
**Description**: UNIX timestamp converted from a string of date/time format
**Return value type**: BIGINT
**Return value type**: BIGINT, TIMESTAMP
**Applicable column types**: VARCHAR and NCHAR
@ -476,6 +481,7 @@ TO_UNIXTIMESTAMP(expr)
- The input string must be compatible with ISO8601/RFC3339 standard, NULL will be returned if the string can't be converted
- The precision of the returned timestamp is same as the precision set for the current data base in use
- return_timestamp indicates whether the returned value type is TIMESTAMP or not. If this parameter set to 1, function will return TIMESTAMP type. Otherwise function will return BIGINT type. If parameter is omitted, default return value type is BIGINT.
### Time and Date Functions

View File

@ -69,7 +69,7 @@ Provides information about SQL queries currently running. Similar to SHOW QUERIE
| 1 | consumer_id | BIGINT | Consumer ID |
| 2 | consumer_group | BINARY(192) | Consumer group |
| 3 | client_id | BINARY(192) | Client ID (user-defined) |
| 4 | status | BINARY(20) | Consumer status |
| 4 | status | BINARY(20) | Consumer status. All possible status include: ready(consumer is in normal state), lost(the connection between consumer and mnode is broken), rebalance(the redistribution of vgroups that belongs to current consumer is now in progress), unknown(consumer is in invalid state)
| 5 | topics | BINARY(204) | Subscribed topic. Returns one row for each topic. |
| 6 | up_time | TIMESTAMP | Time of first connection to TDengine Server |
| 7 | subscribe_time | TIMESTAMP | Time of first subscription |

View File

@ -42,3 +42,304 @@ An existing Grafana Notification Channel can be specified with parameter `-E`, t
Launch `TDinsight.sh` with the command above and restart Grafana, then open Dashboard `http://localhost:3000/d/tdinsight`.
For more use cases and restrictions please refer to [TDinsight](/reference/tdinsight/).
## log database
The data of tdinsight dashboard is stored in `log` database (default. You can change it in taoskeeper's config file. For more infrmation, please reference to [taoskeeper document](/reference/taosKeeper)). The taoskeeper will create log database on taoskeeper startup.
### cluster\_info table
`cluster_info` table contains cluster information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|first\_ep|VARCHAR||first ep of cluster|
|first\_ep\_dnode\_id|INT||dnode id or first\_ep|
|version|VARCHAR||tdengine version. such as: 3.0.4.0|
|master\_uptime|FLOAT||days of master's uptime|
|monitor\_interval|INT||monitor interval in second|
|dbs\_total|INT||total number of databases in cluster|
|tbs\_total|BIGINT||total number of tables in cluster|
|stbs\_total|INT||total number of stables in cluster|
|dnodes\_total|INT||total number of dnodes in cluster|
|dnodes\_alive|INT||total number of dnodes in ready state|
|mnodes\_total|INT||total number of mnodes in cluster|
|mnodes\_alive|INT||total number of mnodes in ready state|
|vgroups\_total|INT||total number of vgroups in cluster|
|vgroups\_alive|INT||total number of vgroups in ready state|
|vnodes\_total|INT||total number of vnode in cluster|
|vnodes\_alive|INT||total number of vnode in ready state|
|connections\_total|INT||total number of connections to cluster|
|topics\_total|INT||total number of topics in cluster|
|streams\_total|INT||total number of streams in cluster|
|protocol|INT||protocol version|
|cluster\_id|NCHAR|TAG|cluster id|
### d\_info table
`d_info` table contains dnodes information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|status|VARCHAR||dnode status|
|dnode\_ep|NCHAR|TAG|dnode endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### m\_info table
`m_info` table contains mnode information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|role|VARCHAR||the role of mnode. leader or follower|
|mnode\_id|INT|TAG|master node id|
|mnode\_ep|NCHAR|TAG|master node endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### dnodes\_info table
`dnodes_info` table contains dnodes information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|uptime|FLOAT||dnode uptime|
|cpu\_engine|FLOAT||cpu usage of tdengine. read from `/proc/<taosd_pid>/stat`|
|cpu\_system|FLOAT||cpu usage of server. read from `/proc/stat`|
|cpu\_cores|FLOAT||cpu cores of server|
|mem\_engine|INT||memory usage of tdengine. read from `/proc/<taosd_pid>/status`|
|mem\_system|INT||available memory on the server|
|mem\_total|INT||total memory of server in `KB`|
|disk\_engine|INT|||
|disk\_used|BIGINT||usage of data dir in `bytes`|
|disk\_total|BIGINT||the capacity of data dir in `bytes`|
|net\_in|FLOAT||network throughput rate in kb/s. read from `/proc/net/dev`|
|net\_out|FLOAT||network throughput rate in kb/s. read from `/proc/net/dev`|
|io\_read|FLOAT||io throughput rate in kb/s. read from `/proc/<taosd_pid>/io`|
|io\_write|FLOAT||io throughput rate in kb/s. read from `/proc/<taosd_pid>/io`|
|io\_read\_disk|FLOAT||io throughput rate of disk in kb/s. read from `/proc/<taosd_pid>/io`|
|io\_write\_disk|FLOAT||io throughput rate of disk in kb/s. read from `/proc/<taosd_pid>/io`|
|req\_select|INT||number of select queries received per dnode|
|req\_select\_rate|FLOAT||number of select queries received per dnode divided by monitor interval.|
|req\_insert|INT||number of insert queries received per dnode|
|req\_insert\_success|INT||number of successfully insert queries received per dnode|
|req\_insert\_rate|FLOAT||number of insert queries received per dnode divided by monitor interval|
|req\_insert\_batch|INT||number of batch insertions|
|req\_insert\_batch\_success|INT||number of successful batch insertions|
|req\_insert\_batch\_rate|FLOAT||number of batch insertions divided by monitor interval|
|errors|INT||dnode errors|
|vnodes\_num|INT||number of vnodes per dnode|
|masters|INT||number of master vnodes|
|has\_mnode|INT||if the dnode has mnode|
|has\_qnode|INT||if the dnode has qnode|
|has\_snode|INT||if the dnode has snode|
|has\_bnode|INT||if the dnode has bnode|
|dnode\_id|INT|TAG|dnode id|
|dnode\_ep|NCHAR|TAG|dnode endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### data\_dir table
`data_dir` table contains data directory information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|name|NCHAR||data directory. default is `/var/lib/taos`|
|level|INT||level for multi-level storage|
|avail|BIGINT||available space for data directory|
|used|BIGINT||used space for data directory|
|total|BIGINT||total space for data directory|
|dnode\_id|INT|TAG|dnode id|
|dnode\_ep|NCHAR|TAG|dnode endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### log\_dir table
`log_dir` table contains log directory information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|name|NCHAR||log directory. default is `/var/log/taos/`|
|avail|BIGINT||available space for log directory|
|used|BIGINT||used space for data directory|
|total|BIGINT||total space for data directory|
|dnode\_id|INT|TAG|dnode id|
|dnode\_ep|NCHAR|TAG|dnode endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### temp\_dir table
`temp_dir` table contains temp dir information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|name|NCHAR||temp directory. default is `/tmp/`|
|avail|BIGINT||available space for temp directory|
|used|BIGINT||used space for temp directory|
|total|BIGINT||total space for temp directory|
|dnode\_id|INT|TAG|dnode id|
|dnode\_ep|NCHAR|TAG|dnode endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### vgroups\_info table
`vgroups_info` table contains vgroups information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|vgroup\_id|INT||vgroup id|
|database\_name|VARCHAR||database for the vgroup|
|tables\_num|BIGINT||number of tables per vgroup|
|status|VARCHAR||status|
|dnode\_id|INT|TAG|dnode id|
|dnode\_ep|NCHAR|TAG|dnode endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### vnodes\_role table
`vnodes_role` table contains vnode role information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|vnode\_role|VARCHAR||role. leader or follower|
|dnode\_id|INT|TAG|dnode id|
|dnode\_ep|NCHAR|TAG|dnode endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### logs table
`logs` table contains login information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|level|VARCHAR||log level|
|content|NCHAR||log content|
|dnode\_id|INT|TAG|dnode id|
|dnode\_ep|NCHAR|TAG|dnode endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### log\_summary table
`log_summary` table contains log summary information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|error|INT||error count|
|info|INT||info count|
|debug|INT||debug count|
|trace|INT||trace count|
|dnode\_id|INT|TAG|dnode id|
|dnode\_ep|NCHAR|TAG|dnode endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### grants\_info table
`grants_info` table contains grants information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|expire\_time|BIGINT||time until grants expire in seconds|
|timeseries\_used|BIGINT||timeseries used|
|timeseries\_total|BIGINT||total timeseries|
|dnode\_id|INT|TAG|dnode id|
|dnode\_ep|NCHAR|TAG|dnode endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### keeper\_monitor table
`keeper_monitor` table contains keeper monitor information records.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|ts|TIMESTAMP||timestamp|
|cpu|FLOAT||cpu usage|
|mem|FLOAT||memory usage|
|identify|NCHAR|TAG||
### taosadapter\_restful\_http\_request\_total table
`taosadapter_restful_http_request_total` table contains taosadapter rest request information record. The timestamp column of this table is `_ts`.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|\_ts|TIMESTAMP||timestamp|
|guage|DOUBLE||metric value|
|client\_ip|NCHAR|TAG|client ip|
|endpoint|NCHAR|TAG|taosadpater endpoint|
|request\_method|NCHAR|TAG|request method|
|request\_uri|NCHAR|TAG|request uri|
|status\_code|NCHAR|TAG|status code|
### taosadapter\_restful\_http\_request\_fail table
`taosadapter_restful_http_request_fail` table contains taosadapter failed rest request information record. The timestamp column of this table is `_ts`.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|\_ts|TIMESTAMP||timestamp|
|guage|DOUBLE||metric value|
|client\_ip|NCHAR|TAG|client ip|
|endpoint|NCHAR|TAG|taosadpater endpoint|
|request\_method|NCHAR|TAG|request method|
|request\_uri|NCHAR|TAG|request uri|
|status\_code|NCHAR|TAG|status code|
### taosadapter\_restful\_http\_request\_in\_flight table
`taosadapter_restful_http_request_in_flight` table contains taosadapter rest request information record in real time. The timestamp column of this table is `_ts`.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|\_ts|TIMESTAMP||timestamp|
|guage|DOUBLE||metric value|
|endpoint|NCHAR|TAG|taosadpater endpoint|
### taosadapter\_restful\_http\_request\_summary\_milliseconds table
`taosadapter_restful_http_request_summary_milliseconds` table contains the summary or rest information record. The timestamp column of this table is `_ts`.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|\_ts|TIMESTAMP||timestamp|
|count|DOUBLE|||
|sum|DOUBLE|||
|0.5|DOUBLE|||
|0.9|DOUBLE|||
|0.99|DOUBLE|||
|0.1|DOUBLE|||
|0.2|DOUBLE|||
|endpoint|NCHAR|TAG|taosadpater endpoint|
|request\_method|NCHAR|TAG|request method|
|request\_uri|NCHAR|TAG|request uri|
### taosadapter\_system\_mem\_percent table
`taosadapter_system_mem_percent` table contains taosadapter memory usage information. The timestamp of this table is `_ts`.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|\_ts|TIMESTAMP||timestamp|
|guage|DOUBLE||metric value|
|endpoint|NCHAR|TAG|taosadpater endpoint|
### taosadapter\_system\_cpu\_percent table
`taosadapter_system_cpu_percent` table contains taosadapter cup usage information. The timestamp of this table is `_ts`.
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|\_ts|TIMESTAMP||timestamp|
|guage|DOUBLE||mertic value|
|endpoint|NCHAR|TAG|taosadpater endpoint|

View File

@ -108,7 +108,7 @@ The following `launchctl` commands can help you manage taoskeeper service:
#### Launch With Configuration File
You can quickly launch taosKeeper with the following commands. If you do not specify a configuration file, `/etc/taos/keeper.toml` is used by default. If this file does not specify configurations, the default values are used.
You can quickly launch taosKeeper with the following commands. If you do not specify a configuration file, `/etc/taos/taoskeeper.toml` is used by default. If this file does not specify configurations, the default values are used.
```shell
$ taoskeeper -c <keeper config file>
@ -153,6 +153,10 @@ database = "log"
# standard tables to monitor
tables = ["normal_table"]
# database options for db storing metrics data
[metrics.databaseoptions]
cachemodel = "none"
```
### Obtain Monitoring Metrics
@ -203,7 +207,7 @@ taos_cluster_info_dnodes_total{cluster_id="5981392874047724755"} 1
taos_cluster_info_first_ep{cluster_id="5981392874047724755",value="hlb:6030"} 1
```
### check_health
### check\_health
```
$ curl -i http://127.0.0.1:6043/check_health

View File

@ -36,7 +36,6 @@ database_option: {
| TSDB_PAGESIZE value
| WAL_RETENTION_PERIOD value
| WAL_RETENTION_SIZE value
| WAL_ROLL_PERIOD value
| WAL_SEGMENT_SIZE value
}
```

View File

@ -459,12 +459,17 @@ TO_JSON(str_literal)
#### TO_UNIXTIMESTAMP
```sql
TO_UNIXTIMESTAMP(expr)
TO_UNIXTIMESTAMP(expr [, return_timestamp])
return_timestamp: {
0
| 1
}
```
**功能说明**:将日期时间格式的字符串转换成为 UNIX 时间戳。
**返回结果数据类型**BIGINT。
**返回结果数据类型**BIGINT, TIMESTAMP
**应用字段**VARCHAR, NCHAR。
@ -476,6 +481,7 @@ TO_UNIXTIMESTAMP(expr)
- 输入的日期时间字符串须符合 ISO8601/RFC3339 标准,无法转换的字符串格式将返回 NULL。
- 返回的时间戳精度与当前 DATABASE 设置的时间精度一致。
- return_timestamp 指定函数返回值是否为时间戳类型设置为1时返回 TIMESTAMP 类型设置为0时返回 BIGINT 类型。如不指定缺省返回 BIGINT 类型。
### 时间和日期函数

View File

@ -69,7 +69,7 @@ TDengine 3.0 版本开始提供一个内置数据库 `performance_schema`,其
| 1 | consumer_id | BIGINT | 消费者的唯一 ID |
| 2 | consumer_group | BINARY(192) | 消费者组 |
| 3 | client_id | BINARY(192) | 用户自定义字符串,通过创建 consumer 时指定 client_id 来展示 |
| 4 | status | BINARY(20) | 消费者当前状态 |
| 4 | status | BINARY(20) | 消费者当前状态。消费者状态包括ready(正常可用)、 lost(连接已丢失)、 rebalancing(消费者所属 vgroup 正在分配中)、unknown(未知状态)|
| 5 | topics | BINARY(204) | 被订阅的 topic。若订阅多个 topic则展示为多行 |
| 6 | up_time | TIMESTAMP | 第一次连接 taosd 的时间 |
| 7 | subscribe_time | TIMESTAMP | 上一次发起订阅的时间 |

View File

@ -111,7 +111,7 @@ Active: inactive (dead)
#### 配置文件启动
执行以下命令即可快速体验 taosKeeper。当不指定 taosKeeper 配置文件时,优先使用 `/etc/taos/keeper.toml` 配置,否则将使用默认配置。
执行以下命令即可快速体验 taosKeeper。当不指定 taosKeeper 配置文件时,优先使用 `/etc/taos/taoskeeper.toml` 配置,否则将使用默认配置。
```shell
$ taoskeeper -c <keeper config file>
@ -156,6 +156,10 @@ database = "log"
# 指定需要监控的普通表
tables = []
# database options for db storing metrics data
[metrics.databaseoptions]
cachemodel = "none"
```
### 获取监控指标
@ -206,7 +210,7 @@ taos_cluster_info_dnodes_total{cluster_id="5981392874047724755"} 1
taos_cluster_info_first_ep{cluster_id="5981392874047724755",value="hlb:6030"} 1
```
### check_health
### check\_health
```
$ curl -i http://127.0.0.1:6043/check_health

View File

@ -41,9 +41,9 @@ chmod +x TDinsight.sh
## log 库
TDinsight dashboard 数据来源于 log 库存放监控数据的默认db可以在 taoskeeper 配置文件中修改,具体参考 [taoskeeper 文档](..../reference/taosKeeper)。taoskeeper 启动后会自动创建 log 库,并将监控数据写入到该数据库中。
TDinsight dashboard 数据来源于 log 库存放监控数据的默认db可以在 taoskeeper 配置文件中修改,具体参考 [taoskeeper 文档](/reference/taosKeeper)。taoskeeper 启动后会自动创建 log 库,并将监控数据写入到该数据库中。
### cluster info 表
### cluster\_info 表
`cluster_info` 表记录集群信息。
@ -54,7 +54,7 @@ TDinsight dashboard 数据来源于 log 库存放监控数据的默认db
|first\_ep\_dnode\_id|INT||集群 first ep 的 dnode id|
|version|VARCHAR||tdengine version。例如3.0.4.0|
|master\_uptime|FLOAT||当前 master 节点的uptime。单位天|
|monitor_interval|INT||monitor interval。单位秒|
|monitor\_interval|INT||monitor interval。单位秒|
|dbs\_total|INT||database 总数|
|tbs\_total|BIGINT||当前集群 table 总数|
|stbs\_total|INT||当前集群 stable 总数|
@ -72,7 +72,7 @@ TDinsight dashboard 数据来源于 log 库存放监控数据的默认db
|protocol|INT||协议版本,目前为 1|
|cluster\_id|NCHAR|TAG|cluster id|
### d info 表
### d\_info 表
`d_info` 表记录 dnode 状态信息。
@ -83,7 +83,7 @@ TDinsight dashboard 数据来源于 log 库存放监控数据的默认db
|dnode\_ep|NCHAR|TAG|dnode endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### m info 表
### m\_info 表
`m_info` 表记录 mnode 角色信息。
@ -95,7 +95,7 @@ TDinsight dashboard 数据来源于 log 库存放监控数据的默认db
|mnode\_ep|NCHAR|TAG|master node endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### dnodes info 表
### dnodes\_info 表
`dnodes_info` 记录 dnode 信息。
@ -107,17 +107,17 @@ TDinsight dashboard 数据来源于 log 库存放监控数据的默认db
|cpu\_system|FLOAT||服务器 cpu 使用率,从 `/proc/stat` 读取|
|cpu\_cores|FLOAT||服务器 cpu 核数|
|mem\_engine|INT||taosd 内存使用率,从 `/proc/<taosd_pid>/status` 读取|
|mem\_system|INT||服务器内存使用率|
|mem\_system|INT||服务器可用内存|
|mem\_total|INT||服务器内存总量,单位 KB|
|disk\_engine|INT|||
|disk\_used|BIGINT||data dir 挂载的磁盘使用量,单位 bytes|
|disk\_total|BIGINT||data dir 挂载的磁盘总容量,单位 bytes|
|net\_in|FLOAT||网络吞吐率,从 `/proc/net/dev` 中读取的 received bytes。单位 bytes per second|
|net\_out|FLOAT||网络吞吐率,从 `/proc/net/dev` 中读取的 transmit bytes。单位 bytes per second|
|io\_read|FLOAT||io 吞吐率,从 `/proc/<taosd_pid>/io` 中读取的 rchar 与上次数值计算之后,计算得到速度。单位 bytes per second|
|io\_write|FLOAT||io 吞吐率,从 `/proc/<taosd_pid>/io` 中读取的 wchar 与上次数值计算之后,计算得到速度。单位 bytes per second|
|io\_read\_disk|FLOAT||磁盘 io 吞吐率,从 `/proc/<taosd_pid>/io` 中读取的 read_bytes。单位 bytes per second|
|io\_write\_disk|FLOAT||磁盘 io 吞吐率,从 `/proc/<taosd_pid>/io` 中读取的 write_bytes。单位 bytes per second|
|net\_in|FLOAT||网络吞吐率,从 `/proc/net/dev` 中读取的 received bytes。单位 kb/s|
|net\_out|FLOAT||网络吞吐率,从 `/proc/net/dev` 中读取的 transmit bytes。单位 kb/s|
|io\_read|FLOAT||io 吞吐率,从 `/proc/<taosd_pid>/io` 中读取的 rchar 与上次数值计算之后,计算得到速度。单位 kb/s|
|io\_write|FLOAT||io 吞吐率,从 `/proc/<taosd_pid>/io` 中读取的 wchar 与上次数值计算之后,计算得到速度。单位 kb/s|
|io\_read\_disk|FLOAT||磁盘 io 吞吐率,从 `/proc/<taosd_pid>/io` 中读取的 read_bytes。单位 kb/s|
|io\_write\_disk|FLOAT||磁盘 io 吞吐率,从 `/proc/<taosd_pid>/io` 中读取的 write_bytes。单位 kb/s|
|req\_select|INT||两个间隔内发生的查询请求数目|
|req\_select\_rate|FLOAT||两个间隔内的查询请求速度 = `req_select / monitorInterval`|
|req\_insert|INT||两个间隔内发生的写入请求,包含的单条数据数目|
@ -137,7 +137,7 @@ TDinsight dashboard 数据来源于 log 库存放监控数据的默认db
|dnode\_ep|NCHAR|TAG|dnode endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### data dir 表
### data\_dir 表
`data_dir` 表记录 data 目录信息。
@ -153,7 +153,7 @@ TDinsight dashboard 数据来源于 log 库存放监控数据的默认db
|dnode\_ep|NCHAR|TAG|dnode endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### log dir 表
### log\_dir 表
`log_dir` 表记录 log 目录信息。
@ -168,7 +168,7 @@ TDinsight dashboard 数据来源于 log 库存放监控数据的默认db
|dnode\_ep|NCHAR|TAG|dnode endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### tmp dir 表
### temp\_dir 表
`temp_dir` 表记录 temp 目录信息。
@ -183,7 +183,7 @@ TDinsight dashboard 数据来源于 log 库存放监控数据的默认db
|dnode\_ep|NCHAR|TAG|dnode endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### vgroups info 表
### vgroups\_info 表
`vgroups_info` 表记录虚拟节点组信息。
@ -198,7 +198,7 @@ TDinsight dashboard 数据来源于 log 库存放监控数据的默认db
|dnode\_ep|NCHAR|TAG|dnode endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### vnodes role 表
### vnodes\_role 表
`vnodes_role` 表记录虚拟节点角色信息。
@ -223,7 +223,7 @@ TDinsight dashboard 数据来源于 log 库存放监控数据的默认db
|dnode\_ep|NCHAR|TAG|dnode endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### log summary 表
### log\_summary 表
`log_summary` 记录日志统计信息。
@ -238,7 +238,7 @@ TDinsight dashboard 数据来源于 log 库存放监控数据的默认db
|dnode\_ep|NCHAR|TAG|dnode endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### grants info 表
### grants\_info 表
`grants_info` 记录授权信息。
@ -252,7 +252,7 @@ TDinsight dashboard 数据来源于 log 库存放监控数据的默认db
|dnode\_ep|NCHAR|TAG|dnode endpoint|
|cluster\_id|NCHAR|TAG|cluster id|
### keeper monitor 表
### keeper\_monitor 表
`keeper_monitor` 记录 taoskeeper 监控数据。
@ -263,7 +263,7 @@ TDinsight dashboard 数据来源于 log 库存放监控数据的默认db
|mem|FLOAT||内存使用率|
|identify|NCHAR|TAG||
### taosadapter restful http request total 表
### taosadapter\_restful\_http\_request\_total 表
`taosadapter_restful_http_request_total` 记录 taosadapter rest 请求信息,该表为 schemaless 方式创建的表,时间戳字段名为 `_ts`
@ -277,7 +277,7 @@ TDinsight dashboard 数据来源于 log 库存放监控数据的默认db
|request\_uri|NCHAR|TAG|request uri|
|status\_code|NCHAR|TAG|status code|
### taosadapter restful http request fail 表
### taosadapter\_restful\_http\_request\_fail 表
`taosadapter_restful_http_request_fail` 记录 taosadapter rest 请求失败信息,该表为 schemaless 方式创建的表,时间戳字段名为 `_ts`
@ -291,7 +291,7 @@ TDinsight dashboard 数据来源于 log 库存放监控数据的默认db
|request\_uri|NCHAR|TAG|request uri|
|status\_code|NCHAR|TAG|status code|
### taosadapter restful http request in flight 表
### taosadapter\_restful\_http\_request\_in\_flight 表
`taosadapter_restful_http_request_in_flight` 记录 taosadapter rest 实时请求信息,该表为 schemaless 方式创建的表,时间戳字段名为 `_ts`
@ -301,7 +301,7 @@ TDinsight dashboard 数据来源于 log 库存放监控数据的默认db
|guage|DOUBLE||监控指标值|
|endpoint|NCHAR|TAG|taosadpater endpoint|
### taosadapter restful http request summary milliseconds 表
### taosadapter\_restful\_http\_request\_summary\_milliseconds 表
`taosadapter_restful_http_request_summary_milliseconds` 记录 taosadapter rest 请求汇总信息,该表为 schemaless 方式创建的表,时间戳字段名为 `_ts`
@ -319,7 +319,7 @@ TDinsight dashboard 数据来源于 log 库存放监控数据的默认db
|request\_method|NCHAR|TAG|request method|
|request\_uri|NCHAR|TAG|request uri|
### taosadapter system mem percent 表
### taosadapter\_system\_mem\_percent 表
`taosadapter_system_mem_percent` 表记录 taosadapter 内存使用情况,该表为 schemaless 方式创建的表,时间戳字段名为 `_ts`
@ -329,11 +329,12 @@ TDinsight dashboard 数据来源于 log 库存放监控数据的默认db
|guage|DOUBLE||监控指标值|
|endpoint|NCHAR|TAG|taosadpater endpoint|
### taosadapter system cpu percent 表
### taosadapter\_system\_cpu\_percent 表
`taosadapter_system_cpu_percent` 表记录 taosadapter cpu 使用情况,该表为 schemaless 方式创建的表,时间戳字段名为 `_ts`
|field|type|is\_tag|comment|
|:----|:---|:-----|:------|
|\_ts|TIMESTAMP||timestamp|
|guage|DOUBLE||监控指标值|
|endpoint|NCHAR|TAG|taosadpater endpoint|

View File

@ -77,7 +77,7 @@ static inline bool tmsgIsValid(tmsg_t type) {
}
static inline bool vnodeIsMsgBlock(tmsg_t type) {
return (type == TDMT_VND_CREATE_TABLE) || (type == TDMT_VND_ALTER_TABLE) || (type == TDMT_VND_DROP_TABLE) ||
(type == TDMT_VND_UPDATE_TAG_VAL) || (type == TDMT_VND_ALTER_CONFIRM);
(type == TDMT_VND_UPDATE_TAG_VAL) || (type == TDMT_VND_ALTER_CONFIRM) || (type == TDMT_VND_COMMIT);
}
static inline bool syncUtilUserCommit(tmsg_t msgType) {
@ -1286,6 +1286,9 @@ typedef struct {
int16_t hashSuffix;
int32_t tsdbPageSize;
int64_t reserved[8];
int8_t learnerReplica;
int8_t learnerSelfIndex;
SReplica learnerReplicas[TSDB_MAX_LEARNER_REPLICA];
} SCreateVnodeReq;
int32_t tSerializeSCreateVnodeReq(void* buf, int32_t bufLen, SCreateVnodeReq* pReq);
@ -1357,7 +1360,10 @@ typedef struct {
int8_t replica;
SReplica replicas[TSDB_MAX_REPLICA];
int64_t reserved[8];
} SAlterVnodeReplicaReq;
int8_t learnerSelfIndex;
int8_t learnerReplica;
SReplica learnerReplicas[TSDB_MAX_LEARNER_REPLICA];
} SAlterVnodeReplicaReq, SAlterVnodeTypeReq;
int32_t tSerializeSAlterVnodeReplicaReq(void* buf, int32_t bufLen, SAlterVnodeReplicaReq* pReq);
int32_t tDeserializeSAlterVnodeReplicaReq(void* buf, int32_t bufLen, SAlterVnodeReplicaReq* pReq);
@ -1629,7 +1635,10 @@ int32_t tDeserializeSCreateDropMQSNodeReq(void* buf, int32_t bufLen, SMCreateQno
typedef struct {
int8_t replica;
SReplica replicas[TSDB_MAX_REPLICA];
} SDCreateMnodeReq, SDAlterMnodeReq;
int8_t learnerReplica;
SReplica learnerReplicas[TSDB_MAX_LEARNER_REPLICA];
int64_t lastIndex;
} SDCreateMnodeReq, SDAlterMnodeReq, SDAlterMnodeTypeReq;
int32_t tSerializeSDCreateMnodeReq(void* buf, int32_t bufLen, SDCreateMnodeReq* pReq);
int32_t tDeserializeSDCreateMnodeReq(void* buf, int32_t bufLen, SDCreateMnodeReq* pReq);

View File

@ -83,6 +83,8 @@ enum {
TD_DEF_MSG_TYPE(TDMT_DND_CONFIG_DNODE, "config-dnode", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_DND_SYSTABLE_RETRIEVE, "dnode-retrieve", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_DND_MAX_MSG, "dnd-max", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_DND_ALTER_MNODE_TYPE, "dnode-alter-mnode-type", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_DND_ALTER_VNODE_TYPE, "dnode-alter-vnode-type", NULL, NULL)
TD_NEW_MSG_SEG(TDMT_MND_MSG)
TD_DEF_MSG_TYPE(TDMT_MND_CONNECT, "connect", NULL, NULL)

View File

@ -33,8 +33,11 @@ typedef struct {
bool deploy;
int8_t selfIndex;
int8_t numOfReplicas;
SReplica replicas[TSDB_MAX_REPLICA];
int8_t numOfTotalReplicas;
SReplica replicas[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
int32_t nodeRoles[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
SMsgCb msgCb;
int64_t lastIndex;
} SMnodeOpt;
/* ------------------------ SMnode ------------------------ */
@ -69,6 +72,8 @@ int32_t mndStart(SMnode *pMnode);
*/
void mndStop(SMnode *pMnode);
int32_t mndIsCatchUp(SMnode *pMnode);
/**
* @brief Get mnode monitor info.
*

View File

@ -55,6 +55,8 @@ extern "C" {
#define SYNC_INDEX_INVALID -1
#define SYNC_TERM_INVALID -1
#define SYNC_LEARNER_CATCHUP 10
typedef enum {
SYNC_STRATEGY_NO_SNAPSHOT = 0,
SYNC_STRATEGY_STANDARD_SNAPSHOT = 1,
@ -76,19 +78,29 @@ typedef enum {
TAOS_SYNC_STATE_CANDIDATE = 101,
TAOS_SYNC_STATE_LEADER = 102,
TAOS_SYNC_STATE_ERROR = 103,
TAOS_SYNC_STATE_LEARNER = 104,
} ESyncState;
typedef enum {
TAOS_SYNC_ROLE_VOTER = 0,
TAOS_SYNC_ROLE_LEARNER = 1,
TAOS_SYNC_ROLE_ERROR = 2,
} ESyncRole;
typedef struct SNodeInfo {
int64_t clusterId;
int32_t nodeId;
uint16_t nodePort;
char nodeFqdn[TSDB_FQDN_LEN];
int64_t clusterId;
int32_t nodeId;
uint16_t nodePort;
char nodeFqdn[TSDB_FQDN_LEN];
ESyncRole nodeRole;
} SNodeInfo;
typedef struct SSyncCfg {
int32_t totalReplicaNum;
int32_t replicaNum;
int32_t myIndex;
SNodeInfo nodeInfo[TSDB_MAX_REPLICA];
SNodeInfo nodeInfo[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
SyncIndex lastIndex;
} SSyncCfg;
typedef struct SFsmCbMeta {
@ -155,6 +167,7 @@ typedef struct SSyncFSM {
void (*FpBecomeLeaderCb)(const struct SSyncFSM* pFsm);
void (*FpBecomeFollowerCb)(const struct SSyncFSM* pFsm);
void (*FpBecomeLearnerCb)(const struct SSyncFSM* pFsm);
int32_t (*FpGetSnapshot)(const struct SSyncFSM* pFsm, SSnapshot* pSnapshot, void* pReaderParam, void** ppReader);
void (*FpGetSnapshotInfo)(const struct SSyncFSM* pFsm, SSnapshot* pSnapshot);
@ -236,6 +249,7 @@ void syncStop(int64_t rid);
void syncPreStop(int64_t rid);
void syncPostStop(int64_t rid);
int32_t syncPropose(int64_t rid, SRpcMsg* pMsg, bool isWeak, int64_t* seq);
int32_t syncIsCatchUp(int64_t rid);
int32_t syncProcessMsg(int64_t rid, SRpcMsg* pMsg);
int32_t syncReconfig(int64_t rid, SSyncCfg* pCfg);
int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex);

View File

@ -547,6 +547,7 @@ int32_t* taosGetErrno();
#define TSDB_CODE_SYN_INVALID_SNAPSHOT_MSG TAOS_DEF_ERROR_CODE(0, 0x0915) // internal
#define TSDB_CODE_SYN_BUFFER_FULL TAOS_DEF_ERROR_CODE(0, 0x0916)
#define TSDB_CODE_SYN_WRITE_STALL TAOS_DEF_ERROR_CODE(0, 0x0917)
#define TSDB_CODE_SYN_NEGO_WIN_EXCEEDED TAOS_DEF_ERROR_CODE(0, 0X0918)
#define TSDB_CODE_SYN_INTERNAL_ERROR TAOS_DEF_ERROR_CODE(0, 0x09FF)
// tq

View File

@ -279,8 +279,11 @@ typedef enum ELogicConditionType {
#define TSDB_DNODE_ROLE_VNODE 2
#define TSDB_MAX_REPLICA 5
#define TSDB_MAX_LEARNER_REPLICA 10
#define TSDB_SYNC_LOG_BUFFER_SIZE 4096
#define TSDB_SYNC_LOG_BUFFER_RETENTION (TSDB_SYNC_LOG_BUFFER_SIZE >> 4)
#define TSDB_SYNC_LOG_BUFFER_RETENTION 256
#define TSDB_SYNC_APPLYQ_SIZE_LIMIT 512
#define TSDB_SYNC_NEGOTIATION_WIN 512
#define TSDB_TBNAME_COLUMN_INDEX (-1)
#define TSDB_MULTI_TABLEMETA_MAX_NUM 100000 // maximum batch size allowed to load table meta

View File

@ -197,7 +197,8 @@ if [[ $productName == "TDengine" ]]; then
mkdir -p ${install_dir}/connector
if [[ "$pagMode" != "lite" ]] && [[ "$cpuType" != "aarch32" ]]; then
if [ "$osType" != "Darwin" ]; then
[ -f ${build_dir}/lib/*.jar ] && cp ${build_dir}/lib/*.jar ${install_dir}/connector || :
jars=$(ls ${build_dir}/lib/*.jar 2>/dev/null|wc -l)
[ "${jars}" != "0" ] && cp ${build_dir}/lib/*.jar ${install_dir}/connector || :
fi
git clone --depth 1 https://github.com/taosdata/driver-go ${install_dir}/connector/go
rm -rf ${install_dir}/connector/go/.git ||:

View File

@ -338,7 +338,20 @@ if [ "$verMode" == "cluster" ] || [ "$verMode" == "cloud" ]; then
connector_dir="${code_dir}/connector"
mkdir -p ${install_dir}/connector
if [[ "$pagMode" != "lite" ]] && [[ "$cpuType" != "aarch32" ]]; then
[ -f ${build_dir}/lib/*.jar ] && cp ${build_dir}/lib/*.jar ${install_dir}/connector || :
tmp_pwd=`pwd`
cd ${install_dir}/connector
if [ ! -d taos-connector-jdbc ];then
git clone -b main --depth=1 https://github.com/taosdata/taos-connector-jdbc.git ||:
fi
cd taos-connector-jdbc
mvn clean package -Dmaven.test.skip=true
echo ${build_dir}/lib/
cp target/*.jar ${build_dir}/lib/
cd ${install_dir}/connector
rm -rf taos-connector-jdbc
cd ${tmp_pwd}
jars=$(ls ${build_dir}/lib/*.jar 2>/dev/null|wc -l)
[ "${jars}" != "0" ] && cp ${build_dir}/lib/*.jar ${install_dir}/connector || :
git clone --depth 1 https://github.com/taosdata/driver-go ${install_dir}/connector/go
rm -rf ${install_dir}/connector/go/.git ||:

View File

@ -1259,8 +1259,9 @@ JNIEXPORT jobject JNICALL Java_com_taosdata_jdbc_TSDBJNIConnector_schemalessInse
int code = taos_errno(tres);
if (code != TSDB_CODE_SUCCESS) {
jniError("jobj:%p, conn:%p, code: 0x%x, msg:%s", jobj, taos, code, taos_errstr(tres));
jobject jobject = createSchemalessResp(env, 0, code, taos_errstr(tres));
taos_free_result(tres);
return createSchemalessResp(env, 0, code, taos_errstr(tres));
return jobject;
}
taos_free_result(tres);
@ -1286,8 +1287,9 @@ JNIEXPORT jobject JNICALL Java_com_taosdata_jdbc_TSDBJNIConnector_schemalessInse
int code = taos_errno(tres);
if (code != TSDB_CODE_SUCCESS) {
jniError("jobj:%p, conn:%p, code: 0x%x, msg:%s", jobj, taos, code, taos_errstr(tres));
jobject jobject = createSchemalessResp(env, 0, code, taos_errstr(tres));
taos_free_result(tres);
return createSchemalessResp(env, 0, code, taos_errstr(tres));
return jobject;
}
taos_free_result(tres);
@ -1315,8 +1317,9 @@ JNIEXPORT jobject JNICALL Java_com_taosdata_jdbc_TSDBJNIConnector_schemalessInse
int code = taos_errno(tres);
if (code != TSDB_CODE_SUCCESS) {
jniError("jobj:%p, conn:%p, code: 0x%x, msg:%s", jobj, taos, code, taos_errstr(tres));
jobject jobject = createSchemalessResp(env, 0, code, taos_errstr(tres));
taos_free_result(tres);
return createSchemalessResp(env, 0, code, taos_errstr(tres));
return jobject;
}
taos_free_result(tres);
@ -1343,8 +1346,9 @@ JNIEXPORT jobject JNICALL Java_com_taosdata_jdbc_TSDBJNIConnector_schemalessInse
int code = taos_errno(tres);
if (code != TSDB_CODE_SUCCESS) {
jniError("jobj:%p, conn:%p, code: 0x%x, msg:%s", jobj, taos, code, taos_errstr(tres));
jobject jobject = createSchemalessResp(env, 0, code, taos_errstr(tres));
taos_free_result(tres);
return createSchemalessResp(env, 0, code, taos_errstr(tres));
return jobject;
}
taos_free_result(tres);

View File

@ -4129,6 +4129,12 @@ int32_t tSerializeSCreateVnodeReq(void *buf, int32_t bufLen, SCreateVnodeReq *pR
for (int32_t i = 0; i < 8; ++i) {
if (tEncodeI64(&encoder, pReq->reserved[i]) < 0) return -1;
}
if (tEncodeI8(&encoder, pReq->learnerReplica) < 0) return -1;
if (tEncodeI8(&encoder, pReq->learnerSelfIndex) < 0) return -1;
for (int32_t i = 0; i < TSDB_MAX_LEARNER_REPLICA; ++i) {
SReplica *pReplica = &pReq->learnerReplicas[i];
if (tEncodeSReplica(&encoder, pReplica) < 0) return -1;
}
tEndEncode(&encoder);
@ -4207,6 +4213,14 @@ int32_t tDeserializeSCreateVnodeReq(void *buf, int32_t bufLen, SCreateVnodeReq *
for (int32_t i = 0; i < 8; ++i) {
if (tDecodeI64(&decoder, &pReq->reserved[i]) < 0) return -1;
}
if (!tDecodeIsEnd(&decoder)) {
if (tDecodeI8(&decoder, &pReq->learnerReplica) < 0) return -1;
if (tDecodeI8(&decoder, &pReq->learnerSelfIndex) < 0) return -1;
for (int32_t i = 0; i < TSDB_MAX_LEARNER_REPLICA; ++i) {
SReplica *pReplica = &pReq->learnerReplicas[i];
if (tDecodeSReplica(&decoder, pReplica) < 0) return -1;
}
}
tEndDecode(&decoder);
tDecoderClear(&decoder);
@ -4433,6 +4447,12 @@ int32_t tSerializeSAlterVnodeReplicaReq(void *buf, int32_t bufLen, SAlterVnodeRe
for (int32_t i = 0; i < 8; ++i) {
if (tEncodeI64(&encoder, pReq->reserved[i]) < 0) return -1;
}
if (tEncodeI8(&encoder, pReq->learnerSelfIndex) < 0) return -1;
if (tEncodeI8(&encoder, pReq->learnerReplica) < 0) return -1;
for (int32_t i = 0; i < TSDB_MAX_LEARNER_REPLICA; ++i) {
SReplica *pReplica = &pReq->learnerReplicas[i];
if (tEncodeSReplica(&encoder, pReplica) < 0) return -1;
}
tEndEncode(&encoder);
int32_t tlen = encoder.pos;
@ -4456,7 +4476,15 @@ int32_t tDeserializeSAlterVnodeReplicaReq(void *buf, int32_t bufLen, SAlterVnode
for (int32_t i = 0; i < 8; ++i) {
if (tDecodeI64(&decoder, &pReq->reserved[i]) < 0) return -1;
}
if (!tDecodeIsEnd(&decoder)) {
if (tDecodeI8(&decoder, &pReq->learnerSelfIndex) < 0) return -1;
if (tDecodeI8(&decoder, &pReq->learnerReplica) < 0) return -1;
for (int32_t i = 0; i < TSDB_MAX_LEARNER_REPLICA; ++i) {
SReplica *pReplica = &pReq->learnerReplicas[i];
if (tDecodeSReplica(&decoder, pReplica) < 0) return -1;
}
}
tEndDecode(&decoder);
tDecoderClear(&decoder);
return 0;
@ -4767,6 +4795,12 @@ int32_t tSerializeSDCreateMnodeReq(void *buf, int32_t bufLen, SDCreateMnodeReq *
SReplica *pReplica = &pReq->replicas[i];
if (tEncodeSReplica(&encoder, pReplica) < 0) return -1;
}
if (tEncodeI8(&encoder, pReq->learnerReplica) < 0) return -1;
for (int32_t i = 0; i < TSDB_MAX_LEARNER_REPLICA; ++i) {
SReplica *pReplica = &pReq->learnerReplicas[i];
if (tEncodeSReplica(&encoder, pReplica) < 0) return -1;
}
if (tEncodeI64(&encoder, pReq->lastIndex) < 0) return -1;
tEndEncode(&encoder);
int32_t tlen = encoder.pos;
@ -4784,6 +4818,14 @@ int32_t tDeserializeSDCreateMnodeReq(void *buf, int32_t bufLen, SDCreateMnodeReq
SReplica *pReplica = &pReq->replicas[i];
if (tDecodeSReplica(&decoder, pReplica) < 0) return -1;
}
if (!tDecodeIsEnd(&decoder)) {
if (tDecodeI8(&decoder, &pReq->learnerReplica) < 0) return -1;
for (int32_t i = 0; i < TSDB_MAX_LEARNER_REPLICA; ++i) {
SReplica *pReplica = &pReq->learnerReplicas[i];
if (tDecodeSReplica(&decoder, pReplica) < 0) return -1;
}
if (tDecodeI64(&decoder, &pReq->lastIndex) < 0) return -1;
}
tEndDecode(&decoder);
tDecoderClear(&decoder);

View File

@ -32,6 +32,7 @@ typedef struct SDnodeMgmt {
TdThread crashReportThread;
SSingleWorker mgmtWorker;
ProcessCreateNodeFp processCreateNodeFp;
ProcessAlterNodeTypeFp processAlterNodeTypeFp;
ProcessDropNodeFp processDropNodeFp;
SendMonitorReportFp sendMonitorReportFp;
GetVnodeLoadsFp getVnodeLoadsFp;

View File

@ -352,6 +352,7 @@ SArray *dmGetMsgHandles() {
if (dmSetMgmtHandle(pArray, TDMT_DND_CONFIG_DNODE, dmPutNodeMsgToMgmtQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_SERVER_STATUS, dmPutNodeMsgToMgmtQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_SYSTABLE_RETRIEVE, dmPutNodeMsgToMgmtQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_ALTER_MNODE_TYPE, dmPutNodeMsgToMgmtQueue, 0) == NULL) goto _OVER;
// Requests handled by MNODE
if (dmSetMgmtHandle(pArray, TDMT_MND_GRANT, dmPutNodeMsgToMgmtQueue, 0) == NULL) goto _OVER;

View File

@ -48,6 +48,7 @@ static int32_t dmOpenMgmt(SMgmtInputOpt *pInput, SMgmtOutputOpt *pOutput) {
pMgmt->path = pInput->path;
pMgmt->name = pInput->name;
pMgmt->processCreateNodeFp = pInput->processCreateNodeFp;
pMgmt->processAlterNodeTypeFp = pInput->processAlterNodeTypeFp;
pMgmt->processDropNodeFp = pInput->processDropNodeFp;
pMgmt->sendMonitorReportFp = pInput->sendMonitorReportFp;
pMgmt->getVnodeLoadsFp = pInput->getVnodeLoadsFp;

View File

@ -227,6 +227,9 @@ static void dmProcessMgmtQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) {
case TDMT_DND_DROP_SNODE:
code = (*pMgmt->processDropNodeFp)(SNODE, pMsg);
break;
case TDMT_DND_ALTER_MNODE_TYPE:
code = (*pMgmt->processAlterNodeTypeFp)(MNODE, pMsg);
break;
case TDMT_DND_SERVER_STATUS:
code = dmProcessServerRunStatus(pMgmt, pMsg);
break;

View File

@ -24,12 +24,16 @@ static int32_t mmDecodeOption(SJson *pJson, SMnodeOpt *pOption) {
if (code < 0) return -1;
tjsonGetInt32ValueFromDouble(pJson, "selfIndex", pOption->selfIndex, code);
if (code < 0) return 0;
tjsonGetInt32ValueFromDouble(pJson, "lastIndex", pOption->lastIndex, code);
if (code < 0) return 0;
SJson *replicas = tjsonGetObjectItem(pJson, "replicas");
if (replicas == NULL) return 0;
pOption->numOfReplicas = tjsonGetArraySize(replicas);
pOption->numOfTotalReplicas = tjsonGetArraySize(replicas);
for (int32_t i = 0; i < pOption->numOfReplicas; ++i) {
pOption->numOfReplicas = 0;
for (int32_t i = 0; i < pOption->numOfTotalReplicas; ++i) {
SJson *replica = tjsonGetArrayItem(replicas, i);
if (replica == NULL) return -1;
@ -40,6 +44,14 @@ static int32_t mmDecodeOption(SJson *pJson, SMnodeOpt *pOption) {
if (code < 0) return -1;
tjsonGetUInt16ValueFromDouble(replica, "port", pReplica->port, code);
if (code < 0) return -1;
tjsonGetInt32ValueFromDouble(replica, "role", pOption->nodeRoles[i], code);
if (code < 0) return -1;
if(pOption->nodeRoles[i] == TAOS_SYNC_ROLE_VOTER){
pOption->numOfReplicas++;
}
}
for (int32_t i = 0; i < pOption->numOfTotalReplicas; ++i) {
}
return 0;
@ -112,14 +124,14 @@ _OVER:
}
static int32_t mmEncodeOption(SJson *pJson, const SMnodeOpt *pOption) {
if (pOption->deploy && pOption->numOfReplicas > 0) {
if (pOption->deploy && pOption->numOfTotalReplicas > 0) {
if (tjsonAddDoubleToObject(pJson, "selfIndex", pOption->selfIndex) < 0) return -1;
SJson *replicas = tjsonCreateArray();
if (replicas == NULL) return -1;
if (tjsonAddItemToObject(pJson, "replicas", replicas) < 0) return -1;
for (int32_t i = 0; i < pOption->numOfReplicas; ++i) {
for (int32_t i = 0; i < pOption->numOfTotalReplicas; ++i) {
SJson *replica = tjsonCreateObject();
if (replica == NULL) return -1;
@ -127,10 +139,13 @@ static int32_t mmEncodeOption(SJson *pJson, const SMnodeOpt *pOption) {
if (tjsonAddDoubleToObject(replica, "id", pReplica->id) < 0) return -1;
if (tjsonAddStringToObject(replica, "fqdn", pReplica->fqdn) < 0) return -1;
if (tjsonAddDoubleToObject(replica, "port", pReplica->port) < 0) return -1;
if (tjsonAddDoubleToObject(replica, "role", pOption->nodeRoles[i]) < 0) return -1;
if (tjsonAddItemToArray(replicas, replica) < 0) return -1;
}
}
if (tjsonAddDoubleToObject(pJson, "lastIndex", pOption->lastIndex) < 0) return -1;
if (tjsonAddDoubleToObject(pJson, "deployed", pOption->deploy) < 0) return -1;
return 0;

View File

@ -33,12 +33,24 @@ int32_t mmProcessCreateReq(const SMgmtInputOpt *pInput, SRpcMsg *pMsg) {
return -1;
}
SMnodeOpt option = {.deploy = true, .numOfReplicas = createReq.replica, .selfIndex = -1};
SMnodeOpt option = {.deploy = true, .numOfReplicas = createReq.replica,
.numOfTotalReplicas = createReq.replica + createReq.learnerReplica,
.selfIndex = -1, .lastIndex = createReq.lastIndex};
memcpy(option.replicas, createReq.replicas, sizeof(createReq.replicas));
for (int32_t i = 0; i < option.numOfReplicas; ++i) {
for (int32_t i = 0; i < createReq.replica; ++i) {
if (createReq.replicas[i].id == pInput->pData->dnodeId) {
option.selfIndex = i;
}
option.nodeRoles[i] = TAOS_SYNC_ROLE_VOTER;
}
memcpy(&(option.replicas[createReq.replica]), createReq.learnerReplicas, sizeof(createReq.learnerReplicas));
for (int32_t i = 0; i < createReq.learnerReplica; ++i) {
if (createReq.learnerReplicas[i].id == pInput->pData->dnodeId) {
option.selfIndex = createReq.replica + i;
}
option.nodeRoles[createReq.replica + i] = TAOS_SYNC_ROLE_LEARNER;
}
if (option.selfIndex == -1) {
@ -92,6 +104,8 @@ SArray *mmGetMsgHandles() {
if (dmSetMgmtHandle(pArray, TDMT_DND_CREATE_VNODE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_DROP_VNODE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_CONFIG_DNODE_RSP, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_ALTER_MNODE_TYPE_RSP, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_ALTER_VNODE_TYPE_RSP, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_MND_CONNECT, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_MND_CREATE_ACCT, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;

View File

@ -45,9 +45,11 @@ static void mmBuildOptionForDeploy(SMnodeMgmt *pMgmt, const SMgmtInputOpt *pInpu
pOption->dnodeId = pMgmt->pData->dnodeId;
pOption->selfIndex = 0;
pOption->numOfReplicas = 1;
pOption->numOfTotalReplicas = 1;
pOption->replicas[0].id = 1;
pOption->replicas[0].port = tsServerPort;
tstrncpy(pOption->replicas[0].fqdn, tsLocalFqdn, TSDB_FQDN_LEN);
pOption->lastIndex = SYNC_INDEX_INVALID;
}
static void mmBuildOptionForOpen(SMnodeMgmt *pMgmt, SMnodeOpt *pOption) {
@ -123,9 +125,10 @@ static int32_t mmOpen(SMgmtInputOpt *pInput, SMgmtOutputOpt *pOutput) {
}
tmsgReportStartup("mnode-worker", "initialized");
if (option.numOfReplicas > 0) {
if (option.numOfTotalReplicas > 0) {
option.deploy = true;
option.numOfReplicas = 0;
option.numOfTotalReplicas = 0;
if (mmWriteFile(pMgmt->path, &option) != 0) {
dError("failed to write mnode file since %s", terrstr());
return -1;
@ -152,6 +155,10 @@ static void mmStop(SMnodeMgmt *pMgmt) {
mndStop(pMgmt->pMnode);
}
static int32_t mmSyncIsCatchUp(SMnodeMgmt *pMgmt) {
return mndIsCatchUp(pMgmt->pMnode);
}
SMgmtFunc mmGetMgmtFunc() {
SMgmtFunc mgmtFunc = {0};
mgmtFunc.openFp = mmOpen;
@ -162,6 +169,7 @@ SMgmtFunc mmGetMgmtFunc() {
mgmtFunc.dropFp = (NodeDropFp)mmProcessDropReq;
mgmtFunc.requiredFp = mmRequire;
mgmtFunc.getHandlesFp = mmGetMsgHandles;
mgmtFunc.isCatchUpFp = (NodeIsCatchUpFp)mmSyncIsCatchUp;
return mgmtFunc;
}

View File

@ -90,6 +90,7 @@ int32_t vmProcessDropVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg);
int32_t vmProcessAlterVnodeReplicaReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg);
int32_t vmProcessDisableVnodeWriteReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg);
int32_t vmProcessAlterHashRangeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg);
int32_t vmProcessAlterVnodeTypeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg);
// vmFile.c
int32_t vmGetVnodeListFromFile(SVnodeMgmt *pMgmt, SWrapperCfg **ppCfgs, int32_t *numOfVnodes);

View File

@ -131,15 +131,34 @@ static void vmGenerateVnodeCfg(SCreateVnodeReq *pCreate, SVnodeCfg *pCfg) {
pCfg->tsdbPageSize = pCreate->tsdbPageSize * 1024;
pCfg->standby = 0;
pCfg->syncCfg.myIndex = pCreate->selfIndex;
pCfg->syncCfg.replicaNum = pCreate->replica;
pCfg->syncCfg.replicaNum = 0;
pCfg->syncCfg.totalReplicaNum = 0;
memset(&pCfg->syncCfg.nodeInfo, 0, sizeof(pCfg->syncCfg.nodeInfo));
for (int32_t i = 0; i < pCreate->replica; ++i) {
SNodeInfo *pNode = &pCfg->syncCfg.nodeInfo[i];
pNode->nodeId = pCreate->replicas[i].id;
pNode->nodePort = pCreate->replicas[i].port;
tstrncpy(pNode->nodeFqdn, pCreate->replicas[i].fqdn, TSDB_FQDN_LEN);
pNode->nodeId = pCreate->replicas[pCfg->syncCfg.replicaNum].id;
pNode->nodePort = pCreate->replicas[pCfg->syncCfg.replicaNum].port;
pNode->nodeRole = TAOS_SYNC_ROLE_VOTER;
tstrncpy(pNode->nodeFqdn, pCreate->replicas[pCfg->syncCfg.replicaNum].fqdn, TSDB_FQDN_LEN);
tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort);
pCfg->syncCfg.replicaNum++;
}
if(pCreate->selfIndex != -1){
pCfg->syncCfg.myIndex = pCreate->selfIndex;
}
for (int32_t i = pCfg->syncCfg.replicaNum; i < pCreate->replica + pCreate->learnerReplica; ++i) {
SNodeInfo *pNode = &pCfg->syncCfg.nodeInfo[i];
pNode->nodeId = pCreate->learnerReplicas[pCfg->syncCfg.totalReplicaNum].id;
pNode->nodePort = pCreate->learnerReplicas[pCfg->syncCfg.totalReplicaNum].port;
pNode->nodeRole = TAOS_SYNC_ROLE_LEARNER;
tstrncpy(pNode->nodeFqdn, pCreate->learnerReplicas[pCfg->syncCfg.totalReplicaNum].fqdn, TSDB_FQDN_LEN);
tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort);
pCfg->syncCfg.totalReplicaNum++;
}
pCfg->syncCfg.totalReplicaNum += pCfg->syncCfg.replicaNum;
if(pCreate->learnerSelfIndex != -1){
pCfg->syncCfg.myIndex = pCreate->replica + pCreate->learnerSelfIndex;
}
}
@ -182,23 +201,40 @@ int32_t vmProcessCreateVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
return -1;
}
dInfo("vgId:%d, start to create vnode, page:%d pageSize:%d buffer:%d szPage:%d szBuf:%" PRIu64
if(req.learnerReplica == 0)
{
req.learnerSelfIndex = -1;
}
dInfo("vgId:%d, vnode management handle msgType:%s, start to create vnode, page:%d pageSize:%d buffer:%d szPage:%d szBuf:%" PRIu64
", cacheLast:%d cacheLastSize:%d sstTrigger:%d tsdbPageSize:%d %d dbname:%s dbId:%" PRId64
", days:%d keep0:%d keep1:%d keep2:%d tsma:%d precision:%d compression:%d minRows:%d maxRows:%d"
", wal fsync:%d level:%d retentionPeriod:%d retentionSize:%" PRId64 " rollPeriod:%d segSize:%" PRId64
", hash method:%d begin:%u end:%u prefix:%d surfix:%d replica:%d selfIndex:%d strict:%d",
req.vgId, req.pages, req.pageSize, req.buffer, req.pageSize * 1024, (uint64_t)req.buffer * 1024 * 1024,
", hash method:%d begin:%u end:%u prefix:%d surfix:%d replica:%d selfIndex:%d "
"learnerReplica:%d learnerSelfIndex:%d strict:%d",
req.vgId, TMSG_INFO(pMsg->msgType), req.pages, req.pageSize, req.buffer, req.pageSize * 1024,
(uint64_t)req.buffer * 1024 * 1024,
req.cacheLast, req.cacheLastSize, req.sstTrigger, req.tsdbPageSize, req.tsdbPageSize * 1024, req.db, req.dbUid,
req.daysPerFile, req.daysToKeep0, req.daysToKeep1, req.daysToKeep2, req.isTsma, req.precision, req.compression,
req.minRows, req.maxRows, req.walFsyncPeriod, req.walLevel, req.walRetentionPeriod, req.walRetentionSize,
req.walRollPeriod, req.walSegmentSize, req.hashMethod, req.hashBegin, req.hashEnd, req.hashPrefix,
req.hashSuffix, req.replica, req.selfIndex, req.strict);
req.hashSuffix, req.replica, req.selfIndex, req.learnerReplica, req.learnerSelfIndex, req.strict);
for (int32_t i = 0; i < req.replica; ++i) {
dInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", req.vgId, i, req.replicas[i].fqdn, req.replicas[i].port,
req.replicas[i].id);
}
for (int32_t i = 0; i < req.learnerReplica; ++i) {
dInfo("vgId:%d, learnerReplica:%d ep:%s:%u dnode:%d", req.vgId, i, req.learnerReplicas[i].fqdn,
req.learnerReplicas[i].port, req.replicas[i].id);
}
SReplica *pReplica = &req.replicas[req.selfIndex];
SReplica *pReplica = NULL;
if(req.selfIndex != -1){
pReplica = &req.replicas[req.selfIndex];
}
else{
pReplica = &req.learnerReplicas[req.learnerSelfIndex];
}
if (pReplica->id != pMgmt->pData->dnodeId || pReplica->port != tsServerPort ||
strcmp(pReplica->fqdn, tsLocalFqdn) != 0) {
terrno = TSDB_CODE_INVALID_MSG;
@ -275,7 +311,8 @@ _OVER:
vnodeClose(pImpl);
vnodeDestroy(path, pMgmt->pTfs);
} else {
dInfo("vgId:%d, vnode is created", req.vgId);
dInfo("vgId:%d, vnode management handle msgType:%s, end to create vnode, vnode is created",
req.vgId, TMSG_INFO(pMsg->msgType));
}
tFreeSCreateVnodeReq(&req);
@ -283,6 +320,110 @@ _OVER:
return code;
}
int32_t vmProcessAlterVnodeTypeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
SAlterVnodeTypeReq req = {0};
if (tDeserializeSAlterVnodeReplicaReq(pMsg->pCont, pMsg->contLen, &req) != 0) {
terrno = TSDB_CODE_INVALID_MSG;
return -1;
}
if(req.learnerReplicas == 0){
req.learnerSelfIndex = -1;
}
dInfo("vgId:%d, vnode management handle msgType:%s, start to process alter-node-type-request",
req.vgId, TMSG_INFO(pMsg->msgType));
SVnodeObj *pVnode = vmAcquireVnode(pMgmt, req.vgId);
if (pVnode == NULL) {
dError("vgId:%d, failed to alter hashrange since %s", req.vgId, terrstr());
terrno = TSDB_CODE_VND_NOT_EXIST;
return -1;
}
dInfo("vgId:%d, checking node catch up", req.vgId);
if(vnodeIsCatchUp(pVnode->pImpl) != 0){
return -1;
}
dInfo("node:%s, catched up leader, continue to process alter-node-type-request", pMgmt->name);
int32_t vgId = req.vgId;
dInfo("vgId:%d, start to alter vnode type replica:%d selfIndex:%d strict:%d", vgId, req.replica, req.selfIndex,
req.strict);
for (int32_t i = 0; i < req.replica; ++i) {
SReplica *pReplica = &req.replicas[i];
dInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", vgId, i, pReplica->fqdn, pReplica->port, pReplica->id);
}
for (int32_t i = 0; i < req.learnerReplica; ++i) {
SReplica *pReplica = &req.learnerReplicas[i];
dInfo("vgId:%d, learnerReplicas:%d ep:%s:%u dnode:%d", vgId, i, pReplica->fqdn, pReplica->port, pReplica->id);
}
if (req.replica <= 0 ||
(req.selfIndex < 0 && req.learnerSelfIndex <0)||
req.selfIndex >= req.replica || req.learnerSelfIndex >= req.learnerReplica) {
terrno = TSDB_CODE_INVALID_MSG;
dError("vgId:%d, failed to alter replica since invalid msg", vgId);
return -1;
}
SReplica *pReplica = NULL;
if(req.selfIndex > 0){
pReplica = &req.replicas[req.selfIndex];
}
else{
pReplica = &req.learnerReplicas[req.learnerSelfIndex];
}
if (pReplica->id != pMgmt->pData->dnodeId || pReplica->port != tsServerPort ||
strcmp(pReplica->fqdn, tsLocalFqdn) != 0) {
terrno = TSDB_CODE_INVALID_MSG;
dError("vgId:%d, dnodeId:%d ep:%s:%u not matched with local dnode", vgId, pReplica->id, pReplica->fqdn,
pReplica->port);
return -1;
}
dInfo("vgId:%d, start to close vnode", vgId);
SWrapperCfg wrapperCfg = {
.dropped = pVnode->dropped,
.vgId = pVnode->vgId,
.vgVersion = pVnode->vgVersion,
};
tstrncpy(wrapperCfg.path, pVnode->path, sizeof(wrapperCfg.path));
vmCloseVnode(pMgmt, pVnode, false);
char path[TSDB_FILENAME_LEN] = {0};
snprintf(path, TSDB_FILENAME_LEN, "vnode%svnode%d", TD_DIRSEP, vgId);
dInfo("vgId:%d, start to alter vnode replica at %s", vgId, path);
if (vnodeAlterReplica(path, &req, pMgmt->pTfs) < 0) {
dError("vgId:%d, failed to alter vnode at %s since %s", vgId, path, terrstr());
return -1;
}
dInfo("vgId:%d, begin to open vnode", vgId);
SVnode *pImpl = vnodeOpen(path, pMgmt->pTfs, pMgmt->msgCb);
if (pImpl == NULL) {
dError("vgId:%d, failed to open vnode at %s since %s", vgId, path, terrstr());
return -1;
}
if (vmOpenVnode(pMgmt, &wrapperCfg, pImpl) != 0) {
dError("vgId:%d, failed to open vnode mgmt since %s", vgId, terrstr());
return -1;
}
if (vnodeStart(pImpl) != 0) {
dError("vgId:%d, failed to start sync since %s", vgId, terrstr());
return -1;
}
dInfo("vgId:%d, vnode management handle msgType:%s, end to process alter-node-type-request, vnode config is altered",
req.vgId, TMSG_INFO(pMsg->msgType));
return 0;
}
int32_t vmProcessDisableVnodeWriteReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
SDisableVnodeWriteReq req = {0};
if (tDeserializeSDisableVnodeWriteReq(pMsg->pCont, pMsg->contLen, &req) != 0) {
@ -377,21 +518,40 @@ int32_t vmProcessAlterVnodeReplicaReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
return -1;
}
if(alterReq.learnerReplica == 0){
alterReq.learnerSelfIndex = -1;
}
int32_t vgId = alterReq.vgId;
dInfo("vgId:%d, start to alter vnode replica:%d selfIndex:%d strict:%d", vgId, alterReq.replica, alterReq.selfIndex,
alterReq.strict);
dInfo("vgId:%d,vnode management handle msgType:%s, start to alter vnode replica:%d selfIndex:%d leanerReplica:%d "
"learnerSelfIndex:%d strict:%d",
vgId, TMSG_INFO(pMsg->msgType), alterReq.replica, alterReq.selfIndex, alterReq.learnerReplica,
alterReq.learnerSelfIndex, alterReq.strict);
for (int32_t i = 0; i < alterReq.replica; ++i) {
SReplica *pReplica = &alterReq.replicas[i];
dInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", vgId, i, pReplica->fqdn, pReplica->port, pReplica->port);
}
if (alterReq.replica <= 0 || alterReq.selfIndex < 0 || alterReq.selfIndex >= alterReq.replica) {
for (int32_t i = 0; i < alterReq.learnerReplica; ++i) {
SReplica *pReplica = &alterReq.learnerReplicas[i];
dInfo("vgId:%d, learnerReplicas:%d ep:%s:%u dnode:%d", vgId, i, pReplica->fqdn, pReplica->port, pReplica->port);
}
if (alterReq.replica <= 0 ||
(alterReq.selfIndex < 0 && alterReq.learnerSelfIndex <0)||
alterReq.selfIndex >= alterReq.replica || alterReq.learnerSelfIndex >= alterReq.learnerReplica) {
terrno = TSDB_CODE_INVALID_MSG;
dError("vgId:%d, failed to alter replica since invalid msg", vgId);
return -1;
}
SReplica *pReplica = &alterReq.replicas[alterReq.selfIndex];
SReplica *pReplica = NULL;
if(alterReq.selfIndex != -1){
pReplica = &alterReq.replicas[alterReq.selfIndex];
}
else{
pReplica = &alterReq.learnerReplicas[alterReq.learnerSelfIndex];
}
if (pReplica->id != pMgmt->pData->dnodeId || pReplica->port != tsServerPort ||
strcmp(pReplica->fqdn, tsLocalFqdn) != 0) {
terrno = TSDB_CODE_INVALID_MSG;
@ -425,7 +585,7 @@ int32_t vmProcessAlterVnodeReplicaReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
return -1;
}
dInfo("vgId:%d, close vnode", vgId);
dInfo("vgId:%d, begin to open vnode", vgId);
SVnode *pImpl = vnodeOpen(path, pMgmt->pTfs, pMgmt->msgCb);
if (pImpl == NULL) {
dError("vgId:%d, failed to open vnode at %s since %s", vgId, path, terrstr());
@ -442,7 +602,10 @@ int32_t vmProcessAlterVnodeReplicaReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
return -1;
}
dInfo("vgId:%d, vnode config is altered", vgId);
dInfo("vgId:%d, vnode management handle msgType:%s, end to alter vnode replica:%d selfIndex:%d leanerReplica:%d "
"learnerSelfIndex:%d strict:%d",
vgId, TMSG_INFO(pMsg->msgType), alterReq.replica, alterReq.selfIndex, alterReq.learnerReplica,
alterReq.learnerSelfIndex, alterReq.strict);
return 0;
}
@ -548,6 +711,7 @@ SArray *vmGetMsgHandles() {
if (dmSetMgmtHandle(pArray, TDMT_VND_TRIM, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_CREATE_VNODE, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_DROP_VNODE, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_ALTER_VNODE_TYPE, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_SYNC_TIMEOUT_ELECTION, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_SYNC_CLIENT_REQUEST, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER;

View File

@ -49,6 +49,9 @@ static void vmProcessMgmtQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) {
case TDMT_VND_ALTER_HASHRANGE:
code = vmProcessAlterHashRangeReq(pMgmt, pMsg);
break;
case TDMT_DND_ALTER_VNODE_TYPE:
code = vmProcessAlterVnodeTypeReq(pMgmt, pMsg);
break;
default:
terrno = TSDB_CODE_MSG_NOT_PROCESSED;
dGError("msg:%p, not processed in vnode-mgmt queue", pMsg);

View File

@ -169,6 +169,8 @@ static int32_t dmProcessCreateNodeReq(EDndNodeType ntype, SRpcMsg *pMsg) {
return -1;
}
dInfo("start to process create-node-request");
pWrapper = &pDnode->wrappers[ntype];
if (taosMkDir(pWrapper->path) != 0) {
dmReleaseWrapper(pWrapper);
@ -198,6 +200,64 @@ static int32_t dmProcessCreateNodeReq(EDndNodeType ntype, SRpcMsg *pMsg) {
return code;
}
static int32_t dmProcessAlterNodeTypeReq(EDndNodeType ntype, SRpcMsg *pMsg) {
SDnode *pDnode = dmInstance();
SMgmtWrapper *pWrapper = dmAcquireWrapper(pDnode, ntype);
if (pWrapper == NULL) {
dError("fail to process alter node type since node not exist");
return -1;
}
dmReleaseWrapper(pWrapper);
dInfo("node:%s, start to process alter-node-type-request", pWrapper->name);
pWrapper = &pDnode->wrappers[ntype];
if(pWrapper->func.isCatchUpFp != NULL){
dInfo("node:%s, checking node catch up", pWrapper->name);
if(!(*pWrapper->func.isCatchUpFp)(pWrapper->pMgmt) == 0){
return -1;
}
}
dInfo("node:%s, catched up leader, continue to process alter-node-type-request", pWrapper->name);
taosThreadMutexLock(&pDnode->mutex);
dInfo("node:%s, stopping node", pWrapper->name);
dmStopNode(pWrapper);
dInfo("node:%s, closing node", pWrapper->name);
dmCloseNode(pWrapper);
pWrapper = &pDnode->wrappers[ntype];
if (taosMkDir(pWrapper->path) != 0) {
dmReleaseWrapper(pWrapper);
terrno = TAOS_SYSTEM_ERROR(errno);
dError("failed to create dir:%s since %s", pWrapper->path, terrstr());
return -1;
}
SMgmtInputOpt input = dmBuildMgmtInputOpt(pWrapper);
dInfo("node:%s, start to create", pWrapper->name);
int32_t code = (*pWrapper->func.createFp)(&input, pMsg);
if (code != 0) {
dError("node:%s, failed to create since %s", pWrapper->name, terrstr());
} else {
dInfo("node:%s, has been created", pWrapper->name);
code = dmOpenNode(pWrapper);
if (code == 0) {
code = dmStartNode(pWrapper);
}
pWrapper->deployed = true;
pWrapper->required = true;
}
taosThreadMutexUnlock(&pDnode->mutex);
return code;
}
static int32_t dmProcessDropNodeReq(EDndNodeType ntype, SRpcMsg *pMsg) {
SDnode *pDnode = dmInstance();
@ -251,6 +311,7 @@ SMgmtInputOpt dmBuildMgmtInputOpt(SMgmtWrapper *pWrapper) {
.name = pWrapper->name,
.pData = &pWrapper->pDnode->data,
.processCreateNodeFp = dmProcessCreateNodeReq,
.processAlterNodeTypeFp = dmProcessAlterNodeTypeReq,
.processDropNodeFp = dmProcessDropNodeReq,
.sendMonitorReportFp = dmSendMonitorReport,
.getVnodeLoadsFp = dmGetVnodeLoads,

View File

@ -89,6 +89,7 @@ typedef void (*SendMonitorReportFp)();
typedef void (*GetVnodeLoadsFp)(SMonVloadInfo *pInfo);
typedef void (*GetMnodeLoadsFp)(SMonMloadInfo *pInfo);
typedef void (*GetQnodeLoadsFp)(SQnodeLoad *pInfo);
typedef int32_t (*ProcessAlterNodeTypeFp)(EDndNodeType ntype, SRpcMsg *pMsg);
typedef struct {
int32_t dnodeId;
@ -112,6 +113,7 @@ typedef struct {
SDnodeData *pData;
SMsgCb msgCb;
ProcessCreateNodeFp processCreateNodeFp;
ProcessAlterNodeTypeFp processAlterNodeTypeFp;
ProcessDropNodeFp processDropNodeFp;
SendMonitorReportFp sendMonitorReportFp;
GetVnodeLoadsFp getVnodeLoadsFp;
@ -132,6 +134,7 @@ typedef int32_t (*NodeCreateFp)(const SMgmtInputOpt *pInput, SRpcMsg *pMsg);
typedef int32_t (*NodeDropFp)(const SMgmtInputOpt *pInput, SRpcMsg *pMsg);
typedef int32_t (*NodeRequireFp)(const SMgmtInputOpt *pInput, bool *required);
typedef SArray *(*NodeGetHandlesFp)(); // array of SMgmtHandle
typedef bool (*NodeIsCatchUpFp)(void *pMgmt);
typedef struct {
NodeOpenFp openFp;
@ -142,6 +145,7 @@ typedef struct {
NodeDropFp dropFp;
NodeRequireFp requiredFp;
NodeGetHandlesFp getHandlesFp;
NodeIsCatchUpFp isCatchUpFp;
} SMgmtFunc;
typedef struct {

View File

@ -215,6 +215,8 @@ typedef struct {
bool syncRestore;
int64_t stateStartTime;
SDnodeObj* pDnode;
int32_t role;
SyncIndex lastIndex;
} SMnodeObj;
typedef struct {
@ -341,6 +343,7 @@ typedef struct {
ESyncState syncState;
bool syncRestore;
bool syncCanRead;
ESyncRole nodeRole;
} SVnodeGid;
typedef struct {
@ -361,7 +364,7 @@ typedef struct {
int8_t compact;
int8_t isTsma;
int8_t replica;
SVnodeGid vnodeGid[TSDB_MAX_REPLICA];
SVnodeGid vnodeGid[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
void* pTsma;
int32_t numOfCachedTables;
} SVgObj;

View File

@ -92,8 +92,11 @@ typedef struct {
int64_t transSeq;
TdThreadMutex lock;
int8_t selfIndex;
int8_t numOfTotalReplicas;
int8_t numOfReplicas;
SReplica replicas[TSDB_MAX_REPLICA];
SReplica replicas[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
ESyncRole nodeRoles[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
SyncIndex lastIndex;
} SSyncMgmt;
typedef struct {

View File

@ -491,7 +491,10 @@ static void mndSetOptions(SMnode *pMnode, const SMnodeOpt *pOption) {
pMnode->selfDnodeId = pOption->dnodeId;
pMnode->syncMgmt.selfIndex = pOption->selfIndex;
pMnode->syncMgmt.numOfReplicas = pOption->numOfReplicas;
pMnode->syncMgmt.numOfTotalReplicas = pOption->numOfTotalReplicas;
pMnode->syncMgmt.lastIndex = pOption->lastIndex;
memcpy(pMnode->syncMgmt.replicas, pOption->replicas, sizeof(pOption->replicas));
memcpy(pMnode->syncMgmt.nodeRoles, pOption->nodeRoles, sizeof(pOption->nodeRoles));
}
SMnode *mndOpen(const char *path, const SMnodeOpt *pOption) {
@ -582,6 +585,11 @@ int32_t mndStart(SMnode *pMnode) {
return mndInitTimer(pMnode);
}
int32_t mndIsCatchUp(SMnode *pMnode) {
int64_t rid = pMnode->syncMgmt.sync;
return syncIsCatchUp(rid);
}
void mndStop(SMnode *pMnode) {
mndSetStop(pMnode);
mndSyncStop(pMnode);

View File

@ -23,7 +23,7 @@
#include "mndTrans.h"
#include "tmisce.h"
#define MNODE_VER_NUMBER 1
#define MNODE_VER_NUMBER 2
#define MNODE_RESERVE_SIZE 64
static int32_t mndCreateDefaultMnode(SMnode *pMnode);
@ -53,6 +53,7 @@ int32_t mndInitMnode(SMnode *pMnode) {
mndSetMsgHandle(pMnode, TDMT_MND_CREATE_MNODE, mndProcessCreateMnodeReq);
mndSetMsgHandle(pMnode, TDMT_DND_CREATE_MNODE_RSP, mndTransProcessRsp);
mndSetMsgHandle(pMnode, TDMT_DND_ALTER_MNODE_TYPE_RSP, mndTransProcessRsp);
mndSetMsgHandle(pMnode, TDMT_MND_ALTER_MNODE, mndProcessAlterMnodeReq);
mndSetMsgHandle(pMnode, TDMT_MND_ALTER_MNODE_RSP, mndTransProcessRsp);
mndSetMsgHandle(pMnode, TDMT_MND_DROP_MNODE, mndProcessDropMnodeReq);
@ -126,6 +127,8 @@ static SSdbRaw *mndMnodeActionEncode(SMnodeObj *pObj) {
SDB_SET_INT32(pRaw, dataPos, pObj->id, _OVER)
SDB_SET_INT64(pRaw, dataPos, pObj->createdTime, _OVER)
SDB_SET_INT64(pRaw, dataPos, pObj->updateTime, _OVER)
SDB_SET_INT32(pRaw, dataPos, pObj->role, _OVER)
SDB_SET_INT64(pRaw, dataPos, pObj->lastIndex, _OVER)
SDB_SET_RESERVE(pRaw, dataPos, MNODE_RESERVE_SIZE, _OVER)
terrno = 0;
@ -149,7 +152,7 @@ static SSdbRow *mndMnodeActionDecode(SSdbRaw *pRaw) {
int8_t sver = 0;
if (sdbGetRawSoftVer(pRaw, &sver) != 0) return NULL;
if (sver != MNODE_VER_NUMBER) {
if (sver != 1 && sver != 2) {
terrno = TSDB_CODE_SDB_INVALID_DATA_VER;
goto _OVER;
}
@ -164,6 +167,10 @@ static SSdbRow *mndMnodeActionDecode(SSdbRaw *pRaw) {
SDB_GET_INT32(pRaw, dataPos, &pObj->id, _OVER)
SDB_GET_INT64(pRaw, dataPos, &pObj->createdTime, _OVER)
SDB_GET_INT64(pRaw, dataPos, &pObj->updateTime, _OVER)
if(sver >=2){
SDB_GET_INT32(pRaw, dataPos, &pObj->role, _OVER)
SDB_GET_INT64(pRaw, dataPos, &pObj->lastIndex, _OVER)
}
SDB_GET_RESERVE(pRaw, dataPos, MNODE_RESERVE_SIZE, _OVER)
terrno = 0;
@ -204,7 +211,9 @@ static int32_t mndMnodeActionDelete(SSdb *pSdb, SMnodeObj *pObj) {
static int32_t mndMnodeActionUpdate(SSdb *pSdb, SMnodeObj *pOld, SMnodeObj *pNew) {
mTrace("mnode:%d, perform update action, old row:%p new row:%p", pOld->id, pOld, pNew);
pOld->role = pNew->role;
pOld->updateTime = pNew->updateTime;
pOld->lastIndex = pNew->lastIndex;
mndReloadSyncConfig(pSdb->pMnode);
return 0;
@ -302,6 +311,27 @@ static int32_t mndBuildCreateMnodeRedoAction(STrans *pTrans, SDCreateMnodeReq *p
return 0;
}
static int32_t mndBuildAlterMnodeTypeRedoAction(STrans *pTrans,
SDAlterMnodeTypeReq *pAlterMnodeTypeReq, SEpSet *pAlterMnodeTypeEpSet) {
int32_t contLen = tSerializeSDCreateMnodeReq(NULL, 0, pAlterMnodeTypeReq);
void *pReq = taosMemoryMalloc(contLen);
tSerializeSDCreateMnodeReq(pReq, contLen, pAlterMnodeTypeReq);
STransAction action = {
.epSet = *pAlterMnodeTypeEpSet,
.pCont = pReq,
.contLen = contLen,
.msgType = TDMT_DND_ALTER_MNODE_TYPE,
.acceptableCode = TSDB_CODE_MNODE_ALREADY_DEPLOYED,
};
if (mndTransAppendRedoAction(pTrans, &action) != 0) {
taosMemoryFree(pReq);
return -1;
}
return 0;
}
static int32_t mndBuildAlterMnodeRedoAction(STrans *pTrans, SDCreateMnodeReq *pAlterReq, SEpSet *pAlterEpSet) {
int32_t contLen = tSerializeSDCreateMnodeReq(NULL, 0, pAlterReq);
void *pReq = taosMemoryMalloc(contLen);
@ -347,6 +377,7 @@ static int32_t mndSetCreateMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDno
SSdb *pSdb = pMnode->pSdb;
void *pIter = NULL;
int32_t numOfReplicas = 0;
int32_t numOfLearnerReplicas = 0;
SDCreateMnodeReq createReq = {0};
SEpSet createEpset = {0};
@ -355,18 +386,29 @@ static int32_t mndSetCreateMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDno
pIter = sdbFetch(pSdb, SDB_MNODE, pIter, (void **)&pMObj);
if (pIter == NULL) break;
createReq.replicas[numOfReplicas].id = pMObj->id;
createReq.replicas[numOfReplicas].port = pMObj->pDnode->port;
memcpy(createReq.replicas[numOfReplicas].fqdn, pMObj->pDnode->fqdn, TSDB_FQDN_LEN);
if(pMObj->role == TAOS_SYNC_ROLE_VOTER){
createReq.replicas[numOfReplicas].id = pMObj->id;
createReq.replicas[numOfReplicas].port = pMObj->pDnode->port;
memcpy(createReq.replicas[numOfReplicas].fqdn, pMObj->pDnode->fqdn, TSDB_FQDN_LEN);
numOfReplicas++;
}
else{
createReq.learnerReplicas[numOfLearnerReplicas].id = pMObj->id;
createReq.learnerReplicas[numOfLearnerReplicas].port = pMObj->pDnode->port;
memcpy(createReq.learnerReplicas[numOfLearnerReplicas].fqdn, pMObj->pDnode->fqdn, TSDB_FQDN_LEN);
numOfLearnerReplicas++;
}
numOfReplicas++;
sdbRelease(pSdb, pMObj);
}
createReq.replica = numOfReplicas + 1;
createReq.replicas[numOfReplicas].id = pDnode->id;
createReq.replicas[numOfReplicas].port = pDnode->port;
memcpy(createReq.replicas[numOfReplicas].fqdn, pDnode->fqdn, TSDB_FQDN_LEN);
createReq.replica = numOfReplicas;
createReq.learnerReplica = numOfLearnerReplicas + 1;
createReq.learnerReplicas[numOfLearnerReplicas].id = pDnode->id;
createReq.learnerReplicas[numOfLearnerReplicas].port = pDnode->port;
memcpy(createReq.learnerReplicas[numOfLearnerReplicas].fqdn, pDnode->fqdn, TSDB_FQDN_LEN);
createReq.lastIndex = pObj->lastIndex;
createEpset.inUse = 0;
createEpset.numOfEps = 1;
@ -378,23 +420,78 @@ static int32_t mndSetCreateMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDno
return 0;
}
static int32_t mndSetAlterMnodeTypeRedoActions(SMnode *pMnode, STrans *pTrans, SDnodeObj *pDnode, SMnodeObj *pObj) {
SSdb *pSdb = pMnode->pSdb;
void *pIter = NULL;
SDAlterMnodeTypeReq alterReq = {0};
SEpSet createEpset = {0};
while (1) {
SMnodeObj *pMObj = NULL;
pIter = sdbFetch(pSdb, SDB_MNODE, pIter, (void **)&pMObj);
if (pIter == NULL) break;
if(pMObj->role == TAOS_SYNC_ROLE_VOTER){
alterReq.replicas[alterReq.replica].id = pMObj->id;
alterReq.replicas[alterReq.replica].port = pMObj->pDnode->port;
memcpy(alterReq.replicas[alterReq.replica].fqdn, pMObj->pDnode->fqdn, TSDB_FQDN_LEN);
alterReq.replica++;
}
else{
alterReq.learnerReplicas[alterReq.learnerReplica].id = pMObj->id;
alterReq.learnerReplicas[alterReq.learnerReplica].port = pMObj->pDnode->port;
memcpy(alterReq.learnerReplicas[alterReq.learnerReplica].fqdn, pMObj->pDnode->fqdn, TSDB_FQDN_LEN);
alterReq.learnerReplica++;
}
sdbRelease(pSdb, pMObj);
}
alterReq.replicas[alterReq.replica].id = pDnode->id;
alterReq.replicas[alterReq.replica].port = pDnode->port;
memcpy(alterReq.replicas[alterReq.replica].fqdn, pDnode->fqdn, TSDB_FQDN_LEN);
alterReq.replica++;
alterReq.lastIndex = pObj->lastIndex;
createEpset.inUse = 0;
createEpset.numOfEps = 1;
createEpset.eps[0].port = pDnode->port;
memcpy(createEpset.eps[0].fqdn, pDnode->fqdn, TSDB_FQDN_LEN);
if (mndBuildAlterMnodeTypeRedoAction(pTrans, &alterReq, &createEpset) != 0) return -1;
return 0;
}
static int32_t mndCreateMnode(SMnode *pMnode, SRpcMsg *pReq, SDnodeObj *pDnode, SMCreateMnodeReq *pCreate) {
int32_t code = -1;
SMnodeObj mnodeObj = {0};
mnodeObj.id = pDnode->id;
mnodeObj.createdTime = taosGetTimestampMs();
mnodeObj.updateTime = mnodeObj.createdTime;
STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_GLOBAL, pReq, "create-mnode");
if (pTrans == NULL) goto _OVER;
mndTransSetSerial(pTrans);
mInfo("trans:%d, used to create mnode:%d", pTrans->id, pCreate->dnodeId);
if (mndTrancCheckConflict(pMnode, pTrans) != 0) goto _OVER;
SMnodeObj mnodeObj = {0};
mnodeObj.id = pDnode->id;
mnodeObj.createdTime = taosGetTimestampMs();
mnodeObj.updateTime = mnodeObj.createdTime;
mnodeObj.role = TAOS_SYNC_ROLE_LEARNER;
mnodeObj.lastIndex = pMnode->applied;
if (mndSetCreateMnodeRedoActions(pMnode, pTrans, pDnode, &mnodeObj) != 0) goto _OVER;
if (mndSetCreateMnodeRedoLogs(pMnode, pTrans, &mnodeObj) != 0) goto _OVER;
if (mndSetCreateMnodeCommitLogs(pMnode, pTrans, &mnodeObj) != 0) goto _OVER;
SMnodeObj mnodeLeaderObj = {0};
mnodeLeaderObj.id = pDnode->id;
mnodeLeaderObj.createdTime = taosGetTimestampMs();
mnodeLeaderObj.updateTime = mnodeLeaderObj.createdTime;
mnodeLeaderObj.role = TAOS_SYNC_ROLE_VOTER;
mnodeLeaderObj.lastIndex = pMnode->applied + 1;
if (mndSetAlterMnodeTypeRedoActions(pMnode, pTrans, pDnode, &mnodeLeaderObj) != 0) goto _OVER;
if (mndSetCreateMnodeCommitLogs(pMnode, pTrans, &mnodeLeaderObj) != 0) goto _OVER;
if (mndTransPrepare(pMnode, pTrans) != 0) goto _OVER;
code = 0;
@ -514,6 +611,7 @@ static int32_t mndSetDropMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDnode
int32_t mndSetDropMnodeInfoToTrans(SMnode *pMnode, STrans *pTrans, SMnodeObj *pObj, bool force) {
if (pObj == NULL) return 0;
pObj->lastIndex = pMnode->applied;
if (mndSetDropMnodeRedoActions(pMnode, pTrans, pObj->pDnode, pObj, force) != 0) return -1;
if (mndSetDropMnodeCommitLogs(pMnode, pTrans, pObj) != 0) return -1;
return 0;
@ -730,7 +828,8 @@ static void mndReloadSyncConfig(SMnode *pMnode) {
void *pIter = NULL;
int32_t updatingMnodes = 0;
int32_t readyMnodes = 0;
SSyncCfg cfg = {.myIndex = -1};
SSyncCfg cfg = {.myIndex = -1, .lastIndex = 0,};
SyncIndex maxIndex = 0;
while (1) {
pIter = sdbFetchAll(pSdb, SDB_MNODE, pIter, (void **)&pObj, &objStatus, false);
@ -745,26 +844,41 @@ static void mndReloadSyncConfig(SMnode *pMnode) {
}
if (objStatus == SDB_STATUS_READY || objStatus == SDB_STATUS_CREATING) {
SNodeInfo *pNode = &cfg.nodeInfo[cfg.replicaNum];
SNodeInfo *pNode = &cfg.nodeInfo[cfg.totalReplicaNum];
pNode->nodeId = pObj->pDnode->id;
pNode->clusterId = mndGetClusterId(pMnode);
pNode->nodePort = pObj->pDnode->port;
pNode->nodeRole = pObj->role;
tstrncpy(pNode->nodeFqdn, pObj->pDnode->fqdn, TSDB_FQDN_LEN);
(void)tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort);
mInfo("vgId:1, ep:%s:%u dnode:%d", pNode->nodeFqdn, pNode->nodePort, pNode->nodeId);
if (pObj->pDnode->id == pMnode->selfDnodeId) {
cfg.myIndex = cfg.replicaNum;
cfg.myIndex = cfg.totalReplicaNum;
}
if(pNode->nodeRole == TAOS_SYNC_ROLE_VOTER){
cfg.replicaNum++;
}
cfg.totalReplicaNum++;
if(pObj->lastIndex > cfg.lastIndex){
cfg.lastIndex = pObj->lastIndex;
}
cfg.replicaNum++;
}
if (objStatus == SDB_STATUS_DROPPING) {
if(pObj->lastIndex > cfg.lastIndex){
cfg.lastIndex = pObj->lastIndex;
}
}
mInfo("vgId:1, mnode:%d, role:%d, lastIndex:%" PRId64, pObj->id, pObj->role, pObj->lastIndex);
sdbReleaseLock(pSdb, pObj, false);
}
if (readyMnodes <= 0 || updatingMnodes <= 0) {
mInfo("vgId:1, mnode sync not reconfig since readyMnodes:%d updatingMnodes:%d", readyMnodes, updatingMnodes);
return;
}
//if (readyMnodes <= 0 || updatingMnodes <= 0) {
// mInfo("vgId:1, mnode sync not reconfig since readyMnodes:%d updatingMnodes:%d", readyMnodes, updatingMnodes);
// return;
//}
if (cfg.myIndex == -1) {
#if 1
@ -777,12 +891,14 @@ static void mndReloadSyncConfig(SMnode *pMnode) {
return;
}
if (updatingMnodes > 0) {
mInfo("vgId:1, mnode sync reconfig, replica:%d myIndex:%d", cfg.replicaNum, cfg.myIndex);
for (int32_t i = 0; i < cfg.replicaNum; ++i) {
if (pMnode->syncMgmt.sync > 0) {
mInfo("vgId:1, mnode sync reconfig, totalReplica:%d replica:%d myIndex:%d",
cfg.totalReplicaNum, cfg.replicaNum, cfg.myIndex);
for (int32_t i = 0; i < cfg.totalReplicaNum; ++i) {
SNodeInfo *pNode = &cfg.nodeInfo[i];
mInfo("vgId:1, index:%d, ep:%s:%u dnode:%d cluster:%" PRId64, i, pNode->nodeFqdn, pNode->nodePort, pNode->nodeId,
pNode->clusterId);
mInfo("vgId:1, index:%d, ep:%s:%u dnode:%d cluster:%" PRId64 " role:%d", i, pNode->nodeFqdn, pNode->nodePort,
pNode->nodeId, pNode->clusterId, pNode->nodeRole);
}
int32_t code = syncReconfig(pMnode->syncMgmt.sync, &cfg);

View File

@ -237,6 +237,23 @@ static void mndBecomeFollower(const SSyncFSM *pFsm) {
taosThreadMutexUnlock(&pMgmt->lock);
}
static void mndBecomeLearner(const SSyncFSM *pFsm) {
SMnode *pMnode = pFsm->data;
SSyncMgmt *pMgmt = &pMnode->syncMgmt;
mInfo("vgId:1, become learner");
taosThreadMutexLock(&pMgmt->lock);
if (pMgmt->transId != 0) {
mInfo("vgId:1, become learner and post sem, trans:%d, failed to propose since not leader", pMgmt->transId);
pMgmt->transId = 0;
pMgmt->transSec = 0;
pMgmt->transSeq = 0;
pMgmt->errCode = TSDB_CODE_SYN_NOT_LEADER;
tsem_post(&pMgmt->syncSem);
}
taosThreadMutexUnlock(&pMgmt->lock);
}
static void mndBecomeLeader(const SSyncFSM *pFsm) {
mInfo("vgId:1, become leader");
SMnode *pMnode = pFsm->data;
@ -278,6 +295,7 @@ SSyncFSM *mndSyncMakeFsm(SMnode *pMnode) {
pFsm->FpReConfigCb = NULL;
pFsm->FpBecomeLeaderCb = mndBecomeLeader;
pFsm->FpBecomeFollowerCb = mndBecomeFollower;
pFsm->FpBecomeLearnerCb = mndBecomeLearner;
pFsm->FpGetSnapshot = mndSyncGetSnapshot;
pFsm->FpGetSnapshotInfo = mndSyncGetSnapshotInfo;
pFsm->FpSnapshotStartRead = mndSnapshotStartRead;
@ -317,13 +335,16 @@ int32_t mndInitSync(SMnode *pMnode) {
mInfo("vgId:1, start to open sync, replica:%d selfIndex:%d", pMgmt->numOfReplicas, pMgmt->selfIndex);
SSyncCfg *pCfg = &syncInfo.syncCfg;
pCfg->totalReplicaNum = pMgmt->numOfTotalReplicas;
pCfg->replicaNum = pMgmt->numOfReplicas;
pCfg->myIndex = pMgmt->selfIndex;
for (int32_t i = 0; i < pMgmt->numOfReplicas; ++i) {
pCfg->lastIndex = pMgmt->lastIndex;
for (int32_t i = 0; i < pMgmt->numOfTotalReplicas; ++i) {
SNodeInfo *pNode = &pCfg->nodeInfo[i];
pNode->nodeId = pMgmt->replicas[i].id;
pNode->nodePort = pMgmt->replicas[i].port;
tstrncpy(pNode->nodeFqdn, pMgmt->replicas[i].fqdn, sizeof(pNode->nodeFqdn));
pNode->nodeRole = pMgmt->nodeRoles[i];
(void)tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort);
mInfo("vgId:1, index:%d ep:%s:%u dnode:%d cluster:%" PRId64, i, pNode->nodeFqdn, pNode->nodePort, pNode->nodeId,
pNode->clusterId);

View File

@ -62,6 +62,7 @@ int32_t mndInitVgroup(SMnode *pMnode) {
mndSetMsgHandle(pMnode, TDMT_VND_COMPACT_RSP, mndTransProcessRsp);
mndSetMsgHandle(pMnode, TDMT_VND_DISABLE_WRITE_RSP, mndTransProcessRsp);
mndSetMsgHandle(pMnode, TDMT_SYNC_FORCE_FOLLOWER_RSP, mndTransProcessRsp);
mndSetMsgHandle(pMnode, TDMT_DND_ALTER_VNODE_TYPE_RSP, mndTransProcessRsp);
mndSetMsgHandle(pMnode, TDMT_MND_REDISTRIBUTE_VGROUP, mndProcessRedistributeVgroupMsg);
mndSetMsgHandle(pMnode, TDMT_MND_SPLIT_VGROUP, mndProcessSplitVgroupMsg);
@ -203,7 +204,7 @@ static int32_t mndVgroupActionUpdate(SSdb *pSdb, SVgObj *pOld, SVgObj *pNew) {
pNew->compStorage = pOld->compStorage;
pNew->pointsWritten = pOld->pointsWritten;
pNew->compact = pOld->compact;
memcpy(pOld->vnodeGid, pNew->vnodeGid, TSDB_MAX_REPLICA * sizeof(SVnodeGid));
memcpy(pOld->vnodeGid, pNew->vnodeGid, (TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA) * sizeof(SVnodeGid));
return 0;
}
@ -244,8 +245,10 @@ void *mndBuildCreateVnodeReq(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVg
createReq.compression = pDb->cfg.compression;
createReq.strict = pDb->cfg.strict;
createReq.cacheLast = pDb->cfg.cacheLast;
createReq.replica = pVgroup->replica;
createReq.replica = 0;
createReq.learnerReplica = 0;
createReq.selfIndex = -1;
createReq.learnerSelfIndex = -1;
createReq.hashBegin = pVgroup->hashBegin;
createReq.hashEnd = pVgroup->hashEnd;
createReq.hashMethod = pDb->cfg.hashMethod;
@ -263,7 +266,15 @@ void *mndBuildCreateVnodeReq(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVg
createReq.tsdbPageSize = pDb->cfg.tsdbPageSize;
for (int32_t v = 0; v < pVgroup->replica; ++v) {
SReplica *pReplica = &createReq.replicas[v];
SReplica *pReplica = NULL;
if(pVgroup->vnodeGid[v].nodeRole == TAOS_SYNC_ROLE_VOTER){
pReplica = &createReq.replicas[createReq.replica];
}
else{
pReplica = &createReq.learnerReplicas[createReq.learnerReplica];
}
SVnodeGid *pVgid = &pVgroup->vnodeGid[v];
SDnodeObj *pVgidDnode = mndAcquireDnode(pMnode, pVgid->dnodeId);
if (pVgidDnode == NULL) {
@ -275,21 +286,40 @@ void *mndBuildCreateVnodeReq(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVg
memcpy(pReplica->fqdn, pVgidDnode->fqdn, TSDB_FQDN_LEN);
mndReleaseDnode(pMnode, pVgidDnode);
if (pDnode->id == pVgid->dnodeId) {
createReq.selfIndex = v;
if(pVgroup->vnodeGid[v].nodeRole == TAOS_SYNC_ROLE_VOTER){
if (pDnode->id == pVgid->dnodeId) {
createReq.selfIndex = createReq.replica;
}
}
else{
if (pDnode->id == pVgid->dnodeId) {
createReq.learnerSelfIndex = createReq.learnerReplica;
}
}
if(pVgroup->vnodeGid[v].nodeRole == TAOS_SYNC_ROLE_VOTER){
createReq.replica++;
}
else{
createReq.learnerReplica++;
}
}
if (createReq.selfIndex == -1) {
if (createReq.selfIndex == -1 && createReq.learnerSelfIndex == -1) {
terrno = TSDB_CODE_APP_ERROR;
return NULL;
}
mInfo("vgId:%d, build create vnode req, replica:%d selfIndex:%d strict:%d", createReq.vgId, createReq.replica,
createReq.selfIndex, createReq.strict);
mInfo("vgId:%d, build create vnode req, replica:%d selfIndex:%d learnerReplica:%d learnerSelfIndex:%d strict:%d",
createReq.vgId, createReq.replica, createReq.selfIndex, createReq.learnerReplica,
createReq.learnerReplica, createReq.strict);
for (int32_t i = 0; i < createReq.replica; ++i) {
mInfo("vgId:%d, replica:%d ep:%s:%u", createReq.vgId, i, createReq.replicas[i].fqdn, createReq.replicas[i].port);
}
for (int32_t i = 0; i < createReq.learnerReplica; ++i) {
mInfo("vgId:%d, replica:%d ep:%s:%u", createReq.vgId, i, createReq.learnerReplicas[i].fqdn,
createReq.learnerReplicas[i].port);
}
int32_t contLen = tSerializeSCreateVnodeReq(NULL, 0, &createReq);
if (contLen < 0) {
@ -356,12 +386,24 @@ static void *mndBuildAlterVnodeReplicaReq(SMnode *pMnode, SDbObj *pDb, SVgObj *p
SAlterVnodeReplicaReq alterReq = {
.vgId = pVgroup->vgId,
.strict = pDb->cfg.strict,
.replica = pVgroup->replica,
.replica = 0,
.learnerReplica = 0,
.selfIndex = -1,
.learnerSelfIndex = -1,
};
for (int32_t v = 0; v < pVgroup->replica; ++v) {
SReplica *pReplica = &alterReq.replicas[v];
SReplica *pReplica = NULL;
if(pVgroup->vnodeGid[v].nodeRole == TAOS_SYNC_ROLE_VOTER){
pReplica = &alterReq.replicas[alterReq.replica];
alterReq.replica++;
}
else{
pReplica = &alterReq.learnerReplicas[alterReq.learnerReplica];
alterReq.learnerReplica++;
}
SVnodeGid *pVgid = &pVgroup->vnodeGid[v];
SDnodeObj *pVgidDnode = mndAcquireDnode(pMnode, pVgid->dnodeId);
if (pVgidDnode == NULL) return NULL;
@ -371,18 +413,30 @@ static void *mndBuildAlterVnodeReplicaReq(SMnode *pMnode, SDbObj *pDb, SVgObj *p
memcpy(pReplica->fqdn, pVgidDnode->fqdn, TSDB_FQDN_LEN);
mndReleaseDnode(pMnode, pVgidDnode);
if (dnodeId == pVgid->dnodeId) {
alterReq.selfIndex = v;
if(pVgroup->vnodeGid[v].nodeRole == TAOS_SYNC_ROLE_VOTER){
if (dnodeId == pVgid->dnodeId) {
alterReq.selfIndex = v;
}
}
else{
if (dnodeId == pVgid->dnodeId) {
alterReq.learnerSelfIndex = v;
}
}
}
alterReq.replica = pVgroup->replica;
mInfo("vgId:%d, build alter vnode req, replica:%d selfIndex:%d strict:%d", alterReq.vgId, alterReq.replica,
alterReq.selfIndex, alterReq.strict);
mInfo("vgId:%d, build alter vnode req, replica:%d selfIndex:%d learnerReplica:%d learnerSelfIndex:%d strict:%d",
alterReq.vgId, alterReq.replica, alterReq.selfIndex, alterReq.learnerReplica,
alterReq.learnerSelfIndex, alterReq.strict);
for (int32_t i = 0; i < alterReq.replica; ++i) {
mInfo("vgId:%d, replica:%d ep:%s:%u", alterReq.vgId, i, alterReq.replicas[i].fqdn, alterReq.replicas[i].port);
}
for (int32_t i = 0; i < alterReq.learnerReplica; ++i) {
mInfo("vgId:%d, learnerReplica:%d ep:%s:%u", alterReq.vgId, i,
alterReq.learnerReplicas[i].fqdn, alterReq.learnerReplicas[i].port);
}
if (alterReq.selfIndex == -1) {
if (alterReq.selfIndex == -1 && alterReq.learnerSelfIndex == -1) {
terrno = TSDB_CODE_APP_ERROR;
return NULL;
}
@ -1194,6 +1248,30 @@ int32_t mndAddAlterVnodeReplicaAction(SMnode *pMnode, STrans *pTrans, SDbObj *pD
return 0;
}
int32_t mndAddAlterVnodeTypeAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, int32_t dnodeId) {
SDnodeObj *pDnode = mndAcquireDnode(pMnode, dnodeId);
if (pDnode == NULL) return -1;
STransAction action = {0};
action.epSet = mndGetDnodeEpset(pDnode);
mndReleaseDnode(pMnode, pDnode);
int32_t contLen = 0;
void *pReq = mndBuildAlterVnodeReplicaReq(pMnode, pDb, pVgroup, dnodeId, &contLen);
if (pReq == NULL) return -1;
action.pCont = pReq;
action.contLen = contLen;
action.msgType = TDMT_DND_ALTER_VNODE_TYPE;
if (mndTransAppendRedoAction(pTrans, &action) != 0) {
taosMemoryFree(pReq);
return -1;
}
return 0;
}
static int32_t mndAddDisableVnodeWriteAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup,
int32_t dnodeId) {
SDnodeObj *pDnode = mndAcquireDnode(pMnode, dnodeId);
@ -1953,18 +2031,33 @@ int32_t mndBuildAlterVgroupAction(SMnode *pMnode, STrans *pTrans, SDbObj *pOldDb
mInfo("db:%s, vgId:%d, will add 2 vnodes, vn:0 dnode:%d", pVgroup->dbName, pVgroup->vgId,
pVgroup->vnodeGid[0].dnodeId);
//add first
if (mndAddVnodeToVgroup(pMnode, pTrans, &newVgroup, pArray) != 0) return -1;
newVgroup.vnodeGid[0].nodeRole = TAOS_SYNC_ROLE_VOTER;
newVgroup.vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_LEARNER;
if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, &newVgroup, newVgroup.vnodeGid[0].dnodeId) != 0)
return -1;
if (mndAddCreateVnodeAction(pMnode, pTrans, pNewDb, &newVgroup, &newVgroup.vnodeGid[1]) != 0) return -1;
newVgroup.vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_VOTER;
if (mndAddAlterVnodeTypeAction(pMnode, pTrans, pNewDb, &newVgroup, newVgroup.vnodeGid[1].dnodeId) != 0)
return -1;
if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pNewDb, &newVgroup) != 0) return -1;
//add second
if (mndAddVnodeToVgroup(pMnode, pTrans, &newVgroup, pArray) != 0) return -1;
newVgroup.vnodeGid[0].nodeRole = TAOS_SYNC_ROLE_VOTER;
newVgroup.vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_VOTER;
newVgroup.vnodeGid[2].nodeRole = TAOS_SYNC_ROLE_VOTER;
if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, &newVgroup, newVgroup.vnodeGid[0].dnodeId) != 0)
return -1;
if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, &newVgroup, newVgroup.vnodeGid[1].dnodeId) != 0)
return -1;
if (mndAddCreateVnodeAction(pMnode, pTrans, pNewDb, &newVgroup, &newVgroup.vnodeGid[2]) != 0) return -1;
if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pNewDb, &newVgroup) != 0) return -1;
} else if (newVgroup.replica == 3 && pNewDb->cfg.replications == 1) {
mInfo("db:%s, vgId:%d, will remove 2 vnodes, vn:0 dnode:%d vn:1 dnode:%d vn:2 dnode:%d", pVgroup->dbName,

View File

@ -68,6 +68,7 @@ void vnodeGetSnapshot(SVnode *pVnode, SSnapshot *pSnapshot);
void vnodeGetInfo(SVnode *pVnode, const char **dbname, int32_t *vgId);
int32_t vnodeProcessCreateTSma(SVnode *pVnode, void *pCont, uint32_t contLen);
int32_t vnodeGetAllTableList(SVnode *pVnode, uint64_t uid, SArray *list);
int32_t vnodeIsCatchUp(SVnode *pVnode);
int32_t vnodeGetCtbIdList(SVnode *pVnode, int64_t suid, SArray *list);
int32_t vnodeGetCtbIdListByFilter(SVnode *pVnode, int64_t suid, SArray *list, bool (*filter)(void *arg), void *arg);

View File

@ -97,7 +97,6 @@ int32_t vnodeGetBatchMeta(SVnode* pVnode, SRpcMsg* pMsg);
// vnodeCommit.c
int32_t vnodeBegin(SVnode* pVnode);
int32_t vnodeShouldCommit(SVnode* pVnode, bool atExit);
void vnodeUpdCommitSched(SVnode* pVnode);
void vnodeRollback(SVnode* pVnode);
int32_t vnodeSaveInfo(const char* dir, const SVnodeInfo* pCfg);
int32_t vnodeCommitInfo(const char* dir);

View File

@ -378,7 +378,6 @@ struct SVnode {
STQ* pTq;
SSink* pSink;
tsem_t canCommit;
SVCommitSched commitSched;
int64_t sync;
TdThreadMutex lock;
bool blocked;
@ -387,9 +386,6 @@ struct SVnode {
int32_t blockSec;
int64_t blockSeq;
SQHandle* pQuery;
#if 0
SRpcHandleInfo blockInfo;
#endif
};
#define TD_VID(PVNODE) ((PVNODE)->config.vgId)

View File

@ -362,15 +362,6 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32
p->ts = pCol->ts;
p->colVal = pCol->colVal;
singleTableLastTs = pCol->ts;
// only set value for last row query
if (HASTYPE(pr->type, CACHESCAN_RETRIEVE_LAST_ROW)) {
if (taosArrayGetSize(pTableUidList) == 0) {
taosArrayPush(pTableUidList, &pKeyInfo->uid);
} else {
taosArraySet(pTableUidList, 0, &pKeyInfo->uid);
}
}
}
} else {
SLastCol* p = taosArrayGet(pLastCols, slotId);
@ -417,6 +408,12 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32
}
}
if (taosArrayGetSize(pTableUidList) == 0) {
taosArrayPush(pTableUidList, &pKeyInfo->uid);
} else {
taosArraySet(pTableUidList, 0, &pKeyInfo->uid);
}
tsdbCacheRelease(lruCache, h);
}

View File

@ -57,6 +57,28 @@ int vnodeCheckCfg(const SVnodeCfg *pCfg) {
return 0;
}
const char* vnodeRoleToStr(ESyncRole role) {
switch (role) {
case TAOS_SYNC_ROLE_VOTER:
return "voter";
case TAOS_SYNC_ROLE_LEARNER:
return "learner";
default:
return "unknown";
}
}
const ESyncRole vnodeStrToRole(char* str) {
if(strcmp(str, "voter") == 0){
return TAOS_SYNC_ROLE_VOTER;
}
if(strcmp(str, "learner") == 0){
return TAOS_SYNC_ROLE_LEARNER;
}
return TAOS_SYNC_ROLE_ERROR;
}
int vnodeEncodeConfig(const void *pObj, SJson *pJson) {
const SVnodeCfg *pCfg = (SVnodeCfg *)pObj;
@ -117,6 +139,7 @@ int vnodeEncodeConfig(const void *pObj, SJson *pJson) {
if (tjsonAddIntegerToObject(pJson, "hashSuffix", pCfg->hashSuffix) < 0) return -1;
if (tjsonAddIntegerToObject(pJson, "syncCfg.replicaNum", pCfg->syncCfg.replicaNum) < 0) return -1;
if (tjsonAddIntegerToObject(pJson, "syncCfg.totalReplicaNum", pCfg->syncCfg.totalReplicaNum) < 0) return -1;
if (tjsonAddIntegerToObject(pJson, "syncCfg.myIndex", pCfg->syncCfg.myIndex) < 0) return -1;
if (tjsonAddIntegerToObject(pJson, "vndStats.stables", pCfg->vndStats.numOfSTables) < 0) return -1;
@ -128,9 +151,9 @@ int vnodeEncodeConfig(const void *pObj, SJson *pJson) {
SJson *nodeInfo = tjsonCreateArray();
if (nodeInfo == NULL) return -1;
if (tjsonAddItemToObject(pJson, "syncCfg.nodeInfo", nodeInfo) < 0) return -1;
vDebug("vgId:%d, encode config, replicas:%d selfIndex:%d", pCfg->vgId, pCfg->syncCfg.replicaNum,
pCfg->syncCfg.myIndex);
for (int i = 0; i < pCfg->syncCfg.replicaNum; ++i) {
vDebug("vgId:%d, encode config, replicas:%d totalReplicas:%d selfIndex:%d", pCfg->vgId, pCfg->syncCfg.replicaNum,
pCfg->syncCfg.totalReplicaNum, pCfg->syncCfg.myIndex);
for (int i = 0; i < pCfg->syncCfg.totalReplicaNum; ++i) {
SJson *info = tjsonCreateObject();
SNodeInfo *pNode = (SNodeInfo *)&pCfg->syncCfg.nodeInfo[i];
if (info == NULL) return -1;
@ -138,6 +161,7 @@ int vnodeEncodeConfig(const void *pObj, SJson *pJson) {
if (tjsonAddStringToObject(info, "nodeFqdn", pNode->nodeFqdn) < 0) return -1;
if (tjsonAddIntegerToObject(info, "nodeId", pNode->nodeId) < 0) return -1;
if (tjsonAddIntegerToObject(info, "clusterId", pNode->clusterId) < 0) return -1;
if (tjsonAddStringToObject(info, "nodeRole", vnodeRoleToStr(pNode->nodeRole)) < 0) return -1;
if (tjsonAddItemToArray(nodeInfo, info) < 0) return -1;
vDebug("vgId:%d, encode config, replica:%d ep:%s:%u dnode:%d", pCfg->vgId, i, pNode->nodeFqdn, pNode->nodePort,
pNode->nodeId);
@ -235,6 +259,8 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) {
tjsonGetNumberValue(pJson, "syncCfg.replicaNum", pCfg->syncCfg.replicaNum, code);
if (code < 0) return -1;
tjsonGetNumberValue(pJson, "syncCfg.totalReplicaNum", pCfg->syncCfg.totalReplicaNum, code);
if (code < 0) return -1;
tjsonGetNumberValue(pJson, "syncCfg.myIndex", pCfg->syncCfg.myIndex, code);
if (code < 0) return -1;
@ -251,10 +277,13 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) {
SJson *nodeInfo = tjsonGetObjectItem(pJson, "syncCfg.nodeInfo");
int arraySize = tjsonGetArraySize(nodeInfo);
if (arraySize != pCfg->syncCfg.replicaNum) return -1;
if(pCfg->syncCfg.totalReplicaNum == 0 && pCfg->syncCfg.replicaNum > 0){
pCfg->syncCfg.totalReplicaNum = pCfg->syncCfg.replicaNum;
}
if (arraySize != pCfg->syncCfg.totalReplicaNum) return -1;
vDebug("vgId:%d, decode config, replicas:%d selfIndex:%d", pCfg->vgId, pCfg->syncCfg.replicaNum,
pCfg->syncCfg.myIndex);
vDebug("vgId:%d, decode config, replicas:%d totalReplicas:%d selfIndex:%d", pCfg->vgId, pCfg->syncCfg.replicaNum,
pCfg->syncCfg.totalReplicaNum, pCfg->syncCfg.myIndex);
for (int i = 0; i < arraySize; ++i) {
SJson *info = tjsonGetArrayItem(nodeInfo, i);
SNodeInfo *pNode = &pCfg->syncCfg.nodeInfo[i];
@ -266,6 +295,15 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) {
if (code < 0) return -1;
tjsonGetNumberValue(info, "clusterId", pNode->clusterId, code);
if (code < 0) return -1;
char role[10] = {0};
code = tjsonGetStringValue(info, "nodeRole", role);
if (code < 0) return -1;
if(strlen(role) != 0){
pNode->nodeRole = vnodeStrToRole(role);
}
else{
pNode->nodeRole = TAOS_SYNC_ROLE_VOTER;
}
vDebug("vgId:%d, decode config, replica:%d ep:%s:%u dnode:%d", pCfg->vgId, i, pNode->nodeFqdn, pNode->nodePort,
pNode->nodeId);
}

View File

@ -143,23 +143,13 @@ _exit:
return code;
}
void vnodeUpdCommitSched(SVnode *pVnode) {
int64_t randNum = taosRand();
pVnode->commitSched.commitMs = taosGetMonoTimestampMs();
pVnode->commitSched.maxWaitMs = tsVndCommitMaxIntervalMs + (randNum % tsVndCommitMaxIntervalMs);
}
int vnodeShouldCommit(SVnode *pVnode, bool atExit) {
SVCommitSched *pSched = &pVnode->commitSched;
int64_t nowMs = taosGetMonoTimestampMs();
bool diskAvail = osDataSpaceAvailable();
bool needCommit = false;
taosThreadMutexLock(&pVnode->mutex);
if (pVnode->inUse && diskAvail) {
needCommit =
((pVnode->inUse->size > pVnode->inUse->node.size) && (pSched->commitMs + SYNC_VND_COMMIT_MIN_MS < nowMs)) ||
((pVnode->inUse->size > 0) && atExit);
needCommit = (pVnode->inUse->size > pVnode->inUse->node.size) || (pVnode->inUse->size > 0 && atExit);
}
taosThreadMutexUnlock(&pVnode->mutex);
return needCommit;
@ -431,8 +421,6 @@ static int vnodeCommitImpl(SCommitInfo *pInfo) {
vInfo("vgId:%d, start to commit, commitId:%" PRId64 " version:%" PRId64 " term: %" PRId64, TD_VID(pVnode),
pInfo->info.state.commitID, pInfo->info.state.committed, pInfo->info.state.commitTerm);
vnodeUpdCommitSched(pVnode);
// persist wal before starting
if (walPersist(pVnode->pWal) < 0) {
vError("vgId:%d, failed to persist wal since %s", TD_VID(pVnode), terrstr());

View File

@ -76,19 +76,41 @@ int32_t vnodeAlterReplica(const char *path, SAlterVnodeReplicaReq *pReq, STfs *p
}
SSyncCfg *pCfg = &info.config.syncCfg;
pCfg->myIndex = pReq->selfIndex;
pCfg->replicaNum = pReq->replica;
pCfg->replicaNum = 0;
pCfg->totalReplicaNum = 0;
memset(&pCfg->nodeInfo, 0, sizeof(pCfg->nodeInfo));
vInfo("vgId:%d, save config while alter, replicas:%d selfIndex:%d", pReq->vgId, pCfg->replicaNum, pCfg->myIndex);
for (int i = 0; i < pReq->replica; ++i) {
SNodeInfo *pNode = &pCfg->nodeInfo[i];
pNode->nodeId = pReq->replicas[i].id;
pNode->nodePort = pReq->replicas[i].port;
tstrncpy(pNode->nodeFqdn, pReq->replicas[i].fqdn, sizeof(pNode->nodeFqdn));
pNode->nodeRole = TAOS_SYNC_ROLE_VOTER;
(void)tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort);
vInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", pReq->vgId, i, pNode->nodeFqdn, pNode->nodePort, pNode->nodeId);
pCfg->replicaNum++;
}
if(pReq->selfIndex != -1){
pCfg->myIndex = pReq->selfIndex;
}
for (int i = pCfg->replicaNum; i < pReq->replica + pReq->learnerReplica; ++i) {
SNodeInfo *pNode = &pCfg->nodeInfo[i];
pNode->nodeId = pReq->learnerReplicas[pCfg->totalReplicaNum].id;
pNode->nodePort = pReq->learnerReplicas[pCfg->totalReplicaNum].port;
pNode->nodeRole = TAOS_SYNC_ROLE_LEARNER;
tstrncpy(pNode->nodeFqdn, pReq->learnerReplicas[pCfg->totalReplicaNum].fqdn, sizeof(pNode->nodeFqdn));
(void)tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort);
vInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", pReq->vgId, i, pNode->nodeFqdn, pNode->nodePort, pNode->nodeId);
pCfg->totalReplicaNum++;
}
pCfg->totalReplicaNum += pReq->replica;
if(pReq->learnerSelfIndex != -1){
pCfg->myIndex = pReq->replica + pReq->learnerSelfIndex;
}
vInfo("vgId:%d, save config while alter, replicas:%d totalReplicas:%d selfIndex:%d",
pReq->vgId, pCfg->replicaNum, pCfg->totalReplicaNum, pCfg->myIndex);
info.config.syncCfg = *pCfg;
ret = vnodeSaveInfo(dir, &info);
@ -181,6 +203,7 @@ int32_t vnodeAlterHashRange(const char *srcPath, const char *dstPath, SAlterVnod
SSyncCfg *pCfg = &info.config.syncCfg;
pCfg->myIndex = 0;
pCfg->replicaNum = 1;
pCfg->totalReplicaNum = 1;
memset(&pCfg->nodeInfo, 0, sizeof(pCfg->nodeInfo));
vInfo("vgId:%d, alter vnode replicas to 1", pReq->srcVgId);
@ -248,7 +271,7 @@ SVnode *vnodeOpen(const char *path, STfs *pTfs, SMsgCb msgCb) {
// save vnode info on dnode ep changed
bool updated = false;
SSyncCfg *pCfg = &info.config.syncCfg;
for (int32_t i = 0; i < pCfg->replicaNum; ++i) {
for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
SNodeInfo *pNode = &pCfg->nodeInfo[i];
if (tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort)) {
updated = true;
@ -286,8 +309,6 @@ SVnode *vnodeOpen(const char *path, STfs *pTfs, SMsgCb msgCb) {
taosThreadMutexInit(&pVnode->mutex, NULL);
taosThreadCondInit(&pVnode->poolNotEmpty, NULL);
vnodeUpdCommitSched(pVnode);
int8_t rollback = vnodeShouldRollback(pVnode);
// open buffer pool
@ -406,6 +427,10 @@ void vnodeClose(SVnode *pVnode) {
// start the sync timer after the queue is ready
int32_t vnodeStart(SVnode *pVnode) { return vnodeSyncStart(pVnode); }
int32_t vnodeIsCatchUp(SVnode *pVnode){
return syncIsCatchUp(pVnode->sync);
}
void vnodeStop(SVnode *pVnode) {}
int64_t vnodeGetSyncHandle(SVnode *pVnode) { return pVnode->sync; }

View File

@ -1447,7 +1447,8 @@ int32_t vnodeProcessCreateTSma(SVnode *pVnode, void *pCont, uint32_t contLen) {
}
static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp) {
vInfo("vgId:%d, alter replica confim msg is processed", TD_VID(pVnode));
vInfo("vgId:%d, vnode management handle msgType:alter-confirm, alter replica confim msg is processed",
TD_VID(pVnode));
pRsp->msgType = TDMT_VND_ALTER_CONFIRM_RSP;
pRsp->code = TSDB_CODE_SUCCESS;
pRsp->pCont = NULL;

View File

@ -112,9 +112,6 @@ static int32_t inline vnodeProposeMsg(SVnode *pVnode, SRpcMsg *pMsg, bool isWeak
pVnode->blocked = true;
pVnode->blockSec = taosGetTimestampSec();
pVnode->blockSeq = seq;
#if 0
pVnode->blockInfo = pMsg->info;
#endif
}
taosThreadMutexUnlock(&pVnode->lock);
@ -157,8 +154,6 @@ void vnodeProposeCommitOnNeed(SVnode *pVnode, bool atExit) {
} else {
tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &rpcMsg);
}
vnodeUpdCommitSched(pVnode);
}
#if BATCH_ENABLE
@ -571,6 +566,19 @@ static void vnodeBecomeFollower(const SSyncFSM *pFsm) {
taosThreadMutexUnlock(&pVnode->lock);
}
static void vnodeBecomeLearner(const SSyncFSM *pFsm) {
SVnode *pVnode = pFsm->data;
vInfo("vgId:%d, become learner", pVnode->config.vgId);
taosThreadMutexLock(&pVnode->lock);
if (pVnode->blocked) {
pVnode->blocked = false;
vDebug("vgId:%d, become learner and post block", pVnode->config.vgId);
tsem_post(&pVnode->syncSem);
}
taosThreadMutexUnlock(&pVnode->lock);
}
static void vnodeBecomeLeader(const SSyncFSM *pFsm) {
SVnode *pVnode = pFsm->data;
vDebug("vgId:%d, become leader", pVnode->config.vgId);
@ -612,6 +620,7 @@ static SSyncFSM *vnodeSyncMakeFsm(SVnode *pVnode) {
pFsm->FpApplyQueueItems = vnodeApplyQueueItems;
pFsm->FpBecomeLeaderCb = vnodeBecomeLeader;
pFsm->FpBecomeFollowerCb = vnodeBecomeFollower;
pFsm->FpBecomeLearnerCb = vnodeBecomeLearner;
pFsm->FpReConfigCb = NULL;
pFsm->FpSnapshotStartRead = vnodeSnapshotStartRead;
pFsm->FpSnapshotStopRead = vnodeSnapshotStopRead;
@ -644,7 +653,7 @@ int32_t vnodeSyncOpen(SVnode *pVnode, char *path) {
SSyncCfg *pCfg = &syncInfo.syncCfg;
vInfo("vgId:%d, start to open sync, replica:%d selfIndex:%d", pVnode->config.vgId, pCfg->replicaNum, pCfg->myIndex);
for (int32_t i = 0; i < pCfg->replicaNum; ++i) {
for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
SNodeInfo *pNode = &pCfg->nodeInfo[i];
vInfo("vgId:%d, index:%d ep:%s:%u dnode:%d cluster:%" PRId64, pVnode->config.vgId, i, pNode->nodeFqdn, pNode->nodePort,
pNode->nodeId, pNode->clusterId);

View File

@ -45,13 +45,13 @@ static void destroyCacheScanOperator(void* param);
static int32_t extractCacheScanSlotId(const SArray* pColMatchInfo, SExecTaskInfo* pTaskInfo, int32_t** pSlotIds);
static int32_t removeRedundantTsCol(SLastRowScanPhysiNode* pScanNode, SColMatchInfo* pColMatchInfo);
#define SCAN_ROW_TYPE(_t) ((_t)? CACHESCAN_RETRIEVE_LAST : CACHESCAN_RETRIEVE_LAST_ROW)
#define SCAN_ROW_TYPE(_t) ((_t) ? CACHESCAN_RETRIEVE_LAST : CACHESCAN_RETRIEVE_LAST_ROW)
SOperatorInfo* createCacherowsScanOperator(SLastRowScanPhysiNode* pScanNode, SReadHandle* readHandle,
STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo) {
int32_t code = TSDB_CODE_SUCCESS;
int32_t code = TSDB_CODE_SUCCESS;
SCacheRowsScanInfo* pInfo = taosMemoryCalloc(1, sizeof(SCacheRowsScanInfo));
SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo));
SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo));
if (pInfo == NULL || pOperator == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
tableListDestroy(pTableListInfo);
@ -91,7 +91,8 @@ SOperatorInfo* createCacherowsScanOperator(SLastRowScanPhysiNode* pScanNode, SRe
uint64_t suid = tableListGetSuid(pTableListInfo);
code = tsdbCacherowsReaderOpen(pInfo->readHandle.vnode, pInfo->retrieveType, pList, totalTables,
taosArrayGetSize(pInfo->matchInfo.pList), suid, &pInfo->pLastrowReader, pTaskInfo->id.str);
taosArrayGetSize(pInfo->matchInfo.pList), suid, &pInfo->pLastrowReader,
pTaskInfo->id.str);
if (code != TSDB_CODE_SUCCESS) {
goto _error;
}
@ -114,7 +115,8 @@ SOperatorInfo* createCacherowsScanOperator(SLastRowScanPhysiNode* pScanNode, SRe
p->pCtx = createSqlFunctionCtx(p->pExprInfo, p->numOfExprs, &p->rowEntryInfoOffset);
}
setOperatorInfo(pOperator, "CachedRowScanOperator", QUERY_NODE_PHYSICAL_PLAN_LAST_ROW_SCAN, false, OP_NOT_OPENED, pInfo, pTaskInfo);
setOperatorInfo(pOperator, "CachedRowScanOperator", QUERY_NODE_PHYSICAL_PLAN_LAST_ROW_SCAN, false, OP_NOT_OPENED,
pInfo, pTaskInfo);
pOperator->exprSupp.numOfExprs = taosArrayGetSize(pInfo->pRes->pDataBlock);
pOperator->fpSet =
@ -123,7 +125,7 @@ SOperatorInfo* createCacherowsScanOperator(SLastRowScanPhysiNode* pScanNode, SRe
pOperator->cost.openCost = 0;
return pOperator;
_error:
_error:
pTaskInfo->code = code;
destroyCacheScanOperator(pInfo);
taosMemoryFree(pOperator);
@ -136,8 +138,8 @@ SSDataBlock* doScanCache(SOperatorInfo* pOperator) {
}
SCacheRowsScanInfo* pInfo = pOperator->info;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
STableListInfo* pTableList = pInfo->pTableList;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
STableListInfo* pTableList = pInfo->pTableList;
uint64_t suid = tableListGetSuid(pTableList);
int32_t size = tableListGetSize(pTableList);
@ -194,8 +196,8 @@ SSDataBlock* doScanCache(SOperatorInfo* pOperator) {
pRes->info.rows = 1;
SExprSupp* pSup = &pInfo->pseudoExprSup;
int32_t code = addTagPseudoColumnData(&pInfo->readHandle, pSup->pExprInfo, pSup->numOfExprs, pRes,
pRes->info.rows, GET_TASKID(pTaskInfo), NULL);
int32_t code = addTagPseudoColumnData(&pInfo->readHandle, pSup->pExprInfo, pSup->numOfExprs, pRes,
pRes->info.rows, GET_TASKID(pTaskInfo), NULL);
if (code != TSDB_CODE_SUCCESS) {
pTaskInfo->code = code;
return NULL;
@ -217,7 +219,7 @@ SSDataBlock* doScanCache(SOperatorInfo* pOperator) {
}
STableKeyInfo* pList = NULL;
int32_t num = 0;
int32_t num = 0;
int32_t code = tableListGetGroupList(pTableList, pInfo->currentGroupIndex, &pList, &num);
if (code != TSDB_CODE_SUCCESS) {
@ -251,11 +253,9 @@ SSDataBlock* doScanCache(SOperatorInfo* pOperator) {
pInfo->pRes->info.id.groupId = pKeyInfo->groupId;
if (taosArrayGetSize(pInfo->pUidList) > 0) {
ASSERT((pInfo->retrieveType & CACHESCAN_RETRIEVE_LAST_ROW) == CACHESCAN_RETRIEVE_LAST_ROW);
pInfo->pRes->info.id.uid = *(tb_uid_t*)taosArrayGet(pInfo->pUidList, 0);
code = addTagPseudoColumnData(&pInfo->readHandle, pSup->pExprInfo, pSup->numOfExprs, pInfo->pRes, pInfo->pRes->info.rows,
GET_TASKID(pTaskInfo), NULL);
code = addTagPseudoColumnData(&pInfo->readHandle, pSup->pExprInfo, pSup->numOfExprs, pInfo->pRes,
pInfo->pRes->info.rows, GET_TASKID(pTaskInfo), NULL);
if (code != TSDB_CODE_SUCCESS) {
pTaskInfo->code = code;
return NULL;
@ -325,7 +325,7 @@ int32_t removeRedundantTsCol(SLastRowScanPhysiNode* pScanNode, SColMatchInfo* pC
return TSDB_CODE_SUCCESS;
}
size_t size = taosArrayGetSize(pColMatchInfo->pList);
size_t size = taosArrayGetSize(pColMatchInfo->pList);
SArray* pMatchInfo = taosArrayInit(size, sizeof(SColMatchItem));
for (int32_t i = 0; i < size; ++i) {

View File

@ -1933,14 +1933,35 @@ static int32_t translateToIso8601(SFunctionNode* pFunc, char* pErrBuf, int32_t l
}
static int32_t translateToUnixtimestamp(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
if (1 != LIST_LENGTH(pFunc->pParameterList)) {
int32_t numOfParams = LIST_LENGTH(pFunc->pParameterList);
int16_t resType = TSDB_DATA_TYPE_BIGINT;
if (1 != numOfParams && 2 != numOfParams) {
return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName);
}
if (!IS_STR_DATA_TYPE(((SExprNode*)nodesListGetNode(pFunc->pParameterList, 0))->resType.type)) {
uint8_t para1Type = ((SExprNode*)nodesListGetNode(pFunc->pParameterList, 0))->resType.type;
if (!IS_STR_DATA_TYPE(para1Type)) {
return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName);
}
if (2 == numOfParams) {
uint8_t para2Type = ((SExprNode*)nodesListGetNode(pFunc->pParameterList, 1))->resType.type;
if (!IS_INTEGER_TYPE(para2Type)) {
return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName);
}
SValueNode* pValue = (SValueNode*)nodesListGetNode(pFunc->pParameterList, 1);
if (pValue->datum.i == 1) {
resType = TSDB_DATA_TYPE_TIMESTAMP;
} else if (pValue->datum.i == 0) {
resType = TSDB_DATA_TYPE_BIGINT;
} else {
return buildFuncErrMsg(pErrBuf, len, TSDB_CODE_FUNC_FUNTION_ERROR,
"TO_UNIXTIMESTAMP function second parameter should be 0/1");
}
}
// add database precision as param
uint8_t dbPrec = pFunc->node.resType.precision;
int32_t code = addDbPrecisonParam(&pFunc->pParameterList, dbPrec);
@ -1948,7 +1969,7 @@ static int32_t translateToUnixtimestamp(SFunctionNode* pFunc, char* pErrBuf, int
return code;
}
pFunc->node.resType = (SDataType){.bytes = tDataTypes[TSDB_DATA_TYPE_BIGINT].bytes, .type = TSDB_DATA_TYPE_BIGINT};
pFunc->node.resType = (SDataType){.bytes = tDataTypes[resType].bytes, .type = resType};
return TSDB_CODE_SUCCESS;
}

View File

@ -343,7 +343,7 @@ typedef struct SUdfcFuncStub {
char udfName[TSDB_FUNC_NAME_LEN + 1];
UdfcFuncHandle handle;
int32_t refCount;
int64_t lastRefTime;
int64_t createTime;
} SUdfcFuncStub;
typedef struct SUdfcProxy {
@ -363,6 +363,7 @@ typedef struct SUdfcProxy {
uv_mutex_t udfStubsMutex;
SArray *udfStubs; // SUdfcFuncStub
SArray *expiredUdfStubs; //SUdfcFuncStub
uv_mutex_t udfcUvMutex;
int8_t initialized;
@ -959,7 +960,7 @@ int32_t udfcOpen();
int32_t udfcClose();
int32_t acquireUdfFuncHandle(char *udfName, UdfcFuncHandle *pHandle);
void releaseUdfFuncHandle(char *udfName);
void releaseUdfFuncHandle(char *udfName, UdfcFuncHandle handle);
int32_t cleanUpUdfs();
bool udfAggGetEnv(struct SFunctionNode *pFunc, SFuncExecEnv *pEnv);
@ -967,6 +968,8 @@ bool udfAggInit(struct SqlFunctionCtx *pCtx, struct SResultRowEntryInfo *pRes
int32_t udfAggProcess(struct SqlFunctionCtx *pCtx);
int32_t udfAggFinalize(struct SqlFunctionCtx *pCtx, SSDataBlock *pBlock);
void cleanupNotExpiredUdfs();
void cleanupExpiredUdfs();
int compareUdfcFuncSub(const void *elem1, const void *elem2) {
SUdfcFuncStub *stub1 = (SUdfcFuncStub *)elem1;
SUdfcFuncStub *stub2 = (SUdfcFuncStub *)elem2;
@ -982,16 +985,24 @@ int32_t acquireUdfFuncHandle(char *udfName, UdfcFuncHandle *pHandle) {
if (stubIndex != -1) {
SUdfcFuncStub *foundStub = taosArrayGet(gUdfcProxy.udfStubs, stubIndex);
UdfcFuncHandle handle = foundStub->handle;
if (handle != NULL && ((SUdfcUvSession *)handle)->udfUvPipe != NULL) {
*pHandle = foundStub->handle;
++foundStub->refCount;
foundStub->lastRefTime = taosGetTimestampUs();
uv_mutex_unlock(&gUdfcProxy.udfStubsMutex);
return 0;
int64_t currUs = taosGetTimestampUs();
bool expired = (currUs - foundStub->createTime) >= 10 * 1000 * 1000;
if (!expired) {
if (handle != NULL && ((SUdfcUvSession *)handle)->udfUvPipe != NULL) {
*pHandle = foundStub->handle;
++foundStub->refCount;
uv_mutex_unlock(&gUdfcProxy.udfStubsMutex);
return 0;
} else {
fnInfo("udf invalid handle for %s, refCount: %d, create time: %" PRId64 ". remove it from cache", udfName,
foundStub->refCount, foundStub->createTime);
taosArrayRemove(gUdfcProxy.udfStubs, stubIndex);
}
} else {
fnInfo("invalid handle for %s, refCount: %d, last ref time: %" PRId64 ". remove it from cache", udfName,
foundStub->refCount, foundStub->lastRefTime);
fnInfo("udf handle expired for %s, will setup udf. move it to expired list", udfName);
taosArrayRemove(gUdfcProxy.udfStubs, stubIndex);
taosArrayPush(gUdfcProxy.expiredUdfStubs, foundStub);
taosArraySort(gUdfcProxy.expiredUdfStubs, compareUdfcFuncSub);
}
}
*pHandle = NULL;
@ -1001,7 +1012,7 @@ int32_t acquireUdfFuncHandle(char *udfName, UdfcFuncHandle *pHandle) {
strncpy(stub.udfName, udfName, TSDB_FUNC_NAME_LEN);
stub.handle = *pHandle;
++stub.refCount;
stub.lastRefTime = taosGetTimestampUs();
stub.createTime = taosGetTimestampUs();
taosArrayPush(gUdfcProxy.udfStubs, &stub);
taosArraySort(gUdfcProxy.udfStubs, compareUdfcFuncSub);
} else {
@ -1012,21 +1023,75 @@ int32_t acquireUdfFuncHandle(char *udfName, UdfcFuncHandle *pHandle) {
return code;
}
void releaseUdfFuncHandle(char *udfName) {
void releaseUdfFuncHandle(char *udfName, UdfcFuncHandle handle) {
uv_mutex_lock(&gUdfcProxy.udfStubsMutex);
SUdfcFuncStub key = {0};
strncpy(key.udfName, udfName, TSDB_FUNC_NAME_LEN);
SUdfcFuncStub *foundStub = taosArraySearch(gUdfcProxy.udfStubs, &key, compareUdfcFuncSub, TD_EQ);
if (!foundStub) {
SUdfcFuncStub *expiredStub = taosArraySearch(gUdfcProxy.expiredUdfStubs, &key, compareUdfcFuncSub, TD_EQ);
if (!foundStub && !expiredStub) {
uv_mutex_unlock(&gUdfcProxy.udfStubsMutex);
return;
}
if (foundStub->refCount > 0) {
if (foundStub != NULL && foundStub->handle == handle && foundStub->refCount > 0) {
--foundStub->refCount;
}
if (expiredStub != NULL && expiredStub->handle == handle && expiredStub->refCount > 0) {
--expiredStub->refCount;
}
uv_mutex_unlock(&gUdfcProxy.udfStubsMutex);
}
void cleanupExpiredUdfs() {
int32_t i = 0;
SArray *expiredUdfStubs = taosArrayInit(16, sizeof(SUdfcFuncStub));
while (i < taosArrayGetSize(gUdfcProxy.expiredUdfStubs)) {
SUdfcFuncStub *stub = taosArrayGet(gUdfcProxy.expiredUdfStubs, i);
if (stub->refCount == 0) {
fnInfo("tear down udf. expired. udf name: %s, handle: %p, ref count: %d", stub->udfName, stub->handle, stub->refCount);
doTeardownUdf(stub->handle);
} else {
fnInfo("udf still in use. expired. udf name: %s, ref count: %d, create time: %" PRId64 ", handle: %p", stub->udfName,
stub->refCount, stub->createTime, stub->handle);
UdfcFuncHandle handle = stub->handle;
if (handle != NULL && ((SUdfcUvSession *)handle)->udfUvPipe != NULL) {
taosArrayPush(expiredUdfStubs, stub);
} else {
fnInfo("udf invalid handle for %s, expired. refCount: %d, create time: %" PRId64 ". remove it from cache",
stub->udfName, stub->refCount, stub->createTime);
}
}
++i;
}
taosArrayDestroy(gUdfcProxy.expiredUdfStubs);
gUdfcProxy.expiredUdfStubs = expiredUdfStubs;
}
void cleanupNotExpiredUdfs() {
SArray *udfStubs = taosArrayInit(16, sizeof(SUdfcFuncStub));
int32_t i = 0;
while (i < taosArrayGetSize(gUdfcProxy.udfStubs)) {
SUdfcFuncStub *stub = taosArrayGet(gUdfcProxy.udfStubs, i);
if (stub->refCount == 0) {
fnInfo("tear down udf. udf name: %s, handle: %p, ref count: %d", stub->udfName, stub->handle, stub->refCount);
doTeardownUdf(stub->handle);
} else {
fnInfo("udf still in use. udf name: %s, ref count: %d, create time: %" PRId64 ", handle: %p", stub->udfName,
stub->refCount, stub->createTime, stub->handle);
UdfcFuncHandle handle = stub->handle;
if (handle != NULL && ((SUdfcUvSession *)handle)->udfUvPipe != NULL) {
taosArrayPush(udfStubs, stub);
} else {
fnInfo("udf invalid handle for %s, refCount: %d, create time: %" PRId64 ". remove it from cache",
stub->udfName, stub->refCount, stub->createTime);
}
}
++i;
}
taosArrayDestroy(gUdfcProxy.udfStubs);
gUdfcProxy.udfStubs = udfStubs;
}
int32_t cleanUpUdfs() {
int8_t initialized = atomic_load_8(&gUdfcProxy.initialized);
if (!initialized) {
@ -1034,32 +1099,15 @@ int32_t cleanUpUdfs() {
}
uv_mutex_lock(&gUdfcProxy.udfStubsMutex);
if (gUdfcProxy.udfStubs == NULL || taosArrayGetSize(gUdfcProxy.udfStubs) == 0) {
if ((gUdfcProxy.udfStubs == NULL || taosArrayGetSize(gUdfcProxy.udfStubs) == 0) &&
(gUdfcProxy.expiredUdfStubs == NULL || taosArrayGetSize(gUdfcProxy.expiredUdfStubs) == 0)) {
uv_mutex_unlock(&gUdfcProxy.udfStubsMutex);
return TSDB_CODE_SUCCESS;
}
SArray *udfStubs = taosArrayInit(16, sizeof(SUdfcFuncStub));
int32_t i = 0;
while (i < taosArrayGetSize(gUdfcProxy.udfStubs)) {
SUdfcFuncStub *stub = taosArrayGet(gUdfcProxy.udfStubs, i);
if (stub->refCount == 0) {
fnInfo("tear down udf. udf name: %s, handle: %p, ref count: %d", stub->udfName, stub->handle, stub->refCount);
doTeardownUdf(stub->handle);
} else {
fnInfo("udf still in use. udf name: %s, ref count: %d, last ref time: %" PRId64 ", handle: %p", stub->udfName,
stub->refCount, stub->lastRefTime, stub->handle);
UdfcFuncHandle handle = stub->handle;
if (handle != NULL && ((SUdfcUvSession *)handle)->udfUvPipe != NULL) {
taosArrayPush(udfStubs, stub);
} else {
fnInfo("udf invalid handle for %s, refCount: %d, last ref time: %" PRId64 ". remove it from cache",
stub->udfName, stub->refCount, stub->lastRefTime);
}
}
++i;
}
taosArrayDestroy(gUdfcProxy.udfStubs);
gUdfcProxy.udfStubs = udfStubs;
cleanupNotExpiredUdfs();
cleanupExpiredUdfs();
uv_mutex_unlock(&gUdfcProxy.udfStubsMutex);
return 0;
}
@ -1075,7 +1123,7 @@ int32_t callUdfScalarFunc(char *udfName, SScalarParam *input, int32_t numOfCols,
code = doCallUdfScalarFunc(handle, input, numOfCols, output);
if (code != TSDB_CODE_SUCCESS) {
fnError("udfc scalar function execution failure");
releaseUdfFuncHandle(udfName);
releaseUdfFuncHandle(udfName, handle);
return code;
}
@ -1089,7 +1137,7 @@ int32_t callUdfScalarFunc(char *udfName, SScalarParam *input, int32_t numOfCols,
code = TSDB_CODE_UDF_INVALID_OUTPUT_TYPE;
}
}
releaseUdfFuncHandle(udfName);
releaseUdfFuncHandle(udfName, handle);
return code;
}
@ -1122,7 +1170,7 @@ bool udfAggInit(struct SqlFunctionCtx *pCtx, struct SResultRowEntryInfo *pResult
SUdfInterBuf buf = {0};
if ((udfCode = doCallUdfAggInit(handle, &buf)) != 0) {
fnError("udfAggInit error. step doCallUdfAggInit. udf code: %d", udfCode);
releaseUdfFuncHandle(pCtx->udfName);
releaseUdfFuncHandle(pCtx->udfName, handle);
return false;
}
if (buf.bufLen <= session->bufSize) {
@ -1131,10 +1179,10 @@ bool udfAggInit(struct SqlFunctionCtx *pCtx, struct SResultRowEntryInfo *pResult
udfRes->interResNum = buf.numOfResult;
} else {
fnError("udfc inter buf size %d is greater than function bufSize %d", buf.bufLen, session->bufSize);
releaseUdfFuncHandle(pCtx->udfName);
releaseUdfFuncHandle(pCtx->udfName, handle);
return false;
}
releaseUdfFuncHandle(pCtx->udfName);
releaseUdfFuncHandle(pCtx->udfName, handle);
freeUdfInterBuf(&buf);
return true;
}
@ -1191,7 +1239,7 @@ int32_t udfAggProcess(struct SqlFunctionCtx *pCtx) {
taosArrayDestroy(pTempBlock->pDataBlock);
taosMemoryFree(pTempBlock);
releaseUdfFuncHandle(pCtx->udfName);
releaseUdfFuncHandle(pCtx->udfName, handle);
freeUdfInterBuf(&newState);
return udfCode;
}
@ -1236,7 +1284,7 @@ int32_t udfAggFinalize(struct SqlFunctionCtx *pCtx, SSDataBlock *pBlock) {
freeUdfInterBuf(&resultBuf);
int32_t numOfResults = functionFinalizeWithResultBuf(pCtx, pBlock, udfRes->finalResBuf);
releaseUdfFuncHandle(pCtx->udfName);
releaseUdfFuncHandle(pCtx->udfName, handle);
return udfCallCode == 0 ? numOfResults : udfCallCode;
}
@ -1663,6 +1711,7 @@ int32_t udfcOpen() {
uv_barrier_wait(&proxy->initBarrier);
uv_mutex_init(&proxy->udfStubsMutex);
proxy->udfStubs = taosArrayInit(8, sizeof(SUdfcFuncStub));
proxy->expiredUdfStubs = taosArrayInit(8, sizeof(SUdfcFuncStub));
uv_mutex_init(&proxy->udfcUvMutex);
fnInfo("udfc initialized") return 0;
}
@ -1679,6 +1728,7 @@ int32_t udfcClose() {
uv_thread_join(&udfc->loopThread);
uv_mutex_destroy(&udfc->taskQueueMutex);
uv_barrier_destroy(&udfc->initBarrier);
taosArrayDestroy(udfc->expiredUdfStubs);
taosArrayDestroy(udfc->udfStubs);
uv_mutex_destroy(&udfc->udfStubsMutex);
uv_mutex_destroy(&udfc->udfcUvMutex);

View File

@ -591,7 +591,7 @@ SUdf *udfdNewUdf(const char *udfName) {
SUdf *udfdGetOrCreateUdf(const char *udfName) {
uv_mutex_lock(&global.udfsMutex);
SUdf **pUdfHash = taosHashGet(global.udfsHash, udfName, strlen(udfName));
int64_t currTime = taosGetTimestampSec();
int64_t currTime = taosGetTimestampMs();
bool expired = false;
if (pUdfHash) {
expired = currTime - (*pUdfHash)->lastFetchTime > 10 * 1000; // 10s
@ -688,6 +688,8 @@ void udfdProcessCallRequest(SUvUdfWork *uvUdf, SUdfRequest *request) {
output.colMeta.type = udf->outputType;
output.colMeta.precision = 0;
output.colMeta.scale = 0;
udfColEnsureCapacity(&output, call->block.info.rows);
SUdfDataBlock input = {0};
convertDataBlockToUdfDataBlock(&call->block, &input);
code = udf->scriptPlugin->udfScalarProcFunc(&input, &output, udf->scriptUdfCtx);

View File

@ -1047,8 +1047,8 @@ sliding_opt(A) ::= SLIDING NK_LP duration_literal(B) NK_RP.
fill_opt(A) ::= . { A = NULL; }
fill_opt(A) ::= FILL NK_LP fill_mode(B) NK_RP. { A = createFillNode(pCxt, B, NULL); }
fill_opt(A) ::= FILL NK_LP VALUE NK_COMMA literal_list(B) NK_RP. { A = createFillNode(pCxt, FILL_MODE_VALUE, createNodeListNode(pCxt, B)); }
fill_opt(A) ::= FILL NK_LP VALUE_F NK_COMMA literal_list(B) NK_RP. { A = createFillNode(pCxt, FILL_MODE_VALUE_F, createNodeListNode(pCxt, B)); }
fill_opt(A) ::= FILL NK_LP VALUE NK_COMMA expression_list(B) NK_RP. { A = createFillNode(pCxt, FILL_MODE_VALUE, createNodeListNode(pCxt, B)); }
fill_opt(A) ::= FILL NK_LP VALUE_F NK_COMMA expression_list(B) NK_RP. { A = createFillNode(pCxt, FILL_MODE_VALUE_F, createNodeListNode(pCxt, B)); }
%type fill_mode { EFillMode }
%destructor fill_mode { }

View File

@ -1310,7 +1310,8 @@ static EDealRes translateOperator(STranslateContext* pCxt, SOperatorNode* pOp) {
}
static EDealRes haveVectorFunction(SNode* pNode, void* pContext) {
if (isAggFunc(pNode) || isIndefiniteRowsFunc(pNode) || isWindowPseudoColumnFunc(pNode) || isInterpPseudoColumnFunc(pNode)) {
if (isAggFunc(pNode) || isIndefiniteRowsFunc(pNode) || isWindowPseudoColumnFunc(pNode) ||
isInterpPseudoColumnFunc(pNode)) {
*((bool*)pContext) = true;
return DEAL_RES_END;
}
@ -2930,6 +2931,11 @@ static int32_t convertFillValue(STranslateContext* pCxt, SDataType dt, SNodeList
if (TSDB_CODE_SUCCESS == code) {
code = scalarCalculateConstants(pCaseFunc, &pCell->pNode);
}
if (TSDB_CODE_SUCCESS == code && QUERY_NODE_VALUE != nodeType(pCell->pNode)) {
code = generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_WRONG_VALUE_TYPE, "Fill value is just a constant");
} else if (TSDB_CODE_SUCCESS != code) {
code = generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_WRONG_VALUE_TYPE, "Filled data type mismatch");
}
return code;
}
@ -2946,9 +2952,9 @@ static int32_t checkFillValues(STranslateContext* pCxt, SFillNode* pFill, SNodeL
if (fillNo >= LIST_LENGTH(pFillValues->pNodeList)) {
return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_WRONG_VALUE_TYPE, "Filled values number mismatch");
}
if (TSDB_CODE_SUCCESS !=
convertFillValue(pCxt, ((SExprNode*)pProject)->resType, pFillValues->pNodeList, fillNo)) {
return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_WRONG_VALUE_TYPE, "Filled data type mismatch");
int32_t code = convertFillValue(pCxt, ((SExprNode*)pProject)->resType, pFillValues->pNodeList, fillNo);
if (TSDB_CODE_SUCCESS != code) {
return code;
}
++fillNo;
}
@ -3029,12 +3035,13 @@ static int32_t translateSelectList(STranslateContext* pCxt, SSelectStmt* pSelect
}
static int32_t translateHaving(STranslateContext* pCxt, SSelectStmt* pSelect) {
if (NULL == pSelect->pGroupByList && NULL != pSelect->pHaving) {
if (NULL == pSelect->pGroupByList && NULL == pSelect->pPartitionByList && NULL == pSelect->pWindow &&
NULL != pSelect->pHaving) {
return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_GROUPBY_LACK_EXPRESSION);
}
pCxt->currClause = SQL_CLAUSE_HAVING;
int32_t code = translateExpr(pCxt, &pSelect->pHaving);
if (TSDB_CODE_SUCCESS == code) {
if (TSDB_CODE_SUCCESS == code && (NULL != pSelect->pGroupByList || NULL != pSelect->pWindow)) {
code = checkExprForGroupBy(pCxt, &pSelect->pHaving);
}
return code;
@ -5744,6 +5751,14 @@ static int32_t translateDropCGroup(STranslateContext* pCxt, SDropCGroupStmt* pSt
static int32_t translateAlterLocal(STranslateContext* pCxt, SAlterLocalStmt* pStmt) {
// The statement is executed directly on the client without constructing a message.
if ('\0' != pStmt->value[0]) {
return TSDB_CODE_SUCCESS;
}
char* p = strchr(pStmt->config, ' ');
if (NULL != p) {
*p = 0;
strcpy(pStmt->value, p + 1);
}
return TSDB_CODE_SUCCESS;
}

File diff suppressed because it is too large Load Diff

View File

@ -740,6 +740,13 @@ static int32_t createWindowLogicNodeFinalize(SLogicPlanContext* pCxt, SSelectStm
code = createColumnByRewriteExprs(pWindow->pFuncs, &pWindow->node.pTargets);
}
if (TSDB_CODE_SUCCESS == code && NULL != pSelect->pHaving) {
pWindow->node.pConditions = nodesCloneNode(pSelect->pHaving);
if (NULL == pWindow->node.pConditions) {
code = TSDB_CODE_OUT_OF_MEMORY;
}
}
pSelect->hasAggFuncs = false;
if (TSDB_CODE_SUCCESS == code) {
@ -1132,6 +1139,14 @@ static int32_t createPartitionLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pS
}
}
if (TSDB_CODE_SUCCESS == code && NULL != pSelect->pHaving && !pSelect->hasAggFuncs && NULL == pSelect->pGroupByList &&
NULL == pSelect->pWindow) {
pPartition->node.pConditions = nodesCloneNode(pSelect->pHaving);
if (NULL == pPartition->node.pConditions) {
code = TSDB_CODE_OUT_OF_MEMORY;
}
}
if (TSDB_CODE_SUCCESS == code) {
*pLogicNode = (SLogicNode*)pPartition;
} else {

View File

@ -1095,7 +1095,7 @@ static int32_t sortPriKeyOptGetSequencingNodesImpl(SLogicNode* pNode, bool group
*pNotOptimize = false;
return TSDB_CODE_SUCCESS;
}
switch (nodeType(pNode)) {
case QUERY_NODE_LOGIC_PLAN_SCAN: {
SScanLogicNode* pScan = (SScanLogicNode*)pNode;
@ -2139,7 +2139,7 @@ typedef struct SLastRowScanOptLastParaCkCxt {
bool hasCol;
} SLastRowScanOptLastParaCkCxt;
static EDealRes lastRowScanOptLastParaCheckImpl(SNode* pNode, void* pContext) {
static EDealRes lastRowScanOptLastParaIsTagImpl(SNode* pNode, void* pContext) {
if (QUERY_NODE_COLUMN == nodeType(pNode)) {
SLastRowScanOptLastParaCkCxt* pCxt = pContext;
if (COLUMN_TYPE_TAG == ((SColumnNode*)pNode)->colType || COLUMN_TYPE_TBNAME == ((SColumnNode*)pNode)->colType) {
@ -2152,10 +2152,10 @@ static EDealRes lastRowScanOptLastParaCheckImpl(SNode* pNode, void* pContext) {
return DEAL_RES_CONTINUE;
}
static bool lastRowScanOptLastParaCheck(SNode* pExpr) {
static bool lastRowScanOptLastParaIsTag(SNode* pExpr) {
SLastRowScanOptLastParaCkCxt cxt = {.hasTag = false, .hasCol = false};
nodesWalkExpr(pExpr, lastRowScanOptLastParaCheckImpl, &cxt);
return !cxt.hasTag && cxt.hasCol;
nodesWalkExpr(pExpr, lastRowScanOptLastParaIsTagImpl, &cxt);
return cxt.hasTag && !cxt.hasCol;
}
static bool hasSuitableCache(int8_t cacheLastMode, bool hasLastRow, bool hasLast) {
@ -2195,15 +2195,19 @@ static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) {
FOREACH(pFunc, ((SAggLogicNode*)pNode)->pAggFuncs) {
SFunctionNode* pAggFunc = (SFunctionNode*)pFunc;
if (FUNCTION_TYPE_LAST == pAggFunc->funcType) {
if (hasSelectFunc || !lastRowScanOptLastParaCheck(nodesListGetNode(pAggFunc->pParameterList, 0))) {
if (hasSelectFunc || QUERY_NODE_VALUE == nodeType(nodesListGetNode(pAggFunc->pParameterList, 0))) {
return false;
}
hasLastFunc = true;
} else if (FUNCTION_TYPE_SELECT_VALUE == pAggFunc->funcType || FUNCTION_TYPE_GROUP_KEY == pAggFunc->funcType) {
} else if (FUNCTION_TYPE_SELECT_VALUE == pAggFunc->funcType) {
if (hasLastFunc) {
return false;
}
hasSelectFunc = true;
} else if (FUNCTION_TYPE_GROUP_KEY == pAggFunc->funcType) {
if (!lastRowScanOptLastParaIsTag(nodesListGetNode(pAggFunc->pParameterList, 0))) {
return false;
}
} else if (FUNCTION_TYPE_LAST_ROW != pAggFunc->funcType) {
return false;
}
@ -2237,7 +2241,7 @@ static EDealRes lastRowScanOptSetColDataType(SNode* pNode, void* pContext) {
return DEAL_RES_CONTINUE;
}
static void lastRowScanOptSetLastTargets(SNodeList* pTargets, SNodeList* pLastCols) {
static void lastRowScanOptSetLastTargets(SNodeList* pTargets, SNodeList* pLastCols, bool erase) {
SNode* pTarget = NULL;
WHERE_EACH(pTarget, pTargets) {
bool found = false;
@ -2249,7 +2253,7 @@ static void lastRowScanOptSetLastTargets(SNodeList* pTargets, SNodeList* pLastCo
break;
}
}
if (!found) {
if (!found && erase) {
ERASE_NODE(pTargets);
continue;
}
@ -2290,9 +2294,8 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic
pScan->igLastNull = pAgg->hasLast ? true : false;
if (NULL != cxt.pLastCols) {
cxt.doAgg = false;
lastRowScanOptSetLastTargets(pScan->pScanCols, cxt.pLastCols);
NODES_DESTORY_LIST(pScan->pScanPseudoCols);
lastRowScanOptSetLastTargets(pScan->node.pTargets, cxt.pLastCols);
lastRowScanOptSetLastTargets(pScan->pScanCols, cxt.pLastCols, true);
lastRowScanOptSetLastTargets(pScan->node.pTargets, cxt.pLastCols, false);
nodesClearList(cxt.pLastCols);
}
pAgg->hasLastRow = false;

View File

@ -1124,7 +1124,8 @@ _end:
int32_t toUnixtimestampFunction(SScalarParam *pInput, int32_t inputNum, SScalarParam *pOutput) {
int32_t type = GET_PARAM_TYPE(pInput);
int64_t timePrec;
GET_TYPED_DATA(timePrec, int64_t, GET_PARAM_TYPE(&pInput[1]), pInput[1].columnData->pData);
int32_t idx = (inputNum == 2) ? 1 : 2;
GET_TYPED_DATA(timePrec, int64_t, GET_PARAM_TYPE(&pInput[idx]), pInput[idx].columnData->pData);
for (int32_t i = 0; i < pInput[0].numOfRows; ++i) {
if (colDataIsNull_s(pInput[0].columnData, i)) {

View File

@ -24,12 +24,13 @@ extern "C" {
// SIndexMgr -----------------------------
typedef struct SSyncIndexMgr {
SRaftId (*replicas)[TSDB_MAX_REPLICA];
SyncIndex index[TSDB_MAX_REPLICA];
SyncTerm privateTerm[TSDB_MAX_REPLICA]; // for advanced function
int64_t startTimeArr[TSDB_MAX_REPLICA];
int64_t recvTimeArr[TSDB_MAX_REPLICA];
SRaftId (*replicas)[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
SyncIndex index[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
SyncTerm privateTerm[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA]; // for advanced function
int64_t startTimeArr[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
int64_t recvTimeArr[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
int32_t replicaNum;
int32_t totalReplicaNum;
SSyncNode *pNode;
} SSyncIndexMgr;

View File

@ -54,13 +54,13 @@ typedef struct SSyncLogReplMgr SSyncLogReplMgr;
#define MAX_CONFIG_INDEX_COUNT 256
typedef struct SRaftCfg {
SSyncCfg cfg;
int32_t batchSize;
int8_t isStandBy;
int8_t snapshotStrategy;
SyncIndex lastConfigIndex;
int32_t configIndexCount;
SyncIndex configIndexArr[MAX_CONFIG_INDEX_COUNT];
SSyncCfg cfg;
int32_t batchSize;
int8_t isStandBy;
int8_t snapshotStrategy;
SyncIndex lastConfigIndex;
int32_t configIndexCount;
SyncIndex configIndexArr[MAX_CONFIG_INDEX_COUNT];
} SRaftCfg;
typedef struct SRaftId {
@ -127,12 +127,13 @@ typedef struct SSyncNode {
SRaftId myRaftId;
int32_t peersNum;
SNodeInfo peersNodeInfo[TSDB_MAX_REPLICA];
SEpSet peersEpset[TSDB_MAX_REPLICA];
SRaftId peersId[TSDB_MAX_REPLICA];
SNodeInfo peersNodeInfo[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
SEpSet peersEpset[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
SRaftId peersId[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
int32_t replicaNum;
SRaftId replicasId[TSDB_MAX_REPLICA];
int32_t totalReplicaNum;
SRaftId replicasId[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
// raft algorithm
SSyncFSM* pFsm;
@ -188,7 +189,7 @@ typedef struct SSyncNode {
uint64_t heartbeatTimerCounter;
// peer heartbeat timer
SSyncTimer peerHeartbeatTimerArr[TSDB_MAX_REPLICA];
SSyncTimer peerHeartbeatTimerArr[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
// tools
SSyncRespMgr* pSyncRespMgr;
@ -196,13 +197,13 @@ typedef struct SSyncNode {
// restore state
bool restoreFinish;
// SSnapshot* pSnapshot;
SSyncSnapshotSender* senders[TSDB_MAX_REPLICA];
SSyncSnapshotSender* senders[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
SSyncSnapshotReceiver* pNewNodeReceiver;
// log replication mgr
SSyncLogReplMgr* logReplMgrs[TSDB_MAX_REPLICA];
SSyncLogReplMgr* logReplMgrs[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
SPeerState peerStates[TSDB_MAX_REPLICA];
SPeerState peerStates[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
// is config changing
bool changing;
@ -275,6 +276,7 @@ void syncNodeUpdateTerm(SSyncNode* pSyncNode, SyncTerm term);
void syncNodeUpdateTermWithoutStepDown(SSyncNode* pSyncNode, SyncTerm term);
void syncNodeStepDown(SSyncNode* pSyncNode, SyncTerm newTerm);
void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr);
void syncNodeBecomeLearner(SSyncNode* pSyncNode, const char* debugStr);
void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr);
void syncNodeCandidate2Leader(SSyncNode* pSyncNode);
void syncNodeFollower2Candidate(SSyncNode* pSyncNode);

View File

@ -237,6 +237,7 @@ typedef struct SyncLeaderTransfer {
typedef enum {
SYNC_LOCAL_CMD_STEP_DOWN = 100,
SYNC_LOCAL_CMD_FOLLOWER_CMT,
SYNC_LOCAL_CMD_LEARNER_CMT,
} ESyncLocalCmd;
typedef struct SyncLocalCmd {

View File

@ -56,6 +56,8 @@ typedef struct SSyncLogBuffer {
int64_t size;
TdThreadMutex mutex;
TdThreadMutexAttr attr;
int64_t totalIndex;
bool isCatchup;
} SSyncLogBuffer;
// SSyncLogRepMgr

View File

@ -137,8 +137,10 @@ int32_t syncNodeOnAppendEntries(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
pReply->term = pMsg->term;
}
syncNodeStepDown(ths, pMsg->term);
resetElect = true;
if(ths->raftCfg.cfg.nodeInfo[ths->raftCfg.cfg.myIndex].nodeRole != TAOS_SYNC_ROLE_LEARNER){
syncNodeStepDown(ths, pMsg->term);
resetElect = true;
}
if (pMsg->dataLen < sizeof(SSyncRaftEntry)) {
sError("vgId:%d, incomplete append entries received. prev index:%" PRId64 ", term:%" PRId64 ", datalen:%d",

View File

@ -41,6 +41,8 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) {
int32_t ret = 0;
for (int i = 0; i < pNode->peersNum; ++i) {
if(pNode->peersNodeInfo[i].nodeRole == TAOS_SYNC_ROLE_LEARNER) continue;
SRpcMsg rpcMsg = {0};
ret = syncBuildRequestVote(&rpcMsg, pNode->vgId);
if (ret < 0) {

View File

@ -26,6 +26,7 @@ SSyncIndexMgr *syncIndexMgrCreate(SSyncNode *pNode) {
pIndexMgr->replicas = &pNode->replicasId;
pIndexMgr->replicaNum = pNode->replicaNum;
pIndexMgr->totalReplicaNum = pNode->totalReplicaNum;
pIndexMgr->pNode = pNode;
syncIndexMgrClear(pIndexMgr);
@ -35,6 +36,7 @@ SSyncIndexMgr *syncIndexMgrCreate(SSyncNode *pNode) {
void syncIndexMgrUpdate(SSyncIndexMgr *pIndexMgr, SSyncNode *pNode) {
pIndexMgr->replicas = &pNode->replicasId;
pIndexMgr->replicaNum = pNode->replicaNum;
pIndexMgr->totalReplicaNum = pNode->totalReplicaNum;
pIndexMgr->pNode = pNode;
syncIndexMgrClear(pIndexMgr);
}
@ -50,14 +52,14 @@ void syncIndexMgrClear(SSyncIndexMgr *pIndexMgr) {
memset(pIndexMgr->privateTerm, 0, sizeof(pIndexMgr->privateTerm));
int64_t timeNow = taosGetTimestampMs();
for (int i = 0; i < pIndexMgr->replicaNum; ++i) {
for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
pIndexMgr->startTimeArr[i] = 0;
pIndexMgr->recvTimeArr[i] = timeNow;
}
}
void syncIndexMgrSetIndex(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, SyncIndex index) {
for (int i = 0; i < pIndexMgr->replicaNum; ++i) {
for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
(pIndexMgr->index)[i] = index;
return;
@ -69,7 +71,7 @@ void syncIndexMgrSetIndex(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, Sync
}
SSyncLogReplMgr *syncNodeGetLogReplMgr(SSyncNode *pNode, SRaftId *pRaftId) {
for (int i = 0; i < pNode->replicaNum; i++) {
for (int i = 0; i < pNode->totalReplicaNum; i++) {
if (syncUtilSameId(&pNode->replicasId[i], pRaftId)) {
return pNode->logReplMgrs[i];
}
@ -80,7 +82,7 @@ SSyncLogReplMgr *syncNodeGetLogReplMgr(SSyncNode *pNode, SRaftId *pRaftId) {
}
SyncIndex syncIndexMgrGetIndex(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId) {
for (int i = 0; i < pIndexMgr->replicaNum; ++i) {
for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
SyncIndex idx = (pIndexMgr->index)[i];
return idx;
@ -93,7 +95,7 @@ SyncIndex syncIndexMgrGetIndex(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId)
}
void syncIndexMgrSetStartTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, int64_t startTime) {
for (int i = 0; i < pIndexMgr->replicaNum; ++i) {
for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
(pIndexMgr->startTimeArr)[i] = startTime;
return;
@ -105,7 +107,7 @@ void syncIndexMgrSetStartTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId,
}
int64_t syncIndexMgrGetStartTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId) {
for (int i = 0; i < pIndexMgr->replicaNum; ++i) {
for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
int64_t startTime = (pIndexMgr->startTimeArr)[i];
return startTime;
@ -118,7 +120,7 @@ int64_t syncIndexMgrGetStartTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftI
}
void syncIndexMgrSetRecvTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, int64_t recvTime) {
for (int i = 0; i < pIndexMgr->replicaNum; ++i) {
for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
(pIndexMgr->recvTimeArr)[i] = recvTime;
return;
@ -130,7 +132,7 @@ void syncIndexMgrSetRecvTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, i
}
int64_t syncIndexMgrGetRecvTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId) {
for (int i = 0; i < pIndexMgr->replicaNum; ++i) {
for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
int64_t recvTime = (pIndexMgr->recvTimeArr)[i];
return recvTime;
@ -143,7 +145,7 @@ int64_t syncIndexMgrGetRecvTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId
}
void syncIndexMgrSetTerm(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, SyncTerm term) {
for (int i = 0; i < pIndexMgr->replicaNum; ++i) {
for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
(pIndexMgr->privateTerm)[i] = term;
return;
@ -155,7 +157,7 @@ void syncIndexMgrSetTerm(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, SyncT
}
SyncTerm syncIndexMgrGetTerm(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId) {
for (int i = 0; i < pIndexMgr->replicaNum; ++i) {
for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
SyncTerm term = (pIndexMgr->privateTerm)[i];
return term;

View File

@ -141,6 +141,13 @@ int32_t syncReconfig(int64_t rid, SSyncCfg* pNewCfg) {
SSyncNode* pSyncNode = syncNodeAcquire(rid);
if (pSyncNode == NULL) return -1;
if(pSyncNode->raftCfg.lastConfigIndex >= pNewCfg->lastIndex){
syncNodeRelease(pSyncNode);
sInfo("vgId:%d, no need Reconfig, current index:%" PRId64 ", new index:%" PRId64, pSyncNode->vgId,
pSyncNode->raftCfg.lastConfigIndex, pNewCfg->lastIndex);
return 0;
}
if (!syncNodeCheckNewConfig(pSyncNode, pNewCfg)) {
syncNodeRelease(pSyncNode);
terrno = TSDB_CODE_SYN_NEW_CONFIG_ERROR;
@ -149,12 +156,12 @@ int32_t syncReconfig(int64_t rid, SSyncCfg* pNewCfg) {
}
syncNodeUpdateNewConfigIndex(pSyncNode, pNewCfg);
syncNodeDoConfigChange(pSyncNode, pNewCfg, SYNC_INDEX_INVALID);
syncNodeDoConfigChange(pSyncNode, pNewCfg, pNewCfg->lastIndex);
if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) {
syncNodeStopHeartbeatTimer(pSyncNode);
for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) {
for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
syncHbTimerInit(pSyncNode, &pSyncNode->peerHeartbeatTimerArr[i], pSyncNode->replicasId[i]);
}
@ -315,8 +322,9 @@ int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) {
}
}
if (pSyncNode->replicaNum > 1) {
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER && pSyncNode->state != TAOS_SYNC_STATE_FOLLOWER) {
if (pSyncNode->totalReplicaNum > 1) {
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER && pSyncNode->state != TAOS_SYNC_STATE_FOLLOWER
&& pSyncNode->state != TAOS_SYNC_STATE_LEARNER) {
sNTrace(pSyncNode, "new-snapshot-index:%" PRId64 " candidate or unknown state, do not delete wal",
lastApplyIndex);
syncNodeRelease(pSyncNode);
@ -537,7 +545,8 @@ void syncGetRetryEpSet(int64_t rid, SEpSet* pEpSet) {
SSyncNode* pSyncNode = syncNodeAcquire(rid);
if (pSyncNode == NULL) return;
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.replicaNum; ++i) {
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.totalReplicaNum; ++i) {
if(pSyncNode->raftCfg.cfg.nodeInfo[i].nodeRole == TAOS_SYNC_ROLE_LEARNER) continue;
SEp* pEp = &pEpSet->eps[i];
tstrncpy(pEp->fqdn, pSyncNode->raftCfg.cfg.nodeInfo[i].nodeFqdn, TSDB_FQDN_LEN);
pEp->port = (pSyncNode->raftCfg.cfg.nodeInfo)[i].nodePort;
@ -564,6 +573,34 @@ int32_t syncPropose(int64_t rid, SRpcMsg* pMsg, bool isWeak, int64_t* seq) {
return ret;
}
int32_t syncIsCatchUp(int64_t rid) {
SSyncNode* pSyncNode = syncNodeAcquire(rid);
if (pSyncNode == NULL) {
sError("sync Node Acquire error since %d", errno);
return -1;
}
while(1){
if(pSyncNode->pLogBuf->totalIndex < 0 || pSyncNode->pLogBuf->commitIndex < 0 ||
pSyncNode->pLogBuf->totalIndex < pSyncNode->pLogBuf->commitIndex ||
pSyncNode->pLogBuf->totalIndex - pSyncNode->pLogBuf->commitIndex > SYNC_LEARNER_CATCHUP){
sInfo("vgId:%d, Not catch up, wait one second, totalIndex:%" PRId64 " commitIndex:%" PRId64 " matchIndex:%" PRId64,
pSyncNode->vgId, pSyncNode->pLogBuf->totalIndex, pSyncNode->pLogBuf->commitIndex,
pSyncNode->pLogBuf->matchIndex);
taosSsleep(1);
}
else{
sInfo("vgId:%d, Catch up, totalIndex:%" PRId64 " commitIndex:%" PRId64 " matchIndex:%" PRId64,
pSyncNode->vgId, pSyncNode->pLogBuf->totalIndex, pSyncNode->pLogBuf->commitIndex,
pSyncNode->pLogBuf->matchIndex);
break;
}
}
syncNodeRelease(pSyncNode);
return 0;
}
int32_t syncNodePropose(SSyncNode* pSyncNode, SRpcMsg* pMsg, bool isWeak, int64_t* seq) {
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
terrno = TSDB_CODE_SYN_NOT_LEADER;
@ -655,6 +692,8 @@ static int32_t syncHbTimerStart(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer) {
pData->logicClock = pSyncTimer->logicClock;
pData->execTime = tsNow + pSyncTimer->timerMS;
sTrace("vgId:%d, start hb timer, rid:%" PRId64 " addr:%" PRId64, pSyncNode->vgId, pData->rid, pData->destId.addr);
taosTmrReset(pSyncTimer->timerCb, pSyncTimer->timerMS / HEARTBEAT_TICK_NUM, (void*)(pData->rid),
syncEnv()->pTimerManager, &pSyncTimer->pTimer);
} else {
@ -719,7 +758,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
sInfo("vgId:%d, create a new raft config file", pSyncNode->vgId);
pSyncNode->raftCfg.isStandBy = pSyncInfo->isStandBy;
pSyncNode->raftCfg.snapshotStrategy = pSyncInfo->snapshotStrategy;
pSyncNode->raftCfg.lastConfigIndex = SYNC_INDEX_INVALID;
pSyncNode->raftCfg.lastConfigIndex = pSyncInfo->syncCfg.lastIndex;
pSyncNode->raftCfg.batchSize = pSyncInfo->batchSize;
pSyncNode->raftCfg.cfg = pSyncInfo->syncCfg;
pSyncNode->raftCfg.configIndexCount = 1;
@ -736,7 +775,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
goto _error;
}
if (pSyncInfo->syncCfg.replicaNum > 0 && syncIsConfigChanged(&pSyncNode->raftCfg.cfg, &pSyncInfo->syncCfg)) {
if (pSyncInfo->syncCfg.totalReplicaNum > 0 && syncIsConfigChanged(&pSyncNode->raftCfg.cfg, &pSyncInfo->syncCfg)) {
sInfo("vgId:%d, use sync config from input options and write to cfg file", pSyncNode->vgId);
pSyncNode->raftCfg.cfg = pSyncInfo->syncCfg;
if (syncWriteCfgFile(pSyncNode) != 0) {
@ -753,8 +792,9 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
pSyncNode->vgId = pSyncInfo->vgId;
SSyncCfg* pCfg = &pSyncNode->raftCfg.cfg;
bool updated = false;
sInfo("vgId:%d, start to open sync node, replica:%d selfIndex:%d", pSyncNode->vgId, pCfg->replicaNum, pCfg->myIndex);
for (int32_t i = 0; i < pCfg->replicaNum; ++i) {
sInfo("vgId:%d, start to open sync node, totalReplicaNum:%d replicaNum:%d selfIndex:%d",
pSyncNode->vgId, pCfg->totalReplicaNum, pCfg->replicaNum, pCfg->myIndex);
for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
SNodeInfo* pNode = &pCfg->nodeInfo[i];
if (tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort)) {
updated = true;
@ -792,9 +832,9 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
}
// init peersNum, peers, peersId
pSyncNode->peersNum = pSyncNode->raftCfg.cfg.replicaNum - 1;
pSyncNode->peersNum = pSyncNode->raftCfg.cfg.totalReplicaNum - 1;
int32_t j = 0;
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.replicaNum; ++i) {
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.totalReplicaNum; ++i) {
if (i != pSyncNode->raftCfg.cfg.myIndex) {
pSyncNode->peersNodeInfo[j] = pSyncNode->raftCfg.cfg.nodeInfo[i];
syncUtilNodeInfo2EpSet(&pSyncNode->peersNodeInfo[j], &pSyncNode->peersEpset[j]);
@ -810,7 +850,8 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
// init replicaNum, replicasId
pSyncNode->replicaNum = pSyncNode->raftCfg.cfg.replicaNum;
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.replicaNum; ++i) {
pSyncNode->totalReplicaNum = pSyncNode->raftCfg.cfg.totalReplicaNum;
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.totalReplicaNum; ++i) {
if (!syncUtilNodeInfo2RaftId(&pSyncNode->raftCfg.cfg.nodeInfo[i], pSyncNode->vgId, &pSyncNode->replicasId[i])) {
sError("vgId:%d, failed to determine raft member id, replica:%d", pSyncNode->vgId, i);
goto _error;
@ -934,7 +975,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
pSyncNode->heartbeatTimerCounter = 0;
// init peer heartbeat timer
for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) {
for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
syncHbTimerInit(pSyncNode, &(pSyncNode->peerHeartbeatTimerArr[i]), (pSyncNode->replicasId)[i]);
}
@ -949,7 +990,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
pSyncNode->restoreFinish = false;
// snapshot senders
for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) {
for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
SSyncSnapshotSender* pSender = snapshotSenderCreate(pSyncNode, i);
if (pSender == NULL) return NULL;
@ -1059,14 +1100,19 @@ int32_t syncNodeRestore(SSyncNode* pSyncNode) {
int32_t syncNodeStart(SSyncNode* pSyncNode) {
// start raft
if (pSyncNode->replicaNum == 1) {
raftStoreNextTerm(pSyncNode);
syncNodeBecomeLeader(pSyncNode, "one replica start");
if(pSyncNode->raftCfg.cfg.nodeInfo[pSyncNode->raftCfg.cfg.myIndex].nodeRole == TAOS_SYNC_ROLE_LEARNER){
syncNodeBecomeLearner(pSyncNode, "first start");
}
else{
if (pSyncNode->replicaNum == 1) {
raftStoreNextTerm(pSyncNode);
syncNodeBecomeLeader(pSyncNode, "one replica start");
// Raft 3.6.2 Committing entries from previous terms
syncNodeAppendNoop(pSyncNode);
} else {
syncNodeBecomeFollower(pSyncNode, "first start");
// Raft 3.6.2 Committing entries from previous terms
syncNodeAppendNoop(pSyncNode);
} else {
syncNodeBecomeFollower(pSyncNode, "first start");
}
}
int32_t ret = 0;
@ -1157,7 +1203,7 @@ void syncNodeClose(SSyncNode* pSyncNode) {
syncLogBufferDestroy(pSyncNode->pLogBuf);
pSyncNode->pLogBuf = NULL;
for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) {
for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
if (pSyncNode->senders[i] != NULL) {
sDebug("vgId:%d, snapshot sender destroy while close, data:%p", pSyncNode->vgId, pSyncNode->senders[i]);
@ -1350,7 +1396,7 @@ inline bool syncNodeInConfig(SSyncNode* pNode, const SSyncCfg* pCfg) {
bool b1 = false;
bool b2 = false;
for (int32_t i = 0; i < pCfg->replicaNum; ++i) {
for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
if (strcmp(pCfg->nodeInfo[i].nodeFqdn, pNode->myNodeInfo.nodeFqdn) == 0 &&
pCfg->nodeInfo[i].nodePort == pNode->myNodeInfo.nodePort) {
b1 = true;
@ -1358,7 +1404,7 @@ inline bool syncNodeInConfig(SSyncNode* pNode, const SSyncCfg* pCfg) {
}
}
for (int32_t i = 0; i < pCfg->replicaNum; ++i) {
for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
SRaftId raftId = {
.addr = SYNC_ADDR(&pCfg->nodeInfo[i]),
.vgId = pNode->vgId,
@ -1375,13 +1421,14 @@ inline bool syncNodeInConfig(SSyncNode* pNode, const SSyncCfg* pCfg) {
}
static bool syncIsConfigChanged(const SSyncCfg* pOldCfg, const SSyncCfg* pNewCfg) {
if (pOldCfg->replicaNum != pNewCfg->replicaNum) return true;
if (pOldCfg->totalReplicaNum != pNewCfg->totalReplicaNum) return true;
if (pOldCfg->myIndex != pNewCfg->myIndex) return true;
for (int32_t i = 0; i < pOldCfg->replicaNum; ++i) {
for (int32_t i = 0; i < pOldCfg->totalReplicaNum; ++i) {
const SNodeInfo* pOldInfo = &pOldCfg->nodeInfo[i];
const SNodeInfo* pNewInfo = &pNewCfg->nodeInfo[i];
if (strcmp(pOldInfo->nodeFqdn, pNewInfo->nodeFqdn) != 0) return true;
if (pOldInfo->nodePort != pNewInfo->nodePort) return true;
if(pOldInfo->nodeRole != pNewInfo->nodeRole) return true;
}
return false;
@ -1418,8 +1465,10 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
}
// log begin config change
sNInfo(pSyncNode, "begin do config change, from %d to %d, replicas:%d", pSyncNode->vgId, oldConfig.replicaNum,
pNewConfig->replicaNum);
sNInfo(pSyncNode, "begin do config change, from %d to %d, from %" PRId64 " to %" PRId64 ", replicas:%d",
pSyncNode->vgId,
oldConfig.totalReplicaNum, pNewConfig->totalReplicaNum,
oldConfig.lastIndex, pNewConfig->lastIndex);
if (IamInNew) {
pSyncNode->raftCfg.isStandBy = 0; // change isStandBy to normal
@ -1436,11 +1485,10 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
int32_t ret = 0;
// save snapshot senders
int32_t oldReplicaNum = pSyncNode->replicaNum;
SRaftId oldReplicasId[TSDB_MAX_REPLICA];
SRaftId oldReplicasId[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
memcpy(oldReplicasId, pSyncNode->replicasId, sizeof(oldReplicasId));
SSyncSnapshotSender* oldSenders[TSDB_MAX_REPLICA];
for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) {
SSyncSnapshotSender* oldSenders[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
oldSenders[i] = pSyncNode->senders[i];
sSTrace(oldSenders[i], "snapshot sender save old");
}
@ -1450,9 +1498,9 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
syncUtilNodeInfo2RaftId(&pSyncNode->myNodeInfo, pSyncNode->vgId, &pSyncNode->myRaftId);
// init peersNum, peers, peersId
pSyncNode->peersNum = pSyncNode->raftCfg.cfg.replicaNum - 1;
pSyncNode->peersNum = pSyncNode->raftCfg.cfg.totalReplicaNum - 1;
int32_t j = 0;
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.replicaNum; ++i) {
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.totalReplicaNum; ++i) {
if (i != pSyncNode->raftCfg.cfg.myIndex) {
pSyncNode->peersNodeInfo[j] = pSyncNode->raftCfg.cfg.nodeInfo[i];
syncUtilNodeInfo2EpSet(&pSyncNode->peersNodeInfo[j], &pSyncNode->peersEpset[j]);
@ -1465,7 +1513,8 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
// init replicaNum, replicasId
pSyncNode->replicaNum = pSyncNode->raftCfg.cfg.replicaNum;
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.replicaNum; ++i) {
pSyncNode->totalReplicaNum = pSyncNode->raftCfg.cfg.totalReplicaNum;
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.totalReplicaNum; ++i) {
syncUtilNodeInfo2RaftId(&pSyncNode->raftCfg.cfg.nodeInfo[i], pSyncNode->vgId, &pSyncNode->replicasId[i]);
}
@ -1480,15 +1529,15 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
// reset snapshot senders
// clear new
for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) {
for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
pSyncNode->senders[i] = NULL;
}
// reset new
for (int32_t i = 0; i < pSyncNode->replicaNum; ++i) {
for (int32_t i = 0; i < pSyncNode->totalReplicaNum; ++i) {
// reset sender
bool reset = false;
for (int32_t j = 0; j < TSDB_MAX_REPLICA; ++j) {
for (int32_t j = 0; j < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++j) {
if (syncUtilSameId(&(pSyncNode->replicasId)[i], &oldReplicasId[j]) && oldSenders[j] != NULL) {
sNTrace(pSyncNode, "snapshot sender reset for:%" PRId64 ", newIndex:%d, dnode:%d, %p",
(pSyncNode->replicasId)[i].addr, i, DID(&pSyncNode->replicasId[i]), oldSenders[j]);
@ -1510,7 +1559,7 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
}
// create new
for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) {
for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
if (pSyncNode->senders[i] == NULL) {
pSyncNode->senders[i] = snapshotSenderCreate(pSyncNode, i);
if (pSyncNode->senders[i] == NULL) {
@ -1525,7 +1574,7 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
}
// free old
for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) {
for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
if (oldSenders[i] != NULL) {
sSDebug(oldSenders[i], "snapshot sender destroy old, data:%p replica-index:%d", oldSenders[i], i);
snapshotSenderDestroy(oldSenders[i]);
@ -1550,12 +1599,12 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
} else {
// persist cfg
syncWriteCfgFile(pSyncNode);
sNInfo(pSyncNode, "do not config change from %d to %d", oldConfig.replicaNum, pNewConfig->replicaNum);
sNInfo(pSyncNode, "do not config change from %d to %d", oldConfig.totalReplicaNum, pNewConfig->totalReplicaNum);
}
_END:
// log end config change
sNInfo(pSyncNode, "end do config change, from %d to %d", oldConfig.replicaNum, pNewConfig->replicaNum);
sNInfo(pSyncNode, "end do config change, from %d to %d", oldConfig.totalReplicaNum, pNewConfig->totalReplicaNum);
}
// raft state change --------------
@ -1635,6 +1684,27 @@ void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr) {
syncNodeResetElectTimer(pSyncNode);
}
void syncNodeBecomeLearner(SSyncNode* pSyncNode, const char* debugStr) {
pSyncNode->hbSlowNum = 0;
// state change
pSyncNode->state = TAOS_SYNC_STATE_LEARNER;
// trace log
sNTrace(pSyncNode, "become learner %s", debugStr);
// call back
if (pSyncNode->pFsm != NULL && pSyncNode->pFsm->FpBecomeLearnerCb != NULL) {
pSyncNode->pFsm->FpBecomeLearnerCb(pSyncNode->pFsm);
}
// min match index
pSyncNode->minMatchIndex = SYNC_INDEX_INVALID;
// reset log buffer
syncLogBufferReset(pSyncNode->pLogBuf, pSyncNode);
}
// TLA+ Spec
// \* Candidate i transitions to leader.
// BecomeLeader(i) ==
@ -1752,7 +1822,7 @@ void syncNodeCandidate2Leader(SSyncNode* pSyncNode) {
bool syncNodeIsMnode(SSyncNode* pSyncNode) { return (pSyncNode->vgId == 1); }
int32_t syncNodePeerStateInit(SSyncNode* pSyncNode) {
for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) {
for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
pSyncNode->peerStates[i].lastSendIndex = SYNC_INDEX_INVALID;
pSyncNode->peerStates[i].lastSendTime = 0;
}
@ -2039,7 +2109,7 @@ static void syncNodeEqHeartbeatTimer(void* param, void* tmrId) {
if (!syncIsInit()) return;
SSyncNode* pNode = param;
if (pNode->replicaNum > 1) {
if (pNode->totalReplicaNum > 1) {
if (atomic_load_64(&pNode->heartbeatTimerLogicClockUser) <= atomic_load_64(&pNode->heartbeatTimerLogicClock)) {
SRpcMsg rpcMsg = {0};
int32_t code = syncBuildTimeout(&rpcMsg, SYNC_TIMEOUT_HEARTBEAT, atomic_load_64(&pNode->heartbeatTimerLogicClock),
@ -2074,7 +2144,7 @@ static void syncNodeEqPeerHeartbeatTimer(void* param, void* tmrId) {
SSyncHbTimerData* pData = syncHbTimerDataAcquire(hbDataRid);
if (pData == NULL) {
sError("hb timer get pData NULL, %" PRId64, hbDataRid);
sError("hb timer get pData NULL, rid:%" PRId64 " addr:%" PRId64, hbDataRid, pData->destId.addr);
return;
}
@ -2101,9 +2171,9 @@ static void syncNodeEqPeerHeartbeatTimer(void* param, void* tmrId) {
return;
}
// sTrace("vgId:%d, eq peer hb timer", pSyncNode->vgId);
sTrace("vgId:%d, eq peer hb timer, rid:%" PRId64 " addr:%" PRId64, pSyncNode->vgId, hbDataRid, pData->destId.addr);
if (pSyncNode->replicaNum > 1) {
if (pSyncNode->totalReplicaNum > 1) {
int64_t timerLogicClock = atomic_load_64(&pSyncTimer->logicClock);
int64_t msgLogicClock = atomic_load_64(&pData->logicClock);
@ -2130,6 +2200,7 @@ static void syncNodeEqPeerHeartbeatTimer(void* param, void* tmrId) {
pSyncTimer->timeStamp = tsNow;
// send msg
sTrace("vgId:%d, send heartbeat to dnode:%d", pSyncNode->vgId, DID(&(pSyncMsg->destId)));
syncLogSendHeartbeat(pSyncNode, pSyncMsg, false, timerElapsed, pData->execTime);
syncNodeSendHeartbeat(pSyncNode, &pSyncMsg->destId, &rpcMsg);
} else {
@ -2212,7 +2283,7 @@ int32_t syncNodeAppend(SSyncNode* ths, SSyncRaftEntry* pEntry) {
}
bool syncNodeHeartbeatReplyTimeout(SSyncNode* pSyncNode) {
if (pSyncNode->replicaNum == 1) {
if (pSyncNode->totalReplicaNum == 1) {
return false;
}
@ -2237,7 +2308,7 @@ bool syncNodeHeartbeatReplyTimeout(SSyncNode* pSyncNode) {
bool syncNodeSnapshotSending(SSyncNode* pSyncNode) {
if (pSyncNode == NULL) return false;
bool b = false;
for (int32_t i = 0; i < pSyncNode->replicaNum; ++i) {
for (int32_t i = 0; i < pSyncNode->totalReplicaNum; ++i) {
if (pSyncNode->senders[i] != NULL && pSyncNode->senders[i]->start) {
b = true;
break;
@ -2328,13 +2399,17 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
pMsgReply->startTime = ths->startTime;
pMsgReply->timeStamp = tsMs;
sTrace(
"vgId:%d, heartbeat msg from dnode:%d, cluster:%d, Msgterm:%" PRId64 " currentTerm:%" PRId64,
ths->vgId, DID(&(pMsg->srcId)), CID(&(pMsg->srcId)), pMsg->term, currentTerm);
if (pMsg->term == currentTerm && ths->state != TAOS_SYNC_STATE_LEADER) {
syncIndexMgrSetRecvTime(ths->pNextIndex, &(pMsg->srcId), tsMs);
resetElect = true;
ths->minMatchIndex = pMsg->minMatchIndex;
if (ths->state == TAOS_SYNC_STATE_FOLLOWER) {
if (ths->state == TAOS_SYNC_STATE_FOLLOWER || ths->state == TAOS_SYNC_STATE_LEARNER) {
SRpcMsg rpcMsgLocalCmd = {0};
(void)syncBuildLocalCmd(&rpcMsgLocalCmd, ths->vgId);
@ -2356,7 +2431,7 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
}
}
if (pMsg->term >= currentTerm && ths->state != TAOS_SYNC_STATE_FOLLOWER) {
if (pMsg->term >= currentTerm && ths->state == TAOS_SYNC_STATE_LEADER) {
SRpcMsg rpcMsgLocalCmd = {0};
(void)syncBuildLocalCmd(&rpcMsgLocalCmd, ths->vgId);
@ -2376,6 +2451,26 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
}
}
if (pMsg->term >= currentTerm && ths->state == TAOS_SYNC_STATE_LEARNER) {
SRpcMsg rpcMsgLocalCmd = {0};
(void)syncBuildLocalCmd(&rpcMsgLocalCmd, ths->vgId);
SyncLocalCmd* pSyncMsg = rpcMsgLocalCmd.pCont;
pSyncMsg->cmd = SYNC_LOCAL_CMD_LEARNER_CMT;
pSyncMsg->currentTerm = pMsg->term;
pSyncMsg->commitIndex = pMsg->commitIndex;
if (ths->syncEqMsg != NULL && ths->msgcb != NULL) {
int32_t code = ths->syncEqMsg(ths->msgcb, &rpcMsgLocalCmd);
if (code != 0) {
sError("vgId:%d, sync enqueue step-down msg error, code:%d", ths->vgId, code);
rpcFreeCont(rpcMsgLocalCmd.pCont);
} else {
sTrace("vgId:%d, sync enqueue step-down msg, new-term:%" PRId64, ths->vgId, pSyncMsg->currentTerm);
}
}
}
// reply
syncNodeSendMsgById(&pMsgReply->destId, ths, &rpcMsg);
@ -2439,7 +2534,20 @@ int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
sError("vgId:%d, failed to commit raft log since %s. commit index:%" PRId64 "", ths->vgId, terrstr(),
ths->commitIndex);
}
} else {
} else if (pMsg->cmd == SYNC_LOCAL_CMD_LEARNER_CMT){
if (syncLogBufferIsEmpty(ths->pLogBuf)) {
sError("vgId:%d, sync log buffer is empty.", ths->vgId);
return 0;
}
raftStoreSetTerm(ths, pMsg->currentTerm);
(void)syncNodeUpdateCommitIndex(ths, pMsg->commitIndex);
sTrace("vgId:%d, start to commit raft log in heartbeat. commit index:%" PRId64 "", ths->vgId, ths->commitIndex);
if (syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex) < 0) {
sError("vgId:%d, failed to commit raft log since %s. commit index:%" PRId64 "", ths->vgId, terrstr(),
ths->commitIndex);
}
}
else {
sError("error local cmd");
}
@ -2502,13 +2610,15 @@ const char* syncStr(ESyncState state) {
return "error";
case TAOS_SYNC_STATE_OFFLINE:
return "offline";
case TAOS_SYNC_STATE_LEARNER:
return "learner";
default:
return "unknown";
}
}
int32_t syncNodeUpdateNewConfigIndex(SSyncNode* ths, SSyncCfg* pNewCfg) {
for (int32_t i = 0; i < pNewCfg->replicaNum; ++i) {
for (int32_t i = 0; i < pNewCfg->totalReplicaNum; ++i) {
SRaftId raftId = {
.addr = SYNC_ADDR(&pNewCfg->nodeInfo[i]),
.vgId = ths->vgId,
@ -2528,7 +2638,7 @@ bool syncNodeIsOptimizedOneReplica(SSyncNode* ths, SRpcMsg* pMsg) {
}
bool syncNodeInRaftGroup(SSyncNode* ths, SRaftId* pRaftId) {
for (int32_t i = 0; i < ths->replicaNum; ++i) {
for (int32_t i = 0; i < ths->totalReplicaNum; ++i) {
if (syncUtilSameId(&((ths->replicasId)[i]), pRaftId)) {
return true;
}
@ -2538,7 +2648,7 @@ bool syncNodeInRaftGroup(SSyncNode* ths, SRaftId* pRaftId) {
SSyncSnapshotSender* syncNodeGetSnapshotSender(SSyncNode* ths, SRaftId* pDestId) {
SSyncSnapshotSender* pSender = NULL;
for (int32_t i = 0; i < ths->replicaNum; ++i) {
for (int32_t i = 0; i < ths->totalReplicaNum; ++i) {
if (syncUtilSameId(pDestId, &((ths->replicasId)[i]))) {
pSender = (ths->senders)[i];
}
@ -2548,7 +2658,7 @@ SSyncSnapshotSender* syncNodeGetSnapshotSender(SSyncNode* ths, SRaftId* pDestId)
SSyncTimer* syncNodeGetHbTimer(SSyncNode* ths, SRaftId* pDestId) {
SSyncTimer* pTimer = NULL;
for (int32_t i = 0; i < ths->replicaNum; ++i) {
for (int32_t i = 0; i < ths->totalReplicaNum; ++i) {
if (syncUtilSameId(pDestId, &((ths->replicasId)[i]))) {
pTimer = &((ths->peerHeartbeatTimerArr)[i]);
}
@ -2558,7 +2668,7 @@ SSyncTimer* syncNodeGetHbTimer(SSyncNode* ths, SRaftId* pDestId) {
SPeerState* syncNodeGetPeerState(SSyncNode* ths, const SRaftId* pDestId) {
SPeerState* pState = NULL;
for (int32_t i = 0; i < ths->replicaNum; ++i) {
for (int32_t i = 0; i < ths->totalReplicaNum; ++i) {
if (syncUtilSameId(pDestId, &((ths->replicasId)[i]))) {
pState = &((ths->peerStates)[i]);
}

View File

@ -53,8 +53,15 @@ int32_t syncLogBufferAppend(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEnt
goto _err;
}
if (pNode->restoreFinish && index - pBuf->commitIndex >= TSDB_SYNC_NEGOTIATION_WIN) {
terrno = TSDB_CODE_SYN_NEGO_WIN_EXCEEDED;
sError("vgId:%d, failed to append since %s, index:%" PRId64 ", commit-index:%" PRId64, pNode->vgId, terrstr(),
index, pBuf->commitIndex);
goto _err;
}
SyncIndex appliedIndex = pNode->pFsm->FpAppliedIndexCb(pNode->pFsm);
if (pNode->restoreFinish && pBuf->commitIndex - appliedIndex >= pBuf->size) {
if (pNode->restoreFinish && pBuf->commitIndex - appliedIndex >= TSDB_SYNC_APPLYQ_SIZE_LIMIT) {
terrno = TSDB_CODE_SYN_WRITE_STALL;
sError("vgId:%d, failed to append since %s. index:%" PRId64 ", commit-index:%" PRId64 ", applied-index:%" PRId64,
pNode->vgId, terrstr(), index, pBuf->commitIndex, appliedIndex);
@ -83,6 +90,7 @@ int32_t syncLogBufferAppend(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEnt
_err:
syncLogBufferValidate(pBuf);
taosThreadMutexUnlock(&pBuf->mutex);
taosMsleep(1);
return -1;
}
@ -242,6 +250,8 @@ int32_t syncLogBufferInitWithoutLock(SSyncLogBuffer* pBuf, SSyncNode* pNode) {
// update startIndex
pBuf->startIndex = takeDummy ? index : index + 1;
pBuf->isCatchup = false;
sInfo("vgId:%d, init sync log buffer. buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", pNode->vgId,
pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex);
@ -324,6 +334,15 @@ int32_t syncLogBufferAccept(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEnt
goto _out;
}
if(pNode->raftCfg.cfg.nodeInfo[pNode->raftCfg.cfg.myIndex].nodeRole == TAOS_SYNC_ROLE_LEARNER &&
index > 0 && index > pBuf->totalIndex){
pBuf->totalIndex = index;
sTrace("vgId:%d, update learner progress. index:%" PRId64 ", term:%" PRId64 ": prevterm:%" PRId64
" != lastmatch:%" PRId64 ". log buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")",
pNode->vgId, pEntry->index, pEntry->term, prevTerm, lastMatchTerm, pBuf->startIndex, pBuf->commitIndex,
pBuf->matchIndex, pBuf->endIndex);
}
if (index - pBuf->startIndex >= pBuf->size) {
sWarn("vgId:%d, out of buffer range. index:%" PRId64 ", term:%" PRId64 ". log buffer: [%" PRId64 " %" PRId64
" %" PRId64 ", %" PRId64 ")",
@ -483,7 +502,7 @@ _out:
}
int32_t syncFsmExecute(SSyncNode* pNode, SSyncFSM* pFsm, ESyncState role, SyncTerm term, SSyncRaftEntry* pEntry,
int32_t applyCode) {
int32_t applyCode) {
if (pNode->replicaNum == 1 && pNode->restoreFinish && pNode->vgId != 1) {
return 0;
}
@ -957,7 +976,7 @@ void syncLogReplDestroy(SSyncLogReplMgr* pMgr) {
}
int32_t syncNodeLogReplInit(SSyncNode* pNode) {
for (int i = 0; i < TSDB_MAX_REPLICA; i++) {
for (int i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; i++) {
ASSERT(pNode->logReplMgrs[i] == NULL);
pNode->logReplMgrs[i] = syncLogReplCreate();
if (pNode->logReplMgrs[i] == NULL) {
@ -970,7 +989,7 @@ int32_t syncNodeLogReplInit(SSyncNode* pNode) {
}
void syncNodeLogReplDestroy(SSyncNode* pNode) {
for (int i = 0; i < TSDB_MAX_REPLICA; i++) {
for (int i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; i++) {
syncLogReplDestroy(pNode->logReplMgrs[i]);
pNode->logReplMgrs[i] = NULL;
}
@ -1100,7 +1119,7 @@ int32_t syncLogBufferReset(SSyncLogBuffer* pBuf, SSyncNode* pNode) {
pBuf->endIndex = pBuf->matchIndex + 1;
// reset repl mgr
for (int i = 0; i < pNode->replicaNum; i++) {
for (int i = 0; i < pNode->totalReplicaNum; i++) {
SSyncLogReplMgr* pMgr = pNode->logReplMgrs[i];
syncLogReplReset(pMgr);
}

View File

@ -18,21 +18,45 @@
#include "syncUtil.h"
#include "tjson.h"
const char* syncRoleToStr(ESyncRole role) {
switch (role) {
case TAOS_SYNC_ROLE_VOTER:
return "voter";
case TAOS_SYNC_ROLE_LEARNER:
return "learner";
default:
return "unknown";
}
}
const ESyncRole syncStrToRole(char* str) {
if(strcmp(str, "voter") == 0){
return TAOS_SYNC_ROLE_VOTER;
}
if(strcmp(str, "learner") == 0){
return TAOS_SYNC_ROLE_LEARNER;
}
return TAOS_SYNC_ROLE_ERROR;
}
static int32_t syncEncodeSyncCfg(const void *pObj, SJson *pJson) {
SSyncCfg *pCfg = (SSyncCfg *)pObj;
if (tjsonAddDoubleToObject(pJson, "totalReplicaNum", pCfg->totalReplicaNum) < 0) return -1;
if (tjsonAddDoubleToObject(pJson, "replicaNum", pCfg->replicaNum) < 0) return -1;
if (tjsonAddDoubleToObject(pJson, "myIndex", pCfg->myIndex) < 0) return -1;
SJson *nodeInfo = tjsonCreateArray();
if (nodeInfo == NULL) return -1;
if (tjsonAddItemToObject(pJson, "nodeInfo", nodeInfo) < 0) return -1;
for (int32_t i = 0; i < pCfg->replicaNum; ++i) {
for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
SJson *info = tjsonCreateObject();
if (info == NULL) return -1;
if (tjsonAddDoubleToObject(info, "nodePort", pCfg->nodeInfo[i].nodePort) < 0) return -1;
if (tjsonAddStringToObject(info, "nodeFqdn", pCfg->nodeInfo[i].nodeFqdn) < 0) return -1;
if (tjsonAddIntegerToObject(info, "nodeId", pCfg->nodeInfo[i].nodeId) < 0) return -1;
if (tjsonAddIntegerToObject(info, "clusterId", pCfg->nodeInfo[i].clusterId) < 0) return -1;
if (tjsonAddStringToObject(info, "nodeRole", syncRoleToStr(pCfg->nodeInfo[i].nodeRole)) < 0) return -1;
if (tjsonAddItemToArray(nodeInfo, info) < 0) return -1;
}
@ -90,7 +114,8 @@ int32_t syncWriteCfgFile(SSyncNode *pNode) {
if (taosRenameFile(file, realfile) != 0) goto _OVER;
code = 0;
sInfo("vgId:%d, succeed to write sync cfg file:%s, len:%d", pNode->vgId, realfile, len);
sInfo("vgId:%d, succeed to write sync cfg file:%s, len:%d, lastConfigIndex:%" PRId64, pNode->vgId,
realfile, len, pNode->raftCfg.lastConfigIndex);
_OVER:
if (pJson != NULL) tjsonDelete(pJson);
@ -108,6 +133,7 @@ static int32_t syncDecodeSyncCfg(const SJson *pJson, void *pObj) {
SSyncCfg *pCfg = (SSyncCfg *)pObj;
int32_t code = 0;
tjsonGetInt32ValueFromDouble(pJson, "totalReplicaNum", pCfg->totalReplicaNum, code);
tjsonGetInt32ValueFromDouble(pJson, "replicaNum", pCfg->replicaNum, code);
if (code < 0) return -1;
tjsonGetInt32ValueFromDouble(pJson, "myIndex", pCfg->myIndex, code);
@ -115,9 +141,9 @@ static int32_t syncDecodeSyncCfg(const SJson *pJson, void *pObj) {
SJson *nodeInfo = tjsonGetObjectItem(pJson, "nodeInfo");
if (nodeInfo == NULL) return -1;
pCfg->replicaNum = tjsonGetArraySize(nodeInfo);
pCfg->totalReplicaNum = tjsonGetArraySize(nodeInfo);
for (int32_t i = 0; i < pCfg->replicaNum; ++i) {
for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
SJson *info = tjsonGetArrayItem(nodeInfo, i);
if (info == NULL) return -1;
tjsonGetUInt16ValueFromDouble(info, "nodePort", pCfg->nodeInfo[i].nodePort, code);
@ -126,6 +152,15 @@ static int32_t syncDecodeSyncCfg(const SJson *pJson, void *pObj) {
if (code < 0) return -1;
tjsonGetNumberValue(info, "nodeId", pCfg->nodeInfo[i].nodeId, code);
tjsonGetNumberValue(info, "clusterId", pCfg->nodeInfo[i].clusterId, code);
char role[10] = {0};
code = tjsonGetStringValue(info, "nodeRole", role);
if(code < 0) return -1;
if(strlen(role) != 0){
pCfg->nodeInfo[i].nodeRole = syncStrToRole(role);
}
else{
pCfg->nodeInfo[i].nodeRole = TAOS_SYNC_ROLE_VOTER;
}
}
return 0;

View File

@ -66,10 +66,10 @@ int32_t syncNodeReplicate(SSyncNode* pNode) {
}
int32_t syncNodeReplicateWithoutLock(SSyncNode* pNode) {
if (pNode->state != TAOS_SYNC_STATE_LEADER || pNode->replicaNum == 1) {
if (pNode->state != TAOS_SYNC_STATE_LEADER || pNode->raftCfg.cfg.totalReplicaNum == 1) {
return -1;
}
for (int32_t i = 0; i < pNode->replicaNum; i++) {
for (int32_t i = 0; i < pNode->totalReplicaNum; i++) {
if (syncUtilSameId(&pNode->replicasId[i], &pNode->myRaftId)) {
continue;
}

View File

@ -795,13 +795,18 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) {
return -1;
}
if (pMsg->term > raftStoreGetTerm(pSyncNode)) {
syncNodeStepDown(pSyncNode, pMsg->term);
if(pSyncNode->raftCfg.cfg.nodeInfo[pSyncNode->raftCfg.cfg.myIndex].nodeRole != TAOS_SYNC_ROLE_LEARNER){
if (pMsg->term > raftStoreGetTerm(pSyncNode)) {
syncNodeStepDown(pSyncNode, pMsg->term);
}
}
else{
syncNodeUpdateTermWithoutStepDown(pSyncNode, pMsg->term);
}
// state, term, seq/ack
int32_t code = 0;
if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER) {
if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER || pSyncNode->state == TAOS_SYNC_STATE_LEARNER) {
if (pMsg->term == raftStoreGetTerm(pSyncNode)) {
if (pMsg->seq == SYNC_SNAPSHOT_SEQ_PREP_SNAPSHOT) {
syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq pre-snapshot");

View File

@ -58,7 +58,7 @@ static void syncNodeCleanConfigIndex(SSyncNode* ths) {
static int32_t syncNodeTimerRoutine(SSyncNode* ths) {
ths->tmrRoutineNum++;
if (ths->tmrRoutineNum % 60 == 0 && ths->replicaNum > 1) {
if (ths->tmrRoutineNum % 60 == 0 && ths->totalReplicaNum > 1) {
sNInfo(ths, "timer routines");
} else {
sNTrace(ths, "timer routines");

View File

@ -202,21 +202,22 @@ void syncPrintNodeLog(const char* flags, ELogLevel level, int32_t dflag, SSyncNo
// restore error code
terrno = errCode;
SyncIndex appliedIndex = pNode->pFsm->FpAppliedIndexCb(pNode->pFsm);
if (pNode != NULL) {
taosPrintLog(flags, level, dflag,
"vgId:%d, %s, sync:%s, term:%" PRIu64 ", commit-index:%" PRId64 ", first-ver:%" PRId64
", last-ver:%" PRId64 ", min:%" PRId64 ", snap:%" PRId64 ", snap-term:%" PRIu64
"vgId:%d, %s, sync:%s, term:%" PRIu64 ", commit-index:%" PRId64 ", applied-index:%" PRId64
", first-ver:%" PRId64 ", last-ver:%" PRId64 ", min:%" PRId64 ", snap:%" PRId64 ", snap-term:%" PRIu64
", elect-times:%d, as-leader-times:%d, cfg-ch-times:%d, hb-slow:%d, hbr-slow:%d, "
"aq-items:%d, snaping:%" PRId64 ", replicas:%d, last-cfg:%" PRId64
", chging:%d, restore:%d, quorum:%d, elect-lc-timer:%" PRId64 ", hb:%" PRId64
", buffer:%s, repl-mgrs:%s, members:%s, hb:%s, hb-reply:%s",
pNode->vgId, eventLog, syncStr(pNode->state), currentTerm, pNode->commitIndex, logBeginIndex,
logLastIndex, pNode->minMatchIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm, pNode->electNum,
pNode->becomeLeaderNum, pNode->configChangeNum, pNode->hbSlowNum, pNode->hbrSlowNum, aqItems,
pNode->snapshottingIndex, pNode->replicaNum, pNode->raftCfg.lastConfigIndex, pNode->changing,
pNode->restoreFinish, syncNodeDynamicQuorum(pNode), pNode->electTimerLogicClock, pNode->heartbeatTimerLogicClockUser,
bufferStatesStr, replMgrStatesStr, cfgStr, hbTimeStr, hbrTimeStr);
pNode->vgId, eventLog, syncStr(pNode->state), currentTerm, pNode->commitIndex, appliedIndex,
logBeginIndex, logLastIndex, pNode->minMatchIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm,
pNode->electNum, pNode->becomeLeaderNum, pNode->configChangeNum, pNode->hbSlowNum, pNode->hbrSlowNum,
aqItems, pNode->snapshottingIndex, pNode->replicaNum, pNode->raftCfg.lastConfigIndex, pNode->changing,
pNode->restoreFinish, syncNodeDynamicQuorum(pNode), pNode->electTimerLogicClock,
pNode->heartbeatTimerLogicClockUser, bufferStatesStr, replMgrStatesStr, cfgStr, hbTimeStr, hbrTimeStr);
}
}

View File

@ -30,7 +30,7 @@ SVotesGranted *voteGrantedCreate(SSyncNode *pNode) {
return NULL;
}
pVotesGranted->replicas = &pNode->replicasId;
pVotesGranted->replicas = (void*)&pNode->replicasId;
pVotesGranted->replicaNum = pNode->replicaNum;
voteGrantedClearVotes(pVotesGranted);
@ -49,7 +49,7 @@ void voteGrantedDestroy(SVotesGranted *pVotesGranted) {
}
void voteGrantedUpdate(SVotesGranted *pVotesGranted, SSyncNode *pNode) {
pVotesGranted->replicas = &pNode->replicasId;
pVotesGranted->replicas = (void*)&pNode->replicasId;
pVotesGranted->replicaNum = pNode->replicaNum;
voteGrantedClearVotes(pVotesGranted);
@ -115,7 +115,7 @@ SVotesRespond *votesRespondCreate(SSyncNode *pNode) {
return NULL;
}
pVotesRespond->replicas = &pNode->replicasId;
pVotesRespond->replicas = (void*)&pNode->replicasId;
pVotesRespond->replicaNum = pNode->replicaNum;
pVotesRespond->term = 0;
pVotesRespond->pNode = pNode;
@ -130,7 +130,7 @@ void votesRespondDestory(SVotesRespond *pVotesRespond) {
}
void votesRespondUpdate(SVotesRespond *pVotesRespond, SSyncNode *pNode) {
pVotesRespond->replicas = &pNode->replicasId;
pVotesRespond->replicas = (void*)&pNode->replicasId;
pVotesRespond->replicaNum = pNode->replicaNum;
pVotesRespond->term = 0;
pVotesRespond->pNode = pNode;

View File

@ -426,6 +426,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_SYN_RESTORING, "Sync leader is restor
TAOS_DEFINE_ERROR(TSDB_CODE_SYN_INVALID_SNAPSHOT_MSG, "Sync invalid snapshot msg")
TAOS_DEFINE_ERROR(TSDB_CODE_SYN_BUFFER_FULL, "Sync buffer is full")
TAOS_DEFINE_ERROR(TSDB_CODE_SYN_WRITE_STALL, "Sync write stall")
TAOS_DEFINE_ERROR(TSDB_CODE_SYN_NEGO_WIN_EXCEEDED, "Sync negotiation win exceeded")
TAOS_DEFINE_ERROR(TSDB_CODE_SYN_INTERNAL_ERROR, "Sync internal error")
//tq

View File

@ -26,7 +26,7 @@ class TDTestCase:
'c1':'int',
'c2':'float',
'c3':'binary(20)'
}
# structure of tag
self.tag_dict = {
@ -60,7 +60,7 @@ class TDTestCase:
if tb_type == 'ntb' or tb_type == 'ctb':
tdSql.checkRows(len(values_list))
elif tb_type == 'stb':
tdSql.checkRows(len(self.values_list)*tb_num)
tdSql.checkRows(len(self.values_list)*tb_num)
for time in ['2020-01-32T08:00:00','2020-13-32T08:00:00','acd']:
tdSql.query(f"select to_unixtimestamp('{time}') from {tbname}")
if tb_type == 'ntb' or tb_type == 'ctb':
@ -74,7 +74,7 @@ class TDTestCase:
if tb_type == 'ntb' or tb_type == 'ctb':
tdSql.checkRows(len(values_list))
elif tb_type == 'stb':
tdSql.checkRows(len(values_list)*tb_num)
tdSql.checkRows(len(values_list)*tb_num)
for time in self.error_param:
tdSql.error(f"select to_unixtimestamp({time}) from {tbname}")
def timestamp_change_check_ntb(self):
@ -95,9 +95,20 @@ class TDTestCase:
self.data_check(f'{self.stbname}_{i}',self.values_list,'ctb')
self.data_check(self.stbname,self.values_list,'stb',self.tbnum)
tdSql.execute(f'drop database {self.dbname}')
def timestamp_change_return_type(self):
tdSql.query(f"select to_unixtimestamp('1970-01-01 08:00:00+08:00', 0);")
tdSql.checkEqual(tdSql.queryResult[0][0], 0)
tdSql.query(f"select to_unixtimestamp('1970-01-01 00:00:00', 1);")
tdSql.checkData(0, 0, '1970-01-01 00:00:00')
tdSql.error(f"select to_unixtimestamp('1970-01-01 08:00:00+08:00', 2);")
tdSql.error(f"select to_unixtimestamp('1970-01-01 08:00:00+08:00', 1.5);")
tdSql.error(f"select to_unixtimestamp('1970-01-01 08:00:00+08:00', 'abc');")
tdSql.error(f"select to_unixtimestamp('1970-01-01 08:00:00+08:00', true);")
tdSql.error(f"select to_unixtimestamp('1970-01-01 08:00:00+08:00', 1, 3);")
def run(self): # sourcery skip: extract-duplicate-method
self.timestamp_change_check_ntb()
self.timestamp_change_check_stb()
self.timestamp_change_return_type()
def stop(self):
tdSql.close()
tdLog.success(f"{__file__} successfully executed")