From 1cc17dfacc0e90d8b485a685b69ec3412d574e24 Mon Sep 17 00:00:00 2001 From: factosea <285808407@qq.com> Date: Thu, 6 Feb 2025 15:14:38 +0800 Subject: [PATCH] enh: regexp --- docs/en/14-reference/03-taos-sql/16-operators.md | 5 +++-- docs/zh/14-reference/03-taos-sql/16-operators.md | 5 +++-- source/libs/parser/inc/sql.y | 2 ++ source/libs/parser/src/parTokenizer.c | 1 + tests/system-test/2-query/match.py | 10 ++++++++++ 5 files changed, 19 insertions(+), 4 deletions(-) diff --git a/docs/en/14-reference/03-taos-sql/16-operators.md b/docs/en/14-reference/03-taos-sql/16-operators.md index aa4ac8a72f..04910bb10d 100644 --- a/docs/en/14-reference/03-taos-sql/16-operators.md +++ b/docs/en/14-reference/03-taos-sql/16-operators.md @@ -43,7 +43,8 @@ TDengine supports `UNION ALL` and `UNION` operators. UNION ALL combines the resu | 9 | LIKE | BINARY, NCHAR, and VARCHAR | Matches the specified pattern string with wildcard | | 10 | NOT LIKE | BINARY, NCHAR, and VARCHAR | Does not match the specified pattern string with wildcard | | 11 | MATCH, NMATCH | BINARY, NCHAR, and VARCHAR | Regular expression match | -| 12 | CONTAINS | JSON | Whether a key exists in JSON | +| 12 | REGEXP, NOT REGEXP | BINARY, NCHAR, and VARCHAR | Regular expression match | +| 13 | CONTAINS | JSON | Whether a key exists in JSON | LIKE conditions use wildcard strings for matching checks, with the following rules: @@ -51,7 +52,7 @@ LIKE conditions use wildcard strings for matching checks, with the following rul - If you want to match an underscore character that is originally in the string, you can write it as \_ in the wildcard string, i.e., add a backslash to escape it. - The wildcard string cannot exceed 100 bytes in length. It is not recommended to use too long wildcard strings, as it may severely affect the performance of the LIKE operation. -MATCH and NMATCH conditions use regular expressions for matching, with the following rules: +MATCH/REGEXP and NMATCH/NOT REGEXP conditions use regular expressions for matching, with the following rules: - Supports regular expressions that comply with the POSIX standard, see Regular Expressions for specific standards. - When MATCH matches a regular expression, it returns TRUE. When NMATCH does not match a regular expression, it returns TRUE. diff --git a/docs/zh/14-reference/03-taos-sql/16-operators.md b/docs/zh/14-reference/03-taos-sql/16-operators.md index 96a35e9ebf..08972b27a3 100644 --- a/docs/zh/14-reference/03-taos-sql/16-operators.md +++ b/docs/zh/14-reference/03-taos-sql/16-operators.md @@ -45,7 +45,8 @@ TDengine 支持 `UNION ALL` 和 `UNION` 操作符。UNION ALL 将查询返回的 | 9 | LIKE | BINARY、NCHAR 和 VARCHAR | 通配符匹配所指定的模式串 | | 10 | NOT LIKE | BINARY、NCHAR 和 VARCHAR | 通配符不匹配所指定的模式串 | | 11 | MATCH, NMATCH | BINARY、NCHAR 和 VARCHAR | 正则表达式匹配 | -| 12 | CONTAINS | JSON | JSON 中是否存在某键 | +| 12 | REGEXP, NOT REGEXP | BINARY、NCHAR 和 VARCHAR | 正则表达式匹配 | +| 13 | CONTAINS | JSON | JSON 中是否存在某键 | LIKE 条件使用通配符字符串进行匹配检查,规则如下: @@ -53,7 +54,7 @@ LIKE 条件使用通配符字符串进行匹配检查,规则如下: - 如果希望匹配字符串中原本就带有的 \_(下划线)字符,那么可以在通配符字符串中写作 \_,即加一个反斜线来进行转义。 - 通配符字符串最长不能超过 100 字节。不建议使用太长的通配符字符串,否则将有可能严重影响 LIKE 操作的执行性能。 -MATCH 条件和 NMATCH 条件使用正则表达式进行匹配,规则如下: +MATCH/REGEXP 条件和 NMATCH/NOT REGEXP 条件使用正则表达式进行匹配,规则如下: - 支持符合 POSIX 规范的正则表达式,具体规范内容可参见 Regular Expressions。 - MATCH 和正则表达式匹配时, 返回 TURE. NMATCH 和正则表达式不匹配时, 返回 TRUE. diff --git a/source/libs/parser/inc/sql.y b/source/libs/parser/inc/sql.y index 439af13d71..4c0ba0d4a0 100644 --- a/source/libs/parser/inc/sql.y +++ b/source/libs/parser/inc/sql.y @@ -1411,6 +1411,8 @@ compare_op(A) ::= LIKE. compare_op(A) ::= NOT LIKE. { A = OP_TYPE_NOT_LIKE; } compare_op(A) ::= MATCH. { A = OP_TYPE_MATCH; } compare_op(A) ::= NMATCH. { A = OP_TYPE_NMATCH; } +compare_op(A) ::= REGEXP. { A = OP_TYPE_MATCH; } +compare_op(A) ::= NOT REGEXP. { A = OP_TYPE_NMATCH; } compare_op(A) ::= CONTAINS. { A = OP_TYPE_JSON_CONTAINS; } %type in_op { EOperatorType } diff --git a/source/libs/parser/src/parTokenizer.c b/source/libs/parser/src/parTokenizer.c index 7ed438a7dc..3b08d403dc 100644 --- a/source/libs/parser/src/parTokenizer.c +++ b/source/libs/parser/src/parTokenizer.c @@ -358,6 +358,7 @@ static SKeyword keywordTable[] = { {"NOTIFY", TK_NOTIFY}, {"ON_FAILURE", TK_ON_FAILURE}, {"NOTIFY_HISTORY", TK_NOTIFY_HISTORY}, + {"REGEXP", TK_REGEXP}, }; // clang-format on diff --git a/tests/system-test/2-query/match.py b/tests/system-test/2-query/match.py index cd2ed5d96b..a5c90c9690 100644 --- a/tests/system-test/2-query/match.py +++ b/tests/system-test/2-query/match.py @@ -114,6 +114,16 @@ class TDTestCase: tdSql.query("select * from db.t3x where c1 match '中文'") tdSql.checkRows(5) tdSql.error("select * from db.t1x where c1 match '*d'") + + tdSql.query("select * from db.t3x where c1 regexp '%中文'") + tdSql.checkRows(2) + tdSql.query("select * from db.t3x where c1 regexp '中文'") + tdSql.checkRows(5) + tdSql.query("select * from db.t3x where c1 not regexp '%中文'") + tdSql.checkRows(3) + tdSql.query("select * from db.t3x where c1 not regexp '中文'") + tdSql.checkRows(0) + tdSql.error("select * from db.t1x where c1 regexp '*d'") for thread in threads: print(f"Thread waitting for finish...")