opti: handle an escape character at the end of a field in schemaless

This commit is contained in:
wangmm0220 2024-02-27 16:02:15 +08:00
parent 9fd26a5949
commit d62b82c295
3 changed files with 122 additions and 33 deletions

View File

@ -80,7 +80,7 @@ extern "C" {
#define IS_SAME_KEY (maxKV->type == kv->type && maxKV->keyLen == kv->keyLen && memcmp(maxKV->key, kv->key, kv->keyLen) == 0)
#define IS_SLASH_LETTER_IN_MEASUREMENT(sql) \
(*((sql)-1) == SLASH && (*(sql) == COMMA || *(sql) == SPACE))
(*((sql)-1) == SLASH && (*(sql) == COMMA || *(sql) == SPACE || *(sql) == SLASH))
#define MOVE_FORWARD_ONE(sql, len) (memmove((void *)((sql)-1), (sql), len))

View File

@ -20,14 +20,14 @@
#include "clientSml.h"
#define IS_COMMA(sql) (*(sql) == COMMA && *((sql)-1) != SLASH)
#define IS_SPACE(sql) (*(sql) == SPACE && *((sql)-1) != SLASH)
#define IS_EQUAL(sql) (*(sql) == EQUAL && *((sql)-1) != SLASH)
// Delimiter tests: true when *sql is the delimiter and it is not escaped.
// A preceding SLASH normally escapes the delimiter. The exception is when that SLASH is the character the
// caller recorded in escapeChar (i.e. the SLASH was itself escaped), in which case the delimiter is real.
// Pass NULL as escapeChar when no escape has been recorded. Note: these macros read *(sql - 1).
#define IS_COMMA(sql, escapeChar) (*(sql) == COMMA && (*((sql)-1) != SLASH || ((sql)-1 == (escapeChar))))
#define IS_SPACE(sql, escapeChar) (*(sql) == SPACE && (*((sql)-1) != SLASH || ((sql)-1 == (escapeChar))))
#define IS_EQUAL(sql, escapeChar) (*(sql) == EQUAL && (*((sql)-1) != SLASH || ((sql)-1 == (escapeChar))))
#define IS_SLASH_LETTER_IN_FIELD_VALUE(sql) (*((sql)-1) == SLASH && (*(sql) == QUOTE || *(sql) == SLASH))
#define IS_SLASH_LETTER_IN_TAG_FIELD_KEY(sql) \
(*((sql)-1) == SLASH && (*(sql) == COMMA || *(sql) == SPACE || *(sql) == EQUAL))
(*((sql)-1) == SLASH && (*(sql) == COMMA || *(sql) == SPACE || *(sql) == EQUAL || *(sql) == SLASH))
#define PROCESS_SLASH_IN_FIELD_VALUE(key, keyLen) \
for (int i = 1; i < keyLen; ++i) { \
@ -198,7 +198,7 @@ static int32_t smlProcessTagLine(SSmlHandle *info, char **sql, char *sqlEnd){
int cnt = 0;
while (*sql < sqlEnd) {
if (unlikely(IS_SPACE(*sql))) {
if (unlikely(IS_SPACE(*sql,NULL))) {
break;
}
@ -207,18 +207,21 @@ static int32_t smlProcessTagLine(SSmlHandle *info, char **sql, char *sqlEnd){
size_t keyLen = 0;
bool keyEscaped = false;
size_t keyLenEscaped = 0;
const char *escapeChar = NULL;
while (*sql < sqlEnd) {
if (unlikely(IS_SPACE(*sql) || IS_COMMA(*sql))) {
if (unlikely(IS_SPACE(*sql,escapeChar) || IS_COMMA(*sql,escapeChar))) {
smlBuildInvalidDataMsg(&info->msgBuf, "invalid data", *sql);
terrno = TSDB_CODE_SML_INVALID_DATA;
return -1;
}
if (unlikely(IS_EQUAL(*sql))) {
if (unlikely(IS_EQUAL(*sql,escapeChar))) {
keyLen = *sql - key;
(*sql)++;
break;
}
if (IS_SLASH_LETTER_IN_TAG_FIELD_KEY(*sql)) {
escapeChar = *sql;
keyLenEscaped++;
keyEscaped = true;
}
@ -238,15 +241,16 @@ static int32_t smlProcessTagLine(SSmlHandle *info, char **sql, char *sqlEnd){
size_t valueLenEscaped = 0;
while (*sql < sqlEnd) {
// parse value
if (unlikely(IS_SPACE(*sql) || IS_COMMA(*sql))) {
if (unlikely(IS_SPACE(*sql,escapeChar) || IS_COMMA(*sql,escapeChar))) {
break;
} else if (unlikely(IS_EQUAL(*sql))) {
} else if (unlikely(IS_EQUAL(*sql,escapeChar))) {
smlBuildInvalidDataMsg(&info->msgBuf, "invalid data", *sql);
terrno = TSDB_CODE_SML_INVALID_DATA;
return -1;
}
if (IS_SLASH_LETTER_IN_TAG_FIELD_KEY(*sql)) {
escapeChar = *sql;
valueLenEscaped++;
valueEscaped = true;
}
@ -293,7 +297,7 @@ static int32_t smlProcessTagLine(SSmlHandle *info, char **sql, char *sqlEnd){
}
cnt++;
if (IS_SPACE(*sql)) {
if (IS_SPACE(*sql,escapeChar)) {
break;
}
(*sql)++;
@ -326,7 +330,7 @@ static int32_t smlParseTagLine(SSmlHandle *info, char **sql, char *sqlEnd, SSmlL
static int32_t smlParseColLine(SSmlHandle *info, char **sql, char *sqlEnd, SSmlLineInfo *currElement) {
int cnt = 0;
while (*sql < sqlEnd) {
if (unlikely(IS_SPACE(*sql))) {
if (unlikely(IS_SPACE(*sql,NULL))) {
break;
}
@ -335,17 +339,19 @@ static int32_t smlParseColLine(SSmlHandle *info, char **sql, char *sqlEnd, SSmlL
size_t keyLen = 0;
bool keyEscaped = false;
size_t keyLenEscaped = 0;
const char *escapeChar = NULL;
while (*sql < sqlEnd) {
if (unlikely(IS_SPACE(*sql) || IS_COMMA(*sql))) {
if (unlikely(IS_SPACE(*sql,escapeChar) || IS_COMMA(*sql,escapeChar))) {
smlBuildInvalidDataMsg(&info->msgBuf, "invalid data", *sql);
return TSDB_CODE_SML_INVALID_DATA;
}
if (unlikely(IS_EQUAL(*sql))) {
if (unlikely(IS_EQUAL(*sql,escapeChar))) {
keyLen = *sql - key;
(*sql)++;
break;
}
if (IS_SLASH_LETTER_IN_TAG_FIELD_KEY(*sql)) {
escapeChar = *sql;
keyLenEscaped++;
keyEscaped = true;
}
@ -363,7 +369,6 @@ static int32_t smlParseColLine(SSmlHandle *info, char **sql, char *sqlEnd, SSmlL
bool valueEscaped = false;
size_t valueLenEscaped = 0;
int quoteNum = 0;
const char *escapeChar = NULL;
while (*sql < sqlEnd) {
// parse value
if (unlikely(*(*sql) == QUOTE && (*(*sql - 1) != SLASH || (*sql - 1) == escapeChar))) {
@ -374,7 +379,7 @@ static int32_t smlParseColLine(SSmlHandle *info, char **sql, char *sqlEnd, SSmlL
}
continue;
}
if (quoteNum % 2 == 0 && (unlikely(IS_SPACE(*sql) || IS_COMMA(*sql)))) {
if (quoteNum % 2 == 0 && (unlikely(IS_SPACE(*sql,escapeChar) || IS_COMMA(*sql,escapeChar)))) {
break;
}
if (IS_SLASH_LETTER_IN_FIELD_VALUE(*sql) && (*sql - 1) != escapeChar) {
@ -437,7 +442,7 @@ static int32_t smlParseColLine(SSmlHandle *info, char **sql, char *sqlEnd, SSmlL
}
cnt++;
if (IS_SPACE(*sql)) {
if (IS_SPACE(*sql,escapeChar)) {
break;
}
(*sql)++;
@ -453,19 +458,18 @@ int32_t smlParseInfluxString(SSmlHandle *info, char *sql, char *sqlEnd, SSmlLine
elements->measure = sql;
// parse measure
size_t measureLenEscaped = 0;
const char *escapeChar = NULL;
while (sql < sqlEnd) {
if (unlikely((sql != elements->measure) && IS_SLASH_LETTER_IN_MEASUREMENT(sql))) {
elements->measureEscaped = true;
measureLenEscaped++;
sql++;
continue;
}
if (unlikely(IS_COMMA(sql))) {
if (unlikely(IS_COMMA(sql,escapeChar) || IS_SPACE(sql,escapeChar))) {
break;
}
if (unlikely(IS_SPACE(sql))) {
break;
if (unlikely((sql != elements->measure) && IS_SLASH_LETTER_IN_MEASUREMENT(sql))) {
elements->measureEscaped = true;
escapeChar = sql;
measureLenEscaped++;
sql++;
continue;
}
sql++;
}
@ -478,9 +482,12 @@ int32_t smlParseInfluxString(SSmlHandle *info, char *sql, char *sqlEnd, SSmlLine
// to get measureTagsLen before
const char *tmp = sql;
while (tmp < sqlEnd) {
if (unlikely(IS_SPACE(tmp))) {
if (unlikely(IS_SPACE(tmp,escapeChar))) {
break;
}
// Inspect the scan cursor `tmp` (which this loop advances), not `sql` (which stays fixed here),
// so that an escaped character met while scanning is recorded for the next IS_SPACE check.
if (unlikely(IS_SLASH_LETTER_IN_TAG_FIELD_KEY(tmp))) {
  escapeChar = tmp;
}
tmp++;
}
elements->measureTagsLen = tmp - elements->measure;

View File

@ -1018,7 +1018,7 @@ int sml_escape_Test() {
ASSERT(numFields == 5);
ASSERT(strncmp(fields[1].name, "inode\"i,= s_used", sizeof("inode\"i,= s_used") - 1) == 0);
ASSERT(strncmp(fields[2].name, "total", sizeof("total") - 1) == 0);
ASSERT(strncmp(fields[3].name, "inode\"i,= s_f\\\\ree", sizeof("inode\"i,= s_f\\\\ree") - 1) == 0);
ASSERT(strncmp(fields[3].name, "inode\"i,= s_f\\ree", sizeof("inode\"i,= s_f\\ree") - 1) == 0);
ASSERT(strncmp(fields[4].name, "dev\"i,= ce", sizeof("dev\"i,= ce") - 1) == 0);
TAOS_ROW row = NULL;
@ -1044,6 +1044,91 @@ int sml_escape_Test() {
return code;
}
// test field with end of escape
int sml_escape1_Test() {
TAOS *taos = taos_connect("localhost", "root", "taosdata", NULL, 0);
TAOS_RES *pRes = taos_query(taos, "create database if not exists db_escape");
taos_free_result(pRes);
pRes = taos_query(taos, "use db_escape");
taos_free_result(pRes);
const char *sql[] = {
"stab,t1\\=1 c1=3,c2=\"32fw\" 1661943970000000000",
"stab,t1=1\\ c1=3,c2=\"32fw\" 1661943980000000000",
"stab,t1=1 c1\\=3,c2=\"32fw\" 1661943990000000000",
};
for(int i = 0; i < sizeof(sql) / sizeof(sql[0]); i++){
pRes = taos_schemaless_insert(taos, (char**)&sql[i], 1, TSDB_SML_LINE_PROTOCOL, 0);
int code = taos_errno(pRes);
ASSERT(code);
}
const char *sql1[] = {
"stab\\,t1=1 c1=3,c2=\"32fw\" 1661943960000000000",
"stab\\\\,t1=1 c1=3,c2=\"32fw\" 1661943960000000000",
"stab,t1\\\\=1 c1=3,c2=\"32fw\" 1661943970000000000",
"stab,t1=1\\\\ c1=3,c2=\"32fw\" 1661943980000000000",
"stab,t1=1 c1\\\\=3,c2=\"32fw\" 1661943990000000000",
};
pRes = taos_schemaless_insert(taos, (char **)sql1, sizeof(sql1) / sizeof(sql1[0]), TSDB_SML_LINE_PROTOCOL, 0);
printf("%s result:%s, rows:%d\n", __FUNCTION__, taos_errstr(pRes), taos_affected_rows(pRes));
int code = taos_errno(pRes);
ASSERT(!code);
ASSERT(taos_affected_rows(pRes) == 5);
taos_free_result(pRes);
pRes = taos_query(taos, "select * from stab order by _ts"); //check stable name
ASSERT(pRes);
int fieldNum = taos_field_count(pRes);
ASSERT(fieldNum == 6);
printf("fieldNum:%d\n", fieldNum);
int numFields = taos_num_fields(pRes);
TAOS_FIELD *fields = taos_fetch_fields(pRes);
ASSERT(numFields == 6);
ASSERT(strncmp(fields[1].name, "c1", sizeof("c1") - 1) == 0);
ASSERT(strncmp(fields[2].name, "c2", sizeof("c2") - 1) == 0);
ASSERT(strncmp(fields[3].name, "c1\\", sizeof("c1\\") - 1) == 0);
ASSERT(strncmp(fields[4].name, "t1\\", sizeof("t1\\") - 1) == 0);
ASSERT(strncmp(fields[5].name, "t1", sizeof("t1") - 1) == 0);
TAOS_ROW row = NULL;
int32_t rowIndex = 0;
while ((row = taos_fetch_row(pRes)) != NULL) {
int64_t ts = *(int64_t *)row[0];
if (rowIndex == 0) {
ASSERT(ts == 1661943970000);
ASSERT(*(double *)row[1] == 3);
ASSERT(strncmp(row[2], "32fw", sizeof("32fw") - 1) == 0);
ASSERT(row[3] == NULL);
ASSERT(strncmp(row[4], "1", sizeof("1") - 1) == 0);
ASSERT(row[5] == NULL);
}else if (rowIndex == 1) {
ASSERT(ts == 1661943980000);
ASSERT(*(double *)row[1] == 3);
ASSERT(strncmp(row[2], "32fw", sizeof("32fw") - 1) == 0);
ASSERT(row[3] == NULL);
ASSERT(row[4] == NULL);
ASSERT(strncmp(row[5], "1\\", sizeof("1\\") - 1) == 0);
}else if (rowIndex == 2) {
ASSERT(ts == 1661943990000);
ASSERT(row[1] == NULL);
ASSERT(strncmp(row[2], "32fw", sizeof("32fw") - 1) == 0);
ASSERT(*(double *)row[3] == 3);
ASSERT(row[4] == NULL);
ASSERT(strncmp(row[5], "1", sizeof("1") - 1) == 0);
}
rowIndex++;
}
taos_free_result(pRes);
taos_close(taos);
return code;
}
int sml_19221_Test() {
TAOS *taos = taos_connect("localhost", "root", "taosdata", NULL, 0);
@ -1775,17 +1860,14 @@ int main(int argc, char *argv[]) {
ASSERT(ret);
ret = sml_escape_Test();
ASSERT(!ret);
ret = sml_escape1_Test();
ASSERT(!ret);
ret = sml_ts3116_Test();
ASSERT(!ret);
ret = sml_ts2385_Test(); // this test case need config sml table name using ./sml_test config_file
ASSERT(!ret);
ret = sml_ts3303_Test();
ASSERT(!ret);
// for(int i = 0; i < sizeof(str)/sizeof(str[0]); i++){
// printf("str:%s \t %d\n", str[i], smlCalTypeSum(str[i], strlen(str[i])));
// }
// int ret = 0;
ret = sml_ttl_Test();
ASSERT(!ret);
ret = sml_ts2164_Test();