From d62b82c295b2db35017cd8383baef70ea8cce680 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 27 Feb 2024 16:02:15 +0800 Subject: [PATCH] opti:deal with escape in the end of field in schemaless --- source/client/inc/clientSml.h | 2 +- source/client/src/clientSmlLine.c | 59 ++++++++++--------- utils/test/c/sml_test.c | 94 +++++++++++++++++++++++++++++-- 3 files changed, 122 insertions(+), 33 deletions(-) diff --git a/source/client/inc/clientSml.h b/source/client/inc/clientSml.h index b732abffb1..122914fd34 100644 --- a/source/client/inc/clientSml.h +++ b/source/client/inc/clientSml.h @@ -80,7 +80,7 @@ extern "C" { #define IS_SAME_KEY (maxKV->type == kv->type && maxKV->keyLen == kv->keyLen && memcmp(maxKV->key, kv->key, kv->keyLen) == 0) #define IS_SLASH_LETTER_IN_MEASUREMENT(sql) \ - (*((sql)-1) == SLASH && (*(sql) == COMMA || *(sql) == SPACE)) + (*((sql)-1) == SLASH && (*(sql) == COMMA || *(sql) == SPACE || *(sql) == SLASH)) #define MOVE_FORWARD_ONE(sql, len) (memmove((void *)((sql)-1), (sql), len)) diff --git a/source/client/src/clientSmlLine.c b/source/client/src/clientSmlLine.c index 0c610a4611..8bdbec0f2b 100644 --- a/source/client/src/clientSmlLine.c +++ b/source/client/src/clientSmlLine.c @@ -20,14 +20,14 @@ #include "clientSml.h" -#define IS_COMMA(sql) (*(sql) == COMMA && *((sql)-1) != SLASH) -#define IS_SPACE(sql) (*(sql) == SPACE && *((sql)-1) != SLASH) -#define IS_EQUAL(sql) (*(sql) == EQUAL && *((sql)-1) != SLASH) +#define IS_COMMA(sql,escapeChar) (*(sql) == COMMA && (*((sql)-1) != SLASH || ((sql)-1 == escapeChar))) +#define IS_SPACE(sql,escapeChar) (*(sql) == SPACE && (*((sql)-1) != SLASH || ((sql)-1 == escapeChar))) +#define IS_EQUAL(sql,escapeChar) (*(sql) == EQUAL && (*((sql)-1) != SLASH || ((sql)-1 == escapeChar))) #define IS_SLASH_LETTER_IN_FIELD_VALUE(sql) (*((sql)-1) == SLASH && (*(sql) == QUOTE || *(sql) == SLASH)) #define IS_SLASH_LETTER_IN_TAG_FIELD_KEY(sql) \ - (*((sql)-1) == SLASH && (*(sql) == COMMA || *(sql) == SPACE || *(sql) == EQUAL)) + (*((sql)-1) == SLASH && (*(sql) == COMMA || *(sql) == SPACE || *(sql) == EQUAL || *(sql) == SLASH)) #define PROCESS_SLASH_IN_FIELD_VALUE(key, keyLen) \ for (int i = 1; i < keyLen; ++i) { \ @@ -198,7 +198,7 @@ static int32_t smlProcessTagLine(SSmlHandle *info, char **sql, char *sqlEnd){ int cnt = 0; while (*sql < sqlEnd) { - if (unlikely(IS_SPACE(*sql))) { + if (unlikely(IS_SPACE(*sql,NULL))) { break; } @@ -207,18 +207,21 @@ static int32_t smlProcessTagLine(SSmlHandle *info, char **sql, char *sqlEnd){ size_t keyLen = 0; bool keyEscaped = false; size_t keyLenEscaped = 0; + const char *escapeChar = NULL; + while (*sql < sqlEnd) { - if (unlikely(IS_SPACE(*sql) || IS_COMMA(*sql))) { + if (unlikely(IS_SPACE(*sql,escapeChar) || IS_COMMA(*sql,escapeChar))) { smlBuildInvalidDataMsg(&info->msgBuf, "invalid data", *sql); terrno = TSDB_CODE_SML_INVALID_DATA; return -1; } - if (unlikely(IS_EQUAL(*sql))) { + if (unlikely(IS_EQUAL(*sql,escapeChar))) { keyLen = *sql - key; (*sql)++; break; } if (IS_SLASH_LETTER_IN_TAG_FIELD_KEY(*sql)) { + escapeChar = *sql; keyLenEscaped++; keyEscaped = true; } @@ -238,15 +241,16 @@ static int32_t smlProcessTagLine(SSmlHandle *info, char **sql, char *sqlEnd){ size_t valueLenEscaped = 0; while (*sql < sqlEnd) { // parse value - if (unlikely(IS_SPACE(*sql) || IS_COMMA(*sql))) { + if (unlikely(IS_SPACE(*sql,escapeChar) || IS_COMMA(*sql,escapeChar))) { break; - } else if (unlikely(IS_EQUAL(*sql))) { + } else if (unlikely(IS_EQUAL(*sql,escapeChar))) { smlBuildInvalidDataMsg(&info->msgBuf, "invalid data", *sql); terrno = TSDB_CODE_SML_INVALID_DATA; return -1; } if (IS_SLASH_LETTER_IN_TAG_FIELD_KEY(*sql)) { + escapeChar = *sql; valueLenEscaped++; valueEscaped = true; } @@ -293,7 +297,7 @@ static int32_t smlProcessTagLine(SSmlHandle *info, char **sql, char *sqlEnd){ } cnt++; - if (IS_SPACE(*sql)) { + if (IS_SPACE(*sql,escapeChar)) { break; } (*sql)++; @@ -326,7 +330,7 @@ static int32_t smlParseTagLine(SSmlHandle *info, char **sql, char *sqlEnd, SSmlL static int32_t smlParseColLine(SSmlHandle *info, char **sql, char *sqlEnd, SSmlLineInfo *currElement) { int cnt = 0; while (*sql < sqlEnd) { - if (unlikely(IS_SPACE(*sql))) { + if (unlikely(IS_SPACE(*sql,NULL))) { break; } @@ -335,17 +339,19 @@ static int32_t smlParseColLine(SSmlHandle *info, char **sql, char *sqlEnd, SSmlL size_t keyLen = 0; bool keyEscaped = false; size_t keyLenEscaped = 0; + const char *escapeChar = NULL; while (*sql < sqlEnd) { - if (unlikely(IS_SPACE(*sql) || IS_COMMA(*sql))) { + if (unlikely(IS_SPACE(*sql,escapeChar) || IS_COMMA(*sql,escapeChar))) { smlBuildInvalidDataMsg(&info->msgBuf, "invalid data", *sql); return TSDB_CODE_SML_INVALID_DATA; } - if (unlikely(IS_EQUAL(*sql))) { + if (unlikely(IS_EQUAL(*sql,escapeChar))) { keyLen = *sql - key; (*sql)++; break; } if (IS_SLASH_LETTER_IN_TAG_FIELD_KEY(*sql)) { + escapeChar = *sql; keyLenEscaped++; keyEscaped = true; } @@ -363,7 +369,6 @@ static int32_t smlParseColLine(SSmlHandle *info, char **sql, char *sqlEnd, SSmlL bool valueEscaped = false; size_t valueLenEscaped = 0; int quoteNum = 0; - const char *escapeChar = NULL; while (*sql < sqlEnd) { // parse value if (unlikely(*(*sql) == QUOTE && (*(*sql - 1) != SLASH || (*sql - 1) == escapeChar))) { @@ -374,7 +379,7 @@ static int32_t smlParseColLine(SSmlHandle *info, char **sql, char *sqlEnd, SSmlL } continue; } - if (quoteNum % 2 == 0 && (unlikely(IS_SPACE(*sql) || IS_COMMA(*sql)))) { + if (quoteNum % 2 == 0 && (unlikely(IS_SPACE(*sql,escapeChar) || IS_COMMA(*sql,escapeChar)))) { break; } if (IS_SLASH_LETTER_IN_FIELD_VALUE(*sql) && (*sql - 1) != escapeChar) { @@ -437,7 +442,7 @@ static int32_t smlParseColLine(SSmlHandle *info, char **sql, char *sqlEnd, SSmlL } cnt++; - if (IS_SPACE(*sql)) { + if (IS_SPACE(*sql,escapeChar)) { break; } (*sql)++; @@ -453,19 +458,18 @@ int32_t smlParseInfluxString(SSmlHandle *info, char *sql, char *sqlEnd, SSmlLine elements->measure = sql; // parse measure size_t measureLenEscaped = 0; + const char *escapeChar = NULL; while (sql < sqlEnd) { - if (unlikely((sql != elements->measure) && IS_SLASH_LETTER_IN_MEASUREMENT(sql))) { - elements->measureEscaped = true; - measureLenEscaped++; - sql++; - continue; - } - if (unlikely(IS_COMMA(sql))) { + if (unlikely(IS_COMMA(sql,escapeChar) || IS_SPACE(sql,escapeChar))) { break; } - if (unlikely(IS_SPACE(sql))) { - break; + if (unlikely((sql != elements->measure) && IS_SLASH_LETTER_IN_MEASUREMENT(sql))) { + elements->measureEscaped = true; + escapeChar = sql; + measureLenEscaped++; + sql++; + continue; } sql++; } @@ -478,9 +482,12 @@ int32_t smlParseInfluxString(SSmlHandle *info, char *sql, char *sqlEnd, SSmlLine // to get measureTagsLen before const char *tmp = sql; while (tmp < sqlEnd) { - if (unlikely(IS_SPACE(tmp))) { + if (unlikely(IS_SPACE(tmp,escapeChar))) { break; } + if(unlikely(IS_SLASH_LETTER_IN_TAG_FIELD_KEY(sql))){ + escapeChar = sql; + } tmp++; } elements->measureTagsLen = tmp - elements->measure; diff --git a/utils/test/c/sml_test.c b/utils/test/c/sml_test.c index 2c334eb67b..f81f13fc68 100644 --- a/utils/test/c/sml_test.c +++ b/utils/test/c/sml_test.c @@ -1018,7 +1018,7 @@ int sml_escape_Test() { ASSERT(numFields == 5); ASSERT(strncmp(fields[1].name, "inode\"i,= s_used", sizeof("inode\"i,= s_used") - 1) == 0); ASSERT(strncmp(fields[2].name, "total", sizeof("total") - 1) == 0); - ASSERT(strncmp(fields[3].name, "inode\"i,= s_f\\\\ree", sizeof("inode\"i,= s_f\\\\ree") - 1) == 0); + ASSERT(strncmp(fields[3].name, "inode\"i,= s_f\\ree", sizeof("inode\"i,= s_f\\ree") - 1) == 0); ASSERT(strncmp(fields[4].name, "dev\"i,= ce", sizeof("dev\"i,= ce") - 1) == 0); TAOS_ROW row = NULL; @@ -1044,6 +1044,91 @@ int sml_escape_Test() { return code; } +// test field with end of escape +int sml_escape1_Test() { + TAOS *taos = taos_connect("localhost", "root", "taosdata", NULL, 0); + + TAOS_RES *pRes = taos_query(taos, "create database if not exists db_escape"); + taos_free_result(pRes); + + pRes = taos_query(taos, "use db_escape"); + taos_free_result(pRes); + + const char *sql[] = { + "stab,t1\\=1 c1=3,c2=\"32fw\" 1661943970000000000", + "stab,t1=1\\ c1=3,c2=\"32fw\" 1661943980000000000", + "stab,t1=1 c1\\=3,c2=\"32fw\" 1661943990000000000", + }; + for(int i = 0; i < sizeof(sql) / sizeof(sql[0]); i++){ + pRes = taos_schemaless_insert(taos, (char**)&sql[i], 1, TSDB_SML_LINE_PROTOCOL, 0); + int code = taos_errno(pRes); + ASSERT(code); + } + + const char *sql1[] = { + "stab\\,t1=1 c1=3,c2=\"32fw\" 1661943960000000000", + "stab\\\\,t1=1 c1=3,c2=\"32fw\" 1661943960000000000", + "stab,t1\\\\=1 c1=3,c2=\"32fw\" 1661943970000000000", + "stab,t1=1\\\\ c1=3,c2=\"32fw\" 1661943980000000000", + "stab,t1=1 c1\\\\=3,c2=\"32fw\" 1661943990000000000", + }; + pRes = taos_schemaless_insert(taos, (char **)sql1, sizeof(sql1) / sizeof(sql1[0]), TSDB_SML_LINE_PROTOCOL, 0); + printf("%s result:%s, rows:%d\n", __FUNCTION__, taos_errstr(pRes), taos_affected_rows(pRes)); + int code = taos_errno(pRes); + ASSERT(!code); + ASSERT(taos_affected_rows(pRes) == 5); + taos_free_result(pRes); + + pRes = taos_query(taos, "select * from stab order by _ts"); //check stable name + ASSERT(pRes); + int fieldNum = taos_field_count(pRes); + ASSERT(fieldNum == 6); + printf("fieldNum:%d\n", fieldNum); + + int numFields = taos_num_fields(pRes); + TAOS_FIELD *fields = taos_fetch_fields(pRes); + ASSERT(numFields == 6); + ASSERT(strncmp(fields[1].name, "c1", sizeof("c1") - 1) == 0); + ASSERT(strncmp(fields[2].name, "c2", sizeof("c2") - 1) == 0); + ASSERT(strncmp(fields[3].name, "c1\\", sizeof("c1\\") - 1) == 0); + ASSERT(strncmp(fields[4].name, "t1\\", sizeof("t1\\") - 1) == 0); + ASSERT(strncmp(fields[5].name, "t1", sizeof("t1") - 1) == 0); + + TAOS_ROW row = NULL; + int32_t rowIndex = 0; + while ((row = taos_fetch_row(pRes)) != NULL) { + int64_t ts = *(int64_t *)row[0]; + + if (rowIndex == 0) { + ASSERT(ts == 1661943970000); + ASSERT(*(double *)row[1] == 3); + ASSERT(strncmp(row[2], "32fw", sizeof("32fw") - 1) == 0); + ASSERT(row[3] == NULL); + ASSERT(strncmp(row[4], "1", sizeof("1") - 1) == 0); + ASSERT(row[5] == NULL); + }else if (rowIndex == 1) { + ASSERT(ts == 1661943980000); + ASSERT(*(double *)row[1] == 3); + ASSERT(strncmp(row[2], "32fw", sizeof("32fw") - 1) == 0); + ASSERT(row[3] == NULL); + ASSERT(row[4] == NULL); + ASSERT(strncmp(row[5], "1\\", sizeof("1\\") - 1) == 0); + }else if (rowIndex == 2) { + ASSERT(ts == 1661943990000); + ASSERT(row[1] == NULL); + ASSERT(strncmp(row[2], "32fw", sizeof("32fw") - 1) == 0); + ASSERT(*(double *)row[3] == 3); + ASSERT(row[4] == NULL); + ASSERT(strncmp(row[5], "1", sizeof("1") - 1) == 0); + } + rowIndex++; + } + taos_free_result(pRes); + taos_close(taos); + + return code; +} + int sml_19221_Test() { TAOS *taos = taos_connect("localhost", "root", "taosdata", NULL, 0); @@ -1775,17 +1860,14 @@ int main(int argc, char *argv[]) { ASSERT(ret); ret = sml_escape_Test(); ASSERT(!ret); + ret = sml_escape1_Test(); + ASSERT(!ret); ret = sml_ts3116_Test(); ASSERT(!ret); ret = sml_ts2385_Test(); // this test case need config sml table name using ./sml_test config_file ASSERT(!ret); ret = sml_ts3303_Test(); ASSERT(!ret); - - // for(int i = 0; i < sizeof(str)/sizeof(str[0]); i++){ - // printf("str:%s \t %d\n", str[i], smlCalTypeSum(str[i], strlen(str[i]))); - // } - // int ret = 0; ret = sml_ttl_Test(); ASSERT(!ret); ret = sml_ts2164_Test();