From 2742159fde19020be9dec4d28b94aecaa4ffbdf7 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 29 Apr 2021 23:16:33 +0800 Subject: [PATCH 1/8] [td-3967]1). fix escape char error in sql. 2) refactor. --- src/client/inc/tschemautil.h | 2 +- src/client/src/tscParseInsert.c | 33 ++++++++++++------- src/client/src/tscPrepare.c | 2 +- src/client/src/tscSQLParser.c | 14 ++++---- src/client/src/tscSql.c | 2 +- src/client/src/tscUtil.c | 8 ++--- src/client/tests/timeParseTest.cpp | 2 +- src/common/inc/tname.h | 2 +- src/common/inc/tvariant.h | 2 +- src/common/src/tname.c | 2 +- src/common/src/tvariant.c | 14 ++++---- src/os/tests/test.cpp | 2 +- src/query/inc/qSqlparser.h | 2 +- src/query/src/qSqlParser.c | 6 ++-- src/query/src/sql.c | 6 ++-- src/query/tests/tsBufTest.cpp | 4 +-- src/query/tests/unitTest.cpp | 14 ++++---- src/util/inc/{tstoken.h => ttoken.h} | 29 ++++++++-------- .../qTokenizer.c => util/src/ttokenizer.c} | 32 +++++++++++++----- 19 files changed, 102 insertions(+), 76 deletions(-) rename src/util/inc/{tstoken.h => ttoken.h} (93%) rename src/{query/src/qTokenizer.c => util/src/ttokenizer.c} (92%) diff --git a/src/client/inc/tschemautil.h b/src/client/inc/tschemautil.h index a9dcd230a6..0026a27e19 100644 --- a/src/client/inc/tschemautil.h +++ b/src/client/inc/tschemautil.h @@ -21,8 +21,8 @@ extern "C" { #endif #include "taosmsg.h" -#include "tstoken.h" #include "tsclient.h" +#include "ttoken.h" /** * get the number of tags of this table diff --git a/src/client/src/tscParseInsert.c b/src/client/src/tscParseInsert.c index 79ceb81cbc..a9d8afd09e 100644 --- a/src/client/src/tscParseInsert.c +++ b/src/client/src/tscParseInsert.c @@ -29,8 +29,7 @@ #include "taosdef.h" #include "tscLog.h" -#include "tscSubquery.h" -#include "tstoken.h" +#include "ttoken.h" #include "tdataformat.h" @@ -463,23 +462,33 @@ int tsParseOneRow(char **str, STableDataBlocks *pDataBlocks, SSqlCmd *pCmd, int1 // Remove quotation marks if (TK_STRING == sToken.type) { // delete escape character: \\, \', \" - char delim = sToken.z[0]; +// char delim = sToken.z[0]; int32_t cnt = 0; int32_t j = 0; for (uint32_t k = 1; k < sToken.n - 1; ++k) { - if (sToken.z[k] == delim || sToken.z[k] == '\\') { - if (sToken.z[k + 1] == delim) { - cnt++; - tmpTokenBuf[j] = sToken.z[k + 1]; - j++; - k++; - continue; - } + if (sToken.z[k] == '\\') { + cnt++; + + tmpTokenBuf[j] = GET_ESCAPE_CHAR(sToken.z[k+1]); + j++; + k++; + continue; } +// if (sToken.z[k] == delim || sToken.z[k] == '\\') { +// if (sToken.z[k + 1] == delim) { +// cnt++; +// tmpTokenBuf[j] = sToken.z[k + 1]; +// j++;s +// k++; +// continue; +// } +// } + tmpTokenBuf[j] = sToken.z[k]; j++; } + tmpTokenBuf[j] = 0; sToken.z = tmpTokenBuf; sToken.n -= 2 + cnt; @@ -1005,7 +1014,7 @@ int validateTableName(char *tblName, int len, SStrToken* psTblToken) { psTblToken->n = len; psTblToken->type = TK_ID; - tSQLGetToken(psTblToken->z, &psTblToken->type); + tGetToken(psTblToken->z, &psTblToken->type); return tscValidateName(psTblToken); } diff --git a/src/client/src/tscPrepare.c b/src/client/src/tscPrepare.c index 4efaf7c2b5..73e39b471b 100644 --- a/src/client/src/tscPrepare.c +++ b/src/client/src/tscPrepare.c @@ -151,7 +151,7 @@ static int normalStmtPrepare(STscStmt* stmt) { while (sql[i] != 0) { SStrToken token = {0}; - token.n = tSQLGetToken(sql + i, &token.type); + token.n = tGetToken(sql + i, &token.type); if (token.type == TK_QUESTION) { sql[i] = 0; diff --git a/src/client/src/tscSQLParser.c b/src/client/src/tscSQLParser.c index 4cf1423c43..e33b8ef33a 100644 --- a/src/client/src/tscSQLParser.c +++ b/src/client/src/tscSQLParser.c @@ -21,19 +21,19 @@ #endif // __APPLE__ #include "os.h" -#include "ttype.h" -#include "texpr.h" #include "taos.h" #include "taosmsg.h" #include "tcompare.h" +#include "texpr.h" #include "tname.h" #include "tscLog.h" #include "tscUtil.h" #include "tschemautil.h" #include "tsclient.h" -#include "tstoken.h" #include "tstrbuild.h" +#include "ttoken.h" #include "ttokendef.h" +#include "ttype.h" #include "qUtil.h" #define DEFAULT_PRIMARY_TIMESTAMP_COL_NAME "_c0" @@ -4629,7 +4629,7 @@ int32_t getTimeRange(STimeWindow* win, tSqlExpr* pRight, int32_t optr, int16_t t } } else { SStrToken token = {.z = pRight->value.pz, .n = pRight->value.nLen, .type = TK_ID}; - int32_t len = tSQLGetToken(pRight->value.pz, &token.type); + int32_t len = tGetToken(pRight->value.pz, &token.type); if ((token.type != TK_INTEGER && token.type != TK_FLOAT) || len != pRight->value.nLen) { return TSDB_CODE_TSC_INVALID_SQL; @@ -5498,13 +5498,13 @@ int32_t validateLocalConfig(SMiscInfo* pOptions) { } int32_t validateColumnName(char* name) { - bool ret = isKeyWord(name, (int32_t)strlen(name)); + bool ret = taosIsKeyWordToken(name, (int32_t)strlen(name)); if (ret) { return TSDB_CODE_TSC_INVALID_SQL; } SStrToken token = {.z = name}; - token.n = tSQLGetToken(name, &token.type); + token.n = tGetToken(name, &token.type); if (token.type != TK_STRING && token.type != TK_ID) { return TSDB_CODE_TSC_INVALID_SQL; @@ -5515,7 +5515,7 @@ int32_t validateColumnName(char* name) { strntolower(token.z, token.z, token.n); token.n = (uint32_t)strtrim(token.z); - int32_t k = tSQLGetToken(token.z, &token.type); + int32_t k = tGetToken(token.z, &token.type); if (k != token.n) { return TSDB_CODE_TSC_INVALID_SQL; } diff --git a/src/client/src/tscSql.c b/src/client/src/tscSql.c index 36d5cec06f..02cd2bd5ef 100644 --- a/src/client/src/tscSql.c +++ b/src/client/src/tscSql.c @@ -962,7 +962,7 @@ static int tscParseTblNameList(SSqlObj *pSql, const char *tblNameList, int32_t t len = (int32_t)strtrim(tblName); SStrToken sToken = {.n = len, .type = TK_ID, .z = tblName}; - tSQLGetToken(tblName, &sToken.type); + tGetToken(tblName, &sToken.type); // Check if the table name available or not if (tscValidateName(&sToken) != TSDB_CODE_SUCCESS) { diff --git a/src/client/src/tscUtil.c b/src/client/src/tscUtil.c index e8d7d5d03c..a9826f2fdf 100644 --- a/src/client/src/tscUtil.c +++ b/src/client/src/tscUtil.c @@ -1519,7 +1519,7 @@ void tscColumnListDestroy(SArray* pColumnList) { static int32_t validateQuoteToken(SStrToken* pToken) { tscDequoteAndTrimToken(pToken); - int32_t k = tSQLGetToken(pToken->z, &pToken->type); + int32_t k = tGetToken(pToken->z, &pToken->type); if (pToken->type == TK_STRING) { return tscValidateName(pToken); @@ -1587,7 +1587,7 @@ int32_t tscValidateName(SStrToken* pToken) { tscStrToLower(pToken->z, pToken->n); //pToken->n = (uint32_t)strtrim(pToken->z); - int len = tSQLGetToken(pToken->z, &pToken->type); + int len = tGetToken(pToken->z, &pToken->type); // single token, validate it if (len == pToken->n) { @@ -1613,7 +1613,7 @@ int32_t tscValidateName(SStrToken* pToken) { pToken->n = (uint32_t)strtrim(pToken->z); } - pToken->n = tSQLGetToken(pToken->z, &pToken->type); + pToken->n = tGetToken(pToken->z, &pToken->type); if (pToken->z[pToken->n] != TS_PATH_DELIMITER[0]) { return TSDB_CODE_TSC_INVALID_SQL; } @@ -1630,7 +1630,7 @@ int32_t tscValidateName(SStrToken* pToken) { pToken->z = sep + 1; pToken->n = (uint32_t)(oldLen - (sep - pStr) - 1); - int32_t len = tSQLGetToken(pToken->z, &pToken->type); + int32_t len = tGetToken(pToken->z, &pToken->type); if (len != pToken->n || (pToken->type != TK_STRING && pToken->type != TK_ID)) { return TSDB_CODE_TSC_INVALID_SQL; } diff --git a/src/client/tests/timeParseTest.cpp b/src/client/tests/timeParseTest.cpp index d7325430cd..ba06a6b9aa 100644 --- a/src/client/tests/timeParseTest.cpp +++ b/src/client/tests/timeParseTest.cpp @@ -4,7 +4,7 @@ #include #include "taos.h" -#include "tstoken.h" +#include "ttoken.h" #include "tutil.h" int main(int argc, char** argv) { diff --git a/src/common/inc/tname.h b/src/common/inc/tname.h index 465b298973..cacd6d2ae7 100644 --- a/src/common/inc/tname.h +++ b/src/common/inc/tname.h @@ -18,7 +18,7 @@ #include "os.h" #include "taosmsg.h" -#include "tstoken.h" +#include "ttoken.h" #include "tvariant.h" typedef struct SDataStatis { diff --git a/src/common/inc/tvariant.h b/src/common/inc/tvariant.h index f8f715c6ca..21b7fd8223 100644 --- a/src/common/inc/tvariant.h +++ b/src/common/inc/tvariant.h @@ -16,8 +16,8 @@ #ifndef TDENGINE_TVARIANT_H #define TDENGINE_TVARIANT_H -#include "tstoken.h" #include "tarray.h" +#include "ttoken.h" #ifdef __cplusplus extern "C" { diff --git a/src/common/src/tname.c b/src/common/src/tname.c index 787aa1e95b..65725455e8 100644 --- a/src/common/src/tname.c +++ b/src/common/src/tname.c @@ -2,7 +2,7 @@ #include "tutil.h" #include "tname.h" -#include "tstoken.h" +#include "ttoken.h" #include "tvariant.h" #define VALIDNUMOFCOLS(x) ((x) >= TSDB_MIN_COLUMNS && (x) <= TSDB_MAX_COLUMNS) diff --git a/src/common/src/tvariant.c b/src/common/src/tvariant.c index c872d8731b..9988450c30 100644 --- a/src/common/src/tvariant.c +++ b/src/common/src/tvariant.c @@ -14,14 +14,14 @@ */ #include "os.h" -#include "tvariant.h" #include "hash.h" #include "taos.h" #include "taosdef.h" -#include "tstoken.h" +#include "ttoken.h" #include "ttokendef.h" -#include "tutil.h" #include "ttype.h" +#include "tutil.h" +#include "tvariant.h" void tVariantCreate(tVariant *pVar, SStrToken *token) { int32_t ret = 0; @@ -49,7 +49,7 @@ void tVariantCreate(tVariant *pVar, SStrToken *token) { ret = tStrToInteger(token->z, token->type, token->n, &pVar->i64, true); if (ret != 0) { SStrToken t = {0}; - tSQLGetToken(token->z, &t.type); + tGetToken(token->z, &t.type); if (t.type == TK_MINUS) { // it is a signed number which is greater than INT64_MAX or less than INT64_MIN pVar->nType = -1; // -1 means error type return; @@ -460,7 +460,7 @@ static FORCE_INLINE int32_t convertToInteger(tVariant *pVariant, int64_t *result *result = (int64_t) pVariant->dKey; } else if (pVariant->nType == TSDB_DATA_TYPE_BINARY) { SStrToken token = {.z = pVariant->pz, .n = pVariant->nLen}; - /*int32_t n = */tSQLGetToken(pVariant->pz, &token.type); + /*int32_t n = */tGetToken(pVariant->pz, &token.type); if (token.type == TK_NULL) { if (releaseVariantPtr) { @@ -495,10 +495,10 @@ static FORCE_INLINE int32_t convertToInteger(tVariant *pVariant, int64_t *result wchar_t *endPtr = NULL; SStrToken token = {0}; - token.n = tSQLGetToken(pVariant->pz, &token.type); + token.n = tGetToken(pVariant->pz, &token.type); if (token.type == TK_MINUS || token.type == TK_PLUS) { - token.n = tSQLGetToken(pVariant->pz + token.n, &token.type); + token.n = tGetToken(pVariant->pz + token.n, &token.type); } if (token.type == TK_FLOAT) { diff --git a/src/os/tests/test.cpp b/src/os/tests/test.cpp index 600e5d71a7..bdd3114363 100644 --- a/src/os/tests/test.cpp +++ b/src/os/tests/test.cpp @@ -4,7 +4,7 @@ #include #include "taos.h" -#include "tstoken.h" +#include "ttoken.h" #include "tutil.h" int main(int argc, char** argv) { diff --git a/src/query/inc/qSqlparser.h b/src/query/inc/qSqlparser.h index 0a0587f701..70ba48e551 100644 --- a/src/query/inc/qSqlparser.h +++ b/src/query/inc/qSqlparser.h @@ -22,8 +22,8 @@ extern "C" { #include "taos.h" #include "taosmsg.h" -#include "tstoken.h" #include "tstrbuild.h" +#include "ttoken.h" #include "tvariant.h" #define ParseTOKENTYPE SStrToken diff --git a/src/query/src/qSqlParser.c b/src/query/src/qSqlParser.c index 6b38536b15..56d395e498 100644 --- a/src/query/src/qSqlParser.c +++ b/src/query/src/qSqlParser.c @@ -13,13 +13,13 @@ * along with this program. If not, see . */ -#include "os.h" #include "qSqlparser.h" +#include "os.h" #include "taosdef.h" #include "taosmsg.h" #include "tcmdtype.h" -#include "tstoken.h" #include "tstrbuild.h" +#include "ttoken.h" #include "ttokendef.h" #include "tutil.h" @@ -38,7 +38,7 @@ SSqlInfo qSqlParse(const char *pStr) { goto abort_parse; } - t0.n = tSQLGetToken((char *)&pStr[i], &t0.type); + t0.n = tGetToken((char *)&pStr[i], &t0.type); t0.z = (char *)(pStr + i); i += t0.n; diff --git a/src/query/src/sql.c b/src/query/src/sql.c index 96d33a8ed6..8f36638dc9 100644 --- a/src/query/src/sql.c +++ b/src/query/src/sql.c @@ -26,14 +26,14 @@ #include /************ Begin %include sections from the grammar ************************/ +#include +#include #include #include #include -#include -#include #include "qSqlparser.h" #include "tcmdtype.h" -#include "tstoken.h" +#include "ttoken.h" #include "ttokendef.h" #include "tutil.h" #include "tvariant.h" diff --git a/src/query/tests/tsBufTest.cpp b/src/query/tests/tsBufTest.cpp index 8ca636b834..dd7f03a494 100644 --- a/src/query/tests/tsBufTest.cpp +++ b/src/query/tests/tsBufTest.cpp @@ -3,10 +3,10 @@ #include #include +#include "qTsbuf.h" #include "taos.h" #include "tsdb.h" -#include "qTsbuf.h" -#include "tstoken.h" +#include "ttoken.h" #include "tutil.h" namespace { diff --git a/src/query/tests/unitTest.cpp b/src/query/tests/unitTest.cpp index 3406d83090..d2b058cf7c 100644 --- a/src/query/tests/unitTest.cpp +++ b/src/query/tests/unitTest.cpp @@ -21,7 +21,7 @@ int32_t testValidateName(char* name) { token.n = strlen(name); token.type = 0; - tSQLGetToken(name, &token.type); + tGetToken(name, &token.type); return tscValidateName(&token); } } @@ -691,32 +691,32 @@ TEST(testCase, tGetToken_Test) { char* s = ".123 "; uint32_t type = 0; - int32_t len = tSQLGetToken(s, &type); + int32_t len = tGetToken(s, &type); EXPECT_EQ(type, TK_FLOAT); EXPECT_EQ(len, strlen(s) - 1); char s1[] = "1.123e10 "; - len = tSQLGetToken(s1, &type); + len = tGetToken(s1, &type); EXPECT_EQ(type, TK_FLOAT); EXPECT_EQ(len, strlen(s1) - 1); char s4[] = "0xff "; - len = tSQLGetToken(s4, &type); + len = tGetToken(s4, &type); EXPECT_EQ(type, TK_HEX); EXPECT_EQ(len, strlen(s4) - 1); // invalid data type char s2[] = "e10 "; - len = tSQLGetToken(s2, &type); + len = tGetToken(s2, &type); EXPECT_FALSE(type == TK_FLOAT); char s3[] = "1.1.1.1"; - len = tSQLGetToken(s3, &type); + len = tGetToken(s3, &type); EXPECT_EQ(type, TK_IPTOKEN); EXPECT_EQ(len, strlen(s3)); char s5[] = "0x "; - len = tSQLGetToken(s5, &type); + len = tGetToken(s5, &type); EXPECT_FALSE(type == TK_HEX); } diff --git a/src/util/inc/tstoken.h b/src/util/inc/ttoken.h similarity index 93% rename from src/util/inc/tstoken.h rename to src/util/inc/ttoken.h index ab1ef7b279..c1e2170ac3 100644 --- a/src/util/inc/tstoken.h +++ b/src/util/inc/ttoken.h @@ -37,13 +37,25 @@ typedef struct SStrToken { char *z; } SStrToken; +extern const char escapeChar[]; + +/** + * check if it is a number or not + * @param pToken + * @return + */ +#define isNumber(tk) \ +((tk)->type == TK_INTEGER || (tk)->type == TK_FLOAT || (tk)->type == TK_HEX || (tk)->type == TK_BIN) + +#define GET_ESCAPE_CHAR(c) (escapeChar[(uint8_t)(c)]) + /** * tokenizer for sql string * @param z * @param tokenType * @return */ -uint32_t tSQLGetToken(char *z, uint32_t *tokenType); +uint32_t tGetToken(char *z, uint32_t *tokenType); /** * enhanced tokenizer for sql string. @@ -61,21 +73,12 @@ SStrToken tStrGetToken(char *str, int32_t *i, bool isPrevOptr); * @param len * @return */ -bool isKeyWord(const char *z, int32_t len); - -/** - * check if it is a number or not - * @param pToken - * @return - */ -#define isNumber(tk) \ -((tk)->type == TK_INTEGER || (tk)->type == TK_FLOAT || (tk)->type == TK_HEX || (tk)->type == TK_BIN) - +bool taosIsKeyWordToken(const char *z, int32_t len); /** * check if it is a token or not - * @param pToken - * @return token type, if it is not a number, TK_ILLEGAL will return + * @param pToken + * @return token type, if it is not a number, TK_ILLEGAL will return */ static FORCE_INLINE int32_t tGetNumericStringType(const SStrToken* pToken) { const char* z = pToken->z; diff --git a/src/query/src/qTokenizer.c b/src/util/src/ttokenizer.c similarity index 92% rename from src/query/src/qTokenizer.c rename to src/util/src/ttokenizer.c index 7869e27707..794420d55b 100644 --- a/src/query/src/qTokenizer.c +++ b/src/util/src/ttokenizer.c @@ -18,7 +18,7 @@ #include "hash.h" #include "hashfunc.h" #include "taosdef.h" -#include "tstoken.h" +#include "ttoken.h" #include "ttokendef.h" #include "tutil.h" @@ -232,6 +232,18 @@ static const char isIdChar[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ }; +const char escapeChar[] = { + /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, /* 0x */ + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, /* 1x */ + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, /* 2x */ + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, /* 3x */ + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,/* 4x */ + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,/* 5x */ + 0x60, 0x07, 0x08, 0x63, 0x64, 0x65, 0x0C, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x0A, 0x6F,/* 6x */ + 0x70, 0x71, 0x0D, 0x73, 0x09, 0x75, 0x0B, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,/* 7x */ +}; + static void* keywordHashTable = NULL; static void doInitKeywordsTable(void) { @@ -247,7 +259,7 @@ static void doInitKeywordsTable(void) { static pthread_once_t keywordsHashTableInit = PTHREAD_ONCE_INIT; -int tSQLKeywordCode(const char* z, int n) { +static int32_t tKeywordCode(const char* z, int n) { pthread_once(&keywordsHashTableInit, doInitKeywordsTable); char key[512] = {0}; @@ -271,7 +283,7 @@ int tSQLKeywordCode(const char* z, int n) { * Return the length of the token that begins at z[0]. * Store the token type in *type before returning. */ -uint32_t tSQLGetToken(char* z, uint32_t* tokenId) { +uint32_t tGetToken(char* z, uint32_t* tokenId) { uint32_t i; switch (*z) { case ' ': @@ -408,7 +420,7 @@ uint32_t tSQLGetToken(char* z, uint32_t* tokenId) { continue; } - if (z[i] == delim ) { + if (z[i] == delim) { if (z[i + 1] == delim) { i++; } else { @@ -551,7 +563,7 @@ uint32_t tSQLGetToken(char* z, uint32_t* tokenId) { } for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t) z[i]]; i++) { } - *tokenId = tSQLKeywordCode(z, i); + *tokenId = tKeywordCode(z, i); return i; } } @@ -584,7 +596,7 @@ SStrToken tStrGetToken(char* str, int32_t* i, bool isPrevOptr) { t = str[++(*i)]; } - t0.n = tSQLGetToken(&str[*i], &t0.type); + t0.n = tGetToken(&str[*i], &t0.type); break; // not support user specfied ignored symbol list @@ -613,7 +625,7 @@ SStrToken tStrGetToken(char* str, int32_t* i, bool isPrevOptr) { // support parse the 'db.tbl' format, notes: There should be no space on either side of the dot! if ('.' == str[*i + t0.n]) { - len = tSQLGetToken(&str[*i + t0.n + 1], &type); + len = tGetToken(&str[*i + t0.n + 1], &type); // only id and string are valid if ((TK_STRING != t0.type) && (TK_ID != t0.type)) { @@ -628,7 +640,7 @@ SStrToken tStrGetToken(char* str, int32_t* i, bool isPrevOptr) { } else { // support parse the -/+number format if ((isPrevOptr) && (t0.type == TK_MINUS || t0.type == TK_PLUS)) { - len = tSQLGetToken(&str[*i + t0.n], &type); + len = tGetToken(&str[*i + t0.n], &type); if (type == TK_INTEGER || type == TK_FLOAT) { t0.type = type; t0.n += len; @@ -642,7 +654,9 @@ SStrToken tStrGetToken(char* str, int32_t* i, bool isPrevOptr) { return t0; } -bool isKeyWord(const char* z, int32_t len) { return (tSQLKeywordCode((char*)z, len) != TK_ID); } +bool taosIsKeyWordToken(const char* z, int32_t len) { + return (tKeywordCode((char*)z, len) != TK_ID); +} void taosCleanupKeywordsTable() { void* m = keywordHashTable; From c64fd41be7f4dba7f929565acdf033a89bd07042 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 30 Apr 2021 10:46:13 +0800 Subject: [PATCH 2/8] [td-3967]refactor. --- src/client/src/tscParseInsert.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/client/src/tscParseInsert.c b/src/client/src/tscParseInsert.c index a9d8afd09e..35dc7bca55 100644 --- a/src/client/src/tscParseInsert.c +++ b/src/client/src/tscParseInsert.c @@ -475,16 +475,6 @@ int tsParseOneRow(char **str, STableDataBlocks *pDataBlocks, SSqlCmd *pCmd, int1 continue; } -// if (sToken.z[k] == delim || sToken.z[k] == '\\') { -// if (sToken.z[k + 1] == delim) { -// cnt++; -// tmpTokenBuf[j] = sToken.z[k + 1]; -// j++;s -// k++; -// continue; -// } -// } - tmpTokenBuf[j] = sToken.z[k]; j++; } From bdc887741ece56dc7e1afa9789338d55b652c35c Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 30 Apr 2021 10:47:42 +0800 Subject: [PATCH 3/8] [td-3967] --- src/client/src/tscParseInsert.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/client/src/tscParseInsert.c b/src/client/src/tscParseInsert.c index 35dc7bca55..298f1245e7 100644 --- a/src/client/src/tscParseInsert.c +++ b/src/client/src/tscParseInsert.c @@ -462,7 +462,6 @@ int tsParseOneRow(char **str, STableDataBlocks *pDataBlocks, SSqlCmd *pCmd, int1 // Remove quotation marks if (TK_STRING == sToken.type) { // delete escape character: \\, \', \" -// char delim = sToken.z[0]; int32_t cnt = 0; int32_t j = 0; for (uint32_t k = 1; k < sToken.n - 1; ++k) { @@ -704,6 +703,8 @@ static int32_t doParseInsertStatement(SSqlCmd* pCmd, char **str, STableDataBlock } code = TSDB_CODE_TSC_INVALID_SQL; + + // todo the size should be limited to the current sql length char *tmpTokenBuf = calloc(1, 16*1024); // used for deleting Escape character: \\, \', \" if (NULL == tmpTokenBuf) { return TSDB_CODE_TSC_OUT_OF_MEMORY; From d78ac9848c3efd37107b886bd1f6e1780b805d86 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 30 Apr 2021 17:12:20 +0800 Subject: [PATCH 4/8] [td-3967]1) refactor. --- src/client/src/tscParseInsert.c | 12 +++++++++--- .../script/general/parser/binary_escapeCharacter.sim | 9 +++++++++ tests/script/general/parser/testSuite.sim | 1 + 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/client/src/tscParseInsert.c b/src/client/src/tscParseInsert.c index 298f1245e7..9130118f80 100644 --- a/src/client/src/tscParseInsert.c +++ b/src/client/src/tscParseInsert.c @@ -462,13 +462,19 @@ int tsParseOneRow(char **str, STableDataBlocks *pDataBlocks, SSqlCmd *pCmd, int1 // Remove quotation marks if (TK_STRING == sToken.type) { // delete escape character: \\, \', \" + char delim = sToken.z[0]; + int32_t cnt = 0; int32_t j = 0; for (uint32_t k = 1; k < sToken.n - 1; ++k) { - if (sToken.z[k] == '\\') { - cnt++; + if (sToken.z[k] == '\\' || (sToken.z[k] == delim && sToken.z[k + 1] == delim)) { + if (sToken.z[k] == '\\') { + tmpTokenBuf[j] = GET_ESCAPE_CHAR(sToken.z[k+1]); + } else { + tmpTokenBuf[j] = sToken.z[k + 1]; + } - tmpTokenBuf[j] = GET_ESCAPE_CHAR(sToken.z[k+1]); + cnt++; j++; k++; continue; diff --git a/tests/script/general/parser/binary_escapeCharacter.sim b/tests/script/general/parser/binary_escapeCharacter.sim index f0589d154f..1c6bc5b37f 100644 --- a/tests/script/general/parser/binary_escapeCharacter.sim +++ b/tests/script/general/parser/binary_escapeCharacter.sim @@ -93,5 +93,14 @@ if $data41 != @udp005@ then print "[ERROR] expect: udp005, act:$data41" endi +print ---------------------> TD-3967 +sql insert into tb values(now, '\\abc\\\\'); +sql insert into tb values(now, '\\abc\\\\'); +sql insert into tb values(now, '\\\\'); + +print ------------->sim bug +# sql_error insert into tb values(now, '\\\'); +# sql_error insert into tb values(now, '\\'); +# sql_error insert into tb values(now, '\\n'); system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file diff --git a/tests/script/general/parser/testSuite.sim b/tests/script/general/parser/testSuite.sim index f05474d158..b605a04df7 100644 --- a/tests/script/general/parser/testSuite.sim +++ b/tests/script/general/parser/testSuite.sim @@ -55,4 +55,5 @@ run general/parser/sliding.sim run general/parser/function.sim run general/parser/stableOp.sim run general/parser/slimit_alter_tags.sim +run general/parser/binary_escapeCharacter.sim From b0ab4573bba52b91df50edde52819ecb7e6c1468 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 30 Apr 2021 17:14:57 +0800 Subject: [PATCH 5/8] [td-3967]update the sim. --- tests/script/general/parser/binary_escapeCharacter.sim | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/script/general/parser/binary_escapeCharacter.sim b/tests/script/general/parser/binary_escapeCharacter.sim index 1c6bc5b37f..b5bb10284b 100644 --- a/tests/script/general/parser/binary_escapeCharacter.sim +++ b/tests/script/general/parser/binary_escapeCharacter.sim @@ -100,7 +100,8 @@ sql insert into tb values(now, '\\\\'); print ------------->sim bug # sql_error insert into tb values(now, '\\\'); -# sql_error insert into tb values(now, '\\'); -# sql_error insert into tb values(now, '\\n'); +sql_error insert into tb values(now, '\'); +#sql_error insert into tb values(now, '\\\n'); +sql insert into tb values(now, '\n'); system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file From b6e04604cb268e731e24f0141fac529752c80ab9 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 7 May 2021 17:29:00 +0800 Subject: [PATCH 6/8] [td-3967] --- src/client/src/tscParseInsert.c | 4 ---- src/util/src/ttokenizer.c | 2 +- tests/pytest/insert/nchar-unicode.py | 5 ++++- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/client/src/tscParseInsert.c b/src/client/src/tscParseInsert.c index 3882be56dc..923d95c888 100644 --- a/src/client/src/tscParseInsert.c +++ b/src/client/src/tscParseInsert.c @@ -468,11 +468,7 @@ int tsParseOneRow(char **str, STableDataBlocks *pDataBlocks, SSqlCmd *pCmd, int1 int32_t j = 0; for (uint32_t k = 1; k < sToken.n - 1; ++k) { if (sToken.z[k] == '\\' || (sToken.z[k] == delim && sToken.z[k + 1] == delim)) { - if (sToken.z[k] == '\\') { - tmpTokenBuf[j] = GET_ESCAPE_CHAR(sToken.z[k+1]); - } else { tmpTokenBuf[j] = sToken.z[k + 1]; - } cnt++; j++; diff --git a/src/util/src/ttokenizer.c b/src/util/src/ttokenizer.c index 794420d55b..54da75cae0 100644 --- a/src/util/src/ttokenizer.c +++ b/src/util/src/ttokenizer.c @@ -415,7 +415,7 @@ uint32_t tGetToken(char* z, uint32_t* tokenId) { int delim = z[0]; bool strEnd = false; for (i = 1; z[i]; i++) { - if (z[i] == '\\') { + if (z[i] == '\\') { // ignore the escaped character that follows this backslash i++; continue; } diff --git a/tests/pytest/insert/nchar-unicode.py b/tests/pytest/insert/nchar-unicode.py index c417a6bca2..4afcf5b760 100644 --- a/tests/pytest/insert/nchar-unicode.py +++ b/tests/pytest/insert/nchar-unicode.py @@ -57,12 +57,15 @@ class TDTestCase: # https://www.ltg.ed.ac.uk/~richard/unicode-sample.html # Basic Latin - data = r'! # $ % & ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~' + data = r'! # $ % & ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \\ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~' tdLog.info("insert Basic Latin %d length data: %s" % (len(data), data)) tdSql.execute("insert into tb values (now, '%s')" % data) tdSql.query("select * from tb") tdSql.checkRows(3) + + data = data.replace('\\\\', '\\') tdSql.checkData(2, 1, data) + # tdSql.execute("insert into tb values(now, 'abc')") # Latin-1 Supplement data = ' ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ­ ® ¯ ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿ À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ' From f2fa15156543728d68e3502970bcf4e2ee55381a Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 10 May 2021 17:01:46 +0800 Subject: [PATCH 7/8] [td-3967]update the test case. --- .../taosdata/jdbc/cases/InsertSpecialCharacterJniTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/connector/jdbc/src/test/java/com/taosdata/jdbc/cases/InsertSpecialCharacterJniTest.java b/src/connector/jdbc/src/test/java/com/taosdata/jdbc/cases/InsertSpecialCharacterJniTest.java index 9014e82a9e..a3c46dc232 100644 --- a/src/connector/jdbc/src/test/java/com/taosdata/jdbc/cases/InsertSpecialCharacterJniTest.java +++ b/src/connector/jdbc/src/test/java/com/taosdata/jdbc/cases/InsertSpecialCharacterJniTest.java @@ -12,7 +12,7 @@ public class InsertSpecialCharacterJniTest { private static String tbname1 = "test"; private static String tbname2 = "weather"; private static String special_character_str_1 = "$asd$$fsfsf$"; - private static String special_character_str_2 = "\\asdfsfsf\\\\"; + private static String special_character_str_2 = "\\\\asdfsfsf\\\\"; private static String special_character_str_3 = "\\\\asdfsfsf\\"; private static String special_character_str_4 = "?asd??fsf?sf?"; private static String special_character_str_5 = "?#sd@$f(('<(s[P)>\"){]}f?s[]{}%vaew|\"fsfs^a&d*jhg)(j))(f@~!?$"; @@ -70,7 +70,7 @@ public class InsertSpecialCharacterJniTest { String f1 = new String(rs.getBytes(2)); //TODO: bug to be fixed // Assert.assertEquals(special_character_str_2, f1); - Assert.assertEquals(special_character_str_2.substring(0, special_character_str_1.length() - 2), f1); + Assert.assertEquals(special_character_str_2.substring(1, special_character_str_1.length() - 1), f1); String f2 = rs.getString(3); Assert.assertNull(f2); } From edbc123e2e0b4265e2ac89d85f10bb82bb34c39e Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 11 May 2021 10:43:53 +0800 Subject: [PATCH 8/8] [td-225]update the test case. --- .../jdbc/cases/InsertSpecialCharacterRestfulTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/connector/jdbc/src/test/java/com/taosdata/jdbc/cases/InsertSpecialCharacterRestfulTest.java b/src/connector/jdbc/src/test/java/com/taosdata/jdbc/cases/InsertSpecialCharacterRestfulTest.java index 0cbbe76716..ea0d1aec41 100644 --- a/src/connector/jdbc/src/test/java/com/taosdata/jdbc/cases/InsertSpecialCharacterRestfulTest.java +++ b/src/connector/jdbc/src/test/java/com/taosdata/jdbc/cases/InsertSpecialCharacterRestfulTest.java @@ -13,7 +13,7 @@ public class InsertSpecialCharacterRestfulTest { private static String tbname1 = "test"; private static String tbname2 = "weather"; private static String special_character_str_1 = "$asd$$fsfsf$"; - private static String special_character_str_2 = "\\asdfsfsf\\\\"; + private static String special_character_str_2 = "\\\\asdfsfsf\\\\"; private static String special_character_str_3 = "\\\\asdfsfsf\\"; private static String special_character_str_4 = "?asd??fsf?sf?"; private static String special_character_str_5 = "?#sd@$f(('<(s[P)>\"){]}f?s[]{}%vaew|\"fsfs^a&d*jhg)(j))(f@~!?$"; @@ -49,7 +49,7 @@ public class InsertSpecialCharacterRestfulTest { @Test public void testCase02() throws SQLException { //TODO: - // Expected :\asdfsfsf\\ + // Expected :\asdfsfsf\ // Actual :\asdfsfsf\ final long now = System.currentTimeMillis(); @@ -71,7 +71,7 @@ public class InsertSpecialCharacterRestfulTest { String f1 = new String(rs.getBytes(2)); //TODO: bug to be fixed // Assert.assertEquals(special_character_str_2, f1); - Assert.assertEquals(special_character_str_2.substring(0, special_character_str_1.length() - 2), f1); + Assert.assertEquals(special_character_str_2.substring(1, special_character_str_1.length() - 1), f1); String f2 = rs.getString(3); Assert.assertNull(f2); }