fix several issues in string token parsing
1. the shell should not remove the escape sequences \' and \" in a string. 2. `tsParseTime` should not unescape the next string token (this issue appears after the first issue was fixed). 3. `value[4] != '-'` in `tsParseTime` crashes in rare cases if `value[4]` is in unallocated virtual memory. 4. `operator[x]` and `delimiter[x]` may result in unexpected behavior because the string is UTF-8 encoded and `x < 0` could be true. 5. changes the behavior of `tscGetToken` a little: now, an unescaped single quotation mark is allowed in double-quoted strings and an unescaped double quotation mark is allowed in single-quoted strings. 6. minor performance improvements and other improvements.
This commit is contained in:
parent
90e5690df5
commit
693ee662af
|
@ -94,16 +94,12 @@ int tsParseTime(char *value, int32_t valuelen, int64_t *time, char **next, char
|
|||
int64_t useconds = 0;
|
||||
|
||||
char *pTokenEnd = *next;
|
||||
tscGetToken(pTokenEnd, &token, &tokenlen);
|
||||
if (tokenlen == 0 && strlen(value) == 0) {
|
||||
INVALID_SQL_RET_MSG(error, "missing time stamp");
|
||||
}
|
||||
|
||||
if (strncmp(value, "now", 3) == 0 && valuelen == 3) {
|
||||
if (valuelen == 3 && (strncmp(value, "now", 3) == 0)) {
|
||||
useconds = taosGetTimestamp(timePrec);
|
||||
} else if (strncmp(value, "0", 1) == 0 && valuelen == 1) {
|
||||
} else if (valuelen == 1 && value[0] == '0') {
|
||||
// do nothing
|
||||
} else if (value[4] != '-') {
|
||||
} else if (valuelen <= 4 || value[4] != '-') {
|
||||
for (int32_t i = 0; i < valuelen; ++i) {
|
||||
/*
|
||||
* filter illegal input.
|
||||
|
@ -126,7 +122,6 @@ int tsParseTime(char *value, int32_t valuelen, int64_t *time, char **next, char
|
|||
for (int k = valuelen; value[k] != '\0'; k++) {
|
||||
if (value[k] == ' ' || value[k] == '\t') continue;
|
||||
if (value[k] == ',') {
|
||||
*next = pTokenEnd;
|
||||
*time = useconds;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -30,7 +30,7 @@ typedef struct SSQLToken {
|
|||
} SSQLToken;
|
||||
|
||||
char *tscGetToken(char *string, char **token, int *tokenLen);
|
||||
char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, char *delimiters);
|
||||
char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, const char *delimiters);
|
||||
|
||||
/**
|
||||
* tokenizer for sql string
|
||||
|
|
|
@ -111,6 +111,7 @@ TAOS *shellInit(struct arguments *args) {
|
|||
void shellReplaceCtrlChar(char *str) {
|
||||
_Bool ctrlOn = false;
|
||||
char *pstr = NULL;
|
||||
char quote = 0;
|
||||
|
||||
for (pstr = str; *str != '\0'; ++str) {
|
||||
if (ctrlOn) {
|
||||
|
@ -131,6 +132,13 @@ void shellReplaceCtrlChar(char *str) {
|
|||
*pstr = '\\';
|
||||
pstr++;
|
||||
break;
|
||||
case '\'':
|
||||
case '"':
|
||||
if (quote) {
|
||||
*pstr++ = '\\';
|
||||
*pstr++ = *str;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -139,6 +147,11 @@ void shellReplaceCtrlChar(char *str) {
|
|||
if (*str == '\\') {
|
||||
ctrlOn = true;
|
||||
} else {
|
||||
if (quote == *str) {
|
||||
quote = 0;
|
||||
} else if (*str == '\'' || *str == '"') {
|
||||
quote = *str;
|
||||
}
|
||||
*pstr = *str;
|
||||
pstr++;
|
||||
}
|
||||
|
|
|
@ -23,77 +23,68 @@
|
|||
#include "shash.h"
|
||||
#include "tstoken.h"
|
||||
|
||||
static char operator[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
static const char operator[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '$', '%', '&', 0, '(', ')', '*', '+',
|
||||
0, '-', 0, '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '<', '=', '>', 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, '[', 0, ']', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '|', 0, 0, 0};
|
||||
|
||||
static char delimiter[] = {
|
||||
static const char delimiter[] = {
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ',', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ';', 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
bool isCharInDelimiter(char c, char *delimiter) {
|
||||
for (int i = 0; i < strlen(delimiter); i++) {
|
||||
if (delimiter[i] == c) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, char *delimiters) {
|
||||
while (*string != 0) {
|
||||
if (isCharInDelimiter(*string, delimiters)) {
|
||||
++string;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, const char *delimiters) {
|
||||
while ((*string != 0) && strchr(delimiters, *string)) {
|
||||
++string;
|
||||
}
|
||||
|
||||
*token = string;
|
||||
|
||||
char *str = string;
|
||||
*tokenLen = 0;
|
||||
while (*str != 0) {
|
||||
if (!isCharInDelimiter(*str, delimiters)) {
|
||||
*tokenLen = *tokenLen + 1;
|
||||
str++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
while ((*str != 0) && (strchr(delimiters, *str) == NULL)) {
|
||||
++str;
|
||||
}
|
||||
|
||||
*tokenLen = str - string;
|
||||
|
||||
return string;
|
||||
}
|
||||
|
||||
static bool isOperator(char c) {
|
||||
return (c < 0) ? false : (operator[c] != 0);
|
||||
}
|
||||
|
||||
static bool isDelimiter(char c) {
|
||||
return (c < 0) ? false : (delimiter[c] != 0);
|
||||
}
|
||||
|
||||
char *tscGetToken(char *string, char **token, int *tokenLen) {
|
||||
char quote = 0;
|
||||
|
||||
while (*string != 0) {
|
||||
if (delimiter[*string]) {
|
||||
if (isDelimiter(*string)) {
|
||||
++string;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
char quotaChar = 0;
|
||||
if (*string == '\'' || *string == '\"') {
|
||||
quote = 1;
|
||||
quotaChar = *string;
|
||||
quote = *string;
|
||||
string++;
|
||||
}
|
||||
|
||||
*token = string;
|
||||
/* not in string, return token */
|
||||
if (*string > 0 && operator[*string] && quote == 0) {
|
||||
if (quote == 0 && isOperator(*string)) {
|
||||
string++;
|
||||
/* handle the case: insert into tabx using stable1 tags(-1)/tags(+1)
|
||||
* values(....) */
|
||||
if (operator[*string] &&(*string != '(' && *string != ')' && *string != '-' && *string != '+'))
|
||||
if (isOperator(*string) &&(*string != '(' && *string != ')' && *string != '-' && *string != '+'))
|
||||
*tokenLen = 2;
|
||||
else
|
||||
*tokenLen = 1;
|
||||
|
@ -102,28 +93,24 @@ char *tscGetToken(char *string, char **token, int *tokenLen) {
|
|||
|
||||
while (*string != 0) {
|
||||
if (quote) {
|
||||
// handle escape situation: '\"', the " should not be eliminated
|
||||
if (*string == quotaChar) {
|
||||
if (*(string - 1) != '\\') {
|
||||
break;
|
||||
} else {
|
||||
if (*string == '\'' || *string == '"') {
|
||||
// handle escape situation, " and ' should not be eliminated
|
||||
if (*(string - 1) == '\\') {
|
||||
shiftStr(string - 1, string);
|
||||
continue;
|
||||
} else if (*string == quote) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
++string;
|
||||
}
|
||||
} else {
|
||||
if (delimiter[*string]) break;
|
||||
|
||||
if (operator[*string]) break;
|
||||
|
||||
++string;
|
||||
} else if (isDelimiter(*string) || isOperator(*string)) {
|
||||
break;
|
||||
}
|
||||
++string;
|
||||
}
|
||||
|
||||
*tokenLen = (int)(string - *token);
|
||||
|
||||
if (quotaChar != 0 && *string != 0 && *(string + 1) != 0) {
|
||||
if (quote && *string != 0) {
|
||||
return string + 1;
|
||||
} else {
|
||||
return string;
|
||||
|
@ -135,7 +122,7 @@ void shiftStr(char *dst, char *src) {
|
|||
do {
|
||||
dst[i] = src[i];
|
||||
i++;
|
||||
} while (delimiter[src[i]] == 0);
|
||||
} while (!isDelimiter(src[i]));
|
||||
|
||||
src[i - 1] = ' ';
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue