fix several issues in string token parsing

1. the shell should not remove the escape sequences \' and \" in a string.
2. `tsParseTime` should not unescape the next string token (this issue
appears after the first issue was fixed).
3. `value[4] != '-'` in `tsParseTime` crashes in rare cases, when `value[4]`
lies in unallocated virtual memory.
4. `operator[x]` and `delimiter[x]` may result in unexpected behavior,
because the string is UTF-8 encoded and `x < 0` could be true.
5. changes the behavior of `tscGetToken` a little: an unescaped single
quote is now allowed in double-quoted strings, and an unescaped double quote
is now allowed in single-quoted strings.
6. minor performance improvements and other improvements.
This commit is contained in:
localvar 2019-08-24 14:04:56 +08:00
parent 90e5690df5
commit 693ee662af
4 changed files with 49 additions and 54 deletions

View File

@ -94,16 +94,12 @@ int tsParseTime(char *value, int32_t valuelen, int64_t *time, char **next, char
int64_t useconds = 0; int64_t useconds = 0;
char *pTokenEnd = *next; char *pTokenEnd = *next;
tscGetToken(pTokenEnd, &token, &tokenlen);
if (tokenlen == 0 && strlen(value) == 0) {
INVALID_SQL_RET_MSG(error, "missing time stamp");
}
if (strncmp(value, "now", 3) == 0 && valuelen == 3) { if (valuelen == 3 && (strncmp(value, "now", 3) == 0)) {
useconds = taosGetTimestamp(timePrec); useconds = taosGetTimestamp(timePrec);
} else if (strncmp(value, "0", 1) == 0 && valuelen == 1) { } else if (valuelen == 1 && value[0] == '0') {
// do nothing // do nothing
} else if (value[4] != '-') { } else if (valuelen <= 4 || value[4] != '-') {
for (int32_t i = 0; i < valuelen; ++i) { for (int32_t i = 0; i < valuelen; ++i) {
/* /*
* filter illegal input. * filter illegal input.
@ -126,7 +122,6 @@ int tsParseTime(char *value, int32_t valuelen, int64_t *time, char **next, char
for (int k = valuelen; value[k] != '\0'; k++) { for (int k = valuelen; value[k] != '\0'; k++) {
if (value[k] == ' ' || value[k] == '\t') continue; if (value[k] == ' ' || value[k] == '\t') continue;
if (value[k] == ',') { if (value[k] == ',') {
*next = pTokenEnd;
*time = useconds; *time = useconds;
return 0; return 0;
} }

View File

@ -30,7 +30,7 @@ typedef struct SSQLToken {
} SSQLToken; } SSQLToken;
char *tscGetToken(char *string, char **token, int *tokenLen); char *tscGetToken(char *string, char **token, int *tokenLen);
char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, char *delimiters); char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, const char *delimiters);
/** /**
* tokenizer for sql string * tokenizer for sql string

View File

@ -111,6 +111,7 @@ TAOS *shellInit(struct arguments *args) {
void shellReplaceCtrlChar(char *str) { void shellReplaceCtrlChar(char *str) {
_Bool ctrlOn = false; _Bool ctrlOn = false;
char *pstr = NULL; char *pstr = NULL;
char quote = 0;
for (pstr = str; *str != '\0'; ++str) { for (pstr = str; *str != '\0'; ++str) {
if (ctrlOn) { if (ctrlOn) {
@ -131,6 +132,13 @@ void shellReplaceCtrlChar(char *str) {
*pstr = '\\'; *pstr = '\\';
pstr++; pstr++;
break; break;
case '\'':
case '"':
if (quote) {
*pstr++ = '\\';
*pstr++ = *str;
}
break;
default: default:
break; break;
} }
@ -139,6 +147,11 @@ void shellReplaceCtrlChar(char *str) {
if (*str == '\\') { if (*str == '\\') {
ctrlOn = true; ctrlOn = true;
} else { } else {
if (quote == *str) {
quote = 0;
} else if (*str == '\'' || *str == '"') {
quote = *str;
}
*pstr = *str; *pstr = *str;
pstr++; pstr++;
} }

View File

@ -23,77 +23,68 @@
#include "shash.h" #include "shash.h"
#include "tstoken.h" #include "tstoken.h"
static char operator[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, static const char operator[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '$', '%', '&', 0, '(', ')', '*', '+', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '$', '%', '&', 0, '(', ')', '*', '+',
0, '-', 0, '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '<', '=', '>', 0, 0, 0, 0, '-', 0, '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '<', '=', '>', 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, '[', 0, ']', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '[', 0, ']', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '|', 0, 0, 0}; 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '|', 0, 0, 0};
static char delimiter[] = { static const char delimiter[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ',', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ';', 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ',', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ';', 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
}; };
bool isCharInDelimiter(char c, char *delimiter) { char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, const char *delimiters) {
for (int i = 0; i < strlen(delimiter); i++) { while ((*string != 0) && strchr(delimiters, *string)) {
if (delimiter[i] == c) return true; ++string;
}
return false;
}
char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, char *delimiters) {
while (*string != 0) {
if (isCharInDelimiter(*string, delimiters)) {
++string;
} else {
break;
}
} }
*token = string; *token = string;
char *str = string; char *str = string;
*tokenLen = 0; while ((*str != 0) && (strchr(delimiters, *str) == NULL)) {
while (*str != 0) { ++str;
if (!isCharInDelimiter(*str, delimiters)) {
*tokenLen = *tokenLen + 1;
str++;
} else {
break;
}
} }
*tokenLen = str - string;
return string; return string;
} }
/*
 * Returns true when the byte c is marked as an operator character in the
 * operator[] lookup table, false otherwise.
 *
 * The byte is converted to unsigned char and range-checked against the table
 * size before it is used as an index. A plain `c < 0` test only rejects
 * non-ASCII bytes when `char` is signed; with an unsigned `char` such bytes
 * would index past the end of the table.
 */
static bool isOperator(char c) {
  unsigned char idx = (unsigned char)c;
  return (idx < sizeof(operator)) && (operator[idx] != 0);
}
/*
 * Returns true when the byte c is marked as a delimiter in the delimiter[]
 * lookup table, false otherwise.
 *
 * The byte is converted to unsigned char and range-checked against the table
 * size before it is used as an index. A plain `c < 0` test only rejects
 * non-ASCII bytes when `char` is signed; with an unsigned `char` such bytes
 * would index past the end of the table.
 */
static bool isDelimiter(char c) {
  unsigned char idx = (unsigned char)c;
  return (idx < sizeof(delimiter)) && (delimiter[idx] != 0);
}
char *tscGetToken(char *string, char **token, int *tokenLen) { char *tscGetToken(char *string, char **token, int *tokenLen) {
char quote = 0; char quote = 0;
while (*string != 0) { while (*string != 0) {
if (delimiter[*string]) { if (isDelimiter(*string)) {
++string; ++string;
} else { } else {
break; break;
} }
} }
char quotaChar = 0;
if (*string == '\'' || *string == '\"') { if (*string == '\'' || *string == '\"') {
quote = 1; quote = *string;
quotaChar = *string;
string++; string++;
} }
*token = string; *token = string;
/* not in string, return token */ /* not in string, return token */
if (*string > 0 && operator[*string] && quote == 0) { if (quote == 0 && isOperator(*string)) {
string++; string++;
/* handle the case: insert into tabx using stable1 tags(-1)/tags(+1) /* handle the case: insert into tabx using stable1 tags(-1)/tags(+1)
* values(....) */ * values(....) */
if (operator[*string] &&(*string != '(' && *string != ')' && *string != '-' && *string != '+')) if (isOperator(*string) &&(*string != '(' && *string != ')' && *string != '-' && *string != '+'))
*tokenLen = 2; *tokenLen = 2;
else else
*tokenLen = 1; *tokenLen = 1;
@ -102,28 +93,24 @@ char *tscGetToken(char *string, char **token, int *tokenLen) {
while (*string != 0) { while (*string != 0) {
if (quote) { if (quote) {
// handle escape situation: '\"', the " should not be eliminated if (*string == '\'' || *string == '"') {
if (*string == quotaChar) { // handle escape situation, " and ' should not be eliminated
if (*(string - 1) != '\\') { if (*(string - 1) == '\\') {
break;
} else {
shiftStr(string - 1, string); shiftStr(string - 1, string);
continue;
} else if (*string == quote) {
break;
} }
} else {
++string;
} }
} else { } else if (isDelimiter(*string) || isOperator(*string)) {
if (delimiter[*string]) break; break;
if (operator[*string]) break;
++string;
} }
++string;
} }
*tokenLen = (int)(string - *token); *tokenLen = (int)(string - *token);
if (quotaChar != 0 && *string != 0 && *(string + 1) != 0) { if (quote && *string != 0) {
return string + 1; return string + 1;
} else { } else {
return string; return string;
@ -135,7 +122,7 @@ void shiftStr(char *dst, char *src) {
do { do {
dst[i] = src[i]; dst[i] = src[i];
i++; i++;
} while (delimiter[src[i]] == 0); } while (!isDelimiter(src[i]));
src[i - 1] = ' '; src[i - 1] = ' ';
} }