fix several issues in string token parsing

1. The shell should not remove the escape sequences \' and \" in a string.
2. `tsParseTime` should not unescape the next string token (this issue
appears after the first issue was fixed).
3. `value[4] != '-'` in `tsParseTime` crashes in rare cases if `value[4]`
is in unallocated virtual memory.
4. `operator[x]` and `delimiter[x]` may result in unexpected behavior,
as the string is UTF-8 encoded and `x < 0` could be true.
5. Changes the behavior of `tscGetToken` a little: now, an unescaped single
quotation mark is allowed in double-quoted strings and an unescaped double
quotation mark is allowed in single-quoted strings.
6. minor performance improvements and other improvements.
This commit is contained in:
localvar 2019-08-24 14:04:56 +08:00
parent 90e5690df5
commit 693ee662af
4 changed files with 49 additions and 54 deletions

View File

@ -94,16 +94,12 @@ int tsParseTime(char *value, int32_t valuelen, int64_t *time, char **next, char
int64_t useconds = 0;
char *pTokenEnd = *next;
tscGetToken(pTokenEnd, &token, &tokenlen);
if (tokenlen == 0 && strlen(value) == 0) {
INVALID_SQL_RET_MSG(error, "missing time stamp");
}
if (strncmp(value, "now", 3) == 0 && valuelen == 3) {
if (valuelen == 3 && (strncmp(value, "now", 3) == 0)) {
useconds = taosGetTimestamp(timePrec);
} else if (strncmp(value, "0", 1) == 0 && valuelen == 1) {
} else if (valuelen == 1 && value[0] == '0') {
// do nothing
} else if (value[4] != '-') {
} else if (valuelen <= 4 || value[4] != '-') {
for (int32_t i = 0; i < valuelen; ++i) {
/*
* filter illegal input.
@ -126,7 +122,6 @@ int tsParseTime(char *value, int32_t valuelen, int64_t *time, char **next, char
for (int k = valuelen; value[k] != '\0'; k++) {
if (value[k] == ' ' || value[k] == '\t') continue;
if (value[k] == ',') {
*next = pTokenEnd;
*time = useconds;
return 0;
}

View File

@ -30,7 +30,7 @@ typedef struct SSQLToken {
} SSQLToken;
char *tscGetToken(char *string, char **token, int *tokenLen);
char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, char *delimiters);
char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, const char *delimiters);
/**
* tokenizer for sql string

View File

@ -111,6 +111,7 @@ TAOS *shellInit(struct arguments *args) {
void shellReplaceCtrlChar(char *str) {
_Bool ctrlOn = false;
char *pstr = NULL;
char quote = 0;
for (pstr = str; *str != '\0'; ++str) {
if (ctrlOn) {
@ -131,6 +132,13 @@ void shellReplaceCtrlChar(char *str) {
*pstr = '\\';
pstr++;
break;
case '\'':
case '"':
if (quote) {
*pstr++ = '\\';
*pstr++ = *str;
}
break;
default:
break;
}
@ -139,6 +147,11 @@ void shellReplaceCtrlChar(char *str) {
if (*str == '\\') {
ctrlOn = true;
} else {
if (quote == *str) {
quote = 0;
} else if (*str == '\'' || *str == '"') {
quote = *str;
}
*pstr = *str;
pstr++;
}

View File

@ -23,77 +23,68 @@
#include "shash.h"
#include "tstoken.h"
static char operator[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
static const char operator[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '$', '%', '&', 0, '(', ')', '*', '+',
0, '-', 0, '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '<', '=', '>', 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, '[', 0, ']', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '|', 0, 0, 0};
static char delimiter[] = {
static const char delimiter[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ',', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ';', 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/*
 * Report whether `c` occurs in the NUL-terminated string `delimiter`.
 *
 * The terminating NUL itself is never matched, so passing c == '\0'
 * always yields false, as does an empty `delimiter` string.
 */
bool isCharInDelimiter(char c, char *delimiter) {
  /* Walk the string once instead of re-running strlen() on every iteration. */
  for (const char *p = delimiter; *p != '\0'; ++p) {
    if (*p == c) return true;
  }
  return false;
}
char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, char *delimiters) {
while (*string != 0) {
if (isCharInDelimiter(*string, delimiters)) {
++string;
} else {
break;
}
char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, const char *delimiters) {
while ((*string != 0) && strchr(delimiters, *string)) {
++string;
}
*token = string;
char *str = string;
*tokenLen = 0;
while (*str != 0) {
if (!isCharInDelimiter(*str, delimiters)) {
*tokenLen = *tokenLen + 1;
str++;
} else {
break;
}
while ((*str != 0) && (strchr(delimiters, *str) == NULL)) {
++str;
}
*tokenLen = str - string;
return string;
}
/*
 * Report whether `c` has a non-zero entry in the `operator` lookup table.
 *
 * The byte is converted to unsigned char before it is used as an index:
 * plain char may be signed or unsigned depending on the platform, so a
 * `c < 0` test alone does not keep bytes >= 0x80 (e.g. from UTF-8 text)
 * out of the table where char is unsigned. Any byte that falls beyond the
 * end of the table is reported as "not an operator".
 */
static bool isOperator(char c) {
  unsigned char idx = (unsigned char)c;
  return (idx < sizeof(operator) / sizeof(operator[0])) && (operator[idx] != 0);
}
/*
 * Report whether `c` has a non-zero entry in the `delimiter` lookup table.
 *
 * The byte is converted to unsigned char before it is used as an index:
 * plain char may be signed or unsigned depending on the platform, so a
 * `c < 0` test alone does not keep bytes >= 0x80 (e.g. from UTF-8 text)
 * out of the table where char is unsigned. Any byte that falls beyond the
 * end of the table is reported as "not a delimiter".
 */
static bool isDelimiter(char c) {
  unsigned char idx = (unsigned char)c;
  return (idx < sizeof(delimiter) / sizeof(delimiter[0])) && (delimiter[idx] != 0);
}
char *tscGetToken(char *string, char **token, int *tokenLen) {
char quote = 0;
while (*string != 0) {
if (delimiter[*string]) {
if (isDelimiter(*string)) {
++string;
} else {
break;
}
}
char quotaChar = 0;
if (*string == '\'' || *string == '\"') {
quote = 1;
quotaChar = *string;
quote = *string;
string++;
}
*token = string;
/* not in string, return token */
if (*string > 0 && operator[*string] && quote == 0) {
if (quote == 0 && isOperator(*string)) {
string++;
/* handle the case: insert into tabx using stable1 tags(-1)/tags(+1)
* values(....) */
if (operator[*string] &&(*string != '(' && *string != ')' && *string != '-' && *string != '+'))
if (isOperator(*string) &&(*string != '(' && *string != ')' && *string != '-' && *string != '+'))
*tokenLen = 2;
else
*tokenLen = 1;
@ -102,28 +93,24 @@ char *tscGetToken(char *string, char **token, int *tokenLen) {
while (*string != 0) {
if (quote) {
// handle escape situation: '\"', the " should not be eliminated
if (*string == quotaChar) {
if (*(string - 1) != '\\') {
break;
} else {
if (*string == '\'' || *string == '"') {
// handle escape situation, " and ' should not be eliminated
if (*(string - 1) == '\\') {
shiftStr(string - 1, string);
continue;
} else if (*string == quote) {
break;
}
} else {
++string;
}
} else {
if (delimiter[*string]) break;
if (operator[*string]) break;
++string;
} else if (isDelimiter(*string) || isOperator(*string)) {
break;
}
++string;
}
*tokenLen = (int)(string - *token);
if (quotaChar != 0 && *string != 0 && *(string + 1) != 0) {
if (quote && *string != 0) {
return string + 1;
} else {
return string;
@ -135,7 +122,7 @@ void shiftStr(char *dst, char *src) {
do {
dst[i] = src[i];
i++;
} while (delimiter[src[i]] == 0);
} while (!isDelimiter(src[i]));
src[i - 1] = ' ';
}