fix several issues in string token parsing
1. The shell should not remove the escape sequences \' and \" in a string. 2. `tsParseTime` should not unescape the next string token (this issue appeared after the first issue was fixed). 3. `value[4] != '-'` in `tsParseTime` crashes in rare cases if `value[4]` is in unallocated virtual memory. 4. `operator[x]` and `delimiter[x]` may result in unexpected behavior, as the string is UTF-8 encoded and `x < 0` could be true. 5. Changes the behavior of `tscGetToken` a little: unescaped single quotation marks are now allowed in double-quoted strings, and unescaped double quotation marks are allowed in single-quoted strings. 6. Minor performance improvements and other improvements.
This commit is contained in:
parent
90e5690df5
commit
693ee662af
|
@ -94,16 +94,12 @@ int tsParseTime(char *value, int32_t valuelen, int64_t *time, char **next, char
|
||||||
int64_t useconds = 0;
|
int64_t useconds = 0;
|
||||||
|
|
||||||
char *pTokenEnd = *next;
|
char *pTokenEnd = *next;
|
||||||
tscGetToken(pTokenEnd, &token, &tokenlen);
|
|
||||||
if (tokenlen == 0 && strlen(value) == 0) {
|
|
||||||
INVALID_SQL_RET_MSG(error, "missing time stamp");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (strncmp(value, "now", 3) == 0 && valuelen == 3) {
|
if (valuelen == 3 && (strncmp(value, "now", 3) == 0)) {
|
||||||
useconds = taosGetTimestamp(timePrec);
|
useconds = taosGetTimestamp(timePrec);
|
||||||
} else if (strncmp(value, "0", 1) == 0 && valuelen == 1) {
|
} else if (valuelen == 1 && value[0] == '0') {
|
||||||
// do nothing
|
// do nothing
|
||||||
} else if (value[4] != '-') {
|
} else if (valuelen <= 4 || value[4] != '-') {
|
||||||
for (int32_t i = 0; i < valuelen; ++i) {
|
for (int32_t i = 0; i < valuelen; ++i) {
|
||||||
/*
|
/*
|
||||||
* filter illegal input.
|
* filter illegal input.
|
||||||
|
@ -126,7 +122,6 @@ int tsParseTime(char *value, int32_t valuelen, int64_t *time, char **next, char
|
||||||
for (int k = valuelen; value[k] != '\0'; k++) {
|
for (int k = valuelen; value[k] != '\0'; k++) {
|
||||||
if (value[k] == ' ' || value[k] == '\t') continue;
|
if (value[k] == ' ' || value[k] == '\t') continue;
|
||||||
if (value[k] == ',') {
|
if (value[k] == ',') {
|
||||||
*next = pTokenEnd;
|
|
||||||
*time = useconds;
|
*time = useconds;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,7 +30,7 @@ typedef struct SSQLToken {
|
||||||
} SSQLToken;
|
} SSQLToken;
|
||||||
|
|
||||||
char *tscGetToken(char *string, char **token, int *tokenLen);
|
char *tscGetToken(char *string, char **token, int *tokenLen);
|
||||||
char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, char *delimiters);
|
char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, const char *delimiters);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* tokenizer for sql string
|
* tokenizer for sql string
|
||||||
|
|
|
@ -111,6 +111,7 @@ TAOS *shellInit(struct arguments *args) {
|
||||||
void shellReplaceCtrlChar(char *str) {
|
void shellReplaceCtrlChar(char *str) {
|
||||||
_Bool ctrlOn = false;
|
_Bool ctrlOn = false;
|
||||||
char *pstr = NULL;
|
char *pstr = NULL;
|
||||||
|
char quote = 0;
|
||||||
|
|
||||||
for (pstr = str; *str != '\0'; ++str) {
|
for (pstr = str; *str != '\0'; ++str) {
|
||||||
if (ctrlOn) {
|
if (ctrlOn) {
|
||||||
|
@ -131,6 +132,13 @@ void shellReplaceCtrlChar(char *str) {
|
||||||
*pstr = '\\';
|
*pstr = '\\';
|
||||||
pstr++;
|
pstr++;
|
||||||
break;
|
break;
|
||||||
|
case '\'':
|
||||||
|
case '"':
|
||||||
|
if (quote) {
|
||||||
|
*pstr++ = '\\';
|
||||||
|
*pstr++ = *str;
|
||||||
|
}
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -139,6 +147,11 @@ void shellReplaceCtrlChar(char *str) {
|
||||||
if (*str == '\\') {
|
if (*str == '\\') {
|
||||||
ctrlOn = true;
|
ctrlOn = true;
|
||||||
} else {
|
} else {
|
||||||
|
if (quote == *str) {
|
||||||
|
quote = 0;
|
||||||
|
} else if (*str == '\'' || *str == '"') {
|
||||||
|
quote = *str;
|
||||||
|
}
|
||||||
*pstr = *str;
|
*pstr = *str;
|
||||||
pstr++;
|
pstr++;
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,77 +23,68 @@
|
||||||
#include "shash.h"
|
#include "shash.h"
|
||||||
#include "tstoken.h"
|
#include "tstoken.h"
|
||||||
|
|
||||||
static char operator[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
static const char operator[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '$', '%', '&', 0, '(', ')', '*', '+',
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '$', '%', '&', 0, '(', ')', '*', '+',
|
||||||
0, '-', 0, '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '<', '=', '>', 0, 0, 0,
|
0, '-', 0, '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '<', '=', '>', 0, 0, 0,
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
0, 0, 0, '[', 0, ']', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, '[', 0, ']', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '|', 0, 0, 0};
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '|', 0, 0, 0};
|
||||||
|
|
||||||
static char delimiter[] = {
|
static const char delimiter[] = {
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ',', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ';', 0, 0, 0, 0,
|
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ',', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ';', 0, 0, 0, 0,
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
bool isCharInDelimiter(char c, char *delimiter) {
|
char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, const char *delimiters) {
|
||||||
for (int i = 0; i < strlen(delimiter); i++) {
|
while ((*string != 0) && strchr(delimiters, *string)) {
|
||||||
if (delimiter[i] == c) return true;
|
++string;
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, char *delimiters) {
|
|
||||||
while (*string != 0) {
|
|
||||||
if (isCharInDelimiter(*string, delimiters)) {
|
|
||||||
++string;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
*token = string;
|
*token = string;
|
||||||
|
|
||||||
char *str = string;
|
char *str = string;
|
||||||
*tokenLen = 0;
|
while ((*str != 0) && (strchr(delimiters, *str) == NULL)) {
|
||||||
while (*str != 0) {
|
++str;
|
||||||
if (!isCharInDelimiter(*str, delimiters)) {
|
|
||||||
*tokenLen = *tokenLen + 1;
|
|
||||||
str++;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
*tokenLen = str - string;
|
||||||
|
|
||||||
return string;
|
return string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool isOperator(char c) {
|
||||||
|
return (c < 0) ? false : (operator[c] != 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool isDelimiter(char c) {
|
||||||
|
return (c < 0) ? false : (delimiter[c] != 0);
|
||||||
|
}
|
||||||
|
|
||||||
char *tscGetToken(char *string, char **token, int *tokenLen) {
|
char *tscGetToken(char *string, char **token, int *tokenLen) {
|
||||||
char quote = 0;
|
char quote = 0;
|
||||||
|
|
||||||
while (*string != 0) {
|
while (*string != 0) {
|
||||||
if (delimiter[*string]) {
|
if (isDelimiter(*string)) {
|
||||||
++string;
|
++string;
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
char quotaChar = 0;
|
|
||||||
if (*string == '\'' || *string == '\"') {
|
if (*string == '\'' || *string == '\"') {
|
||||||
quote = 1;
|
quote = *string;
|
||||||
quotaChar = *string;
|
|
||||||
string++;
|
string++;
|
||||||
}
|
}
|
||||||
|
|
||||||
*token = string;
|
*token = string;
|
||||||
/* not in string, return token */
|
/* not in string, return token */
|
||||||
if (*string > 0 && operator[*string] && quote == 0) {
|
if (quote == 0 && isOperator(*string)) {
|
||||||
string++;
|
string++;
|
||||||
/* handle the case: insert into tabx using stable1 tags(-1)/tags(+1)
|
/* handle the case: insert into tabx using stable1 tags(-1)/tags(+1)
|
||||||
* values(....) */
|
* values(....) */
|
||||||
if (operator[*string] &&(*string != '(' && *string != ')' && *string != '-' && *string != '+'))
|
if (isOperator(*string) &&(*string != '(' && *string != ')' && *string != '-' && *string != '+'))
|
||||||
*tokenLen = 2;
|
*tokenLen = 2;
|
||||||
else
|
else
|
||||||
*tokenLen = 1;
|
*tokenLen = 1;
|
||||||
|
@ -102,28 +93,24 @@ char *tscGetToken(char *string, char **token, int *tokenLen) {
|
||||||
|
|
||||||
while (*string != 0) {
|
while (*string != 0) {
|
||||||
if (quote) {
|
if (quote) {
|
||||||
// handle escape situation: '\"', the " should not be eliminated
|
if (*string == '\'' || *string == '"') {
|
||||||
if (*string == quotaChar) {
|
// handle escape situation, " and ' should not be eliminated
|
||||||
if (*(string - 1) != '\\') {
|
if (*(string - 1) == '\\') {
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
shiftStr(string - 1, string);
|
shiftStr(string - 1, string);
|
||||||
|
continue;
|
||||||
|
} else if (*string == quote) {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
++string;
|
|
||||||
}
|
}
|
||||||
} else {
|
} else if (isDelimiter(*string) || isOperator(*string)) {
|
||||||
if (delimiter[*string]) break;
|
break;
|
||||||
|
|
||||||
if (operator[*string]) break;
|
|
||||||
|
|
||||||
++string;
|
|
||||||
}
|
}
|
||||||
|
++string;
|
||||||
}
|
}
|
||||||
|
|
||||||
*tokenLen = (int)(string - *token);
|
*tokenLen = (int)(string - *token);
|
||||||
|
|
||||||
if (quotaChar != 0 && *string != 0 && *(string + 1) != 0) {
|
if (quote && *string != 0) {
|
||||||
return string + 1;
|
return string + 1;
|
||||||
} else {
|
} else {
|
||||||
return string;
|
return string;
|
||||||
|
@ -135,7 +122,7 @@ void shiftStr(char *dst, char *src) {
|
||||||
do {
|
do {
|
||||||
dst[i] = src[i];
|
dst[i] = src[i];
|
||||||
i++;
|
i++;
|
||||||
} while (delimiter[src[i]] == 0);
|
} while (!isDelimiter(src[i]));
|
||||||
|
|
||||||
src[i - 1] = ' ';
|
src[i - 1] = ' ';
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue