From 7e8f3579de3d3e37c9bf9173227bf74bd74f63c4 Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Thu, 30 Nov 2023 21:06:06 +0800 Subject: [PATCH] accept float data as int data input --- include/common/ttypes.h | 2 + include/common/tvariant.h | 3 + source/common/src/tvariant.c | 190 ++++++++++++++++++++++++-- source/common/test/commonTests.cpp | 94 ++++++++++++- source/libs/parser/src/parInsertSql.c | 50 ++++--- 5 files changed, 309 insertions(+), 30 deletions(-) diff --git a/include/common/ttypes.h b/include/common/ttypes.h index 279799b172..741e3663db 100644 --- a/include/common/ttypes.h +++ b/include/common/ttypes.h @@ -275,9 +275,11 @@ typedef struct { #define IS_VALID_TINYINT(_t) ((_t) >= INT8_MIN && (_t) <= INT8_MAX) #define IS_VALID_SMALLINT(_t) ((_t) >= INT16_MIN && (_t) <= INT16_MAX) #define IS_VALID_INT(_t) ((_t) >= INT32_MIN && (_t) <= INT32_MAX) +#define IS_VALID_INT64(_t) ((_t) >= INT64_MIN && (_t) <= INT64_MAX) #define IS_VALID_UTINYINT(_t) ((_t) >= 0 && (_t) <= UINT8_MAX) #define IS_VALID_USMALLINT(_t) ((_t) >= 0 && (_t) <= UINT16_MAX) #define IS_VALID_UINT(_t) ((_t) >= 0 && (_t) <= UINT32_MAX) +#define IS_VALID_UINT64(_t) ((_t) >= 0 && (_t) <= UINT64_MAX) #define IS_VALID_FLOAT(_t) ((_t) >= -FLT_MAX && (_t) <= FLT_MAX) #define IS_VALID_DOUBLE(_t) ((_t) >= -DBL_MAX && (_t) <= DBL_MAX) diff --git a/include/common/tvariant.h b/include/common/tvariant.h index 130945cce5..61cf277bb7 100644 --- a/include/common/tvariant.h +++ b/include/common/tvariant.h @@ -37,6 +37,9 @@ typedef struct SVariant { }; } SVariant; +int32_t toIntegerEx(const char *z, int32_t n, int64_t *value); +int32_t toUIntegerEx(const char *z, int32_t n, uint64_t *value); + int32_t toInteger(const char *z, int32_t n, int32_t base, int64_t *value); int32_t toUInteger(const char *z, int32_t n, int32_t base, uint64_t *value); diff --git a/source/common/src/tvariant.c b/source/common/src/tvariant.c index 5f2796260c..adb0964fe6 100644 --- a/source/common/src/tvariant.c +++ b/source/common/src/tvariant.c @@ -19,6 +19,181 @@ #include "ttokendef.h" #include "tvariant.h" +int32_t parseBinaryUInteger(const char *z, int32_t n, uint64_t *value) { + // skip head 0b + const char *p = z + 2; + int32_t l = n - 2; + + while (*p == '0' && l > 0) { + p++; + l--; + } + if (l > 64) { // too big + return TSDB_CODE_FAILED; + } + + uint64_t val = 0; + for (int32_t i = 0; i < l; i++) { + val = val << 1; + if (p[i] == '1') { + val |= 1; + } else if (p[i] != '0') { // abnormal char + return TSDB_CODE_FAILED; + } + } + *value = val; + return TSDB_CODE_SUCCESS; +} + +int32_t parseDecimalUInteger(const char *z, int32_t n, uint64_t *value) { + errno = 0; + char *endPtr = NULL; + while (*z == '0' && n > 1) { + z++; + n--; + } + *value = taosStr2UInt64(z, &endPtr, 10); + if (errno == ERANGE || errno == EINVAL || endPtr - z != n) { + return TSDB_CODE_FAILED; + } + + return TSDB_CODE_SUCCESS; +} + +int32_t parseHexUInteger(const char *z, int32_t n, uint64_t *value) { + errno = 0; + char *endPtr = NULL; + *value = taosStr2UInt64(z, &endPtr, 16); + if (errno == ERANGE || errno == EINVAL || endPtr - z != n) { + return TSDB_CODE_FAILED; + } + return TSDB_CODE_SUCCESS; +} + +int32_t parseSignAndUInteger(const char *z, int32_t n, bool *is_neg, uint64_t *value) { + *is_neg = false; + if (n < 1) { + return TSDB_CODE_FAILED; + } + + // parse sign + bool has_sign = false; + if (z[0] == '-') { + *is_neg = true; + has_sign = true; + } else if (z[0] == '+') { + has_sign = true; + } else if (z[0] < '0' || z[0] > '9') { + return TSDB_CODE_FAILED; + } + if (has_sign) { + z++; + n--; + if (n < 1) { + return TSDB_CODE_FAILED; + } + } + + if (n > 2 && z[0] == '0') { + if (z[1] == 'b' || z[1] == 'B') { + // paring as binary + return parseBinaryUInteger(z, n, value); + } + + if (z[1] == 'x' || z[1] == 'X') { + // parsing as hex + return parseHexUInteger(z, n, value); + } + } + + //rm flag u-unsigned, l-long, f-float(if not in hex str) + char last = tolower(z[n-1]); + if (last == 'u' || last == 'l' || last == 'f') { + n--; + if (n < 1) { + return TSDB_CODE_FAILED; + } + } + + // parsing as decimal + bool parse_int = true; + for (int32_t i = 0; i < n; i++) { + if (z[i] < '0' || z[i] > '9') { + parse_int = false; + break; + } + } + if (parse_int) { + return parseDecimalUInteger(z, n, value); + } + + // parsing as double + errno = 0; + char *endPtr = NULL; + double val = taosStr2Double(z, &endPtr); + if (errno == ERANGE || errno == EINVAL || endPtr - z != n || !IS_VALID_UINT64(val)) { + return TSDB_CODE_FAILED; + } + *value = val; + return TSDB_CODE_SUCCESS; +} + +int32_t removeSpace(const char **pp, int32_t n) { + // rm blank space from both head and tail + const char *z = *pp; + while (*z == ' ' && n > 0) { + z++; + n--; + } + if (n > 0) { + for (int32_t i = n - 1; i > 0; i--) { + if (z[i] == ' ') { + n--; + } else { + break; + } + } + } + *pp = z; + return n; +} + +int32_t toIntegerEx(const char *z, int32_t n, int64_t *value) { + n = removeSpace(&z, n); + if (n < 1) { // fail: all char is space + return TSDB_CODE_FAILED; + } + + bool is_neg = false; + uint64_t uv = 0; + int32_t code = parseSignAndUInteger(z, n, &is_neg, &uv); + if (code == TSDB_CODE_SUCCESS) { + // truncate into int64 + if (uv > INT64_MAX) { + *value = is_neg ? INT64_MIN : INT64_MAX; + return TSDB_CODE_FAILED; + } else { + *value = is_neg ? -uv : uv; + } + } + + return code; +} + +int32_t toUIntegerEx(const char *z, int32_t n, uint64_t *value) { + n = removeSpace(&z, n); + if (n < 1) { // fail: all char is space + return TSDB_CODE_FAILED; + } + + bool is_neg = false; + int32_t code = parseSignAndUInteger(z, n, &is_neg, value); + if (is_neg) { + return TSDB_CODE_FAILED; + } + return code; +} + int32_t toInteger(const char *z, int32_t n, int32_t base, int64_t *value) { errno = 0; char *endPtr = NULL; @@ -26,10 +201,10 @@ int32_t toInteger(const char *z, int32_t n, int32_t base, int64_t *value) { *value = taosStr2Int64(z, &endPtr, base); if (errno == ERANGE || errno == EINVAL || endPtr - z != n) { errno = 0; - return -1; + return TSDB_CODE_FAILED; } - return 0; + return TSDB_CODE_SUCCESS; } int32_t toUInteger(const char *z, int32_t n, int32_t base, uint64_t *value) { @@ -39,16 +214,15 @@ int32_t toUInteger(const char *z, int32_t n, int32_t base, uint64_t *value) { const char *p = z; while (*p == ' ') p++; if (*p == '-') { - return -1; + return TSDB_CODE_FAILED; } *value = taosStr2UInt64(z, &endPtr, base); if (errno == ERANGE || errno == EINVAL || endPtr - z != n) { errno = 0; - return -1; + return TSDB_CODE_FAILED; } - - return 0; + return TSDB_CODE_SUCCESS; } /** @@ -147,14 +321,13 @@ void taosVariantDestroy(SVariant *pVar) { taosMemoryFreeClear(pVar->pz); pVar->nLen = 0; } - } void taosVariantAssign(SVariant *pDst, const SVariant *pSrc) { if (pSrc == NULL || pDst == NULL) return; pDst->nType = pSrc->nType; - if (pSrc->nType == TSDB_DATA_TYPE_BINARY ||pSrc->nType == TSDB_DATA_TYPE_VARBINARY || + if (pSrc->nType == TSDB_DATA_TYPE_BINARY || pSrc->nType == TSDB_DATA_TYPE_VARBINARY || pSrc->nType == TSDB_DATA_TYPE_NCHAR || pSrc->nType == TSDB_DATA_TYPE_JSON || pSrc->nType == TSDB_DATA_TYPE_GEOMETRY) { int32_t len = pSrc->nLen + TSDB_NCHAR_SIZE; @@ -172,7 +345,6 @@ void taosVariantAssign(SVariant *pDst, const SVariant *pSrc) { if (IS_NUMERIC_TYPE(pSrc->nType) || (pSrc->nType == TSDB_DATA_TYPE_BOOL)) { pDst->i = pSrc->i; } - } int32_t taosVariantCompare(const SVariant *p1, const SVariant *p2) { diff --git a/source/common/test/commonTests.cpp b/source/common/test/commonTests.cpp index 107276d7f9..035bca0e15 100644 --- a/source/common/test/commonTests.cpp +++ b/source/common/test/commonTests.cpp @@ -24,6 +24,93 @@ int main(int argc, char** argv) { return RUN_ALL_TESTS(); } + +TEST(testCase, toIntegerEx_test) { + int64_t val = 0; + + char* s = "123"; + int32_t ret = toIntegerEx(s, strlen(s), &val); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, 123); + + s = "9223372036854775807"; + ret = toIntegerEx(s, strlen(s), &val); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, 9223372036854775807); + + s = "9323372036854775807"; + ret = toIntegerEx(s, strlen(s), &val); + ASSERT_EQ(ret, -1); + + s = "-9323372036854775807"; + ret = toIntegerEx(s, strlen(s), &val); + ASSERT_EQ(ret, -1); + + s = "-1"; + ret = toIntegerEx(s, strlen(s), &val); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, -1); + + s = "-9223372036854775807"; + ret = toIntegerEx(s, strlen(s), &val); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, -9223372036854775807); + + s = "1000u"; + ret = toIntegerEx(s, strlen(s), &val); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, 1000); + + s = "1000l"; + ret = toIntegerEx(s, strlen(s), &val); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, 1000); + + s = "1000.0f"; + ret = toIntegerEx(s, strlen(s), &val); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, 1000); + + s = "0x1f"; + ret = toIntegerEx(s, strlen(s), &val); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, 31); + + s = "-0x40"; + ret = toIntegerEx(s, strlen(s), &val); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, -64); + + s = "0b110"; + ret = toIntegerEx(s, strlen(s), &val); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, 6); + + s = "-0b10010"; + ret = toIntegerEx(s, strlen(s), &val); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, -18); + + s = "-5.2343544534e10"; + ret = toIntegerEx(s, strlen(s), &val); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, -52343544534); + + s = "1.869895343e4"; + ret = toIntegerEx(s, strlen(s), &val); + ASSERT_EQ(ret, 0); + ASSERT_EQ(val, 18698); + + // UINT64_MAX + s = "18446744073709551615"; + ret = toIntegerEx(s, strlen(s), &val); + ASSERT_EQ(ret, -1); + + s = "18446744073709551616"; + ret = toIntegerEx(s, strlen(s), &val); + ASSERT_EQ(ret, -1); +} + TEST(testCase, toInteger_test) { char* s = "123"; uint32_t type = 0; @@ -39,10 +126,9 @@ TEST(testCase, toInteger_test) { ASSERT_EQ(ret, 0); ASSERT_EQ(val, 9223372036854775807); - s = "9323372036854775807"; + s = "9323372036854775807"; // out of range, > int64_max ret = toInteger(s, strlen(s), 10, &val); - ASSERT_EQ(ret, 0); - ASSERT_EQ(val, 9323372036854775807u); + ASSERT_EQ(ret, -1); s = "-9323372036854775807"; ret = toInteger(s, strlen(s), 10, &val); @@ -139,7 +225,7 @@ TEST(testCase, Datablock_test) { printf("binary column length:%d\n", *(int32_t*)p1->pData); - ASSERT_EQ(blockDataGetNumOfCols(b), 2); + ASSERT_EQ(blockDataGetNumOfCols(b), 3); ASSERT_EQ(blockDataGetNumOfRows(b), 40); char* pData = colDataGetData(p1, 3); diff --git a/source/libs/parser/src/parInsertSql.c b/source/libs/parser/src/parInsertSql.c index 31b016458a..c993efb9ae 100644 --- a/source/libs/parser/src/parInsertSql.c +++ b/source/libs/parser/src/parInsertSql.c @@ -433,7 +433,8 @@ static int32_t parseTagToken(const char** end, SToken* pToken, SSchema* pSchema, } case TSDB_DATA_TYPE_TINYINT: { - if (TSDB_CODE_SUCCESS != toInteger(pToken->z, pToken->n, 10, &iv)) { + code = toIntegerEx(pToken->z, pToken->n, &iv); + if (TSDB_CODE_SUCCESS != code) { return buildSyntaxErrMsg(pMsgBuf, "invalid tinyint data", pToken->z); } else if (!IS_VALID_TINYINT(iv)) { return buildSyntaxErrMsg(pMsgBuf, "tinyint data overflow", pToken->z); @@ -444,7 +445,8 @@ static int32_t parseTagToken(const char** end, SToken* pToken, SSchema* pSchema, } case TSDB_DATA_TYPE_UTINYINT: { - if (TSDB_CODE_SUCCESS != toUInteger(pToken->z, pToken->n, 10, &uv)) { + code = toUIntegerEx(pToken->z, pToken->n, &uv); + if (TSDB_CODE_SUCCESS != code) { return buildSyntaxErrMsg(pMsgBuf, "invalid unsigned tinyint data", pToken->z); } else if (uv > UINT8_MAX) { return buildSyntaxErrMsg(pMsgBuf, "unsigned tinyint data overflow", pToken->z); @@ -454,7 +456,8 @@ static int32_t parseTagToken(const char** end, SToken* pToken, SSchema* pSchema, } case TSDB_DATA_TYPE_SMALLINT: { - if (TSDB_CODE_SUCCESS != toInteger(pToken->z, pToken->n, 10, &iv)) { + code = toIntegerEx(pToken->z, pToken->n, &iv); + if (TSDB_CODE_SUCCESS != code) { return buildSyntaxErrMsg(pMsgBuf, "invalid smallint data", pToken->z); } else if (!IS_VALID_SMALLINT(iv)) { return buildSyntaxErrMsg(pMsgBuf, "smallint data overflow", pToken->z); @@ -464,7 +467,8 @@ static int32_t parseTagToken(const char** end, SToken* pToken, SSchema* pSchema, } case TSDB_DATA_TYPE_USMALLINT: { - if (TSDB_CODE_SUCCESS != toUInteger(pToken->z, pToken->n, 10, &uv)) { + code = toUIntegerEx(pToken->z, pToken->n, &uv); + if (TSDB_CODE_SUCCESS != code) { return buildSyntaxErrMsg(pMsgBuf, "invalid unsigned smallint data", pToken->z); } else if (uv > UINT16_MAX) { return buildSyntaxErrMsg(pMsgBuf, "unsigned smallint data overflow", pToken->z); @@ -474,7 +478,8 @@ static int32_t parseTagToken(const char** end, SToken* pToken, SSchema* pSchema, } case TSDB_DATA_TYPE_INT: { - if (TSDB_CODE_SUCCESS != toInteger(pToken->z, pToken->n, 10, &iv)) { + code = toIntegerEx(pToken->z, pToken->n, &iv); + if (TSDB_CODE_SUCCESS != code) { return buildSyntaxErrMsg(pMsgBuf, "invalid int data", pToken->z); } else if (!IS_VALID_INT(iv)) { return buildSyntaxErrMsg(pMsgBuf, "int data overflow", pToken->z); @@ -484,7 +489,8 @@ static int32_t parseTagToken(const char** end, SToken* pToken, SSchema* pSchema, } case TSDB_DATA_TYPE_UINT: { - if (TSDB_CODE_SUCCESS != toUInteger(pToken->z, pToken->n, 10, &uv)) { + code = toUIntegerEx(pToken->z, pToken->n, &uv); + if (TSDB_CODE_SUCCESS != code) { return buildSyntaxErrMsg(pMsgBuf, "invalid unsigned int data", pToken->z); } else if (uv > UINT32_MAX) { return buildSyntaxErrMsg(pMsgBuf, "unsigned int data overflow", pToken->z); @@ -494,7 +500,8 @@ static int32_t parseTagToken(const char** end, SToken* pToken, SSchema* pSchema, } case TSDB_DATA_TYPE_BIGINT: { - if (TSDB_CODE_SUCCESS != toInteger(pToken->z, pToken->n, 10, &iv)) { + code = toIntegerEx(pToken->z, pToken->n, &iv); + if (TSDB_CODE_SUCCESS != code) { return buildSyntaxErrMsg(pMsgBuf, "invalid bigint data", pToken->z); } val->i64 = iv; @@ -502,7 +509,8 @@ static int32_t parseTagToken(const char** end, SToken* pToken, SSchema* pSchema, } case TSDB_DATA_TYPE_UBIGINT: { - if (TSDB_CODE_SUCCESS != toUInteger(pToken->z, pToken->n, 10, &uv)) { + code = toUIntegerEx(pToken->z, pToken->n, &uv); + if (TSDB_CODE_SUCCESS != code) { return buildSyntaxErrMsg(pMsgBuf, "invalid unsigned bigint data", pToken->z); } *(uint64_t*)(&val->i64) = uv; @@ -1351,7 +1359,8 @@ static int32_t parseValueTokenImpl(SInsertParseContext* pCxt, const char** pSql, break; } case TSDB_DATA_TYPE_TINYINT: { - if (TSDB_CODE_SUCCESS != toInteger(pToken->z, pToken->n, 10, &pVal->value.val)) { + int32_t code = toIntegerEx(pToken->z, pToken->n, &pVal->value.val); + if (TSDB_CODE_SUCCESS != code) { return buildSyntaxErrMsg(&pCxt->msg, "invalid tinyint data", pToken->z); } else if (!IS_VALID_TINYINT(pVal->value.val)) { return buildSyntaxErrMsg(&pCxt->msg, "tinyint data overflow", pToken->z); @@ -1359,7 +1368,8 @@ static int32_t parseValueTokenImpl(SInsertParseContext* pCxt, const char** pSql, break; } case TSDB_DATA_TYPE_UTINYINT: { - if (TSDB_CODE_SUCCESS != toUInteger(pToken->z, pToken->n, 10, &pVal->value.val)) { + int32_t code = toUIntegerEx(pToken->z, pToken->n, &pVal->value.val); + if (TSDB_CODE_SUCCESS != code) { return buildSyntaxErrMsg(&pCxt->msg, "invalid unsigned tinyint data", pToken->z); } else if (pVal->value.val > UINT8_MAX) { return buildSyntaxErrMsg(&pCxt->msg, "unsigned tinyint data overflow", pToken->z); @@ -1367,7 +1377,8 @@ static int32_t parseValueTokenImpl(SInsertParseContext* pCxt, const char** pSql, break; } case TSDB_DATA_TYPE_SMALLINT: { - if (TSDB_CODE_SUCCESS != toInteger(pToken->z, pToken->n, 10, &pVal->value.val)) { + int32_t code = toIntegerEx(pToken->z, pToken->n, &pVal->value.val); + if (TSDB_CODE_SUCCESS != code) { return buildSyntaxErrMsg(&pCxt->msg, "invalid smallint data", pToken->z); } else if (!IS_VALID_SMALLINT(pVal->value.val)) { return buildSyntaxErrMsg(&pCxt->msg, "smallint data overflow", pToken->z); @@ -1375,7 +1386,8 @@ static int32_t parseValueTokenImpl(SInsertParseContext* pCxt, const char** pSql, break; } case TSDB_DATA_TYPE_USMALLINT: { - if (TSDB_CODE_SUCCESS != toUInteger(pToken->z, pToken->n, 10, &pVal->value.val)) { + int32_t code = toUIntegerEx(pToken->z, pToken->n, &pVal->value.val); + if (TSDB_CODE_SUCCESS != code) { return buildSyntaxErrMsg(&pCxt->msg, "invalid unsigned smallint data", pToken->z); } else if (pVal->value.val > UINT16_MAX) { return buildSyntaxErrMsg(&pCxt->msg, "unsigned smallint data overflow", pToken->z); @@ -1383,7 +1395,8 @@ static int32_t parseValueTokenImpl(SInsertParseContext* pCxt, const char** pSql, break; } case TSDB_DATA_TYPE_INT: { - if (TSDB_CODE_SUCCESS != toInteger(pToken->z, pToken->n, 10, &pVal->value.val)) { + int32_t code = toIntegerEx(pToken->z, pToken->n, &pVal->value.val); + if (TSDB_CODE_SUCCESS != code) { return buildSyntaxErrMsg(&pCxt->msg, "invalid int data", pToken->z); } else if (!IS_VALID_INT(pVal->value.val)) { return buildSyntaxErrMsg(&pCxt->msg, "int data overflow", pToken->z); @@ -1391,7 +1404,8 @@ static int32_t parseValueTokenImpl(SInsertParseContext* pCxt, const char** pSql, break; } case TSDB_DATA_TYPE_UINT: { - if (TSDB_CODE_SUCCESS != toUInteger(pToken->z, pToken->n, 10, &pVal->value.val)) { + int32_t code = toUIntegerEx(pToken->z, pToken->n, &pVal->value.val); + if (TSDB_CODE_SUCCESS != code) { return buildSyntaxErrMsg(&pCxt->msg, "invalid unsigned int data", pToken->z); } else if (pVal->value.val > UINT32_MAX) { return buildSyntaxErrMsg(&pCxt->msg, "unsigned int data overflow", pToken->z); @@ -1399,14 +1413,16 @@ static int32_t parseValueTokenImpl(SInsertParseContext* pCxt, const char** pSql, break; } case TSDB_DATA_TYPE_BIGINT: { - if (TSDB_CODE_SUCCESS != toInteger(pToken->z, pToken->n, 10, &pVal->value.val)) { + int32_t code = toIntegerEx(pToken->z, pToken->n, &pVal->value.val); + if (TSDB_CODE_SUCCESS != code) { return buildSyntaxErrMsg(&pCxt->msg, "invalid bigint data", pToken->z); } break; } case TSDB_DATA_TYPE_UBIGINT: { - if (TSDB_CODE_SUCCESS != toUInteger(pToken->z, pToken->n, 10, &pVal->value.val)) { - return buildSyntaxErrMsg(&pCxt->msg, "invalid unsigned bigint data", pToken->z); + int32_t code = toUIntegerEx(pToken->z, pToken->n, &pVal->value.val); + if (TSDB_CODE_SUCCESS != code) { + return buildSyntaxErrMsg(&pCxt->msg, "invalid unsigned int data", pToken->z); } break; }