From 3a2ace484c33ad500b86efb0983b9f995dbf99b4 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Wed, 14 Jul 2021 11:12:35 +0800 Subject: [PATCH 01/27] fix schemaversion error that caused taosd core dump --- src/client/inc/tsclient.h | 1 + src/client/src/tscParseInsert.c | 2 +- src/client/src/tscParseLineProtocol.c | 8 ++++++-- src/client/src/tscPrepare.c | 2 ++ 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/client/inc/tsclient.h b/src/client/inc/tsclient.h index c0a1afda77..efc9914c28 100644 --- a/src/client/inc/tsclient.h +++ b/src/client/inc/tsclient.h @@ -369,6 +369,7 @@ int32_t tscSQLSyntaxErrMsg(char* msg, const char* additionalInfo, const char* s int32_t tscValidateSqlInfo(SSqlObj *pSql, struct SSqlInfo *pInfo); +int32_t tsSetBlockInfo(SSubmitBlk *pBlocks, const STableMeta *pTableMeta, int32_t numOfRows); extern int32_t sentinel; extern SHashObj *tscVgroupMap; extern SHashObj *tscTableMetaInfo; diff --git a/src/client/src/tscParseInsert.c b/src/client/src/tscParseInsert.c index 26d9cf0e49..15a157816f 100644 --- a/src/client/src/tscParseInsert.c +++ b/src/client/src/tscParseInsert.c @@ -643,7 +643,7 @@ int32_t tscAllocateMemIfNeed(STableDataBlocks *pDataBlock, int32_t rowSize, int3 return TSDB_CODE_SUCCESS; } -static int32_t tsSetBlockInfo(SSubmitBlk *pBlocks, const STableMeta *pTableMeta, int32_t numOfRows) { +int32_t tsSetBlockInfo(SSubmitBlk *pBlocks, const STableMeta *pTableMeta, int32_t numOfRows) { pBlocks->tid = pTableMeta->id.tid; pBlocks->uid = pTableMeta->id.uid; pBlocks->sversion = pTableMeta->sversion; diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index 37264e8eaa..f7858f174e 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -1140,6 +1140,7 @@ void destroySmlDataPoint(TAOS_SML_DATA_POINT* point) { } int taos_insert_lines(TAOS* taos, char* lines[], int numLines) { + int32_t code = 0; SArray* lpPoints = taosArrayInit(numLines, 
sizeof(SLPPoint)); tscParseLines(lines, numLines, lpPoints, NULL); @@ -1202,13 +1203,16 @@ int taos_insert_lines(TAOS* taos, char* lines[], int numLines) { } } - taos_sml_insert(taos, points, (int)numPoints); + code = taos_sml_insert(taos, points, (int)numPoints); + if (code != 0) { + tscError("taos_sml_insert error: %s", tstrerror((code))); + } for (int i=0; ipData; pCmd->batchSize = pBlk->numOfRows; + tsSetBlockInfo(pBlk, (*t1)->pTableMeta, pBlk->numOfRows); + taosHashPut(pCmd->insertParam.pTableBlockHashList, (void *)&pStmt->mtb.currentUid, sizeof(pStmt->mtb.currentUid), (void*)t1, POINTER_BYTES); tscDebug("0x%"PRIx64" table:%s is already prepared, uid:%" PRIu64, pSql->self, name, pStmt->mtb.currentUid); From 1148a133b55a1167a1331fef3b5ef4673ed1b649 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Wed, 14 Jul 2021 14:59:24 +0800 Subject: [PATCH 02/27] [TD-4647]: auto add column through schemaless line protocol --- src/connector/python/taos/cinterface.py | 14 +++++ src/connector/python/taos/connection.py | 8 +++ tests/examples/c/apitest.c | 16 ++--- tests/pytest/fulltest.sh | 1 + tests/pytest/insert/line_insert.py | 82 +++++++++++++++++++++++++ tests/script/fullGeneralSuite.sim | 1 + 6 files changed, 115 insertions(+), 7 deletions(-) create mode 100644 tests/pytest/insert/line_insert.py diff --git a/src/connector/python/taos/cinterface.py b/src/connector/python/taos/cinterface.py index cc7c279458..6d8ceb7a29 100644 --- a/src/connector/python/taos/cinterface.py +++ b/src/connector/python/taos/cinterface.py @@ -403,6 +403,20 @@ class CTaosInterface(object): """ return CTaosInterface.libtaos.taos_affected_rows(result) + @staticmethod + def insertLines(connection, lines): + ''' + insert through lines protocol + @lines: list of str + @rtype: tsdb error codes + ''' + numLines = len(lines) + c_lines_type = ctypes.c_char_p*numLines + c_lines = c_lines_type() + for i in range(numLines): + c_lines[i] = ctypes.c_char_p(lines[i].encode('utf-8')) + return 
CTaosInterface.libtaos.taos_insert_lines(connection, c_lines, ctypes.c_int(numLines)) + @staticmethod def subscribe(connection, restart, topic, sql, interval): """Create a subscription diff --git a/src/connector/python/taos/connection.py b/src/connector/python/taos/connection.py index f6c395342c..88d06cd718 100644 --- a/src/connector/python/taos/connection.py +++ b/src/connector/python/taos/connection.py @@ -66,6 +66,14 @@ class TDengineConnection(object): self._conn, restart, topic, sql, interval) return TDengineSubscription(sub) + def insertLines(self, lines): + """ + insert lines through line protocol + """ + if self._conn is None: + return None + return CTaosInterface.insertLines(self._conn, lines) + def cursor(self): """Return a new Cursor object using the connection. """ diff --git a/tests/examples/c/apitest.c b/tests/examples/c/apitest.c index a377bbc7b4..157eb8da13 100644 --- a/tests/examples/c/apitest.c +++ b/tests/examples/c/apitest.c @@ -975,9 +975,14 @@ int32_t verify_schema_less(TAOS* taos) { "stf,t1=4i,t3=\"t4\",t2=5,t4=5 c1=3i,c3=L\"passitagin_stf\",c2=false,c5=5,c6=7u 1626006933641a" }; -// int code = taos_insert_lines(taos, lines , sizeof(lines)/sizeof(char*)); - int code = taos_insert_lines(taos, &lines[0], 1); - code = taos_insert_lines(taos, &lines[1], 1); + int code = 0; + code = taos_insert_lines(taos, lines , sizeof(lines)/sizeof(char*)); + char* lines2[] = { + "stg,t1=3i,t2=4,t3=\"t3\" c1=3i,c3=L\"passit\",c2=false,c4=4 1626006833639000000", + "stg,t1=4i,t3=\"t4\",t2=5,t4=5 c1=3i,c3=L\"passitagin\",c2=true,c4=5,c5=5 1626006833640000000" + }; + code = taos_insert_lines(taos, &lines2[0], 1); + code = taos_insert_lines(taos, &lines2[1], 1); return code; } @@ -1000,10 +1005,7 @@ int main(int argc, char *argv[]) { printf("client info: %s\n", info); printf("************ verify shemaless *************\n"); - int code = verify_schema_less(taos); - if (code == 0) { - return code; - } + verify_schema_less(taos); printf("************ verify query 
*************\n"); verify_query(taos); diff --git a/tests/pytest/fulltest.sh b/tests/pytest/fulltest.sh index 1f45cab13a..d802db1f48 100755 --- a/tests/pytest/fulltest.sh +++ b/tests/pytest/fulltest.sh @@ -27,6 +27,7 @@ python3 ./test.py -f insert/bug3654.py python3 ./test.py -f insert/insertDynamicColBeforeVal.py python3 ./test.py -f insert/in_function.py python3 ./test.py -f insert/modify_column.py +python3 ./test.py -f insert/line_insert.py #table python3 ./test.py -f table/alter_wal0.py diff --git a/tests/pytest/insert/line_insert.py b/tests/pytest/insert/line_insert.py new file mode 100644 index 0000000000..1e13dee07c --- /dev/null +++ b/tests/pytest/insert/line_insert.py @@ -0,0 +1,82 @@ +################################################################### +# Copyright (c) 2021 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +from util.log import * +from util.cases import * +from util.sql import * + + +class TDTestCase: + def init(self, conn, logSql): + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor(), logSql) + self._conn = conn + + def run(self): + print("running {}".format(__file__)) + tdSql.execute("drop database if exists test") + tdSql.execute("create database if not exists test precision 'us'") + tdSql.execute('use test') + + tdSql.execute('create stable ste(ts timestamp, f int) tags(t1 bigint)') + + lines = [ + "st,t1=3i,t2=4,t3=\"t3\" c1=3i,c3=L\"passit\",c2=false,c4=4 1626006833639000000", + "st,t1=4i,t3=\"t4\",t2=5,t4=5 c1=3i,c3=L\"passitagin\",c2=true,c4=5,c5=5 1626006833640000000", + "ste,t2=5,t3=L\"ste\" c1=true,c2=4,c3=\"iam\" 1626056811823316532", + 
"st,t1=4i,t2=5,t3=\"t4\" c1=3i,c3=L\"passitagain\",c2=true,c4=5 1626006833642000000", + "ste,t2=5,t3=L\"ste2\" c3=\"iamszhou\",c4=false 1626056811843316532", + "ste,t2=5,t3=L\"ste2\" c3=\"iamszhou\",c4=false,c5=32b,c6=64s,c7=32w,c8=88.88f 1626056812843316532", + "st,t1=4i,t3=\"t4\",t2=5,t4=5 c1=3i,c3=L\"passitagin\",c2=true,c4=5,c5=5,c6=7u 1626006933640000000", + "stf,t1=4i,t3=\"t4\",t2=5,t4=5 c1=3i,c3=L\"passitagin\",c2=true,c4=5,c5=5,c6=7u 1626006933640000000", + "stf,t1=4i,t3=\"t4\",t2=5,t4=5 c1=3i,c3=L\"passitagin_stf\",c2=false,c5=5,c6=7u 1626006933641a"] + + code = self._conn.insertLines(lines) + print("insertLines result {}".format(code)) + + lines2 = [ + "stg,t1=3i,t2=4,t3=\"t3\" c1=3i,c3=L\"passit\",c2=false,c4=4 1626006833639000000", + "stg,t1=4i,t3=\"t4\",t2=5,t4=5 c1=3i,c3=L\"passitagin\",c2=true,c4=5,c5=5 1626006833640000000"] + + code = self._conn.insertLines([ lines2[0] ]) + print("insertLines result {}".format(code)) + + self._conn.insertLines([ lines2[1] ]) + print("insertLines result {}".format(code)) + + tdSql.query("select * from st"); + tdSql.checkRows(4) + + tdSql.query("select * from ste"); + tdSql.checkRows(3) + + tdSql.query("select * from stf"); + tdSql.checkRows(2) + + tdSql.query("select * from stg"); + tdSql.checkRows(2) + + tdSql.query("show tables"); + tdSql.checkRows(8) + + tdSql.query("describe stf"); + tdSql.checkData(3,2, 14) + + def stop(self): + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase()) diff --git a/tests/script/fullGeneralSuite.sim b/tests/script/fullGeneralSuite.sim index c820dd3bf5..5b5a911558 100644 --- a/tests/script/fullGeneralSuite.sim +++ b/tests/script/fullGeneralSuite.sim @@ -105,6 +105,7 @@ run general/parser/import_commit2.sim run general/parser/import_commit3.sim run general/parser/insert_tb.sim run general/parser/first_last.sim +run general/parser/line_insert.sim #unsupport run 
general/parser/import_file.sim run general/parser/lastrow.sim run general/parser/nchar.sim From 0991ca8eb5459c10b5973a7222b50a0f45fd3c10 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Wed, 14 Jul 2021 18:57:14 +0800 Subject: [PATCH 03/27] integrate parser from ganlin zhao --- src/client/src/tscParseLineProtocol.c | 1082 +++++++++++++++++-------- src/inc/taoserror.h | 1 + tests/examples/c/apitest.c | 22 +- 3 files changed, 771 insertions(+), 334 deletions(-) diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index f7858f174e..d3b4052d9f 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -825,304 +825,792 @@ clean_up: //========================================================================= -typedef enum { - LP_ITEM_TAG, - LP_ITEM_FIELD -} LPItemKind; +bool is_timestamp(char *pVal, uint16_t len) { + if ((len == 1) && pVal[0] == '0') { + printf("Type is timestamp(%s)\n", pVal); + return true; + } + if (len < 2) { + return false; + } + if (pVal[len - 1] == 's') { + switch (pVal[len - 2]) { + case 'm': + case 'u': + case 'n': + break; + default: + if (isdigit(pVal[len - 2])) { + break; + } else { + return false; + } + } + printf("Type is timestamp\n"); + return true; + } + return false; +} -typedef struct { - SStrToken keyToken; - SStrToken valueToken; +bool is_bool(char *pVal, uint16_t len, bool *b_val) { + if ((len == 1) && + (pVal[len - 1] == 't' || + pVal[len - 1] == 'T')) { + printf("Type is bool(%c)\n", pVal[len - 1]); + *b_val = true; + return true; + } + if ((len == 1) && + (pVal[len - 1] == 'f' || + pVal[len - 1] == 'F')) { + printf("Type is bool(%c)\n", pVal[len - 1]); + *b_val = false; + return true; + } + + if((len == 4) && + (!strcmp(&pVal[len - 4], "true") || + !strcmp(&pVal[len - 4], "True") || + !strcmp(&pVal[len - 4], "TRUE"))) { + printf("Type is bool(%s)\n", &pVal[len - 4]); + *b_val = true; + return true; + } + if((len == 5) && + (!strcmp(&pVal[len - 5], 
"false") || + !strcmp(&pVal[len - 5], "False") || + !strcmp(&pVal[len - 5], "FALSE"))) { + printf("Type is bool(%s)\n", &pVal[len - 5]); + *b_val = false; + return true; + } + return false; +} + +bool is_binary(char *pVal, uint16_t len) { + //binary: "abc" + if (len < 2) { + return false; + } + //binary + if (pVal[0] == '"' && pVal[len - 1] == '"') { + printf("Type is binary(%s)\n", pVal); + return true; + } + return false; +} + +bool is_nchar(char *pVal, uint16_t len) { + //nchar: L"abc" + if (len < 3) { + return false; + } + if (pVal[0] == 'L' && pVal[1] == '"' && pVal[len - 1] == '"') { + printf("Type is nchar(%s)\n", pVal); + return true; + } + return false; +} + +bool is_tiny_int(char *pVal, uint16_t len) { + if (len <= 2) { + return false; + } + if (!strcmp(&pVal[len - 2], "i8")) { + printf("Type is int8(%s)\n", pVal); + return true; + } + return false; +} + +bool is_tiny_uint(char *pVal, uint16_t len) { + if (len <= 2) { + return false; + } + if (pVal[0] == '-') { + return false; + } + if (!strcmp(&pVal[len - 2], "u8")) { + printf("Type is uint8(%s)\n", pVal); + return true; + } + return false; +} + +bool is_small_int(char *pVal, uint16_t len) { + if (len <= 3) { + return false; + } + if (!strcmp(&pVal[len - 3], "i16")) { + printf("Type is int16(%s)\n", pVal); + return true; + } + return false; +} + +bool is_small_uint(char *pVal, uint16_t len) { + if (len <= 3) { + return false; + } + if (pVal[0] == '-') { + return false; + } + if (strcmp(&pVal[len - 3], "u16") == 0) { + printf("Type is uint16(%s)\n", pVal); + return true; + } + return false; +} + +bool is_int(char *pVal, uint16_t len) { + if (len <= 3) { + return false; + } + if (strcmp(&pVal[len - 3], "i32") == 0) { + printf("Type is int32(%s)\n", pVal); + return true; + } + return false; +} + +bool is_uint(char *pVal, uint16_t len) { + if (len <= 3) { + return false; + } + if (pVal[0] == '-') { + return false; + } + if (strcmp(&pVal[len - 3], "u32") == 0) { + printf("Type is uint32(%s)\n", pVal); + 
return true; + } + return false; +} + +bool is_big_int(char *pVal, uint16_t len) { + if (len <= 3) { + return false; + } + if (strcmp(&pVal[len - 3], "i64") == 0) { + printf("Type is int64(%s)\n", pVal); + return true; + } + return false; +} + +bool is_big_uint(char *pVal, uint16_t len) { + if (len <= 3) { + return false; + } + if (pVal[0] == '-') { + return false; + } + if (strcmp(&pVal[len - 3], "u64") == 0) { + printf("Type is uint64(%s)\n", pVal); + return true; + } + return false; +} + +bool is_float(char *pVal, uint16_t len) { + if (len <= 3) { + return false; + } + if (strcmp(&pVal[len - 3], "f32") == 0) { + printf("Type is float(%s)\n", pVal); + return true; + } + return false; +} + +bool is_double(char *pVal, uint16_t len) { + if (len <= 3) { + return false; + } + if (strcmp(&pVal[len - 3], "f64") == 0) { + printf("Type is double(%s)\n", pVal); + return true; + } + return false; +} + +bool is_valid_integer(char *str) { + char *c = str; + if (*c != '+' && *c != '-' && !isdigit(*c)) { + return false; + } + c++; + while (*c != '\0') { + if (!isdigit(*c)) { + return false; + } + c++; + } + return true; +} + +bool is_valid_float(char *str) { + char *c = str; + uint8_t has_dot, has_exp, has_sign; + has_dot = 0; + has_exp = 0; + has_sign = 0; + + if (*c != '+' && *c != '-' && *c != '.' && !isdigit(*c)) { + return false; + } + if (*c == '.' 
&& isdigit(*(c + 1))) { + has_dot = 1; + } + c++; + while (*c != '\0') { + if (!isdigit(*c)) { + switch (*c) { + case '.': { + if (!has_dot && !has_exp && isdigit(*(c + 1))) { + has_dot = 1; + } else { + return false; + } + break; + } + case 'e': + case 'E': { + if (!has_exp && isdigit(*(c - 1)) && + (isdigit(*(c + 1)) || + *(c + 1) == '+' || + *(c + 1) == '-')) { + has_exp = 1; + } else { + return false; + } + break; + } + case '+': + case '-': { + if (!has_sign && has_exp && isdigit(*(c + 1))) { + has_sign = 1; + } else { + return false; + } + break; + } + default: { + return false; + } + } + } + c++; + } //while + return true; +} + +bool taos_sml_timestamp_convert(TAOS_SML_KV *pVal, char *value, + uint16_t len) { + if (is_timestamp(value, len)) { + pVal->type = TSDB_DATA_TYPE_TIMESTAMP; + pVal->length = (int16_t)tDataTypes[pVal->type].bytes; + pVal->value = calloc(pVal->length, 1); + int64_t val = (int64_t)strtoll(value, NULL, 10); + memcpy(pVal->value, &val, pVal->length); + return true; + } + return false; +} +//len does not include '\0' from value. 
+bool taos_sml_type_convert(TAOS_SML_KV *pVal, char *value, + uint16_t len) { + if (len <= 0) { + return false; + } + //bool + bool b_val; + if (is_bool(value, len, &b_val)) { + pVal->type = TSDB_DATA_TYPE_BOOL; + pVal->length = (int16_t)tDataTypes[pVal->type].bytes; + pVal->value = calloc(pVal->length, 1); + memcpy(pVal->value, &b_val, pVal->length); + return true; + } + //binary + if (is_binary(value, len)) { + pVal->type = TSDB_DATA_TYPE_BINARY; + pVal->length = len - 2; + pVal->value = calloc(pVal->length, 1); + //copy after " + memcpy(pVal->value, value + 1, pVal->length); + return true; + } + //nchar + if (is_nchar(value, len)) { + pVal->type = TSDB_DATA_TYPE_NCHAR; + pVal->length = len - 3; + pVal->value = calloc(pVal->length, 1); + //copy after L" + memcpy(pVal->value, value + 2, pVal->length); + return true; + } + //floating number + if (is_float(value, len)) { + pVal->type = TSDB_DATA_TYPE_FLOAT; + pVal->length = (int16_t)tDataTypes[pVal->type].bytes; + value[len - 3] = '\0'; + if (!is_valid_float(value)) { + return false; + } + pVal->value = calloc(pVal->length, 1); + float val = (float)strtold(value, NULL); + memcpy(pVal->value, &val, pVal->length); + return true; + } + if (is_double(value, len)) { + pVal->type = TSDB_DATA_TYPE_DOUBLE; + pVal->length = (int16_t)tDataTypes[pVal->type].bytes; + value[len - 3] = '\0'; + if (!is_valid_float(value)) { + return false; + } + pVal->value = calloc(pVal->length, 1); + double val = (double)strtold(value, NULL); + memcpy(pVal->value, &val, pVal->length); + return true; + } + //integer number + if (is_tiny_int(value, len)) { + pVal->type = TSDB_DATA_TYPE_TINYINT; + pVal->length = (int16_t)tDataTypes[pVal->type].bytes; + value[len - 2] = '\0'; + if (!is_valid_integer(value)) { + return false; + } + pVal->value = calloc(pVal->length, 1); + int8_t val = (int8_t)strtoll(value, NULL, 10); + memcpy(pVal->value, &val, pVal->length); + return true; + } + if (is_tiny_uint(value, len)) { + pVal->type = 
TSDB_DATA_TYPE_UTINYINT; + pVal->length = (int16_t)tDataTypes[pVal->type].bytes; + value[len - 2] = '\0'; + if (!is_valid_integer(value)) { + return false; + } + pVal->value = calloc(pVal->length, 1); + uint8_t val = (uint8_t)strtoul(value, NULL, 10); + memcpy(pVal->value, &val, pVal->length); + return true; + } + if (is_small_int(value, len)) { + pVal->type = TSDB_DATA_TYPE_SMALLINT; + pVal->length = (int16_t)tDataTypes[pVal->type].bytes; + value[len - 3] = '\0'; + if (!is_valid_integer(value)) { + return false; + } + pVal->value = calloc(pVal->length, 1); + int16_t val = (int16_t)strtoll(value, NULL, 10); + memcpy(pVal->value, &val, pVal->length); + return true; + } + if (is_small_uint(value, len)) { + pVal->type = TSDB_DATA_TYPE_USMALLINT; + pVal->length = (int16_t)tDataTypes[pVal->type].bytes; + value[len - 3] = '\0'; + if (!is_valid_integer(value)) { + return false; + } + pVal->value = calloc(pVal->length, 1); + uint16_t val = (uint16_t)strtoul(value, NULL, 10); + memcpy(pVal->value, &val, pVal->length); + //memcpy(pVal->value, &val, pVal->length); + return true; + } + if (is_int(value, len)) { + pVal->type = TSDB_DATA_TYPE_INT; + pVal->length = (int16_t)tDataTypes[pVal->type].bytes; + value[len - 3] = '\0'; + if (!is_valid_integer(value)) { + return false; + } + pVal->value = calloc(pVal->length, 1); + int32_t val = (int32_t)strtoll(value, NULL, 10); + memcpy(pVal->value, &val, pVal->length); + return true; + } + if (is_uint(value, len)) { + pVal->type = TSDB_DATA_TYPE_UINT; + pVal->length = (int16_t)tDataTypes[pVal->type].bytes; + value[len - 3] = '\0'; + if (!is_valid_integer(value)) { + return false; + } + pVal->value = calloc(pVal->length, 1); + uint32_t val = (uint32_t)strtoul(value, NULL, 10); + memcpy(pVal->value, &val, pVal->length); + return true; + } + if (is_big_int(value, len)) { + pVal->type = TSDB_DATA_TYPE_BIGINT; + pVal->length = (int16_t)tDataTypes[pVal->type].bytes; + value[len - 3] = '\0'; + if (!is_valid_integer(value)) { + return false; + 
} + pVal->value = calloc(pVal->length, 1); + int64_t val = (int64_t)strtoll(value, NULL, 10); + memcpy(pVal->value, &val, pVal->length); + return true; + } + if (is_big_uint(value, len)) { + pVal->type = TSDB_DATA_TYPE_UBIGINT; + pVal->length = (int16_t)tDataTypes[pVal->type].bytes; + value[len - 3] = '\0'; + if (!is_valid_integer(value)) { + return false; + } + pVal->value = calloc(pVal->length, 1); + uint64_t val = (uint64_t)strtoul(value, NULL, 10); + memcpy(pVal->value, &val, pVal->length); + return true; + } + //TODO: handle default is float here + return false; +} + +/* Field Escape charaters + 1: measurement Comma,Space + 2: tag_key, tag_value, field_key Comma,Equal Sign,Space + 3: field_value Double quote,Backslash +*/ +void escape_special_char(uint8_t field, const char **pos) { + const char *cur = *pos; + if (*cur != '\\') { + return; + } + switch (field) { + case 1: + switch (*(cur + 1)) { + case ',': + case ' ': + cur++; + break; + default: + break; + } + break; + case 2: + switch (*(cur + 1)) { + case ',': + case ' ': + case '=': + cur++; + break; + default: + break; + } + break; + case 3: + switch (*(cur + 1)) { + case '"': + case '\\': + cur++; + break; + default: + break; + } + break; + default: + break; + } + *pos = cur; +} + +bool taos_sml_parse_measurement(TAOS_SML_DATA_POINT *pSml, const char **index, uint8_t *has_tags) { + const char *cur = *index; + uint16_t len = 0; + + pSml->stableName = calloc(TSDB_TABLE_NAME_LEN, 1); + if (*cur == '_') { + printf("Measurement field cannnot start with \'_\'\n"); + return false; + } + + while (*cur != '\0') { + if (len > TSDB_TABLE_NAME_LEN) { + printf("Measurement field cannot exceeds 193 characters"); + return false; + } + //first unescaped comma or space identifies measurement + //if space detected first, meaning no tag in the input + if (*cur == ',' && *(cur - 1) != '\\') { + *has_tags = 1; + printf("measurement:found comma\n"); + break; + } + if (*cur == ' ' && *(cur - 1) != '\\') { + 
printf("measurement:found space\n"); + break; + } + //Comma, Space, Backslash needs to be escaped if any + if (*cur == '\\') { + escape_special_char(1, &cur); + } + pSml->stableName[len] = *cur; + cur++; + len++; + } + pSml->stableName[len] = '\0'; + *index = cur + 1; + printf("stable name:%s|len:%d\n", pSml->stableName, len); + + return true; +} + + +bool taos_sml_parse_key(TAOS_SML_KV *pKV, const char **index) { + const char *cur = *index; char key[TSDB_COL_NAME_LEN]; - int8_t type; - int16_t length; + uint16_t len = 0; - char* value; -}SLPItem; - -typedef struct { - SStrToken measToken; - SStrToken tsToken; - - char sTableName[TSDB_TABLE_NAME_LEN]; - SArray* tags; - SArray* fields; - int64_t ts; - -} SLPPoint; - -typedef enum { - LP_MEASUREMENT, - LP_TAG_KEY, - LP_TAG_VALUE, - LP_FIELD_KEY, - LP_FIELD_VALUE -} LPPart; - -int32_t scanToCommaOrSpace(SStrToken s, int32_t start, int32_t* index, LPPart part) { - for (int32_t i = start; i < s.n; ++i) { - if (s.z[i] == ',' || s.z[i] == ' ') { - *index = i; - return 0; + //key field cannot start with '_' + if (*cur == '_') { + printf("Tag key cannnot start with \'_\'\n"); + return false; + } + //TODO: If tag key has ID field, use corresponding + //tag value as child table name + while (*cur != '\0') { + if (len > TSDB_COL_NAME_LEN) { + printf("Key field cannot exceeds 65 characters"); + return false; } - } - return -1; -} - -int32_t scanToEqual(SStrToken s, int32_t start, int32_t* index) { - for (int32_t i = start; i < s.n; ++i) { - if (s.z[i] == '=') { - *index = i; - return 0; + //unescaped '=' identifies a tag key + if (*cur == '=' && *(cur - 1) != '\\') { + printf("key: found equal sign\n"); + break; } + //Escape special character + if (*cur == '\\') { + escape_special_char(2, &cur); + } + key[len] = *cur; + cur++; + len++; } - return -1; + key[len] = '\0'; + + pKV->key = calloc(len + 1, 1); + memcpy(pKV->key, key, len + 1); + printf("key:%s|len:%d\n", pKV->key, len); + *index = cur + 1; + return true; } -int32_t 
setPointMeasurement(SLPPoint* point, SStrToken token) { - point->measToken = token; - if (point->measToken.n < TSDB_TABLE_NAME_LEN) { - strncpy(point->sTableName, point->measToken.z, point->measToken.n); - point->sTableName[point->measToken.n] = '\0'; +bool taos_sml_parse_value(TAOS_SML_KV *pKV, const char **index, + bool *is_last_kv) { + const char *start, *cur; + char *value = NULL; + uint16_t len = 0; + start = cur = *index; + + while (1) { + // unescaped ',' or ' ' or '\0' identifies a value + if ((*cur == ',' || *cur == ' ' || *cur == '\0') && *(cur - 1) != '\\') { + value = calloc(len + 1, 1); + memcpy(value, start, len); + value[len] = '\0'; + if (!taos_sml_type_convert(pKV, value, len)) { + free(value); + return false; + } + //unescaped ' ' or '\0' indicates end of value + *is_last_kv = (*cur == ' ' || *cur == '\0') ? true : false; + break; + } + //Escape special character + if (*cur == '\\') { + escape_special_char(2, &cur); + } + cur++; + len++; } - return 0; -} -int32_t setItemKey(SLPItem* item, SStrToken key, LPPart part) { - item->keyToken = key; - if (item->keyToken.n < TSDB_COL_NAME_LEN) { - strncpy(item->key, item->keyToken.z, item->keyToken.n); - item->key[item->keyToken.n] = '\0'; + if (value) { + free(value); } - return 0; + + *index = (*cur == '\0') ? 
cur : cur + 1; + return true; } -int32_t setItemValue(SLPItem* item, SStrToken value, LPPart part) { - item->valueToken = value; - return 0; -} +bool taos_sml_parse_kv_pairs(TAOS_SML_KV **pKVs, int *num_kvs, const char **index, bool isField) { + const char *cur = *index; + TAOS_SML_KV *pkv; + bool is_last_kv = false; -int32_t parseItemValue(SLPItem* item, LPItemKind kind) { - char* sv = item->valueToken.z; - char* last = item->valueToken.z + item->valueToken.n - 1; + if (isField) { + //leave space for timestamp + *pKVs = calloc(2, sizeof(TAOS_SML_KV)); + pkv = *pKVs; + pkv++; + } + else { + *pKVs = calloc(1, sizeof(TAOS_SML_KV)); + pkv = *pKVs; + } - if (isdigit(sv[0]) || sv[0] == '-') { - if (*last == 'i') { - item->type = TSDB_DATA_TYPE_BIGINT; - item->length = (int16_t)tDataTypes[item->type].bytes; - item->value = malloc(item->length); - char* endptr = NULL; - *(int64_t*)(item->value) = strtoll(sv, &endptr, 10); - } else if (*last == 'u') { - item->type = TSDB_DATA_TYPE_UBIGINT; - item->length = (int16_t)tDataTypes[item->type].bytes; - item->value = malloc(item->length); - char* endptr = NULL; - *(uint64_t*)(item->value) = (uint64_t)strtoull(sv, &endptr, 10); - } else if (*last == 'b') { - item->type = TSDB_DATA_TYPE_TINYINT; - item->length = (int16_t)tDataTypes[item->type].bytes; - item->value = malloc(item->length); - char* endptr = NULL; - *(int8_t*)(item->value) = (int8_t)strtoll(sv, &endptr, 10); - } else if (*last == 's') { - item->type = TSDB_DATA_TYPE_SMALLINT; - item->length = (int16_t)tDataTypes[item->type].bytes; - item->value = malloc(item->length); - char* endptr = NULL; - *(int16_t*)(item->value) = (int16_t)strtoll(sv, &endptr, 10); - } else if (*last == 'w') { - item->type = TSDB_DATA_TYPE_INT; - item->length = (int16_t)tDataTypes[item->type].bytes; - item->value = malloc(item->length); - char* endptr = NULL; - *(int32_t*)(item->value) = (int32_t)strtoll(sv, &endptr, 10); - } else if (*last == 'f') { - item->type = TSDB_DATA_TYPE_FLOAT; - 
item->length = (int16_t)tDataTypes[item->type].bytes; - item->value = malloc(item->length); - char* endptr = NULL; - *(float*)(item->value) = (float)strtold(sv, &endptr); + while (*cur != '\0') { + if (!taos_sml_parse_key(pkv, &cur)) { + printf("Unable to parse key field\n"); + goto error; + } + if (!taos_sml_parse_value(pkv, &cur, &is_last_kv)) { + printf("Unable to parse value field\n"); + goto error; + } + *num_kvs += 1; + + if(is_last_kv) { + printf("last key value field detected\n"); + goto done; + } + + //reallocate addtional memory for more kvs + TAOS_SML_KV *more_kvs = NULL; + if (isField) { + more_kvs = realloc(*pKVs, (*num_kvs + 2) * sizeof(TAOS_SML_KV)); } else { - item->type = TSDB_DATA_TYPE_DOUBLE; - item->length = (int16_t)tDataTypes[item->type].bytes; - item->value = malloc(item->length); - char* endptr = NULL; - *(double*)(item->value) = strtold(sv, &endptr); + more_kvs = realloc(*pKVs, (*num_kvs + 1) * sizeof(TAOS_SML_KV)); } - } else if ((sv[0] == 'L' && sv[1] =='"') || sv[0] == '"' ) { - if (sv[0] == 'L') { - item->type = TSDB_DATA_TYPE_NCHAR; - uint32_t bytes = item->valueToken.n - 3; - item->length = bytes; - item->value = malloc(bytes); - memcpy(item->value, sv+2, bytes); - } else if (sv[0]=='"'){ - item->type = TSDB_DATA_TYPE_BINARY; - uint32_t bytes = item->valueToken.n - 2; - item->length = bytes; - item->value = malloc(bytes); - memcpy(item->value, sv+1, bytes); - } - } else if (sv[0] == 't' || sv[0] == 'f' || sv[0]=='T' || sv[0] == 'F') { - item->type = TSDB_DATA_TYPE_BOOL; - item->length = tDataTypes[item->type].bytes; - item->value = malloc(tDataTypes[item->type].bytes); - *(uint8_t*)(item->value) = tolower(sv[0])=='t' ? TSDB_TRUE : TSDB_FALSE; - } - return 0; -} - -int32_t compareLPItemKey(const void* p1, const void* p2) { - const SLPItem* t1 = p1; - const SLPItem* t2 = p2; - uint32_t min = (t1->keyToken.n < t2->keyToken.n) ? 
t1->keyToken.n : t2->keyToken.n; - int res = strncmp(t1->keyToken.z, t2->keyToken.z, min); - if (res != 0) { - return res; - } else { - return (int)(t1->keyToken.n) - (int)(t2->keyToken.n); - } -} - -int32_t setPointTimeStamp(SLPPoint* point, SStrToken tsToken) { - point->tsToken = tsToken; - return 0; -} - -int32_t parsePointTime(SLPPoint* point) { - if (point->tsToken.n <= 0) { - point->ts = taosGetTimestampNs(); - } else { - char* endptr = NULL; - point->ts = strtoll(point->tsToken.z, &endptr, 10); - char* last = point->tsToken.z + point->tsToken.n - 1; - if (*last == 's') { - point->ts *= (int64_t)1e9; - } else if (*last == 'a') { - point->ts *= (int64_t)1e6; - } else if (*last == 'u') { - point->ts *= (int64_t)1e3; - } else if (*last == 'b') { - point->ts *= 1; - } - } - return 0; -} - -int32_t tscParseLine(SStrToken line, SLPPoint* point) { - int32_t pos = 0; - - int32_t start = 0; - int32_t err = scanToCommaOrSpace(line, start, &pos, LP_MEASUREMENT); - if (err != 0) { - tscError("a"); - return err; - } - - SStrToken measurement = {.z = line.z+start, .n = pos-start}; - setPointMeasurement(point, measurement); - point->tags = taosArrayInit(64, sizeof(SLPItem)); - start = pos; - while (line.z[start] == ',') { - SLPItem item; - - start++; - err = scanToEqual(line, start, &pos); - if (err != 0) { - tscError("b"); + if (!more_kvs) { goto error; } - - SStrToken tagKey = {.z = line.z + start, .n = pos-start}; - setItemKey(&item, tagKey, LP_TAG_KEY); - - start = pos + 1; - err = scanToCommaOrSpace(line, start, &pos, LP_TAG_VALUE); - if (err != 0) { - tscError("c"); - goto error; + *pKVs = more_kvs; + //move pKV points to next TAOS_SML_KV block + if (isField) { + pkv = *pKVs + *num_kvs + 1; + } else { + pkv = *pKVs + *num_kvs; } - - SStrToken tagValue = {.z = line.z + start, .n = pos-start}; - setItemValue(&item, tagValue, LP_TAG_VALUE); - - parseItemValue(&item, LP_ITEM_TAG); - taosArrayPush(point->tags, &item); - - start = pos; } - - taosArraySort(point->tags, 
compareLPItemKey); - - point->fields = taosArrayInit(64, sizeof(SLPItem)); - - start++; - do { - SLPItem item; - - err = scanToEqual(line, start, &pos); - if (err != 0) { - goto error; - } - SStrToken fieldKey = {.z = line.z + start, .n = pos- start}; - setItemKey(&item, fieldKey, LP_FIELD_KEY); - - start = pos + 1; - err = scanToCommaOrSpace(line, start, &pos, LP_FIELD_VALUE); - if (err != 0) { - goto error; - } - SStrToken fieldValue = {.z = line.z + start, .n = pos - start}; - setItemValue(&item, fieldValue, LP_TAG_VALUE); - - parseItemValue(&item, LP_ITEM_FIELD); - taosArrayPush(point->fields, &item); - - start = pos + 1; - } while (line.z[pos] == ','); - - taosArraySort(point->fields, compareLPItemKey); - - SStrToken tsToken = {.z = line.z+start, .n = line.n-start}; - setPointTimeStamp(point, tsToken); - parsePointTime(point); - goto done; error: - // free array - return err; + free(*pKVs); + return false; done: - return 0; + *index = cur; + return true; } +bool taos_sml_parse_timestamp(TAOS_SML_KV **pTS, const char **index) { + const char *start, *cur; + int len = 0; + char key[] = "_ts"; + char *value = NULL; + + start = cur = *index; + *pTS = calloc(1, sizeof(TAOS_SML_KV)); + + if (*cur == '\0') { + //no timestamp given, use current system time + return true; + } + + while(*cur != '\0') { + cur++; + len++; + } + value = calloc(len, 1); + memcpy(value, start, len); + if (!taos_sml_timestamp_convert(*pTS, value, len)) { + free(*pTS); + return false; + } + free(value); + + + (*pTS)->key = calloc(sizeof(key), 1); + memcpy((*pTS)->key, key, sizeof(key)); + return true; +} + +bool tscParseLine(const char* sql, TAOS_SML_DATA_POINT* sml_data) { + const char* index = sql; + uint8_t has_tags = 0; + TAOS_SML_KV *timestamp = NULL; + + + if (!taos_sml_parse_measurement(sml_data, &index, &has_tags)) { + printf("Unable to parse measurement\n"); + free(sml_data->stableName); + free(sml_data); + return false; + } + printf("============Parse measurement finished, 
has_tags:%d===============\n", has_tags); + + //Parse Tags + if (has_tags) { + if (!taos_sml_parse_kv_pairs(&sml_data->tags, &sml_data->tagNum, &index, false)) { + printf("Unable to parse tag\n"); + //TODO free allocated fileds inside TAOS_SML_DATA_POINT first + return false; + } + } else { + //no tags given + } + + printf("============Parse tags finished, num_tags:%d===============\n", sml_data->tagNum); + //Parse fields + if (!taos_sml_parse_kv_pairs(&sml_data->fields, &sml_data->fieldNum, &index, true)) { + printf("Unable to parse field\n"); + //TODO free allocated fileds inside TAOS_SML_DATA_POINT first + return false; + } + printf("============Parse fields finished, num_fields:%d===============\n", sml_data->fieldNum); + //Parse timestamp + if (!taos_sml_parse_timestamp(&timestamp, &index)) { + printf("Unable to parse timestamp\n"); + + return false; + } + + sml_data->fieldNum = sml_data->fieldNum + 1; + TAOS_SML_KV* tsField = sml_data->fields; + tsField->length = timestamp->length; + tsField->type = timestamp->type; + tsField->value = malloc(timestamp->length); + tsField->key = malloc(strlen(timestamp->key)+1); + memcpy(tsField->key, timestamp->key, strlen(timestamp->key)+1); + memcpy(tsField->value, timestamp->value, timestamp->length); + + free(timestamp->key); + free(timestamp->value); + free(timestamp); + printf("============Parse timestamp finished===============\n"); + + return true; +} + + int32_t tscParseLines(char* lines[], int numLines, SArray* points, SArray* failedLines) { for (int32_t i = 0; i < numLines; ++i) { - SStrToken tkLine = {.z = lines[i], .n = (uint32_t)strlen(lines[i])}; - SLPPoint point; - tscParseLine(tkLine, &point); + TAOS_SML_DATA_POINT point = {0}; + bool succ = tscParseLine(lines[i], &point); + if (!succ) { + tscError("data point line parse failed. line %d", i); + return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + } else { + tscDebug("data point line parse success. 
line %d", i); + } + taosArrayPush(points, &point); } return 0; } -void destroyLPPoint(void* p) { - SLPPoint* lpPoint = p; - for (int i=0; i<taosArrayGetSize(lpPoint->fields); ++i) { - SLPItem* item = taosArrayGet(lpPoint->fields, i); - free(item->value); - } - taosArrayDestroy(lpPoint->fields); - - for (int i=0; i<taosArrayGetSize(lpPoint->tags); ++i) { - SLPItem* item = taosArrayGet(lpPoint->tags, i); - free(item->value); - } - taosArrayDestroy(lpPoint->tags); -} void destroySmlDataPoint(TAOS_SML_DATA_POINT* point) { for (int i=0; i<point->tagNum; ++i) { @@ -1141,78 +1629,26 @@ void destroySmlDataPoint(TAOS_SML_DATA_POINT* point) { int taos_insert_lines(TAOS* taos, char* lines[], int numLines) { int32_t code = 0; - SArray* lpPoints = taosArrayInit(numLines, sizeof(SLPPoint)); - tscParseLines(lines, numLines, lpPoints, NULL); + SArray* lpPoints = taosArrayInit(numLines, sizeof(TAOS_SML_DATA_POINT)); - size_t numPoints = taosArrayGetSize(lpPoints); - TAOS_SML_DATA_POINT* points = calloc(numPoints, sizeof(TAOS_SML_DATA_POINT)); - for (int i = 0; i < numPoints; ++i) { - SLPPoint* lpPoint = taosArrayGet(lpPoints, i); - TAOS_SML_DATA_POINT* point = points+i; - point->stableName = calloc(1, strlen(lpPoint->sTableName)+1); - strncpy(point->stableName, lpPoint->sTableName, strlen(lpPoint->sTableName)); - point->stableName[strlen(lpPoint->sTableName)] = '\0'; - - size_t lpTagSize = taosArrayGetSize(lpPoint->tags); - point->tags = calloc(lpTagSize, sizeof(TAOS_SML_KV)); - point->tagNum = (int)lpTagSize; - for (int j=0; j<lpTagSize; ++j) { - SLPItem* lpTag = taosArrayGet(lpPoint->tags, j); - TAOS_SML_KV* tagKv = point->tags + j; - - size_t kenLen = strlen(lpTag->key); - tagKv->key = calloc(1, kenLen+1); - strncpy(tagKv->key, lpTag->key, kenLen); - tagKv->key[kenLen] = '\0'; - - tagKv->type = lpTag->type; - tagKv->length = lpTag->length; - tagKv->value = malloc(tagKv->length); - memcpy(tagKv->value, lpTag->value, tagKv->length); - } - - size_t lpFieldsSize = taosArrayGetSize(lpPoint->fields); - point->fields = calloc(lpFieldsSize + 1, sizeof(TAOS_SML_KV)); - point->fieldNum = (int)(lpFieldsSize + 
1); - - TAOS_SML_KV* tsField = point->fields + 0; - char tsKey[256]; - snprintf(tsKey, 256, "_%s_ts", point->stableName); - size_t tsKeyLen = strlen(tsKey); - tsField->key = calloc(1, tsKeyLen+1); - strncpy(tsField->key, tsKey, tsKeyLen); - tsField->key[tsKeyLen] = '\0'; - tsField->type = TSDB_DATA_TYPE_TIMESTAMP; - tsField->length = tDataTypes[TSDB_DATA_TYPE_TIMESTAMP].bytes; - tsField->value = malloc(tsField->length); - memcpy(tsField->value, &(lpPoint->ts), tsField->length); - - for (int j=0; jfields, j); - TAOS_SML_KV* fieldKv = point->fields + j + 1; - - size_t kenLen = strlen(lpField->key); - fieldKv->key = calloc(1, kenLen+1); - strncpy(fieldKv->key, lpField->key, kenLen); - fieldKv->key[kenLen] = '\0'; - - fieldKv->type = lpField->type; - fieldKv->length = lpField->length; - fieldKv->value = malloc(fieldKv->length); - memcpy(fieldKv->value, lpField->value, fieldKv->length); - } + code = tscParseLines(lines, numLines, lpPoints, NULL); + if (code != 0) { + goto cleanup; } + size_t numPoints = taosArrayGetSize(lpPoints); + TAOS_SML_DATA_POINT* points = TARRAY_GET_START(lpPoints); code = taos_sml_insert(taos, points, (int)numPoints); if (code != 0) { tscError("taos_sml_insert error: %s", tstrerror((code))); } +cleanup: for (int i=0; i Date: Wed, 14 Jul 2021 21:27:43 +0800 Subject: [PATCH 04/27] fix mac os build error --- src/client/src/tscParseLineProtocol.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index d3b4052d9f..ab10cc2fec 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -1632,11 +1632,12 @@ int taos_insert_lines(TAOS* taos, char* lines[], int numLines) { SArray* lpPoints = taosArrayInit(numLines, sizeof(TAOS_SML_DATA_POINT)); code = tscParseLines(lines, numLines, lpPoints, NULL); + size_t numPoints = taosArrayGetSize(lpPoints); + if (code != 0) { goto cleanup; } - size_t numPoints = 
taosArrayGetSize(lpPoints); TAOS_SML_DATA_POINT* points = TARRAY_GET_START(lpPoints); code = taos_sml_insert(taos, points, (int)numPoints); if (code != 0) { From 54ca09468d4b5caa36995aa88831234db095e426 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Thu, 15 Jul 2021 08:19:48 +0800 Subject: [PATCH 05/27] modify test to comply with new parser --- tests/pytest/insert/line_insert.py | 28 ++++++++++----------- tests/script/general/parser/line_insert.sim | 11 ++++---- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/tests/pytest/insert/line_insert.py b/tests/pytest/insert/line_insert.py index 1e13dee07c..910396384f 100644 --- a/tests/pytest/insert/line_insert.py +++ b/tests/pytest/insert/line_insert.py @@ -31,23 +31,23 @@ class TDTestCase: tdSql.execute('create stable ste(ts timestamp, f int) tags(t1 bigint)') - lines = [ - "st,t1=3i,t2=4,t3=\"t3\" c1=3i,c3=L\"passit\",c2=false,c4=4 1626006833639000000", - "st,t1=4i,t3=\"t4\",t2=5,t4=5 c1=3i,c3=L\"passitagin\",c2=true,c4=5,c5=5 1626006833640000000", - "ste,t2=5,t3=L\"ste\" c1=true,c2=4,c3=\"iam\" 1626056811823316532", - "st,t1=4i,t2=5,t3=\"t4\" c1=3i,c3=L\"passitagain\",c2=true,c4=5 1626006833642000000", - "ste,t2=5,t3=L\"ste2\" c3=\"iamszhou\",c4=false 1626056811843316532", - "ste,t2=5,t3=L\"ste2\" c3=\"iamszhou\",c4=false,c5=32b,c6=64s,c7=32w,c8=88.88f 1626056812843316532", - "st,t1=4i,t3=\"t4\",t2=5,t4=5 c1=3i,c3=L\"passitagin\",c2=true,c4=5,c5=5,c6=7u 1626006933640000000", - "stf,t1=4i,t3=\"t4\",t2=5,t4=5 c1=3i,c3=L\"passitagin\",c2=true,c4=5,c5=5,c6=7u 1626006933640000000", - "stf,t1=4i,t3=\"t4\",t2=5,t4=5 c1=3i,c3=L\"passitagin_stf\",c2=false,c5=5,c6=7u 1626006933641a"] + lines = [ "st,t1=3i64,t2=4f64,t3=\"t3\" c1=3i64,c3=L\"passit\",c2=false,c4=4f64 1626006833639000000ns", + "st,t1=4i64,t3=\"t4\",t2=5f64,t4=5f64 c1=3i64,c3=L\"passitagin\",c2=true,c4=5f64,c5=5f64 1626006833640000000ns", + "ste,t2=5f64,t3=L\"ste\" c1=true,c2=4i64,c3=\"iam\" 1626056811823316532ns", + 
"stf,t1=4i64,t3=\"t4\",t2=5f64,t4=5f64 c1=3i64,c3=L\"passitagin\",c2=true,c4=5f64,c5=5f64,c6=7u64 1626006933640000000ns", + "st,t1=4i64,t2=5f64,t3=\"t4\" c1=3i64,c3=L\"passitagain\",c2=true,c4=5f64 1626006833642000000ns", + "ste,t2=5f64,t3=L\"ste2\" c3=\"iamszhou\",c4=false 1626056811843316532ns", + "ste,t2=5f64,t3=L\"ste2\" c3=\"iamszhou\",c4=false,c5=32i8,c6=64i16,c7=32i32,c8=88.88f32 1626056812843316532ns", + "st,t1=4i64,t3=\"t4\",t2=5f64,t4=5f64 c1=3i64,c3=L\"passitagin\",c2=true,c4=5f64,c5=5f64,c6=7u64 1626006933640000000ns", + "stf,t1=4i64,t3=\"t4\",t2=5f64,t4=5f64 c1=3i64,c3=L\"passitagin_stf\",c2=false,c5=5f64,c6=7u64 1626006933641000000ns" + ] code = self._conn.insertLines(lines) print("insertLines result {}".format(code)) - lines2 = [ - "stg,t1=3i,t2=4,t3=\"t3\" c1=3i,c3=L\"passit\",c2=false,c4=4 1626006833639000000", - "stg,t1=4i,t3=\"t4\",t2=5,t4=5 c1=3i,c3=L\"passitagin\",c2=true,c4=5,c5=5 1626006833640000000"] + lines2 = [ "stg,t1=3i64,t2=4f64,t3=\"t3\" c1=3i64,c3=L\"passit\",c2=false,c4=4f64 1626006833639000000ns", + "stg,t1=4i64,t3=\"t4\",t2=5f64,t4=5f64 c1=3i64,c3=L\"passitagin\",c2=true,c4=5f64,c5=5f64 1626006833640000000ns" + ] code = self._conn.insertLines([ lines2[0] ]) print("insertLines result {}".format(code)) @@ -71,7 +71,7 @@ class TDTestCase: tdSql.checkRows(8) tdSql.query("describe stf"); - tdSql.checkData(3,2, 14) + tdSql.checkData(2, 2, 14) def stop(self): tdSql.close() diff --git a/tests/script/general/parser/line_insert.sim b/tests/script/general/parser/line_insert.sim index f3067a3bbe..85f2714ad3 100644 --- a/tests/script/general/parser/line_insert.sim +++ b/tests/script/general/parser/line_insert.sim @@ -16,11 +16,10 @@ sql create database $db precision 'us' sql use $db sql create stable $mte (ts timestamp, f int) TAGS(t1 bigint) -line_insert st,t1=3i,t2=4,t3="t3" c1=3i,c3=L"passit",c2=false,c4=4 1626006833639000000 -line_insert st,t1=4i,t3="t41",t2=5 c1=3i,c3=L"passiT",c2=true,c4=5 1626006833640000000 -line_insert 
stf,t1=4i,t2=5,t3="t4" c1=3i,c3=L"passitagain",c2=true,c4=5 1626006833642000000 -line_insert ste,t2=5,t3=L"ste" c1=true,c2=4,c3="iam" 1626056811823316532 - +line_insert st,t1=3i64,t2=4f64,t3="t3" c1=3i64,c3=L"passit",c2=false,c4=4f64 1626006833639000000ns +line_insert st,t1=4i64,t3="t4",t2=5f64,t4=5f64 c1=3i64,c3=L"passitagin",c2=true,c4=5f64,c5=5f64 1626006833640000000ns +line_insert ste,t2=5f64,t3=L"ste" c1=true,c2=4i64,c3="iam" 1626056811823316532ns +line_insert stf,t1=4i64,t3="t4",t2=5f64,t4=5f64 c1=3i64,c3=L"passitagin",c2=true,c4=5f64,c5=5f64,c6=7u64 1626006933640000000ns sql select * from st if $rows != 2 then return -1 @@ -30,7 +29,7 @@ if $data00 != @21-07-11 20:33:53.639000@ then return -1 endi -if $data03 != @passit@ then +if $data02 != @passit@ then return -1 endi From 3b477f6c1c4e530c53d1d0eaa650a92f263ef77d Mon Sep 17 00:00:00 2001 From: glzhao89 Date: Thu, 15 Jul 2021 16:37:10 +0800 Subject: [PATCH 06/27] [TD-5208] add timestamp parsing and child table name processing --- src/client/src/tscParseLineProtocol.c | 721 +++++++++++++++----------- 1 file changed, 421 insertions(+), 300 deletions(-) diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index ab10cc2fec..bdeaa8913b 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -17,6 +17,11 @@ #include "tscLog.h" #include "taos.h" + +#define SECONDS_TO_MILLI(TS) TS * 1000 +#define SECONDS_TO_MICRO(TS) TS * 1000000 +#define SECONDS_TO_NANO(TS) TS * 1000000000 + typedef struct { char sTableName[TSDB_TABLE_NAME_LEN]; SHashObj* tagHash; @@ -51,6 +56,14 @@ typedef struct { SSmlSTableSchema* schema; } TAOS_SML_DATA_POINT; +typedef enum { + SML_TIME_STAMP_NOW, + SML_TIME_STAMP_SECONDS, + SML_TIME_STAMP_MILLI_SECONDS, + SML_TIME_STAMP_MICRO_SECONDS, + SML_TIME_STAMP_NANO_SECONDS +} SMLTimeStampType; + //================================================================================================= int compareSmlColKv(const 
void* p1, const void* p2) { @@ -825,93 +838,125 @@ clean_up: //========================================================================= -bool is_timestamp(char *pVal, uint16_t len) { - if ((len == 1) && pVal[0] == '0') { - printf("Type is timestamp(%s)\n", pVal); - return true; +/* Field Escape charaters + 1: measurement Comma,Space + 2: tag_key, tag_value, field_key Comma,Equal Sign,Space + 3: field_value Double quote,Backslash +*/ +void escape_special_char(uint8_t field, const char **pos) { + const char *cur = *pos; + if (*cur != '\\') { + return; } - if (len < 2) { + switch (field) { + case 1: + switch (*(cur + 1)) { + case ',': + case ' ': + cur++; + break; + default: + break; + } + break; + case 2: + switch (*(cur + 1)) { + case ',': + case ' ': + case '=': + cur++; + break; + default: + break; + } + break; + case 3: + switch (*(cur + 1)) { + case '"': + case '\\': + cur++; + break; + default: + break; + } + break; + default: + break; + } + *pos = cur; +} + +bool is_valid_integer(char *str) { + char *c = str; + if (*c != '+' && *c != '-' && !isdigit(*c)) { return false; } - if (pVal[len - 1] == 's') { - switch (pVal[len - 2]) { - case 'm': - case 'u': - case 'n': - break; - default: - if (isdigit(pVal[len - 2])) { + c++; + while (*c != '\0') { + if (!isdigit(*c)) { + return false; + } + c++; + } + return true; +} + +bool is_valid_float(char *str) { + char *c = str; + uint8_t has_dot, has_exp, has_sign; + has_dot = 0; + has_exp = 0; + has_sign = 0; + + if (*c != '+' && *c != '-' && *c != '.' && !isdigit(*c)) { + return false; + } + if (*c == '.' 
&& isdigit(*(c + 1))) { + has_dot = 1; + } + c++; + while (*c != '\0') { + if (!isdigit(*c)) { + switch (*c) { + case '.': { + if (!has_dot && !has_exp && isdigit(*(c + 1))) { + has_dot = 1; + } else { + return false; + } break; - } else { + } + case 'e': + case 'E': { + if (!has_exp && isdigit(*(c - 1)) && + (isdigit(*(c + 1)) || + *(c + 1) == '+' || + *(c + 1) == '-')) { + has_exp = 1; + } else { + return false; + } + break; + } + case '+': + case '-': { + if (!has_sign && has_exp && isdigit(*(c + 1))) { + has_sign = 1; + } else { + return false; + } + break; + } + default: { return false; } + } } - printf("Type is timestamp\n"); - return true; - } - return false; + c++; + } //while + return true; } -bool is_bool(char *pVal, uint16_t len, bool *b_val) { - if ((len == 1) && - (pVal[len - 1] == 't' || - pVal[len - 1] == 'T')) { - printf("Type is bool(%c)\n", pVal[len - 1]); - *b_val = true; - return true; - } - - if ((len == 1) && - (pVal[len - 1] == 'f' || - pVal[len - 1] == 'F')) { - printf("Type is bool(%c)\n", pVal[len - 1]); - *b_val = false; - return true; - } - - if((len == 4) && - (!strcmp(&pVal[len - 4], "true") || - !strcmp(&pVal[len - 4], "True") || - !strcmp(&pVal[len - 4], "TRUE"))) { - printf("Type is bool(%s)\n", &pVal[len - 4]); - *b_val = true; - return true; - } - if((len == 5) && - (!strcmp(&pVal[len - 5], "false") || - !strcmp(&pVal[len - 5], "False") || - !strcmp(&pVal[len - 5], "FALSE"))) { - printf("Type is bool(%s)\n", &pVal[len - 5]); - *b_val = false; - return true; - } - return false; -} - -bool is_binary(char *pVal, uint16_t len) { - //binary: "abc" - if (len < 2) { - return false; - } - //binary - if (pVal[0] == '"' && pVal[len - 1] == '"') { - printf("Type is binary(%s)\n", pVal); - return true; - } - return false; -} - -bool is_nchar(char *pVal, uint16_t len) { - //nchar: L"abc" - if (len < 3) { - return false; - } - if (pVal[0] == 'L' && pVal[1] == '"' && pVal[len - 1] == '"') { - printf("Type is nchar(%s)\n", pVal); - return true; - 
} - return false; -} bool is_tiny_int(char *pVal, uint16_t len) { if (len <= 2) { @@ -1035,89 +1080,111 @@ bool is_double(char *pVal, uint16_t len) { return false; } -bool is_valid_integer(char *str) { - char *c = str; - if (*c != '+' && *c != '-' && !isdigit(*c)) { - return false; +bool is_bool(char *pVal, uint16_t len, bool *b_val) { + if ((len == 1) && + (pVal[len - 1] == 't' || + pVal[len - 1] == 'T')) { + printf("Type is bool(%c)\n", pVal[len - 1]); + *b_val = true; + return true; } - c++; - while (*c != '\0') { - if (!isdigit(*c)) { - return false; - } - c++; - } - return true; -} -bool is_valid_float(char *str) { - char *c = str; - uint8_t has_dot, has_exp, has_sign; - has_dot = 0; - has_exp = 0; - has_sign = 0; - - if (*c != '+' && *c != '-' && *c != '.' && !isdigit(*c)) { - return false; + if ((len == 1) && + (pVal[len - 1] == 'f' || + pVal[len - 1] == 'F')) { + printf("Type is bool(%c)\n", pVal[len - 1]); + *b_val = false; + return true; } - if (*c == '.' && isdigit(*(c + 1))) { - has_dot = 1; - } - c++; - while (*c != '\0') { - if (!isdigit(*c)) { - switch (*c) { - case '.': { - if (!has_dot && !has_exp && isdigit(*(c + 1))) { - has_dot = 1; - } else { - return false; - } - break; - } - case 'e': - case 'E': { - if (!has_exp && isdigit(*(c - 1)) && - (isdigit(*(c + 1)) || - *(c + 1) == '+' || - *(c + 1) == '-')) { - has_exp = 1; - } else { - return false; - } - break; - } - case '+': - case '-': { - if (!has_sign && has_exp && isdigit(*(c + 1))) { - has_sign = 1; - } else { - return false; - } - break; - } - default: { - return false; - } - } - } - c++; - } //while - return true; -} -bool taos_sml_timestamp_convert(TAOS_SML_KV *pVal, char *value, - uint16_t len) { - if (is_timestamp(value, len)) { - pVal->type = TSDB_DATA_TYPE_TIMESTAMP; - pVal->length = (int16_t)tDataTypes[pVal->type].bytes; - pVal->value = calloc(pVal->length, 1); - int64_t val = (int64_t)strtoll(value, NULL, 10); - memcpy(pVal->value, &val, pVal->length); + if((len == 4) && + 
(!strcmp(&pVal[len - 4], "true") || + !strcmp(&pVal[len - 4], "True") || + !strcmp(&pVal[len - 4], "TRUE"))) { + printf("Type is bool(%s)\n", &pVal[len - 4]); + *b_val = true; + return true; + } + if((len == 5) && + (!strcmp(&pVal[len - 5], "false") || + !strcmp(&pVal[len - 5], "False") || + !strcmp(&pVal[len - 5], "FALSE"))) { + printf("Type is bool(%s)\n", &pVal[len - 5]); + *b_val = false; return true; } return false; } + +bool is_binary(char *pVal, uint16_t len) { + //binary: "abc" + if (len < 2) { + return false; + } + //binary + if (pVal[0] == '"' && pVal[len - 1] == '"') { + printf("Type is binary(%s)\n", pVal); + return true; + } + return false; +} + +bool is_nchar(char *pVal, uint16_t len) { + //nchar: L"abc" + if (len < 3) { + return false; + } + if (pVal[0] == 'L' && pVal[1] == '"' && pVal[len - 1] == '"') { + printf("Type is nchar(%s)\n", pVal); + return true; + } + return false; +} + +bool is_timestamp(char *pVal, uint16_t len, SMLTimeStampType *tsType) { + if (len == 0) { + return true; + } + if ((len == 1) && pVal[0] == '0') { + *tsType = SML_TIME_STAMP_NOW; + printf("Type is timestamp(%s)\n", pVal); + return true; + } + if (len < 2) { + return false; + } + //No appendix use usec as default + if (isdigit(pVal[len - 1]) && isdigit(pVal[len - 2])) { + *tsType = SML_TIME_STAMP_MICRO_SECONDS; + printf("Type is timestamp(%s)\n", pVal); + return true; + } + if (pVal[len - 1] == 's') { + switch (pVal[len - 2]) { + case 'm': + *tsType = SML_TIME_STAMP_MILLI_SECONDS; + break; + case 'u': + *tsType = SML_TIME_STAMP_MICRO_SECONDS; + break; + case 'n': + *tsType = SML_TIME_STAMP_NANO_SECONDS; + break; + default: + if (isdigit(pVal[len - 2])) { + *tsType = SML_TIME_STAMP_SECONDS; + break; + } else { + return false; + } + } + printf("Type is timestamp(%s)\n", pVal); + return true; + } + return false; +} + + //len does not include '\0' from value. 
bool taos_sml_type_convert(TAOS_SML_KV *pVal, char *value, uint16_t len) { @@ -1278,97 +1345,113 @@ bool taos_sml_type_convert(TAOS_SML_KV *pVal, char *value, return false; } -/* Field Escape charaters - 1: measurement Comma,Space - 2: tag_key, tag_value, field_key Comma,Equal Sign,Space - 3: field_value Double quote,Backslash -*/ -void escape_special_char(uint8_t field, const char **pos) { - const char *cur = *pos; - if (*cur != '\\') { - return; +int32_t tscGetTimeStampValue(char *value, uint16_t len, SMLTimeStampType type, int64_t *ts) { + + if (len >= 2) { + for (int i = 0; i < len - 2; ++i) { + if(!isdigit(value[i])) { + return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + } + } } - switch (field) { - case 1: - switch (*(cur + 1)) { - case ',': - case ' ': - cur++; - break; - default: - break; - } - break; - case 2: - switch (*(cur + 1)) { - case ',': - case ' ': - case '=': - cur++; - break; - default: - break; - } - break; - case 3: - switch (*(cur + 1)) { - case '"': - case '\\': - cur++; - break; - default: - break; - } - break; - default: - break; + //No appendix or no timestamp given (len = 0) + if (len >= 1 && isdigit(value[len - 1]) && type != SML_TIME_STAMP_NOW) { + type = SML_TIME_STAMP_MICRO_SECONDS; } - *pos = cur; + if (len != 0) { + *ts = (int64_t)strtoll(value, NULL, 10); + } else { + type = SML_TIME_STAMP_NOW; + } + switch (type) { + case SML_TIME_STAMP_NOW: { + time_t now = time(NULL); + *ts = SECONDS_TO_MICRO((int64_t)now); + break; + } + case SML_TIME_STAMP_SECONDS: { + break; + } + case SML_TIME_STAMP_MILLI_SECONDS: { + *ts = SECONDS_TO_MILLI(*ts); + break; + } + case SML_TIME_STAMP_MICRO_SECONDS: { + *ts = SECONDS_TO_MICRO(*ts); + break; + } + case SML_TIME_STAMP_NANO_SECONDS: { + *ts = SECONDS_TO_NANO(*ts); + break; + } + default: { + return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + } + } + return TSDB_CODE_SUCCESS; } -bool taos_sml_parse_measurement(TAOS_SML_DATA_POINT *pSml, const char **index, uint8_t *has_tags) { - const char *cur = *index; - uint16_t 
len = 0; +int32_t taos_sml_timestamp_convert(TAOS_SML_KV *pVal, char *value, uint16_t len) { + int32_t ret; + SMLTimeStampType type; + int64_t tsVal; - pSml->stableName = calloc(TSDB_TABLE_NAME_LEN, 1); - if (*cur == '_') { - printf("Measurement field cannnot start with \'_\'\n"); - return false; + + if (!is_timestamp(value, len, &type)) { + return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; } - while (*cur != '\0') { - if (len > TSDB_TABLE_NAME_LEN) { - printf("Measurement field cannot exceeds 193 characters"); - return false; - } - //first unescaped comma or space identifies measurement - //if space detected first, meaning no tag in the input - if (*cur == ',' && *(cur - 1) != '\\') { - *has_tags = 1; - printf("measurement:found comma\n"); + ret = tscGetTimeStampValue(value, len, type, &tsVal); + if (ret) { + return ret; + } + printf("Timestamp after conversion:%lld\n", tsVal); + + pVal->type = TSDB_DATA_TYPE_TIMESTAMP; + pVal->length = (int16_t)tDataTypes[pVal->type].bytes; + pVal->value = calloc(pVal->length, 1); + memcpy(pVal->value, &tsVal, pVal->length); + return TSDB_CODE_SUCCESS; +} + +bool taos_sml_parse_value(TAOS_SML_KV *pKV, const char **index, + bool *is_last_kv) { + const char *start, *cur; + char *value = NULL; + uint16_t len = 0; + start = cur = *index; + + while (1) { + // unescaped ',' or ' ' or '\0' identifies a value + if ((*cur == ',' || *cur == ' ' || *cur == '\0') && *(cur - 1) != '\\') { + value = calloc(len + 1, 1); + memcpy(value, start, len); + value[len] = '\0'; + if (!taos_sml_type_convert(pKV, value, len)) { + free(value); + return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + } + //unescaped ' ' or '\0' indicates end of value + *is_last_kv = (*cur == ' ' || *cur == '\0') ? 
true : false; break; } - if (*cur == ' ' && *(cur - 1) != '\\') { - printf("measurement:found space\n"); - break; - } - //Comma, Space, Backslash needs to be escaped if any + //Escape special character if (*cur == '\\') { - escape_special_char(1, &cur); + escape_special_char(2, &cur); } - pSml->stableName[len] = *cur; cur++; len++; } - pSml->stableName[len] = '\0'; - *index = cur + 1; - printf("stable name:%s|len:%d\n", pSml->stableName, len); - return true; + if (value) { + free(value); + } + + *index = (*cur == '\0') ? cur : cur + 1; + return TSDB_CODE_SUCCESS; } - -bool taos_sml_parse_key(TAOS_SML_KV *pKV, const char **index) { +int32_t taos_sml_parse_key(TAOS_SML_KV *pKV, const char **index) { const char *cur = *index; char key[TSDB_COL_NAME_LEN]; uint16_t len = 0; @@ -1376,14 +1459,12 @@ bool taos_sml_parse_key(TAOS_SML_KV *pKV, const char **index) { //key field cannot start with '_' if (*cur == '_') { printf("Tag key cannnot start with \'_\'\n"); - return false; + return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; } - //TODO: If tag key has ID field, use corresponding - //tag value as child table name while (*cur != '\0') { if (len > TSDB_COL_NAME_LEN) { printf("Key field cannot exceeds 65 characters"); - return false; + return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; } //unescaped '=' identifies a tag key if (*cur == '=' && *(cur - 1) != '\\') { @@ -1404,48 +1485,46 @@ bool taos_sml_parse_key(TAOS_SML_KV *pKV, const char **index) { memcpy(pKV->key, key, len + 1); printf("key:%s|len:%d\n", pKV->key, len); *index = cur + 1; - return true; + return TSDB_CODE_SUCCESS; } -bool taos_sml_parse_value(TAOS_SML_KV *pKV, const char **index, - bool *is_last_kv) { - const char *start, *cur; - char *value = NULL; - uint16_t len = 0; - start = cur = *index; - while (1) { - // unescaped ',' or ' ' or '\0' identifies a value - if ((*cur == ',' || *cur == ' ' || *cur == '\0') && *(cur - 1) != '\\') { - value = calloc(len + 1, 1); - memcpy(value, start, len); - value[len] = '\0'; - if 
(!taos_sml_type_convert(pKV, value, len)) { - free(value); - return false; - } - //unescaped ' ' or '\0' indicates end of value - *is_last_kv = (*cur == ' ' || *cur == '\0') ? true : false; - break; - } - //Escape special character - if (*cur == '\\') { - escape_special_char(2, &cur); - } +int32_t taos_sml_parse_timestamp(TAOS_SML_KV **pTS, const char **index) { + const char *start, *cur; + int32_t ret = TSDB_CODE_SUCCESS; + int len = 0; + char key[] = "_ts"; + char *value = NULL; + + start = cur = *index; + *pTS = calloc(1, sizeof(TAOS_SML_KV)); + + while(*cur != '\0') { cur++; len++; } - if (value) { - free(value); + if (len > 0) { + value = calloc(len, 1); + memcpy(value, start, len); } - *index = (*cur == '\0') ? cur : cur + 1; - return true; + ret = taos_sml_timestamp_convert(*pTS, value, len); + if (ret) { + free(value); + free(*pTS); + return ret; + } + free(value); + + (*pTS)->key = calloc(sizeof(key), 1); + memcpy((*pTS)->key, key, sizeof(key)); + return ret; } -bool taos_sml_parse_kv_pairs(TAOS_SML_KV **pKVs, int *num_kvs, const char **index, bool isField) { +int32_t taos_sml_parse_kv_pairs(TAOS_SML_KV **pKVs, int *num_kvs, const char **index, bool isField) { const char *cur = *index; + int32_t ret = TSDB_CODE_SUCCESS; TAOS_SML_KV *pkv; bool is_last_kv = false; @@ -1461,17 +1540,19 @@ bool taos_sml_parse_kv_pairs(TAOS_SML_KV **pKVs, int *num_kvs, const char **inde } while (*cur != '\0') { - if (!taos_sml_parse_key(pkv, &cur)) { + ret = taos_sml_parse_key(pkv, &cur); + if (ret) { printf("Unable to parse key field\n"); goto error; } - if (!taos_sml_parse_value(pkv, &cur, &is_last_kv)) { + ret = taos_sml_parse_value(pkv, &cur, &is_last_kv); + if (ret) { printf("Unable to parse value field\n"); goto error; } *num_kvs += 1; - if(is_last_kv) { + if (is_last_kv) { printf("last key value field detected\n"); goto done; } @@ -1498,82 +1579,122 @@ bool taos_sml_parse_kv_pairs(TAOS_SML_KV **pKVs, int *num_kvs, const char **inde error: free(*pKVs); - return false; + 
return ret; done: *index = cur; - return true; + return ret; } -bool taos_sml_parse_timestamp(TAOS_SML_KV **pTS, const char **index) { - const char *start, *cur; - int len = 0; - char key[] = "_ts"; - char *value = NULL; +int32_t taos_sml_parse_measurement(TAOS_SML_DATA_POINT *pSml, const char **index, uint8_t *has_tags) { + const char *cur = *index; + uint16_t len = 0; - start = cur = *index; - *pTS = calloc(1, sizeof(TAOS_SML_KV)); - - if (*cur == '\0') { - //no timestamp given, use current system time - return true; + pSml->stableName = calloc(TSDB_TABLE_NAME_LEN, 1); + if (*cur == '_') { + printf("Measurement field cannnot start with \'_\'\n"); + return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; } - while(*cur != '\0') { + while (*cur != '\0') { + if (len > TSDB_TABLE_NAME_LEN) { + printf("Measurement field cannot exceeds 193 characters"); + return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + } + //first unescaped comma or space identifies measurement + //if space detected first, meaning no tag in the input + if (*cur == ',' && *(cur - 1) != '\\') { + *has_tags = 1; + printf("measurement:found comma\n"); + break; + } + if (*cur == ' ' && *(cur - 1) != '\\') { + printf("measurement:found space\n"); + break; + } + //Comma, Space, Backslash needs to be escaped if any + if (*cur == '\\') { + escape_special_char(1, &cur); + } + pSml->stableName[len] = *cur; cur++; len++; } - value = calloc(len, 1); - memcpy(value, start, len); - if (!taos_sml_timestamp_convert(*pTS, value, len)) { - free(*pTS); - return false; - } - free(value); + pSml->stableName[len] = '\0'; + *index = cur + 1; + printf("stable name:%s|len:%d\n", pSml->stableName, len); - - (*pTS)->key = calloc(sizeof(key), 1); - memcpy((*pTS)->key, key, sizeof(key)); - return true; + return TSDB_CODE_SUCCESS; } -bool tscParseLine(const char* sql, TAOS_SML_DATA_POINT* sml_data) { + +bool tscGetChildTableName(TAOS_SML_DATA_POINT *pData) { + TAOS_SML_KV *pTags = pData->tags; + int tagNum = pData->tagNum; + char *childTableName = 
pData->childTableName; + + for (int i = 0; i < tagNum; ++i) { + //use tag value as child table name if key is "ID" + //tag value has to be binary for now + if (!strcmp(pTags->key, "ID") && pTags->type == TSDB_DATA_TYPE_BINARY) { + memcpy(childTableName, pTags->value, pTags->length); + return true; + } + pTags++; + } + return false; +} + +int32_t tscParseLine(const char* sql, TAOS_SML_DATA_POINT* sml_data) { const char* index = sql; + int32_t ret = TSDB_CODE_SUCCESS; uint8_t has_tags = 0; TAOS_SML_KV *timestamp = NULL; - if (!taos_sml_parse_measurement(sml_data, &index, &has_tags)) { + ret = taos_sml_parse_measurement(sml_data, &index, &has_tags); + if (ret) { printf("Unable to parse measurement\n"); free(sml_data->stableName); free(sml_data); - return false; + return ret; } printf("============Parse measurement finished, has_tags:%d===============\n", has_tags); //Parse Tags if (has_tags) { - if (!taos_sml_parse_kv_pairs(&sml_data->tags, &sml_data->tagNum, &index, false)) { + ret = taos_sml_parse_kv_pairs(&sml_data->tags, &sml_data->tagNum, &index, false); + if (ret) { printf("Unable to parse tag\n"); //TODO free allocated fileds inside TAOS_SML_DATA_POINT first - return false; + return ret; } + sml_data->childTableName = calloc(TSDB_TABLE_NAME_LEN, 1); + if (!tscGetChildTableName(sml_data)) { + free(sml_data->childTableName); + } + printf("Child table name:%02x:%02x:%02x:%02x\n", sml_data->childTableName[0], + sml_data->childTableName[1], + sml_data->childTableName[2], + sml_data->childTableName[3]); } else { //no tags given } printf("============Parse tags finished, num_tags:%d===============\n", sml_data->tagNum); //Parse fields - if (!taos_sml_parse_kv_pairs(&sml_data->fields, &sml_data->fieldNum, &index, true)) { + ret = taos_sml_parse_kv_pairs(&sml_data->fields, &sml_data->fieldNum, &index, true); + if (ret) { printf("Unable to parse field\n"); //TODO free allocated fileds inside TAOS_SML_DATA_POINT first - return false; + return ret; } + 
printf("============Parse fields finished, num_fields:%d===============\n", sml_data->fieldNum); //Parse timestamp - if (!taos_sml_parse_timestamp(&timestamp, &index)) { + ret = taos_sml_parse_timestamp(&timestamp, &index); + if (ret) { printf("Unable to parse timestamp\n"); - - return false; + return ret; } sml_data->fieldNum = sml_data->fieldNum + 1; @@ -1593,7 +1714,7 @@ bool tscParseLine(const char* sql, TAOS_SML_DATA_POINT* sml_data) { return true; } - +//========================================================================= int32_t tscParseLines(char* lines[], int numLines, SArray* points, SArray* failedLines) { for (int32_t i = 0; i < numLines; ++i) { From d8545b5035ba9979500191e65eb28fc899f932cb Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Thu, 15 Jul 2021 18:22:35 +0800 Subject: [PATCH 07/27] pass test for tag id key as child table name and timestamp ns/us/ms/s --- src/client/src/tscParseLineProtocol.c | 51 ++++++++++++++++++++------- tests/examples/c/apitest.c | 6 ++++ tests/pytest/insert/line_insert.py | 21 +++++++---- 3 files changed, 59 insertions(+), 19 deletions(-) diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index bdeaa8913b..25b35fdb36 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -1365,23 +1365,23 @@ int32_t tscGetTimeStampValue(char *value, uint16_t len, SMLTimeStampType type, i } switch (type) { case SML_TIME_STAMP_NOW: { - time_t now = time(NULL); - *ts = SECONDS_TO_MICRO((int64_t)now); + *ts = taosGetTimestampNs(); break; } case SML_TIME_STAMP_SECONDS: { + *ts = (int64_t)(*ts * 1e9); break; } case SML_TIME_STAMP_MILLI_SECONDS: { - *ts = SECONDS_TO_MILLI(*ts); + *ts = convertTimePrecision(*ts, TSDB_TIME_PRECISION_MILLI, TSDB_TIME_PRECISION_NANO); break; } case SML_TIME_STAMP_MICRO_SECONDS: { - *ts = SECONDS_TO_MICRO(*ts); + *ts = convertTimePrecision(*ts, TSDB_TIME_PRECISION_MICRO, TSDB_TIME_PRECISION_NANO); break; } case 
SML_TIME_STAMP_NANO_SECONDS: { - *ts = SECONDS_TO_NANO(*ts); + *ts = *ts * 1; break; } default: { @@ -1405,7 +1405,7 @@ int32_t taos_sml_timestamp_convert(TAOS_SML_KV *pVal, char *value, uint16_t len) if (ret) { return ret; } - printf("Timestamp after conversion:%lld\n", tsVal); + printf("Timestamp after conversion:%ld\n", tsVal); pVal->type = TSDB_DATA_TYPE_TIMESTAMP; pVal->length = (int16_t)tDataTypes[pVal->type].bytes; @@ -1592,12 +1592,14 @@ int32_t taos_sml_parse_measurement(TAOS_SML_DATA_POINT *pSml, const char **index pSml->stableName = calloc(TSDB_TABLE_NAME_LEN, 1); if (*cur == '_') { printf("Measurement field cannnot start with \'_\'\n"); + free(pSml->stableName); return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; } while (*cur != '\0') { if (len > TSDB_TABLE_NAME_LEN) { printf("Measurement field cannot exceeds 193 characters"); + free(pSml->stableName); return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; } //first unescaped comma or space identifies measurement @@ -1635,7 +1637,7 @@ bool tscGetChildTableName(TAOS_SML_DATA_POINT *pData) { for (int i = 0; i < tagNum; ++i) { //use tag value as child table name if key is "ID" //tag value has to be binary for now - if (!strcmp(pTags->key, "ID") && pTags->type == TSDB_DATA_TYPE_BINARY) { + if (!strcasecmp(pTags->key, "ID") && pTags->type == TSDB_DATA_TYPE_BINARY) { memcpy(childTableName, pTags->value, pTags->length); return true; } @@ -1654,8 +1656,6 @@ int32_t tscParseLine(const char* sql, TAOS_SML_DATA_POINT* sml_data) { ret = taos_sml_parse_measurement(sml_data, &index, &has_tags); if (ret) { printf("Unable to parse measurement\n"); - free(sml_data->stableName); - free(sml_data); return ret; } printf("============Parse measurement finished, has_tags:%d===============\n", has_tags); @@ -1671,11 +1671,36 @@ int32_t tscParseLine(const char* sql, TAOS_SML_DATA_POINT* sml_data) { sml_data->childTableName = calloc(TSDB_TABLE_NAME_LEN, 1); if (!tscGetChildTableName(sml_data)) { free(sml_data->childTableName); + sml_data->childTableName 
= NULL; + printf("no table name\n"); + } else { + printf("Child table name:%02x:%02x:%02x:%02x\n", sml_data->childTableName[0], sml_data->childTableName[1], + sml_data->childTableName[2], sml_data->childTableName[3]); } - printf("Child table name:%02x:%02x:%02x:%02x\n", sml_data->childTableName[0], - sml_data->childTableName[1], - sml_data->childTableName[2], - sml_data->childTableName[3]); + + TAOS_SML_KV* destTags = calloc(sml_data->tagNum, sizeof(TAOS_SML_KV)); + TAOS_SML_KV* srcTags = sml_data->tags; + int numDestTags = 0; + for (int32_t i = 0; i < sml_data->tagNum; ++i) { + TAOS_SML_KV* srcTag = srcTags + i; + if (strcasecmp(srcTag->key, "ID") == 0) { + continue; + } else { + TAOS_SML_KV* destTag = destTags + numDestTags; + memcpy(destTag, srcTag, sizeof(TAOS_SML_KV)); + destTag->key = calloc(1, strlen(srcTag->key) + 1); + memcpy(destTag->key, srcTag->key, strlen(srcTag->key) + 1); + destTag->value = calloc(1, srcTag->length); + memcpy(destTag->value, srcTag->value, srcTag->length); + numDestTags++; + } + free(srcTag->key); + free(srcTag->value); + } + sml_data->tags = destTags; + sml_data->tagNum = numDestTags; + + free(srcTags); } else { //no tags given } diff --git a/tests/examples/c/apitest.c b/tests/examples/c/apitest.c index c9ac8395de..ac522d6151 100644 --- a/tests/examples/c/apitest.c +++ b/tests/examples/c/apitest.c @@ -984,6 +984,11 @@ int32_t verify_schema_less(TAOS* taos) { code = taos_insert_lines(taos, &lines2[0], 1); code = taos_insert_lines(taos, &lines2[1], 1); + char* lines3[] = { + "sth,t1=4i64,t2=5f64,t4=5f64,ID=\"childtable\" c1=3i64,c3=L\"passitagin_stf\",c2=false,c5=5f64,c6=7u64 1626006933641ms", + "sth,t1=4i64,t2=5f64,t4=5f64 c1=3i64,c3=L\"passitagin_stf\",c2=false,c5=5f64,c6=7u64 1626006933654ms" + }; + code = taos_insert_lines(taos, lines3, 2); return code; } @@ -1007,6 +1012,7 @@ int main(int argc, char *argv[]) { printf("************ verify shemaless *************\n"); verify_schema_less(taos); + printf("************ verify query 
*************\n"); verify_query(taos); diff --git a/tests/pytest/insert/line_insert.py b/tests/pytest/insert/line_insert.py index 910396384f..ff3a32b0f7 100644 --- a/tests/pytest/insert/line_insert.py +++ b/tests/pytest/insert/line_insert.py @@ -55,24 +55,33 @@ class TDTestCase: self._conn.insertLines([ lines2[1] ]) print("insertLines result {}".format(code)) - tdSql.query("select * from st"); + tdSql.query("select * from st") tdSql.checkRows(4) - tdSql.query("select * from ste"); + tdSql.query("select * from ste") tdSql.checkRows(3) - tdSql.query("select * from stf"); + tdSql.query("select * from stf") tdSql.checkRows(2) - tdSql.query("select * from stg"); + tdSql.query("select * from stg") tdSql.checkRows(2) - tdSql.query("show tables"); + tdSql.query("show tables") tdSql.checkRows(8) - tdSql.query("describe stf"); + tdSql.query("describe stf") tdSql.checkData(2, 2, 14) + self._conn.insertLines([ + "sth,t1=4i64,t2=5f64,t4=5f64,ID=\"childtable\" c1=3i64,c3=L\"passitagin_stf\",c2=false,c5=5f64,c6=7u64 1626006933641ms", + "sth,t1=4i64,t2=5f64,t4=5f64 c1=3i64,c3=L\"passitagin_stf\",c2=false,c5=5f64,c6=7u64 1626006933654ms" + ]) + tdSql.query('select tbname, * from sth') + tdSql.checkRows(2) + + tdSql.query('select tbname, * from childtable') + tdSql.checkRows(1) def stop(self): tdSql.close() tdLog.success("%s successfully executed" % __file__) From d1ba17941633b70d7086b2ed03e5176f8f7962e7 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Thu, 15 Jul 2021 21:30:35 +0800 Subject: [PATCH 08/27] [TD-4647]:fix mac os compile error --- src/client/src/tscParseLineProtocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index 25b35fdb36..31df4e2578 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -1405,7 +1405,7 @@ int32_t taos_sml_timestamp_convert(TAOS_SML_KV *pVal, char *value, uint16_t len) if (ret) { return ret; } - 
printf("Timestamp after conversion:%ld\n", tsVal); + printf("Timestamp after conversion:%"PRId64"\n", tsVal); pVal->type = TSDB_DATA_TYPE_TIMESTAMP; pVal->length = (int16_t)tDataTypes[pVal->type].bytes; From e56f55c375f6aaf34a99fedf8c77fa35f2461f76 Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Fri, 16 Jul 2021 01:02:37 +0800 Subject: [PATCH 09/27] restructured code according to coding style --- src/client/src/tscParseLineProtocol.c | 637 +++++++++++++------------- 1 file changed, 314 insertions(+), 323 deletions(-) diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index 25b35fdb36..dcbf4e4fdb 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -18,10 +18,6 @@ #include "taos.h" -#define SECONDS_TO_MILLI(TS) TS * 1000 -#define SECONDS_TO_MICRO(TS) TS * 1000000 -#define SECONDS_TO_NANO(TS) TS * 1000000000 - typedef struct { char sTableName[TSDB_TABLE_NAME_LEN]; SHashObj* tagHash; @@ -632,7 +628,7 @@ static int32_t insertChildTableBatch(TAOS* taos, char* cTableName, SArray* cols int32_t try = 0; TAOS_STMT* stmt = taos_stmt_init(taos); - + code = taos_stmt_prepare(stmt, sql, (unsigned long)strlen(sql)); if (code != 0) { tscError("%s", taos_stmt_errstr(stmt)); @@ -843,7 +839,7 @@ clean_up: 2: tag_key, tag_value, field_key Comma,Equal Sign,Space 3: field_value Double quote,Backslash */ -void escape_special_char(uint8_t field, const char **pos) { +static void escapeSpecialCharacter(uint8_t field, const char **pos) { const char *cur = *pos; if (*cur != '\\') { return; @@ -886,7 +882,7 @@ void escape_special_char(uint8_t field, const char **pos) { *pos = cur; } -bool is_valid_integer(char *str) { +static bool isValidInteger(char *str) { char *c = str; if (*c != '+' && *c != '-' && !isdigit(*c)) { return false; @@ -901,7 +897,7 @@ bool is_valid_integer(char *str) { return true; } -bool is_valid_float(char *str) { +static bool isValidFloat(char *str) { char *c = str; uint8_t has_dot, 
has_exp, has_sign; has_dot = 0; @@ -957,19 +953,18 @@ bool is_valid_float(char *str) { return true; } - -bool is_tiny_int(char *pVal, uint16_t len) { +static bool isTinyInt(char *pVal, uint16_t len) { if (len <= 2) { return false; } if (!strcmp(&pVal[len - 2], "i8")) { - printf("Type is int8(%s)\n", pVal); + //printf("Type is int8(%s)\n", pVal); return true; } return false; } -bool is_tiny_uint(char *pVal, uint16_t len) { +static bool isTinyUint(char *pVal, uint16_t len) { if (len <= 2) { return false; } @@ -977,24 +972,24 @@ bool is_tiny_uint(char *pVal, uint16_t len) { return false; } if (!strcmp(&pVal[len - 2], "u8")) { - printf("Type is uint8(%s)\n", pVal); + //printf("Type is uint8(%s)\n", pVal); return true; } return false; } -bool is_small_int(char *pVal, uint16_t len) { +static bool isSmallInt(char *pVal, uint16_t len) { if (len <= 3) { return false; } if (!strcmp(&pVal[len - 3], "i16")) { - printf("Type is int16(%s)\n", pVal); + //printf("Type is int16(%s)\n", pVal); return true; } return false; } -bool is_small_uint(char *pVal, uint16_t len) { +static bool isSmallUint(char *pVal, uint16_t len) { if (len <= 3) { return false; } @@ -1002,24 +997,24 @@ bool is_small_uint(char *pVal, uint16_t len) { return false; } if (strcmp(&pVal[len - 3], "u16") == 0) { - printf("Type is uint16(%s)\n", pVal); + //printf("Type is uint16(%s)\n", pVal); return true; } return false; } -bool is_int(char *pVal, uint16_t len) { +static bool isInt(char *pVal, uint16_t len) { if (len <= 3) { return false; } if (strcmp(&pVal[len - 3], "i32") == 0) { - printf("Type is int32(%s)\n", pVal); + //printf("Type is int32(%s)\n", pVal); return true; } return false; } -bool is_uint(char *pVal, uint16_t len) { +static bool isUint(char *pVal, uint16_t len) { if (len <= 3) { return false; } @@ -1027,24 +1022,24 @@ bool is_uint(char *pVal, uint16_t len) { return false; } if (strcmp(&pVal[len - 3], "u32") == 0) { - printf("Type is uint32(%s)\n", pVal); + //printf("Type is uint32(%s)\n", pVal); 
return true; } return false; } -bool is_big_int(char *pVal, uint16_t len) { +static bool isBigInt(char *pVal, uint16_t len) { if (len <= 3) { return false; } if (strcmp(&pVal[len - 3], "i64") == 0) { - printf("Type is int64(%s)\n", pVal); + //printf("Type is int64(%s)\n", pVal); return true; } return false; } -bool is_big_uint(char *pVal, uint16_t len) { +static bool isBigUint(char *pVal, uint16_t len) { if (len <= 3) { return false; } @@ -1052,48 +1047,48 @@ bool is_big_uint(char *pVal, uint16_t len) { return false; } if (strcmp(&pVal[len - 3], "u64") == 0) { - printf("Type is uint64(%s)\n", pVal); + //printf("Type is uint64(%s)\n", pVal); return true; } return false; } -bool is_float(char *pVal, uint16_t len) { +static bool isFloat(char *pVal, uint16_t len) { if (len <= 3) { return false; } if (strcmp(&pVal[len - 3], "f32") == 0) { - printf("Type is float(%s)\n", pVal); + //printf("Type is float(%s)\n", pVal); return true; } return false; } -bool is_double(char *pVal, uint16_t len) { +static bool isDouble(char *pVal, uint16_t len) { if (len <= 3) { return false; } if (strcmp(&pVal[len - 3], "f64") == 0) { - printf("Type is double(%s)\n", pVal); + //printf("Type is double(%s)\n", pVal); return true; } return false; } -bool is_bool(char *pVal, uint16_t len, bool *b_val) { +static bool isBool(char *pVal, uint16_t len, bool *bVal) { if ((len == 1) && (pVal[len - 1] == 't' || pVal[len - 1] == 'T')) { - printf("Type is bool(%c)\n", pVal[len - 1]); - *b_val = true; + //printf("Type is bool(%c)\n", pVal[len - 1]); + *bVal = true; return true; } if ((len == 1) && (pVal[len - 1] == 'f' || pVal[len - 1] == 'F')) { - printf("Type is bool(%c)\n", pVal[len - 1]); - *b_val = false; + //printf("Type is bool(%c)\n", pVal[len - 1]); + *bVal = false; return true; } @@ -1101,53 +1096,53 @@ bool is_bool(char *pVal, uint16_t len, bool *b_val) { (!strcmp(&pVal[len - 4], "true") || !strcmp(&pVal[len - 4], "True") || !strcmp(&pVal[len - 4], "TRUE"))) { - printf("Type is bool(%s)\n", 
&pVal[len - 4]); - *b_val = true; + //printf("Type is bool(%s)\n", &pVal[len - 4]); + *bVal = true; return true; } if((len == 5) && (!strcmp(&pVal[len - 5], "false") || !strcmp(&pVal[len - 5], "False") || !strcmp(&pVal[len - 5], "FALSE"))) { - printf("Type is bool(%s)\n", &pVal[len - 5]); - *b_val = false; + //printf("Type is bool(%s)\n", &pVal[len - 5]); + *bVal = false; return true; } return false; } -bool is_binary(char *pVal, uint16_t len) { +static bool isBinary(char *pVal, uint16_t len) { //binary: "abc" if (len < 2) { return false; } //binary if (pVal[0] == '"' && pVal[len - 1] == '"') { - printf("Type is binary(%s)\n", pVal); + //printf("Type is binary(%s)\n", pVal); return true; } return false; } -bool is_nchar(char *pVal, uint16_t len) { +static bool isNchar(char *pVal, uint16_t len) { //nchar: L"abc" if (len < 3) { return false; } if (pVal[0] == 'L' && pVal[1] == '"' && pVal[len - 1] == '"') { - printf("Type is nchar(%s)\n", pVal); + //printf("Type is nchar(%s)\n", pVal); return true; } return false; } -bool is_timestamp(char *pVal, uint16_t len, SMLTimeStampType *tsType) { +static bool isTimeStamp(char *pVal, uint16_t len, SMLTimeStampType *tsType) { if (len == 0) { return true; } if ((len == 1) && pVal[0] == '0') { *tsType = SML_TIME_STAMP_NOW; - printf("Type is timestamp(%s)\n", pVal); + //printf("Type is timestamp(%s)\n", pVal); return true; } if (len < 2) { @@ -1156,7 +1151,7 @@ bool is_timestamp(char *pVal, uint16_t len, SMLTimeStampType *tsType) { //No appendix use usec as default if (isdigit(pVal[len - 1]) && isdigit(pVal[len - 2])) { *tsType = SML_TIME_STAMP_MICRO_SECONDS; - printf("Type is timestamp(%s)\n", pVal); + //printf("Type is timestamp(%s)\n", pVal); return true; } if (pVal[len - 1] == 's') { @@ -1178,77 +1173,24 @@ bool is_timestamp(char *pVal, uint16_t len, SMLTimeStampType *tsType) { return false; } } - printf("Type is timestamp(%s)\n", pVal); + //printf("Type is timestamp(%s)\n", pVal); return true; } return false; } - //len does 
not include '\0' from value. -bool taos_sml_type_convert(TAOS_SML_KV *pVal, char *value, - uint16_t len) { +static bool convertSmlValueType(TAOS_SML_KV *pVal, char *value, + uint16_t len) { if (len <= 0) { return false; } - //bool - bool b_val; - if (is_bool(value, len, &b_val)) { - pVal->type = TSDB_DATA_TYPE_BOOL; - pVal->length = (int16_t)tDataTypes[pVal->type].bytes; - pVal->value = calloc(pVal->length, 1); - memcpy(pVal->value, &b_val, pVal->length); - return true; - } - //binary - if (is_binary(value, len)) { - pVal->type = TSDB_DATA_TYPE_BINARY; - pVal->length = len - 2; - pVal->value = calloc(pVal->length, 1); - //copy after " - memcpy(pVal->value, value + 1, pVal->length); - return true; - } - //nchar - if (is_nchar(value, len)) { - pVal->type = TSDB_DATA_TYPE_NCHAR; - pVal->length = len - 3; - pVal->value = calloc(pVal->length, 1); - //copy after L" - memcpy(pVal->value, value + 2, pVal->length); - return true; - } - //floating number - if (is_float(value, len)) { - pVal->type = TSDB_DATA_TYPE_FLOAT; - pVal->length = (int16_t)tDataTypes[pVal->type].bytes; - value[len - 3] = '\0'; - if (!is_valid_float(value)) { - return false; - } - pVal->value = calloc(pVal->length, 1); - float val = (float)strtold(value, NULL); - memcpy(pVal->value, &val, pVal->length); - return true; - } - if (is_double(value, len)) { - pVal->type = TSDB_DATA_TYPE_DOUBLE; - pVal->length = (int16_t)tDataTypes[pVal->type].bytes; - value[len - 3] = '\0'; - if (!is_valid_float(value)) { - return false; - } - pVal->value = calloc(pVal->length, 1); - double val = (double)strtold(value, NULL); - memcpy(pVal->value, &val, pVal->length); - return true; - } //integer number - if (is_tiny_int(value, len)) { + if (isTinyInt(value, len)) { pVal->type = TSDB_DATA_TYPE_TINYINT; pVal->length = (int16_t)tDataTypes[pVal->type].bytes; value[len - 2] = '\0'; - if (!is_valid_integer(value)) { + if (!isValidInteger(value)) { return false; } pVal->value = calloc(pVal->length, 1); @@ -1256,11 +1198,11 @@ bool 
taos_sml_type_convert(TAOS_SML_KV *pVal, char *value, memcpy(pVal->value, &val, pVal->length); return true; } - if (is_tiny_uint(value, len)) { + if (isTinyUint(value, len)) { pVal->type = TSDB_DATA_TYPE_UTINYINT; pVal->length = (int16_t)tDataTypes[pVal->type].bytes; value[len - 2] = '\0'; - if (!is_valid_integer(value)) { + if (!isValidInteger(value)) { return false; } pVal->value = calloc(pVal->length, 1); @@ -1268,11 +1210,11 @@ bool taos_sml_type_convert(TAOS_SML_KV *pVal, char *value, memcpy(pVal->value, &val, pVal->length); return true; } - if (is_small_int(value, len)) { + if (isSmallInt(value, len)) { pVal->type = TSDB_DATA_TYPE_SMALLINT; pVal->length = (int16_t)tDataTypes[pVal->type].bytes; value[len - 3] = '\0'; - if (!is_valid_integer(value)) { + if (!isValidInteger(value)) { return false; } pVal->value = calloc(pVal->length, 1); @@ -1280,11 +1222,11 @@ bool taos_sml_type_convert(TAOS_SML_KV *pVal, char *value, memcpy(pVal->value, &val, pVal->length); return true; } - if (is_small_uint(value, len)) { + if (isSmallUint(value, len)) { pVal->type = TSDB_DATA_TYPE_USMALLINT; pVal->length = (int16_t)tDataTypes[pVal->type].bytes; value[len - 3] = '\0'; - if (!is_valid_integer(value)) { + if (!isValidInteger(value)) { return false; } pVal->value = calloc(pVal->length, 1); @@ -1293,11 +1235,11 @@ bool taos_sml_type_convert(TAOS_SML_KV *pVal, char *value, //memcpy(pVal->value, &val, pVal->length); return true; } - if (is_int(value, len)) { + if (isInt(value, len)) { pVal->type = TSDB_DATA_TYPE_INT; pVal->length = (int16_t)tDataTypes[pVal->type].bytes; value[len - 3] = '\0'; - if (!is_valid_integer(value)) { + if (!isValidInteger(value)) { return false; } pVal->value = calloc(pVal->length, 1); @@ -1305,11 +1247,11 @@ bool taos_sml_type_convert(TAOS_SML_KV *pVal, char *value, memcpy(pVal->value, &val, pVal->length); return true; } - if (is_uint(value, len)) { + if (isUint(value, len)) { pVal->type = TSDB_DATA_TYPE_UINT; pVal->length = 
(int16_t)tDataTypes[pVal->type].bytes; value[len - 3] = '\0'; - if (!is_valid_integer(value)) { + if (!isValidInteger(value)) { return false; } pVal->value = calloc(pVal->length, 1); @@ -1317,11 +1259,11 @@ bool taos_sml_type_convert(TAOS_SML_KV *pVal, char *value, memcpy(pVal->value, &val, pVal->length); return true; } - if (is_big_int(value, len)) { + if (isBigInt(value, len)) { pVal->type = TSDB_DATA_TYPE_BIGINT; pVal->length = (int16_t)tDataTypes[pVal->type].bytes; value[len - 3] = '\0'; - if (!is_valid_integer(value)) { + if (!isValidInteger(value)) { return false; } pVal->value = calloc(pVal->length, 1); @@ -1329,11 +1271,11 @@ bool taos_sml_type_convert(TAOS_SML_KV *pVal, char *value, memcpy(pVal->value, &val, pVal->length); return true; } - if (is_big_uint(value, len)) { + if (isBigUint(value, len)) { pVal->type = TSDB_DATA_TYPE_UBIGINT; pVal->length = (int16_t)tDataTypes[pVal->type].bytes; value[len - 3] = '\0'; - if (!is_valid_integer(value)) { + if (!isValidInteger(value)) { return false; } pVal->value = calloc(pVal->length, 1); @@ -1341,11 +1283,64 @@ bool taos_sml_type_convert(TAOS_SML_KV *pVal, char *value, memcpy(pVal->value, &val, pVal->length); return true; } + //floating number + if (isFloat(value, len)) { + pVal->type = TSDB_DATA_TYPE_FLOAT; + pVal->length = (int16_t)tDataTypes[pVal->type].bytes; + value[len - 3] = '\0'; + if (!isValidFloat(value)) { + return false; + } + pVal->value = calloc(pVal->length, 1); + float val = (float)strtold(value, NULL); + memcpy(pVal->value, &val, pVal->length); + return true; + } + if (isDouble(value, len)) { + pVal->type = TSDB_DATA_TYPE_DOUBLE; + pVal->length = (int16_t)tDataTypes[pVal->type].bytes; + value[len - 3] = '\0'; + if (!isValidFloat(value)) { + return false; + } + pVal->value = calloc(pVal->length, 1); + double val = (double)strtold(value, NULL); + memcpy(pVal->value, &val, pVal->length); + return true; + } + //binary + if (isBinary(value, len)) { + pVal->type = TSDB_DATA_TYPE_BINARY; + pVal->length 
= len - 2; + pVal->value = calloc(pVal->length, 1); + //copy after " + memcpy(pVal->value, value + 1, pVal->length); + return true; + } + //nchar + if (isNchar(value, len)) { + pVal->type = TSDB_DATA_TYPE_NCHAR; + pVal->length = len - 3; + pVal->value = calloc(pVal->length, 1); + //copy after L" + memcpy(pVal->value, value + 2, pVal->length); + return true; + } + //bool + bool bVal; + if (isBool(value, len, &bVal)) { + pVal->type = TSDB_DATA_TYPE_BOOL; + pVal->length = (int16_t)tDataTypes[pVal->type].bytes; + pVal->value = calloc(pVal->length, 1); + memcpy(pVal->value, &bVal, pVal->length); + return true; + } //TODO: handle default is float here return false; } -int32_t tscGetTimeStampValue(char *value, uint16_t len, SMLTimeStampType type, int64_t *ts) { +static int32_t getTimeStampValue(char *value, uint16_t len, + SMLTimeStampType type, int64_t *ts) { if (len >= 2) { for (int i = 0; i < len - 2; ++i) { @@ -1391,21 +1386,21 @@ int32_t tscGetTimeStampValue(char *value, uint16_t len, SMLTimeStampType type, i return TSDB_CODE_SUCCESS; } -int32_t taos_sml_timestamp_convert(TAOS_SML_KV *pVal, char *value, uint16_t len) { +static int32_t convertSmlTimeStamp(TAOS_SML_KV *pVal, char *value, + uint16_t len) { int32_t ret; SMLTimeStampType type; int64_t tsVal; - - if (!is_timestamp(value, len, &type)) { + if (!isTimeStamp(value, len, &type)) { return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; } - ret = tscGetTimeStampValue(value, len, type, &tsVal); + ret = getTimeStampValue(value, len, type, &tsVal); if (ret) { return ret; } - printf("Timestamp after conversion:%ld\n", tsVal); + //printf("Timestamp after conversion:%ld\n", tsVal); pVal->type = TSDB_DATA_TYPE_TIMESTAMP; pVal->length = (int16_t)tDataTypes[pVal->type].bytes; @@ -1414,82 +1409,7 @@ int32_t taos_sml_timestamp_convert(TAOS_SML_KV *pVal, char *value, uint16_t len) return TSDB_CODE_SUCCESS; } -bool taos_sml_parse_value(TAOS_SML_KV *pKV, const char **index, - bool *is_last_kv) { - const char *start, *cur; - char *value = 
NULL; - uint16_t len = 0; - start = cur = *index; - - while (1) { - // unescaped ',' or ' ' or '\0' identifies a value - if ((*cur == ',' || *cur == ' ' || *cur == '\0') && *(cur - 1) != '\\') { - value = calloc(len + 1, 1); - memcpy(value, start, len); - value[len] = '\0'; - if (!taos_sml_type_convert(pKV, value, len)) { - free(value); - return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; - } - //unescaped ' ' or '\0' indicates end of value - *is_last_kv = (*cur == ' ' || *cur == '\0') ? true : false; - break; - } - //Escape special character - if (*cur == '\\') { - escape_special_char(2, &cur); - } - cur++; - len++; - } - - if (value) { - free(value); - } - - *index = (*cur == '\0') ? cur : cur + 1; - return TSDB_CODE_SUCCESS; -} - -int32_t taos_sml_parse_key(TAOS_SML_KV *pKV, const char **index) { - const char *cur = *index; - char key[TSDB_COL_NAME_LEN]; - uint16_t len = 0; - - //key field cannot start with '_' - if (*cur == '_') { - printf("Tag key cannnot start with \'_\'\n"); - return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; - } - while (*cur != '\0') { - if (len > TSDB_COL_NAME_LEN) { - printf("Key field cannot exceeds 65 characters"); - return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; - } - //unescaped '=' identifies a tag key - if (*cur == '=' && *(cur - 1) != '\\') { - printf("key: found equal sign\n"); - break; - } - //Escape special character - if (*cur == '\\') { - escape_special_char(2, &cur); - } - key[len] = *cur; - cur++; - len++; - } - key[len] = '\0'; - - pKV->key = calloc(len + 1, 1); - memcpy(pKV->key, key, len + 1); - printf("key:%s|len:%d\n", pKV->key, len); - *index = cur + 1; - return TSDB_CODE_SUCCESS; -} - - -int32_t taos_sml_parse_timestamp(TAOS_SML_KV **pTS, const char **index) { +static int32_t parseSmlTimeStamp(TAOS_SML_KV **pTS, const char **index) { const char *start, *cur; int32_t ret = TSDB_CODE_SUCCESS; int len = 0; @@ -1509,7 +1429,7 @@ int32_t taos_sml_parse_timestamp(TAOS_SML_KV **pTS, const char **index) { memcpy(value, start, len); } - ret = 
taos_sml_timestamp_convert(*pTS, value, len); + ret = convertSmlTimeStamp(*pTS, value, len); if (ret) { free(value); free(*pTS); @@ -1522,7 +1442,141 @@ int32_t taos_sml_parse_timestamp(TAOS_SML_KV **pTS, const char **index) { return ret; } -int32_t taos_sml_parse_kv_pairs(TAOS_SML_KV **pKVs, int *num_kvs, const char **index, bool isField) { +static bool getChildTableNameFromTags(TAOS_SML_DATA_POINT *pData) { + TAOS_SML_KV *pTags = pData->tags; + int tagNum = pData->tagNum; + char *childTableName = pData->childTableName; + + for (int i = 0; i < tagNum; ++i) { + //use tag value as child table name if key is "ID" + //tag value has to be binary for now + if (!strcasecmp(pTags->key, "ID") && pTags->type == TSDB_DATA_TYPE_BINARY) { + memcpy(childTableName, pTags->value, pTags->length); + return true; + } + pTags++; + } + return false; +} + +static int32_t parseSmlKey(TAOS_SML_KV *pKV, const char **index) { + const char *cur = *index; + char key[TSDB_COL_NAME_LEN]; + uint16_t len = 0; + + //key field cannot start with '_' + if (*cur == '_') { + //printf("Tag key cannnot start with \'_\'\n"); + return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + } + while (*cur != '\0') { + if (len > TSDB_COL_NAME_LEN) { + tscDebug("Key field cannot exceeds 65 characters"); + return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + } + //unescaped '=' identifies a tag key + if (*cur == '=' && *(cur - 1) != '\\') { + break; + } + //Escape special character + if (*cur == '\\') { + escapeSpecialCharacter(2, &cur); + } + key[len] = *cur; + cur++; + len++; + } + key[len] = '\0'; + + pKV->key = calloc(len + 1, 1); + memcpy(pKV->key, key, len + 1); + tscDebug("Key:%s|len:%d", pKV->key, len); + *index = cur + 1; + return TSDB_CODE_SUCCESS; +} + + +static bool parseSmlValue(TAOS_SML_KV *pKV, const char **index, + bool *is_last_kv) { + const char *start, *cur; + char *value = NULL; + uint16_t len = 0; + start = cur = *index; + + while (1) { + // unescaped ',' or ' ' or '\0' identifies a value + if ((*cur == ',' || *cur == 
' ' || *cur == '\0') && *(cur - 1) != '\\') { + value = calloc(len + 1, 1); + memcpy(value, start, len); + value[len] = '\0'; + if (!convertSmlValueType(pKV, value, len)) { + free(value); + return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + } + //unescaped ' ' or '\0' indicates end of value + *is_last_kv = (*cur == ' ' || *cur == '\0') ? true : false; + break; + } + //Escape special character + if (*cur == '\\') { + escapeSpecialCharacter(2, &cur); + } + cur++; + len++; + } + + if (value) { + free(value); + } + + *index = (*cur == '\0') ? cur : cur + 1; + return TSDB_CODE_SUCCESS; +} + +static int32_t parseSmlMeasurement(TAOS_SML_DATA_POINT *pSml, const char **index, + uint8_t *has_tags) { + const char *cur = *index; + uint16_t len = 0; + + pSml->stableName = calloc(TSDB_TABLE_NAME_LEN, 1); + if (*cur == '_') { + tscError("Measurement field cannnot start with \'_\'"); + free(pSml->stableName); + return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + } + + while (*cur != '\0') { + if (len > TSDB_TABLE_NAME_LEN) { + tscError("Measurement field cannot exceeds 193 characters"); + free(pSml->stableName); + return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + } + //first unescaped comma or space identifies measurement + //if space detected first, meaning no tag in the input + if (*cur == ',' && *(cur - 1) != '\\') { + *has_tags = 1; + break; + } + if (*cur == ' ' && *(cur - 1) != '\\') { + break; + } + //Comma, Space, Backslash needs to be escaped if any + if (*cur == '\\') { + escapeSpecialCharacter(1, &cur); + } + pSml->stableName[len] = *cur; + cur++; + len++; + } + pSml->stableName[len] = '\0'; + *index = cur + 1; + tscDebug("Stable name in measurement:%s|len:%d", pSml->stableName, len); + + return TSDB_CODE_SUCCESS; +} + +static int32_t parseSmlKvPairs(TAOS_SML_KV **pKVs, int *num_kvs, + const char **index, bool isField) { const char *cur = *index; int32_t ret = TSDB_CODE_SUCCESS; TAOS_SML_KV *pkv; @@ -1540,20 +1594,20 @@ int32_t taos_sml_parse_kv_pairs(TAOS_SML_KV **pKVs, int *num_kvs, const char 
**i } while (*cur != '\0') { - ret = taos_sml_parse_key(pkv, &cur); + ret = parseSmlKey(pkv, &cur); if (ret) { - printf("Unable to parse key field\n"); + tscError("Unable to parse key field"); goto error; } - ret = taos_sml_parse_value(pkv, &cur, &is_last_kv); + ret = parseSmlValue(pkv, &cur, &is_last_kv); if (ret) { - printf("Unable to parse value field\n"); + tscError("Unable to parse value field"); goto error; } *num_kvs += 1; if (is_last_kv) { - printf("last key value field detected\n"); + tscDebug("last key-value field detected"); goto done; } @@ -1585,156 +1639,93 @@ int32_t taos_sml_parse_kv_pairs(TAOS_SML_KV **pKVs, int *num_kvs, const char **i return ret; } -int32_t taos_sml_parse_measurement(TAOS_SML_DATA_POINT *pSml, const char **index, uint8_t *has_tags) { - const char *cur = *index; - uint16_t len = 0; - - pSml->stableName = calloc(TSDB_TABLE_NAME_LEN, 1); - if (*cur == '_') { - printf("Measurement field cannnot start with \'_\'\n"); - free(pSml->stableName); - return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; +static void removeChildTableNameFromTags(TAOS_SML_DATA_POINT** smlData) { + TAOS_SML_KV* destTags = calloc((*smlData)->tagNum, sizeof(TAOS_SML_KV)); + TAOS_SML_KV* srcTags = (*smlData)->tags; + int numDestTags = 0; + for (int32_t i = 0; i < (*smlData)->tagNum; ++i) { + TAOS_SML_KV* srcTag = srcTags + i; + if (strcasecmp(srcTag->key, "ID") == 0) { + continue; + } else { + TAOS_SML_KV* destTag = destTags + numDestTags; + memcpy(destTag, srcTag, sizeof(TAOS_SML_KV)); + destTag->key = calloc(1, strlen(srcTag->key) + 1); + memcpy(destTag->key, srcTag->key, strlen(srcTag->key) + 1); + destTag->value = calloc(1, srcTag->length); + memcpy(destTag->value, srcTag->value, srcTag->length); + numDestTags++; + } + free(srcTag->key); + free(srcTag->value); } + (*smlData)->tags = destTags; + (*smlData)->tagNum = numDestTags; - while (*cur != '\0') { - if (len > TSDB_TABLE_NAME_LEN) { - printf("Measurement field cannot exceeds 193 characters"); - free(pSml->stableName); - 
return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; - } - //first unescaped comma or space identifies measurement - //if space detected first, meaning no tag in the input - if (*cur == ',' && *(cur - 1) != '\\') { - *has_tags = 1; - printf("measurement:found comma\n"); - break; - } - if (*cur == ' ' && *(cur - 1) != '\\') { - printf("measurement:found space\n"); - break; - } - //Comma, Space, Backslash needs to be escaped if any - if (*cur == '\\') { - escape_special_char(1, &cur); - } - pSml->stableName[len] = *cur; - cur++; - len++; - } - pSml->stableName[len] = '\0'; - *index = cur + 1; - printf("stable name:%s|len:%d\n", pSml->stableName, len); - - return TSDB_CODE_SUCCESS; + free(srcTags); } +static void moveTimeStampToFirstKv(TAOS_SML_DATA_POINT** smlData, TAOS_SML_KV *ts) { + TAOS_SML_KV* tsField = (*smlData)->fields; + tsField->length = ts->length; + tsField->type = ts->type; + tsField->value = malloc(ts->length); + tsField->key = malloc(strlen(ts->key) + 1); + memcpy(tsField->key, ts->key, strlen(ts->key) + 1); + memcpy(tsField->value, ts->value, ts->length); + (*smlData)->fieldNum = (*smlData)->fieldNum + 1; -bool tscGetChildTableName(TAOS_SML_DATA_POINT *pData) { - TAOS_SML_KV *pTags = pData->tags; - int tagNum = pData->tagNum; - char *childTableName = pData->childTableName; - - for (int i = 0; i < tagNum; ++i) { - //use tag value as child table name if key is "ID" - //tag value has to be binary for now - if (!strcasecmp(pTags->key, "ID") && pTags->type == TSDB_DATA_TYPE_BINARY) { - memcpy(childTableName, pTags->value, pTags->length); - return true; - } - pTags++; - } - return false; + free(ts->key); + free(ts->value); + free(ts); } -int32_t tscParseLine(const char* sql, TAOS_SML_DATA_POINT* sml_data) { +int32_t tscParseLine(const char* sql, TAOS_SML_DATA_POINT* smlData) { const char* index = sql; int32_t ret = TSDB_CODE_SUCCESS; uint8_t has_tags = 0; TAOS_SML_KV *timestamp = NULL; - - ret = taos_sml_parse_measurement(sml_data, &index, &has_tags); + ret = 
parseSmlMeasurement(smlData, &index, &has_tags); if (ret) { - printf("Unable to parse measurement\n"); + tscError("Unable to parse measurement"); return ret; } - printf("============Parse measurement finished, has_tags:%d===============\n", has_tags); + tscDebug("Parse measurement finished, has_tags:%d", has_tags); //Parse Tags if (has_tags) { - ret = taos_sml_parse_kv_pairs(&sml_data->tags, &sml_data->tagNum, &index, false); + ret = parseSmlKvPairs(&smlData->tags, &smlData->tagNum, &index, false); if (ret) { - printf("Unable to parse tag\n"); - //TODO free allocated fileds inside TAOS_SML_DATA_POINT first + tscError("Unable to parse tag"); return ret; } - sml_data->childTableName = calloc(TSDB_TABLE_NAME_LEN, 1); - if (!tscGetChildTableName(sml_data)) { - free(sml_data->childTableName); - sml_data->childTableName = NULL; - printf("no table name\n"); - } else { - printf("Child table name:%02x:%02x:%02x:%02x\n", sml_data->childTableName[0], sml_data->childTableName[1], - sml_data->childTableName[2], sml_data->childTableName[3]); + smlData->childTableName = calloc(TSDB_TABLE_NAME_LEN, 1); + if (!getChildTableNameFromTags(smlData)) { + free(smlData->childTableName); + smlData->childTableName = NULL; + tscDebug("No child table name in tags"); } - - TAOS_SML_KV* destTags = calloc(sml_data->tagNum, sizeof(TAOS_SML_KV)); - TAOS_SML_KV* srcTags = sml_data->tags; - int numDestTags = 0; - for (int32_t i = 0; i < sml_data->tagNum; ++i) { - TAOS_SML_KV* srcTag = srcTags + i; - if (strcasecmp(srcTag->key, "ID") == 0) { - continue; - } else { - TAOS_SML_KV* destTag = destTags + numDestTags; - memcpy(destTag, srcTag, sizeof(TAOS_SML_KV)); - destTag->key = calloc(1, strlen(srcTag->key) + 1); - memcpy(destTag->key, srcTag->key, strlen(srcTag->key) + 1); - destTag->value = calloc(1, srcTag->length); - memcpy(destTag->value, srcTag->value, srcTag->length); - numDestTags++; - } - free(srcTag->key); - free(srcTag->value); - } - sml_data->tags = destTags; - sml_data->tagNum = 
numDestTags; - - free(srcTags); - } else { - //no tags given + removeChildTableNameFromTags(&smlData); } + tscDebug("Parse tags finished, num of tags:%d", smlData->tagNum); - printf("============Parse tags finished, num_tags:%d===============\n", sml_data->tagNum); //Parse fields - ret = taos_sml_parse_kv_pairs(&sml_data->fields, &sml_data->fieldNum, &index, true); + ret = parseSmlKvPairs(&smlData->fields, &smlData->fieldNum, &index, true); if (ret) { - printf("Unable to parse field\n"); - //TODO free allocated fileds inside TAOS_SML_DATA_POINT first + tscError("Unable to parse field"); return ret; } + tscDebug("Parse fields finished, num of fields:%d", smlData->fieldNum); - printf("============Parse fields finished, num_fields:%d===============\n", sml_data->fieldNum); //Parse timestamp - ret = taos_sml_parse_timestamp(&timestamp, &index); + ret = parseSmlTimeStamp(&timestamp, &index); if (ret) { - printf("Unable to parse timestamp\n"); + tscError("Unable to parse timestamp"); return ret; } - - sml_data->fieldNum = sml_data->fieldNum + 1; - TAOS_SML_KV* tsField = sml_data->fields; - tsField->length = timestamp->length; - tsField->type = timestamp->type; - tsField->value = malloc(timestamp->length); - tsField->key = malloc(strlen(timestamp->key)+1); - memcpy(tsField->key, timestamp->key, strlen(timestamp->key)+1); - memcpy(tsField->value, timestamp->value, timestamp->length); - - free(timestamp->key); - free(timestamp->value); - free(timestamp); - printf("============Parse timestamp finished===============\n"); + moveTimeStampToFirstKv(&smlData, timestamp); + tscDebug("Parse timestamp finished"); return true; } From 1a60dadd0d30a790829b40f80edb54fe79c28e80 Mon Sep 17 00:00:00 2001 From: Shenglian Zhou Date: Fri, 16 Jul 2021 09:15:14 +0800 Subject: [PATCH 10/27] jdbc connector --- src/client/src/TSDBJNIConnector.c | 31 +++++++++++++++++++ .../com/taosdata/jdbc/TSDBJNIConnector.java | 9 ++++++ .../taosdata/jdbc/TSDBJNIConnectorTest.java | 4 +++ 3 files changed, 44
insertions(+) diff --git a/src/client/src/TSDBJNIConnector.c b/src/client/src/TSDBJNIConnector.c index 379cf86301..c9b00800e6 100644 --- a/src/client/src/TSDBJNIConnector.c +++ b/src/client/src/TSDBJNIConnector.c @@ -946,3 +946,34 @@ JNIEXPORT jint JNICALL Java_com_taosdata_jdbc_TSDBJNIConnector_setTableNameTagsI return JNI_SUCCESS; } + +JNIEXPORT jlong JNICALL Java_com_taosdata_jdbc_TSDBJNIConnector_insertLinesImp(JNIEnv *env, jobject jobj, + jobjectArray lines, jlong conn) { + TAOS *taos = (TAOS *)conn; + if (taos == NULL) { + jniError("jobj:%p, connection already closed", jobj); + return JNI_CONNECTION_NULL; + } + + int numLines = (*env)->GetArrayLength(env, lines); + char** c_lines = calloc(numLines, sizeof(char*)); + + for (int i = 0; i < numLines; ++i) { + jstring line = (jstring) ((*env)->GetObjectArrayElement(env, lines, i)); + c_lines[i] = (char*)(*env)->GetStringUTFChars(env, line, 0); + } + + int code = taos_insert_lines(taos, c_lines, numLines); + + for (int i = 0; i < numLines; ++i) { + jstring line = (jstring) ((*env)->GetObjectArrayElement(env, lines, i)); + (*env)->ReleaseStringUTFChars(env, line, c_lines[i]); + } + + if (code != TSDB_CODE_SUCCESS) { + jniError("jobj:%p, conn:%p, code:%s", jobj, taos, tstrerror(code)); + return JNI_TDENGINE_ERROR; + } + + return code; +} \ No newline at end of file diff --git a/src/connector/jdbc/src/main/java/com/taosdata/jdbc/TSDBJNIConnector.java b/src/connector/jdbc/src/main/java/com/taosdata/jdbc/TSDBJNIConnector.java index 7f400fc1ee..01d2c69ae6 100755 --- a/src/connector/jdbc/src/main/java/com/taosdata/jdbc/TSDBJNIConnector.java +++ b/src/connector/jdbc/src/main/java/com/taosdata/jdbc/TSDBJNIConnector.java @@ -348,4 +348,13 @@ public class TSDBJNIConnector { } private native int closeStmt(long stmt, long con); + + public void insertLines(String[] lines) { + int code = insertLines(lines, this.taos); + if (code != TSDBConstants.JNI_SUCCESS) { + throw TSDBError.createSQLException(TSDBErrorNumbers.ERROR_UNKNOWN, 
"failed to insertLines"); + } + } + + private native int insertLinesImp(String[] lines, long conn); } diff --git a/src/connector/jdbc/src/test/java/com/taosdata/jdbc/TSDBJNIConnectorTest.java b/src/connector/jdbc/src/test/java/com/taosdata/jdbc/TSDBJNIConnectorTest.java index b5f8114bff..28c84bd7f1 100644 --- a/src/connector/jdbc/src/test/java/com/taosdata/jdbc/TSDBJNIConnectorTest.java +++ b/src/connector/jdbc/src/test/java/com/taosdata/jdbc/TSDBJNIConnectorTest.java @@ -114,6 +114,10 @@ public class TSDBJNIConnectorTest { throw TSDBError.createSQLException(TSDBErrorNumbers.ERROR_JNI_RESULT_SET_NULL); } // close statement + connector.executeQuery("use d"); + String[] lines = new String[] {"st,t1=3i64,t2=4f64,t3=\"t3\" c1=3i64,c3=L\"passit\",c2=false,c4=4f64 1626006833639000000ns", + "st,t1=4i64,t3=\"t4\",t2=5f64,t4=5f64 c1=3i64,c3=L\"passitagin\",c2=true,c4=5f64,c5=5f64 1626006833640000000ns"}; + connector.insetLines(lines); // close connection connector.closeConnection(); From 62cbe4f323775ced1620f544754a2d06ef8212bb Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Fri, 16 Jul 2021 09:37:46 +0800 Subject: [PATCH 11/27] fix jdbc connector compilation error --- .../src/main/java/com/taosdata/jdbc/TSDBJNIConnector.java | 4 ++-- .../src/test/java/com/taosdata/jdbc/TSDBJNIConnectorTest.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/connector/jdbc/src/main/java/com/taosdata/jdbc/TSDBJNIConnector.java b/src/connector/jdbc/src/main/java/com/taosdata/jdbc/TSDBJNIConnector.java index 01d2c69ae6..051eca7e10 100755 --- a/src/connector/jdbc/src/main/java/com/taosdata/jdbc/TSDBJNIConnector.java +++ b/src/connector/jdbc/src/main/java/com/taosdata/jdbc/TSDBJNIConnector.java @@ -349,8 +349,8 @@ public class TSDBJNIConnector { private native int closeStmt(long stmt, long con); - public void insertLines(String[] lines) { - int code = insertLines(lines, this.taos); + public void insertLines(String[] lines) throws SQLException { + int code = 
insertLinesImp(lines, this.taos); if (code != TSDBConstants.JNI_SUCCESS) { throw TSDBError.createSQLException(TSDBErrorNumbers.ERROR_UNKNOWN, "failed to insertLines"); } diff --git a/src/connector/jdbc/src/test/java/com/taosdata/jdbc/TSDBJNIConnectorTest.java b/src/connector/jdbc/src/test/java/com/taosdata/jdbc/TSDBJNIConnectorTest.java index 28c84bd7f1..bbcbd33ebd 100644 --- a/src/connector/jdbc/src/test/java/com/taosdata/jdbc/TSDBJNIConnectorTest.java +++ b/src/connector/jdbc/src/test/java/com/taosdata/jdbc/TSDBJNIConnectorTest.java @@ -117,7 +117,7 @@ public class TSDBJNIConnectorTest { connector.executeQuery("use d"); String[] lines = new String[] {"st,t1=3i64,t2=4f64,t3=\"t3\" c1=3i64,c3=L\"passit\",c2=false,c4=4f64 1626006833639000000ns", "st,t1=4i64,t3=\"t4\",t2=5f64,t4=5f64 c1=3i64,c3=L\"passitagin\",c2=true,c4=5f64,c5=5f64 1626006833640000000ns"}; - connector.insetLines(lines); + connector.insertLines(lines); // close connection connector.closeConnection(); From 76fc259bd024c0646460fbe88bd69d0589b515b3 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Fri, 16 Jul 2021 10:56:11 +0800 Subject: [PATCH 12/27] increase memory allocation by 1.5 during parse sml kv pairs --- src/client/src/tscParseLineProtocol.c | 31 ++++++++++++++++++--------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index 1c388778a8..b6779fdc67 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -1400,7 +1400,7 @@ static int32_t convertSmlTimeStamp(TAOS_SML_KV *pVal, char *value, if (ret) { return ret; } - tscDebug("Timestamp after conversion:%"PRId64"\n", tsVal); + tscDebug("Timestamp after conversion:%"PRId64, tsVal); pVal->type = TSDB_DATA_TYPE_TIMESTAMP; pVal->length = (int16_t)tDataTypes[pVal->type].bytes; @@ -1582,14 +1582,16 @@ static int32_t parseSmlKvPairs(TAOS_SML_KV **pKVs, int *num_kvs, TAOS_SML_KV *pkv; bool is_last_kv = false; + 
int32_t capacity = 0; if (isField) { - //leave space for timestamp - *pKVs = calloc(2, sizeof(TAOS_SML_KV)); + capacity = 64; + *pKVs = calloc(capacity, sizeof(TAOS_SML_KV)); + // leave space for timestamp; pkv = *pKVs; pkv++; - } - else { - *pKVs = calloc(1, sizeof(TAOS_SML_KV)); + } else { + capacity = 8; + *pKVs = calloc(capacity, sizeof(TAOS_SML_KV)); pkv = *pKVs; } @@ -1613,11 +1615,19 @@ static int32_t parseSmlKvPairs(TAOS_SML_KV **pKVs, int *num_kvs, //reallocate addtional memory for more kvs TAOS_SML_KV *more_kvs = NULL; + if (isField) { - more_kvs = realloc(*pKVs, (*num_kvs + 2) * sizeof(TAOS_SML_KV)); + if ((*num_kvs + 2) > capacity) { + capacity *= 3; capacity /= 2; + } + more_kvs = realloc(*pKVs, capacity * sizeof(TAOS_SML_KV)); } else { - more_kvs = realloc(*pKVs, (*num_kvs + 1) * sizeof(TAOS_SML_KV)); + if ((*num_kvs + 1) > capacity) { + capacity *= 3; capacity /= 2; + } + more_kvs = realloc(*pKVs, capacity * sizeof(TAOS_SML_KV)); } + if (!more_kvs) { goto error; } @@ -1631,10 +1641,10 @@ static int32_t parseSmlKvPairs(TAOS_SML_KV **pKVs, int *num_kvs, } goto done; - error: +error: free(*pKVs); return ret; - done: +done: *index = cur; return ret; } @@ -1707,6 +1717,7 @@ int32_t tscParseLine(const char* sql, TAOS_SML_DATA_POINT* smlData) { tscDebug("No child table name in tags"); } removeChildTableNameFromTags(&smlData); + } tscDebug("Parse tags finished, num of tags:%d", smlData->tagNum); From aadbf21ddd3aeaa8f2487d4042af84801293c08f Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Fri, 16 Jul 2021 15:40:52 +0800 Subject: [PATCH 13/27] fix bug that save index instead of pointer when taosarray reallocation cause point invalidate --- src/client/src/tscParseLineProtocol.c | 100 ++++++++++++++++---------- 1 file changed, 63 insertions(+), 37 deletions(-) diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index b6779fdc67..7c789b502e 100644 --- a/src/client/src/tscParseLineProtocol.c +++ 
b/src/client/src/tscParseLineProtocol.c @@ -34,7 +34,7 @@ typedef struct { char* value; //=================================== - SSchema* schema; + size_t fieldSchemaIdx; } TAOS_SML_KV; typedef struct { @@ -49,7 +49,7 @@ typedef struct { int fieldNum; //================================ - SSmlSTableSchema* schema; + size_t schemaIdx; } TAOS_SML_DATA_POINT; typedef enum { @@ -126,10 +126,12 @@ static int32_t getFieldBytesFromSmlKv(TAOS_SML_KV* kv, int32_t* bytes) { static int32_t buildSmlKvSchema(TAOS_SML_KV* smlKv, SHashObj* hash, SArray* array) { SSchema* pField = NULL; - SSchema** ppField = taosHashGet(hash, smlKv->key, strlen(smlKv->key)); + size_t* pFieldIdx = taosHashGet(hash, smlKv->key, strlen(smlKv->key)); + size_t fieldIdx = -1; int32_t code = 0; - if (ppField) { - pField = *ppField; + if (pFieldIdx) { + fieldIdx = *pFieldIdx; + pField = taosArrayGet(array, fieldIdx); if (pField->type != smlKv->type) { tscError("type mismatch. key %s, type %d. type before %d", smlKv->key, smlKv->type, pField->type); @@ -158,10 +160,11 @@ static int32_t buildSmlKvSchema(TAOS_SML_KV* smlKv, SHashObj* hash, SArray* arra field.bytes = bytes; pField = taosArrayPush(array, &field); - taosHashPut(hash, field.name, tagKeyLen, &pField, POINTER_BYTES); + fieldIdx = taosArrayGetSize(array) - 1; + taosHashPut(hash, field.name, tagKeyLen, &fieldIdx, sizeof(fieldIdx)); } - smlKv->schema = pField; + smlKv->fieldSchemaIdx = fieldIdx; return 0; } @@ -174,10 +177,12 @@ static int32_t buildDataPointSchemas(TAOS_SML_DATA_POINT* points, int numPoint, for (int i = 0; i < numPoint; ++i) { TAOS_SML_DATA_POINT* point = &points[i]; size_t stableNameLen = strlen(point->stableName); - SSmlSTableSchema** ppStableSchema = taosHashGet(sname2shema, point->stableName, stableNameLen); + size_t* pStableIdx = taosHashGet(sname2shema, point->stableName, stableNameLen); SSmlSTableSchema* pStableSchema = NULL; - if (ppStableSchema) { - pStableSchema= *ppStableSchema; + size_t stableIdx = -1; + if (pStableIdx) { + 
pStableSchema= taosArrayGet(stableSchemas, *pStableIdx); + stableIdx = *pStableIdx; } else { SSmlSTableSchema schema; strncpy(schema.sTableName, point->stableName, stableNameLen); @@ -188,7 +193,8 @@ static int32_t buildDataPointSchemas(TAOS_SML_DATA_POINT* points, int numPoint, schema.fieldHash = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, false); pStableSchema = taosArrayPush(stableSchemas, &schema); - taosHashPut(sname2shema, schema.sTableName, stableNameLen, &pStableSchema, POINTER_BYTES); + stableIdx = taosArrayGetSize(stableSchemas) - 1; + taosHashPut(sname2shema, schema.sTableName, stableNameLen, &stableIdx, sizeof(size_t)); } for (int j = 0; j < point->tagNum; ++j) { @@ -209,7 +215,7 @@ static int32_t buildDataPointSchemas(TAOS_SML_DATA_POINT* points, int numPoint, } } - point->schema = pStableSchema; + point->schemaIdx = stableIdx; } size_t numStables = taosArrayGetSize(stableSchemas); @@ -601,8 +607,12 @@ static int32_t creatChildTableIfNotExists(TAOS* taos, const char* cTableName, co return code; } - taos_stmt_close(stmt); - return 0; + code = taos_stmt_close(stmt); + if (code != 0) { + tscError("%s", taos_stmt_errstr(stmt)); + return code; + } + return code; } static int32_t insertChildTableBatch(TAOS* taos, char* cTableName, SArray* colsSchema, SArray* rowsBind) { @@ -674,7 +684,8 @@ static int32_t insertChildTableBatch(TAOS* taos, char* cTableName, SArray* cols return code; } -static int32_t arrangePointsByChildTableName(TAOS_SML_DATA_POINT* points, int numPoints, SHashObj* cname2points) { +static int32_t arrangePointsByChildTableName(TAOS_SML_DATA_POINT* points, int numPoints, + SHashObj* cname2points, SArray* stableSchemas) { for (int32_t i = 0; i < numPoints; ++i) { TAOS_SML_DATA_POINT * point = points + i; if (!point->childTableName) { @@ -686,11 +697,13 @@ static int32_t arrangePointsByChildTableName(TAOS_SML_DATA_POINT* points, int nu point->childTableName[tableNameLen] = '\0'; } + SSmlSTableSchema* stableSchema = 
taosArrayGet(stableSchemas, point->schemaIdx); + for (int j = 0; j < point->tagNum; ++j) { TAOS_SML_KV* kv = point->tags + j; if (kv->type == TSDB_DATA_TYPE_TIMESTAMP) { int64_t ts = *(int64_t*)(kv->value); - ts = convertTimePrecision(ts, TSDB_TIME_PRECISION_NANO, point->schema->precision); + ts = convertTimePrecision(ts, TSDB_TIME_PRECISION_NANO, stableSchema->precision); *(int64_t*)(kv->value) = ts; } } @@ -699,7 +712,7 @@ static int32_t arrangePointsByChildTableName(TAOS_SML_DATA_POINT* points, int nu TAOS_SML_KV* kv = point->fields + j; if (kv->type == TSDB_DATA_TYPE_TIMESTAMP) { int64_t ts = *(int64_t*)(kv->value); - ts = convertTimePrecision(ts, TSDB_TIME_PRECISION_NANO, point->schema->precision); + ts = convertTimePrecision(ts, TSDB_TIME_PRECISION_NANO, stableSchema->precision); *(int64_t*)(kv->value) = ts; } } @@ -718,10 +731,12 @@ static int32_t arrangePointsByChildTableName(TAOS_SML_DATA_POINT* points, int nu return 0; } -static int32_t insertPoints(TAOS* taos, TAOS_SML_DATA_POINT* points, int32_t numPoints) { +static int32_t insertPoints(TAOS* taos, TAOS_SML_DATA_POINT* points, int32_t numPoints, SArray* stableSchemas) { + int32_t code = TSDB_CODE_SUCCESS; + SHashObj* cname2points = taosHashInit(128, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, false); - arrangePointsByChildTableName(points, numPoints, cname2points); + arrangePointsByChildTableName(points, numPoints, cname2points, stableSchemas); int isNullColBind = TSDB_TRUE; SArray** pCTablePoints = taosHashIterate(cname2points, NULL); @@ -729,8 +744,9 @@ static int32_t insertPoints(TAOS* taos, TAOS_SML_DATA_POINT* points, int32_t num SArray* cTablePoints = *pCTablePoints; TAOS_SML_DATA_POINT * point = taosArrayGetP(cTablePoints, 0); - size_t numTags = taosArrayGetSize(point->schema->tags); - size_t numCols = taosArrayGetSize(point->schema->fields); + SSmlSTableSchema* sTableSchema = taosArrayGet(stableSchemas, point->schemaIdx); + size_t numTags = taosArrayGetSize(sTableSchema->tags); + 
size_t numCols = taosArrayGetSize(sTableSchema->fields); SArray* tagBinds = taosArrayInit(numTags, sizeof(TAOS_BIND)); taosArraySetSize(tagBinds, numTags); @@ -740,8 +756,7 @@ static int32_t insertPoints(TAOS* taos, TAOS_SML_DATA_POINT* points, int32_t num } for (int j = 0; j < point->tagNum; ++j) { TAOS_SML_KV* kv = point->tags + j; - size_t idx = TARRAY_ELEM_IDX(point->schema->tags, kv->schema); - TAOS_BIND* bind = taosArrayGet(tagBinds, idx); + TAOS_BIND* bind = taosArrayGet(tagBinds, kv->fieldSchemaIdx); bind->buffer_type = kv->type; bind->length = malloc(sizeof(uintptr_t*)); *bind->length = kv->length; @@ -762,8 +777,7 @@ static int32_t insertPoints(TAOS* taos, TAOS_SML_DATA_POINT* points, int32_t num } for (int j = 0; j < point->fieldNum; ++j) { TAOS_SML_KV* kv = point->fields + j; - size_t idx = TARRAY_ELEM_IDX(point->schema->fields, kv->schema); - TAOS_BIND* bind = colBinds + idx; + TAOS_BIND* bind = colBinds + kv->fieldSchemaIdx; bind->buffer_type = kv->type; bind->length = malloc(sizeof(uintptr_t*)); *bind->length = kv->length; @@ -773,14 +787,21 @@ static int32_t insertPoints(TAOS* taos, TAOS_SML_DATA_POINT* points, int32_t num taosArrayPush(rowsBind, &colBinds); } - creatChildTableIfNotExists(taos, point->childTableName, point->stableName, point->schema->tags, tagBinds); + code = creatChildTableIfNotExists(taos, point->childTableName, point->stableName, sTableSchema->tags, tagBinds); + if (code == 0) { + code = insertChildTableBatch(taos, point->childTableName, sTableSchema->fields, rowsBind); + if (code != 0) { + tscError("insert into child table %s failed. 
error %s", point->childTableName, tstrerror(code)); + } + } else { + tscError("Create Child Table %s failed, error %s", point->childTableName, tstrerror(code)); + } + for (int i = 0; i < taosArrayGetSize(tagBinds); ++i) { TAOS_BIND* bind = taosArrayGet(tagBinds, i); free(bind->length); } taosArrayDestroy(tagBinds); - - insertChildTableBatch(taos, point->childTableName, point->schema->fields, rowsBind); for (int i = 0; i < rows; ++i) { TAOS_BIND* colBinds = taosArrayGetP(rowsBind, i); for (int j = 0; j < numCols; ++j) { @@ -791,12 +812,14 @@ static int32_t insertPoints(TAOS* taos, TAOS_SML_DATA_POINT* points, int32_t num } taosArrayDestroy(rowsBind); taosArrayDestroy(cTablePoints); - + if (code != 0) { + break; + } pCTablePoints = taosHashIterate(cname2points, pCTablePoints); } taosHashCleanup(cname2points); - return 0; + return code; } int taos_sml_insert(TAOS* taos, TAOS_SML_DATA_POINT* points, int numPoint) { @@ -817,7 +840,7 @@ int taos_sml_insert(TAOS* taos, TAOS_SML_DATA_POINT* points, int numPoint) { goto clean_up; } - code = insertPoints(taos, points, numPoint); + code = insertPoints(taos, points, numPoint, stableSchemas); if (code != 0) { tscError("error insert points : %s", tstrerror(code)); } @@ -1619,13 +1642,17 @@ static int32_t parseSmlKvPairs(TAOS_SML_KV **pKVs, int *num_kvs, if (isField) { if ((*num_kvs + 2) > capacity) { capacity *= 3; capacity /= 2; + more_kvs = realloc(*pKVs, capacity * sizeof(TAOS_SML_KV)); + } else { + more_kvs = *pKVs; } - more_kvs = realloc(*pKVs, capacity * sizeof(TAOS_SML_KV)); } else { if ((*num_kvs + 1) > capacity) { capacity *= 3; capacity /= 2; + more_kvs = realloc(*pKVs, capacity * sizeof(TAOS_SML_KV)); + } else { + more_kvs = *pKVs; } - more_kvs = realloc(*pKVs, capacity * sizeof(TAOS_SML_KV)); } if (!more_kvs) { @@ -1642,7 +1669,6 @@ static int32_t parseSmlKvPairs(TAOS_SML_KV **pKVs, int *num_kvs, goto done; error: - free(*pKVs); return ret; done: *index = cur; @@ -1738,7 +1764,7 @@ int32_t tscParseLine(const char* 
sql, TAOS_SML_DATA_POINT* smlData) { moveTimeStampToFirstKv(&smlData, timestamp); tscDebug("Parse timestamp finished"); - return true; + return TSDB_CODE_SUCCESS; } //========================================================================= @@ -1746,8 +1772,8 @@ int32_t tscParseLine(const char* sql, TAOS_SML_DATA_POINT* smlData) { int32_t tscParseLines(char* lines[], int numLines, SArray* points, SArray* failedLines) { for (int32_t i = 0; i < numLines; ++i) { TAOS_SML_DATA_POINT point = {0}; - bool succ = tscParseLine(lines[i], &point); - if (!succ) { + int32_t code = tscParseLine(lines[i], &point); + if (code != TSDB_CODE_SUCCESS) { tscError("data point line parse failed. line %d", i); return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; } else { From 7630c9aef6325cb275380974c8cb9396abcd7ac6 Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Fri, 16 Jul 2021 15:53:04 +0800 Subject: [PATCH 14/27] support default as float --- src/client/src/tscParseLineProtocol.c | 28 ++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index 1c388778a8..486520cc08 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -1185,13 +1185,13 @@ static bool convertSmlValueType(TAOS_SML_KV *pVal, char *value, if (len <= 0) { return false; } + //integer number if (isTinyInt(value, len)) { pVal->type = TSDB_DATA_TYPE_TINYINT; pVal->length = (int16_t)tDataTypes[pVal->type].bytes; value[len - 2] = '\0'; if (!isValidInteger(value)) { - return false; } pVal->value = calloc(pVal->length, 1); int8_t val = (int8_t)strtoll(value, NULL, 10); @@ -1335,7 +1335,17 @@ static bool convertSmlValueType(TAOS_SML_KV *pVal, char *value, memcpy(pVal->value, &bVal, pVal->length); return true; } - //TODO: handle default is float here + //Handle default(no appendix) as float + if (isValidInteger(value) || isValidFloat(value)) { + printf("Gavin Default as float\n"); + 
pVal->type = TSDB_DATA_TYPE_FLOAT; + pVal->length = (int16_t)tDataTypes[pVal->type].bytes; + pVal->value = calloc(pVal->length, 1); + float val = (float)strtold(value, NULL); + memcpy(pVal->value, &val, pVal->length); + printf("value:%02x %02x %02x %02x\n", pVal->value[0]&0xff,pVal->value[1]&0xff,pVal->value[2]&0xff,pVal->value[3]&0xff); + return true; + } return false; } @@ -1464,9 +1474,9 @@ static int32_t parseSmlKey(TAOS_SML_KV *pKV, const char **index) { char key[TSDB_COL_NAME_LEN]; uint16_t len = 0; - //key field cannot start with '_' - if (*cur == '_') { - //printf("Tag key cannnot start with \'_\'\n"); + //key field cannot start with digit + if (isdigit(*cur)) { + tscError("Tag key cannnot start with digit\n"); return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; } while (*cur != '\0') { @@ -1490,7 +1500,7 @@ static int32_t parseSmlKey(TAOS_SML_KV *pKV, const char **index) { pKV->key = calloc(len + 1, 1); memcpy(pKV->key, key, len + 1); - tscDebug("Key:%s|len:%d", pKV->key, len); + //tscDebug("Key:%s|len:%d", pKV->key, len); *index = cur + 1; return TSDB_CODE_SUCCESS; } @@ -1539,8 +1549,8 @@ static int32_t parseSmlMeasurement(TAOS_SML_DATA_POINT *pSml, const char **index uint16_t len = 0; pSml->stableName = calloc(TSDB_TABLE_NAME_LEN, 1); - if (*cur == '_') { - tscError("Measurement field cannnot start with \'_\'"); + if (isdigit(*cur)) { + tscError("Measurement field cannnot start with digit"); free(pSml->stableName); return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; } @@ -1607,7 +1617,7 @@ static int32_t parseSmlKvPairs(TAOS_SML_KV **pKVs, int *num_kvs, *num_kvs += 1; if (is_last_kv) { - tscDebug("last key-value field detected"); + //tscDebug("last key-value field detected"); goto done; } From df201ac9bcb8d4281f9cb8fe10f9e4c257677682 Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Fri, 16 Jul 2021 16:07:20 +0800 Subject: [PATCH 15/27] fix tag/field first character cannot be digit --- src/client/src/tscParseLineProtocol.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) 
diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index f3550cb419..5a19e2e222 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -1215,6 +1215,7 @@ static bool convertSmlValueType(TAOS_SML_KV *pVal, char *value, pVal->length = (int16_t)tDataTypes[pVal->type].bytes; value[len - 2] = '\0'; if (!isValidInteger(value)) { + return false; } pVal->value = calloc(pVal->length, 1); int8_t val = (int8_t)strtoll(value, NULL, 10); @@ -1360,13 +1361,11 @@ static bool convertSmlValueType(TAOS_SML_KV *pVal, char *value, } //Handle default(no appendix) as float if (isValidInteger(value) || isValidFloat(value)) { - printf("Gavin Default as float\n"); pVal->type = TSDB_DATA_TYPE_FLOAT; pVal->length = (int16_t)tDataTypes[pVal->type].bytes; pVal->value = calloc(pVal->length, 1); float val = (float)strtold(value, NULL); memcpy(pVal->value, &val, pVal->length); - printf("value:%02x %02x %02x %02x\n", pVal->value[0]&0xff,pVal->value[1]&0xff,pVal->value[2]&0xff,pVal->value[3]&0xff); return true; } return false; From 908a9dee487a28fcd58ea745549adf6574d3a225 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Fri, 16 Jul 2021 18:38:44 +0800 Subject: [PATCH 16/27] fix error that pointer to array is invalid after array reallocation --- src/client/src/tscParseLineProtocol.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index 5a19e2e222..622b5e7ffc 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -236,11 +236,12 @@ static int32_t buildDataPointSchemas(TAOS_SML_DATA_POINT* points, int numPoint, return 0; } -static int32_t generateSchemaAction(SSchema* pointColField, SHashObj* dbAttrHash, bool isTag, char sTableName[], +static int32_t generateSchemaAction(SSchema* pointColField, SHashObj* dbAttrHash, SArray* dbAttrArray, bool isTag, char 
sTableName[], SSchemaAction* action, bool* actionNeeded) { - SSchema** ppDbAttr = taosHashGet(dbAttrHash, pointColField->name, strlen(pointColField->name)); - if (ppDbAttr) { - SSchema* dbAttr = *ppDbAttr; + size_t* pDbIndex = taosHashGet(dbAttrHash, pointColField->name, strlen(pointColField->name)); + if (pDbIndex) { + SSchema* dbAttr = taosArrayGet(dbAttrArray, *pDbIndex); + assert(strcasecmp(dbAttr->name, pointColField->name) == 0); if (pointColField->type != dbAttr->type) { tscError("point type and db type mismatch. key: %s. point type: %d, db type: %d", pointColField->name, pointColField->type, dbAttr->type); @@ -453,7 +454,8 @@ int32_t loadTableMeta(TAOS* taos, char* tableName, SSmlSTableSchema* schema) { field.type = tableMeta->schema[i].type; field.bytes = tableMeta->schema[i].bytes; SSchema* pField = taosArrayPush(schema->fields, &field); - taosHashPut(schema->fieldHash, field.name, strlen(field.name), &pField, POINTER_BYTES); + size_t fieldIndex = taosArrayGetSize(schema->fields) - 1; + taosHashPut(schema->fieldHash, field.name, strlen(field.name), &fieldIndex, sizeof(fieldIndex)); } for (int i=0; i<tableMeta->tableInfo.numOfTags; ++i) { @@ -463,7 +465,8 @@ int32_t loadTableMeta(TAOS* taos, char* tableName, SSmlSTableSchema* schema) { field.type = tableMeta->schema[j].type; field.bytes = tableMeta->schema[j].bytes; SSchema* pField = taosArrayPush(schema->tags, &field); - taosHashPut(schema->tagHash, field.name, strlen(field.name), &pField, POINTER_BYTES); + size_t tagIndex = taosArrayGetSize(schema->tags) - 1; + taosHashPut(schema->tagHash, field.name, strlen(field.name), &tagIndex, sizeof(tagIndex)); } tscDebug("load table meta succeed.
%s, columns number: %d, tag number: %d, precision: %d", tableName, tableMeta->tableInfo.numOfColumns, tableMeta->tableInfo.numOfTags, schema->precision); @@ -506,7 +509,7 @@ static int32_t reconcileDBSchemas(TAOS* taos, SArray* stableSchemas) { SSchema* pointTag = taosArrayGet(pointSchema->tags, j); SSchemaAction schemaAction = {0}; bool actionNeeded = false; - generateSchemaAction(pointTag, dbTagHash, true, pointSchema->sTableName, &schemaAction, &actionNeeded); + generateSchemaAction(pointTag, dbTagHash, dbSchema.tags, true, pointSchema->sTableName, &schemaAction, &actionNeeded); if (actionNeeded) { applySchemaAction(taos, &schemaAction); } @@ -520,7 +523,7 @@ static int32_t reconcileDBSchemas(TAOS* taos, SArray* stableSchemas) { SSchema* pointCol = taosArrayGet(pointSchema->fields, j); SSchemaAction schemaAction = {0}; bool actionNeeded = false; - generateSchemaAction(pointCol, dbFieldHash, false, pointSchema->sTableName, &schemaAction, &actionNeeded); + generateSchemaAction(pointCol, dbFieldHash, dbSchema.fields,false, pointSchema->sTableName, &schemaAction, &actionNeeded); if (actionNeeded) { applySchemaAction(taos, &schemaAction); } From 3d3658dc0593dfa5ec33c581e72309ebdb70bfc1 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Fri, 16 Jul 2021 18:38:44 +0800 Subject: [PATCH 17/27] fix error that pointer to array is invalid after array reallocation --- src/client/src/tscParseLineProtocol.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index 5a19e2e222..7d7d7b8eb8 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -236,11 +236,12 @@ static int32_t buildDataPointSchemas(TAOS_SML_DATA_POINT* points, int numPoint, return 0; } -static int32_t generateSchemaAction(SSchema* pointColField, SHashObj* dbAttrHash, bool isTag, char sTableName[], +static int32_t generateSchemaAction(SSchema* 
pointColField, SHashObj* dbAttrHash, SArray* dbAttrArray, bool isTag, char sTableName[], SSchemaAction* action, bool* actionNeeded) { - SSchema** ppDbAttr = taosHashGet(dbAttrHash, pointColField->name, strlen(pointColField->name)); - if (ppDbAttr) { - SSchema* dbAttr = *ppDbAttr; + size_t* pDbIndex = taosHashGet(dbAttrHash, pointColField->name, strlen(pointColField->name)); + if (pDbIndex) { + SSchema* dbAttr = taosArrayGet(dbAttrArray, *pDbIndex); + assert(strcasecmp(dbAttr->name, pointColField->name) == 0); if (pointColField->type != dbAttr->type) { tscError("point type and db type mismatch. key: %s. point type: %d, db type: %d", pointColField->name, pointColField->type, dbAttr->type); @@ -452,8 +453,9 @@ int32_t loadTableMeta(TAOS* taos, char* tableName, SSmlSTableSchema* schema) { tstrncpy(field.name, tableMeta->schema[i].name, strlen(tableMeta->schema[i].name)+1); field.type = tableMeta->schema[i].type; field.bytes = tableMeta->schema[i].bytes; - SSchema* pField = taosArrayPush(schema->fields, &field); - taosHashPut(schema->fieldHash, field.name, strlen(field.name), &pField, POINTER_BYTES); + taosArrayPush(schema->fields, &field); + size_t fieldIndex = taosArrayGetSize(schema->fields) - 1; + taosHashPut(schema->fieldHash, field.name, strlen(field.name), &fieldIndex, sizeof(fieldIndex)); } for (int i=0; i<tableMeta->tableInfo.numOfTags; ++i) { @@ -462,8 +464,9 @@ int32_t loadTableMeta(TAOS* taos, char* tableName, SSmlSTableSchema* schema) { tstrncpy(field.name, tableMeta->schema[j].name, strlen(tableMeta->schema[j].name)+1); field.type = tableMeta->schema[j].type; field.bytes = tableMeta->schema[j].bytes; - SSchema* pField = taosArrayPush(schema->tags, &field); - taosHashPut(schema->tagHash, field.name, strlen(field.name), &pField, POINTER_BYTES); + taosArrayPush(schema->tags, &field); + size_t tagIndex = taosArrayGetSize(schema->tags) - 1; + taosHashPut(schema->tagHash, field.name, strlen(field.name), &tagIndex, sizeof(tagIndex)); } tscDebug("load table meta succeed.
%s, columns number: %d, tag number: %d, precision: %d", tableName, tableMeta->tableInfo.numOfColumns, tableMeta->tableInfo.numOfTags, schema->precision); @@ -506,7 +509,7 @@ static int32_t reconcileDBSchemas(TAOS* taos, SArray* stableSchemas) { SSchema* pointTag = taosArrayGet(pointSchema->tags, j); SSchemaAction schemaAction = {0}; bool actionNeeded = false; - generateSchemaAction(pointTag, dbTagHash, true, pointSchema->sTableName, &schemaAction, &actionNeeded); + generateSchemaAction(pointTag, dbTagHash, dbSchema.tags, true, pointSchema->sTableName, &schemaAction, &actionNeeded); if (actionNeeded) { applySchemaAction(taos, &schemaAction); } @@ -520,7 +523,7 @@ static int32_t reconcileDBSchemas(TAOS* taos, SArray* stableSchemas) { SSchema* pointCol = taosArrayGet(pointSchema->fields, j); SSchemaAction schemaAction = {0}; bool actionNeeded = false; - generateSchemaAction(pointCol, dbFieldHash, false, pointSchema->sTableName, &schemaAction, &actionNeeded); + generateSchemaAction(pointCol, dbFieldHash, dbSchema.fields,false, pointSchema->sTableName, &schemaAction, &actionNeeded); if (actionNeeded) { applySchemaAction(taos, &schemaAction); } From 02bef1fb72b8f82851f94241f9211350044e56e1 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Sat, 17 Jul 2021 13:18:22 +0800 Subject: [PATCH 18/27] fix invalidate read and log parse errorn line --- src/client/src/tscParseLineProtocol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index 7d7d7b8eb8..c2f0af962c 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -1460,7 +1460,7 @@ static int32_t parseSmlTimeStamp(TAOS_SML_KV **pTS, const char **index) { } if (len > 0) { - value = calloc(len, 1); + value = calloc(len+1, 1); memcpy(value, start, len); } @@ -1786,7 +1786,7 @@ int32_t tscParseLines(char* lines[], int numLines, SArray* points, SArray* faile TAOS_SML_DATA_POINT point = 
{0}; int32_t code = tscParseLine(lines[i], &point); if (code != TSDB_CODE_SUCCESS) { - tscError("data point line parse failed. line %d", i); + tscError("data point line parse failed. line %d : %s", i, lines[i]); return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; } else { tscDebug("data point line parse success. line %d", i); From c5e02db6bcbed9d5e073f5d90115bacd08c1e9a3 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Sat, 17 Jul 2021 14:35:09 +0800 Subject: [PATCH 19/27] fix memory leak --- src/client/src/tscParseLineProtocol.c | 34 +++++++++++++-------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index c2f0af962c..581ad1a03c 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -1781,23 +1781,6 @@ int32_t tscParseLine(const char* sql, TAOS_SML_DATA_POINT* smlData) { //========================================================================= -int32_t tscParseLines(char* lines[], int numLines, SArray* points, SArray* failedLines) { - for (int32_t i = 0; i < numLines; ++i) { - TAOS_SML_DATA_POINT point = {0}; - int32_t code = tscParseLine(lines[i], &point); - if (code != TSDB_CODE_SUCCESS) { - tscError("data point line parse failed. line %d : %s", i, lines[i]); - return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; - } else { - tscDebug("data point line parse success. 
line %d", i); - } - - taosArrayPush(points, &point); - } - return 0; -} - - void destroySmlDataPoint(TAOS_SML_DATA_POINT* point) { for (int i=0; itagNum; ++i) { free((point->tags+i)->key); @@ -1813,6 +1796,23 @@ void destroySmlDataPoint(TAOS_SML_DATA_POINT* point) { free(point->childTableName); } +int32_t tscParseLines(char* lines[], int numLines, SArray* points, SArray* failedLines) { + for (int32_t i = 0; i < numLines; ++i) { + TAOS_SML_DATA_POINT point = {0}; + int32_t code = tscParseLine(lines[i], &point); + if (code != TSDB_CODE_SUCCESS) { + tscError("data point line parse failed. line %d : %s", i, lines[i]); + destroySmlDataPoint(&point); + return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + } else { + tscDebug("data point line parse success. line %d", i); + } + + taosArrayPush(points, &point); + } + return 0; +} + int taos_insert_lines(TAOS* taos, char* lines[], int numLines) { int32_t code = 0; SArray* lpPoints = taosArrayInit(numLines, sizeof(TAOS_SML_DATA_POINT)); From bac3766fab2c2c2b82c9a8cc50b90c50c5ad4610 Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Mon, 19 Jul 2021 10:12:02 +0800 Subject: [PATCH 20/27] [TD-5208]:fix potential mem leak when parsing value failed --- src/client/src/tscParseLineProtocol.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index 581ad1a03c..ca277ed74a 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -1371,7 +1371,7 @@ static bool convertSmlValueType(TAOS_SML_KV *pVal, char *value, memcpy(pVal->value, &val, pVal->length); return true; } - return false; + return false; } static int32_t getTimeStampValue(char *value, uint16_t len, @@ -1460,7 +1460,7 @@ static int32_t parseSmlTimeStamp(TAOS_SML_KV **pTS, const char **index) { } if (len > 0) { - value = calloc(len+1, 1); + value = calloc(len + 1, 1); memcpy(value, start, len); } @@ -1541,13 +1541,6 @@ static bool 
parseSmlValue(TAOS_SML_KV *pKV, const char **index, while (1) { // unescaped ',' or ' ' or '\0' identifies a value if ((*cur == ',' || *cur == ' ' || *cur == '\0') && *(cur - 1) != '\\') { - value = calloc(len + 1, 1); - memcpy(value, start, len); - value[len] = '\0'; - if (!convertSmlValueType(pKV, value, len)) { - free(value); - return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; - } //unescaped ' ' or '\0' indicates end of value *is_last_kv = (*cur == ' ' || *cur == '\0') ? true : false; break; @@ -1560,9 +1553,16 @@ static bool parseSmlValue(TAOS_SML_KV *pKV, const char **index, len++; } - if (value) { + value = calloc(len + 1, 1); + memcpy(value, start, len); + value[len] = '\0'; + if (!convertSmlValueType(pKV, value, len)) { + //free previous alocated key field + free(pKV->key); free(value); + return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; } + free(value); *index = (*cur == '\0') ? cur : cur + 1; return TSDB_CODE_SUCCESS; From ff7aec3b36c58dd8793a28a3437f1ba42a1ad247 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Tue, 20 Jul 2021 09:24:00 +0800 Subject: [PATCH 21/27] improve log & memory allocation check --- src/client/src/tscParseLineProtocol.c | 31 ++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index ca277ed74a..d60c209a27 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -540,7 +540,8 @@ static int32_t reconcileDBSchemas(TAOS* taos, SArray* stableSchemas) { return 0; } -static int32_t getChildTableName(TAOS_SML_DATA_POINT* point, char* tableName, int* tableNameLen) { +static int32_t getSmlMd5ChildTableName(TAOS_SML_DATA_POINT* point, char* tableName, int* tableNameLen) { + tscDebug("taos_sml_insert get child table name through md5"); qsort(point->tags, point->tagNum, sizeof(TAOS_SML_KV), compareSmlColKv); SStringBuilder sb; memset(&sb, 0, sizeof(sb)); @@ -694,7 +695,7 @@ static int32_t 
arrangePointsByChildTableName(TAOS_SML_DATA_POINT* points, int nu if (!point->childTableName) { char childTableName[TSDB_TABLE_NAME_LEN]; int32_t tableNameLen = TSDB_TABLE_NAME_LEN; - getChildTableName(point, childTableName, &tableNameLen); + getSmlMd5ChildTableName(point, childTableName, &tableNameLen); point->childTableName = calloc(1, tableNameLen+1); strncpy(point->childTableName, childTableName, tableNameLen); point->childTableName[tableNameLen] = '\0'; @@ -774,6 +775,10 @@ static int32_t insertPoints(TAOS* taos, TAOS_SML_DATA_POINT* points, int32_t num point = taosArrayGetP(cTablePoints, i); TAOS_BIND* colBinds = calloc(numCols, sizeof(TAOS_BIND)); + if (colBinds == NULL) { + tscError("taos_sml_insert insert points, failed to allocated memory for TAOS_BIND, " + "num of rows: %zu, num of cols: %zu", rows, numCols); + } for (int j = 0; j < numCols; ++j) { TAOS_BIND* bind = colBinds + j; bind->is_null = &isNullColBind; @@ -1815,8 +1820,27 @@ int32_t tscParseLines(char* lines[], int numLines, SArray* points, SArray* faile int taos_insert_lines(TAOS* taos, char* lines[], int numLines) { int32_t code = 0; - SArray* lpPoints = taosArrayInit(numLines, sizeof(TAOS_SML_DATA_POINT)); + if (numLines <= 0) { + tscError("taos_insert_lines numLines should be greater than zero. 
numLines: %d", numLines); + code = TSDB_CODE_TSC_APP_ERROR; + return code; + } + for (int i = 0; i < numLines; ++i) { + if (lines[i] == NULL) { + tscError("taos_insert_lines line %d is NULL", i); + code = TSDB_CODE_TSC_APP_ERROR; + return code; + } + } + + SArray* lpPoints = taosArrayInit(numLines, sizeof(TAOS_SML_DATA_POINT)); + if (lpPoints == NULL) { + tscError("taos_insert_lines failed to allocate memory"); + return TSDB_CODE_TSC_OUT_OF_MEMORY; + } + + tscDebug("taos_insert_lines begin inserting %d lines, first line: %s", numLines, lines[0]); code = tscParseLines(lines, numLines, lpPoints, NULL); size_t numPoints = taosArrayGetSize(lpPoints); @@ -1831,6 +1855,7 @@ int taos_insert_lines(TAOS* taos, char* lines[], int numLines) { } cleanup: + tscDebug("taos_insert_lines finish inserting %d lines. code: %d", numLines, code); for (int i=0; i Date: Tue, 20 Jul 2021 13:52:02 +0800 Subject: [PATCH 22/27] add performance test --- tests/examples/c/CMakeLists.txt | 2 + tests/examples/c/schemaless.c | 85 +++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 tests/examples/c/schemaless.c diff --git a/tests/examples/c/CMakeLists.txt b/tests/examples/c/CMakeLists.txt index 906ca2dd41..e94de3cbca 100644 --- a/tests/examples/c/CMakeLists.txt +++ b/tests/examples/c/CMakeLists.txt @@ -5,6 +5,8 @@ IF (TD_LINUX) AUX_SOURCE_DIRECTORY(. 
SRC) ADD_EXECUTABLE(demo apitest.c) TARGET_LINK_LIBRARIES(demo taos_static trpc tutil pthread ) + ADD_EXECUTABLE(sml schemaless.c) + TARGET_LINK_LIBRARIES(sml taos_static trpc tutil pthread ) ADD_EXECUTABLE(subscribe subscribe.c) TARGET_LINK_LIBRARIES(subscribe taos_static trpc tutil pthread ) ADD_EXECUTABLE(epoll epoll.c) diff --git a/tests/examples/c/schemaless.c b/tests/examples/c/schemaless.c new file mode 100644 index 0000000000..e7573a2186 --- /dev/null +++ b/tests/examples/c/schemaless.c @@ -0,0 +1,85 @@ +#include "taos.h" +#include "taoserror.h" + +#include +#include +#include +#include +#include + +int numSuperTables = 8; +int numChildTables = 1024; +int numRowsPerChildTable = 128; + +void shuffle(char**lines, size_t n) +{ + if (n > 1) + { + size_t i; + for (i = 0; i < n - 1; i++) + { + size_t j = i + rand() / (RAND_MAX / (n - i) + 1); + char* t = lines[j]; + lines[j] = lines[i]; + lines[i] = t; + } + } +} + +static int64_t getTimeInUs() { + struct timeval systemTime; + gettimeofday(&systemTime, NULL); + return (int64_t)systemTime.tv_sec * 1000000L + (int64_t)systemTime.tv_usec; +} + +int main(int argc, char* argv[]) { + TAOS_RES *result; + const char* host = "127.0.0.1"; + const char* user = "root"; + const char* passwd = "taosdata"; + + taos_options(TSDB_OPTION_TIMEZONE, "GMT-8"); + TAOS* taos = taos_connect(host, user, passwd, "", 0); + if (taos == NULL) { + printf("\033[31mfailed to connect to db, reason:%s\033[0m\n", taos_errstr(taos)); + exit(1); + } + + char* info = taos_get_server_info(taos); + printf("server info: %s\n", info); + info = taos_get_client_info(taos); + printf("client info: %s\n", info); + result = taos_query(taos, "drop database if exists db;"); + taos_free_result(result); + usleep(100000); + result = taos_query(taos, "create database db precision 'ms';"); + taos_free_result(result); + usleep(100000); + + (void)taos_select_db(taos, "db"); + + time_t ct = time(0); + int64_t ts = ct * 1000; + char* lineFormat = 
"sta%d,t0=true,t1=127i8,t2=32767i16,t3=%di32,t4=9223372036854775807i64,t9=11.12345f32,t10=22.123456789f64,t11=\"binaryTagValue\",t12=L\"ncharTagValue\" c0=true,c1=127i8,c2=32767i16,c3=2147483647i32,c4=9223372036854775807i64,c5=255u8,c6=32770u16,c7=2147483699u32,c8=9223372036854775899u64,c9=11.12345f32,c10=22.123456789f64,c11=\"binaryValue\",c12=L\"ncharValue\" %lldms"; + + char** lines = calloc(numSuperTables * numChildTables * numRowsPerChildTable, sizeof(char*)); + int l = 0; + for (int i = 0; i < numSuperTables; ++i) { + for (int j = 0; j < numChildTables; ++j) { + for (int k = 0; k < numRowsPerChildTable; ++k) { + char* line = calloc(512, 1); + snprintf(line, 512, lineFormat, i, j, ts + 10 * l); + lines[l] = line; + ++l; + } + } + } + shuffle(lines, numSuperTables * numChildTables * numRowsPerChildTable); + + printf("%s\n", "begin taos_insert_lines"); + int64_t begin = getTimeInUs(); + int32_t code = taos_insert_lines(taos, lines, numSuperTables * numChildTables * numRowsPerChildTable); + int64_t end = getTimeInUs(); + printf("code: %d, %s, %ld\n", code, tstrerror(code), end-begin); + return 0; +} From 43835e9b92e9b3d755b893f490c37f08fd5a3417 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Tue, 20 Jul 2021 14:34:26 +0800 Subject: [PATCH 23/27] improve child table name performance --- src/client/src/tscParseLineProtocol.c | 69 +++++---------------------- 1 file changed, 13 insertions(+), 56 deletions(-) diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index d60c209a27..1d6364329f 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -1482,23 +1482,6 @@ static int32_t parseSmlTimeStamp(TAOS_SML_KV **pTS, const char **index) { return ret; } -static bool getChildTableNameFromTags(TAOS_SML_DATA_POINT *pData) { - TAOS_SML_KV *pTags = pData->tags; - int tagNum = pData->tagNum; - char *childTableName = pData->childTableName; - - for (int i = 0; i < tagNum; ++i) { - //use tag value 
as child table name if key is "ID" - //tag value has to be binary for now - if (!strcasecmp(pTags->key, "ID") && pTags->type == TSDB_DATA_TYPE_BINARY) { - memcpy(childTableName, pTags->value, pTags->length); - return true; - } - pTags++; - } - return false; -} - static int32_t parseSmlKey(TAOS_SML_KV *pKV, const char **index) { const char *cur = *index; char key[TSDB_COL_NAME_LEN]; @@ -1616,7 +1599,7 @@ static int32_t parseSmlMeasurement(TAOS_SML_DATA_POINT *pSml, const char **index } static int32_t parseSmlKvPairs(TAOS_SML_KV **pKVs, int *num_kvs, - const char **index, bool isField) { + const char **index, bool isField, TAOS_SML_DATA_POINT* smlData) { const char *cur = *index; int32_t ret = TSDB_CODE_SUCCESS; TAOS_SML_KV *pkv; @@ -1646,8 +1629,16 @@ static int32_t parseSmlKvPairs(TAOS_SML_KV **pKVs, int *num_kvs, tscError("Unable to parse value field"); goto error; } - *num_kvs += 1; - + if (!isField && + (strcasecmp(pkv->key, "ID") == 0) && pkv->type == TSDB_DATA_TYPE_BINARY) { + smlData->childTableName = malloc( pkv->length + 1); + memcpy(smlData->childTableName, pkv->value, pkv->length); + smlData->childTableName[pkv->length] = '\0'; + free(pkv->key); + free(pkv->value); + } else { + *num_kvs += 1; + } if (is_last_kv) { //tscDebug("last key-value field detected"); goto done; @@ -1692,32 +1683,6 @@ done: return ret; } -static void removeChildTableNameFromTags(TAOS_SML_DATA_POINT** smlData) { - TAOS_SML_KV* destTags = calloc((*smlData)->tagNum, sizeof(TAOS_SML_KV)); - TAOS_SML_KV* srcTags = (*smlData)->tags; - int numDestTags = 0; - for (int32_t i = 0; i < (*smlData)->tagNum; ++i) { - TAOS_SML_KV* srcTag = srcTags + i; - if (strcasecmp(srcTag->key, "ID") == 0) { - continue; - } else { - TAOS_SML_KV* destTag = destTags + numDestTags; - memcpy(destTag, srcTag, sizeof(TAOS_SML_KV)); - destTag->key = calloc(1, strlen(srcTag->key) + 1); - memcpy(destTag->key, srcTag->key, strlen(srcTag->key) + 1); - destTag->value = calloc(1, srcTag->length); - memcpy(destTag->value, 
srcTag->value, srcTag->length); - numDestTags++; - } - free(srcTag->key); - free(srcTag->value); - } - (*smlData)->tags = destTags; - (*smlData)->tagNum = numDestTags; - - free(srcTags); -} - static void moveTimeStampToFirstKv(TAOS_SML_DATA_POINT** smlData, TAOS_SML_KV *ts) { TAOS_SML_KV* tsField = (*smlData)->fields; tsField->length = ts->length; @@ -1748,24 +1713,16 @@ int32_t tscParseLine(const char* sql, TAOS_SML_DATA_POINT* smlData) { //Parse Tags if (has_tags) { - ret = parseSmlKvPairs(&smlData->tags, &smlData->tagNum, &index, false); + ret = parseSmlKvPairs(&smlData->tags, &smlData->tagNum, &index, false, smlData); if (ret) { tscError("Unable to parse tag"); return ret; } - smlData->childTableName = calloc(TSDB_TABLE_NAME_LEN, 1); - if (!getChildTableNameFromTags(smlData)) { - free(smlData->childTableName); - smlData->childTableName = NULL; - tscDebug("No child table name in tags"); - } - removeChildTableNameFromTags(&smlData); - } tscDebug("Parse tags finished, num of tags:%d", smlData->tagNum); //Parse fields - ret = parseSmlKvPairs(&smlData->fields, &smlData->fieldNum, &index, true); + ret = parseSmlKvPairs(&smlData->fields, &smlData->fieldNum, &index, true, smlData); if (ret) { tscError("Unable to parse field"); return ret; From d959f60187693f8302b491519d6a988ff7b99ad8 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Tue, 20 Jul 2021 15:47:17 +0800 Subject: [PATCH 24/27] add more function test to schemaless.c --- tests/examples/c/schemaless.c | 75 +++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/tests/examples/c/schemaless.c b/tests/examples/c/schemaless.c index e7573a2186..50bee7887f 100644 --- a/tests/examples/c/schemaless.c +++ b/tests/examples/c/schemaless.c @@ -81,5 +81,80 @@ int main(int argc, char* argv[]) { int32_t code = taos_insert_lines(taos, lines, numSuperTables * numChildTables * numRowsPerChildTable); int64_t end = getTimeInUs(); printf("code: %d, %s, %ld\n", code, tstrerror(code), end-begin); + + char* 
lines_000_0[] = { + "sta1,id=sta1_1,t0=true,t1=127i8,t2=32767i16,t3=2147483647i32,t4=9223372036854775807i64,t5=255u8,t6=32770u16,t7=2147483699u32,t8=9223372036854775899u64,t9=11.12345f32,t10=22.123456789f64,t11=\"binaryTagValue\",t12=L\"ncharTagValue\" c0=true,c1=127i8,c2=32767i16,c3=2147483647i32,c4=9223372036854775807i64,c5=255u8,c6=32770u16,c7=2147483699u32,c8=9223372036854775899u64,c9=11.12345f32,c10=22.123456789f64,c11=\"binaryValue\",c12=L\"ncharValue\" 1626006833639000us" + }; + + code = taos_insert_lines(taos, lines_000_0 , sizeof(lines_000_0)/sizeof(char*)); + if (0 == code) { + printf("taos_insert_lines() lines_000_0 should return error\n"); + return -1; + } + + char* lines_000_1[] = { + "sta2,id=\"sta2_1\",t0=true,t1=127i8,t2=32767i16,t3=2147483647i32,t4=9223372036854775807i64,t5=255u8,t6=32770u16,t7=2147483699u32,t8=9223372036854775899u64,t9=11.12345f32,t10=22.123456789f64,t11=\"binaryTagValue\",t12=L\"ncharTagValue\" c0=true,c1=127i8,c2=32767i16,c3=2147483647i32,c4=9223372036854775807i64,c5=255u8,c6=32770u16,c7=2147483699u32,c8=9223372036854775899u64,c9=11.12345f32,c10=22.123456789f64,c11=\"binaryValue\",c12=L\"ncharValue\" 1626006833639001" + }; + + code = taos_insert_lines(taos, lines_000_1 , sizeof(lines_000_1)/sizeof(char*)); + if (0 == code) { + printf("taos_insert_lines() lines_000_1 should return error\n"); + return -1; + } + + char* lines_000_2[] = { + "sta3,id=\"sta3_1\",t0=true,t1=127i8,t2=32767i16,t3=2147483647i32,t4=9223372036854775807i64,t9=11.12345f32,t10=22.123456789f64,t11=\"binaryTagValue\",t12=L\"ncharTagValue\" c0=true,c1=127i8,c2=32767i16,c3=2147483647i32,c4=9223372036854775807i64,c5=255u8,c6=32770u16,c7=2147483699u32,c8=9223372036854775899u64,c9=11.12345f32,c10=22.123456789f64,c11=\"binaryValue\",c12=L\"ncharValue\" 0" + }; + + code = taos_insert_lines(taos, lines_000_2 , sizeof(lines_000_2)/sizeof(char*)); + if (0 != code) { + printf("taos_insert_lines() lines_000_2 return code:%d (%s)\n", code, (char*)tstrerror(code)); + return 
-1; + } + + char* lines_001_0[] = { + "sta4,t0=true,t1=127i8,t2=32767i16,t3=2147483647i32,t4=9223372036854775807i64,t9=11.12345f32,t10=22.123456789f64,t11=\"binaryTagValue\",t12=L\"ncharTagValue\" c0=true,c1=127i8,c2=32767i16,c3=2147483647i32,c4=9223372036854775807i64,c9=11.12345f32,c10=22.123456789f64,c11=\"binaryValue\",c12=L\"ncharValue\" 1626006833639000us", + + }; + + code = taos_insert_lines(taos, lines_001_0 , sizeof(lines_001_0)/sizeof(char*)); + if (0 != code) { + printf("taos_insert_lines() lines_001_0 return code:%d (%s)\n", code, (char*)tstrerror(code)); + return -1; + } + + char* lines_001_1[] = { + "sta5,id=\"sta5_1\",t0=true,t1=127i8,t2=32767i16,t3=2147483647i32,t4=9223372036854775807i64,t9=11.12345f32,t10=22.123456789f64,t11=\"binaryTagValue\",t12=L\"ncharTagValue\" c0=true,c1=127i8,c2=32767i16,c3=2147483647i32,c4=9223372036854775807i64,c9=11.12345f32,c10=22.123456789f64,c11=\"binaryValue\",c12=L\"ncharValue\" 1626006833639001" + }; + + code = taos_insert_lines(taos, lines_001_1 , sizeof(lines_001_1)/sizeof(char*)); + if (0 != code) { + printf("taos_insert_lines() lines_001_1 return code:%d (%s)\n", code, (char*)tstrerror(code)); + return -1; + } + + char* lines_001_2[] = { + "sta6,id=\"sta6_1\",t0=true,t1=127i8,t2=32767i16,t3=2147483647i32,t4=9223372036854775807i64,t9=11.12345f32,t10=22.123456789f64,t11=\"binaryTagValue\",t12=L\"ncharTagValue\" c0=true,c1=127i8,c2=32767i16,c3=2147483647i32,c4=9223372036854775807i64,c9=11.12345f32,c10=22.123456789f64,c11=\"binaryValue\",c12=L\"ncharValue\" 0" + }; + + code = taos_insert_lines(taos, lines_001_2 , sizeof(lines_001_2)/sizeof(char*)); + if (0 != code) { + printf("taos_insert_lines() lines_001_2 return code:%d (%s)\n", code, (char*)tstrerror(code)); + return -1; + } + + char* lines_002[] = { + "stb,id=\"stb_1\",t20=t,t21=T,t22=true,t23=True,t24=TRUE,t25=f,t26=F,t27=false,t28=False,t29=FALSE,t10=33.12345,t11=\"binaryTagValue\",t12=L\"ncharTagValue\" 
c20=t,c21=T,c22=true,c23=True,c24=TRUE,c25=f,c26=F,c27=false,c28=False,c29=FALSE,c10=33.12345,c11=\"binaryValue\",c12=L\"ncharValue\" 1626006833639000000ns", + "stc,id=\"stc_1\",t20=t,t21=T,t22=true,t23=True,t24=TRUE,t25=f,t26=F,t27=false,t28=False,t29=FALSE,t10=33.12345,t11=\"binaryTagValue\",t12=L\"ncharTagValue\" c20=t,c21=T,c22=true,c23=True,c24=TRUE,c25=f,c26=F,c27=false,c28=False,c29=FALSE,c10=33.12345,c11=\"binaryValue\",c12=L\"ncharValue\" 1626006833639019us", + "stc,id=\"stc_1\",t20=t,t21=T,t22=true,t23=True,t24=TRUE,t25=f,t26=F,t27=false,t28=False,t29=FALSE,t10=33.12345,t11=\"binaryTagValue\",t12=L\"ncharTagValue\" c20=t,c21=T,c22=true,c23=True,c24=TRUE,c25=f,c26=F,c27=false,c28=False,c29=FALSE,c10=33.12345,c11=\"binaryValue\",c12=L\"ncharValue\" 1626006833640ms", + "stc,id=\"stc_1\",t20=t,t21=T,t22=true,t23=True,t24=TRUE,t25=f,t26=F,t27=false,t28=False,t29=FALSE,t10=33.12345,t11=\"binaryTagValue\",t12=L\"ncharTagValue\" c20=t,c21=T,c22=true,c23=True,c24=TRUE,c25=f,c26=F,c27=false,c28=False,c29=FALSE,c10=33.12345,c11=\"binaryValue\",c12=L\"ncharValue\" 1626006834s" + }; + + code = taos_insert_lines(taos, lines_002 , sizeof(lines_002)/sizeof(char*)); + if (0 != code) { + printf("taos_insert_lines() lines_002 return code:%d (%s)\n", code, (char*)tstrerror(code)); + return -1; + } + return 0; } From 437784413ff065032ce27b72c0c39d201ff7e1ee Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Tue, 20 Jul 2021 15:55:33 +0800 Subject: [PATCH 25/27] fix arm compilation error --- tests/examples/c/schemaless.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/examples/c/schemaless.c b/tests/examples/c/schemaless.c index 50bee7887f..d6450914df 100644 --- a/tests/examples/c/schemaless.c +++ b/tests/examples/c/schemaless.c @@ -1,5 +1,6 @@ #include "taos.h" #include "taoserror.h" +#include "os.h" #include #include @@ -80,7 +81,7 @@ int main(int argc, char* argv[]) { int64_t begin = getTimeInUs(); int32_t code = taos_insert_lines(taos, lines, 
numSuperTables * numChildTables * numRowsPerChildTable); int64_t end = getTimeInUs(); - printf("code: %d, %s, %ld\n", code, tstrerror(code), end-begin); + printf("code: %d, %s. time used: %"PRId64"\n", code, tstrerror(code), end-begin); char* lines_000_0[] = { "sta1,id=sta1_1,t0=true,t1=127i8,t2=32767i16,t3=2147483647i32,t4=9223372036854775807i64,t5=255u8,t6=32770u16,t7=2147483699u32,t8=9223372036854775899u64,t9=11.12345f32,t10=22.123456789f64,t11=\"binaryTagValue\",t12=L\"ncharTagValue\" c0=true,c1=127i8,c2=32767i16,c3=2147483647i32,c4=9223372036854775807i64,c5=255u8,c6=32770u16,c7=2147483699u32,c8=9223372036854775899u64,c9=11.12345f32,c10=22.123456789f64,c11=\"binaryValue\",c12=L\"ncharValue\" 1626006833639000us" From 19ae4f51e83e717d1484ae4c50fb6818f14056a9 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Thu, 22 Jul 2021 08:28:06 +0800 Subject: [PATCH 26/27] add max line number of taos_insert_lines call --- src/client/src/tscParseLineProtocol.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index 1d6364329f..d83f3a9c1c 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -1777,8 +1777,9 @@ int32_t tscParseLines(char* lines[], int numLines, SArray* points, SArray* faile int taos_insert_lines(TAOS* taos, char* lines[], int numLines) { int32_t code = 0; - if (numLines <= 0) { - tscError("taos_insert_lines numLines should be greater than zero. numLines: %d", numLines); + + if (numLines <= 0 || numLines > 65536) { + tscError("taos_insert_lines numLines should be between 1 and 65536. 
numLines: %d", numLines); code = TSDB_CODE_TSC_APP_ERROR; return code; } From a84c731b7a1315fe3c737789c8a6c50b19bee978 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Thu, 22 Jul 2021 11:33:55 +0800 Subject: [PATCH 27/27] [TD-5449]: fix invalid stable name core dump and sigabort total tag num exceed maximum --- src/client/src/tscParseLineProtocol.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index d83f3a9c1c..ce76f5d82c 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -494,6 +494,7 @@ static int32_t reconcileDBSchemas(TAOS* taos, SArray* stableSchemas) { code = loadTableMeta(taos, pointSchema->sTableName, &dbSchema); if (code != 0) { tscError("reconcile point schema failed. can not create %s", pointSchema->sTableName); + return code; } else { pointSchema->precision = dbSchema.precision; destroySmlSTableSchema(&dbSchema); @@ -1565,6 +1566,7 @@ static int32_t parseSmlMeasurement(TAOS_SML_DATA_POINT *pSml, const char **index if (isdigit(*cur)) { tscError("Measurement field cannnot start with digit"); free(pSml->stableName); + pSml->stableName = NULL; return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; } @@ -1572,6 +1574,7 @@ static int32_t parseSmlMeasurement(TAOS_SML_DATA_POINT *pSml, const char **index if (len > TSDB_TABLE_NAME_LEN) { tscError("Measurement field cannot exceeds 193 characters"); free(pSml->stableName); + pSml->stableName = NULL; return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; } //first unescaped comma or space identifies measurement