From 37bc1bca3697126a545f5d33939b582047e0cea3 Mon Sep 17 00:00:00 2001 From: kailixu Date: Mon, 3 Apr 2023 17:00:52 +0800 Subject: [PATCH 01/40] enh: column/row max length support up to 64K --- docs/examples/c/async_query_example.c | 2 +- docs/examples/c/query_example.c | 2 +- include/libs/function/function.h | 10 +++++----- include/libs/nodes/cmdnodes.h | 2 +- include/libs/parser/parser.h | 2 +- include/util/tdef.h | 11 +++++++---- source/client/src/clientSmlJson.c | 2 +- source/dnode/mnode/impl/src/mndFunc.c | 2 +- source/libs/executor/src/timewindowoperator.c | 2 +- source/libs/function/inc/tpercentile.h | 4 ++-- source/libs/function/src/builtinsimpl.c | 18 +++++++++--------- source/libs/function/src/tpercentile.c | 2 +- source/libs/parser/src/parInsertSml.c | 2 +- source/libs/scalar/src/sclfunc.c | 16 ++++++++-------- source/util/src/tcompare.c | 2 +- tools/shell/src/shellEngine.c | 2 +- 16 files changed, 42 insertions(+), 39 deletions(-) diff --git a/docs/examples/c/async_query_example.c b/docs/examples/c/async_query_example.c index b370420b12..3807c4bfd7 100644 --- a/docs/examples/c/async_query_example.c +++ b/docs/examples/c/async_query_example.c @@ -8,7 +8,7 @@ #include #include -typedef int16_t VarDataLenT; +typedef uint16_t VarDataLenT; #define TSDB_NCHAR_SIZE sizeof(int32_t) #define VARSTR_HEADER_SIZE sizeof(VarDataLenT) diff --git a/docs/examples/c/query_example.c b/docs/examples/c/query_example.c index fcae95bcd4..c7d52115b5 100644 --- a/docs/examples/c/query_example.c +++ b/docs/examples/c/query_example.c @@ -6,7 +6,7 @@ #include #include -typedef int16_t VarDataLenT; +typedef uint16_t VarDataLenT; #define TSDB_NCHAR_SIZE sizeof(int32_t) #define VARSTR_HEADER_SIZE sizeof(VarDataLenT) diff --git a/include/libs/function/function.h b/include/libs/function/function.h index fb6ef26a8a..1411ee7c4b 100644 --- a/include/libs/function/function.h +++ b/include/libs/function/function.h @@ -99,11 +99,11 @@ typedef struct SSubsidiaryResInfo { } SSubsidiaryResInfo; typedef struct SResultDataInfo { - int16_t precision; - int16_t scale; - int16_t type; - int16_t bytes; - int32_t interBufSize; + int16_t precision; + int16_t scale; + int16_t type; + uint16_t bytes; + int32_t interBufSize; } SResultDataInfo; #define GET_RES_INFO(ctx) ((ctx)->resultInfo) diff --git a/include/libs/nodes/cmdnodes.h b/include/libs/nodes/cmdnodes.h index c716d77b32..e276373f2d 100644 --- a/include/libs/nodes/cmdnodes.h +++ b/include/libs/nodes/cmdnodes.h @@ -30,7 +30,7 @@ extern "C" { #define SHOW_CREATE_DB_RESULT_COLS 2 #define SHOW_CREATE_DB_RESULT_FIELD1_LEN (TSDB_DB_NAME_LEN + VARSTR_HEADER_SIZE) -#define SHOW_CREATE_DB_RESULT_FIELD2_LEN (TSDB_MAX_BINARY_LEN + VARSTR_HEADER_SIZE) +#define SHOW_CREATE_DB_RESULT_FIELD2_LEN (TSDB_MAX_BINARY_LEN) #define SHOW_CREATE_TB_RESULT_COLS 2 #define SHOW_CREATE_TB_RESULT_FIELD1_LEN (TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE) diff --git a/include/libs/parser/parser.h b/include/libs/parser/parser.h index 558203052f..94fb6824d2 100644 --- a/include/libs/parser/parser.h +++ b/include/libs/parser/parser.h @@ -114,7 +114,7 @@ STableDataCxt* smlInitTableDataCtx(SQuery* query, STableMeta* pTableMeta); int32_t smlBindData(SQuery* handle, bool dataFormat, SArray* tags, SArray* colsSchema, SArray* cols, STableMeta* pTableMeta, char* tableName, const char* sTableName, int32_t sTableNameLen, int32_t ttl, - char* msgBuf, int16_t msgBufLen); + char* msgBuf, int32_t msgBufLen); int32_t smlBuildOutput(SQuery* handle, SHashObj* pVgHash); int rawBlockBindData(SQuery *query, STableMeta* 
pTableMeta, void* data, SVCreateTbReq* pCreateTb, TAOS_FIELD *fields, int numFields, bool needChangeLength); diff --git a/include/util/tdef.h b/include/util/tdef.h index e5000891c9..8c5c5502fd 100644 --- a/include/util/tdef.h +++ b/include/util/tdef.h @@ -236,8 +236,11 @@ typedef enum ELogicConditionType { * - Firstly, we use 65531 (65535 - 4), as the SDataRow/SKVRow contains a 4-byte header. * - Secondly, if all cols are VarDataT type except primary key, we need 4 bytes per column to store the offset, thus * the final value is 65531-(4096-1)*4 = 49151. + * + * History values: 49151/65531 + * - 65531 is compatible with 2.0 */ -#define TSDB_MAX_BYTES_PER_ROW 49151 +#define TSDB_MAX_BYTES_PER_ROW 65531 #define TSDB_MAX_TAGS_LEN 16384 #define TSDB_MAX_TAGS 128 @@ -406,9 +409,9 @@ typedef enum ELogicConditionType { #define TSDB_EXPLAIN_RESULT_ROW_SIZE (16 * 1024) #define TSDB_EXPLAIN_RESULT_COLUMN_NAME "QUERY_PLAN" -#define TSDB_MAX_FIELD_LEN 16384 -#define TSDB_MAX_BINARY_LEN (TSDB_MAX_FIELD_LEN - TSDB_KEYSIZE) // keep 16384 -#define TSDB_MAX_NCHAR_LEN (TSDB_MAX_FIELD_LEN - TSDB_KEYSIZE) // keep 16384 +#define TSDB_MAX_FIELD_LEN 65519 // compatible with 2.0 +#define TSDB_MAX_BINARY_LEN TSDB_MAX_FIELD_LEN // 16384:65519 +#define TSDB_MAX_NCHAR_LEN TSDB_MAX_FIELD_LEN // 16384:65519 #define PRIMARYKEY_TIMESTAMP_COL_ID 1 #define COL_REACH_END(colId, maxColId) ((colId) > (maxColId)) diff --git a/source/client/src/clientSmlJson.c b/source/client/src/clientSmlJson.c index 9fd98e33b7..b0ae316031 100644 --- a/source/client/src/clientSmlJson.c +++ b/source/client/src/clientSmlJson.c @@ -575,7 +575,7 @@ static int32_t smlConvertJSONString(SSmlKv *pVal, char *typeStr, cJSON *value) { uError("OTD:invalid type(%s) for JSON String", typeStr); return TSDB_CODE_TSC_INVALID_JSON_TYPE; } - pVal->length = (int16_t)strlen(value->valuestring); + pVal->length = strlen(value->valuestring); if (pVal->type == TSDB_DATA_TYPE_BINARY && pVal->length > TSDB_MAX_BINARY_LEN - VARSTR_HEADER_SIZE) { return TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN; diff --git a/source/dnode/mnode/impl/src/mndFunc.c b/source/dnode/mnode/impl/src/mndFunc.c index 7a475c61b6..f4451d1630 100644 --- a/source/dnode/mnode/impl/src/mndFunc.c +++ b/source/dnode/mnode/impl/src/mndFunc.c @@ -475,7 +475,7 @@ RETRIEVE_FUNC_OVER: return code; } -static void *mnodeGenTypeStr(char *buf, int32_t buflen, uint8_t type, int16_t len) { +static void *mnodeGenTypeStr(char *buf, int32_t buflen, uint8_t type, int32_t len) { char *msg = "unknown"; if (type >= sizeof(tDataTypes) / sizeof(tDataTypes[0])) { return msg; } diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index fef588a503..3272eefba4 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -1111,7 +1111,7 @@ static void doStateWindowAggImpl(SOperatorInfo* pOperator, SStateWindowOperatorI bool masterScan = true; int32_t numOfOutput = pOperator->exprSupp.numOfExprs; - int16_t bytes = pStateColInfoData->info.bytes; + int32_t bytes = pStateColInfoData->info.bytes; SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, pInfo->tsSlotId); TSKEY* tsList = (TSKEY*)pColInfoData->pData; diff --git a/source/libs/function/inc/tpercentile.h b/source/libs/function/inc/tpercentile.h index 80159460f5..65b7b38a05 100644 --- a/source/libs/function/inc/tpercentile.h +++ b/source/libs/function/inc/tpercentile.h @@ -53,7 +53,7 @@ typedef int32_t (*__perc_hash_func_t)(struct tMemBucket *pBucket, const void *va typedef struct
tMemBucket { int16_t numOfSlots; int16_t type; - int16_t bytes; + int32_t bytes; int32_t total; int32_t elemPerPage; // number of elements for each object int32_t maxCapacity; // maximum allowed number of elements that can be sort directly to get the result @@ -67,7 +67,7 @@ typedef struct tMemBucket { SHashObj *groupPagesMap; // disk page map for different groups; } tMemBucket; -tMemBucket *tMemBucketCreate(int16_t nElemSize, int16_t dataType, double minval, double maxval); +tMemBucket *tMemBucketCreate(int32_t nElemSize, int16_t dataType, double minval, double maxval); void tMemBucketDestroy(tMemBucket *pBucket); diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 4760358e0d..62d1f6244b 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -196,11 +196,11 @@ typedef struct SMavgInfo { } SMavgInfo; typedef struct SSampleInfo { - int32_t samples; - int32_t totalPoints; - int32_t numSampled; - uint8_t colType; - int16_t colBytes; + int32_t samples; + int32_t totalPoints; + int32_t numSampled; + uint8_t colType; + uint16_t colBytes; STuplePos nullTuplePos; bool nullTupleSaved; @@ -220,7 +220,7 @@ typedef struct STailInfo { int32_t numAdded; int32_t offset; uint8_t colType; - int16_t colBytes; + uint16_t colBytes; STailItem** pItems; } STailInfo; @@ -233,7 +233,7 @@ typedef struct SUniqueItem { typedef struct SUniqueInfo { int32_t numOfPoints; uint8_t colType; - int16_t colBytes; + uint16_t colBytes; bool hasNull; // null is not hashable, handle separately SHashObj* pHash; char pItems[]; @@ -247,13 +247,13 @@ typedef struct SModeItem { typedef struct SModeInfo { uint8_t colType; - int16_t colBytes; + uint16_t colBytes; SHashObj* pHash; STuplePos nullTuplePos; bool nullTupleSaved; - char* buf; // serialize data buffer + char* buf; // serialize data buffer } SModeInfo; typedef struct SDerivInfo { diff --git a/source/libs/function/src/tpercentile.c b/source/libs/function/src/tpercentile.c index de381fadbd..a18051e0b6 100644 --- a/source/libs/function/src/tpercentile.c +++ b/source/libs/function/src/tpercentile.c @@ -236,7 +236,7 @@ static void resetSlotInfo(tMemBucket *pBucket) { } } -tMemBucket *tMemBucketCreate(int16_t nElemSize, int16_t dataType, double minval, double maxval) { +tMemBucket *tMemBucketCreate(int32_t nElemSize, int16_t dataType, double minval, double maxval) { tMemBucket *pBucket = (tMemBucket *)taosMemoryCalloc(1, sizeof(tMemBucket)); if (pBucket == NULL) { return NULL; diff --git a/source/libs/parser/src/parInsertSml.c b/source/libs/parser/src/parInsertSml.c index 106ee641af..c05ce02aa2 100644 --- a/source/libs/parser/src/parInsertSml.c +++ b/source/libs/parser/src/parInsertSml.c @@ -242,7 +242,7 @@ end: int32_t smlBindData(SQuery* query, bool dataFormat, SArray* tags, SArray* colsSchema, SArray* cols, STableMeta* pTableMeta, char* tableName, const char* sTableName, int32_t sTableNameLen, int32_t ttl, - char* msgBuf, int16_t msgBufLen) { + char* msgBuf, int32_t msgBufLen) { SMsgBuf pBuf = {.buf = msgBuf, .len = msgBufLen}; SSchema* pTagsSchema = getTableTagSchema(pTableMeta); diff --git a/source/libs/scalar/src/sclfunc.c b/source/libs/scalar/src/sclfunc.c index 195a08525c..23f07fb332 100644 --- a/source/libs/scalar/src/sclfunc.c +++ b/source/libs/scalar/src/sclfunc.c @@ -12,7 +12,7 @@ typedef double (*_double_fn)(double); typedef double (*_double_fn_2)(double, double); typedef int (*_conv_fn)(int); typedef void (*_trim_fn)(char *, char *, int32_t, int32_t); -typedef int16_t 
(*_len_fn)(char *, int32_t); +typedef uint16_t (*_len_fn)(char *, int32_t); /** Math functions **/ static double tlog(double v) { return log(v); } @@ -286,9 +286,9 @@ static int32_t doScalarFunction(SScalarParam *pInput, int32_t inputNum, SScalarP } /** String functions **/ -static int16_t tlength(char *input, int32_t type) { return varDataLen(input); } +static VarDataLenT tlength(char *input, int32_t type) { return varDataLen(input); } -static int16_t tcharlength(char *input, int32_t type) { +static VarDataLenT tcharlength(char *input, int32_t type) { if (type == TSDB_DATA_TYPE_VARCHAR) { return varDataLen(input); } else { // NCHAR @@ -377,7 +377,7 @@ static int32_t doLengthFunction(SScalarParam *pInput, int32_t inputNum, SScalarP return TSDB_CODE_SUCCESS; } -static int32_t concatCopyHelper(const char *input, char *output, bool hasNchar, int32_t type, int16_t *dataLen) { +static int32_t concatCopyHelper(const char *input, char *output, bool hasNchar, int32_t type, VarDataLenT *dataLen) { if (hasNchar && type == TSDB_DATA_TYPE_VARCHAR) { TdUcs4 *newBuf = taosMemoryCalloc((varDataLen(input) + 1) * TSDB_NCHAR_SIZE, 1); int32_t len = varDataLen(input); @@ -457,7 +457,7 @@ int32_t concatFunction(SScalarParam *pInput, int32_t inputNum, SScalarParam *pOu continue; } - int16_t dataLen = 0; + VarDataLenT dataLen = 0; for (int32_t i = 0; i < inputNum; ++i) { int32_t rowIdx = (pInput[i].numOfRows == 1) ? 0 : k; input[i] = colDataGetData(pInputData[i], rowIdx); @@ -526,8 +526,8 @@ int32_t concatWsFunction(SScalarParam *pInput, int32_t inputNum, SScalarParam *p continue; } - int16_t dataLen = 0; - bool hasNull = false; + VarDataLenT dataLen = 0; + bool hasNull = false; for (int32_t i = 1; i < inputNum; ++i) { if (colDataIsNull_s(pInputData[i], k) || IS_NULL_TYPE(GET_PARAM_TYPE(&pInput[i]))) { hasNull = true; @@ -695,7 +695,7 @@ int32_t substrFunction(SScalarParam *pInput, int32_t inputNum, SScalarParam *pOu /** Conversion functions **/ int32_t castFunction(SScalarParam *pInput, int32_t inputNum, SScalarParam *pOutput) { int16_t inputType = GET_PARAM_TYPE(&pInput[0]); - int16_t inputLen = GET_PARAM_BYTES(&pInput[0]); + int32_t inputLen = GET_PARAM_BYTES(&pInput[0]); int16_t outputType = GET_PARAM_TYPE(&pOutput[0]); int64_t outputLen = GET_PARAM_BYTES(&pOutput[0]); diff --git a/source/util/src/tcompare.c b/source/util/src/tcompare.c index f8f78ae6a5..be2ad730f7 100644 --- a/source/util/src/tcompare.c +++ b/source/util/src/tcompare.c @@ -1241,7 +1241,7 @@ int32_t taosArrayCompareString(const void *a, const void *b) { int32_t comparestrPatternMatch(const void *pLeft, const void *pRight) { SPatternCompareInfo pInfo = PATTERN_COMPARE_INFO_INITIALIZER; - ASSERT(varDataLen(pRight) <= TSDB_MAX_FIELD_LEN); + ASSERT(varDataTLen(pRight) <= TSDB_MAX_FIELD_LEN); size_t pLen = varDataLen(pRight); size_t sz = varDataLen(pLeft); diff --git a/tools/shell/src/shellEngine.c b/tools/shell/src/shellEngine.c index a87ba16267..165b00dc68 100644 --- a/tools/shell/src/shellEngine.c +++ b/tools/shell/src/shellEngine.c @@ -695,7 +695,7 @@ int32_t shellCalcColWidth(TAOS_FIELD *field, int32_t precision) { case TSDB_DATA_TYPE_NCHAR: case TSDB_DATA_TYPE_JSON: { - int16_t bytes = field->bytes * TSDB_NCHAR_SIZE; + uint16_t bytes = field->bytes * TSDB_NCHAR_SIZE; if (bytes > shell.args.displayWidth) { return TMAX(shell.args.displayWidth, width); } else { From 303bc7dc230cbed51ff941ad36e3e11930dbc41e Mon Sep 17 00:00:00 2001 From: kailixu Date: Mon, 3 Apr 2023 17:12:14 +0800 Subject: [PATCH 02/40] enh: column/row max length support up 
to 64K --- include/util/tdef.h | 17 ++++------------- source/client/src/clientSmlJson.c | 2 +- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/include/util/tdef.h b/include/util/tdef.h index 8c5c5502fd..5782da35bc 100644 --- a/include/util/tdef.h +++ b/include/util/tdef.h @@ -231,16 +231,7 @@ typedef enum ELogicConditionType { #define TSDB_QUERY_ID_LEN 26 #define TSDB_TRANS_OPER_LEN 16 -/** - * In some scenarios uint16_t (0~65535) is used to store the row len. - * - Firstly, we use 65531 (65535 - 4), as the SDataRow/SKVRow contains a 4-byte header. - * - Secondly, if all cols are VarDataT type except primary key, we need 4 bytes per column to store the offset, thus - * the final value is 65531-(4096-1)*4 = 49151. - * - * History values: 49151/65531 - * - 65531 is compatible with 2.0 - */ -#define TSDB_MAX_BYTES_PER_ROW 65531 +#define TSDB_MAX_BYTES_PER_ROW 65531 // 49151:65531 #define TSDB_MAX_TAGS_LEN 16384 #define TSDB_MAX_TAGS 128 @@ -409,9 +400,9 @@ typedef enum ELogicConditionType { #define TSDB_EXPLAIN_RESULT_ROW_SIZE (16 * 1024) #define TSDB_EXPLAIN_RESULT_COLUMN_NAME "QUERY_PLAN" -#define TSDB_MAX_FIELD_LEN 65519 // compatible with 2.0 -#define TSDB_MAX_BINARY_LEN TSDB_MAX_FIELD_LEN // 16384:65519 -#define TSDB_MAX_NCHAR_LEN TSDB_MAX_FIELD_LEN // 16384:65519 +#define TSDB_MAX_FIELD_LEN 65519 // 16384:65519 +#define TSDB_MAX_BINARY_LEN TSDB_MAX_FIELD_LEN // 16384-8:65519 +#define TSDB_MAX_NCHAR_LEN TSDB_MAX_FIELD_LEN // 16384-8:65519 #define PRIMARYKEY_TIMESTAMP_COL_ID 1 #define COL_REACH_END(colId, maxColId) ((colId) > (maxColId)) diff --git a/source/client/src/clientSmlJson.c b/source/client/src/clientSmlJson.c index b0ae316031..c3a6e15697 100644 --- a/source/client/src/clientSmlJson.c +++ b/source/client/src/clientSmlJson.c @@ -575,7 +575,7 @@ static int32_t smlConvertJSONString(SSmlKv *pVal, char *typeStr, cJSON *value) { uError("OTD:invalid type(%s) for JSON String", typeStr); return TSDB_CODE_TSC_INVALID_JSON_TYPE; } - pVal->length = strlen(value->valuestring); + pVal->length = (uint16_t)strlen(value->valuestring); if (pVal->type == TSDB_DATA_TYPE_BINARY && pVal->length > TSDB_MAX_BINARY_LEN - VARSTR_HEADER_SIZE) { return TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN; From 2d4dd64584fb3bc6516bc1aa63570223efd35273 Mon Sep 17 00:00:00 2001 From: kailixu Date: Mon, 24 Apr 2023 20:21:28 +0800 Subject: [PATCH 03/40] chore: more code --- include/libs/nodes/cmdnodes.h | 2 +- include/util/tdef.h | 7 + source/client/src/clientSml.c | 7 +- source/client/src/clientSmlJson.c | 2 + source/client/src/clientSmlLine.c | 5 +- source/client/src/clientSmlTelnet.c | 3 +- source/libs/parser/src/parAstCreater.c | 2 +- source/libs/parser/src/parTranslater.c | 10 +- tests/parallel_test/cases.task | 1 + .../1-insert/influxdb_line_taosc_insert.py | 17 +- tests/system-test/1-insert/stmt_error.py | 225 ++++++++++++++++++ tests/system-test/runAllOne.sh | 1 + tests/system-test/win-test-file | 1 + 13 files changed, 267 insertions(+), 16 deletions(-) create mode 100644 tests/system-test/1-insert/stmt_error.py diff --git a/include/libs/nodes/cmdnodes.h b/include/libs/nodes/cmdnodes.h index 0a9893907b..2323d044ec 100644 --- a/include/libs/nodes/cmdnodes.h +++ b/include/libs/nodes/cmdnodes.h @@ -30,7 +30,7 @@ extern "C" { #define SHOW_CREATE_DB_RESULT_COLS 2 #define SHOW_CREATE_DB_RESULT_FIELD1_LEN (TSDB_DB_NAME_LEN + VARSTR_HEADER_SIZE) -#define SHOW_CREATE_DB_RESULT_FIELD2_LEN (TSDB_MAX_BINARY_LEN) +#define SHOW_CREATE_DB_RESULT_FIELD2_LEN (TSDB_MAX_BINARY_LEN + VARSTR_HEADER_SIZE) #define SHOW_CREATE_TB_RESULT_COLS 2
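[Note — review aside, not part of the patch series] All of the limits touched in patches 01-03 fall out of the uint16_t row-length field. A minimal standalone sketch of the arithmetic, compilable as-is; the EX_* names are hypothetical stand-ins for the constants in include/util/tdef.h, and reading the 12-byte gap as "8-byte timestamp key plus one 4-byte offset" is this reviewer's assumption, not something the patches state:

#include <assert.h>
#include <stdint.h>

#define EX_ROW_LEN_MAX   UINT16_MAX                              /* row length lives in a uint16_t          */
#define EX_ROW_HDR       4u                                      /* SDataRow/SKVRow header, 4 bytes         */
#define EX_BYTES_PER_ROW (EX_ROW_LEN_MAX - EX_ROW_HDR)           /* 65531 = the new TSDB_MAX_BYTES_PER_ROW  */
#define EX_OLD_ROW_CAP   (EX_BYTES_PER_ROW - (4096u - 1u) * 4u)  /* 49151: 4-byte offset per var column     */

int main(void) {
  assert(EX_BYTES_PER_ROW == 65531u);        /* matches patch 02's TSDB_MAX_BYTES_PER_ROW        */
  assert(EX_OLD_ROW_CAP == 49151u);          /* the pre-series cap, kept in the "49151:65531" comment */
  assert(EX_BYTES_PER_ROW - 65519u == 12u);  /* TSDB_MAX_FIELD_LEN reserves 12 bytes --
                                              * assumed: 8-byte timestamp key + one 4-byte offset */
  return 0;
}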
#define SHOW_CREATE_TB_RESULT_FIELD1_LEN (TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE) diff --git a/include/util/tdef.h b/include/util/tdef.h index ead649a51c..7a2c57b9f8 100644 --- a/include/util/tdef.h +++ b/include/util/tdef.h @@ -22,6 +22,13 @@ extern "C" { #endif + +#if 1 +#define TASSERT assert(0); +#else +#define TASSERT +#endif + #define TSDB__packed #define TSKEY int64_t diff --git a/source/client/src/clientSml.c b/source/client/src/clientSml.c index 17150286e1..0bca52449c 100644 --- a/source/client/src/clientSml.c +++ b/source/client/src/clientSml.c @@ -565,8 +565,8 @@ static int32_t smlFindNearestPowerOf2(int32_t length, uint8_t type) { } if (type == TSDB_DATA_TYPE_BINARY && result > TSDB_MAX_BINARY_LEN - VARSTR_HEADER_SIZE) { result = TSDB_MAX_BINARY_LEN - VARSTR_HEADER_SIZE; - } else if (type == TSDB_DATA_TYPE_NCHAR && result > (TSDB_MAX_BINARY_LEN - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE) { - result = (TSDB_MAX_BINARY_LEN - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE; + } else if (type == TSDB_DATA_TYPE_NCHAR && result > (TSDB_MAX_NCHAR_LEN - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE) { + result = (TSDB_MAX_NCHAR_LEN - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE; } if (type == TSDB_DATA_TYPE_NCHAR) { @@ -637,6 +637,9 @@ static int32_t smlBuildFieldsList(SSmlHandle *info, SSchema *schemaField, SHashO field.bytes = getBytes(kv->type, kv->length); memcpy(field.name, kv->key, kv->keyLen); taosArrayPush(results, &field); + if(numOfCols == 0) { + + } } else if (action == SCHEMA_ACTION_CHANGE_COLUMN_SIZE || action == SCHEMA_ACTION_CHANGE_TAG_SIZE) { uint16_t *index = (uint16_t *)taosHashGet(schemaHash, kv->key, kv->keyLen); if (index == NULL) { diff --git a/source/client/src/clientSmlJson.c b/source/client/src/clientSmlJson.c index c3a6e15697..b50a29d4fb 100644 --- a/source/client/src/clientSmlJson.c +++ b/source/client/src/clientSmlJson.c @@ -578,10 +578,12 @@ static int32_t smlConvertJSONString(SSmlKv *pVal, char *typeStr, cJSON *value) { pVal->length = (uint16_t)strlen(value->valuestring); if (pVal->type == TSDB_DATA_TYPE_BINARY && pVal->length > TSDB_MAX_BINARY_LEN - VARSTR_HEADER_SIZE) { + TASSERT return TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN; } if (pVal->type == TSDB_DATA_TYPE_NCHAR && pVal->length > (TSDB_MAX_NCHAR_LEN - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE) { + TASSERT return TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN; } diff --git a/source/client/src/clientSmlLine.c b/source/client/src/clientSmlLine.c index 335e3a1dc7..7dd087039b 100644 --- a/source/client/src/clientSmlLine.c +++ b/source/client/src/clientSmlLine.c @@ -81,6 +81,7 @@ int32_t smlParseValue(SSmlKv *pVal, SSmlMsgBuf *msg) { pVal->type = TSDB_DATA_TYPE_BINARY; pVal->length -= BINARY_ADD_LEN; if (pVal->length > TSDB_MAX_BINARY_LEN - VARSTR_HEADER_SIZE) { + TASSERT return TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN; } pVal->value += (BINARY_ADD_LEN - 1); @@ -94,6 +95,7 @@ int32_t smlParseValue(SSmlKv *pVal, SSmlMsgBuf *msg) { pVal->type = TSDB_DATA_TYPE_NCHAR; pVal->length -= NCHAR_ADD_LEN; if (pVal->length > (TSDB_MAX_NCHAR_LEN - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE) { + TASSERT return TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN; } pVal->value += (NCHAR_ADD_LEN - 1); @@ -236,7 +238,8 @@ static int32_t smlParseTagKv(SSmlHandle *info, char **sql, char *sqlEnd, SSmlLin PROCESS_SLASH(value, valueLen) } - if (unlikely(valueLen > (TSDB_MAX_NCHAR_LEN - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE)) { + if (unlikely(valueLen > (TSDB_MAX_TAGS_LEN - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE)) { + TASSERT return TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN; } diff --git 
a/source/client/src/clientSmlTelnet.c b/source/client/src/clientSmlTelnet.c index 036442573d..9baa1e5758 100644 --- a/source/client/src/clientSmlTelnet.c +++ b/source/client/src/clientSmlTelnet.c @@ -158,7 +158,8 @@ static int32_t smlParseTelnetTags(SSmlHandle *info, char *data, char *sqlEnd, SS return TSDB_CODE_TSC_INVALID_VALUE; } - if (unlikely(valueLen > (TSDB_MAX_NCHAR_LEN - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE)) { + if (unlikely(valueLen > (TSDB_MAX_TAGS_LEN - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE)) { + TASSERT return TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN; } diff --git a/source/libs/parser/src/parAstCreater.c b/source/libs/parser/src/parAstCreater.c index 2afe34c1f7..c53721f865 100644 --- a/source/libs/parser/src/parAstCreater.c +++ b/source/libs/parser/src/parAstCreater.c @@ -1208,7 +1208,7 @@ SDataType createDataType(uint8_t type) { } SDataType createVarLenDataType(uint8_t type, const SToken* pLen) { - SDataType dt = {.type = type, .precision = 0, .scale = 0, .bytes = taosStr2Int16(pLen->z, NULL, 10)}; + SDataType dt = {.type = type, .precision = 0, .scale = 0, .bytes = taosStr2Int32(pLen->z, NULL, 10)}; return dt; } diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 25e92a55ec..20ca987250 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -4498,8 +4498,9 @@ static int32_t checkTableTagsSchema(STranslateContext* pCxt, SHashObj* pHash, SN code = generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_ONLY_ONE_JSON_TAG); } if (TSDB_CODE_SUCCESS == code) { - if ((TSDB_DATA_TYPE_VARCHAR == pTag->dataType.type && calcTypeBytes(pTag->dataType) > TSDB_MAX_BINARY_LEN) || - (TSDB_DATA_TYPE_NCHAR == pTag->dataType.type && calcTypeBytes(pTag->dataType) > TSDB_MAX_NCHAR_LEN)) { + if ((TSDB_DATA_TYPE_VARCHAR == pTag->dataType.type && calcTypeBytes(pTag->dataType) > TSDB_MAX_TAGS_LEN) || + (TSDB_DATA_TYPE_NCHAR == pTag->dataType.type && calcTypeBytes(pTag->dataType) > TSDB_MAX_TAGS_LEN)) { + TASSERT code = generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN); } } @@ -4551,6 +4552,7 @@ static int32_t checkTableColsSchema(STranslateContext* pCxt, SHashObj* pHash, in if (TSDB_CODE_SUCCESS == code) { if ((TSDB_DATA_TYPE_VARCHAR == pCol->dataType.type && calcTypeBytes(pCol->dataType) > TSDB_MAX_BINARY_LEN) || (TSDB_DATA_TYPE_NCHAR == pCol->dataType.type && calcTypeBytes(pCol->dataType) > TSDB_MAX_NCHAR_LEN)) { + TASSERT code = generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN); } } @@ -5236,6 +5238,7 @@ static int32_t checkAlterSuperTableBySchema(STranslateContext* pCxt, SAlterTable if (TSDB_ALTER_TABLE_UPDATE_COLUMN_BYTES == pStmt->alterType) { if (calcTypeBytes(pStmt->dataType) > TSDB_MAX_FIELD_LEN) { + TASSERT return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN); } @@ -5245,7 +5248,8 @@ static int32_t checkAlterSuperTableBySchema(STranslateContext* pCxt, SAlterTable } if (TSDB_ALTER_TABLE_UPDATE_TAG_BYTES == pStmt->alterType) { - if (calcTypeBytes(pStmt->dataType) > TSDB_MAX_FIELD_LEN) { + if (calcTypeBytes(pStmt->dataType) > TSDB_MAX_TAGS_LEN) { + TASSERT return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN); } diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index dda4ec3e84..bf91f7c8c7 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -338,6 +338,7 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/delete_childtable.py 
,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/delete_normaltable.py ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/keep_expired.py +,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/stmt_error.py ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/drop.py ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/drop.py -N 3 -M 3 -i False -n 3 ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/join2.py diff --git a/tests/system-test/1-insert/influxdb_line_taosc_insert.py b/tests/system-test/1-insert/influxdb_line_taosc_insert.py index 6372502484..b53abc41aa 100644 --- a/tests/system-test/1-insert/influxdb_line_taosc_insert.py +++ b/tests/system-test/1-insert/influxdb_line_taosc_insert.py @@ -673,11 +673,11 @@ class TDTestCase: tdSql.checkNotEqual(err.errno, 0) # # # binary - # stb_name = tdCom.getLongName(7, "letters") - # input_sql = f'{stb_name},t0=t c0=f,c1="{tdCom.getLongName(16374, "letters")}" 1626006833639000000' - # self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + stb_name = tdCom.getLongName(7, "letters") + input_sql = f'{stb_name},t0=t c0=f,c11=f,c2=f,c3=f,c4=f,c5=f,c6=f,c7=f,c8=f,c9=f,c10=f,c12=f,c1="{tdCom.getLongName(65519, "letters")}" 1626006833639000000' + self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) - # input_sql = f'{stb_name},t0=t c0=f,c1="{tdCom.getLongName(16375, "letters")}" 1626006833639000000' + # input_sql = f'{stb_name},t0=t c0=f,c1="{tdCom.getLongName(65514, "letters")}" 1626006833639000000' # try: # self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) # except SchemalessError as err: @@ -884,7 +884,7 @@ class TDTestCase: tdSql.checkRows(2) tdSql.checkNotEqual(tb_name1, tb_name3) - # * tag binary max is 16384, col+ts binary max 49151 + # * tag binary max is 16384, col+ts binary max 65531 def tagColBinaryMaxLengthCheckCase(self): """ every binary and nchar must be length+2 @@ -911,7 +911,10 @@ class TDTestCase: tdSql.checkRows(2) # # * check col,col+ts max in describe ---> 16143 - input_sql = f'{stb_name},t0=t c0=f,c1="{tdCom.getLongName(16374, "letters")}",c2="{tdCom.getLongName(16374, "letters")}",c3="{tdCom.getLongName(16374, "letters")}",c4="{tdCom.getLongName(12, "letters")}" 1626006833639000000' + input_sql = f'{stb_name},t0=t c0=f,c1="{tdCom.getLongName(65517, "letters")}" 1626006833639000000' + self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + + input_sql = f'{stb_name},t0=t c0=f,c1="{tdCom.getLongName(49133, "letters")}",c2="{tdCom.getLongName(16384, "letters")}" 1626006833639000000' self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) tdSql.query(f"select * from {stb_name}") @@ -1280,7 +1283,7 @@ class TDTestCase: self.nowTsCheckCase() self.dateFormatTsCheckCase() self.illegalTsCheckCase() - # self.tagValueLengthCheckCase() + self.tagValueLengthCheckCase() self.colValueLengthCheckCase() self.tagColIllegalValueCheckCase() self.duplicateIdTagColInsertCheckCase() diff --git a/tests/system-test/1-insert/stmt_error.py b/tests/system-test/1-insert/stmt_error.py new file mode 100644 index 0000000000..b77cd488f7 --- /dev/null +++ b/tests/system-test/1-insert/stmt_error.py @@ -0,0 +1,225 @@ +# encoding:UTF-8 +from taos import * + +from ctypes import * +from datetime import datetime +import taos + +import taos +import time 
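# [Note -- review aside, not part of the original commit] This new file drives
# three stmt paths end to end: (1) a near-row-cap bind into the binary(65059)
# column of `log` (binaryStr below ends up 9 + 1301*50 = 65059 bytes, matching
# the column width); (2) a VALUES clause that illegally mixes '?' placeholders
# with a literal; and (3) a timestamp bind of 2**63, outside the valid range.
# run() asserts the exact error strings for the last two cases.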
+ +from util.log import * +from util.cases import * +from util.sql import * +from util.dnodes import * + +class TDTestCase: + def __init__(self): + self.err_case = 0 + self.curret_case = 0 + + def caseDescription(self): + + ''' + case1 : [TD-11899] : this is an test case for check stmt error use . + ''' + return + + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor(), logSql) + + def conn(self): + # type: () -> taos.TaosConnection + return connect() + + def test_stmt_insert(self,conn): + # type: (TaosConnection) -> None + + dbname = "pytest_taos_stmt" + try: + conn.execute("drop database if exists %s" % dbname) + conn.execute("create database if not exists %s" % dbname) + conn.select_db(dbname) + + conn.execute( + "create table if not exists log(ts timestamp, bo bool, nil tinyint, ti tinyint, si smallint, ii int,\ + bi bigint, tu tinyint unsigned, su smallint unsigned, iu int unsigned, bu bigint unsigned, \ + ff float, dd double, bb binary(65059), nn nchar(100), tt timestamp)", + ) + conn.load_table_info("log") + + + stmt = conn.statement("insert into log values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)") + params = new_bind_params(16) + params[0].timestamp(1626861392589, PrecisionEnum.Milliseconds) + params[1].bool(True) + params[2].tinyint(None) + params[3].tinyint(2) + params[4].smallint(3) + params[5].int(4) + params[6].bigint(5) + params[7].tinyint_unsigned(6) + params[8].smallint_unsigned(7) + params[9].int_unsigned(8) + params[10].bigint_unsigned(9) + params[11].float(10.1) + params[12].double(10.11) + binaryStr = '123456789' + for i in range(1301): + binaryStr += "1234567890abcdefghij1234567890abcdefghij12345hello" + params[13].binary(binaryStr) + params[14].nchar("stmt") + params[15].timestamp(1626861392589, PrecisionEnum.Milliseconds) + + stmt.bind_param(params) + stmt.execute() + + assert stmt.affected_rows == 1 + stmt.close() + + querystmt=conn.statement("select ?, bo, nil, ti, si, ii,bi, tu, su, iu, bu, ff, dd, bb, nn, tt from log") + queryparam=new_bind_params(1) + print(type(queryparam)) + queryparam[0].binary("ts") + querystmt.bind_param(queryparam) + querystmt.execute() + result=querystmt.use_result() + + row=result.fetch_all() + print(row) + + assert row[0][1] == True + assert row[0][2] == None + for i in range(3, 10): + assert row[0][i] == i - 1 + #float == may not work as expected + # assert row[0][11] == c_float(10.1) + assert row[0][12] == 10.11 + assert row[0][13][65054:] == "hello" + assert row[0][14] == "stmt" + + conn.execute("drop database if exists %s" % dbname) + conn.close() + + except Exception as err: + conn.execute("drop database if exists %s" % dbname) + conn.close() + raise err + + def test_stmt_insert_error(self,conn): + # type: (TaosConnection) -> None + + dbname = "pytest_taos_stmt_error" + try: + conn.execute("drop database if exists %s" % dbname) + conn.execute("create database if not exists %s" % dbname) + conn.select_db(dbname) + + conn.execute( + "create table if not exists log(ts timestamp, bo bool, nil tinyint, ti tinyint, si smallint, ii int,\ + bi bigint, tu tinyint unsigned, su smallint unsigned, iu int unsigned, bu bigint unsigned, \ + ff float, dd double, bb binary(100), nn nchar(100), tt timestamp , error_data int )", + ) + conn.load_table_info("log") + + + stmt = conn.statement("insert into log values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,1000)") + params = new_bind_params(16) + params[0].timestamp(1626861392589, PrecisionEnum.Milliseconds) + 
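# [Note -- review aside] the statement above deliberately mixes sixteen '?'
# placeholders with the literal 1000 bound to the extra error_data column;
# executing it is expected to fail with '[0x0200]: no mix usage for ? and
# values', which run() checks for below.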
params[1].bool(True) + params[2].tinyint(None) + params[3].tinyint(2) + params[4].smallint(3) + params[5].int(4) + params[6].bigint(5) + params[7].tinyint_unsigned(6) + params[8].smallint_unsigned(7) + params[9].int_unsigned(8) + params[10].bigint_unsigned(9) + params[11].float(10.1) + params[12].double(10.11) + params[13].binary("hello") + params[14].nchar("stmt") + params[15].timestamp(1626861392589, PrecisionEnum.Milliseconds) + + stmt.bind_param(params) + stmt.execute() + + conn.close() + + except Exception as err: + conn.execute("drop database if exists %s" % dbname) + conn.close() + raise err + + def test_stmt_insert_error_null_timestamp(self,conn): + + dbname = "pytest_taos_stmt_error_null_ts" + try: + conn.execute("drop database if exists %s" % dbname) + conn.execute("create database if not exists %s" % dbname) + conn.execute("alter database %s keep 36500" % dbname) + conn.select_db(dbname) + + conn.execute("create stable STB(ts timestamp, n int) tags(b int)") + + stmt = conn.statement("insert into ? using STB tags(?) values(?, ?)") + params = new_bind_params(1) + params[0].int(4); + stmt.set_tbname_tags("ct", params); + + multi_params = new_multi_binds(2); + multi_params[0].timestamp([9223372036854775808]) + multi_params[1].int([123]) + stmt.bind_param_batch(multi_params) + + stmt.execute() + result = stmt.use_result() + + result.close() + stmt.close() + + stmt = conn.statement("select * from STB") + stmt.execute() + result = stmt.use_result() + print(result.affected_rows) + row = result.next() + print(row) + + result.close() + stmt.close() + conn.close() + + except Exception as err: + conn.close() + raise err + + def run(self): + + self.test_stmt_insert(self.conn()) + try: + self.test_stmt_insert_error(self.conn()) + except Exception as error : + + if str(error)=='[0x0200]: no mix usage for ? 
and values': + tdLog.info('=========stmt error occured for bind part column ==============') + else: + tdLog.exit("expect error(%s) not occured" % str(error)) + + try: + self.test_stmt_insert_error_null_timestamp(self.conn()) + tdLog.exit("expect error not occured - 1") + except Exception as error : + if str(error)=='[0x060b]: Timestamp data out of range': + tdLog.info('=========stmt error occured for bind part column(NULL Timestamp) ==============') + else: + tdLog.exit("expect error(%s) not occured - 2" % str(error)) + + def stop(self): + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase()) \ No newline at end of file diff --git a/tests/system-test/runAllOne.sh b/tests/system-test/runAllOne.sh index 5a8d358d98..40addb0ca5 100644 --- a/tests/system-test/runAllOne.sh +++ b/tests/system-test/runAllOne.sh @@ -287,6 +287,7 @@ python3 ./test.py -f 1-insert/tb_100w_data_order.py -P python3 ./test.py -f 1-insert/delete_childtable.py -P python3 ./test.py -f 1-insert/delete_normaltable.py -P python3 ./test.py -f 1-insert/keep_expired.py -P +python3 ./test.py -f 1-insert/stmt_error.py -P python3 ./test.py -f 1-insert/drop.py -P python3 ./test.py -f 2-query/join2.py -P python3 ./test.py -f 2-query/union1.py -P diff --git a/tests/system-test/win-test-file b/tests/system-test/win-test-file index 7e68c40fd8..8b7b7d868d 100644 --- a/tests/system-test/win-test-file +++ b/tests/system-test/win-test-file @@ -218,6 +218,7 @@ python3 ./test.py -f 1-insert/delete_stable.py python3 ./test.py -f 1-insert/delete_childtable.py python3 ./test.py -f 1-insert/delete_normaltable.py python3 ./test.py -f 1-insert/keep_expired.py +python3 ./test.py -f 1-insert/stmt_error.py python3 ./test.py -f 1-insert/drop.py python3 ./test.py -f 1-insert/drop.py -N 3 -M 3 -i False -n 3 python3 ./test.py -f 2-query/join2.py From b059cc4ee13a9ad4c5970cf9c6dc671bc701a3e1 Mon Sep 17 00:00:00 2001 From: kailixu Date: Tue, 25 Apr 2023 10:59:02 +0800 Subject: [PATCH 04/40] chore: code optimization --- source/client/inc/clientInt.h | 2 +- source/client/src/clientEnv.c | 2 +- source/client/src/clientHb.c | 13 +++++++++---- source/client/src/clientMain.c | 10 +++++++--- tests/script/api/passwdTest.c | 7 +++---- 5 files changed, 21 insertions(+), 13 deletions(-) diff --git a/source/client/inc/clientInt.h b/source/client/inc/clientInt.h index 8e20d7d275..ab8e20b85e 100644 --- a/source/client/inc/clientInt.h +++ b/source/client/inc/clientInt.h @@ -361,7 +361,7 @@ void stopAllRequests(SHashObj* pRequests); // conn level int hbRegisterConn(SAppHbMgr* pAppHbMgr, int64_t tscRefId, int64_t clusterId, int8_t connType); -void hbDeregisterConn(SAppHbMgr* pAppHbMgr, SClientHbKey connKey, void* param); +void hbDeregisterConn(STscObj* pTscObj, SClientHbKey connKey); typedef struct SSqlCallbackWrapper { SParseContext* pParseCtx; diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index 99569fdb57..ba02ae6731 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -239,7 +239,7 @@ void destroyTscObj(void *pObj) { tscTrace("begin to destroy tscObj %" PRIx64 " p:%p", tscId, pTscObj); SClientHbKey connKey = {.tscRid = pTscObj->id, .connType = pTscObj->connType}; - hbDeregisterConn(pTscObj->pAppInfo->pAppHbMgr, connKey, pTscObj->passInfo.fp); + hbDeregisterConn(pTscObj, connKey); destroyAllRequests(pTscObj->pRequests); taosHashCleanup(pTscObj->pRequests); diff --git a/source/client/src/clientHb.c 
b/source/client/src/clientHb.c index 79435da89f..4240a8510d 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -994,6 +994,7 @@ SAppHbMgr *appHbMgrInit(SAppInstInfo *pAppInstInfo, char *key) { // init stat pAppHbMgr->startTime = taosGetTimestampMs(); pAppHbMgr->connKeyCnt = 0; + pAppHbMgr->passKeyCnt = 0; pAppHbMgr->reportCnt = 0; pAppHbMgr->reportBytes = 0; pAppHbMgr->key = taosStrdup(key); @@ -1154,7 +1155,8 @@ int hbRegisterConn(SAppHbMgr *pAppHbMgr, int64_t tscRefId, int64_t clusterId, in } } -void hbDeregisterConn(SAppHbMgr *pAppHbMgr, SClientHbKey connKey, void *param) { +void hbDeregisterConn(STscObj *pTscObj, SClientHbKey connKey) { + SAppHbMgr *pAppHbMgr = pTscObj->pAppInfo->pAppHbMgr; SClientHbReq *pReq = taosHashAcquire(pAppHbMgr->activeInfo, &connKey, sizeof(SClientHbKey)); if (pReq) { tFreeClientHbReq(pReq); @@ -1167,7 +1169,10 @@ void hbDeregisterConn(SAppHbMgr *pAppHbMgr, SClientHbKey connKey, void *param) { } atomic_sub_fetch_32(&pAppHbMgr->connKeyCnt, 1); - if (param) { - atomic_sub_fetch_32(&pAppHbMgr->passKeyCnt, 1); + + taosThreadMutexLock(&pTscObj->mutex); + if (pTscObj->passInfo.fp) { + int32_t cnt = atomic_sub_fetch_32(&pAppHbMgr->passKeyCnt, 1); } -} + taosThreadMutexUnlock(&pTscObj->mutex); +} \ No newline at end of file diff --git a/source/client/src/clientMain.c b/source/client/src/clientMain.c index aed11b4fb1..55465f227e 100644 --- a/source/client/src/clientMain.c +++ b/source/client/src/clientMain.c @@ -134,11 +134,15 @@ int taos_set_notify_cb(TAOS *taos, __taos_notify_fn_t fp, void *param, int type) switch (type) { case TAOS_NOTIFY_PASSVER: { + taosThreadMutexLock(&pObj->mutex); + if (fp && !pObj->passInfo.fp) { + atomic_add_fetch_32(&pObj->pAppInfo->pAppHbMgr->passKeyCnt, 1); + } else if (!fp && pObj->passInfo.fp) { + atomic_sub_fetch_32(&pObj->pAppInfo->pAppHbMgr->passKeyCnt, 1); + } pObj->passInfo.fp = fp; pObj->passInfo.param = param; - if (fp) { - atomic_add_fetch_32(&pObj->pAppInfo->pAppHbMgr->passKeyCnt, 1); - } + taosThreadMutexUnlock(&pObj->mutex); break; } default: { diff --git a/tests/script/api/passwdTest.c b/tests/script/api/passwdTest.c index 8a2b0a0390..1bf4987689 100644 --- a/tests/script/api/passwdTest.c +++ b/tests/script/api/passwdTest.c @@ -33,8 +33,7 @@ #define nUser 10 #define USER_LEN 24 -void Test(TAOS *taos, char *qstr); -void createUers(TAOS *taos, const char *host, char *qstr); +void createUsers(TAOS *taos, const char *host, char *qstr); void passVerTestMulti(const char *host, char *qstr); int nPassVerNotified = 0; @@ -98,14 +97,14 @@ int main(int argc, char *argv[]) { printf("failed to connect to server, reason:%s\n", "null taos" /*taos_errstr(taos)*/); exit(1); } - createUers(taos, argv[1], qstr); + createUsers(taos, argv[1], qstr); passVerTestMulti(argv[1], qstr); taos_close(taos); taos_cleanup(); } -void createUers(TAOS *taos, const char *host, char *qstr) { +void createUsers(TAOS *taos, const char *host, char *qstr) { // users for (int i = 0; i < nUser; ++i) { sprintf(users[i], "user%d", i); From a6e37622ff1a8fe181a7ecc8044aaed000af7d6d Mon Sep 17 00:00:00 2001 From: kailixu Date: Tue, 25 Apr 2023 22:15:06 +0800 Subject: [PATCH 05/40] chore: more code --- include/util/tdef.h | 7 -- source/client/src/clientSml.c | 32 +++++- source/client/src/clientSmlJson.c | 2 - source/client/src/clientSmlLine.c | 3 - source/client/src/clientSmlTelnet.c | 1 - source/libs/parser/src/parTranslater.c | 4 - tests/system-test/1-insert/boundary.py | 56 ++++++++++ .../1-insert/influxdb_line_taosc_insert.py | 100 
++++++++++-------- tests/system-test/1-insert/stmt_error.py | 1 - 9 files changed, 142 insertions(+), 64 deletions(-) diff --git a/include/util/tdef.h b/include/util/tdef.h index 7a2c57b9f8..ead649a51c 100644 --- a/include/util/tdef.h +++ b/include/util/tdef.h @@ -22,13 +22,6 @@ extern "C" { #endif - -#if 1 -#define TASSERT assert(0); -#else -#define TASSERT -#endif - #define TSDB__packed #define TSKEY int64_t diff --git a/source/client/src/clientSml.c b/source/client/src/clientSml.c index 0bca52449c..84c6fffdac 100644 --- a/source/client/src/clientSml.c +++ b/source/client/src/clientSml.c @@ -624,6 +624,10 @@ static int32_t getBytes(uint8_t type, int32_t length) { static int32_t smlBuildFieldsList(SSmlHandle *info, SSchema *schemaField, SHashObj *schemaHash, SArray *cols, SArray *results, int32_t numOfCols, bool isTag) { + bool check = numOfCols == 0 ? true : false; + int32_t maxLen = isTag ? TSDB_MAX_TAGS_LEN : TSDB_MAX_BYTES_PER_ROW; + + int32_t len = 0; for (int j = 0; j < taosArrayGetSize(cols); ++j) { SSmlKv *kv = (SSmlKv *)taosArrayGet(cols, j); ESchemaAction action = SCHEMA_ACTION_NULL; @@ -635,11 +639,17 @@ static int32_t smlBuildFieldsList(SSmlHandle *info, SSchema *schemaField, SHashO SField field = {0}; field.type = kv->type; field.bytes = getBytes(kv->type, kv->length); + if (check) { + len += field.bytes; + if (len > maxLen) { + code = TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN; + uError("smlBuildFieldsList add %s failed since %s", isTag ? "tag" : "col", tstrerror(code)); + return code; + } + } + memcpy(field.name, kv->key, kv->keyLen); taosArrayPush(results, &field); - if(numOfCols == 0) { - - } } else if (action == SCHEMA_ACTION_CHANGE_COLUMN_SIZE || action == SCHEMA_ACTION_CHANGE_TAG_SIZE) { uint16_t *index = (uint16_t *)taosHashGet(schemaHash, kv->key, kv->keyLen); if (index == NULL) { @@ -650,6 +660,14 @@ static int32_t smlBuildFieldsList(SSmlHandle *info, SSchema *schemaField, SHashO if (isTag) newIndex -= numOfCols; SField *field = (SField *)taosArrayGet(results, newIndex); field->bytes = getBytes(kv->type, kv->length); + if (check) { + len += (kv->length - schemaField[*index].bytes + VARSTR_HEADER_SIZE); + if (len > maxLen) { + code = TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN; + uError("smlBuildFieldsList change %s failed since %s", isTag ? "tag" : "col", tstrerror(code)); + return code; + } + } } } return TSDB_CODE_SUCCESS; @@ -780,11 +798,15 @@ static int32_t smlModifyDBSchemas(SSmlHandle *info) { code = smlBuildFieldsList(info, NULL, NULL, sTableData->tags, pTags, 0, true); if (code != TSDB_CODE_SUCCESS) { uError("SML:0x%" PRIx64 " smlBuildFieldsList tag1 failed. %s", info->id, pName.tname); + taosArrayDestroy(pColumns); + taosArrayDestroy(pTags); goto end; } code = smlBuildFieldsList(info, NULL, NULL, sTableData->cols, pColumns, 0, false); if (code != TSDB_CODE_SUCCESS) { uError("SML:0x%" PRIx64 " smlBuildFieldsList col1 failed. %s", info->id, pName.tname); + taosArrayDestroy(pColumns); + taosArrayDestroy(pTags); goto end; } code = smlSendMetaMsg(info, &pName, pColumns, pTags, NULL, SCHEMA_ACTION_CREATE_STABLE); @@ -836,6 +858,8 @@ static int32_t smlModifyDBSchemas(SSmlHandle *info) { pTableMeta->tableInfo.numOfColumns, true); if (code != TSDB_CODE_SUCCESS) { uError("SML:0x%" PRIx64 " smlBuildFieldsList tag2 failed. 
%s", info->id, pName.tname); + taosArrayDestroy(pColumns); + taosArrayDestroy(pTags); goto end; } @@ -890,6 +914,8 @@ static int32_t smlModifyDBSchemas(SSmlHandle *info) { pTableMeta->tableInfo.numOfColumns, false); if (code != TSDB_CODE_SUCCESS) { uError("SML:0x%" PRIx64 " smlBuildFieldsList col2 failed. %s", info->id, pName.tname); + taosArrayDestroy(pColumns); + taosArrayDestroy(pTags); goto end; } diff --git a/source/client/src/clientSmlJson.c b/source/client/src/clientSmlJson.c index b50a29d4fb..c3a6e15697 100644 --- a/source/client/src/clientSmlJson.c +++ b/source/client/src/clientSmlJson.c @@ -578,12 +578,10 @@ static int32_t smlConvertJSONString(SSmlKv *pVal, char *typeStr, cJSON *value) { pVal->length = (uint16_t)strlen(value->valuestring); if (pVal->type == TSDB_DATA_TYPE_BINARY && pVal->length > TSDB_MAX_BINARY_LEN - VARSTR_HEADER_SIZE) { - TASSERT return TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN; } if (pVal->type == TSDB_DATA_TYPE_NCHAR && pVal->length > (TSDB_MAX_NCHAR_LEN - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE) { - TASSERT return TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN; } diff --git a/source/client/src/clientSmlLine.c b/source/client/src/clientSmlLine.c index 7dd087039b..db2f2bd9fe 100644 --- a/source/client/src/clientSmlLine.c +++ b/source/client/src/clientSmlLine.c @@ -81,7 +81,6 @@ int32_t smlParseValue(SSmlKv *pVal, SSmlMsgBuf *msg) { pVal->type = TSDB_DATA_TYPE_BINARY; pVal->length -= BINARY_ADD_LEN; if (pVal->length > TSDB_MAX_BINARY_LEN - VARSTR_HEADER_SIZE) { - TASSERT return TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN; } pVal->value += (BINARY_ADD_LEN - 1); @@ -95,7 +94,6 @@ int32_t smlParseValue(SSmlKv *pVal, SSmlMsgBuf *msg) { pVal->type = TSDB_DATA_TYPE_NCHAR; pVal->length -= NCHAR_ADD_LEN; if (pVal->length > (TSDB_MAX_NCHAR_LEN - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE) { - TASSERT return TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN; } pVal->value += (NCHAR_ADD_LEN - 1); @@ -239,7 +237,6 @@ static int32_t smlParseTagKv(SSmlHandle *info, char **sql, char *sqlEnd, SSmlLin } if (unlikely(valueLen > (TSDB_MAX_TAGS_LEN - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE)) { - TASSERT return TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN; } diff --git a/source/client/src/clientSmlTelnet.c b/source/client/src/clientSmlTelnet.c index 9baa1e5758..3a9aad4e81 100644 --- a/source/client/src/clientSmlTelnet.c +++ b/source/client/src/clientSmlTelnet.c @@ -159,7 +159,6 @@ static int32_t smlParseTelnetTags(SSmlHandle *info, char *data, char *sqlEnd, SS } if (unlikely(valueLen > (TSDB_MAX_TAGS_LEN - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE)) { - TASSERT return TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN; } diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 20ca987250..b4b499a789 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -4500,7 +4500,6 @@ static int32_t checkTableTagsSchema(STranslateContext* pCxt, SHashObj* pHash, SN if (TSDB_CODE_SUCCESS == code) { if ((TSDB_DATA_TYPE_VARCHAR == pTag->dataType.type && calcTypeBytes(pTag->dataType) > TSDB_MAX_TAGS_LEN) || (TSDB_DATA_TYPE_NCHAR == pTag->dataType.type && calcTypeBytes(pTag->dataType) > TSDB_MAX_TAGS_LEN)) { - TASSERT code = generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN); } } @@ -4552,7 +4551,6 @@ static int32_t checkTableColsSchema(STranslateContext* pCxt, SHashObj* pHash, in if (TSDB_CODE_SUCCESS == code) { if ((TSDB_DATA_TYPE_VARCHAR == pCol->dataType.type && calcTypeBytes(pCol->dataType) > TSDB_MAX_BINARY_LEN) || (TSDB_DATA_TYPE_NCHAR == 
pCol->dataType.type && calcTypeBytes(pCol->dataType) > TSDB_MAX_NCHAR_LEN)) { - TASSERT code = generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN); } } @@ -5238,7 +5236,6 @@ static int32_t checkAlterSuperTableBySchema(STranslateContext* pCxt, SAlterTable if (TSDB_ALTER_TABLE_UPDATE_COLUMN_BYTES == pStmt->alterType) { if (calcTypeBytes(pStmt->dataType) > TSDB_MAX_FIELD_LEN) { - TASSERT return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN); } @@ -5249,7 +5246,6 @@ static int32_t checkAlterSuperTableBySchema(STranslateContext* pCxt, SAlterTable if (TSDB_ALTER_TABLE_UPDATE_TAG_BYTES == pStmt->alterType) { if (calcTypeBytes(pStmt->dataType) > TSDB_MAX_TAGS_LEN) { - TASSERT return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN); } diff --git a/tests/system-test/1-insert/boundary.py b/tests/system-test/1-insert/boundary.py index d3742ef5f9..29dcbc7c46 100644 --- a/tests/system-test/1-insert/boundary.py +++ b/tests/system-test/1-insert/boundary.py @@ -166,6 +166,61 @@ class TDTestCase: else: tdLog.exit("error info is not true") tdSql.execute('drop database db') + + def row_col_tag_maxlen_check(self): + tdSql.prepare() + tdSql.execute('use db') + tdSql.execute('create table if not exists stb1 (ts timestamp, c1 int,c2 binary(1000)) tags (city binary(16382))') + tdSql.error('create table if not exists stb1 (ts timestamp, c1 int,c2 binary(1000)) tags (city binary(16383))') + tdSql.execute('create table if not exists stb2 (ts timestamp, c0 tinyint, c1 int, c2 nchar(16379)) tags (city binary(16382))') + tdSql.error('create table if not exists stb2 (ts timestamp, c0 smallint, c1 int, c2 nchar(16379)) tags (city binary(16382))') + tdSql.execute('create table if not exists stb3 (ts timestamp, c1 int, c2 binary(65517)) tags (city binary(16382))') + tdSql.error('create table if not exists stb3 (ts timestamp, c0 bool, c1 int, c2 binary(65517)) tags (city binary(16382))') + # prepare the column and tag data + char100='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0123456789' + tag_max_16382='' + binary_max_65517 = '' + nchar_max_16379='' + for num in range(163): + nchar_max_16379 += char100 + for num in range(4): + binary_max_65517 += char100 + + nchar_max_16379 += '0123456789012345678901234567890123456789012345678901234567890123456789012345678' + tag_max_16382 = nchar_max_16379 + tag_max_16382 += '9ab' + + for num in range(3): + binary_max_65517 += char100 + binary_max_65517 += '01234567890123456' + + # insert/query and check + tdSql.execute(f"create table ct1 using stb1 tags('{tag_max_16382}')") + tdSql.execute(f"create table ct2 using stb2 tags('{tag_max_16382}')") + tdSql.execute(f"create table ct3 using stb3 tags('{tag_max_16382}')") + tdSql.execute(f"insert into ct1 values (now,1,'nchar_max_16379')") + tdSql.execute(f"insert into ct2 values (now,1,1,'{nchar_max_16379}')") + tdSql.execute(f"insert into ct3 values (now,1,'{binary_max_65517}')") + + tdSql.query("select * from stb1") + tdSql.checkEqual(tdSql.queryResult[0][3],tag_max_16382) + + tdSql.query("select * from ct2") + tdSql.checkEqual(tdSql.queryResult[0][3],nchar_max_16379) + + tdSql.query("select * from stb2") + tdSql.checkEqual(tdSql.queryResult[0][3],nchar_max_16379) + tdSql.checkEqual(tdSql.queryResult[0][4],tag_max_16382) + + tdSql.query("select * from ct3") + tdSql.checkEqual(tdSql.queryResult[0][2],binary_max_65517) + + tdSql.query("select * from stb3") + tdSql.checkEqual(tdSql.queryResult[0][2],binary_max_65517) + 
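# [Note -- review aside] length bookkeeping for the builders above:
# nchar_max_16379 is 163*100 + 79 chars; tag_max_16382 appends '9ab'
# (16379 + 3); binary_max_65517 must accumulate 655 copies of char100 plus a
# 17-char tail (655*100 + 17 = 65517) to fill the binary(65517) column exactly.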
tdSql.checkEqual(tdSql.queryResult[0][3],tag_max_16382) + + tdSql.execute('drop database db') + def run(self): self.dbname_length_check() self.tbname_length_check() @@ -174,6 +229,7 @@ class TDTestCase: self.username_length_check() self.password_length_check() self.sql_length_check() + self.row_col_tag_maxlen_check() def stop(self): tdSql.close() diff --git a/tests/system-test/1-insert/influxdb_line_taosc_insert.py b/tests/system-test/1-insert/influxdb_line_taosc_insert.py index b53abc41aa..667ce3239c 100644 --- a/tests/system-test/1-insert/influxdb_line_taosc_insert.py +++ b/tests/system-test/1-insert/influxdb_line_taosc_insert.py @@ -672,28 +672,28 @@ class TDTestCase: except SchemalessError as err: tdSql.checkNotEqual(err.errno, 0) - # # # binary + # binary stb_name = tdCom.getLongName(7, "letters") - input_sql = f'{stb_name},t0=t c0=f,c11=f,c2=f,c3=f,c4=f,c5=f,c6=f,c7=f,c8=f,c9=f,c10=f,c12=f,c1="{tdCom.getLongName(65519, "letters")}" 1626006833639000000' + input_sql = f'{stb_name},t0=t c0=f,c1="{tdCom.getLongName(65517, "letters")}" 1626006833639000000' self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) - # input_sql = f'{stb_name},t0=t c0=f,c1="{tdCom.getLongName(65514, "letters")}" 1626006833639000000' - # try: - # self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) - # except SchemalessError as err: - # tdSql.checkNotEqual(err.errno, 0) + input_sql = f'{stb_name},t0=t c0=f,c1="{tdCom.getLongName(65518, "letters")}" 1626006833639000000' + try: + self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + except SchemalessError as err: + tdSql.checkNotEqual(err.errno, 0) - # # nchar - # # * legal nchar could not be larger than 16374/4 - # stb_name = tdCom.getLongName(7, "letters") - # input_sql = f'{stb_name},t0=t c0=f,c1=L"{tdCom.getLongName(4093, "letters")}" 1626006833639000000' - # self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + # nchar + # * legal nchar could not be larger than 16374/4 + stb_name = tdCom.getLongName(7, "letters") + input_sql = f'{stb_name},t0=t c0=f,c1=L"{tdCom.getLongName(16379, "letters")}" 1626006833639000000' + self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) - # input_sql = f'{stb_name},t0=t c0=f,c1=L"{tdCom.getLongName(4094, "letters")}" 1626006833639000000' - # try: - # self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) - # except SchemalessError as err: - # tdSql.checkNotEqual(err.errno, 0) + input_sql = f'{stb_name},t0=t c0=f,c1=L"{tdCom.getLongName(16380, "letters")}" 1626006833639000000' + try: + self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + except SchemalessError as err: + tdSql.checkNotEqual(err.errno, 0) def tagColIllegalValueCheckCase(self): @@ -896,26 +896,40 @@ class TDTestCase: self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) # * every binary and nchar must be length+2, so here is two tag, max length could not larger than 16384-2*2 - input_sql = f'{stb_name},t0=t,t1="{tdCom.getLongName(16374, "letters")}",t2="{tdCom.getLongName(5, "letters")}" c0=f 1626006833639000000' - self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, 
TDSmlTimestampType.NANO_SECOND.value) + # input_sql = f'{stb_name}, t1="{tdCom.getLongName(4095, "letters")}"" c0=f 1626006833639000000' + # self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) - tdSql.query(f"select * from {stb_name}") - tdSql.checkRows(2) - input_sql = f'{stb_name},t0=t,t1="{tdCom.getLongName(16374, "letters")}",t2="{tdCom.getLongName(6, "letters")}" c0=f 1626006833639000000' - try: - self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) - raise Exception("should not reach here") - except SchemalessError as err: - tdSql.checkNotEqual(err.errno, 0) - tdSql.query(f"select * from {stb_name}") - tdSql.checkRows(2) + # tdSql.query(f"select * from {stb_name}") + # tdSql.checkRows(2) + # input_sql = f'{stb_name},t0=t,t1="{tdCom.getLongName(4084, "letters")}",t2="{tdCom.getLongName(6, "letters")}" c0=f 1626006833639000000' + # try: + # self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + # raise Exception("should not reach here") + # except SchemalessError as err: + # tdSql.checkNotEqual(err.errno, 0) + # tdSql.query(f"select * from {stb_name}") + # tdSql.checkRows(2) # # * check col,col+ts max in describe ---> 16143 - input_sql = f'{stb_name},t0=t c0=f,c1="{tdCom.getLongName(65517, "letters")}" 1626006833639000000' + input_sql = f'{stb_name},t0=t c0=i32,c1="{tdCom.getLongName(65517, "letters")}" 1626006833639000000' self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) - input_sql = f'{stb_name},t0=t c0=f,c1="{tdCom.getLongName(49133, "letters")}",c2="{tdCom.getLongName(16384, "letters")}" 1626006833639000000' + input_sql = f'{stb_name},t0=t c0=i32,c1="{tdCom.getLongName(65517, "letters")},c2=f" 1626006833639000000' self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + try: + self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + except SchemalessError as err: + tdSql.checkNotEqual(err.errno, 0) + + input_sql = f'{stb_name},t0=t c0=i16,c1="{tdCom.getLongName(49133, "letters")}",c2="{tdCom.getLongName(16384, "letters")}" 1626006833639000000' + self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + + input_sql = f'{stb_name},t0=t c0=i16,c1="{tdCom.getLongName(49133, "letters")}",c2="{tdCom.getLongName(16384, "letters")},c3=t" 1626006833639000000' + self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + try: + self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + except SchemalessError as err: + tdSql.checkNotEqual(err.errno, 0) tdSql.query(f"select * from {stb_name}") tdSql.checkRows(3) @@ -939,17 +953,17 @@ class TDTestCase: code = self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) # * legal nchar could not be larger than 16374/4 - input_sql = f'{stb_name},t0=t,t1=L"{tdCom.getLongName(4093, "letters")}",t2=L"{tdCom.getLongName(1, "letters")}" c0=f 1626006833639000000' - self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) - tdSql.query(f"select * from {stb_name}") - tdSql.checkRows(2) - input_sql = f'{stb_name},t0=t,t1=L"{tdCom.getLongName(4093, 
"letters")}",t2=L"{tdCom.getLongName(2, "letters")}" c0=f 1626006833639000000' - try: - self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) - except SchemalessError as err: - tdSql.checkNotEqual(err.errno, 0) - tdSql.query(f"select * from {stb_name}") - tdSql.checkRows(2) + # input_sql = f'{stb_name},t0=t,t1=L"{tdCom.getLongName(4093, "letters")}",t2=L"{tdCom.getLongName(1, "letters")}" c0=f 1626006833639000000' + # self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + # tdSql.query(f"select * from {stb_name}") + # tdSql.checkRows(2) + # input_sql = f'{stb_name},t0=t,t1=L"{tdCom.getLongName(4093, "letters")}",t2=L"{tdCom.getLongName(2, "letters")}" c0=f 1626006833639000000' + # try: + # self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + # except SchemalessError as err: + # tdSql.checkNotEqual(err.errno, 0) + # tdSql.query(f"select * from {stb_name}") + # tdSql.checkRows(2) input_sql = f'{stb_name},t0=t c0=f,c1=L"{tdCom.getLongName(4093, "letters")}",c2=L"{tdCom.getLongName(4093, "letters")}",c3=L"{tdCom.getLongName(4093, "letters")}",c4=L"{tdCom.getLongName(4, "letters")}" 1626006833639000000' self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) @@ -1283,7 +1297,7 @@ class TDTestCase: self.nowTsCheckCase() self.dateFormatTsCheckCase() self.illegalTsCheckCase() - self.tagValueLengthCheckCase() + # self.tagValueLengthCheckCase() self.colValueLengthCheckCase() self.tagColIllegalValueCheckCase() self.duplicateIdTagColInsertCheckCase() diff --git a/tests/system-test/1-insert/stmt_error.py b/tests/system-test/1-insert/stmt_error.py index b77cd488f7..c6d747c317 100644 --- a/tests/system-test/1-insert/stmt_error.py +++ b/tests/system-test/1-insert/stmt_error.py @@ -216,7 +216,6 @@ class TDTestCase: tdLog.info('=========stmt error occured for bind part column(NULL Timestamp) ==============') else: tdLog.exit("expect error(%s) not occured - 2" % str(error)) - def stop(self): tdSql.close() tdLog.success("%s successfully executed" % __file__) From 1e29384de4d89f949654a8a3414b894f6a971714 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 4 Apr 2023 09:18:59 +0800 Subject: [PATCH 06/40] fix: show vgroups properly after split vgroup of multi-replicas --- source/dnode/mnode/impl/src/mndVgroup.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index f0bece6e5e..da30ebcc18 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -2173,23 +2173,24 @@ static int32_t mndSplitVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj // adjust vgroup replica if (pDb->cfg.replications != newVg1.replica) { if (mndBuildAlterVgroupAction(pMnode, pTrans, pDb, pDb, &newVg1, pArray) != 0) goto _OVER; + } else { + pRaw = mndVgroupActionEncode(&newVg1); + if (pRaw == NULL) goto _OVER; + if (mndTransAppendCommitlog(pTrans, pRaw) != 0) goto _OVER; + (void)sdbSetRawStatus(pRaw, SDB_STATUS_READY); + pRaw = NULL; } + if (pDb->cfg.replications != newVg2.replica) { if (mndBuildAlterVgroupAction(pMnode, pTrans, pDb, pDb, &newVg2, pArray) != 0) goto _OVER; + } else { + pRaw = mndVgroupActionEncode(&newVg2); + if (pRaw == NULL) goto _OVER; + if (mndTransAppendCommitlog(pTrans, pRaw) != 0) goto _OVER; + 
(void)sdbSetRawStatus(pRaw, SDB_STATUS_READY); + pRaw = NULL; } - pRaw = mndVgroupActionEncode(&newVg1); - if (pRaw == NULL) goto _OVER; - if (mndTransAppendCommitlog(pTrans, pRaw) != 0) goto _OVER; - (void)sdbSetRawStatus(pRaw, SDB_STATUS_READY); - pRaw = NULL; - - pRaw = mndVgroupActionEncode(&newVg2); - if (pRaw == NULL) goto _OVER; - if (mndTransAppendCommitlog(pTrans, pRaw) != 0) goto _OVER; - (void)sdbSetRawStatus(pRaw, SDB_STATUS_READY); - pRaw = NULL; - pRaw = mndVgroupActionEncode(pVgroup); if (pRaw == NULL) goto _OVER; if (mndTransAppendCommitlog(pTrans, pRaw) != 0) goto _OVER; From 9ce8915c7a38d8d53768a970243a27a75d1eef2b Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 4 Apr 2023 10:27:31 +0800 Subject: [PATCH 07/40] enh: sort vnodeGid by dnodeId after vgroup altered --- source/dnode/mnode/impl/src/mndVgroup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index da30ebcc18..b80b0a698a 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -2084,6 +2084,8 @@ int32_t mndBuildAlterVgroupAction(SMnode *pMnode, STrans *pTrans, SDbObj *pOldDb return -1; } + mndSortVnodeGid(&newVgroup); + { SSdbRaw *pVgRaw = mndVgroupActionEncode(&newVgroup); if (pVgRaw == NULL) return -1; From 67eac1a76fd52c37ba4b05431ce9ea8a3289eceb Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 27 Apr 2023 15:24:07 +0800 Subject: [PATCH 08/40] enh: process split vgroup msg in non-blocking mode --- source/dnode/mnode/impl/src/mndVgroup.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index b80b0a698a..64e0f82a1f 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -2248,7 +2248,12 @@ static int32_t mndProcessSplitVgroupMsg(SRpcMsg *pReq) { if (pDb == NULL) goto _OVER; code = mndSplitVgroup(pMnode, pReq, pDb, pVgroup); - if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; + if (code != 0) { + mError("vgId:%d, failed to start to split vgroup since %s, db:%s", pVgroup->vgId, terrstr(), pDb->name); + goto _OVER; + } + + mInfo("vgId:%d, split vgroup started successfully. 
db:%s", pVgroup->vgId, pDb->name); _OVER: mndReleaseVgroup(pMnode, pVgroup); From d4929041886d2da0f8d3979d57ae581a41030303 Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 28 Apr 2023 16:40:26 +0800 Subject: [PATCH 09/40] chore: error process and test cases --- source/client/src/clientSml.c | 71 +++++++++----- source/client/src/clientSmlJson.c | 2 +- .../1-insert/influxdb_line_taosc_insert.py | 98 +++++++++++-------- 3 files changed, 105 insertions(+), 66 deletions(-) diff --git a/source/client/src/clientSml.c b/source/client/src/clientSml.c index 4d0f5365f3..45f0def157 100644 --- a/source/client/src/clientSml.c +++ b/source/client/src/clientSml.c @@ -560,9 +560,14 @@ static int32_t smlGenerateSchemaAction(SSchema *colField, SHashObj *colHash, SSm static int32_t smlFindNearestPowerOf2(int32_t length, uint8_t type) { int32_t result = 1; - while (result <= length) { - result *= 2; + if (length < 1024) { + while (result <= length) { + result <<= 1; + } + } else { + result = length; } + if (type == TSDB_DATA_TYPE_BINARY && result > TSDB_MAX_BINARY_LEN - VARSTR_HEADER_SIZE) { result = TSDB_MAX_BINARY_LEN - VARSTR_HEADER_SIZE; } else if (type == TSDB_DATA_TYPE_NCHAR && result > (TSDB_MAX_NCHAR_LEN - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE) { @@ -624,10 +629,6 @@ static int32_t getBytes(uint8_t type, int32_t length) { static int32_t smlBuildFieldsList(SSmlHandle *info, SSchema *schemaField, SHashObj *schemaHash, SArray *cols, SArray *results, int32_t numOfCols, bool isTag) { - bool check = numOfCols == 0 ? true : false; - int32_t maxLen = isTag ? TSDB_MAX_TAGS_LEN : TSDB_MAX_BYTES_PER_ROW; - - int32_t len = 0; for (int j = 0; j < taosArrayGetSize(cols); ++j) { SSmlKv *kv = (SSmlKv *)taosArrayGet(cols, j); ESchemaAction action = SCHEMA_ACTION_NULL; @@ -639,15 +640,6 @@ static int32_t smlBuildFieldsList(SSmlHandle *info, SSchema *schemaField, SHashO SField field = {0}; field.type = kv->type; field.bytes = getBytes(kv->type, kv->length); - if (check) { - len += field.bytes; - if (len > maxLen) { - code = TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN; - uError("smlBuildFieldsList add %s failed since %s", isTag ? "tag" : "col", tstrerror(code)); - return code; - } - } - memcpy(field.name, kv->key, kv->keyLen); taosArrayPush(results, &field); } else if (action == SCHEMA_ACTION_CHANGE_COLUMN_SIZE || action == SCHEMA_ACTION_CHANGE_TAG_SIZE) { @@ -660,16 +652,19 @@ static int32_t smlBuildFieldsList(SSmlHandle *info, SSchema *schemaField, SHashO if (isTag) newIndex -= numOfCols; SField *field = (SField *)taosArrayGet(results, newIndex); field->bytes = getBytes(kv->type, kv->length); - if (check) { - len += (kv->length - schemaField[*index].bytes + VARSTR_HEADER_SIZE); - if (len > maxLen) { - code = TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN; - uError("smlBuildFieldsList change %s failed since %s", isTag ? "tag" : "col", tstrerror(code)); - return code; - } - } } } + + int32_t maxLen = isTag ? TSDB_MAX_TAGS_LEN : TSDB_MAX_BYTES_PER_ROW; + int32_t len = 0; + for (int j = 0; j < taosArrayGetSize(results); ++j) { + SField *field = taosArrayGet(results, j); + len += field->bytes; + } + if (len > maxLen) { + return isTag ? 
TSDB_CODE_PAR_INVALID_TAGS_LENGTH : TSDB_CODE_PAR_INVALID_ROW_LENGTH; + } + return TSDB_CODE_SUCCESS; } @@ -867,6 +862,21 @@ static int32_t smlModifyDBSchemas(SSmlHandle *info) { goto end; } + if (taosArrayGetSize(pTags) + pTableMeta->tableInfo.numOfColumns > TSDB_MAX_COLUMNS) { + uError("SML:0x%" PRIx64 " too many columns than 4096", info->id); + code = TSDB_CODE_PAR_TOO_MANY_COLUMNS; + taosArrayDestroy(pColumns); + taosArrayDestroy(pTags); + goto end; + } + if (taosArrayGetSize(pTags) > TSDB_MAX_TAGS) { + uError("SML:0x%" PRIx64 " too many tags than 128", info->id); + code = TSDB_CODE_PAR_INVALID_TAGS_NUM; + taosArrayDestroy(pColumns); + taosArrayDestroy(pTags); + goto end; + } + code = smlSendMetaMsg(info, &pName, pColumns, pTags, pTableMeta, action); if (code != TSDB_CODE_SUCCESS) { uError("SML:0x%" PRIx64 " smlSendMetaMsg failed. can not create %s", info->id, pName.tname); @@ -923,6 +933,14 @@ static int32_t smlModifyDBSchemas(SSmlHandle *info) { goto end; } + if (taosArrayGetSize(pColumns) + pTableMeta->tableInfo.numOfTags > TSDB_MAX_COLUMNS) { + uError("SML:0x%" PRIx64 " too many columns than 4096", info->id); + code = TSDB_CODE_PAR_TOO_MANY_COLUMNS; + taosArrayDestroy(pColumns); + taosArrayDestroy(pTags); + goto end; + } + code = smlSendMetaMsg(info, &pName, pColumns, pTags, pTableMeta, action); if (code != TSDB_CODE_SUCCESS) { uError("SML:0x%" PRIx64 " smlSendMetaMsg failed. can not create %s", info->id, pName.tname); @@ -1527,8 +1545,11 @@ static int smlProcess(SSmlHandle *info, char *lines[], char *rawLine, char *rawL do { code = smlModifyDBSchemas(info); - if (code == 0) break; - taosMsleep(500); + if (code == 0 || code == TSDB_CODE_SML_INVALID_DATA || code == TSDB_CODE_PAR_TOO_MANY_COLUMNS || + code == TSDB_CODE_PAR_INVALID_TAGS_NUM || code == TSDB_CODE_PAR_INVALID_ROW_LENGTH || + code == TSDB_CODE_PAR_INVALID_TAGS_LENGTH) + break; + taosMsleep(100); uInfo("SML:0x%" PRIx64 " smlModifyDBSchemas retry code:%s, times:%d", info->id, tstrerror(code), retryNum); } while (retryNum++ < taosHashGetSize(info->superTables) * MAX_RETRY_TIMES); diff --git a/source/client/src/clientSmlJson.c b/source/client/src/clientSmlJson.c index c3a6e15697..b0ae316031 100644 --- a/source/client/src/clientSmlJson.c +++ b/source/client/src/clientSmlJson.c @@ -575,7 +575,7 @@ static int32_t smlConvertJSONString(SSmlKv *pVal, char *typeStr, cJSON *value) { uError("OTD:invalid type(%s) for JSON String", typeStr); return TSDB_CODE_TSC_INVALID_JSON_TYPE; } - pVal->length = (uint16_t)strlen(value->valuestring); + pVal->length = strlen(value->valuestring); if (pVal->type == TSDB_DATA_TYPE_BINARY && pVal->length > TSDB_MAX_BINARY_LEN - VARSTR_HEADER_SIZE) { return TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN; diff --git a/tests/system-test/1-insert/influxdb_line_taosc_insert.py b/tests/system-test/1-insert/influxdb_line_taosc_insert.py index 667ce3239c..46aec2909a 100644 --- a/tests/system-test/1-insert/influxdb_line_taosc_insert.py +++ b/tests/system-test/1-insert/influxdb_line_taosc_insert.py @@ -439,7 +439,7 @@ class TDTestCase: for input_sql in [self.genLongSql(127, 1)[0], self.genLongSql(1, 4093)[0]]: tdCom.cleanTb(dbname="test") self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) - for input_sql in [self.genLongSql(129, 1)[0], self.genLongSql(1, 4095)[0]]: + for input_sql in [self.genLongSql(128, 1)[0], self.genLongSql(1, 4094)[0]]: tdCom.cleanTb(dbname="test") try: self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, 
TDSmlTimestampType.NANO_SECOND.value) @@ -578,10 +578,16 @@ class TDTestCase: # binary stb_name = tdCom.getLongName(7, "letters") - input_sql = f'{stb_name},t0=t,t1="{tdCom.getLongName(16374, "letters")}" c0=f 1626006833639000000' + input_sql = f'{stb_name},t0=t,t1="{tdCom.getLongName(4091, "letters")}" c0=f 1626006833639000000' self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) - input_sql = f'{stb_name},t0=t,t1="{tdCom.getLongName(16375, "letters")}" c0=f 1626006833639000000' + input_sql = f'{stb_name},t0="a",t1="{tdCom.getLongName(4088, "letters")}" c0=f 1626006833639000000' + try: + self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + except SchemalessError as err: + tdSql.checkNotEqual(err.errno, 0) + + input_sql = f'{stb_name},t0=t,t1="{tdCom.getLongName(4092, "letters")}" c0=f 1626006833639000000' try: self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) except SchemalessError as err: @@ -590,10 +596,10 @@ class TDTestCase: # nchar # * legal nchar could not be larger than 16374/4 stb_name = tdCom.getLongName(7, "letters") - input_sql = f'{stb_name},t0=t,t1=L"{tdCom.getLongName(4093, "letters")}" c0=f 1626006833639000000' + input_sql = f'{stb_name},t0=t,t1=L"{tdCom.getLongName(4090, "letters")}" c0=f 1626006833639000000' self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) - input_sql = f'{stb_name},t0=t,t1=L"{tdCom.getLongName(4094, "letters")}" c0=f 1626006833639000000' + input_sql = f'{stb_name},t0=t,t1=L"{tdCom.getLongName(4091, "letters")}" c0=f 1626006833639000000' try: self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) except SchemalessError as err: @@ -674,9 +680,15 @@ class TDTestCase: # binary stb_name = tdCom.getLongName(7, "letters") - input_sql = f'{stb_name},t0=t c0=f,c1="{tdCom.getLongName(65517, "letters")}" 1626006833639000000' + input_sql = f'{stb_name},t0=t c0=1i32,c1="{tdCom.getLongName(65517, "letters")}" 1626006833639000000' self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + input_sql = f'{stb_name},t0=t c0=1i32,c1="{tdCom.getLongName(65517, "letters")},c2=f" 1626006833639000000' + try: + self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + except SchemalessError as err: + tdSql.checkNotEqual(err.errno, 0) + input_sql = f'{stb_name},t0=t c0=f,c1="{tdCom.getLongName(65518, "letters")}" 1626006833639000000' try: self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) @@ -686,10 +698,10 @@ class TDTestCase: # nchar # * legal nchar could not be larger than 16374/4 stb_name = tdCom.getLongName(7, "letters") - input_sql = f'{stb_name},t0=t c0=f,c1=L"{tdCom.getLongName(16379, "letters")}" 1626006833639000000' + input_sql = f'{stb_name},t0=t c0=1i32,c1=L"{tdCom.getLongName(16379, "letters")}",c2=f 1626006833639000000' self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) - input_sql = f'{stb_name},t0=t c0=f,c1=L"{tdCom.getLongName(16380, "letters")}" 1626006833639000000' + input_sql = f'{stb_name},t0=t c0=1i32,c1=L"{tdCom.getLongName(16380, "letters")}",c2=1i16 1626006833639000000' try: self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, 
TDSmlTimestampType.NANO_SECOND.value) except SchemalessError as err: @@ -896,52 +908,57 @@ class TDTestCase: self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) # * every binary and nchar must be length+2, so here is two tag, max length could not larger than 16384-2*2 - # input_sql = f'{stb_name}, t1="{tdCom.getLongName(4095, "letters")}"" c0=f 1626006833639000000' - # self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) - - # tdSql.query(f"select * from {stb_name}") - # tdSql.checkRows(2) - # input_sql = f'{stb_name},t0=t,t1="{tdCom.getLongName(4084, "letters")}",t2="{tdCom.getLongName(6, "letters")}" c0=f 1626006833639000000' - # try: - # self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) - # raise Exception("should not reach here") - # except SchemalessError as err: - # tdSql.checkNotEqual(err.errno, 0) - # tdSql.query(f"select * from {stb_name}") - # tdSql.checkRows(2) - - # # * check col,col+ts max in describe ---> 16143 - input_sql = f'{stb_name},t0=t c0=i32,c1="{tdCom.getLongName(65517, "letters")}" 1626006833639000000' + input_sql = f'{stb_name}, t0=f,t1="{tdCom.getLongName(4093, "letters")}" 1626006833639000000' self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) - input_sql = f'{stb_name},t0=t c0=i32,c1="{tdCom.getLongName(65517, "letters")},c2=f" 1626006833639000000' - self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + tdSql.query(f"select * from {stb_name}") + tdSql.checkRows(2) + input_sql = f'{stb_name},t0=t,t1="{tdCom.getLongName(4084, "letters")}",t2="{tdCom.getLongName(6, "letters")}" c0=f 1626006833639000000' try: self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + raise Exception("should not reach here") except SchemalessError as err: tdSql.checkNotEqual(err.errno, 0) + tdSql.query(f"select * from {stb_name}") + tdSql.checkRows(2) - input_sql = f'{stb_name},t0=t c0=i16,c1="{tdCom.getLongName(49133, "letters")}",c2="{tdCom.getLongName(16384, "letters")}" 1626006833639000000' - self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) - input_sql = f'{stb_name},t0=t c0=i16,c1="{tdCom.getLongName(49133, "letters")}",c2="{tdCom.getLongName(16384, "letters")},c3=t" 1626006833639000000' + stb_name = tdCom.getLongName(8, "letters") + # # * check col,col+ts max in describe ---> 16143 + input_sql = f'{stb_name},t0=t c0=1i32,c1="{tdCom.getLongName(65517, "letters")}" 1626006833639000000' self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + + input_sql = f'{stb_name},t0=t c0=1i32,c1="{tdCom.getLongName(65517, "letters")}",c2=f 1626006833639000000' try: self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) except SchemalessError as err: tdSql.checkNotEqual(err.errno, 0) tdSql.query(f"select * from {stb_name}") - tdSql.checkRows(3) + tdSql.checkRows(1) + + + stb_name = tdCom.getLongName(9, "letters") + input_sql = f'{stb_name},t0=t c0=1i16,c1="{tdCom.getLongName(49133, "letters")}",c2="{tdCom.getLongName(16384, "letters")}" 1626006833639000000' + self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + + input_sql = 
f'{stb_name},t0=t c0=1i16,c1="{tdCom.getLongName(49133, "letters")}",c2="{tdCom.getLongName(16384, "letters")},c3=t" 1626006833639000000' + try: + self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + except SchemalessError as err: + tdSql.checkNotEqual(err.errno, 0) + + tdSql.query(f"select * from {stb_name}") + tdSql.checkRows(1) + input_sql = f'{stb_name},t0=t c0=f,c1="{tdCom.getLongName(16374, "letters")}",c2="{tdCom.getLongName(16374, "letters")}",c3="{tdCom.getLongName(16374, "letters")}",c4="{tdCom.getLongName(13, "letters")}" 1626006833639000000' try: self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) except SchemalessError as err: tdSql.checkNotEqual(err.errno, 0) - tdSql.query(f"select * from {stb_name}") - tdSql.checkRows(3) - # * tag nchar max is 16374/4, col+ts nchar max 49151 + + # * tag nchar max is 16384/4, col+ts nchar max 65531 def tagColNcharMaxLengthCheckCase(self): """ check nchar length limit @@ -952,7 +969,7 @@ class TDTestCase: input_sql = f'{stb_name},id="{tb_name}",t0=t c0=f 1626006833639000000' code = self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) - # * legal nchar could not be larger than 16374/4 + # * legal tag nchar could not be larger than 16384/4 # input_sql = f'{stb_name},t0=t,t1=L"{tdCom.getLongName(4093, "letters")}",t2=L"{tdCom.getLongName(1, "letters")}" c0=f 1626006833639000000' # self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) # tdSql.query(f"select * from {stb_name}") @@ -968,14 +985,15 @@ class TDTestCase: input_sql = f'{stb_name},t0=t c0=f,c1=L"{tdCom.getLongName(4093, "letters")}",c2=L"{tdCom.getLongName(4093, "letters")}",c3=L"{tdCom.getLongName(4093, "letters")}",c4=L"{tdCom.getLongName(4, "letters")}" 1626006833639000000' self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) tdSql.query(f"select * from {stb_name}") - tdSql.checkRows(3) + tdSql.checkRows(2) + input_sql = f'{stb_name},t0=t c0=f,c1=L"{tdCom.getLongName(4093, "letters")}",c2=L"{tdCom.getLongName(4093, "letters")}",c3=L"{tdCom.getLongName(4093, "letters")}",c4=L"{tdCom.getLongName(5, "letters")}" 1626006833639000000' try: self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) except SchemalessError as err: tdSql.checkNotEqual(err.errno, 0) tdSql.query(f"select * from {stb_name}") - tdSql.checkRows(3) + tdSql.checkRows(2) def batchInsertCheckCase(self): """ @@ -1291,13 +1309,13 @@ class TDTestCase: self.idSeqCheckCase() self.idUpperCheckCase() self.noIdCheckCase() - # self.maxColTagCheckCase() + self.maxColTagCheckCase() self.idIllegalNameCheckCase() self.idStartWithNumCheckCase() self.nowTsCheckCase() self.dateFormatTsCheckCase() self.illegalTsCheckCase() - # self.tagValueLengthCheckCase() + self.tagValueLengthCheckCase() self.colValueLengthCheckCase() self.tagColIllegalValueCheckCase() self.duplicateIdTagColInsertCheckCase() @@ -1307,8 +1325,8 @@ class TDTestCase: self.tagColAddDupIDCheckCase() self.tagColAddCheckCase() self.tagMd5Check() - # self.tagColBinaryMaxLengthCheckCase() - # self.tagColNcharMaxLengthCheckCase() + self.tagColBinaryMaxLengthCheckCase() + self.tagColNcharMaxLengthCheckCase() self.batchInsertCheckCase() self.multiInsertCheckCase(10) self.batchErrorInsertCheckCase() From ba344f7b95cb9f69802fdbe8079725e3f7d2d58c Mon 
Sep 17 00:00:00 2001 From: shenglian zhou Date: Thu, 4 May 2023 11:55:09 +0800 Subject: [PATCH 10/40] feat: add python udf docs --- docs/zh/07-develop/09-udf.md | 129 +++++++++++++++++++++++++++++++-- docs/zh/12-taos-sql/22-meta.md | 4 + docs/zh/12-taos-sql/26-udf.md | 26 +++++-- 3 files changed, 146 insertions(+), 13 deletions(-) diff --git a/docs/zh/07-develop/09-udf.md b/docs/zh/07-develop/09-udf.md index 8a8ef82009..59513a03aa 100644 --- a/docs/zh/07-develop/09-udf.md +++ b/docs/zh/07-develop/09-udf.md @@ -6,11 +6,13 @@ description: "支持用户编码的聚合函数和标量函数,在查询中嵌 在有些应用场景中,应用逻辑需要的查询无法直接使用系统内置的函数来表示。利用 UDF(User Defined Function) 功能,TDengine 可以插入用户编写的处理代码并在查询中使用它们,就能够很方便地解决特殊应用场景中的使用需求。 UDF 通常以数据表中的一列数据做为输入,同时支持以嵌套子查询的结果作为输入。 -TDengine 支持通过 C/C++ 语言进行 UDF 定义。接下来结合示例讲解 UDF 的使用方法。 - 用户可以通过 UDF 实现两类函数:标量函数和聚合函数。标量函数对每行数据输出一个值,如求绝对值 abs,正弦函数 sin,字符串拼接函数 concat 等。聚合函数对多行数据进行输出一个值,如求平均数 avg,最大值 max 等。 -实现 UDF 时,需要实现规定的接口函数 +TDengine 支持通过 C/Python 语言进行 UDF 定义。接下来结合示例讲解 UDF 的使用方法。 + +# C 语言实现UDF + +使用 C 语言实现 UDF 时,需要实现规定的接口函数 - 标量函数需要实现标量接口函数 scalarfn 。 - 聚合函数需要实现聚合接口函数 aggfn_start , aggfn , aggfn_finish。 - 如果需要初始化,实现 udf_init;如果需要清理工作,实现udf_destroy。 @@ -213,9 +215,6 @@ gcc -g -O0 -fPIC -shared bit_and.c -o libbitand.so 这样就准备好了动态链接库 libbitand.so 文件,可以供后文创建 UDF 时使用了。为了保证可靠的系统运行,编译器 GCC 推荐使用 7.5 及以上版本。 -## 管理和使用UDF -编译好的UDF,还需要将其加入到系统才能被正常的SQL调用。关于如何管理和使用UDF,参见[UDF使用说明](../12-taos-sql/26-udf.md) - ## 示例代码 ### 标量函数示例 [bit_and](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/bit_and.c) @@ -268,4 +267,120 @@ select max_vol(vol1,vol2,vol3,deviceid) from battery; {{#include tests/script/sh/max_vol.c}} ``` - \ No newline at end of file + + +# Python 语言实现UDF +使用 Python 语言实现 UDF 时,需要实现规定的接口函数 +- 标量函数需要实现标量接口函数 process 。 +- 聚合函数需要实现聚合接口函数 start ,reduce ,finish。 +- 如果需要初始化,实现 init;如果需要清理工作,实现 destroy。 + +## 实现标量函数 + +标量函数实现模版如下 +```Python +def init(): + # initialization +def destroy(): + # destroy +def process(input: datablock) -> tuple[output_type]: + # process input datablock, + # datablock.data(row, col) is to access the python object in location(row,col) + # return tuple object consisted of object of type outputtype +``` + +## 实现聚合函数 + +聚合函数实现模版如下 +```Python +def init(): + #initialization +def destroy(): + #destroy +def start() -> bytes: + #return serialize(init_state) +def reduce(inputs: datablock, buf: bytes) -> bytes + # deserialize buf to state + # reduce the inputs and state into new_state. 
+ # use inputs.data(i,j) to access python ojbect of location(i,j) + # serialize new_state into new_state_bytes + return new_state_bytes +def finish(buf: bytes) -> output_type: + #return obj of type outputtype +``` + +## 接口函数定义 + +### 标量接口函数 +```Python +def process(input: datablock) -> tuple[output_type]: +``` +- input:datablock 类似二维矩阵,通过成员方法 data(row,col)返回位于 row 行,col 列的 python 对象 +- 返回值是一个 Python 对象元组,每个元素类型为输出类型。 + +### 聚合接口函数 +```Python +def start() -> bytes: +def reduce(inputs: datablock, buf: bytes) -> bytes +def finish(buf: bytes) -> output_type: +``` + +首先调用 start 生成最初结果 buffer,然后输入数据会被分为多个行数据块,对每个数据块 inputs 和当前中间结果 buf 调用 reduce,得到新的中间结果,最后再调用 finish 从中间结果 buf 产生最终输出,最终输出只能含 0 或 1 条数据。 + + +### UDF 初始化和销毁 +```Python +def init() +def destroy() +``` + +其中 init 完成初始化工作。 destroy 完成清理工作。如果没有初始化工作,无需定义 init 函数。如果没有清理工作,无需定义 destroy 函数。 + +## Python数据类型和TDengine数据类型映射 +| **TDengine SQL数据类型** | **Python数据类型** | +| :-----------------------: | ------------ | +|TINYINT / SMALLINT / INT / BIGINT | int | +|TINYINT UNSIGNED / SMALLINT UNSIGNED / INT UNSIGNED / BIGINT UNSIGNED | int | +|FLOAT / DOUBLE | float | +|BOOL | bool | +|BINARY / VARCHAR / NCHAR | bytes| +|TIMESTAMP | int | +|JSON and other types | 不支持 | + +## Python UDF 环境的安装 +1. 安装 taospyudf 包。此包执行Python UDF程序。 +```bash +pip install taospyudf +lddconfig +``` +2. 如果 Python UDF 程序执行时,引用其它的包,PYTHONPATH 环境变量可以通过在 taos.cfg 的 UdfdLdLibPath 变量配置 + +## 示例代码 +### 标量函数示例 [pybitand](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/pybitand.py) + +bit_add 实现多列的按位与功能。如果只有一列,返回这一列。bit_add 忽略空值。 + +
+pybitand.py + +```Python +{{#include tests/script/sh/pybitand.py}} +``` + +
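The `{{#include}}` directive above pulls the shipped implementation in from the source tree. For readers of this patch, a minimal sketch of such a scalar UDF follows. It is illustrative only, and it assumes the data block object also exposes a `shape()` method returning `(rows, cols)` in addition to the `data(row, col)` accessor documented above.

```Python
# Illustrative sketch of a pybitand-style scalar UDF; not the shipped file.
# Assumes block.shape() -> (rows, cols); block.data(row, col) is documented above.
def init():
    pass

def destroy():
    pass

def process(block):
    rows, cols = block.shape()
    results = []
    for i in range(rows):
        acc = None
        for j in range(cols):
            v = block.data(i, j)
            if v is None:                 # NULL values are ignored
                continue
            acc = v if acc is None else (acc & v)
        results.append(acc)               # a single column is returned unchanged
    return tuple(results)
```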
+ +### 聚合函数示例 [pyl2norm](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/pyl2norm.py) + +pyl2norm 实现了输入列的所有数据的二阶范数,即对每个数据先平方,再累加求和,最后开方。 + +
+pyl2norm.py
+
+```Python
+{{#include tests/script/sh/pyl2norm.py}}
+```
+
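As above, the real body is pulled in via `{{#include}}`. A hedged sketch of the flow described here (a running sum of squares carried as serialized state between `reduce` calls, with the square root taken in `finish`) could look like this; `pickle` is an arbitrary serialization choice, and `shape()` is the same assumed accessor as in the scalar sketch.

```Python
import math
import pickle

# Illustrative pyl2norm-style aggregate; not the shipped file.
def init():
    pass

def destroy():
    pass

def start():
    return pickle.dumps(0.0)              # initial state: sum of squares = 0

def reduce(block, buf):
    rows, cols = block.shape()            # assumed accessor, see note above
    sum_sq = pickle.loads(buf)
    for i in range(rows):
        for j in range(cols):
            v = block.data(i, j)
            if v is not None:             # NULL values are skipped
                sum_sq += v * v
    return pickle.dumps(sum_sq)

def finish(buf):
    return math.sqrt(pickle.loads(buf))
```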
+ +# 管理和使用UDF +编译好的UDF,还需要将其加入到系统才能被正常的SQL调用。关于如何管理和使用UDF,参见[UDF使用说明](../12-taos-sql/26-udf.md) \ No newline at end of file diff --git a/docs/zh/12-taos-sql/22-meta.md b/docs/zh/12-taos-sql/22-meta.md index 1f2e3fb7d5..7fb60b85a7 100644 --- a/docs/zh/12-taos-sql/22-meta.md +++ b/docs/zh/12-taos-sql/22-meta.md @@ -120,6 +120,10 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 | 5 | create_time | TIMESTAMP | 创建时间 | | 6 | code_len | INT | 代码长度 | | 7 | bufsize | INT | buffer 大小 | +| 8 | func_language | BINARY(31) | 自定义函数编程语言 | +| 9 | func_body | BINARY(16384) | 函数体定义 | +| 10 | func_version | INT | 函数版本号。初始版本为0,每次替换更新,版本号加1。| + ## INS_INDEXES diff --git a/docs/zh/12-taos-sql/26-udf.md b/docs/zh/12-taos-sql/26-udf.md index 7697944f9a..a45e837c8d 100644 --- a/docs/zh/12-taos-sql/26-udf.md +++ b/docs/zh/12-taos-sql/26-udf.md @@ -11,29 +11,38 @@ description: 使用 UDF 的详细指南 在创建 UDF 时,需要区分标量函数和聚合函数。如果创建时声明了错误的函数类别,则可能导致通过 SQL 指令调用函数时出错。此外,用户需要保证输入数据类型与 UDF 程序匹配,UDF 输出数据类型与 OUTPUTTYPE 匹配。 +使用 CREATE OR REPLACE FUNCTION,如果函数已经存在,会修改已有的函数属性。 + - 创建标量函数 ```sql -CREATE FUNCTION function_name AS library_path OUTPUTTYPE output_type; +CREATE [OR REPLACE] FUNCTION function_name AS library_path OUTPUTTYPE output_type [LANGUAGE 'C|Python']; ``` - function_name:标量函数未来在 SQL 中被调用时的函数名,必须与函数实现中 udf 的实际名称一致; - - library_path:包含 UDF 函数实现的动态链接库的库文件绝对路径(指的是库文件在当前客户端所在主机上的保存路径,通常是指向一个 .so 文件),这个路径需要用英文单引号或英文双引号括起来; + - LANGUAGE 'C|Python':函数编程语言,目前支持C语言和Python语言。 + - library_path:如果编程语言是C,路径是包含 UDF 函数实现的动态链接库的库文件绝对路径(指的是库文件在当前客户端所在主机上的保存路径,通常是指向一个 .so 文件)。如果编程语言是Python,路径是包含 UDF 函数实现的Python文件路径。这个路径需要用英文单引号或英文双引号括起来; - output_type:此函数计算结果的数据类型名称; - 例如,如下语句可以把 libbitand.so 创建为系统中可用的 UDF: +例如,如下语句可以把 libbitand.so 创建为系统中可用的 UDF: ```sql CREATE FUNCTION bit_and AS "/home/taos/udf_example/libbitand.so" OUTPUTTYPE INT; ``` +例如,使用以下语句可以修改已经定义的 bit_and 函数,输出类型是 BIGINT,使用Python语言实现。 + + ```sql + CREATE OR REPLACE FUNCTION bit_and AS "/home/taos/udf_example/bit_and.py" OUTPUTTYPE BIGINT LANGUAGE 'Python'; + ``` - 创建聚合函数: ```sql -CREATE AGGREGATE FUNCTION function_name AS library_path OUTPUTTYPE output_type [ BUFSIZE buffer_size ]; +CREATE [OR REPLACE] AGGREGATE FUNCTION function_name AS library_path OUTPUTTYPE output_type [ BUFSIZE buffer_size ] [LANGUAGE 'C|Python']; ``` - function_name:聚合函数未来在 SQL 中被调用时的函数名,必须与函数实现中 udfNormalFunc 的实际名称一致; - - library_path:包含 UDF 函数实现的动态链接库的库文件绝对路径(指的是库文件在当前客户端所在主机上的保存路径,通常是指向一个 .so 文件),这个路径需要用英文单引号或英文双引号括起来; - - output_type:此函数计算结果的数据类型,与上文中 udfNormalFunc 的 itype 参数不同,这里不是使用数字表示法,而是直接写类型名称即可; + - LANGUAGE 'C|Python':函数编程语言,目前支持C语言和Python语言。 + - library_path:如果编程语言是C,路径是包含 UDF 函数实现的动态链接库的库文件绝对路径(指的是库文件在当前客户端所在主机上的保存路径,通常是指向一个 .so 文件)。如果编程语言是Python,路径是包含 UDF 函数实现的Python文件路径。这个路径需要用英文单引号或英文双引号括起来;; + - output_type:此函数计算结果的数据类型名称; - buffer_size:中间计算结果的缓冲区大小,单位是字节。如果不使用可以不设置。 例如,如下语句可以把 libl2norm.so 创建为系统中可用的 UDF: @@ -41,6 +50,11 @@ CREATE AGGREGATE FUNCTION function_name AS library_path OUTPUTTYPE output_type [ ```sql CREATE AGGREGATE FUNCTION l2norm AS "/home/taos/udf_example/libl2norm.so" OUTPUTTYPE DOUBLE bufsize 8; ``` + 例如,使用以下语句可以修改已经定义的 l2norm 函数的缓冲区大小为64。 + ```sql + CREATE AGGREGATE FUNCTION l2norm AS "/home/taos/udf_example/libl2norm.so" OUTPUTTYPE DOUBLE bufsize 64; + ``` + 关于如何开发自定义函数,请参考 [UDF使用说明](/develop/udf)。 ## 管理 UDF From 38f507d941cd89423bcc229c28830988f7aa855e Mon Sep 17 00:00:00 2001 From: dmchen Date: Thu, 4 May 2023 05:49:36 +0000 Subject: [PATCH 11/40] transaction multithread --- source/dnode/mnode/impl/inc/mndDef.h | 1 + source/dnode/mnode/impl/src/mndTrans.c | 12 
+++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index f547ce025d..fac574f9e9 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -177,6 +177,7 @@ typedef struct { SArray* pRpcArray; SRWLatch lockRpcArray; int64_t mTraceId; + TdThreadMutex mutex; } STrans; typedef struct { diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 106eea0313..53dc0476cd 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -546,6 +546,7 @@ static void mndTransDropData(STrans *pTrans) { pTrans->param = NULL; pTrans->paramLen = 0; } + (void)taosThreadMutexDestroy(&pTrans->mutex); } static int32_t mndTransActionDelete(SSdb *pSdb, STrans *pTrans, bool callFunc) { @@ -651,6 +652,7 @@ STrans *mndTransCreate(SMnode *pMnode, ETrnPolicy policy, ETrnConflct conflict, pTrans->pRpcArray = taosArrayInit(1, sizeof(SRpcHandleInfo)); pTrans->mTraceId = pReq ? TRACE_GET_ROOTID(&pReq->info.traceId) : 0; taosInitRWLatch(&pTrans->lockRpcArray); + taosThreadMutexInit(&pTrans->mutex, NULL); if (pTrans->redoActions == NULL || pTrans->undoActions == NULL || pTrans->commitActions == NULL || pTrans->pRpcArray == NULL) { @@ -1307,7 +1309,13 @@ static int32_t mndTransExecuteRedoActionsSerial(SMnode *pMnode, STrans *pTrans) int32_t code = 0; int32_t numOfActions = taosArrayGetSize(pTrans->redoActions); if (numOfActions == 0) return code; - if (pTrans->redoActionPos >= numOfActions) return code; + + taosThreadMutexLock(&pTrans->mutex); + + if (pTrans->redoActionPos >= numOfActions) { + taosThreadMutexUnlock(&pTrans->mutex); + return code; + } mInfo("trans:%d, execute %d actions serial, current redoAction:%d", pTrans->id, numOfActions, pTrans->redoActionPos); @@ -1377,6 +1385,8 @@ static int32_t mndTransExecuteRedoActionsSerial(SMnode *pMnode, STrans *pTrans) } } + taosThreadMutexUnlock(&pTrans->mutex); + return code; } From 9f472565c994f92b44630f1a5e8c350539c5bf68 Mon Sep 17 00:00:00 2001 From: slzhou Date: Thu, 4 May 2023 13:58:23 +0800 Subject: [PATCH 12/40] enhance: finish python udf docs --- docs/zh/07-develop/09-udf.md | 46 ++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/docs/zh/07-develop/09-udf.md b/docs/zh/07-develop/09-udf.md index 59513a03aa..8aa841db89 100644 --- a/docs/zh/07-develop/09-udf.md +++ b/docs/zh/07-develop/09-udf.md @@ -19,7 +19,7 @@ TDengine 支持通过 C/Python 语言进行 UDF 定义。接下来结合示例 接口函数的名称是 UDF 名称,或者是 UDF 名称和特定后缀(_start, _finish, _init, _destroy)的连接。列表中的scalarfn,aggfn, udf需要替换成udf函数名。 -## 实现标量函数 +## C UDF 实现标量函数 标量函数实现模板如下 ```c #include "taos.h" @@ -51,7 +51,7 @@ int32_t scalarfn_destroy() { ``` scalarfn 为函数名的占位符,需要替换成函数名,如bit_and。 -## 实现聚合函数 +## C UDF 实现聚合函数 聚合函数的实现模板如下 ```c @@ -102,7 +102,7 @@ int32_t aggfn_destroy() { ``` aggfn为函数名的占位符,需要修改为自己的函数名,如l2norm。 -## 接口函数定义 +## C UDF 接口函数定义 接口函数的名称是 udf 名称,或者是 udf 名称和特定后缀(_start, _finish, _init, _destroy)的连接。以下描述中函数名称中的 scalarfn,aggfn, udf 需要替换成udf函数名。 @@ -110,7 +110,7 @@ aggfn为函数名的占位符,需要修改为自己的函数名,如l2norm。 接口函数参数类型见数据结构定义。 -### 标量接口函数 +### C UDF 标量接口函数 `int32_t scalarfn(SUdfDataBlock* inputDataBlock, SUdfColumn *resultColumn)` @@ -120,7 +120,7 @@ aggfn为函数名的占位符,需要修改为自己的函数名,如l2norm。 - inputDataBlock: 输入的数据块 - resultColumn: 输出列 -### 聚合接口函数 +### C UDF 聚合接口函数 `int32_t aggfn_start(SUdfInterBuf *interBuf)` @@ -137,7 +137,7 @@ aggfn为函数名的占位符,需要修改为自己的函数名,如l2norm。 - result:最终结果。 -### UDF 初始化和销毁 +### C UDF 
初始化和销毁 `int32_t udf_init()` `int32_t udf_destroy()` @@ -145,7 +145,7 @@ aggfn为函数名的占位符,需要修改为自己的函数名,如l2norm。 其中 udf 是函数名的占位符。udf_init 完成初始化工作。 udf_destroy 完成清理工作。如果没有初始化工作,无需定义udf_init函数。如果没有清理工作,无需定义udf_destroy函数。 -## UDF 数据结构 +## C UDF 数据结构 ```c typedef struct SUdfColumnMeta { int16_t type; @@ -203,7 +203,7 @@ typedef struct SUdfInterBuf { 为了更好的操作以上数据结构,提供了一些便利函数,定义在 taosudf.h。 -## 编译 UDF +## 编译 C UDF 用户定义函数的 C 语言源代码无法直接被 TDengine 系统使用,而是需要先编译为 动态链接库,之后才能载入 TDengine 系统。 @@ -215,9 +215,9 @@ gcc -g -O0 -fPIC -shared bit_and.c -o libbitand.so 这样就准备好了动态链接库 libbitand.so 文件,可以供后文创建 UDF 时使用了。为了保证可靠的系统运行,编译器 GCC 推荐使用 7.5 及以上版本。 -## 示例代码 +## C UDF 示例代码 -### 标量函数示例 [bit_and](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/bit_and.c) +### C UDF 标量函数示例 [bit_and](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/bit_and.c) bit_add 实现多列的按位与功能。如果只有一列,返回这一列。bit_add 忽略空值。 @@ -230,7 +230,7 @@ bit_add 实现多列的按位与功能。如果只有一列,返回这一列。 -### 聚合函数示例1 返回值为数值类型 [l2norm](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/l2norm.c) +### C UDF 聚合函数示例1 返回值为数值类型 [l2norm](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/l2norm.c) l2norm 实现了输入列的所有数据的二阶范数,即对每个数据先平方,再累加求和,最后开方。 @@ -243,7 +243,7 @@ l2norm 实现了输入列的所有数据的二阶范数,即对每个数据先 -### 聚合函数示例2 返回值为字符串类型 [max_vol](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/max_vol.c) +### C UDF 聚合函数示例2 返回值为字符串类型 [max_vol](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/max_vol.c) max_vol 实现了从多个输入的电压列中找到最大电压,返回由设备ID + 最大电压所在(行,列)+ 最大电压值 组成的组合字符串值 @@ -275,7 +275,7 @@ select max_vol(vol1,vol2,vol3,deviceid) from battery; - 聚合函数需要实现聚合接口函数 start ,reduce ,finish。 - 如果需要初始化,实现 init;如果需要清理工作,实现 destroy。 -## 实现标量函数 +## Python UDF 实现标量函数 标量函数实现模版如下 ```Python @@ -289,7 +289,7 @@ def process(input: datablock) -> tuple[output_type]: # return tuple object consisted of object of type outputtype ``` -## 实现聚合函数 +## Python UDF 实现聚合函数 聚合函数实现模版如下 ```Python @@ -309,16 +309,16 @@ def finish(buf: bytes) -> output_type: #return obj of type outputtype ``` -## 接口函数定义 +## Python UDF 接口函数定义 -### 标量接口函数 +### Python UDF 标量接口函数 ```Python def process(input: datablock) -> tuple[output_type]: ``` - input:datablock 类似二维矩阵,通过成员方法 data(row,col)返回位于 row 行,col 列的 python 对象 - 返回值是一个 Python 对象元组,每个元素类型为输出类型。 -### 聚合接口函数 +### Python UDF 聚合接口函数 ```Python def start() -> bytes: def reduce(inputs: datablock, buf: bytes) -> bytes @@ -328,7 +328,7 @@ def finish(buf: bytes) -> output_type: 首先调用 start 生成最初结果 buffer,然后输入数据会被分为多个行数据块,对每个数据块 inputs 和当前中间结果 buf 调用 reduce,得到新的中间结果,最后再调用 finish 从中间结果 buf 产生最终输出,最终输出只能含 0 或 1 条数据。 -### UDF 初始化和销毁 +### Python UDF 初始化和销毁 ```Python def init() def destroy() @@ -353,12 +353,12 @@ def destroy() pip install taospyudf lddconfig ``` -2. 如果 Python UDF 程序执行时,引用其它的包,PYTHONPATH 环境变量可以通过在 taos.cfg 的 UdfdLdLibPath 变量配置 +2. 如果 Python UDF 程序执行时,通过 PYTHONPATH 引用其它的包,可以设置 taos.cfg 的 UdfdLdLibPath 变量为PYTHONPATH的内容 -## 示例代码 -### 标量函数示例 [pybitand](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/pybitand.py) +## Python UDF 示例代码 +### Python UDF 标量函数示例 [pybitand](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/pybitand.py) -bit_add 实现多列的按位与功能。如果只有一列,返回这一列。bit_add 忽略空值。 +pybitand 实现多列的按位与功能。如果只有一列,返回这一列。pybitand 忽略空值。
pybitand.py @@ -369,7 +369,7 @@ bit_add 实现多列的按位与功能。如果只有一列,返回这一列。
-### 聚合函数示例 [pyl2norm](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/pyl2norm.py) +### Python UDF 聚合函数示例 [pyl2norm](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/pyl2norm.py) pyl2norm 实现了输入列的所有数据的二阶范数,即对每个数据先平方,再累加求和,最后开方。 From ed3325419ca33c1bd6948726602856b884ea4ae4 Mon Sep 17 00:00:00 2001 From: wade zhang <95411902+gccgdb1234@users.noreply.github.com> Date: Thu, 4 May 2023 18:36:18 +0800 Subject: [PATCH 13/40] Update 29-changes.md --- docs/zh/12-taos-sql/29-changes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/12-taos-sql/29-changes.md b/docs/zh/12-taos-sql/29-changes.md index 8a5f1b20e3..27dd3294b7 100644 --- a/docs/zh/12-taos-sql/29-changes.md +++ b/docs/zh/12-taos-sql/29-changes.md @@ -27,7 +27,7 @@ description: "TDengine 3.0 版本的语法变更说明" | - | :------- | :-------- | :------- | | 1 | ALTER ACCOUNT | 废除 | 2.x中为企业版功能,3.0不再支持。语法暂时保留了,执行报“This statement is no longer supported”错误。 | 2 | ALTER ALL DNODES | 新增 | 修改所有DNODE的参数。 -| 3 | ALTER DATABASE | 调整 |

废除<br/><br/>  • QUORUM:写入需要的副本确认数。3.0 版本默认行为是强一致性,且不支持修改为弱一致性。<br/>  • BLOCKS:VNODE使用的内存块数。3.0版本使用BUFFER来表示VNODE写入内存池的大小。<br/>  • UPDATE:更新操作的支持模式。3.0版本所有数据库都支持部分列更新。<br/>  • CACHELAST:缓存最新一行数据的模式。3.0版本用CACHEMODEL代替。<br/>  • COMP:3.0版本暂不支持修改。<br/><br/>新增<br/><br/>  • CACHEMODEL:表示是否在内存中缓存子表的最近数据。<br/>  • CACHESIZE:表示缓存子表最近数据的内存大小。<br/>  • WAL_FSYNC_PERIOD:代替原FSYNC参数。<br/>  • WAL_LEVEL:代替原WAL参数。<br/>  • WAL_RETENTION_PERIOD:3.0.4.0版本新增,wal文件的额外保留策略,用于数据订阅。<br/>  • WAL_RETENTION_SIZE:3.0.4.0版本新增,wal文件的额外保留策略,用于数据订阅。<br/><br/>调整<br/><br/>  • REPLICA:3.0.0版本暂不支持修改。<br/>  • KEEP:3.0版本新增支持带单位的设置方式。 |
+| 3 | ALTER DATABASE | 调整 |

废除<br/><br/>  • QUORUM:写入需要的副本确认数。3.0 版本默认行为是强一致性,且不支持修改为弱一致性。<br/>  • BLOCKS:VNODE使用的内存块数。3.0版本使用BUFFER来表示VNODE写入内存池的大小。<br/>  • UPDATE:更新操作的支持模式。3.0版本所有数据库都支持部分列更新。<br/>  • CACHELAST:缓存最新一行数据的模式。3.0版本用CACHEMODEL代替。<br/>  • COMP:3.0版本暂不支持修改。<br/><br/>新增<br/><br/>  • CACHEMODEL:表示是否在内存中缓存子表的最近数据。<br/>  • CACHESIZE:表示缓存子表最近数据的内存大小。<br/>  • WAL_FSYNC_PERIOD:代替原FSYNC参数。<br/>  • WAL_LEVEL:代替原WAL参数。<br/>  • WAL_RETENTION_PERIOD:3.0.4.0版本新增,wal文件的额外保留策略,用于数据订阅。<br/>  • WAL_RETENTION_SIZE:3.0.4.0版本新增,wal文件的额外保留策略,用于数据订阅。<br/><br/>调整<br/><br/>  • KEEP:3.0版本新增支持带单位的设置方式。 |
| 4 | ALTER STABLE | 调整 | 废除<br/>  • CHANGE TAG:修改标签列的名称。3.0版本使用RENAME TAG代替。<br/>    新增<br/>  • RENAME TAG:代替原CHANGE TAG子句。<br/>  • COMMENT:修改超级表的注释。 |
| 5 | ALTER TABLE | 调整 | 废除<br/>  • CHANGE TAG:修改标签列的名称。3.0版本使用RENAME TAG代替。<br/>    新增<br/>  • RENAME TAG:代替原CHANGE TAG子句。<br/>  • COMMENT:修改表的注释。<br/>  • TTL:修改表的生命周期。 |
| 6 | ALTER USER | 调整 | 废除<br/>  • PRIVILEGE:修改用户权限。3.0版本使用GRANT和REVOKE来授予和回收权限。<br/>    新增<br/>  • ENABLE:启用或停用此用户。<br/>  • SYSINFO:修改用户是否可查看系统信息。 |
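A few illustrative statements for the 3.0 replacements listed in the table above (the table, user, and database names are invented):

```sql
-- RENAME TAG replaces the removed CHANGE TAG clause
ALTER TABLE d1001 RENAME TAG location loc;

-- ENABLE is one of the new per-user switches
ALTER USER u1 ENABLE 0;

-- CACHEMODEL replaces the removed CACHELAST option
ALTER DATABASE db1 CACHEMODEL 'last_row';
```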
From 5d63f438b5ffaf21a4334e046e15f1bcc0e75edd Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 4 May 2023 13:51:48 +0800 Subject: [PATCH 14/40] enh: confirm alter hash range --- include/common/tmsgdef.h | 1 - source/common/src/tmsg.c | 2 +- source/dnode/mnode/impl/src/mndVgroup.c | 5 +++++ source/dnode/vnode/inc/vnode.h | 1 + source/dnode/vnode/src/vnd/vnodeCfg.c | 3 +++ source/dnode/vnode/src/vnd/vnodeOpen.c | 1 + source/dnode/vnode/src/vnd/vnodeSvr.c | 25 ++++++++++++++++++++++--- 7 files changed, 33 insertions(+), 5 deletions(-) diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index ffad6f6646..c4371f35dc 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -225,7 +225,6 @@ enum { TD_DEF_MSG_TYPE(TDMT_VND_COMMIT, "vnode-commit", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_CREATE_INDEX, "vnode-create-index", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_DROP_INDEX, "vnode-drop-index", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_VND_DISABLE_WRITE, "vnode-disable-write", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_MAX_MSG, "vnd-max", NULL, NULL) diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 4139d6c7d4..324e6ff37b 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -7691,4 +7691,4 @@ void tDeleteMqSubTopicEp(SMqSubTopicEp *pSubTopicEp) { taosMemoryFreeClear(pSubTopicEp->schema.pSchema); pSubTopicEp->schema.nCols = 0; taosArrayDestroy(pSubTopicEp->vgs); -} \ No newline at end of file +} diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index 64e0f82a1f..add2b1568c 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -2164,6 +2164,7 @@ static int32_t mndSplitVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj mInfo("vgId:%d, vnode:%d dnode:%d", newVg2.vgId, i, newVg2.vnodeGid[i].dnodeId); } + // alter hash range int32_t maxVgId = sdbGetMaxId(pMnode->pSdb, SDB_VGROUP); if (mndAddAlterVnodeHashRangeAction(pMnode, pTrans, &newVg1, maxVgId) != 0) goto _OVER; newVg1.vgId = maxVgId; @@ -2172,6 +2173,10 @@ static int32_t mndSplitVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj if (mndAddAlterVnodeHashRangeAction(pMnode, pTrans, &newVg2, maxVgId) != 0) goto _OVER; newVg2.vgId = maxVgId; + if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg1) != 0) goto _OVER; + + if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg2) != 0) goto _OVER; + // adjust vgroup replica if (pDb->cfg.replications != newVg1.replica) { if (mndBuildAlterVgroupAction(pMnode, pTrans, pDb, pDb, &newVg1, pArray) != 0) goto _OVER; diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 828a173108..0a54da9b53 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -343,6 +343,7 @@ struct SVnodeCfg { SVnodeStats vndStats; uint32_t hashBegin; uint32_t hashEnd; + bool hashChange; int16_t sttTrigger; int16_t hashPrefix; int16_t hashSuffix; diff --git a/source/dnode/vnode/src/vnd/vnodeCfg.c b/source/dnode/vnode/src/vnd/vnodeCfg.c index 511ba9cc24..faa4d2fc57 100644 --- a/source/dnode/vnode/src/vnd/vnodeCfg.c +++ b/source/dnode/vnode/src/vnd/vnodeCfg.c @@ -134,6 +134,7 @@ int vnodeEncodeConfig(const void *pObj, SJson *pJson) { if (tjsonAddIntegerToObject(pJson, "sstTrigger", pCfg->sttTrigger) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "hashBegin", pCfg->hashBegin) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "hashEnd", pCfg->hashEnd) < 0) return -1; + if (tjsonAddIntegerToObject(pJson, 
"hashChange", pCfg->hashChange) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "hashMethod", pCfg->hashMethod) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "hashPrefix", pCfg->hashPrefix) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "hashSuffix", pCfg->hashSuffix) < 0) return -1; @@ -249,6 +250,8 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) { if (code < 0) return -1; tjsonGetNumberValue(pJson, "hashEnd", pCfg->hashEnd, code); if (code < 0) return -1; + tjsonGetNumberValue(pJson, "hashChange", pCfg->hashChange, code); + if (code < 0) return -1; tjsonGetNumberValue(pJson, "hashMethod", pCfg->hashMethod, code); if (code < 0) return -1; tjsonGetNumberValue(pJson, "hashPrefix", pCfg->hashPrefix, code); diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index 7d41edfdd9..2f7520e3a7 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -198,6 +198,7 @@ int32_t vnodeAlterHashRange(const char *srcPath, const char *dstPath, SAlterVnod info.config.vgId = pReq->dstVgId; info.config.hashBegin = pReq->hashBegin; info.config.hashEnd = pReq->hashEnd; + info.config.hashChange = true; info.config.walCfg.vgId = pReq->dstVgId; SSyncCfg *pCfg = &info.config.syncCfg; diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index dd576392f0..b5a5b1500b 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -1453,11 +1453,30 @@ int32_t vnodeProcessCreateTSma(SVnode *pVnode, void *pCont, uint32_t contLen) { return vnodeProcessCreateTSmaReq(pVnode, 1, pCont, contLen, NULL); } +static int32_t vnodeConsolidateAlterHashRange(SVnode *pVnode, int64_t version) { + int32_t code = TSDB_CODE_SUCCESS; + + vInfo("vgId:%d, trim meta of tables per hash range [%" PRIu32 ", %" PRIu32 "]. 
apply-index:%" PRId64, TD_VID(pVnode), + pVnode->config.hashBegin, pVnode->config.hashEnd, version); + + // TODO: trim meta of tables from TDB per hash range [pVnode->config.hashBegin, pVnode->config.hashEnd] + + return code; +} + static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp) { - vInfo("vgId:%d, vnode management handle msgType:alter-confirm, alter replica confim msg is processed", - TD_VID(pVnode)); + vInfo("vgId:%d, vnode handle msgType:alter-confirm, alter confim msg is processed", TD_VID(pVnode)); + int32_t code = TSDB_CODE_SUCCESS; + if (!pVnode->config.hashChange) { + goto _exit; + } + + code = vnodeConsolidateAlterHashRange(pVnode, version); + pVnode->config.hashChange = false; + +_exit: pRsp->msgType = TDMT_VND_ALTER_CONFIRM_RSP; - pRsp->code = TSDB_CODE_SUCCESS; + pRsp->code = code; pRsp->pCont = NULL; pRsp->contLen = 0; From cfc2b2ecda081d346ea7700ef87d47789804ebeb Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 4 May 2023 19:46:53 +0800 Subject: [PATCH 15/40] test: adjust test case split_vgroup_replica1.sim --- tests/script/tsim/dnode/split_vgroup_replica1.sim | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/script/tsim/dnode/split_vgroup_replica1.sim b/tests/script/tsim/dnode/split_vgroup_replica1.sim index 51d63d25f4..135f7a579e 100644 --- a/tests/script/tsim/dnode/split_vgroup_replica1.sim +++ b/tests/script/tsim/dnode/split_vgroup_replica1.sim @@ -93,6 +93,19 @@ endi print =============== step4: split print split vgroup 2 sql split vgroup 2 +$wt = 0 + stepwt1: + $wt = $wt + 1 + sleep 1000 + if $wt == 200 then + print ====> split vgroup not completed! + return -1 + endi +sql show transactions +if $rows != 0 then + print wait 1 seconds to alter + goto stepwt1 +endi print =============== step5: check split result sql show d1.tables From 9f3a0d9a5e463681295598821a7be439cc7de86b Mon Sep 17 00:00:00 2001 From: dmchen Date: Fri, 5 May 2023 02:17:06 +0000 Subject: [PATCH 16/40] balance leader skew --- source/dnode/mnode/impl/src/mndVgroup.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index add2b1568c..7aa3afa627 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -1917,7 +1917,7 @@ int32_t mndAddVgroupBalanceToTrans(SMnode *pMnode, SVgObj *pVgroup, STrans *pTra int32_t vgid = pVgroup->vgId; int8_t replica = pVgroup->replica; - if(pVgroup->replica <= 1) { + if(pVgroup->replica <= 1) { mInfo("trans:%d, vgid:%d no need to balance, replica:%d", pTrans->id, vgid, replica); return -1; } @@ -1951,6 +1951,19 @@ int32_t mndAddVgroupBalanceToTrans(SMnode *pMnode, SVgObj *pVgroup, STrans *pTra return -1; } + SDbObj *pDb = mndAcquireDb(pMnode, pVgroup->dbName); + if (pDb == NULL) { + mError("trans:%d, vgid:%d failed to be balanced to dnode:%d, because db not exist", pTrans->id, vgid, dnodeId); + return -1; + } + + if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, pVgroup) != 0) { + mError("trans:%d, vgid:%d failed to be balanced to dnode:%d", pTrans->id, vgid, dnodeId); + return -1; + } + + mndReleaseDb(pMnode, pDb); + SSdbRaw *pRaw = mndVgroupActionEncode(pVgroup); if (pRaw == NULL) { mError("trans:%d, vgid:%d failed to encode action to dnode:%d", pTrans->id, vgid, dnodeId); @@ -1965,7 +1978,8 @@ int32_t mndAddVgroupBalanceToTrans(SMnode *pMnode, SVgObj *pVgroup, STrans *pTra } else { - mInfo("trans:%d, vgid:%d cant be 
balanced to dnode:%d, exist:%d, online:%d", pTrans->id, vgid, dnodeId, exist, online); + mInfo("trans:%d, vgid:%d cant be balanced to dnode:%d, exist:%d, online:%d", + pTrans->id, vgid, dnodeId, exist, online); } return 0; From accdcda3436f731464d14e54dd5e8901361a2ea1 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Fri, 5 May 2023 13:21:04 +0800 Subject: [PATCH 17/40] docs: add python udf english version --- docs/en/07-develop/09-udf.md | 187 +++++++++++++++++++++++++++++---- docs/en/12-taos-sql/22-meta.md | 3 + docs/en/12-taos-sql/26-udf.md | 32 ++++-- docs/zh/07-develop/09-udf.md | 17 +-- docs/zh/12-taos-sql/26-udf.md | 10 +- 5 files changed, 206 insertions(+), 43 deletions(-) diff --git a/docs/en/07-develop/09-udf.md b/docs/en/07-develop/09-udf.md index dc42743b51..e1f1bab4e0 100644 --- a/docs/en/07-develop/09-udf.md +++ b/docs/en/07-develop/09-udf.md @@ -6,10 +6,12 @@ description: This document describes how to create user-defined functions (UDF), The built-in functions of TDengine may not be sufficient for the use cases of every application. In this case, you can define custom functions for use in TDengine queries. These are known as user-defined functions (UDF). A user-defined function takes one column of data or the result of a subquery as its input. -TDengine supports user-defined functions written in C or C++. This document describes the usage of user-defined functions. - User-defined functions can be scalar functions or aggregate functions. Scalar functions, such as `abs`, `sin`, and `concat`, output a value for every row of data. Aggregate functions, such as `avg` and `max` output one value for multiple rows of data. +TDengine supports user-defined functions written in C or Python. This document describes the usage of user-defined functions. + +# Implement a UDF in C + When you create a user-defined function, you must implement standard interface functions: - For scalar functions, implement the `scalarfn` interface function. - For aggregate functions, implement the `aggfn_start`, `aggfn`, and `aggfn_finish` interface functions. @@ -17,7 +19,7 @@ When you create a user-defined function, you must implement standard interface f There are strict naming conventions for these interface functions. The names of the start, finish, init, and destroy interfaces must be _start, _finish, _init, and _destroy, respectively. Replace `scalarfn`, `aggfn`, and `udf` with the name of your user-defined function. -## Implementing a Scalar Function +## Implementing a Scalar Function in C The implementation of a scalar function is described as follows: ```c #include "taos.h" @@ -49,7 +51,7 @@ int32_t scalarfn_destroy() { ``` Replace `scalarfn` with the name of your function. -## Implementing an Aggregate Function +### Implementing an Aggregate Function in C The implementation of an aggregate function is described as follows: ```c @@ -100,7 +102,7 @@ int32_t aggfn_destroy() { ``` Replace `aggfn` with the name of your function. -## Interface Functions +## C UDF Interface Functions There are strict naming conventions for interface functions. The names of the start, finish, init, and destroy interfaces must be _start, _finish, _init, and _destroy, respectively. Replace `scalarfn`, `aggfn`, and `udf` with the name of your user-defined function. 
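As a concrete reading of the naming rule, the exported symbols for the two sample functions used throughout this document would be the following (prototypes only; the parameter types are the ones defined in the templates above):

```c
// Scalar UDF created as "bit_and": the entry point carries the UDF name itself.
int32_t bit_and_init();
int32_t bit_and(SUdfDataBlock *inputDataBlock, SUdfColumn *resultColumn);
int32_t bit_and_destroy();

// Aggregate UDF created as "l2norm": start/finish wrap the per-block entry point.
int32_t l2norm_start(SUdfInterBuf *interBuf);
int32_t l2norm(SUdfDataBlock *inputBlock, SUdfInterBuf *interBuf, SUdfInterBuf *newInterBuf);
int32_t l2norm_finish(SUdfInterBuf *interBuf, SUdfInterBuf *result);
```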
@@ -108,7 +110,7 @@ Interface functions return a value that indicates whether the operation was succ For information about the parameters for interface functions, see Data Model -### Interfaces for Scalar Functions +### Interfaces for C UDF Scalar Functions `int32_t scalarfn(SUdfDataBlock* inputDataBlock, SUdfColumn *resultColumn)` @@ -118,7 +120,7 @@ The parameters in the function are defined as follows: - inputDataBlock: The data block to input. - resultColumn: The column to output. The column to output. -### Interfaces for Aggregate Functions +### Interfaces for C UDF Aggregate Functions `int32_t aggfn_start(SUdfInterBuf *interBuf)` @@ -126,7 +128,7 @@ The parameters in the function are defined as follows: `int32_t aggfn_finish(SUdfInterBuf* interBuf, SUdfInterBuf *result)` -Replace `aggfn` with the name of your function. In the function, aggfn_start is called to generate a result buffer. Data is then divided between multiple blocks, and aggfn is called on each block to update the result. Finally, aggfn_finish is called to generate final results from the intermediate results. The final result contains only one or zero data points. +Replace `aggfn` with the name of your function. In the function, aggfn_start is called to generate a result buffer. Data is then divided between multiple blocks, and the `aggfn` function is called on each block to update the result. Finally, aggfn_finish is called to generate the final results from the intermediate results. The final result contains only one or zero data points. The parameters in the function are defined as follows: - interBuf: The intermediate result buffer. @@ -135,15 +137,15 @@ The parameters in the function are defined as follows: - result: The final result. -### Initializing and Terminating User-Defined Functions +### C UDF Initializing and Terminating User-Defined Functions `int32_t udf_init()` `int32_t udf_destroy()` -Replace `udf`with the name of your function. udf_init initializes the function. udf_destroy terminates the function. If it is not necessary to initialize your function, udf_init is not required. If it is not necessary to terminate your function, udf_destroy is not required. +Replace `udf` with the name of your function. udf_init initializes the function. udf_destroy terminates the function. If it is not necessary to initialize your function, udf_init is not required. If it is not necessary to terminate your function, udf_destroy is not required. -## Data Structure of User-Defined Functions +## Data Structure of C User-Defined Functions ```c typedef struct SUdfColumnMeta { int16_t type; @@ -193,7 +195,7 @@ typedef struct SUdfInterBuf { ``` The data structure is described as follows: -- The SUdfDataBlock block includes the number of rows (numOfRows) and number of columns (numCols). udfCols[i] (0 <= i <= numCols-1) indicates that each column is of type SUdfColumn. +- The SUdfDataBlock block includes the number of rows (numOfRows) and the number of columns (numCols). udfCols[i] (0 <= i <= numCols-1) indicates that each column is of type SUdfColumn. - SUdfColumn includes the definition of the data type of the column (colMeta) and the data in the column (colData). - The member definitions of SUdfColumnMeta are the same as the data type definitions in `taos.h`. - The data in SUdfColumnData can become longer. varLenCol indicates variable-length data, and fixLenCol indicates fixed-length data. 
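A minimal sketch of direct row access using only the struct fields defined above; it handles fixed-length types only and skips null handling, which the helper functions in `taosudf.h` cover in real code:

```c
#include <string.h>

// Copy the first input column into the result column, fixed-length layout:
// row i of such a column lives at colData.fixLenCol.data + i * colMeta.bytes.
static void copy_fixlen_col(SUdfDataBlock *block, SUdfColumn *result) {
  SUdfColumn *in = block->udfCols[0];
  int32_t     width = in->colMeta.bytes;
  for (int32_t i = 0; i < block->numOfRows; ++i) {
    memcpy(result->colData.fixLenCol.data + i * width,
           in->colData.fixLenCol.data + i * width, width);
  }
  result->colData.numOfRows = block->numOfRows;
}
```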
@@ -201,9 +203,9 @@ The data structure is described as follows: Additional functions are defined in `taosudf.h` to make it easier to work with these structures. -## Compile UDF +## Compile C UDF -To use your user-defined function in TDengine, first compile it to a dynamically linked library (DLL). +To use your user-defined function in TDengine, first, compile it to a shared library. For example, the sample UDF `bit_and.c` can be compiled into a DLL as follows: @@ -213,12 +215,9 @@ gcc -g -O0 -fPIC -shared bit_and.c -o libbitand.so The generated DLL file `libbitand.so` can now be used to implement your function. Note: GCC 7.5 or later is required. -## Manage and Use User-Defined Functions -After compiling your function into a DLL, you add it to TDengine. For more information, see [User-Defined Functions](../12-taos-sql/26-udf.md). +## C UDF Sample Code -## Sample Code - -### Sample scalar function: [bit_and](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/bit_and.c) +### C UDF Sample scalar function: [bit_and](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/bit_and.c) The bit_and function implements bitwise addition for multiple columns. If there is only one column, the column is returned. The bit_and function ignores null values. @@ -231,7 +230,7 @@ The bit_and function implements bitwise addition for multiple columns. If there -### Sample aggregate function: [l2norm](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/l2norm.c) +### C UDF Sample aggregate function 1: [l2norm](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/l2norm.c) The l2norm function finds the second-order norm for all data in the input column. This squares the values, takes a cumulative sum, and finds the square root. @@ -243,3 +242,151 @@ The l2norm function finds the second-order norm for all data in the input column ``` + +### C UDF Sample aggregate function 2: [max_vol](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/max_vol.c) + +The max_vol function returns a string concatenating the deviceId column, the row number and column number of the maximum voltage and the maximum voltage given several voltage columns as input. + +Create Table: +```bash +create table battery(ts timestamp, vol1 float, vol2 float, vol3 float, deviceId varchar(16)); +``` +Create the UDF: +```bash +create aggregate function max_vol as '/root/udf/libmaxvol.so' outputtype binary(64) bufsize 10240 language 'C'; +``` +Use the UDF in the query: +```bash +select max_vol(vol1,vol2,vol3,deviceid) from battery; +``` + +
+max_vol.c + +```c +{{#include tests/script/sh/max_vol.c}} +``` + +
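The core selection logic of max_vol can be sketched independently of the UDF framework. In the following illustrative Python snippet, the sample rows and the exact output format are assumptions for illustration, not the behavior of max_vol.c itself:

```Python
rows = [  # (deviceId, [vol1, vol2, vol3]) -- invented sample data
    ("dev001", [3.2, 3.9, 3.1]),
    ("dev002", [4.1, 3.3, 3.8]),
]

best = None  # (voltage, row, col, deviceId)
for r, (device, vols) in enumerate(rows):
    for c, vol in enumerate(vols):
        if best is None or vol > best[0]:
            best = (vol, r, c, device)

vol, r, c, device = best
print(f"{device}_({r},{c})_{vol}")  # dev002_(1,0)_4.1 -- the format here is illustrative
```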
+
+#Implement a UDF in Python
+
+Implement the specified interface functions when implementing a UDF in Python.
+- implement `process` function for the scalar UDF.
+- implement `start`, `reduce`, `finish` for the aggregate UDF.
+- implement `init` for initialization and `destroy` for termination.
+
+## Implement a Scalar UDF in Python
+
+The implementation of a scalar UDF is described as follows:
+
+```Python
+def init():
+    # initialization
+def destroy():
+    # destroy
+def process(input: datablock) -> tuple[output_type]:
+    # process input datablock,
+    # datablock.data(row, col) is used to access the python object at location (row, col)
+    # return a tuple object consisting of objects of type output_type
+```
+
+## Implement an Aggregate UDF in Python
+
+The implementation of an aggregate function is described as follows:
+
+```Python
+def init():
+    #initialization
+def destroy():
+    #destroy
+def start() -> bytes:
+    #return serialize(init_state)
+def reduce(inputs: datablock, buf: bytes) -> bytes:
+    # deserialize buf to state
+    # reduce the inputs and state into new_state.
+    # use inputs.data(i,j) to access the python object at location (i,j)
+    # serialize new_state into new_state_bytes
+    return new_state_bytes
+def finish(buf: bytes) -> output_type:
+    #return obj of type output_type
+```
+
+## Python UDF interface functions
+
+### Python UDF scalar interface functions
+```Python
+def process(input: datablock) -> tuple[output_type]:
+```
+- `input` is a data block, a two-dimensional matrix-like object, of which method `data(row, col)` returns the Python object located at position (`row`, `col`)
+- return a Python tuple object, of which each item is a Python object of type `output_type`
+
+### Python UDF aggregate interface functions
+```Python
+def start() -> bytes:
+def reduce(input: datablock, buf: bytes) -> bytes:
+def finish(buf: bytes) -> output_type:
+```
+
+- first, `start()` is called to return the initial result of type `bytes`
+- then the input data is divided into multiple data blocks, and for each block `input`, `reduce` is called with the data block `input` and the current intermediate result `buf` to generate a new intermediate result buffer
+- finally, the `finish` function is called on the intermediate result `buf` and outputs 0 or 1 values of type `output_type`
+
+
+### Python UDF Initialization and Termination
+```Python
+def init()
+def destroy()
+```
+Implement `init` for initialization and `destroy` for termination.
+
+## TDengine SQL data type and Python UDF Data Type Mapping Table
+
+The following table describes the mapping between TDengine SQL data type and Python UDF Data Type. The `NULL` value of all TDengine SQL types is mapped to the `None` value in Python.
+
+| **TDengine SQL Data Type** | **Python Data Type** |
+| :-----------------------: | ------------ |
+|TINYINT / SMALLINT / INT / BIGINT | int |
+|TINYINT UNSIGNED / SMALLINT UNSIGNED / INT UNSIGNED / BIGINT UNSIGNED | int |
+|FLOAT / DOUBLE | float |
+|BOOL | bool |
+|BINARY / VARCHAR / NCHAR | bytes|
+|TIMESTAMP | int |
+|JSON and other types | Not Supported |
+
+## Python UDF Installation
+1. Install Python package `taospyudf` that executes Python UDF
+```bash
+sudo pip install taospyudf
+ldconfig
+```
+2. If PYTHONPATH is needed to find Python packages when the Python UDF executes, include the PYTHONPATH contents in the UdfdLdLibPath variable of the taos.cfg configuration file.
+
+## Python UDF Sample Code
+### Python UDF Scalar Function Sample Code [pybitand](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/pybitand.py)
+
+The `pybitand` function implements bitwise addition for multiple columns. If there is only one column, the column is returned. The `pybitand` function ignores null values.
+
+
+pybitand.py + +```Python +{{#include tests/script/sh/pybitand.py}} +``` + +
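A pybitand-style `process` function can be prototyped outside of TDengine with a small stand-in for the data block. In the sketch below, only `data(row, col)` mirrors the documented interface; the `FakeDataBlock` class and its `shape()` helper are assumptions added so the example runs locally:

```Python
class FakeDataBlock:
    def __init__(self, rows):
        self._rows = rows
    def shape(self):
        # (number of rows, number of columns); shape() is an assumed helper
        return len(self._rows), len(self._rows[0])
    def data(self, row, col):
        return self._rows[row][col]

def process(block):
    nrows, ncols = block.shape()
    out = []
    for r in range(nrows):
        acc = None
        for c in range(ncols):
            v = block.data(r, c)
            if v is None:  # pybitand ignores null values
                continue
            acc = v if acc is None else acc & v
        out.append(acc)
    return tuple(out)  # one output value per input row

print(process(FakeDataBlock([[6, 3], [None, 5], [None, None]])))  # (2, 5, None)
```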
+ +### Python UDF Aggregate Function Sample Code [pyl2norm](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/pyl2norm.py) + +The `pyl2norm` function finds the second-order norm for all data in the input column. This squares the values, takes a cumulative sum, and finds the square root. +
+pyl2norm.py
+
+```Python
+{{#include tests/script/sh/pyl2norm.py}}
+```
+
+
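The start/reduce/finish protocol can likewise be exercised locally. The sketch below computes an l2norm-style result; the use of `pickle` for state serialization and the `FakeDataBlock` driver are assumptions for local testing — inside taospyudf only the three interface functions are required:

```Python
import pickle

class FakeDataBlock:
    def __init__(self, rows):
        self._rows = rows
    def shape(self):
        return len(self._rows), len(self._rows[0])
    def data(self, row, col):
        return self._rows[row][col]

def start() -> bytes:
    return pickle.dumps(0.0)            # serialized initial state: the sum of squares

def reduce(inputs, buf: bytes) -> bytes:
    state = pickle.loads(buf)           # deserialize the current intermediate result
    nrows, ncols = inputs.shape()
    for r in range(nrows):
        for c in range(ncols):
            v = inputs.data(r, c)
            if v is not None:           # skip NULLs
                state += v * v
    return pickle.dumps(state)          # serialize the new intermediate result

def finish(buf: bytes) -> float:
    return pickle.loads(buf) ** 0.5     # zero or one value out, per the interface above

# local driver: feed two "blocks" of data through the protocol
buf = start()
for block in (FakeDataBlock([[1.0], [2.0]]), FakeDataBlock([[3.0], [None]])):
    buf = reduce(block, buf)
print(finish(buf))                      # sqrt(1 + 4 + 9) ≈ 3.742
```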
+
+## Manage and Use User-Defined Functions
+You can add UDF to TDengine before using it in SQL queries. For more information, see [User-Defined Functions](../12-taos-sql/26-udf.md).
diff --git a/docs/en/12-taos-sql/22-meta.md b/docs/en/12-taos-sql/22-meta.md
index 81284aeaed..e63f682761 100644
--- a/docs/en/12-taos-sql/22-meta.md
+++ b/docs/en/12-taos-sql/22-meta.md
@@ -120,6 +120,9 @@ Provides information about user-defined functions.
 | 5 | create_time | TIMESTAMP | Creation time |
 | 6 | code_len | INT | Length of the source code |
 | 7 | bufsize | INT | Buffer size |
+| 8 | func_language | BINARY(31) | UDF programming language |
+| 9 | func_body | BINARY(16384) | UDF function body |
+| 10 | func_version | INT | UDF function version, starting from 0 and increased by 1 each time the UDF is updated |
 
 ## INS_INDEXES
 
diff --git a/docs/en/12-taos-sql/26-udf.md b/docs/en/12-taos-sql/26-udf.md
index cb64873705..c4d6d4fca4 100644
--- a/docs/en/12-taos-sql/26-udf.md
+++ b/docs/en/12-taos-sql/26-udf.md
@@ -7,17 +7,18 @@ description: This document describes the SQL statements related to user-defined
 You can create user-defined functions and import them into TDengine.
 
 ## Create UDF
 
-SQL command can be executed on the host where the generated UDF DLL resides to load the UDF DLL into TDengine. This operation cannot be done through REST interface or web console. Once created, any client of the current TDengine can use these UDF functions in their SQL commands. UDF are stored in the management node of TDengine. The UDFs loaded in TDengine would be still available after TDengine is restarted.
+The SQL command can be executed on the host where the generated UDF DLL resides to load the UDF DLL into TDengine. This operation cannot be done through the REST interface or web console. Once created, any client of the current TDengine can use these UDF functions in their SQL commands. UDFs are stored in the management node of TDengine. The UDFs loaded in TDengine remain available after TDengine is restarted.
 
 When creating UDF, the type of UDF, i.e. a scalar function or aggregate function must be specified. If the specified type is wrong, the SQL statements using the function would fail with errors. The input data type and output data type must be consistent with the UDF definition.
 
 - Create Scalar Function
 ```sql
-CREATE FUNCTION function_name AS library_path OUTPUTTYPE output_type;
+CREATE [OR REPLACE] FUNCTION function_name AS library_path OUTPUTTYPE output_type [LANGUAGE 'C|Python'];
 ```
-
-  - function_name: The scalar function name to be used in SQL statement which must be consistent with the UDF name and is also the name of the compiled DLL (.so file).
-  - library_path: The absolute path of the DLL file including the name of the shared object file (.so). The path must be quoted with single or double quotes.
+  - OR REPLACE: if the UDF exists, the UDF properties are modified
+  - function_name: The scalar function name to be used in the SQL statement
+  - LANGUAGE 'C|Python': the programming language of the UDF. C and Python are currently supported. If this clause is omitted, C is assumed as the programming language.
+  - library_path: For the C programming language, the absolute path of the DLL file including the name of the shared object file (.so). For the Python programming language, the absolute path of the Python UDF script. The path must be quoted with single or double quotes.
   - output_type: The data type of the results of the UDF.
 
 For example, the following SQL statement can be used to create a UDF from `libbitand.so`.
@@ -25,14 +26,20 @@ CREATE FUNCTION function_name AS library_path OUTPUTTYPE output_type;
 ```sql
 CREATE FUNCTION bit_and AS "/home/taos/udf_example/libbitand.so" OUTPUTTYPE INT;
 ```
+  For example, the following SQL statement can be used to modify the existing function `bit_and`. The output type is changed to BIGINT and the programming language is changed to Python.
+
+  ```sql
+  CREATE OR REPLACE FUNCTION bit_and AS "/home/taos/udf_example/bit_and.py" OUTPUTTYPE BIGINT LANGUAGE 'Python';
+  ```
 
 - Create Aggregate Function
 ```sql
 CREATE [OR REPLACE] AGGREGATE FUNCTION function_name AS library_path OUTPUTTYPE output_type [ BUFSIZE buffer_size ] [LANGUAGE 'C|Python'];
 ```
-
-  - function_name: The aggregate function name to be used in SQL statement which must be consistent with the udfNormalFunc name and is also the name of the compiled DLL (.so file).
-  - library_path: The absolute path of the DLL file including the name of the shared object file (.so). The path must be quoted with single or double quotes.
+  - OR REPLACE: if the UDF exists, the UDF properties are modified
+  - function_name: The aggregate function name to be used in the SQL statement
+  - LANGUAGE 'C|Python': the programming language of the UDF. C and Python are currently supported. If this clause is omitted, C is assumed as the programming language.
+  - library_path: For the C programming language, the absolute path of the DLL file including the name of the shared object file (.so). For the Python programming language, the absolute path of the Python UDF script. The path must be quoted with single or double quotes.
   - output_type: The output data type, the value is the literal string of the supported TDengine data type.
   - buffer_size: The size of the intermediate buffer in bytes. This parameter is optional.
 
@@ -41,6 +48,11 @@ CREATE AGGREGATE FUNCTION function_name AS library_path OUTPUTTYPE output_type [
 ```sql
 CREATE AGGREGATE FUNCTION l2norm AS "/home/taos/udf_example/libl2norm.so" OUTPUTTYPE DOUBLE bufsize 8;
 ```
+  For example, the following SQL statement modifies the buffer size of the existing UDF `l2norm` to 64.
+  ```sql
+  CREATE AGGREGATE FUNCTION l2norm AS "/home/taos/udf_example/libl2norm.so" OUTPUTTYPE DOUBLE bufsize 64;
+  ```
+
 For more information about user-defined functions, see [User-Defined Functions](/develop/udf).
 
 ## Manage UDF
@@ -61,9 +73,9 @@ SHOW FUNCTIONS;
 
 ## Call UDF
 
-The function name specified when creating UDF can be used directly in SQL statements, just like builtin functions. For example:
+The function name specified when creating UDF can be used directly in SQL statements, just like built-in functions. For example:
 ```sql
 SELECT bit_and(c1,c2) FROM table;
 ```
-The above SQL statement invokes function X for column c1 and c2 on table. You can use query keywords like WHERE with user-defined functions.
+The above SQL statement invokes the bit_and function on columns c1 and c2 of the table. You can use query keywords like WHERE with user-defined functions.
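As a sketch of how such a UDF might be called from the Python client (assuming the `taospy` package is installed and a table `test.t` with INT columns c1 and c2 exists; the connection defaults and names here are illustrative):

```Python
import taos  # taospy, the TDengine Python connector

conn = taos.connect()  # assumes a local TDengine server with default credentials
cursor = conn.cursor()

# a UDF is called like any built-in function; WHERE and other clauses work as usual
cursor.execute("SELECT bit_and(c1, c2) FROM test.t WHERE c1 IS NOT NULL")
for row in cursor.fetchall():
    print(row)

conn.close()
```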
diff --git a/docs/zh/07-develop/09-udf.md b/docs/zh/07-develop/09-udf.md index 8aa841db89..703f222516 100644 --- a/docs/zh/07-develop/09-udf.md +++ b/docs/zh/07-develop/09-udf.md @@ -10,7 +10,7 @@ description: "支持用户编码的聚合函数和标量函数,在查询中嵌 TDengine 支持通过 C/Python 语言进行 UDF 定义。接下来结合示例讲解 UDF 的使用方法。 -# C 语言实现UDF +# C 语言实现 UDF 使用 C 语言实现 UDF 时,需要实现规定的接口函数 - 标量函数需要实现标量接口函数 scalarfn 。 @@ -269,7 +269,7 @@ select max_vol(vol1,vol2,vol3,deviceid) from battery; -# Python 语言实现UDF +# Python 语言实现 UDF 使用 Python 语言实现 UDF 时,需要实现规定的接口函数 - 标量函数需要实现标量接口函数 process 。 - 聚合函数需要实现聚合接口函数 start ,reduce ,finish。 @@ -336,7 +336,10 @@ def destroy() 其中 init 完成初始化工作。 destroy 完成清理工作。如果没有初始化工作,无需定义 init 函数。如果没有清理工作,无需定义 destroy 函数。 -## Python数据类型和TDengine数据类型映射 +## Python 数据类型和 TDengine 数据类型映射 + +下表描述了TDengine SQL数据类型和Python数据类型的映射。任何类型的NULL值都映射成Python的None值。 + | **TDengine SQL数据类型** | **Python数据类型** | | :-----------------------: | ------------ | |TINYINT / SMALLINT / INT / BIGINT | int | @@ -350,8 +353,8 @@ def destroy() ## Python UDF 环境的安装 1. 安装 taospyudf 包。此包执行Python UDF程序。 ```bash -pip install taospyudf -lddconfig +sudo pip install taospyudf +ldconfig ``` 2. 如果 Python UDF 程序执行时,通过 PYTHONPATH 引用其它的包,可以设置 taos.cfg 的 UdfdLdLibPath 变量为PYTHONPATH的内容 @@ -382,5 +385,5 @@ pyl2norm 实现了输入列的所有数据的二阶范数,即对每个数据 -# 管理和使用UDF -编译好的UDF,还需要将其加入到系统才能被正常的SQL调用。关于如何管理和使用UDF,参见[UDF使用说明](../12-taos-sql/26-udf.md) \ No newline at end of file +# 管理和使用 UDF +需要 UDF 将其加入到系统才能被正常的 SQL 调用。关于如何管理和使用 UDF,参见[UDF使用说明](../12-taos-sql/26-udf.md) \ No newline at end of file diff --git a/docs/zh/12-taos-sql/26-udf.md b/docs/zh/12-taos-sql/26-udf.md index a45e837c8d..c1d2761d7d 100644 --- a/docs/zh/12-taos-sql/26-udf.md +++ b/docs/zh/12-taos-sql/26-udf.md @@ -11,15 +11,13 @@ description: 使用 UDF 的详细指南 在创建 UDF 时,需要区分标量函数和聚合函数。如果创建时声明了错误的函数类别,则可能导致通过 SQL 指令调用函数时出错。此外,用户需要保证输入数据类型与 UDF 程序匹配,UDF 输出数据类型与 OUTPUTTYPE 匹配。 -使用 CREATE OR REPLACE FUNCTION,如果函数已经存在,会修改已有的函数属性。 - - 创建标量函数 ```sql CREATE [OR REPLACE] FUNCTION function_name AS library_path OUTPUTTYPE output_type [LANGUAGE 'C|Python']; ``` - - - function_name:标量函数未来在 SQL 中被调用时的函数名,必须与函数实现中 udf 的实际名称一致; - - LANGUAGE 'C|Python':函数编程语言,目前支持C语言和Python语言。 + - OR REPLACE: 如果函数已经存在,会修改已有的函数属性。 + - function_name:标量函数未来在 SQL 中被调用时的函数名; + - LANGUAGE 'C|Python':函数编程语言,目前支持C语言和Python语言。 如果这个从句忽略,编程语言是C语言 - library_path:如果编程语言是C,路径是包含 UDF 函数实现的动态链接库的库文件绝对路径(指的是库文件在当前客户端所在主机上的保存路径,通常是指向一个 .so 文件)。如果编程语言是Python,路径是包含 UDF 函数实现的Python文件路径。这个路径需要用英文单引号或英文双引号括起来; - output_type:此函数计算结果的数据类型名称; @@ -38,7 +36,7 @@ CREATE [OR REPLACE] FUNCTION function_name AS library_path OUTPUTTYPE output_typ ```sql CREATE [OR REPLACE] AGGREGATE FUNCTION function_name AS library_path OUTPUTTYPE output_type [ BUFSIZE buffer_size ] [LANGUAGE 'C|Python']; ``` - + - OR REPLACE: 如果函数已经存在,会修改已有的函数属性。 - function_name:聚合函数未来在 SQL 中被调用时的函数名,必须与函数实现中 udfNormalFunc 的实际名称一致; - LANGUAGE 'C|Python':函数编程语言,目前支持C语言和Python语言。 - library_path:如果编程语言是C,路径是包含 UDF 函数实现的动态链接库的库文件绝对路径(指的是库文件在当前客户端所在主机上的保存路径,通常是指向一个 .so 文件)。如果编程语言是Python,路径是包含 UDF 函数实现的Python文件路径。这个路径需要用英文单引号或英文双引号括起来;; From c949fee06b12da07f9b2dcd445e14f3093a6588f Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 5 May 2023 10:33:24 +0800 Subject: [PATCH 18/40] enh: commit vnode after consolidating alter hash range --- source/dnode/mnode/impl/src/mndVgroup.c | 2 +- source/dnode/vnode/src/vnd/vnodeOpen.c | 2 -- source/dnode/vnode/src/vnd/vnodeSvr.c | 12 ++++++++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c 
index add2b1568c..d4be6bed73 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -2164,7 +2164,7 @@ static int32_t mndSplitVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj mInfo("vgId:%d, vnode:%d dnode:%d", newVg2.vgId, i, newVg2.vnodeGid[i].dnodeId); } - // alter hash range + // alter vgId and hash range int32_t maxVgId = sdbGetMaxId(pMnode->pSdb, SDB_VGROUP); if (mndAddAlterVnodeHashRangeAction(pMnode, pTrans, &newVg1, maxVgId) != 0) goto _OVER; newVg1.vgId = maxVgId; diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index 2f7520e3a7..b5e7c6875b 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -236,8 +236,6 @@ int32_t vnodeAlterHashRange(const char *srcPath, const char *dstPath, SAlterVnod return -1; } - // todo vnode compact here - vInfo("vgId:%d, vnode hashrange is altered", info.config.vgId); return 0; } diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index b5a5b1500b..a8511eedfd 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -425,7 +425,10 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp } } break; case TDMT_VND_ALTER_CONFIRM: - vnodeProcessAlterConfirmReq(pVnode, version, pReq, len, pRsp); + needCommit = pVnode->config.hashChange; + if (vnodeProcessAlterConfirmReq(pVnode, version, pReq, len, pRsp) < 0) { + goto _err; + } break; case TDMT_VND_ALTER_CONFIG: vnodeProcessAlterConfigReq(pVnode, version, pReq, len, pRsp); @@ -1472,6 +1475,11 @@ static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t version, void } code = vnodeConsolidateAlterHashRange(pVnode, version); + if (code < 0) { + vError("vgId:%d, failed to consolidate alter hashrange since %s. version:%" PRId64, TD_VID(pVnode), terrstr(), + version); + goto _exit; + } pVnode->config.hashChange = false; _exit: @@ -1480,7 +1488,7 @@ _exit: pRsp->pCont = NULL; pRsp->contLen = 0; - return 0; + return code; } static int32_t vnodeProcessAlterConfigReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp) { From c5b4bbc6d23f9880548c4cd475e3a45e8a57c82e Mon Sep 17 00:00:00 2001 From: Alex Duan <51781608+DuanKuanJun@users.noreply.github.com> Date: Fri, 5 May 2023 13:55:15 +0800 Subject: [PATCH 19/40] Update 09-udf.md --- docs/zh/07-develop/09-udf.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/zh/07-develop/09-udf.md b/docs/zh/07-develop/09-udf.md index 703f222516..e22e466ad7 100644 --- a/docs/zh/07-develop/09-udf.md +++ b/docs/zh/07-develop/09-udf.md @@ -359,7 +359,7 @@ ldconfig 2. 
如果 Python UDF 程序执行时,通过 PYTHONPATH 引用其它的包,可以设置 taos.cfg 的 UdfdLdLibPath 变量为PYTHONPATH的内容 ## Python UDF 示例代码 -### Python UDF 标量函数示例 [pybitand](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/pybitand.py) +### Python UDF 标量函数示例 [pybitand](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/pybitand.py) pybitand 实现多列的按位与功能。如果只有一列,返回这一列。pybitand 忽略空值。 @@ -372,7 +372,7 @@ pybitand 实现多列的按位与功能。如果只有一列,返回这一列 -### Python UDF 聚合函数示例 [pyl2norm](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/pyl2norm.py) +### Python UDF 聚合函数示例 [pyl2norm](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/pyl2norm.py) pyl2norm 实现了输入列的所有数据的二阶范数,即对每个数据先平方,再累加求和,最后开方。 @@ -386,4 +386,4 @@ pyl2norm 实现了输入列的所有数据的二阶范数,即对每个数据 # 管理和使用 UDF -需要 UDF 将其加入到系统才能被正常的 SQL 调用。关于如何管理和使用 UDF,参见[UDF使用说明](../12-taos-sql/26-udf.md) \ No newline at end of file +需要 UDF 将其加入到系统才能被正常的 SQL 调用。关于如何管理和使用 UDF,参见[UDF使用说明](../12-taos-sql/26-udf.md) From 879bbe982a30834cdbd6f5a055453a1224339fed Mon Sep 17 00:00:00 2001 From: gccgdb1234 Date: Fri, 5 May 2023 14:08:21 +0800 Subject: [PATCH 20/40] doc: revise doc structure of UDF --- docs/en/07-develop/09-udf.md | 51 ++++++++++++++++----------------- docs/zh/07-develop/09-udf.md | 55 ++++++++++++++++++------------------ 2 files changed, 53 insertions(+), 53 deletions(-) diff --git a/docs/en/07-develop/09-udf.md b/docs/en/07-develop/09-udf.md index e1f1bab4e0..aef845d3ce 100644 --- a/docs/en/07-develop/09-udf.md +++ b/docs/en/07-develop/09-udf.md @@ -10,7 +10,7 @@ User-defined functions can be scalar functions or aggregate functions. Scalar fu TDengine supports user-defined functions written in C or Python. This document describes the usage of user-defined functions. -# Implement a UDF in C +## Implement a UDF in C When you create a user-defined function, you must implement standard interface functions: - For scalar functions, implement the `scalarfn` interface function. @@ -19,7 +19,7 @@ When you create a user-defined function, you must implement standard interface f There are strict naming conventions for these interface functions. The names of the start, finish, init, and destroy interfaces must be _start, _finish, _init, and _destroy, respectively. Replace `scalarfn`, `aggfn`, and `udf` with the name of your user-defined function. -## Implementing a Scalar Function in C +### Implementing a Scalar Function in C The implementation of a scalar function is described as follows: ```c #include "taos.h" @@ -102,7 +102,7 @@ int32_t aggfn_destroy() { ``` Replace `aggfn` with the name of your function. -## C UDF Interface Functions +### UDF Interface Definition in C There are strict naming conventions for interface functions. The names of the start, finish, init, and destroy interfaces must be _start, _finish, _init, and _destroy, respectively. Replace `scalarfn`, `aggfn`, and `udf` with the name of your user-defined function. @@ -110,8 +110,7 @@ Interface functions return a value that indicates whether the operation was succ For information about the parameters for interface functions, see Data Model -### Interfaces for C UDF Scalar Functions - +#### Scalar Interface `int32_t scalarfn(SUdfDataBlock* inputDataBlock, SUdfColumn *resultColumn)` Replace `scalarfn` with the name of your function. This function performs scalar calculations on data blocks. You can configure a value through the parameters in the `resultColumn` structure. @@ -120,7 +119,7 @@ The parameters in the function are defined as follows: - inputDataBlock: The data block to input. 
- resultColumn: The column to output. The column to output. -### Interfaces for C UDF Aggregate Functions +#### Aggregate Interface `int32_t aggfn_start(SUdfInterBuf *interBuf)` @@ -137,7 +136,7 @@ The parameters in the function are defined as follows: - result: The final result. -### C UDF Initializing and Terminating User-Defined Functions +#### Initialization and Cleanup Interface `int32_t udf_init()` `int32_t udf_destroy()` @@ -145,7 +144,7 @@ The parameters in the function are defined as follows: Replace `udf` with the name of your function. udf_init initializes the function. udf_destroy terminates the function. If it is not necessary to initialize your function, udf_init is not required. If it is not necessary to terminate your function, udf_destroy is not required. -## Data Structure of C User-Defined Functions +### Data Structures for UDF in C ```c typedef struct SUdfColumnMeta { int16_t type; @@ -203,7 +202,7 @@ The data structure is described as follows: Additional functions are defined in `taosudf.h` to make it easier to work with these structures. -## Compile C UDF +### Compiling C UDF To use your user-defined function in TDengine, first, compile it to a shared library. @@ -215,9 +214,9 @@ gcc -g -O0 -fPIC -shared bit_and.c -o libbitand.so The generated DLL file `libbitand.so` can now be used to implement your function. Note: GCC 7.5 or later is required. -## C UDF Sample Code +### UDF Sample Code in C -### C UDF Sample scalar function: [bit_and](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/bit_and.c) +#### Scalar function: [bit_and](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/bit_and.c) The bit_and function implements bitwise addition for multiple columns. If there is only one column, the column is returned. The bit_and function ignores null values. @@ -230,7 +229,7 @@ The bit_and function implements bitwise addition for multiple columns. If there -### C UDF Sample aggregate function 1: [l2norm](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/l2norm.c) +#### Aggregate function 1: [l2norm](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/l2norm.c) The l2norm function finds the second-order norm for all data in the input column. This squares the values, takes a cumulative sum, and finds the square root. @@ -243,7 +242,7 @@ The l2norm function finds the second-order norm for all data in the input column -### C UDF Sample aggregate function 2: [max_vol](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/max_vol.c) +#### Aggregate function 2: [max_vol](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/max_vol.c) The max_vol function returns a string concatenating the deviceId column, the row number and column number of the maximum voltage and the maximum voltage given several voltage columns as input. @@ -269,14 +268,14 @@ select max_vol(vol1,vol2,vol3,deviceid) from battery; -#Implement a UDF in Python +## Implement a UDF in Python Implement the specified interface functions when implementing a UDF in Python. 
- implement `process` function for the scalar UDF。 - implement `start`, `reduce`, `finish` for the aggregate UDF。 - implement `init` for initialization and `destroy` for termination。 -## Implement a Scalar UDF in Python +### Implement a Scalar UDF in Python The implementation of a scalar UDF is described as follows: @@ -291,7 +290,7 @@ def process(input: datablock) -> tuple[output_type]: # return tuple object consisted of object of type outputtype ``` -## Implement an Aggregate UDF in Python +### Implement an Aggregate UDF in Python The implementation of an aggregate function is described as follows: @@ -312,16 +311,16 @@ def finish(buf: bytes) -> output_type: #return obj of type outputtype ``` -## Python UDF interface functions +### Python UDF Interface Definition -### Python UDF scalar interface functions +#### Scalar interface ```Python def process(input: datablock) -> tuple[output_type]: ``` - `input` is a data block two-dimension matrix-like object, of which method `data(row, col)` returns the Python object located at location (`row`, `col`) - return a Python tuple object, of which each item is a Python object of type `output_type` -### Python UDF aggregate interface functions +#### Aggregate Interface ```Python def start() -> bytes: def reduce(input: datablock, buf: bytes) -> bytes @@ -333,14 +332,14 @@ def finish(buf: bytes) -> output_type: - finally, the `finish` function is called on the intermediate result `buf` and outputs 0 or 1 data of type `output_type` -### Python UDF Initialization and Termination +#### Initialization and Cleanup Interface ```Python def init() def destroy() ``` Implement `init` for initialization and `destroy` for termination. -## TDengine SQL data type and Python UDF Data Type Mapping Table +### Data Mapping between TDengine SQL and Python UDF The following table describes the mapping between TDengine SQL data type and Python UDF Data Type. The `NULL` value of all TDengine SQL types is mapped to the `None` value in Python. @@ -354,7 +353,7 @@ The following table describes the mapping between TDengine SQL data type and Pyt |TIMESTAMP | int | |JSON and other types | Not Supported | -## Python UDF Installation +### Installing Python UDF 1. Install Python package `taospyudf` that executes Python UDF ```bash sudo pip install taospyudf @@ -362,8 +361,8 @@ ldconfig ``` 2. If PYTHONPATH is needed to find Python packages when the Python UDF executes, include the PYTHONPATH contents into the udfdLdLibPath variable of the taos.cfg configuration file -## Python UDF Sample Code -### Python UDF Scalar Function Sample Code [pybitand](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/pybitand.py) +### Python UDF Sample Code +#### Scalar Function [pybitand](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/pybitand.py) The `pybitand` function implements bitwise addition for multiple columns. If there is only one column, the column is returned. The `pybitand` function ignores null values. @@ -376,7 +375,7 @@ The `pybitand` function implements bitwise addition for multiple columns. If the -### Python UDF Aggregate Function Sample Code [pyl2norm](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/pyl2norm.py) +#### Aggregate Function [pyl2norm](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/pyl2norm.py) The `pyl2norm` function finds the second-order norm for all data in the input column. This squares the values, takes a cumulative sum, and finds the square root.
@@ -389,4 +388,4 @@ The `pyl2norm` function finds the second-order norm for all data in the input co
## Manage and Use User-Defined Functions -You can add UDF to TDengine before using it in SQL queries. For more information, see [User-Defined Functions](../12-taos-sql/26-udf.md). +You need to add UDF to TDengine before using it in SQL queries. For more information about how to manage UDF and how to invoke UDF, please see [Manage and Use UDF](../12-taos-sql/26-udf.md). diff --git a/docs/zh/07-develop/09-udf.md b/docs/zh/07-develop/09-udf.md index 703f222516..dbbea7a00f 100644 --- a/docs/zh/07-develop/09-udf.md +++ b/docs/zh/07-develop/09-udf.md @@ -10,7 +10,7 @@ description: "支持用户编码的聚合函数和标量函数,在查询中嵌 TDengine 支持通过 C/Python 语言进行 UDF 定义。接下来结合示例讲解 UDF 的使用方法。 -# C 语言实现 UDF +## 用 C 语言实现 UDF 使用 C 语言实现 UDF 时,需要实现规定的接口函数 - 标量函数需要实现标量接口函数 scalarfn 。 @@ -19,7 +19,7 @@ TDengine 支持通过 C/Python 语言进行 UDF 定义。接下来结合示例 接口函数的名称是 UDF 名称,或者是 UDF 名称和特定后缀(_start, _finish, _init, _destroy)的连接。列表中的scalarfn,aggfn, udf需要替换成udf函数名。 -## C UDF 实现标量函数 +### 用 C 语言实现标量函数 标量函数实现模板如下 ```c #include "taos.h" @@ -51,7 +51,7 @@ int32_t scalarfn_destroy() { ``` scalarfn 为函数名的占位符,需要替换成函数名,如bit_and。 -## C UDF 实现聚合函数 +### 用 C 语言实现聚合函数 聚合函数的实现模板如下 ```c @@ -102,7 +102,7 @@ int32_t aggfn_destroy() { ``` aggfn为函数名的占位符,需要修改为自己的函数名,如l2norm。 -## C UDF 接口函数定义 +### C 语言 UDF 接口函数定义 接口函数的名称是 udf 名称,或者是 udf 名称和特定后缀(_start, _finish, _init, _destroy)的连接。以下描述中函数名称中的 scalarfn,aggfn, udf 需要替换成udf函数名。 @@ -110,7 +110,7 @@ aggfn为函数名的占位符,需要修改为自己的函数名,如l2norm。 接口函数参数类型见数据结构定义。 -### C UDF 标量接口函数 +#### 标量函数接口 `int32_t scalarfn(SUdfDataBlock* inputDataBlock, SUdfColumn *resultColumn)` @@ -120,7 +120,7 @@ aggfn为函数名的占位符,需要修改为自己的函数名,如l2norm。 - inputDataBlock: 输入的数据块 - resultColumn: 输出列 -### C UDF 聚合接口函数 +#### 聚合函数接口 `int32_t aggfn_start(SUdfInterBuf *interBuf)` @@ -137,7 +137,7 @@ aggfn为函数名的占位符,需要修改为自己的函数名,如l2norm。 - result:最终结果。 -### C UDF 初始化和销毁 +#### 初始化和销毁接口 `int32_t udf_init()` `int32_t udf_destroy()` @@ -145,7 +145,7 @@ aggfn为函数名的占位符,需要修改为自己的函数名,如l2norm。 其中 udf 是函数名的占位符。udf_init 完成初始化工作。 udf_destroy 完成清理工作。如果没有初始化工作,无需定义udf_init函数。如果没有清理工作,无需定义udf_destroy函数。 -## C UDF 数据结构 +### C 语言 UDF 数据结构 ```c typedef struct SUdfColumnMeta { int16_t type; @@ -203,7 +203,7 @@ typedef struct SUdfInterBuf { 为了更好的操作以上数据结构,提供了一些便利函数,定义在 taosudf.h。 -## 编译 C UDF +### 编译 C UDF 用户定义函数的 C 语言源代码无法直接被 TDengine 系统使用,而是需要先编译为 动态链接库,之后才能载入 TDengine 系统。 @@ -215,9 +215,9 @@ gcc -g -O0 -fPIC -shared bit_and.c -o libbitand.so 这样就准备好了动态链接库 libbitand.so 文件,可以供后文创建 UDF 时使用了。为了保证可靠的系统运行,编译器 GCC 推荐使用 7.5 及以上版本。 -## C UDF 示例代码 +### C UDF 示例代码 -### C UDF 标量函数示例 [bit_and](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/bit_and.c) +#### 标量函数示例 [bit_and](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/bit_and.c) bit_add 实现多列的按位与功能。如果只有一列,返回这一列。bit_add 忽略空值。 @@ -230,7 +230,7 @@ bit_add 实现多列的按位与功能。如果只有一列,返回这一列。 -### C UDF 聚合函数示例1 返回值为数值类型 [l2norm](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/l2norm.c) +#### 聚合函数示例1 返回值为数值类型 [l2norm](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/l2norm.c) l2norm 实现了输入列的所有数据的二阶范数,即对每个数据先平方,再累加求和,最后开方。 @@ -243,7 +243,7 @@ l2norm 实现了输入列的所有数据的二阶范数,即对每个数据先 -### C UDF 聚合函数示例2 返回值为字符串类型 [max_vol](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/max_vol.c) +#### 聚合函数示例2 返回值为字符串类型 [max_vol](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/max_vol.c) max_vol 实现了从多个输入的电压列中找到最大电压,返回由设备ID + 最大电压所在(行,列)+ 最大电压值 组成的组合字符串值 @@ -269,13 +269,14 @@ select max_vol(vol1,vol2,vol3,deviceid) from battery; -# Python 语言实现 UDF +## 用 Python 语言实现 UDF + 使用 Python 语言实现 UDF 时,需要实现规定的接口函数 - 
标量函数需要实现标量接口函数 process 。 - 聚合函数需要实现聚合接口函数 start ,reduce ,finish。 - 如果需要初始化,实现 init;如果需要清理工作,实现 destroy。 -## Python UDF 实现标量函数 +### 用 Python 实现标量函数 标量函数实现模版如下 ```Python @@ -289,7 +290,7 @@ def process(input: datablock) -> tuple[output_type]: # return tuple object consisted of object of type outputtype ``` -## Python UDF 实现聚合函数 +### 用 Python 实现聚合函数 聚合函数实现模版如下 ```Python @@ -309,16 +310,16 @@ def finish(buf: bytes) -> output_type: #return obj of type outputtype ``` -## Python UDF 接口函数定义 +### Python UDF 接口函数定义 -### Python UDF 标量接口函数 +#### 标量函数接口 ```Python def process(input: datablock) -> tuple[output_type]: ``` - input:datablock 类似二维矩阵,通过成员方法 data(row,col)返回位于 row 行,col 列的 python 对象 - 返回值是一个 Python 对象元组,每个元素类型为输出类型。 -### Python UDF 聚合接口函数 +#### 聚合函数接口 ```Python def start() -> bytes: def reduce(inputs: datablock, buf: bytes) -> bytes @@ -328,7 +329,7 @@ def finish(buf: bytes) -> output_type: 首先调用 start 生成最初结果 buffer,然后输入数据会被分为多个行数据块,对每个数据块 inputs 和当前中间结果 buf 调用 reduce,得到新的中间结果,最后再调用 finish 从中间结果 buf 产生最终输出,最终输出只能含 0 或 1 条数据。 -### Python UDF 初始化和销毁 +#### 初始化和销毁接口 ```Python def init() def destroy() @@ -336,7 +337,7 @@ def destroy() 其中 init 完成初始化工作。 destroy 完成清理工作。如果没有初始化工作,无需定义 init 函数。如果没有清理工作,无需定义 destroy 函数。 -## Python 数据类型和 TDengine 数据类型映射 +### Python 和 TDengine之间的数据类型映射 下表描述了TDengine SQL数据类型和Python数据类型的映射。任何类型的NULL值都映射成Python的None值。 @@ -350,7 +351,7 @@ def destroy() |TIMESTAMP | int | |JSON and other types | 不支持 | -## Python UDF 环境的安装 +### Python UDF 环境的安装 1. 安装 taospyudf 包。此包执行Python UDF程序。 ```bash sudo pip install taospyudf @@ -358,8 +359,8 @@ ldconfig ``` 2. 如果 Python UDF 程序执行时,通过 PYTHONPATH 引用其它的包,可以设置 taos.cfg 的 UdfdLdLibPath 变量为PYTHONPATH的内容 -## Python UDF 示例代码 -### Python UDF 标量函数示例 [pybitand](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/pybitand.py) +### Python UDF 示例代码 +#### 标量函数示例 [pybitand](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/pybitand.py) pybitand 实现多列的按位与功能。如果只有一列,返回这一列。pybitand 忽略空值。 @@ -372,7 +373,7 @@ pybitand 实现多列的按位与功能。如果只有一列,返回这一列 -### Python UDF 聚合函数示例 [pyl2norm](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/pyl2norm.py) +#### 聚合函数示例 [pyl2norm](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/pyl2norm.py) pyl2norm 实现了输入列的所有数据的二阶范数,即对每个数据先平方,再累加求和,最后开方。 @@ -385,5 +386,5 @@ pyl2norm 实现了输入列的所有数据的二阶范数,即对每个数据 -# 管理和使用 UDF -需要 UDF 将其加入到系统才能被正常的 SQL 调用。关于如何管理和使用 UDF,参见[UDF使用说明](../12-taos-sql/26-udf.md) \ No newline at end of file +## 管理和使用 UDF +在使用 UDF 之前需要先将其加入到 TDengine 系统中。关于如何管理和使用 UDF,请参考[管理和使用 UDF](../12-taos-sql/26-udf.md) \ No newline at end of file From 623e6e7c2f6ae415ca34eab0333ec1d6d16ca898 Mon Sep 17 00:00:00 2001 From: wade zhang <95411902+gccgdb1234@users.noreply.github.com> Date: Fri, 5 May 2023 14:22:29 +0800 Subject: [PATCH 21/40] Update 09-udf.md --- docs/en/07-develop/09-udf.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/07-develop/09-udf.md b/docs/en/07-develop/09-udf.md index aef845d3ce..a7753647e3 100644 --- a/docs/en/07-develop/09-udf.md +++ b/docs/en/07-develop/09-udf.md @@ -387,5 +387,5 @@ The `pyl2norm` function finds the second-order norm for all data in the input co -## Manage and Use User-Defined Functions +## Manage and Use UDF You need to add UDF to TDengine before using it in SQL queries. For more information about how to manage UDF and how to invoke UDF, please see [Manage and Use UDF](../12-taos-sql/26-udf.md). 
From 0e6de0935393270f4f36ec23b6d1ae07b9031cdc Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 5 May 2023 14:53:32 +0800 Subject: [PATCH 22/40] enh: unify error msg for disk space checking in doInitAggInfoSup --- source/libs/executor/src/aggregateoperator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/libs/executor/src/aggregateoperator.c b/source/libs/executor/src/aggregateoperator.c index ec8060348d..1f170552db 100644 --- a/source/libs/executor/src/aggregateoperator.c +++ b/source/libs/executor/src/aggregateoperator.c @@ -467,8 +467,8 @@ int32_t doInitAggInfoSup(SAggSupporter* pAggSup, SqlFunctionCtx* pCtx, int32_t n getBufferPgSize(pAggSup->resultRowSize, &defaultPgsz, &defaultBufsz); if (!osTempSpaceAvailable()) { - code = TSDB_CODE_NO_AVAIL_DISK; - qError("Init stream agg supporter failed since %s, %s", terrstr(code), pKey); + code = TSDB_CODE_NO_DISKSPACE; + qError("Init stream agg supporter failed since %s, key:%s, tempDir:%s", terrstr(code), pKey, tsTempDir); return code; } From b0e77eeb4500fa7b1ab8afab56f1e24bfe84993f Mon Sep 17 00:00:00 2001 From: gccgdb1234 Date: Fri, 5 May 2023 16:13:47 +0800 Subject: [PATCH 23/40] doc: refine the description of TABLE_PREFIX and TABLE_SUFFIX --- docs/en/12-taos-sql/02-database.md | 4 ++-- docs/zh/12-taos-sql/02-database.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/12-taos-sql/02-database.md b/docs/en/12-taos-sql/02-database.md index ec007d6830..8c579622c9 100644 --- a/docs/en/12-taos-sql/02-database.md +++ b/docs/en/12-taos-sql/02-database.md @@ -72,8 +72,8 @@ database_option: { - 0: The database can contain multiple supertables. - 1: The database can contain only one supertable. - STT_TRIGGER: specifies the number of file merges triggered by flushed files. The default is 8, ranging from 1 to 16. For high-frequency scenarios with few tables, it is recommended to use the default configuration or a smaller value for this parameter; For multi-table low-frequency scenarios, it is recommended to configure this parameter with a larger value. -- TABLE_PREFIX:The prefix length in the table name that is ignored when distributing table to vnode based on table name. -- TABLE_SUFFIX:The suffix length in the table name that is ignored when distributing table to vnode based on table name. +- TABLE_PREFIX: The prefix in the table name that is ignored when distributing a table to a vgroup when it's a positive number, or only the prefix is used when distributing a table to a vgroup, the default value is 0; For example, if the table name v30001, then "0001" is used if TSDB_PREFIX is set to 2 but "v3" is used if TSDB_PREFIX is set to -2; It can help you to control the distribution of tables. +- TABLE_SUFFIX:The suffix in the table name that is ignored when distributing a table to a vgroup when it's a positive number, or only the suffix is used when distributing a table to a vgroup, the default value is 0; For example, if the table name v30001, then "v300" is used if TSDB_SUFFIX is set to 2 but "01" is used if TSDB_SUFFIX is set to -2; It can help you to control the distribution of tables. - TSDB_PAGESIZE: The page size of the data storage engine in a vnode. The unit is KB. The default is 4 KB. The range is 1 to 16384, that is, 1 KB to 16 MB. - WAL_RETENTION_PERIOD: specifies the maximum time of which WAL files are to be kept for consumption. This parameter is used for data subscription. Enter a time in seconds. The default value 0. 
A value of 0 indicates that WAL files are not required to keep for consumption. Alter it with a proper value at first to create topics. - WAL_RETENTION_SIZE: specifies the maximum total size of which WAL files are to be kept for consumption. This parameter is used for data subscription. Enter a size in KB. The default value is 0. A value of 0 indicates that the total size of WAL files to keep for consumption has no upper limit. diff --git a/docs/zh/12-taos-sql/02-database.md b/docs/zh/12-taos-sql/02-database.md index dd30635660..b0e2887ba9 100644 --- a/docs/zh/12-taos-sql/02-database.md +++ b/docs/zh/12-taos-sql/02-database.md @@ -71,8 +71,8 @@ database_option: { - 0:表示可以创建多张超级表。 - 1:表示只可以创建一张超级表。 - STT_TRIGGER:表示落盘文件触发文件合并的个数。默认为 1,范围 1 到 16。对于少表高频场景,此参数建议使用默认配置,或较小的值;而对于多表低频场景,此参数建议配置较大的值。 -- TABLE_PREFIX:内部存储引擎根据表名分配存储该表数据的 VNODE 时要忽略的前缀的长度。 -- TABLE_SUFFIX:内部存储引擎根据表名分配存储该表数据的 VNODE 时要忽略的后缀的长度。 +- TABLE_PREFIX:当其为正值时,在决定把一个表分配到哪个 vgroup 时要忽略表名中指定长度的前缀;当其为负值时,在决定把一个表分配到哪个 vgroup 时只使用表名中指定长度的前缀;例如,假定表名为 "v30001",当 TSDB_PREFIX = 2 时 使用 "0001" 来决定分配到哪个 vgroup ,当 TSDB_PREFIX = -2 时使用 "v3" 来决定分配到哪个 vgroup +- TABLE_SUFFIX:当其为正值时,在决定把一个表分配到哪个 vgroup 时要忽略表名中指定长度的后缀;当其为负值时,在决定把一个表分配到哪个 vgroup 时只使用表名中指定长度的后缀;例如,假定表名为 "v30001",当 TSDB_SUFFIX = 2 时 使用 "v300" 来决定分配到哪个 vgroup ,当 TSDB_SUFFIX = -2 时使用 "0001" 来决定分配到哪个 vgroup。 - TSDB_PAGESIZE:一个 VNODE 中时序数据存储引擎的页大小,单位为 KB,默认为 4 KB。范围为 1 到 16384,即 1 KB到 16 MB。 - WAL_RETENTION_PERIOD: 为了数据订阅消费,需要WAL日志文件额外保留的最大时长策略。WAL日志清理,不受订阅客户端消费状态影响。单位为 s。默认为 0,表示无需为订阅保留。新建订阅,应先设置恰当的时长策略。 - WAL_RETENTION_SIZE:为了数据订阅消费,需要WAL日志文件额外保留的最大累计大小策略。单位为 KB。默认为 0,表示累计大小无上限。 From b3bd3799afb018962cac149105d11d1e35d524b2 Mon Sep 17 00:00:00 2001 From: dapan1121 <72057773+dapan1121@users.noreply.github.com> Date: Fri, 5 May 2023 16:19:34 +0800 Subject: [PATCH 24/40] Update 02-database.md --- docs/zh/12-taos-sql/02-database.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/12-taos-sql/02-database.md b/docs/zh/12-taos-sql/02-database.md index b0e2887ba9..a2a0914120 100644 --- a/docs/zh/12-taos-sql/02-database.md +++ b/docs/zh/12-taos-sql/02-database.md @@ -72,7 +72,7 @@ database_option: { - 1:表示只可以创建一张超级表。 - STT_TRIGGER:表示落盘文件触发文件合并的个数。默认为 1,范围 1 到 16。对于少表高频场景,此参数建议使用默认配置,或较小的值;而对于多表低频场景,此参数建议配置较大的值。 - TABLE_PREFIX:当其为正值时,在决定把一个表分配到哪个 vgroup 时要忽略表名中指定长度的前缀;当其为负值时,在决定把一个表分配到哪个 vgroup 时只使用表名中指定长度的前缀;例如,假定表名为 "v30001",当 TSDB_PREFIX = 2 时 使用 "0001" 来决定分配到哪个 vgroup ,当 TSDB_PREFIX = -2 时使用 "v3" 来决定分配到哪个 vgroup -- TABLE_SUFFIX:当其为正值时,在决定把一个表分配到哪个 vgroup 时要忽略表名中指定长度的后缀;当其为负值时,在决定把一个表分配到哪个 vgroup 时只使用表名中指定长度的后缀;例如,假定表名为 "v30001",当 TSDB_SUFFIX = 2 时 使用 "v300" 来决定分配到哪个 vgroup ,当 TSDB_SUFFIX = -2 时使用 "0001" 来决定分配到哪个 vgroup。 +- TABLE_SUFFIX:当其为正值时,在决定把一个表分配到哪个 vgroup 时要忽略表名中指定长度的后缀;当其为负值时,在决定把一个表分配到哪个 vgroup 时只使用表名中指定长度的后缀;例如,假定表名为 "v30001",当 TSDB_SUFFIX = 2 时 使用 "v300" 来决定分配到哪个 vgroup ,当 TSDB_SUFFIX = -2 时使用 "01" 来决定分配到哪个 vgroup。 - TSDB_PAGESIZE:一个 VNODE 中时序数据存储引擎的页大小,单位为 KB,默认为 4 KB。范围为 1 到 16384,即 1 KB到 16 MB。 - WAL_RETENTION_PERIOD: 为了数据订阅消费,需要WAL日志文件额外保留的最大时长策略。WAL日志清理,不受订阅客户端消费状态影响。单位为 s。默认为 0,表示无需为订阅保留。新建订阅,应先设置恰当的时长策略。 - WAL_RETENTION_SIZE:为了数据订阅消费,需要WAL日志文件额外保留的最大累计大小策略。单位为 KB。默认为 0,表示累计大小无上限。 From 997be0a3edef6ac78923eed97ab2607658f1c034 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 5 May 2023 17:04:36 +0800 Subject: [PATCH 25/40] fix: having clause issues --- source/libs/parser/src/parTranslater.c | 29 ++++++++++++++++++-------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/source/libs/parser/src/parTranslater.c 
b/source/libs/parser/src/parTranslater.c index 25e92a55ec..a2748c7e6d 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -2289,6 +2289,23 @@ static int32_t checkAggColCoexist(STranslateContext* pCxt, SSelectStmt* pSelect) return TSDB_CODE_SUCCESS; } +static int32_t checkPartitionGroupBy(STranslateContext* pCxt, SSelectStmt* pSelect) { + int32_t code = TSDB_CODE_SUCCESS; + if (NULL == pSelect->pGroupByList && NULL == pSelect->pPartitionByList && NULL == pSelect->pWindow) { + return code; + } + if (NULL != pSelect->pHaving) { + code = checkExprForGroupBy(pCxt, &pSelect->pHaving); + } + if (TSDB_CODE_SUCCESS == code && NULL != pSelect->pProjectionList) { + code = checkExprListForGroupBy(pCxt, pSelect, pSelect->pProjectionList); + } + if (TSDB_CODE_SUCCESS == code && NULL != pSelect->pOrderByList) { + code = checkExprListForGroupBy(pCxt, pSelect, pSelect->pOrderByList); + } + return code; +} + static int32_t checkWindowFuncCoexist(STranslateContext* pCxt, SSelectStmt* pSelect) { if (NULL == pSelect->pWindow) { return TSDB_CODE_SUCCESS; @@ -2901,9 +2918,6 @@ static int32_t translateOrderBy(STranslateContext* pCxt, SSelectStmt* pSelect) { pCxt->currClause = SQL_CLAUSE_ORDER_BY; code = translateExprList(pCxt, pSelect->pOrderByList); } - if (TSDB_CODE_SUCCESS == code) { - code = checkExprListForGroupBy(pCxt, pSelect, pSelect->pOrderByList); - } return code; } @@ -3022,9 +3036,6 @@ static int32_t translateSelectList(STranslateContext* pCxt, SSelectStmt* pSelect if (TSDB_CODE_SUCCESS == code) { code = translateProjectionList(pCxt, pSelect); } - if (TSDB_CODE_SUCCESS == code) { - code = checkExprListForGroupBy(pCxt, pSelect, pSelect->pProjectionList); - } if (TSDB_CODE_SUCCESS == code) { code = translateFillValues(pCxt, pSelect); } @@ -3041,9 +3052,6 @@ static int32_t translateHaving(STranslateContext* pCxt, SSelectStmt* pSelect) { } pCxt->currClause = SQL_CLAUSE_HAVING; int32_t code = translateExpr(pCxt, &pSelect->pHaving); - if (TSDB_CODE_SUCCESS == code && (NULL != pSelect->pGroupByList || NULL != pSelect->pWindow)) { - code = checkExprForGroupBy(pCxt, &pSelect->pHaving); - } return code; } @@ -3629,6 +3637,9 @@ static int32_t translateSelectFrom(STranslateContext* pCxt, SSelectStmt* pSelect if (TSDB_CODE_SUCCESS == code) { code = translateOrderBy(pCxt, pSelect); } + if (TSDB_CODE_SUCCESS == code) { + code = checkPartitionGroupBy(pCxt, pSelect); + } if (TSDB_CODE_SUCCESS == code) { code = checkAggColCoexist(pCxt, pSelect); } From 908fd4ff97bd2c1b00b85ef5b84fca1f264d4f4b Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 5 May 2023 17:48:21 +0800 Subject: [PATCH 26/40] feat: process split vgroup msg imp --- source/dnode/mnode/impl/src/mndVgroup.c | 41 ++++--------------------- 1 file changed, 6 insertions(+), 35 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index d4be6bed73..e580c78383 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -2103,7 +2103,7 @@ static int32_t mndAddAdjustVnodeHashRangeAction(SMnode *pMnode, STrans *pTrans, return 0; } -static int32_t mndSplitVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj *pVgroup) { +int32_t mndSplitVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj *pVgroup) { int32_t code = -1; STrans *pTrans = NULL; SSdbRaw *pRaw = NULL; @@ -2229,42 +2229,13 @@ _OVER: return code; } -static int32_t mndProcessSplitVgroupMsg(SRpcMsg *pReq) { - SMnode *pMnode = pReq->info.node; - int32_t 
code = -1; - SVgObj *pVgroup = NULL; - SDbObj *pDb = NULL; +extern int32_t mndProcessSplitVgroupMsgImp(SRpcMsg *pReq); - SSplitVgroupReq req = {0}; - if (tDeserializeSSplitVgroupReq(pReq->pCont, pReq->contLen, &req) != 0) { - terrno = TSDB_CODE_INVALID_MSG; - goto _OVER; - } +static int32_t mndProcessSplitVgroupMsg(SRpcMsg *pReq) { return mndProcessSplitVgroupMsgImp(pReq); } - mInfo("vgId:%d, start to split", req.vgId); - if (mndCheckOperPrivilege(pMnode, pReq->info.conn.user, MND_OPER_SPLIT_VGROUP) != 0) { - goto _OVER; - } - - pVgroup = mndAcquireVgroup(pMnode, req.vgId); - if (pVgroup == NULL) goto _OVER; - - pDb = mndAcquireDb(pMnode, pVgroup->dbName); - if (pDb == NULL) goto _OVER; - - code = mndSplitVgroup(pMnode, pReq, pDb, pVgroup); - if (code != 0) { - mError("vgId:%d, failed to start to split vgroup since %s, db:%s", pVgroup->vgId, terrstr(), pDb->name); - goto _OVER; - } - - mInfo("vgId:%d, split vgroup started successfully. db:%s", pVgroup->vgId, pDb->name); - -_OVER: - mndReleaseVgroup(pMnode, pVgroup); - mndReleaseDb(pMnode, pDb); - return code; -} +#ifndef TD_ENTERPRISE +int32_t mndProcessSplitVgroupMsgImp(SRpcMsg *pReq) { return 0; } +#endif static int32_t mndSetBalanceVgroupInfoToTrans(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, SDnodeObj *pSrc, SDnodeObj *pDst) { From 76a20b2221b22e246ac4528e8e5f305f38e31cba Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 5 May 2023 18:57:15 +0800 Subject: [PATCH 27/40] enh: adjust enum ETrnExec --- source/dnode/mgmt/mgmt_vnode/src/vmWorker.c | 2 +- source/dnode/mnode/impl/inc/mndDef.h | 2 +- source/dnode/mnode/impl/inc/mndTrans.h | 1 + source/dnode/mnode/impl/src/mndTrans.c | 4 +++- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c index a7c4b2e70e..ef4a2b52bb 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c @@ -60,7 +60,7 @@ static void vmProcessMgmtQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { if (IsReq(pMsg)) { if (code != 0) { if (terrno != 0) code = terrno; - dGError("msg:%p, failed to process since %s, type:%s", pMsg, terrstr(code), TMSG_INFO(pMsg->msgType)); + dGError("msg:%p, failed to process since %s, type:%s", pMsg, tstrerror(code), TMSG_INFO(pMsg->msgType)); } vmSendRsp(pMsg, code); } diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index f547ce025d..e227848e9f 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -118,7 +118,7 @@ typedef enum { } ETrnPolicy; typedef enum { - TRN_EXEC_PRARLLEL = 0, + TRN_EXEC_PARALLEL = 0, TRN_EXEC_SERIAL = 1, } ETrnExec; diff --git a/source/dnode/mnode/impl/inc/mndTrans.h b/source/dnode/mnode/impl/inc/mndTrans.h index 057e3efbbc..03434573c4 100644 --- a/source/dnode/mnode/impl/inc/mndTrans.h +++ b/source/dnode/mnode/impl/inc/mndTrans.h @@ -76,6 +76,7 @@ void mndTransSetRpcRsp(STrans *pTrans, void *pCont, int32_t contLen); void mndTransSetCb(STrans *pTrans, ETrnFunc startFunc, ETrnFunc stopFunc, void *param, int32_t paramLen); void mndTransSetDbName(STrans *pTrans, const char *dbname, const char *stbname); void mndTransSetSerial(STrans *pTrans); +void mndTransSetParallel(STrans *pTrans); void mndTransSetOper(STrans *pTrans, EOperType oper); int32_t mndTrancCheckConflict(SMnode *pMnode, STrans *pTrans); diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 106eea0313..ccb0882bb0 100644 --- 
a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -643,7 +643,7 @@ STrans *mndTransCreate(SMnode *pMnode, ETrnPolicy policy, ETrnConflct conflict, pTrans->stage = TRN_STAGE_PREPARE; pTrans->policy = policy; pTrans->conflict = conflict; - pTrans->exec = TRN_EXEC_PRARLLEL; + pTrans->exec = TRN_EXEC_PARALLEL; pTrans->createdTime = taosGetTimestampMs(); pTrans->redoActions = taosArrayInit(TRANS_ARRAY_SIZE, sizeof(STransAction)); pTrans->undoActions = taosArrayInit(TRANS_ARRAY_SIZE, sizeof(STransAction)); @@ -793,6 +793,8 @@ void mndTransSetDbName(STrans *pTrans, const char *dbname, const char *stbname) void mndTransSetSerial(STrans *pTrans) { pTrans->exec = TRN_EXEC_SERIAL; } +void mndTransSetParallel(STrans *pTrans) { pTrans->exec = TRN_EXEC_PARALLEL; } + void mndTransSetOper(STrans *pTrans, EOperType oper) { pTrans->oper = oper; } static int32_t mndTransSync(SMnode *pMnode, STrans *pTrans) { From 0c3dc0867fff2d8884842d6dd88d2e6f8efd8320 Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 5 May 2023 20:33:41 +0800 Subject: [PATCH 28/40] chore: sync fix from main --- source/client/src/clientSml.c | 11 ++++++----- source/libs/tdb/src/db/tdbBtree.c | 5 +++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/source/client/src/clientSml.c b/source/client/src/clientSml.c index 45f0def157..98b64b211a 100644 --- a/source/client/src/clientSml.c +++ b/source/client/src/clientSml.c @@ -558,14 +558,15 @@ static int32_t smlGenerateSchemaAction(SSchema *colField, SHashObj *colHash, SSm return 0; } +#define BOUNDARY 1024 static int32_t smlFindNearestPowerOf2(int32_t length, uint8_t type) { int32_t result = 1; - if (length < 1024) { - while (result <= length) { - result <<= 1; - } - } else { + if (length >= BOUNDARY){ result = length; + }else{ + while (result <= length) { + result << 1; + } } if (type == TSDB_DATA_TYPE_BINARY && result > TSDB_MAX_BINARY_LEN - VARSTR_HEADER_SIZE) { diff --git a/source/libs/tdb/src/db/tdbBtree.c b/source/libs/tdb/src/db/tdbBtree.c index 6df2b40000..c49b5726b6 100644 --- a/source/libs/tdb/src/db/tdbBtree.c +++ b/source/libs/tdb/src/db/tdbBtree.c @@ -1814,6 +1814,11 @@ int tdbBtreeNext(SBTC *pBtc, void **ppKey, int *kLen, void **ppVal, int *vLen) { *ppVal = pVal; *vLen = cd.vLen; + } else { + if (TDB_CELLDECODER_FREE_VAL(&cd)) { + tdbTrace("tdb/btree-next2 decoder: %p pVal free: %p", &cd, cd.pVal); + tdbFree(cd.pVal); + } } ret = tdbBtcMoveToNext(pBtc); From 4ae9eab90ebf0e5ccc6dc34a772d7905d2d88414 Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 5 May 2023 20:42:49 +0800 Subject: [PATCH 29/40] chore: fix --- source/client/src/clientSml.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/client/src/clientSml.c b/source/client/src/clientSml.c index 98b64b211a..cf1c6cc434 100644 --- a/source/client/src/clientSml.c +++ b/source/client/src/clientSml.c @@ -565,7 +565,7 @@ static int32_t smlFindNearestPowerOf2(int32_t length, uint8_t type) { result = length; }else{ while (result <= length) { - result << 1; + result <<= 1; } } From 29da3087727535528b13a8329faf1c337312b806 Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 5 May 2023 23:58:45 +0800 Subject: [PATCH 30/40] chore: test case --- .../1-insert/influxdb_line_taosc_insert.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/system-test/1-insert/influxdb_line_taosc_insert.py b/tests/system-test/1-insert/influxdb_line_taosc_insert.py index 46aec2909a..819951cce5 100644 --- 
a/tests/system-test/1-insert/influxdb_line_taosc_insert.py +++ b/tests/system-test/1-insert/influxdb_line_taosc_insert.py @@ -908,22 +908,24 @@ class TDTestCase: self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) # * every binary and nchar must be length+2, so here is two tag, max length could not larger than 16384-2*2 - input_sql = f'{stb_name}, t0=f,t1="{tdCom.getLongName(4093, "letters")}" 1626006833639000000' + stb_name = tdCom.getLongName(8, "letters") + input_sql = f'{stb_name},t0=f,t1="{tdCom.getLongName(4091, "letters")}", c0=f 1626006833639000000' self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) tdSql.query(f"select * from {stb_name}") - tdSql.checkRows(2) - input_sql = f'{stb_name},t0=t,t1="{tdCom.getLongName(4084, "letters")}",t2="{tdCom.getLongName(6, "letters")}" c0=f 1626006833639000000' + tdSql.checkRows(1) + + input_sql = f'{stb_name},t0=t,t1="{tdCom.getLongName(4092, "letters")}", c0=f 1626006833639000000' try: self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) raise Exception("should not reach here") except SchemalessError as err: tdSql.checkNotEqual(err.errno, 0) tdSql.query(f"select * from {stb_name}") - tdSql.checkRows(2) + tdSql.checkRows(1) - stb_name = tdCom.getLongName(8, "letters") + stb_name = tdCom.getLongName(9, "letters") # # * check col,col+ts max in describe ---> 16143 input_sql = f'{stb_name},t0=t c0=1i32,c1="{tdCom.getLongName(65517, "letters")}" 1626006833639000000' self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) @@ -938,7 +940,7 @@ class TDTestCase: tdSql.checkRows(1) - stb_name = tdCom.getLongName(9, "letters") + stb_name = tdCom.getLongName(10, "letters") input_sql = f'{stb_name},t0=t c0=1i16,c1="{tdCom.getLongName(49133, "letters")}",c2="{tdCom.getLongName(16384, "letters")}" 1626006833639000000' self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) From 75795253a9b1dc6c3f7ebebbc0e19fb9bb0518fc Mon Sep 17 00:00:00 2001 From: kailixu Date: Sat, 6 May 2023 06:06:12 +0800 Subject: [PATCH 31/40] more code --- source/client/src/clientHb.c | 2 +- tests/system-test/1-insert/influxdb_line_taosc_insert.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/source/client/src/clientHb.c b/source/client/src/clientHb.c index 4240a8510d..203aad8068 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -1172,7 +1172,7 @@ void hbDeregisterConn(STscObj *pTscObj, SClientHbKey connKey) { taosThreadMutexLock(&pTscObj->mutex); if (pTscObj->passInfo.fp) { - int32_t cnt = atomic_sub_fetch_32(&pAppHbMgr->passKeyCnt, 1); + atomic_sub_fetch_32(&pAppHbMgr->passKeyCnt, 1); } taosThreadMutexUnlock(&pTscObj->mutex); } \ No newline at end of file diff --git a/tests/system-test/1-insert/influxdb_line_taosc_insert.py b/tests/system-test/1-insert/influxdb_line_taosc_insert.py index 819951cce5..3e9bad4efd 100644 --- a/tests/system-test/1-insert/influxdb_line_taosc_insert.py +++ b/tests/system-test/1-insert/influxdb_line_taosc_insert.py @@ -896,7 +896,7 @@ class TDTestCase: tdSql.checkRows(2) tdSql.checkNotEqual(tb_name1, tb_name3) - # * tag binary max is 16384, col+ts binary max 65531 + # * tag binary max is 16384-2, col+ts binary max 65531 def tagColBinaryMaxLengthCheckCase(self): """ every binary and nchar must be length+2 @@ -960,7 +960,7 @@ 
class TDTestCase: tdSql.checkNotEqual(err.errno, 0) - # * tag nchar max is 16384/4, col+ts nchar max 65531 + # * tag nchar max is (16384-2)/4, col+ts nchar max 65531 def tagColNcharMaxLengthCheckCase(self): """ check nchar length limit @@ -971,7 +971,7 @@ class TDTestCase: input_sql = f'{stb_name},id="{tb_name}",t0=t c0=f 1626006833639000000' code = self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) - # * legal tag nchar could not be larger than 16384/4 + # * legal tag nchar could not be larger than (16384-2)/4 # input_sql = f'{stb_name},t0=t,t1=L"{tdCom.getLongName(4093, "letters")}",t2=L"{tdCom.getLongName(1, "letters")}" c0=f 1626006833639000000' # self._conn.schemaless_insert([input_sql], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) # tdSql.query(f"select * from {stb_name}") From 72c6292ab47dc6b15799db750b742a0d5c843cd5 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Sat, 6 May 2023 09:14:03 +0800 Subject: [PATCH 32/40] enh: declare mndSplitVgroup in mndVgroup.h --- source/dnode/mnode/impl/inc/mndVgroup.h | 2 ++ source/dnode/mnode/impl/src/mndVgroup.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/source/dnode/mnode/impl/inc/mndVgroup.h b/source/dnode/mnode/impl/inc/mndVgroup.h index 0229735952..94c4eae83f 100644 --- a/source/dnode/mnode/impl/inc/mndVgroup.h +++ b/source/dnode/mnode/impl/inc/mndVgroup.h @@ -50,6 +50,8 @@ void *mndBuildCreateVnodeReq(SMnode *, SDnodeObj *pDnode, SDbObj *pDb, SVgObj *p void *mndBuildDropVnodeReq(SMnode *, SDnodeObj *pDnode, SDbObj *pDb, SVgObj *pVgroup, int32_t *pContLen); bool mndVgroupInDb(SVgObj *pVgroup, int64_t dbUid); +int32_t mndSplitVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj *pVgroup); + #ifdef __cplusplus } #endif diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index add2b1568c..7ece3b17ff 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -2103,7 +2103,7 @@ static int32_t mndAddAdjustVnodeHashRangeAction(SMnode *pMnode, STrans *pTrans, return 0; } -static int32_t mndSplitVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj *pVgroup) { +int32_t mndSplitVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj *pVgroup) { int32_t code = -1; STrans *pTrans = NULL; SSdbRaw *pRaw = NULL; From fd43736e926abe2a0c2531c370ec44b73a151a28 Mon Sep 17 00:00:00 2001 From: Alex Duan <51781608+DuanKuanJun@users.noreply.github.com> Date: Sat, 6 May 2023 10:48:31 +0800 Subject: [PATCH 33/40] Update 09-udf.md --- docs/zh/07-develop/09-udf.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/zh/07-develop/09-udf.md b/docs/zh/07-develop/09-udf.md index 443dcb9806..99ecd903b4 100644 --- a/docs/zh/07-develop/09-udf.md +++ b/docs/zh/07-develop/09-udf.md @@ -217,7 +217,7 @@ gcc -g -O0 -fPIC -shared bit_and.c -o libbitand.so ### C UDF 示例代码 -#### 标量函数示例 [bit_and](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/bit_and.c) +#### 标量函数示例 [bit_and](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/bit_and.c) bit_add 实现多列的按位与功能。如果只有一列,返回这一列。bit_add 忽略空值。 @@ -230,7 +230,7 @@ bit_add 实现多列的按位与功能。如果只有一列,返回这一列。 -#### 聚合函数示例1 返回值为数值类型 [l2norm](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/l2norm.c) +#### 聚合函数示例1 返回值为数值类型 [l2norm](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/l2norm.c) l2norm 实现了输入列的所有数据的二阶范数,即对每个数据先平方,再累加求和,最后开方。 @@ -243,7 +243,7 @@ l2norm 
实现了输入列的所有数据的二阶范数,即对每个数据先 -#### 聚合函数示例2 返回值为字符串类型 [max_vol](https://github.com/taosdata/TDengine/blob/develop/tests/script/sh/max_vol.c) +#### 聚合函数示例2 返回值为字符串类型 [max_vol](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/max_vol.c) max_vol 实现了从多个输入的电压列中找到最大电压,返回由设备ID + 最大电压所在(行,列)+ 最大电压值 组成的组合字符串值 From 47791dfe2fd0cfbea9b93f5fc075e6c0dc1af4e3 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Sat, 6 May 2023 11:02:20 +0800 Subject: [PATCH 34/40] fix: group by validation issue --- source/libs/parser/src/parTranslater.c | 16 ++++++++++++---- source/libs/parser/test/parSelectTest.cpp | 13 +++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index a2748c7e6d..1dcc18e5fb 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -2289,20 +2289,22 @@ static int32_t checkAggColCoexist(STranslateContext* pCxt, SSelectStmt* pSelect) return TSDB_CODE_SUCCESS; } -static int32_t checkPartitionGroupBy(STranslateContext* pCxt, SSelectStmt* pSelect) { +static int32_t checkHavingGroupBy(STranslateContext* pCxt, SSelectStmt* pSelect) { int32_t code = TSDB_CODE_SUCCESS; - if (NULL == pSelect->pGroupByList && NULL == pSelect->pPartitionByList && NULL == pSelect->pWindow) { + if (NULL == getGroupByList(pCxt) && NULL == pSelect->pPartitionByList && NULL == pSelect->pWindow) { return code; } if (NULL != pSelect->pHaving) { code = checkExprForGroupBy(pCxt, &pSelect->pHaving); } +/* if (TSDB_CODE_SUCCESS == code && NULL != pSelect->pProjectionList) { code = checkExprListForGroupBy(pCxt, pSelect, pSelect->pProjectionList); } if (TSDB_CODE_SUCCESS == code && NULL != pSelect->pOrderByList) { code = checkExprListForGroupBy(pCxt, pSelect, pSelect->pOrderByList); } +*/ return code; } @@ -2918,6 +2920,9 @@ static int32_t translateOrderBy(STranslateContext* pCxt, SSelectStmt* pSelect) { pCxt->currClause = SQL_CLAUSE_ORDER_BY; code = translateExprList(pCxt, pSelect->pOrderByList); } + if (TSDB_CODE_SUCCESS == code) { + code = checkExprListForGroupBy(pCxt, pSelect, pSelect->pOrderByList); + } return code; } @@ -3036,6 +3041,9 @@ static int32_t translateSelectList(STranslateContext* pCxt, SSelectStmt* pSelect if (TSDB_CODE_SUCCESS == code) { code = translateProjectionList(pCxt, pSelect); } + if (TSDB_CODE_SUCCESS == code) { + code = checkExprListForGroupBy(pCxt, pSelect, pSelect->pProjectionList); + } if (TSDB_CODE_SUCCESS == code) { code = translateFillValues(pCxt, pSelect); } @@ -3635,10 +3643,10 @@ static int32_t translateSelectFrom(STranslateContext* pCxt, SSelectStmt* pSelect code = translateSelectList(pCxt, pSelect); } if (TSDB_CODE_SUCCESS == code) { - code = translateOrderBy(pCxt, pSelect); + code = checkHavingGroupBy(pCxt, pSelect); } if (TSDB_CODE_SUCCESS == code) { - code = checkPartitionGroupBy(pCxt, pSelect); + code = translateOrderBy(pCxt, pSelect); } if (TSDB_CODE_SUCCESS == code) { code = checkAggColCoexist(pCxt, pSelect); diff --git a/source/libs/parser/test/parSelectTest.cpp b/source/libs/parser/test/parSelectTest.cpp index ec6c69ea8d..2d8ce55b72 100644 --- a/source/libs/parser/test/parSelectTest.cpp +++ b/source/libs/parser/test/parSelectTest.cpp @@ -239,6 +239,19 @@ TEST_F(ParserSelectTest, groupBySemanticCheck) { run("SELECT COUNT(*) cnt, c2 FROM t1 WHERE c1 > 0 GROUP BY c1", TSDB_CODE_PAR_GROUPBY_LACK_EXPRESSION); } +TEST_F(ParserSelectTest, havingCheck) { + useDb("root", "test"); + + run("select tbname,count(*) from st1 partition by tbname having 
c1>0", TSDB_CODE_PAR_INVALID_OPTR_USAGE); + + run("select tbname,count(*) from st1 group by tbname having c1>0", TSDB_CODE_PAR_GROUPBY_LACK_EXPRESSION); + + run("select max(c1) from st1 group by tbname having c1>0"); + + run("select max(c1) from st1 partition by tbname having c1>0"); +} + + TEST_F(ParserSelectTest, orderBy) { useDb("root", "test"); From fced6270215aa2ba29a936ef1faaed722e25ab5d Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 5 May 2023 19:14:34 +0800 Subject: [PATCH 35/40] enh: add mndTransCommitVgStatus --- source/dnode/mnode/impl/src/mndVgroup.c | 30 ++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index e580c78383..83989a22bb 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -2103,6 +2103,18 @@ static int32_t mndAddAdjustVnodeHashRangeAction(SMnode *pMnode, STrans *pTrans, return 0; } +static int32_t mndTransCommitVgStatus(STrans *pTrans, SVgObj *pVg, ESdbStatus vgStatus) { + SSdbRaw *pRaw = mndVgroupActionEncode(pVg); + if (pRaw == NULL) goto _err; + if (mndTransAppendCommitlog(pTrans, pRaw) != 0) goto _err; + (void)sdbSetRawStatus(pRaw, vgStatus); + pRaw = NULL; + return 0; +_err: + sdbFreeRaw(pRaw); + return -1; +} + int32_t mndSplitVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj *pVgroup) { int32_t code = -1; STrans *pTrans = NULL; @@ -2181,28 +2193,16 @@ int32_t mndSplitVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj *pVgro if (pDb->cfg.replications != newVg1.replica) { if (mndBuildAlterVgroupAction(pMnode, pTrans, pDb, pDb, &newVg1, pArray) != 0) goto _OVER; } else { - pRaw = mndVgroupActionEncode(&newVg1); - if (pRaw == NULL) goto _OVER; - if (mndTransAppendCommitlog(pTrans, pRaw) != 0) goto _OVER; - (void)sdbSetRawStatus(pRaw, SDB_STATUS_READY); - pRaw = NULL; + if (mndTransCommitVgStatus(pTrans, &newVg1, SDB_STATUS_READY) < 0) goto _OVER; } if (pDb->cfg.replications != newVg2.replica) { if (mndBuildAlterVgroupAction(pMnode, pTrans, pDb, pDb, &newVg2, pArray) != 0) goto _OVER; } else { - pRaw = mndVgroupActionEncode(&newVg2); - if (pRaw == NULL) goto _OVER; - if (mndTransAppendCommitlog(pTrans, pRaw) != 0) goto _OVER; - (void)sdbSetRawStatus(pRaw, SDB_STATUS_READY); - pRaw = NULL; + if (mndTransCommitVgStatus(pTrans, &newVg2, SDB_STATUS_READY) < 0) goto _OVER; } - pRaw = mndVgroupActionEncode(pVgroup); - if (pRaw == NULL) goto _OVER; - if (mndTransAppendCommitlog(pTrans, pRaw) != 0) goto _OVER; - (void)sdbSetRawStatus(pRaw, SDB_STATUS_DROPPED); - pRaw = NULL; + if (mndTransCommitVgStatus(pTrans, pVgroup, SDB_STATUS_DROPPED) < 0) goto _OVER; memcpy(&dbObj, pDb, sizeof(SDbObj)); if (dbObj.cfg.pRetensions != NULL) { From 028bfd34bf4abd69885af45224c55e839ace47dd Mon Sep 17 00:00:00 2001 From: liuyao <38781207+54liuyao@users.noreply.github.com> Date: Sat, 6 May 2023 14:59:27 +0800 Subject: [PATCH 36/40] Update 14-stream.md --- docs/zh/12-taos-sql/14-stream.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/12-taos-sql/14-stream.md b/docs/zh/12-taos-sql/14-stream.md index d90db3cab0..634f50356d 100644 --- a/docs/zh/12-taos-sql/14-stream.md +++ b/docs/zh/12-taos-sql/14-stream.md @@ -227,7 +227,7 @@ T = 最新事件时间 - DELETE_MARK ## 流式计算支持的函数 1. 所有的 [单行函数](../function/#单行函数) 均可用于流计算。 -2. 以下 19 个聚合/选择函数 不能 应用在创建流计算的 SQL 语句,[系统信息函数](../function/#系统信息函数) 也不能用于流计算中。此外的其他类型的函数均可用于流计算。 +2. 
以下 19 个聚合/选择函数 不能 应用在创建流计算的 SQL 语句。此外的其他类型的函数均可用于流计算。 - [leastsquares](../function/#leastsquares) - [percentile](../function/#percentile) From e48fdceb7c1b64e6f6e0976a6168041409ae88a3 Mon Sep 17 00:00:00 2001 From: liuyao <38781207+54liuyao@users.noreply.github.com> Date: Sat, 6 May 2023 16:03:12 +0800 Subject: [PATCH 37/40] Update 14-stream.md --- docs/en/12-taos-sql/14-stream.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/12-taos-sql/14-stream.md b/docs/en/12-taos-sql/14-stream.md index d99c5bdae4..43c49c03cd 100644 --- a/docs/en/12-taos-sql/14-stream.md +++ b/docs/en/12-taos-sql/14-stream.md @@ -147,7 +147,7 @@ In both of these methods, configuring the watermark is essential for obtaining a ## Supported functions -All [scalar functions](../function/#scalar-functions) are available in stream processing. All [System information functions](../function/#system-information-functions) are not allowed in stream processing. All [Aggregate functions](../function/#aggregate-functions) and [Selection functions](../function/#selection-functions) are available in stream processing, except the followings: +All [scalar functions](../function/#scalar-functions) are available in stream processing. All [Aggregate functions](../function/#aggregate-functions) and [Selection functions](../function/#selection-functions) are available in stream processing, except the followings: - [leastsquares](../function/#leastsquares) - [percentile](../function/#percentile) - [top](../function/#top) From 1f25cc57cdbfebbc755adb35ed4c6d41ffb5af9c Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Sat, 6 May 2023 16:39:40 +0800 Subject: [PATCH 38/40] fix: projection merge issue --- source/libs/planner/src/planLogicCreater.c | 2 +- source/libs/planner/src/planOptimizer.c | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/source/libs/planner/src/planLogicCreater.c b/source/libs/planner/src/planLogicCreater.c index 6544898be9..d1011cbf3a 100644 --- a/source/libs/planner/src/planLogicCreater.c +++ b/source/libs/planner/src/planLogicCreater.c @@ -1070,7 +1070,7 @@ static int32_t createProjectLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSel TSWAP(pProject->node.pLimit, pSelect->pLimit); TSWAP(pProject->node.pSlimit, pSelect->pSlimit); - pProject->ignoreGroupId = pSelect->isSubquery ? true : (NULL == pSelect->pPartitionByList); + pProject->ignoreGroupId = (pSelect->isSubquery && NULL == pProject->node.pLimit && NULL == pProject->node.pSlimit) ? true : (NULL == pSelect->pPartitionByList); pProject->node.groupAction = (!pSelect->isSubquery && pCxt->pPlanCxt->streamQuery) ? 
GROUP_ACTION_KEEP : GROUP_ACTION_CLEAR; pProject->node.requireDataOrder = DATA_ORDER_LEVEL_NONE; diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 4f8b57de5f..5cb3591984 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -2351,6 +2351,17 @@ static EDealRes mergeProjectionsExpr(SNode** pNode, void* pContext) { return DEAL_RES_CONTINUE; } +static int32_t mergeProjectionsLogicNode(SLogicNode* pDstNode, SLogicNode* pSrcNode) { + SProjectLogicNode *pDstPro = (SProjectLogicNode*)pDstNode; + SProjectLogicNode *pSrcPro = (SProjectLogicNode*)pSrcNode; + + if (!pSrcPro->ignoreGroupId) { + pDstPro->ignoreGroupId = pSrcPro->ignoreGroupId; + } + + return TSDB_CODE_SUCCESS; +} + static int32_t mergeProjectsOptimizeImpl(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan, SLogicNode* pSelfNode) { SLogicNode* pChild = (SLogicNode*)nodesListGetNode(pSelfNode->pChildren, 0); @@ -2360,8 +2371,11 @@ static int32_t mergeProjectsOptimizeImpl(SOptimizeContext* pCxt, SLogicSubplan* if (TSDB_CODE_SUCCESS == code) { if (1 == LIST_LENGTH(pChild->pChildren)) { - SLogicNode* pGrandChild = (SLogicNode*)nodesListGetNode(pChild->pChildren, 0); - code = replaceLogicNode(pLogicSubplan, pChild, pGrandChild); + code = mergeProjectionsLogicNode(pSelfNode, pChild); + if (TSDB_CODE_SUCCESS == code) { + SLogicNode* pGrandChild = (SLogicNode*)nodesListGetNode(pChild->pChildren, 0); + code = replaceLogicNode(pLogicSubplan, pChild, pGrandChild); + } } else { // no grand child NODES_CLEAR_LIST(pSelfNode->pChildren); } From 758bcb901a1209f1638c641b451f88e19c61df63 Mon Sep 17 00:00:00 2001 From: Adam Ji Date: Sat, 6 May 2023 22:55:44 +0800 Subject: [PATCH 39/40] docs: fix example tmp (#21197) --- docs/examples/rust/nativeexample/examples/subscribe_demo.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/examples/rust/nativeexample/examples/subscribe_demo.rs b/docs/examples/rust/nativeexample/examples/subscribe_demo.rs index 7551ad46b1..d54bb60e93 100644 --- a/docs/examples/rust/nativeexample/examples/subscribe_demo.rs +++ b/docs/examples/rust/nativeexample/examples/subscribe_demo.rs @@ -45,7 +45,7 @@ async fn main() -> anyhow::Result<()> { taos.exec_many([ format!("DROP TOPIC IF EXISTS tmq_meters"), format!("DROP DATABASE IF EXISTS `{db}`"), - format!("CREATE DATABASE `{db}`"), + format!("CREATE DATABASE `{db}` WAL_RETENTION_PERIOD 3600"), format!("USE `{db}`"), // create super table format!("CREATE TABLE `meters` (`ts` TIMESTAMP, `current` FLOAT, `voltage` INT, `phase` FLOAT) TAGS (`groupid` INT, `location` BINARY(24))"), From 1526eb2923c7e158b5dbe9ec56efd3e3d4415436 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Mon, 8 May 2023 10:28:07 +0800 Subject: [PATCH 40/40] fix: projection group merge issue --- source/libs/planner/src/planLogicCreater.c | 2 +- source/libs/planner/src/planOptimizer.c | 18 ++---------------- source/libs/planner/src/planSpliter.c | 21 +++++++++++++++++++++ 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/source/libs/planner/src/planLogicCreater.c b/source/libs/planner/src/planLogicCreater.c index d1011cbf3a..6544898be9 100644 --- a/source/libs/planner/src/planLogicCreater.c +++ b/source/libs/planner/src/planLogicCreater.c @@ -1070,7 +1070,7 @@ static int32_t createProjectLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSel TSWAP(pProject->node.pLimit, pSelect->pLimit); TSWAP(pProject->node.pSlimit, pSelect->pSlimit); - pProject->ignoreGroupId = (pSelect->isSubquery && NULL 
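Patches 38 and 40 are two passes at the same bug: a subquery projection that sets ignoreGroupId erases vnode group ids, which breaks any LIMIT/SLIMIT that still has to be applied per group above a merge. Patch 38 threaded the fix through the projection-merge optimizer rule and this constructor; patch 40 reverts both and moves the decision into the stable-split phase instead (stbSplRewriteFromMergeNode, below). Reduced to its core, with stand-in structs for the real logic-plan types:

```c
#include <stdbool.h>
#include <stddef.h>

typedef struct {
  const void *pLimit;   /* stands in for SMergeLogicNode's node.pLimit */
  const void *pSlimit;  /* stands in for node.pSlimit                  */
} SMergeSketch;

typedef struct {
  bool ignoreGroupId;   /* stands in for SProjectLogicNode.ignoreGroupId */
} SProjectSketch;

/* Illustrative: a merge carrying LIMIT/SLIMIT forces the projection under it
 * to keep group ids, so per-group limits can still be applied after merging. */
static void keepGroupIdsUnderLimitedMerge(const SMergeSketch *pMerge, SProjectSketch *pProj) {
  if (pMerge->pLimit != NULL || pMerge->pSlimit != NULL) {
    pProj->ignoreGroupId = false;
  }
}
```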
== pProject->node.pLimit && NULL == pProject->node.pSlimit) ? true : (NULL == pSelect->pPartitionByList); + pProject->ignoreGroupId = pSelect->isSubquery ? true : (NULL == pSelect->pPartitionByList); pProject->node.groupAction = (!pSelect->isSubquery && pCxt->pPlanCxt->streamQuery) ? GROUP_ACTION_KEEP : GROUP_ACTION_CLEAR; pProject->node.requireDataOrder = DATA_ORDER_LEVEL_NONE; diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 5cb3591984..4f8b57de5f 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -2351,17 +2351,6 @@ static EDealRes mergeProjectionsExpr(SNode** pNode, void* pContext) { return DEAL_RES_CONTINUE; } -static int32_t mergeProjectionsLogicNode(SLogicNode* pDstNode, SLogicNode* pSrcNode) { - SProjectLogicNode *pDstPro = (SProjectLogicNode*)pDstNode; - SProjectLogicNode *pSrcPro = (SProjectLogicNode*)pSrcNode; - - if (!pSrcPro->ignoreGroupId) { - pDstPro->ignoreGroupId = pSrcPro->ignoreGroupId; - } - - return TSDB_CODE_SUCCESS; -} - static int32_t mergeProjectsOptimizeImpl(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan, SLogicNode* pSelfNode) { SLogicNode* pChild = (SLogicNode*)nodesListGetNode(pSelfNode->pChildren, 0); @@ -2371,11 +2360,8 @@ static int32_t mergeProjectsOptimizeImpl(SOptimizeContext* pCxt, SLogicSubplan* if (TSDB_CODE_SUCCESS == code) { if (1 == LIST_LENGTH(pChild->pChildren)) { - code = mergeProjectionsLogicNode(pSelfNode, pChild); - if (TSDB_CODE_SUCCESS == code) { - SLogicNode* pGrandChild = (SLogicNode*)nodesListGetNode(pChild->pChildren, 0); - code = replaceLogicNode(pLogicSubplan, pChild, pGrandChild); - } + SLogicNode* pGrandChild = (SLogicNode*)nodesListGetNode(pChild->pChildren, 0); + code = replaceLogicNode(pLogicSubplan, pChild, pGrandChild); } else { // no grand child NODES_CLEAR_LIST(pSelfNode->pChildren); } diff --git a/source/libs/planner/src/planSpliter.c b/source/libs/planner/src/planSpliter.c index fd77261818..eed59c8236 100644 --- a/source/libs/planner/src/planSpliter.c +++ b/source/libs/planner/src/planSpliter.c @@ -532,6 +532,24 @@ static int32_t stbSplGetNumOfVgroups(SLogicNode* pNode) { return 0; } +static int32_t stbSplRewriteFromMergeNode(SMergeLogicNode* pMerge, SLogicNode* pNode) { + int32_t code = TSDB_CODE_SUCCESS; + + switch (nodeType(pNode)) { + case QUERY_NODE_LOGIC_PLAN_PROJECT: { + SProjectLogicNode *pLogicNode = (SProjectLogicNode*)pNode; + if (pMerge->node.pLimit || pMerge->node.pSlimit) { + pLogicNode->ignoreGroupId = false; + } + break; + } + default: + break; + } + + return code; +} + static int32_t stbSplCreateMergeNode(SSplitContext* pCxt, SLogicSubplan* pSubplan, SLogicNode* pSplitNode, SNodeList* pMergeKeys, SLogicNode* pPartChild, bool groupSort) { SMergeLogicNode* pMerge = (SMergeLogicNode*)nodesMakeNode(QUERY_NODE_LOGIC_PLAN_MERGE); @@ -563,6 +581,9 @@ static int32_t stbSplCreateMergeNode(SSplitContext* pCxt, SLogicSubplan* pSubpla ((SLimitNode*)pSplitNode->pLimit)->limit += ((SLimitNode*)pSplitNode->pLimit)->offset; ((SLimitNode*)pSplitNode->pLimit)->offset = 0; } + if (TSDB_CODE_SUCCESS == code) { + code = stbSplRewriteFromMergeNode(pMerge, pSplitNode); + } if (TSDB_CODE_SUCCESS == code) { if (NULL == pSubplan) { code = nodesListMakeAppend(&pSplitNode->pChildren, (SNode*)pMerge);
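One detail worth noting in the hunk above: when stbSplCreateMergeNode clones a LIMIT onto the vnode-side node, it folds the OFFSET into the child's limit and zeroes it, because rows can only be skipped once, globally, at the merge. The arithmetic in isolation (a sketch, not the library API):

```c
#include <stdint.h>

typedef struct { int64_t limit, offset; } SLimitSketch;

/* Illustrative: splitting LIMIT l OFFSET o across a merge. Each partial
 * producer must emit its first (l + o) rows; the merge then skips o rows
 * once and emits l. */
static SLimitSketch pushLimitBelowMerge(SLimitSketch top) {
  SLimitSketch child = { .limit = top.limit + top.offset, .offset = 0 };
  return child;
}
```

With that rule and stbSplRewriteFromMergeNode in place, a per-group SLIMIT above the merge sees intact group ids and a sufficient row budget from every vnode.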