From a59b558331f96e65a74ed38f822733d656b2c326 Mon Sep 17 00:00:00 2001
From: Ganlin Zhao
Date: Mon, 11 Apr 2022 20:22:49 +0800
Subject: [PATCH] feat(query): add cast function

TD-14242
---
Review notes (ignored by git-am, not part of the commit message):
 - castFunction: one reusable output slot released on every exit path,
   allocations checked, rows addressed directly (NULL rows no longer shift
   the read position), strings parsed through a NUL-terminated copy sized
   from the input, string copies bounded by both input and output length.
 - NUM_TO_STRING: BOOL is read as a single byte instead of as int32_t.
 - Indentation of this patch was reconstructed from a whitespace-collapsed
   copy; context lines may need re-syncing against the tree before applying.

 include/common/ttypes.h                |  44 ++++++
 include/libs/scalar/scalar.h           |   3 +
 source/common/src/tvariant.c           |  11 +-
 source/libs/function/src/builtins.c    |  10 +-
 source/libs/parser/src/parAstCreater.c |  11 +-
 source/libs/scalar/src/sclfunc.c       | 187 +++++++++++++++++++++++++
 6 files changed, 259 insertions(+), 7 deletions(-)

diff --git a/include/common/ttypes.h b/include/common/ttypes.h
index 37d688a0ef..721b00fc7b 100644
--- a/include/common/ttypes.h
+++ b/include/common/ttypes.h
@@ -141,6 +141,50 @@ typedef struct {
     } \
   } while (0)
 
+/*
+ * Formats the numeric value stored at _input (interpreted according to
+ * _inputType) as decimal text into _output, writing at most _outputBytes
+ * bytes including the terminating NUL. Types without a dedicated case are
+ * read as int32_t.
+ */
+#define NUM_TO_STRING(_inputType, _input, _outputBytes, _output) \
+  do { \
+    switch (_inputType) { \
+      case TSDB_DATA_TYPE_BOOL: \
+      case TSDB_DATA_TYPE_TINYINT: \
+        snprintf(_output, (int32_t)(_outputBytes), "%d", *(int8_t *)(_input)); \
+        break; \
+      case TSDB_DATA_TYPE_UTINYINT: \
+        snprintf(_output, (int32_t)(_outputBytes), "%d", *(uint8_t *)(_input)); \
+        break; \
+      case TSDB_DATA_TYPE_SMALLINT: \
+        snprintf(_output, (int32_t)(_outputBytes), "%d", *(int16_t *)(_input)); \
+        break; \
+      case TSDB_DATA_TYPE_USMALLINT: \
+        snprintf(_output, (int32_t)(_outputBytes), "%d", *(uint16_t *)(_input)); \
+        break; \
+      case TSDB_DATA_TYPE_TIMESTAMP: \
+      case TSDB_DATA_TYPE_BIGINT: \
+        snprintf(_output, (int32_t)(_outputBytes), "%" PRId64, *(int64_t *)(_input)); \
+        break; \
+      case TSDB_DATA_TYPE_UBIGINT: \
+        snprintf(_output, (int32_t)(_outputBytes), "%" PRIu64, *(uint64_t *)(_input)); \
+        break; \
+      case TSDB_DATA_TYPE_FLOAT: \
+        snprintf(_output, (int32_t)(_outputBytes), "%f", *(float *)(_input)); \
+        break; \
+      case TSDB_DATA_TYPE_DOUBLE: \
+        snprintf(_output, (int32_t)(_outputBytes), "%f", *(double *)(_input)); \
+        break; \
+      case TSDB_DATA_TYPE_UINT: \
+        snprintf(_output, (int32_t)(_outputBytes), "%u", *(uint32_t *)(_input)); \
+        break; \
+      default: \
+        snprintf(_output, (int32_t)(_outputBytes), "%d", *(int32_t *)(_input)); \
+        break; \
+    } \
+  } while (0)
+
 #define IS_SIGNED_NUMERIC_TYPE(_t) ((_t) >= TSDB_DATA_TYPE_TINYINT && (_t) <= TSDB_DATA_TYPE_BIGINT)
 #define IS_UNSIGNED_NUMERIC_TYPE(_t) ((_t) >= TSDB_DATA_TYPE_UTINYINT && (_t) <= TSDB_DATA_TYPE_UBIGINT)
 #define IS_FLOAT_TYPE(_t) ((_t) == TSDB_DATA_TYPE_FLOAT || (_t) == TSDB_DATA_TYPE_DOUBLE)
diff --git a/include/libs/scalar/scalar.h b/include/libs/scalar/scalar.h
index b5acc64f0b..3c5b164648 100644
--- a/include/libs/scalar/scalar.h
+++ b/include/libs/scalar/scalar.h
@@ -70,6 +70,9 @@ int32_t ltrimFunction(SScalarParam *pInput, int32_t inputNum, SScalarParam *pOut
 int32_t rtrimFunction(SScalarParam *pInput, int32_t inputNum, SScalarParam *pOutput);
 int32_t substrFunction(SScalarParam *pInput, int32_t inputNum, SScalarParam *pOutput);
 
+/* Conversion functions */
+int32_t castFunction(SScalarParam *pInput, int32_t inputNum, SScalarParam *pOutput);
+
 bool getTimePseudoFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv);
 
 int32_t winStartTsFunction(SScalarParam *pInput, int32_t inputNum, SScalarParam *pOutput);
diff --git a/source/common/src/tvariant.c b/source/common/src/tvariant.c
index 8c5010e577..a0503e43a1 100644
--- a/source/common/src/tvariant.c
+++ b/source/common/src/tvariant.c
@@ -1028,13 +1028,18 @@ int32_t taosVariantTypeSetType(SVariant *pVariant, char type) {
 
 char * taosVariantGet(SVariant *pVar, int32_t type) {
   switch (type) {
-    case TSDB_DATA_TYPE_BOOL:  
+    case TSDB_DATA_TYPE_BOOL:
     case TSDB_DATA_TYPE_TINYINT:
     case TSDB_DATA_TYPE_SMALLINT:
+    case TSDB_DATA_TYPE_INT:
     case TSDB_DATA_TYPE_BIGINT:
-    case TSDB_DATA_TYPE_INT:
     case TSDB_DATA_TYPE_TIMESTAMP:
       return (char *)&pVar->i;
+    case TSDB_DATA_TYPE_UTINYINT:
+    case TSDB_DATA_TYPE_USMALLINT:
+    case TSDB_DATA_TYPE_UINT:
+    case TSDB_DATA_TYPE_UBIGINT:
+      return (char *)&pVar->u;
     case TSDB_DATA_TYPE_DOUBLE:
     case TSDB_DATA_TYPE_FLOAT:
       return (char *)&pVar->d;
@@ -1042,7 +1047,7 @@ char * taosVariantGet(SVariant *pVar, int32_t type) {
       return (char *)pVar->pz;
     case TSDB_DATA_TYPE_NCHAR:
       return (char *)pVar->ucs4;
-    default:  
+    default:
       return NULL;
   }
 
diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c
index aa7343f610..69e7eb6d12 100644
--- a/source/libs/function/src/builtins.c
+++ b/source/libs/function/src/builtins.c
@@ -389,7 +389,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
     .checkFunc    = stubCheckAndGetResultType,
     .getEnvFunc   = NULL,
     .initFunc     = NULL,
-    .sprocessFunc = NULL,
+    .sprocessFunc = castFunction,
     .finalizeFunc = NULL
   },
   {
@@ -599,7 +599,13 @@ int32_t stubCheckAndGetResultType(SFunctionNode* pFunc) {
       break;
     }
     case FUNCTION_TYPE_CAST: {
-      pFunc->node.resType = (SDataType) { .bytes = tDataTypes[TSDB_DATA_TYPE_BIGINT].bytes, .type = TSDB_DATA_TYPE_BIGINT };
+      //type
+      SValueNode* pParam = nodesListGetNode(pFunc->pParameterList, 1);
+      int32_t paraType = pParam->datum.i;
+      //bytes
+      pParam = nodesListGetNode(pFunc->pParameterList, 2);
+      int32_t paraBytes = pParam->datum.i;
+      pFunc->node.resType = (SDataType) { .bytes = paraBytes, .type = paraType};
       break;
     }
 
diff --git a/source/libs/parser/src/parAstCreater.c b/source/libs/parser/src/parAstCreater.c
index 4908748f0d..5c15a371dc 100644
--- a/source/libs/parser/src/parAstCreater.c
+++ b/source/libs/parser/src/parAstCreater.c
@@ -255,13 +255,20 @@ SNodeList* addValueNodeFromTypeToList(SAstCreateContext* pCxt, SDataType dataTyp
   char buf[64] = {0};
   //add value node for type
   snprintf(buf, sizeof(buf), "%u", dataType.type);
-  SToken token = {.type = TSDB_DATA_TYPE_TINYINT, .n = strlen(buf), .z = buf};
+  SToken token = {.type = TSDB_DATA_TYPE_SMALLINT, .n = strlen(buf), .z = buf};
   SNode* pNode = createValueNode(pCxt, token.type, &token);
   addNodeToList(pCxt, pList, pNode);
 
   //add value node for bytes
   memset(buf, 0, sizeof(buf));
-  snprintf(buf, sizeof(buf), "%u", dataType.bytes);
+  int32_t bytes;
+  if (IS_VAR_DATA_TYPE(dataType.type)) {
+    bytes = (dataType.type == TSDB_DATA_TYPE_NCHAR) ? dataType.bytes * TSDB_NCHAR_SIZE : dataType.bytes;
+    bytes += VARSTR_HEADER_SIZE;
+  } else {
+    bytes = dataType.bytes;
+  }
+  snprintf(buf, sizeof(buf), "%d", bytes);
   token.type = TSDB_DATA_TYPE_BIGINT;
   token.n = strlen(buf);
   token.z = buf;
diff --git a/source/libs/scalar/src/sclfunc.c b/source/libs/scalar/src/sclfunc.c
index 0956c2add5..7b861f9b79 100644
--- a/source/libs/scalar/src/sclfunc.c
+++ b/source/libs/scalar/src/sclfunc.c
@@ -647,6 +647,193 @@ int32_t substrFunction(SScalarParam *pInput, int32_t inputNum, SScalarParam *pOu
   return TSDB_CODE_SUCCESS;
 }
 
+/*
+ * Copies the payload of a VARCHAR/NCHAR cell into a newly allocated,
+ * NUL-terminated multi-byte string so it can be handed to strtoll()/strtoull().
+ * NCHAR payloads are converted from UCS-4 first. Returns NULL when the
+ * allocation or the charset conversion fails; the caller frees the result.
+ */
+static char *castVarDataToCStr(int16_t inputType, char *input) {
+  int32_t inputLen = varDataLen(input);
+  char   *str = taosMemoryCalloc(1, inputLen + 1);
+  if (str == NULL) {
+    return NULL;
+  }
+
+  if (inputType == TSDB_DATA_TYPE_NCHAR) {
+    // NOTE(review): assumes the multi-byte form never needs more bytes than the UCS-4 form - TODO confirm
+    int32_t len = taosUcs4ToMbs((TdUcs4 *)varDataVal(input), inputLen, str);
+    if (len < 0) {
+      taosMemoryFree(str);
+      return NULL;
+    }
+    str[len] = 0;
+  } else {
+    memcpy(str, varDataVal(input), inputLen);
+  }
+  return str;
+}
+
+/*
+ * Scalar implementation of CAST: converts each row of pInput[0] to the
+ * requested type and appends it to pOutput.
+ *   pInput[0]  values to convert
+ *   pInput[1]  target type id, read from the first value as int16_t
+ *   pInput[2]  bytes per output value, read from the first value as int64_t;
+ *              for VARCHAR/NCHAR targets this includes VARSTR_HEADER_SIZE
+ * Targets: BIGINT, UBIGINT, TIMESTAMP, VARCHAR/BINARY and NCHAR. Casting a
+ * string to TIMESTAMP or an NCHAR to VARCHAR is not supported.
+ * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_FAILED on a wrong argument count,
+ * an unsupported conversion, an allocation failure or a charset error.
+ */
+int32_t castFunction(SScalarParam *pInput, int32_t inputNum, SScalarParam *pOutput) {
+  if (inputNum != 3) {
+    return TSDB_CODE_FAILED;
+  }
+
+  int16_t inputType = pInput[0].columnData->info.type;
+  int16_t outputType = *(int16_t *)pInput[1].columnData->pData;
+  if (outputType != TSDB_DATA_TYPE_BIGINT && outputType != TSDB_DATA_TYPE_UBIGINT &&
+      outputType != TSDB_DATA_TYPE_VARCHAR && outputType != TSDB_DATA_TYPE_NCHAR &&
+      outputType != TSDB_DATA_TYPE_TIMESTAMP) {
+    return TSDB_CODE_FAILED;
+  }
+
+  // one output value must fit: the full width of a fixed-size target, at least the length header of a string
+  int64_t outputLen = *(int64_t *)pInput[2].columnData->pData;
+  int64_t minLen = IS_VAR_DATA_TYPE(outputType) ? VARSTR_HEADER_SIZE : tDataTypes[outputType].bytes;
+  if (outputLen < minLen) {
+    return TSDB_CODE_FAILED;
+  }
+
+  // a single reusable slot: the column is taken to copy appended values (the buffer is freed right after use)
+  char *output = taosMemoryCalloc(1, outputLen);
+  if (output == NULL) {
+    return TSDB_CODE_FAILED;
+  }
+
+  int32_t code = TSDB_CODE_SUCCESS;
+  for (int32_t i = 0; i < pInput[0].numOfRows; ++i) {
+    if (colDataIsNull_s(pInput[0].columnData, i)) {
+      colDataAppendNULL(pOutput->columnData, i);
+      continue;
+    }
+
+    // address each row directly so that skipped NULL rows cannot shift the read position
+    char *input = pInput[0].columnData->pData;
+    if (IS_VAR_DATA_TYPE(inputType)) {
+      input += pInput[0].columnData->varmeta.offset[i];
+    } else {
+      input += (int64_t)i * tDataTypes[inputType].bytes;
+    }
+
+    switch (outputType) {
+      case TSDB_DATA_TYPE_BIGINT: {
+        if (inputType == TSDB_DATA_TYPE_BINARY || inputType == TSDB_DATA_TYPE_NCHAR) {
+          char *str = castVarDataToCStr(inputType, input);
+          if (str == NULL) {
+            code = TSDB_CODE_FAILED;
+            goto _return;
+          }
+          *(int64_t *)output = strtoll(str, NULL, 10);
+          taosMemoryFree(str);
+        } else {
+          GET_TYPED_DATA(*(int64_t *)output, int64_t, inputType, input);
+        }
+        break;
+      }
+      case TSDB_DATA_TYPE_UBIGINT: {
+        if (inputType == TSDB_DATA_TYPE_BINARY || inputType == TSDB_DATA_TYPE_NCHAR) {
+          char *str = castVarDataToCStr(inputType, input);
+          if (str == NULL) {
+            code = TSDB_CODE_FAILED;
+            goto _return;
+          }
+          *(uint64_t *)output = strtoull(str, NULL, 10);
+          taosMemoryFree(str);
+        } else {
+          GET_TYPED_DATA(*(uint64_t *)output, uint64_t, inputType, input);
+        }
+        break;
+      }
+      case TSDB_DATA_TYPE_TIMESTAMP: {
+        if (inputType == TSDB_DATA_TYPE_BINARY || inputType == TSDB_DATA_TYPE_NCHAR) {
+          // not supported
+          code = TSDB_CODE_FAILED;
+          goto _return;
+        }
+        GET_TYPED_DATA(*(int64_t *)output, int64_t, inputType, input);
+        break;
+      }
+      case TSDB_DATA_TYPE_BINARY: {
+        char    tmp[400] = {0};
+        char   *src = tmp;
+        int32_t len = 0;
+        if (inputType == TSDB_DATA_TYPE_BOOL) {
+          src = *(int8_t *)input ? "true" : "false";
+          len = (int32_t)strlen(src);
+        } else if (inputType == TSDB_DATA_TYPE_BINARY) {
+          src = varDataVal(input);
+          len = varDataLen(input);
+        } else if (inputType == TSDB_DATA_TYPE_NCHAR) {
+          // not supported
+          code = TSDB_CODE_FAILED;
+          goto _return;
+        } else {
+          NUM_TO_STRING(inputType, input, sizeof(tmp), tmp);
+          len = (int32_t)strlen(tmp);
+        }
+        // bounded copy: never reads past the input payload nor writes past the output slot
+        len = (int32_t)MIN(len, outputLen - VARSTR_HEADER_SIZE);
+        memcpy(varDataVal(output), src, len);
+        varDataSetLen(output, len);
+        break;
+      }
+      case TSDB_DATA_TYPE_NCHAR: {
+        int32_t outputCharLen = (outputLen - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE;
+        if (inputType == TSDB_DATA_TYPE_NCHAR) {
+          int32_t len = MIN(outputLen, varDataLen(input) + VARSTR_HEADER_SIZE);
+          memcpy(output, input, len);
+          varDataSetLen(output, len - VARSTR_HEADER_SIZE);
+          break;
+        }
+
+        char    tmp[400] = {0};
+        char   *src = tmp;
+        int32_t len = 0;
+        if (inputType == TSDB_DATA_TYPE_BOOL) {
+          src = *(int8_t *)input ? "true" : "false";
+          len = (int32_t)strlen(src);
+        } else if (inputType == TSDB_DATA_TYPE_BINARY) {
+          src = varDataVal(input);
+          len = varDataLen(input);
+        } else {
+          NUM_TO_STRING(inputType, input, sizeof(tmp), tmp);
+          len = (int32_t)strlen(tmp);
+        }
+        len = MIN(len, outputCharLen);
+        if (!taosMbsToUcs4(src, len, (TdUcs4 *)varDataVal(output), outputLen - VARSTR_HEADER_SIZE, &len)) {
+          code = TSDB_CODE_FAILED;
+          goto _return;
+        }
+        varDataSetLen(output, len);
+        break;
+      }
+      default: {
+        code = TSDB_CODE_FAILED;
+        goto _return;
+      }
+    }
+
+    colDataAppend(pOutput->columnData, i, output, false);
+  }
+
+  pOutput->numOfRows = pInput->numOfRows;
+
+_return:
+  taosMemoryFree(output);
+  return code;
+}
 int32_t atanFunction(SScalarParam *pInput, int32_t inputNum, SScalarParam *pOutput) {
   return doScalarFunctionUnique(pInput, inputNum, pOutput, atan);