From 8240efda4c98304918ad42993f3122fa09dd25e5 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 11 Apr 2022 17:03:41 +0800 Subject: [PATCH] fix[query]: enable filter on nchar type. --- source/libs/scalar/src/filter.c | 29 +++++++++--------- source/libs/scalar/src/scalar.c | 1 - source/libs/scalar/src/sclvector.c | 48 ++++++++++++++++++++++-------- 3 files changed, 50 insertions(+), 28 deletions(-) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 2e47e19023..191143de12 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -1748,7 +1748,6 @@ int32_t fltInitValFieldData(SFilterInfo *info) { SFilterField* fi = right; SValueNode* var = (SValueNode *)fi->desc; - if (var == NULL) { assert(fi->data != NULL); continue; @@ -1767,13 +1766,18 @@ int32_t fltInitValFieldData(SFilterInfo *info) { } SDataType *dType = &var->node.resType; + size_t bytes = 0; if (type == TSDB_DATA_TYPE_BINARY) { size_t len = (dType->type == TSDB_DATA_TYPE_BINARY || dType->type == TSDB_DATA_TYPE_NCHAR) ? dType->bytes : MAX_NUM_STR_SIZE; - fi->data = taosMemoryCalloc(1, len + 1 + VARSTR_HEADER_SIZE); + bytes = len + 1 + VARSTR_HEADER_SIZE; + + fi->data = taosMemoryCalloc(1, bytes); } else if (type == TSDB_DATA_TYPE_NCHAR) { - size_t len = (dType->type == TSDB_DATA_TYPE_BINARY || dType->type == TSDB_DATA_TYPE_NCHAR) ? dType->bytes : MAX_NUM_STR_SIZE; - fi->data = taosMemoryCalloc(1, (len + 1) * TSDB_NCHAR_SIZE + VARSTR_HEADER_SIZE); + size_t len = (dType->type == TSDB_DATA_TYPE_BINARY || dType->type == TSDB_DATA_TYPE_NCHAR) ? dType->bytes : MAX_NUM_STR_SIZE; + bytes = (len + 1) * TSDB_NCHAR_SIZE + VARSTR_HEADER_SIZE; + + fi->data = taosMemoryCalloc(1, bytes); } else if (type != TSDB_DATA_TYPE_JSON){ if (dType->type == TSDB_DATA_TYPE_VALUE_ARRAY) { //TIME RANGE /* @@ -1797,8 +1801,11 @@ int32_t fltInitValFieldData(SFilterInfo *info) { } else { SScalarParam out = {.columnData = taosMemoryCalloc(1, sizeof(SColumnInfoData))}; out.columnData->info.type = type; - out.columnData->info.bytes = tDataTypes[type].bytes; - ASSERT(!IS_VAR_DATA_TYPE(type)); + if (IS_VAR_DATA_TYPE(type)) { + out.columnData->info.bytes = bytes; + } else { + out.columnData->info.bytes = tDataTypes[type].bytes; + } // todo refactor the convert int32_t code = doConvertDataType(var, &out); @@ -2985,13 +2992,13 @@ bool filterExecuteImplMisc(void *pinfo, int32_t numOfRows, int8_t** p, SColumnDa for (int32_t i = 0; i < numOfRows; ++i) { uint32_t uidx = info->groups[0].unitIdxs[0]; void *colData = colDataGetData((SColumnInfoData *)info->cunits[uidx].colData, i); - if (colData == NULL || colDataIsNull((SColumnInfoData *)info->cunits[uidx].colData, 0, i, NULL)) { + if (colData == NULL || colDataIsNull_s((SColumnInfoData *)info->cunits[uidx].colData, i)) { (*p)[i] = 0; all = false; continue; } - // match/nmatch for nchar type need convert from ucs4 to mbs + // match/nmatch for nchar type need convert from ucs4 to mbs if(info->cunits[uidx].dataType == TSDB_DATA_TYPE_NCHAR && (info->cunits[uidx].optr == OP_TYPE_MATCH || info->cunits[uidx].optr == OP_TYPE_NMATCH)){ char *newColData = taosMemoryCalloc(info->cunits[uidx].dataSize * TSDB_NCHAR_SIZE + VARSTR_HEADER_SIZE, 1); int32_t len = taosUcs4ToMbs((TdUcs4*)varDataVal(colData), varDataLen(colData), varDataVal(newColData)); @@ -3222,19 +3229,13 @@ int32_t fltInitFromNode(SNode* tree, SFilterInfo *info, uint32_t options) { info->unitFlags = taosMemoryMalloc(info->unitNum * sizeof(*info->unitFlags)); filterDumpInfoToString(info, "Final", 0); - return code; _return: - qInfo("init from node failed, code:%d", code); - return code; } - - - bool filterRangeExecute(SFilterInfo *info, SColumnDataAgg *pDataStatis, int32_t numOfCols, int32_t numOfRows) { if (FILTER_EMPTY_RES(info)) { return false; diff --git a/source/libs/scalar/src/scalar.c b/source/libs/scalar/src/scalar.c index f97e5a2d2a..116dfdd5d5 100644 --- a/source/libs/scalar/src/scalar.c +++ b/source/libs/scalar/src/scalar.c @@ -46,7 +46,6 @@ int32_t doConvertDataType(SValueNode* pValueNode, SScalarParam* out) { colDataAppend(in.columnData, 0, nodesGetValueFromNode(pValueNode), false); colInfoDataEnsureCapacity(out->columnData, 1); - int32_t code = vectorConvertImpl(&in, out); sclFreeParam(&in); diff --git a/source/libs/scalar/src/sclvector.c b/source/libs/scalar/src/sclvector.c index 4bce8f33f2..ff677d2f7b 100644 --- a/source/libs/scalar/src/sclvector.c +++ b/source/libs/scalar/src/sclvector.c @@ -177,10 +177,25 @@ static FORCE_INLINE void varToBool(char *buf, SScalarParam* pOut, int32_t rowInd colDataAppendInt8(pOut->columnData, rowIndex, (int8_t*) &v); } +static FORCE_INLINE void varToNchar(char* buf, SScalarParam* pOut, int32_t rowIndex) { + int32_t len = 0; + int32_t inputLen = varDataLen(buf); + + char* t = taosMemoryCalloc(1,(inputLen + 1) * TSDB_NCHAR_SIZE + VARSTR_HEADER_SIZE); + /*int32_t resLen = */taosMbsToUcs4(varDataVal(buf), inputLen, (TdUcs4*) varDataVal(t), pOut->columnData->info.bytes, &len); + varDataSetLen(t, len); + + colDataAppend(pOut->columnData, rowIndex, t, false); + taosMemoryFree(t); +} + +//TODO opt performance int32_t vectorConvertFromVarData(const SScalarParam* pIn, SScalarParam* pOut, int32_t inType, int32_t outType) { int32_t bufSize = pIn->columnData->info.bytes; char *tmp = taosMemoryMalloc(bufSize); + bool vton = false; + _bufConverteFunc func = NULL; if (TSDB_DATA_TYPE_BOOL == outType) { func = varToBool; @@ -190,6 +205,9 @@ int32_t vectorConvertFromVarData(const SScalarParam* pIn, SScalarParam* pOut, in func = varToUnsigned; } else if (IS_FLOAT_TYPE(outType)) { func = varToFloat; + } else if (outType == TSDB_DATA_TYPE_NCHAR) { + func = varToNchar; + vton = true; } else { sclError("invalid convert outType:%d", outType); return TSDB_CODE_QRY_APP_ERROR; @@ -197,26 +215,30 @@ int32_t vectorConvertFromVarData(const SScalarParam* pIn, SScalarParam* pOut, in pOut->numOfRows = pIn->numOfRows; for (int32_t i = 0; i < pIn->numOfRows; ++i) { - if (colDataIsNull(pIn->columnData, pIn->numOfRows, i, NULL)) { + if (colDataIsNull_s(pIn->columnData, i)) { colDataAppendNULL(pOut->columnData, i); continue; } char* data = colDataGetData(pIn->columnData, i); - if (TSDB_DATA_TYPE_BINARY == inType) { - memcpy(tmp, varDataVal(data), varDataLen(data)); - tmp[varDataLen(data)] = 0; + if (vton) { + memcpy(tmp, data, varDataTLen(data)); } else { - ASSERT (varDataLen(data) <= bufSize); - - int len = taosUcs4ToMbs((TdUcs4*)varDataVal(data), varDataLen(data), tmp); - if (len < 0){ - sclError("castConvert taosUcs4ToMbs error 1"); - taosMemoryFreeClear(tmp); - return TSDB_CODE_QRY_APP_ERROR; + if (TSDB_DATA_TYPE_VARCHAR == inType) { + memcpy(tmp, varDataVal(data), varDataLen(data)); + tmp[varDataLen(data)] = 0; + } else { + ASSERT(varDataLen(data) <= bufSize); + + int len = taosUcs4ToMbs((TdUcs4 *)varDataVal(data), varDataLen(data), tmp); + if (len < 0) { + sclError("castConvert taosUcs4ToMbs error 1"); + taosMemoryFreeClear(tmp); + return TSDB_CODE_QRY_APP_ERROR; + } + + tmp[len] = 0; } - - tmp[len] = 0; } (*func)(tmp, pOut, i);