From 02b1257b01a3277d20f658f5fb9bb865c5d85af2 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Tue, 6 Jun 2023 11:17:02 +0800 Subject: [PATCH] enh: reassign data column in blocks --- include/common/tcommon.h | 1 + source/common/src/tdatablock.c | 76 +++++++++++++++++++++++--- source/libs/executor/src/executorInt.c | 1 - source/libs/function/src/tudf.c | 16 +++++- 4 files changed, 83 insertions(+), 11 deletions(-) diff --git a/include/common/tcommon.h b/include/common/tcommon.h index 2f93f8c3e3..d2352e100c 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -231,6 +231,7 @@ typedef struct SColumnInfoData { }; SColumnInfo info; // column info bool hasNull; // if current column data has null value. + bool reassigned; // if current column data is reassigned. } SColumnInfoData; typedef struct SQueryTableDataCond { diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index a6a54a8347..24e978b0ea 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -23,6 +23,20 @@ int32_t colDataGetLength(const SColumnInfoData* pColumnInfoData, int32_t numOfRows) { if (IS_VAR_DATA_TYPE(pColumnInfoData->info.type)) { + if (pColumnInfoData->reassigned) { + int32_t totalSize = 0; + for (int32_t row = 0; row < numOfRows; ++row) { + char* pColData = pColumnInfoData->pData + pColumnInfoData->varmeta.offset[row]; + int32_t colSize = 0; + if (pColumnInfoData->info.type == TSDB_DATA_TYPE_JSON) { + colSize = getJsonValueLen(pColData); + } else { + colSize = varDataTLen(pColData); + } + totalSize += colSize; + } + return totalSize; + } return pColumnInfoData->varmeta.length; } else { if (pColumnInfoData->info.type == TSDB_DATA_TYPE_NULL) { @@ -138,8 +152,8 @@ int32_t colDataReassignVal(SColumnInfoData* pColumnInfoData, uint32_t dstRowIdx, SVarColAttr* pAttr = &pColumnInfoData->varmeta; - uint32_t len = pColumnInfoData->varmeta.length; pColumnInfoData->varmeta.offset[dstRowIdx] = pColumnInfoData->varmeta.offset[srcRowIdx]; + pColumnInfoData->reassigned = true; } else { memcpy(pColumnInfoData->pData + pColumnInfoData->info.bytes * dstRowIdx, pData, pColumnInfoData->info.bytes); colDataClearNull_f(pColumnInfoData->nullbitmap, dstRowIdx); @@ -603,8 +617,22 @@ int32_t blockDataToBuf(char* buf, const SSDataBlock* pBlock) { *(int32_t*)pStart = dataSize; pStart += sizeof(int32_t); - memcpy(pStart, pCol->pData, dataSize); - pStart += dataSize; + if (pCol->reassigned && IS_VAR_DATA_TYPE(pCol->info.type)) { + for (int32_t row = 0; row < numOfRows; ++row) { + char* pColData = pCol->pData + pCol->varmeta.offset[row]; + int32_t colSize = 0; + if (pCol->info.type == TSDB_DATA_TYPE_JSON) { + colSize = getJsonValueLen(pColData); + } else { + colSize = varDataTLen(pColData); + } + memcpy(pStart, pColData, colSize); + pStart += colSize; + } + } else { + memcpy(pStart, pCol->pData, dataSize); + pStart += dataSize; + } } return 0; @@ -1764,7 +1792,20 @@ int32_t tEncodeDataBlock(void** buf, const SSDataBlock* pBlock) { int32_t len = colDataGetLength(pColData, rows); tlen += taosEncodeFixedI32(buf, len); - tlen += taosEncodeBinary(buf, pColData->pData, len); + if (pColData->reassigned && IS_VAR_DATA_TYPE(pColData->info.type)) { + for (int32_t row = 0; row < rows; ++row) { + char* pData = pColData->pData + pColData->varmeta.offset[row]; + int32_t colSize = 0; + if (pColData->info.type == TSDB_DATA_TYPE_JSON) { + colSize = getJsonValueLen(pData); + } else { + colSize = varDataTLen(pData); + } + tlen += taosEncodeBinary(buf, pData, colSize); + } + } else { + tlen += taosEncodeBinary(buf, pColData->pData, len); + } } return tlen; } @@ -2525,12 +2566,29 @@ int32_t blockEncode(const SSDataBlock* pBlock, char* data, int32_t numOfCols) { data += metaSize; dataLen += metaSize; - colSizes[col] = colDataGetLength(pColRes, numOfRows); - dataLen += colSizes[col]; - if (pColRes->pData != NULL) { - memmove(data, pColRes->pData, colSizes[col]); + if (pColRes->reassigned && IS_VAR_DATA_TYPE(pColRes->info.type)) { + colSizes[col] = 0; + for (int32_t row = 0; row < numOfRows; ++row) { + char* pColData = pColRes->pData + pColRes->varmeta.offset[row]; + int32_t colSize = 0; + if (pColRes->info.type == TSDB_DATA_TYPE_JSON) { + colSize = getJsonValueLen(pColData); + } else { + colSize = varDataTLen(pColData); + } + colSizes[col] += colSize; + dataLen += colSize; + memmove(data, pColData, colSize); + data += colSize; + } + } else { + colSizes[col] = colDataGetLength(pColRes, numOfRows); + dataLen += colSizes[col]; + if (pColRes->pData != NULL) { + memmove(data, pColRes->pData, colSizes[col]); + } + data += colSizes[col]; } - data += colSizes[col]; colSizes[col] = htonl(colSizes[col]); // uError("blockEncode col bytes:%d, type:%d, size:%d, htonl size:%d", pColRes->info.bytes, pColRes->info.type, htonl(colSizes[col]), colSizes[col]); diff --git a/source/libs/executor/src/executorInt.c b/source/libs/executor/src/executorInt.c index ce7a117543..fbc0512a26 100644 --- a/source/libs/executor/src/executorInt.c +++ b/source/libs/executor/src/executorInt.c @@ -562,7 +562,6 @@ void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const SColumnInfoD int32_t numOfRows = 0; if (IS_VAR_DATA_TYPE(pDst->info.type)) { int32_t j = 0; - pDst->varmeta.length = 0; while (j < totalRows) { if (pIndicator[j] == 0) { diff --git a/source/libs/function/src/tudf.c b/source/libs/function/src/tudf.c index 6b70422ac8..31a7dfdbc5 100644 --- a/source/libs/function/src/tudf.c +++ b/source/libs/function/src/tudf.c @@ -791,7 +791,21 @@ int32_t convertDataBlockToUdfDataBlock(SSDataBlock *block, SUdfDataBlock *udfBlo memcpy(udfCol->colData.varLenCol.varOffsets, col->varmeta.offset, udfCol->colData.varLenCol.varOffsetsLen); udfCol->colData.varLenCol.payloadLen = colDataGetLength(col, udfBlock->numOfRows); udfCol->colData.varLenCol.payload = taosMemoryMalloc(udfCol->colData.varLenCol.payloadLen); - memcpy(udfCol->colData.varLenCol.payload, col->pData, udfCol->colData.varLenCol.payloadLen); + if (col->reassigned) { + for (int32_t row = 0; row < udfCol->colData.numOfRows; ++row) { + char* pColData = col->pData + col->varmeta.offset[row]; + int32_t colSize = 0; + if (col->info.type == TSDB_DATA_TYPE_JSON) { + colSize = getJsonValueLen(pColData); + } else { + colSize = varDataTLen(pColData); + } + memcpy(udfCol->colData.varLenCol.payload, pColData, colSize); + udfCol->colData.varLenCol.payload += colSize; + } + } else { + memcpy(udfCol->colData.varLenCol.payload, col->pData, udfCol->colData.varLenCol.payloadLen); + } } else { udfCol->colData.fixLenCol.nullBitmapLen = BitmapLen(udfCol->colData.numOfRows); int32_t bitmapLen = udfCol->colData.fixLenCol.nullBitmapLen;