diff --git a/include/common/tcommon.h b/include/common/tcommon.h index 2f93f8c3e3..d2352e100c 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -231,6 +231,7 @@ typedef struct SColumnInfoData { }; SColumnInfo info; // column info bool hasNull; // if current column data has null value. + bool reassigned; // if current column data is reassigned. } SColumnInfoData; typedef struct SQueryTableDataCond { diff --git a/include/common/tdatablock.h b/include/common/tdatablock.h index 33c571fc1b..53fc07c3f3 100644 --- a/include/common/tdatablock.h +++ b/include/common/tdatablock.h @@ -178,6 +178,7 @@ int32_t getJsonValueLen(const char* data); int32_t colDataSetVal(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const char* pData, bool isNull); int32_t colDataAppend(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const char* pData, bool isNull); +int32_t colDataReassignVal(SColumnInfoData* pColumnInfoData, uint32_t dstRowIdx, uint32_t srcRowIdx, const char* pData); int32_t colDataSetNItems(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const char* pData, uint32_t numOfRows, bool trimValue); int32_t colDataMergeCol(SColumnInfoData* pColumnInfoData, int32_t numOfRow1, int32_t* capacity, const SColumnInfoData* pSource, int32_t numOfRow2); diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 311c79381c..24e978b0ea 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -23,6 +23,20 @@ int32_t colDataGetLength(const SColumnInfoData* pColumnInfoData, int32_t numOfRows) { if (IS_VAR_DATA_TYPE(pColumnInfoData->info.type)) { + if (pColumnInfoData->reassigned) { + int32_t totalSize = 0; + for (int32_t row = 0; row < numOfRows; ++row) { + char* pColData = pColumnInfoData->pData + pColumnInfoData->varmeta.offset[row]; + int32_t colSize = 0; + if (pColumnInfoData->info.type == TSDB_DATA_TYPE_JSON) { + colSize = getJsonValueLen(pColData); + } else { + colSize = varDataTLen(pColData); + } + totalSize += colSize; + } + return totalSize; + } return pColumnInfoData->varmeta.length; } else { if (pColumnInfoData->info.type == TSDB_DATA_TYPE_NULL) { @@ -126,6 +140,29 @@ int32_t colDataSetVal(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const return 0; } +int32_t colDataReassignVal(SColumnInfoData* pColumnInfoData, uint32_t dstRowIdx, uint32_t srcRowIdx, const char* pData) { + int32_t type = pColumnInfoData->info.type; + if (IS_VAR_DATA_TYPE(type)) { + int32_t dataLen = 0; + if (type == TSDB_DATA_TYPE_JSON) { + dataLen = getJsonValueLen(pData); + } else { + dataLen = varDataTLen(pData); + } + + SVarColAttr* pAttr = &pColumnInfoData->varmeta; + + pColumnInfoData->varmeta.offset[dstRowIdx] = pColumnInfoData->varmeta.offset[srcRowIdx]; + pColumnInfoData->reassigned = true; + } else { + memcpy(pColumnInfoData->pData + pColumnInfoData->info.bytes * dstRowIdx, pData, pColumnInfoData->info.bytes); + colDataClearNull_f(pColumnInfoData->nullbitmap, dstRowIdx); + } + + return 0; +} + + int32_t colDataReserve(SColumnInfoData* pColumnInfoData, size_t newSize) { if (!IS_VAR_DATA_TYPE(pColumnInfoData->info.type)) { return TSDB_CODE_SUCCESS; @@ -580,8 +617,22 @@ int32_t blockDataToBuf(char* buf, const SSDataBlock* pBlock) { *(int32_t*)pStart = dataSize; pStart += sizeof(int32_t); - memcpy(pStart, pCol->pData, dataSize); - pStart += dataSize; + if (pCol->reassigned && IS_VAR_DATA_TYPE(pCol->info.type)) { + for (int32_t row = 0; row < numOfRows; ++row) { + char* pColData = pCol->pData + pCol->varmeta.offset[row]; + int32_t colSize = 0; + if (pCol->info.type == TSDB_DATA_TYPE_JSON) { + colSize = getJsonValueLen(pColData); + } else { + colSize = varDataTLen(pColData); + } + memcpy(pStart, pColData, colSize); + pStart += colSize; + } + } else { + memcpy(pStart, pCol->pData, dataSize); + pStart += dataSize; + } } return 0; @@ -1741,7 +1792,20 @@ int32_t tEncodeDataBlock(void** buf, const SSDataBlock* pBlock) { int32_t len = colDataGetLength(pColData, rows); tlen += taosEncodeFixedI32(buf, len); - tlen += taosEncodeBinary(buf, pColData->pData, len); + if (pColData->reassigned && IS_VAR_DATA_TYPE(pColData->info.type)) { + for (int32_t row = 0; row < rows; ++row) { + char* pData = pColData->pData + pColData->varmeta.offset[row]; + int32_t colSize = 0; + if (pColData->info.type == TSDB_DATA_TYPE_JSON) { + colSize = getJsonValueLen(pData); + } else { + colSize = varDataTLen(pData); + } + tlen += taosEncodeBinary(buf, pData, colSize); + } + } else { + tlen += taosEncodeBinary(buf, pColData->pData, len); + } } return tlen; } @@ -2502,12 +2566,29 @@ int32_t blockEncode(const SSDataBlock* pBlock, char* data, int32_t numOfCols) { data += metaSize; dataLen += metaSize; - colSizes[col] = colDataGetLength(pColRes, numOfRows); - dataLen += colSizes[col]; - if (pColRes->pData != NULL) { - memmove(data, pColRes->pData, colSizes[col]); + if (pColRes->reassigned && IS_VAR_DATA_TYPE(pColRes->info.type)) { + colSizes[col] = 0; + for (int32_t row = 0; row < numOfRows; ++row) { + char* pColData = pColRes->pData + pColRes->varmeta.offset[row]; + int32_t colSize = 0; + if (pColRes->info.type == TSDB_DATA_TYPE_JSON) { + colSize = getJsonValueLen(pColData); + } else { + colSize = varDataTLen(pColData); + } + colSizes[col] += colSize; + dataLen += colSize; + memmove(data, pColData, colSize); + data += colSize; + } + } else { + colSizes[col] = colDataGetLength(pColRes, numOfRows); + dataLen += colSizes[col]; + if (pColRes->pData != NULL) { + memmove(data, pColRes->pData, colSizes[col]); + } + data += colSizes[col]; } - data += colSizes[col]; colSizes[col] = htonl(colSizes[col]); // uError("blockEncode col bytes:%d, type:%d, size:%d, htonl size:%d", pColRes->info.bytes, pColRes->info.type, htonl(colSizes[col]), colSizes[col]); diff --git a/source/libs/executor/src/executorInt.c b/source/libs/executor/src/executorInt.c index 4f1a0254e4..fbc0512a26 100644 --- a/source/libs/executor/src/executorInt.c +++ b/source/libs/executor/src/executorInt.c @@ -562,7 +562,6 @@ void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const SColumnInfoD int32_t numOfRows = 0; if (IS_VAR_DATA_TYPE(pDst->info.type)) { int32_t j = 0; - pDst->varmeta.length = 0; while (j < totalRows) { if (pIndicator[j] == 0) { @@ -573,18 +572,8 @@ void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const SColumnInfoD if (colDataIsNull_var(pDst, j)) { colDataSetNull_var(pDst, numOfRows); } else { - // fix address sanitizer error. p1 may point to memory that will change during realloc of colDataSetVal, first copy it to p2 char* p1 = colDataGetVarData(pDst, j); - int32_t len = 0; - if (pDst->info.type == TSDB_DATA_TYPE_JSON) { - len = getJsonValueLen(p1); - } else { - len = varDataTLen(p1); - } - char* p2 = taosMemoryMalloc(len); - memcpy(p2, p1, len); - colDataSetVal(pDst, numOfRows, p2, false); - taosMemoryFree(p2); + colDataReassignVal(pDst, numOfRows, j, p1); } numOfRows += 1; j += 1; diff --git a/source/libs/function/src/tudf.c b/source/libs/function/src/tudf.c index 6b70422ac8..31a7dfdbc5 100644 --- a/source/libs/function/src/tudf.c +++ b/source/libs/function/src/tudf.c @@ -791,7 +791,21 @@ int32_t convertDataBlockToUdfDataBlock(SSDataBlock *block, SUdfDataBlock *udfBlo memcpy(udfCol->colData.varLenCol.varOffsets, col->varmeta.offset, udfCol->colData.varLenCol.varOffsetsLen); udfCol->colData.varLenCol.payloadLen = colDataGetLength(col, udfBlock->numOfRows); udfCol->colData.varLenCol.payload = taosMemoryMalloc(udfCol->colData.varLenCol.payloadLen); - memcpy(udfCol->colData.varLenCol.payload, col->pData, udfCol->colData.varLenCol.payloadLen); + if (col->reassigned) { + for (int32_t row = 0; row < udfCol->colData.numOfRows; ++row) { + char* pColData = col->pData + col->varmeta.offset[row]; + int32_t colSize = 0; + if (col->info.type == TSDB_DATA_TYPE_JSON) { + colSize = getJsonValueLen(pColData); + } else { + colSize = varDataTLen(pColData); + } + memcpy(udfCol->colData.varLenCol.payload, pColData, colSize); + udfCol->colData.varLenCol.payload += colSize; + } + } else { + memcpy(udfCol->colData.varLenCol.payload, col->pData, udfCol->colData.varLenCol.payloadLen); + } } else { udfCol->colData.fixLenCol.nullBitmapLen = BitmapLen(udfCol->colData.numOfRows); int32_t bitmapLen = udfCol->colData.fixLenCol.nullBitmapLen;