diff --git a/include/libs/function/functionMgt.h b/include/libs/function/functionMgt.h index e6512b5bac..77376a05d9 100644 --- a/include/libs/function/functionMgt.h +++ b/include/libs/function/functionMgt.h @@ -127,6 +127,8 @@ typedef enum EFunctionType { FUNCTION_TYPE_APERCENTILE_MERGE, FUNCTION_TYPE_SPREAD_PARTIAL, FUNCTION_TYPE_SPREAD_MERGE, + FUNCTION_TYPE_HISTOGRAM_PARTIAL, + FUNCTION_TYPE_HISTOGRAM_MERGE, // user defined funcion FUNCTION_TYPE_UDF = 10000 diff --git a/source/libs/function/inc/builtinsimpl.h b/source/libs/function/inc/builtinsimpl.h index 56379f0e1f..3a9b94d71e 100644 --- a/source/libs/function/inc/builtinsimpl.h +++ b/source/libs/function/inc/builtinsimpl.h @@ -78,7 +78,6 @@ bool percentileFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultI int32_t percentileFunction(SqlFunctionCtx *pCtx); int32_t percentileFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); -int32_t getApercentileMaxSize(); bool getApercentileFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); bool apercentileFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo); int32_t apercentileFunction(SqlFunctionCtx *pCtx); @@ -86,6 +85,7 @@ int32_t apercentileFunctionMerge(SqlFunctionCtx* pCtx); int32_t apercentileFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); int32_t apercentilePartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); int32_t apercentileCombine(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx); +int32_t getApercentileMaxSize(); bool getDiffFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); bool diffFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResInfo); @@ -103,13 +103,13 @@ int32_t topFunction(SqlFunctionCtx *pCtx); int32_t bottomFunction(SqlFunctionCtx *pCtx); int32_t topBotFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); -int32_t getSpreadInfoSize(); bool getSpreadFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); bool spreadFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo); int32_t spreadFunction(SqlFunctionCtx* pCtx); int32_t spreadFunctionMerge(SqlFunctionCtx* pCtx); int32_t spreadFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); int32_t spreadPartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); +int32_t getSpreadInfoSize(); bool getElapsedFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); bool elapsedFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo); @@ -119,7 +119,10 @@ int32_t elapsedFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); bool getHistogramFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); bool histogramFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo); int32_t histogramFunction(SqlFunctionCtx* pCtx); +int32_t histogramFunctionMerge(SqlFunctionCtx* pCtx); int32_t histogramFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); +int32_t histogramPartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); +int32_t getHistogramInfoSize(); bool getHLLFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); int32_t hllFunction(SqlFunctionCtx* pCtx); diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index a7d6fd7371..eca4587c03 100644 --- a/source/libs/function/src/builtins.c +++ b/source/libs/function/src/builtins.c @@ -503,6 +503,58 @@ static int32_t translateHistogram(SFunctionNode* pFunc, char* pErrBuf, int32_t l return TSDB_CODE_SUCCESS; } +static int32_t translateHistogramImpl(SFunctionNode* pFunc, char* pErrBuf, int32_t len, bool isPartial) { + int32_t numOfParams = LIST_LENGTH(pFunc->pParameterList); + if (isPartial) { + if (4 != numOfParams) { + return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName); + } + + uint8_t colType = ((SExprNode*)nodesListGetNode(pFunc->pParameterList, 0))->resType.type; + if (!IS_NUMERIC_TYPE(colType)) { + return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName); + } + + // param1 ~ param3 + for (int32_t i = 1; i < numOfParams; ++i) { + SNode* pParamNode = nodesListGetNode(pFunc->pParameterList, i); + if (QUERY_NODE_VALUE != nodeType(pParamNode)) { + return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName); + } + + SValueNode* pValue = (SValueNode*)pParamNode; + + pValue->notReserved = true; + } + + if (((SExprNode*)nodesListGetNode(pFunc->pParameterList, 1))->resType.type != TSDB_DATA_TYPE_BINARY || + ((SExprNode*)nodesListGetNode(pFunc->pParameterList, 2))->resType.type != TSDB_DATA_TYPE_BINARY || + ((SExprNode*)nodesListGetNode(pFunc->pParameterList, 3))->resType.type != TSDB_DATA_TYPE_BIGINT) { + return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName); + } + + pFunc->node.resType = (SDataType){.bytes = getHistogramInfoSize() + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY}; + } else { + if (1 != numOfParams) { + return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName); + } + + if (((SExprNode*)nodesListGetNode(pFunc->pParameterList, 0))->resType.type != TSDB_DATA_TYPE_BINARY) { + return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName); + } + + pFunc->node.resType = (SDataType){.bytes = 512, .type = TSDB_DATA_TYPE_BINARY}; + } + return TSDB_CODE_SUCCESS; +} + +static int32_t translateHistogramPartial(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { + return translateHistogramImpl(pFunc, pErrBuf, len, true); +} +static int32_t translateHistogramMerge(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { + return translateHistogramImpl(pFunc, pErrBuf, len, false); +} + static int32_t translateHLL(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { if (1 != LIST_LENGTH(pFunc->pParameterList)) { return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName); @@ -1394,6 +1446,28 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .getEnvFunc = getHistogramFuncEnv, .initFunc = histogramFunctionSetup, .processFunc = histogramFunction, + .finalizeFunc = histogramFinalize, + .pPartialFunc = "_histogram_partial", + .pMergeFunc = "_histogram_merge" + }, + { + .name = "_histogram_partial", + .type = FUNCTION_TYPE_HISTOGRAM_PARTIAL, + .classification = FUNC_MGT_AGG_FUNC, + .translateFunc = translateHistogramPartial, + .getEnvFunc = getHistogramFuncEnv, + .initFunc = histogramFunctionSetup, + .processFunc = histogramFunction, + .finalizeFunc = histogramPartialFinalize + }, + { + .name = "_histogram_merge", + .type = FUNCTION_TYPE_HISTOGRAM_MERGE, + .classification = FUNC_MGT_AGG_FUNC, + .translateFunc = translateHistogramMerge, + .getEnvFunc = getHistogramFuncEnv, + .initFunc = functionSetup, + .processFunc = histogramFunctionMerge, .finalizeFunc = histogramFinalize }, { diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 0852779791..e0302f9598 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -2187,9 +2187,7 @@ int32_t apercentilePartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx); SAPercentileInfo* pInfo = (SAPercentileInfo*)GET_ROWCELL_INTERBUF(pResInfo); - int32_t bytesHist = (int32_t)(sizeof(SAPercentileInfo) + sizeof(SHistogramInfo) + sizeof(SHistBin) * (MAX_HISTOGRAM_BIN + 1)); - int32_t bytesDigest = (int32_t)(sizeof(SAPercentileInfo) + TDIGEST_SIZE(COMPRESSION)); - int32_t resultBytes = TMAX(bytesHist, bytesDigest); + int32_t resultBytes = getApercentileMaxSize(); char *res = taosMemoryCalloc(resultBytes + VARSTR_HEADER_SIZE, sizeof(char)); if (pInfo->algo == APERCT_ALGO_TDIGEST) { @@ -3138,6 +3136,10 @@ int32_t elapsedFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { return functionFinalize(pCtx, pBlock); } +int32_t getHistogramInfoSize() { + return (int32_t)sizeof(SHistoFuncInfo) + HISTOGRAM_MAX_BINS_NUM * sizeof(SHistoFuncBin); +} + bool getHistogramFuncEnv(SFunctionNode* UNUSED_PARAM(pFunc), SFuncExecEnv* pEnv) { pEnv->calcMemSize = sizeof(SHistoFuncInfo) + HISTOGRAM_MAX_BINS_NUM * sizeof(SHistoFuncBin); return true; @@ -3348,6 +3350,30 @@ int32_t histogramFunction(SqlFunctionCtx *pCtx) { return TSDB_CODE_SUCCESS; } +int32_t histogramFunctionMerge(SqlFunctionCtx *pCtx) { + SInputColumnInfoData* pInput = &pCtx->input; + SColumnInfoData* pCol = pInput->pData[0]; + ASSERT(pCol->info.type == TSDB_DATA_TYPE_BINARY); + + SHistoFuncInfo* pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); + + int32_t start = pInput->startRowIndex; + char* data = colDataGetData(pCol, start); + SHistoFuncInfo* pInputInfo = (SHistoFuncInfo *)varDataVal(data); + + pInfo->normalized = pInputInfo->normalized; + pInfo->numOfBins = pInputInfo->numOfBins; + pInfo->totalCount += pInputInfo->totalCount; + for (int32_t k = 0; k < pInfo->numOfBins; ++k) { + pInfo->bins[k].lower = pInputInfo->bins[k].lower; + pInfo->bins[k].upper = pInputInfo->bins[k].upper; + pInfo->bins[k].count += pInputInfo->bins[k].count; + } + + SET_VAL(GET_RES_INFO(pCtx), pInfo->numOfBins, pInfo->numOfBins); + return TSDB_CODE_SUCCESS; +} + int32_t histogramFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx); SHistoFuncInfo* pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); @@ -3384,6 +3410,24 @@ int32_t histogramFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { return pResInfo->numOfRes; } +int32_t histogramPartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { + SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx); + SHistoFuncInfo* pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); + int32_t resultBytes = getHistogramInfoSize(); + char *res = taosMemoryCalloc(resultBytes + VARSTR_HEADER_SIZE, sizeof(char)); + + memcpy(varDataVal(res), pInfo, resultBytes); + varDataSetLen(res, resultBytes); + + int32_t slotId = pCtx->pExpr->base.resSchema.slotId; + SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, slotId); + + colDataAppend(pCol, pBlock->info.rows, res, false); + + taosMemoryFree(res); + return 1; +} + bool getHLLFuncEnv(SFunctionNode* UNUSED_PARAM(pFunc), SFuncExecEnv* pEnv) { pEnv->calcMemSize = sizeof(SHLLInfo); return true;