diff --git a/include/libs/function/functionMgt.h b/include/libs/function/functionMgt.h index 77376a05d9..d77b914fae 100644 --- a/include/libs/function/functionMgt.h +++ b/include/libs/function/functionMgt.h @@ -129,6 +129,8 @@ typedef enum EFunctionType { FUNCTION_TYPE_SPREAD_MERGE, FUNCTION_TYPE_HISTOGRAM_PARTIAL, FUNCTION_TYPE_HISTOGRAM_MERGE, + FUNCTION_TYPE_HYPERLOGLOG_PARTIAL, + FUNCTION_TYPE_HYPERLOGLOG_MERGE, // user defined funcion FUNCTION_TYPE_UDF = 10000 diff --git a/source/libs/function/inc/builtinsimpl.h b/source/libs/function/inc/builtinsimpl.h index 3a9b94d71e..614be17e08 100644 --- a/source/libs/function/inc/builtinsimpl.h +++ b/source/libs/function/inc/builtinsimpl.h @@ -126,7 +126,10 @@ int32_t getHistogramInfoSize(); bool getHLLFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); int32_t hllFunction(SqlFunctionCtx* pCtx); +int32_t hllFunctionMerge(SqlFunctionCtx* pCtx); int32_t hllFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); +int32_t hllPartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); +int32_t getHLLInfoSize(); bool getStateFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); bool stateFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo); diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index eca4587c03..e858fb3159 100644 --- a/source/libs/function/src/builtins.c +++ b/source/libs/function/src/builtins.c @@ -313,6 +313,7 @@ static int32_t translateApercentileImpl(SFunctionNode* pFunc, char* pErrBuf, int static int32_t translateApercentilePartial(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { return translateApercentileImpl(pFunc, pErrBuf, len, true); } + static int32_t translateApercentileMerge(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { return translateApercentileImpl(pFunc, pErrBuf, len, false); } @@ -401,6 +402,7 @@ static int32_t translateSpreadImpl(SFunctionNode* pFunc, char* pErrBuf, int32_t static int32_t translateSpreadPartial(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { return translateSpreadImpl(pFunc, pErrBuf, len, true); } + static int32_t translateSpreadMerge(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { return translateSpreadImpl(pFunc, pErrBuf, len, false); } @@ -551,6 +553,7 @@ static int32_t translateHistogramImpl(SFunctionNode* pFunc, char* pErrBuf, int32 static int32_t translateHistogramPartial(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { return translateHistogramImpl(pFunc, pErrBuf, len, true); } + static int32_t translateHistogramMerge(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { return translateHistogramImpl(pFunc, pErrBuf, len, false); } @@ -564,6 +567,28 @@ static int32_t translateHLL(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { return TSDB_CODE_SUCCESS; } +static int32_t translateHLLImpl(SFunctionNode* pFunc, char* pErrBuf, int32_t len, bool isPartial) { + if (1 != LIST_LENGTH(pFunc->pParameterList)) { + return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName); + } + + if (isPartial) { + pFunc->node.resType = (SDataType){.bytes = getHistogramInfoSize() + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY}; + } else { + pFunc->node.resType = (SDataType){.bytes = tDataTypes[TSDB_DATA_TYPE_BIGINT].bytes, .type = TSDB_DATA_TYPE_BIGINT}; + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t translateHLLPartial(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { + return translateHLLImpl(pFunc, pErrBuf, len, true); +} + +static int32_t translateHLLMerge(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { + return translateHLLImpl(pFunc, pErrBuf, len, false); +} + static bool validateStateOper(const SValueNode* pVal) { if (TSDB_DATA_TYPE_BINARY != pVal->node.resType.type) { return false; @@ -1478,6 +1503,28 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .getEnvFunc = getHLLFuncEnv, .initFunc = functionSetup, .processFunc = hllFunction, + .finalizeFunc = hllFinalize, + .pPartialFunc = "_hyperloglog_partial", + .pMergeFunc = "_hyperloglog_merge" + }, + { + .name = "_hyperloglog_partial", + .type = FUNCTION_TYPE_HYPERLOGLOG_PARTIAL, + .classification = FUNC_MGT_AGG_FUNC, + .translateFunc = translateHLLPartial, + .getEnvFunc = getHLLFuncEnv, + .initFunc = functionSetup, + .processFunc = hllFunction, + .finalizeFunc = hllPartialFinalize + }, + { + .name = "_hyperloglog_merge", + .type = FUNCTION_TYPE_HYPERLOGLOG_MERGE, + .classification = FUNC_MGT_AGG_FUNC, + .translateFunc = translateHLLMerge, + .getEnvFunc = getHLLFuncEnv, + .initFunc = functionSetup, + .processFunc = hllFunctionMerge, .finalizeFunc = hllFinalize }, { diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index e0302f9598..6e969749c6 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -3411,7 +3411,6 @@ int32_t histogramFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { } int32_t histogramPartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { - SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx); SHistoFuncInfo* pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); int32_t resultBytes = getHistogramInfoSize(); char *res = taosMemoryCalloc(resultBytes + VARSTR_HEADER_SIZE, sizeof(char)); @@ -3428,6 +3427,10 @@ int32_t histogramPartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { return 1; } +int32_t getHLLInfoSize() { + return (int32_t)sizeof(SHLLInfo); +} + bool getHLLFuncEnv(SFunctionNode* UNUSED_PARAM(pFunc), SFuncExecEnv* pEnv) { pEnv->calcMemSize = sizeof(SHLLInfo); return true; @@ -3553,6 +3556,27 @@ int32_t hllFunction(SqlFunctionCtx *pCtx) { return TSDB_CODE_SUCCESS; } +int32_t hllFunctionMerge(SqlFunctionCtx *pCtx) { + SInputColumnInfoData* pInput = &pCtx->input; + SColumnInfoData* pCol = pInput->pData[0]; + ASSERT(pCol->info.type == TSDB_DATA_TYPE_BINARY); + + SHLLInfo* pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); + + int32_t start = pInput->startRowIndex; + char* data = colDataGetData(pCol, start); + SHLLInfo* pInputInfo = (SHLLInfo *)varDataVal(data); + + for (int32_t k = 0; k < HLL_BUCKETS; ++k) { + if (pInfo->buckets[k] < pInputInfo->buckets[k]) { + pInfo->buckets[k] = pInputInfo->buckets[k]; + } + } + + SET_VAL(GET_RES_INFO(pCtx), 1, 1); + return TSDB_CODE_SUCCESS; +} + int32_t hllFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { SResultRowEntryInfo *pInfo = GET_RES_INFO(pCtx); @@ -3565,6 +3589,24 @@ int32_t hllFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { return functionFinalize(pCtx, pBlock); } +int32_t hllPartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { + SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx); + SHLLInfo* pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); + int32_t resultBytes = getHLLInfoSize(); + char *res = taosMemoryCalloc(resultBytes + VARSTR_HEADER_SIZE, sizeof(char)); + + memcpy(varDataVal(res), pInfo, resultBytes); + varDataSetLen(res, resultBytes); + + int32_t slotId = pCtx->pExpr->base.resSchema.slotId; + SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, slotId); + + colDataAppend(pCol, pBlock->info.rows, res, false); + + taosMemoryFree(res); + return pResInfo->numOfRes; +} + bool getStateFuncEnv(SFunctionNode* UNUSED_PARAM(pFunc), SFuncExecEnv* pEnv) { pEnv->calcMemSize = sizeof(SStateInfo); return true; diff --git a/tests/system-test/fulltest.bat b/tests/system-test/fulltest.bat index 6ef66c58c2..89caab49d6 100644 --- a/tests/system-test/fulltest.bat +++ b/tests/system-test/fulltest.bat @@ -3,7 +3,7 @@ python3 .\test.py -f 0-others\taosShell.py python3 .\test.py -f 0-others\taosShellError.py python3 .\test.py -f 0-others\taosShellNetChk.py python3 .\test.py -f 0-others\telemetry.py -python3 .\test.py -f 0-others\taosdMonitor.py +@REM python3 .\test.py -f 0-others\taosdMonitor.py python3 .\test.py -f 0-others\udfTest.py python3 .\test.py -f 0-others\udf_create.py python3 .\test.py -f 0-others\udf_restart_taosd.py @@ -100,4 +100,4 @@ python3 .\test.py -f 0-others\udf_restart_taosd.py @REM python3 .\test.py -f 7-tmq\subscribeStb2.py @REM python3 .\test.py -f 7-tmq\subscribeStb3.py @REM python3 .\test.py -f 7-tmq\subscribeStb4.py -@REM python3 .\test.py -f 7-tmq\db.py \ No newline at end of file +@REM python3 .\test.py -f 7-tmq\db.py