Merge pull request #13639 from taosdata/enh/hll_split
enh(query): add hll function distributed splitting
This commit is contained in:
commit
62652aa45b
|
@ -129,6 +129,8 @@ typedef enum EFunctionType {
|
||||||
FUNCTION_TYPE_SPREAD_MERGE,
|
FUNCTION_TYPE_SPREAD_MERGE,
|
||||||
FUNCTION_TYPE_HISTOGRAM_PARTIAL,
|
FUNCTION_TYPE_HISTOGRAM_PARTIAL,
|
||||||
FUNCTION_TYPE_HISTOGRAM_MERGE,
|
FUNCTION_TYPE_HISTOGRAM_MERGE,
|
||||||
|
FUNCTION_TYPE_HYPERLOGLOG_PARTIAL,
|
||||||
|
FUNCTION_TYPE_HYPERLOGLOG_MERGE,
|
||||||
|
|
||||||
// user defined funcion
|
// user defined funcion
|
||||||
FUNCTION_TYPE_UDF = 10000
|
FUNCTION_TYPE_UDF = 10000
|
||||||
|
|
|
@ -126,7 +126,10 @@ int32_t getHistogramInfoSize();
|
||||||
|
|
||||||
bool getHLLFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv);
|
bool getHLLFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv);
|
||||||
int32_t hllFunction(SqlFunctionCtx* pCtx);
|
int32_t hllFunction(SqlFunctionCtx* pCtx);
|
||||||
|
int32_t hllFunctionMerge(SqlFunctionCtx* pCtx);
|
||||||
int32_t hllFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock);
|
int32_t hllFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock);
|
||||||
|
int32_t hllPartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock);
|
||||||
|
int32_t getHLLInfoSize();
|
||||||
|
|
||||||
bool getStateFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv);
|
bool getStateFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv);
|
||||||
bool stateFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo);
|
bool stateFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo);
|
||||||
|
|
|
@ -313,6 +313,7 @@ static int32_t translateApercentileImpl(SFunctionNode* pFunc, char* pErrBuf, int
|
||||||
static int32_t translateApercentilePartial(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
|
static int32_t translateApercentilePartial(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
|
||||||
return translateApercentileImpl(pFunc, pErrBuf, len, true);
|
return translateApercentileImpl(pFunc, pErrBuf, len, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int32_t translateApercentileMerge(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
|
static int32_t translateApercentileMerge(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
|
||||||
return translateApercentileImpl(pFunc, pErrBuf, len, false);
|
return translateApercentileImpl(pFunc, pErrBuf, len, false);
|
||||||
}
|
}
|
||||||
|
@ -401,6 +402,7 @@ static int32_t translateSpreadImpl(SFunctionNode* pFunc, char* pErrBuf, int32_t
|
||||||
static int32_t translateSpreadPartial(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
|
static int32_t translateSpreadPartial(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
|
||||||
return translateSpreadImpl(pFunc, pErrBuf, len, true);
|
return translateSpreadImpl(pFunc, pErrBuf, len, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int32_t translateSpreadMerge(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
|
static int32_t translateSpreadMerge(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
|
||||||
return translateSpreadImpl(pFunc, pErrBuf, len, false);
|
return translateSpreadImpl(pFunc, pErrBuf, len, false);
|
||||||
}
|
}
|
||||||
|
@ -551,6 +553,7 @@ static int32_t translateHistogramImpl(SFunctionNode* pFunc, char* pErrBuf, int32
|
||||||
static int32_t translateHistogramPartial(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
|
static int32_t translateHistogramPartial(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
|
||||||
return translateHistogramImpl(pFunc, pErrBuf, len, true);
|
return translateHistogramImpl(pFunc, pErrBuf, len, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int32_t translateHistogramMerge(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
|
static int32_t translateHistogramMerge(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
|
||||||
return translateHistogramImpl(pFunc, pErrBuf, len, false);
|
return translateHistogramImpl(pFunc, pErrBuf, len, false);
|
||||||
}
|
}
|
||||||
|
@ -564,6 +567,28 @@ static int32_t translateHLL(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
|
||||||
return TSDB_CODE_SUCCESS;
|
return TSDB_CODE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int32_t translateHLLImpl(SFunctionNode* pFunc, char* pErrBuf, int32_t len, bool isPartial) {
|
||||||
|
if (1 != LIST_LENGTH(pFunc->pParameterList)) {
|
||||||
|
return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isPartial) {
|
||||||
|
pFunc->node.resType = (SDataType){.bytes = getHistogramInfoSize() + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY};
|
||||||
|
} else {
|
||||||
|
pFunc->node.resType = (SDataType){.bytes = tDataTypes[TSDB_DATA_TYPE_BIGINT].bytes, .type = TSDB_DATA_TYPE_BIGINT};
|
||||||
|
}
|
||||||
|
|
||||||
|
return TSDB_CODE_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int32_t translateHLLPartial(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
|
||||||
|
return translateHLLImpl(pFunc, pErrBuf, len, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int32_t translateHLLMerge(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
|
||||||
|
return translateHLLImpl(pFunc, pErrBuf, len, false);
|
||||||
|
}
|
||||||
|
|
||||||
static bool validateStateOper(const SValueNode* pVal) {
|
static bool validateStateOper(const SValueNode* pVal) {
|
||||||
if (TSDB_DATA_TYPE_BINARY != pVal->node.resType.type) {
|
if (TSDB_DATA_TYPE_BINARY != pVal->node.resType.type) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -1478,6 +1503,28 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
|
||||||
.getEnvFunc = getHLLFuncEnv,
|
.getEnvFunc = getHLLFuncEnv,
|
||||||
.initFunc = functionSetup,
|
.initFunc = functionSetup,
|
||||||
.processFunc = hllFunction,
|
.processFunc = hllFunction,
|
||||||
|
.finalizeFunc = hllFinalize,
|
||||||
|
.pPartialFunc = "_hyperloglog_partial",
|
||||||
|
.pMergeFunc = "_hyperloglog_merge"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
.name = "_hyperloglog_partial",
|
||||||
|
.type = FUNCTION_TYPE_HYPERLOGLOG_PARTIAL,
|
||||||
|
.classification = FUNC_MGT_AGG_FUNC,
|
||||||
|
.translateFunc = translateHLLPartial,
|
||||||
|
.getEnvFunc = getHLLFuncEnv,
|
||||||
|
.initFunc = functionSetup,
|
||||||
|
.processFunc = hllFunction,
|
||||||
|
.finalizeFunc = hllPartialFinalize
|
||||||
|
},
|
||||||
|
{
|
||||||
|
.name = "_hyperloglog_merge",
|
||||||
|
.type = FUNCTION_TYPE_HYPERLOGLOG_MERGE,
|
||||||
|
.classification = FUNC_MGT_AGG_FUNC,
|
||||||
|
.translateFunc = translateHLLMerge,
|
||||||
|
.getEnvFunc = getHLLFuncEnv,
|
||||||
|
.initFunc = functionSetup,
|
||||||
|
.processFunc = hllFunctionMerge,
|
||||||
.finalizeFunc = hllFinalize
|
.finalizeFunc = hllFinalize
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -3411,7 +3411,6 @@ int32_t histogramFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t histogramPartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
|
int32_t histogramPartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
|
||||||
SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx);
|
|
||||||
SHistoFuncInfo* pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx));
|
SHistoFuncInfo* pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx));
|
||||||
int32_t resultBytes = getHistogramInfoSize();
|
int32_t resultBytes = getHistogramInfoSize();
|
||||||
char *res = taosMemoryCalloc(resultBytes + VARSTR_HEADER_SIZE, sizeof(char));
|
char *res = taosMemoryCalloc(resultBytes + VARSTR_HEADER_SIZE, sizeof(char));
|
||||||
|
@ -3428,6 +3427,10 @@ int32_t histogramPartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int32_t getHLLInfoSize() {
|
||||||
|
return (int32_t)sizeof(SHLLInfo);
|
||||||
|
}
|
||||||
|
|
||||||
bool getHLLFuncEnv(SFunctionNode* UNUSED_PARAM(pFunc), SFuncExecEnv* pEnv) {
|
bool getHLLFuncEnv(SFunctionNode* UNUSED_PARAM(pFunc), SFuncExecEnv* pEnv) {
|
||||||
pEnv->calcMemSize = sizeof(SHLLInfo);
|
pEnv->calcMemSize = sizeof(SHLLInfo);
|
||||||
return true;
|
return true;
|
||||||
|
@ -3553,6 +3556,27 @@ int32_t hllFunction(SqlFunctionCtx *pCtx) {
|
||||||
return TSDB_CODE_SUCCESS;
|
return TSDB_CODE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int32_t hllFunctionMerge(SqlFunctionCtx *pCtx) {
|
||||||
|
SInputColumnInfoData* pInput = &pCtx->input;
|
||||||
|
SColumnInfoData* pCol = pInput->pData[0];
|
||||||
|
ASSERT(pCol->info.type == TSDB_DATA_TYPE_BINARY);
|
||||||
|
|
||||||
|
SHLLInfo* pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx));
|
||||||
|
|
||||||
|
int32_t start = pInput->startRowIndex;
|
||||||
|
char* data = colDataGetData(pCol, start);
|
||||||
|
SHLLInfo* pInputInfo = (SHLLInfo *)varDataVal(data);
|
||||||
|
|
||||||
|
for (int32_t k = 0; k < HLL_BUCKETS; ++k) {
|
||||||
|
if (pInfo->buckets[k] < pInputInfo->buckets[k]) {
|
||||||
|
pInfo->buckets[k] = pInputInfo->buckets[k];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
SET_VAL(GET_RES_INFO(pCtx), 1, 1);
|
||||||
|
return TSDB_CODE_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
int32_t hllFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
|
int32_t hllFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
|
||||||
SResultRowEntryInfo *pInfo = GET_RES_INFO(pCtx);
|
SResultRowEntryInfo *pInfo = GET_RES_INFO(pCtx);
|
||||||
|
|
||||||
|
@ -3565,6 +3589,24 @@ int32_t hllFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
|
||||||
return functionFinalize(pCtx, pBlock);
|
return functionFinalize(pCtx, pBlock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int32_t hllPartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
|
||||||
|
SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx);
|
||||||
|
SHLLInfo* pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx));
|
||||||
|
int32_t resultBytes = getHLLInfoSize();
|
||||||
|
char *res = taosMemoryCalloc(resultBytes + VARSTR_HEADER_SIZE, sizeof(char));
|
||||||
|
|
||||||
|
memcpy(varDataVal(res), pInfo, resultBytes);
|
||||||
|
varDataSetLen(res, resultBytes);
|
||||||
|
|
||||||
|
int32_t slotId = pCtx->pExpr->base.resSchema.slotId;
|
||||||
|
SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, slotId);
|
||||||
|
|
||||||
|
colDataAppend(pCol, pBlock->info.rows, res, false);
|
||||||
|
|
||||||
|
taosMemoryFree(res);
|
||||||
|
return pResInfo->numOfRes;
|
||||||
|
}
|
||||||
|
|
||||||
bool getStateFuncEnv(SFunctionNode* UNUSED_PARAM(pFunc), SFuncExecEnv* pEnv) {
|
bool getStateFuncEnv(SFunctionNode* UNUSED_PARAM(pFunc), SFuncExecEnv* pEnv) {
|
||||||
pEnv->calcMemSize = sizeof(SStateInfo);
|
pEnv->calcMemSize = sizeof(SStateInfo);
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -3,7 +3,7 @@ python3 .\test.py -f 0-others\taosShell.py
|
||||||
python3 .\test.py -f 0-others\taosShellError.py
|
python3 .\test.py -f 0-others\taosShellError.py
|
||||||
python3 .\test.py -f 0-others\taosShellNetChk.py
|
python3 .\test.py -f 0-others\taosShellNetChk.py
|
||||||
python3 .\test.py -f 0-others\telemetry.py
|
python3 .\test.py -f 0-others\telemetry.py
|
||||||
python3 .\test.py -f 0-others\taosdMonitor.py
|
@REM python3 .\test.py -f 0-others\taosdMonitor.py
|
||||||
python3 .\test.py -f 0-others\udfTest.py
|
python3 .\test.py -f 0-others\udfTest.py
|
||||||
python3 .\test.py -f 0-others\udf_create.py
|
python3 .\test.py -f 0-others\udf_create.py
|
||||||
python3 .\test.py -f 0-others\udf_restart_taosd.py
|
python3 .\test.py -f 0-others\udf_restart_taosd.py
|
||||||
|
@ -100,4 +100,4 @@ python3 .\test.py -f 0-others\udf_restart_taosd.py
|
||||||
@REM python3 .\test.py -f 7-tmq\subscribeStb2.py
|
@REM python3 .\test.py -f 7-tmq\subscribeStb2.py
|
||||||
@REM python3 .\test.py -f 7-tmq\subscribeStb3.py
|
@REM python3 .\test.py -f 7-tmq\subscribeStb3.py
|
||||||
@REM python3 .\test.py -f 7-tmq\subscribeStb4.py
|
@REM python3 .\test.py -f 7-tmq\subscribeStb4.py
|
||||||
@REM python3 .\test.py -f 7-tmq\db.py
|
@REM python3 .\test.py -f 7-tmq\db.py
|
||||||
|
|
Loading…
Reference in New Issue