Merge pull request #13616 from taosdata/enh/histogram_split
enh(query): add histogram function distributed splitting
This commit is contained in:
commit
db9b56777f
|
@ -127,6 +127,8 @@ typedef enum EFunctionType {
|
|||
FUNCTION_TYPE_APERCENTILE_MERGE,
|
||||
FUNCTION_TYPE_SPREAD_PARTIAL,
|
||||
FUNCTION_TYPE_SPREAD_MERGE,
|
||||
FUNCTION_TYPE_HISTOGRAM_PARTIAL,
|
||||
FUNCTION_TYPE_HISTOGRAM_MERGE,
|
||||
|
||||
// user defined funcion
|
||||
FUNCTION_TYPE_UDF = 10000
|
||||
|
|
|
@ -78,7 +78,6 @@ bool percentileFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultI
|
|||
int32_t percentileFunction(SqlFunctionCtx *pCtx);
|
||||
int32_t percentileFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock);
|
||||
|
||||
int32_t getApercentileMaxSize();
|
||||
bool getApercentileFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv);
|
||||
bool apercentileFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo);
|
||||
int32_t apercentileFunction(SqlFunctionCtx *pCtx);
|
||||
|
@ -86,6 +85,7 @@ int32_t apercentileFunctionMerge(SqlFunctionCtx* pCtx);
|
|||
int32_t apercentileFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock);
|
||||
int32_t apercentilePartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock);
|
||||
int32_t apercentileCombine(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx);
|
||||
int32_t getApercentileMaxSize();
|
||||
|
||||
bool getDiffFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv);
|
||||
bool diffFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResInfo);
|
||||
|
@ -103,13 +103,13 @@ int32_t topFunction(SqlFunctionCtx *pCtx);
|
|||
int32_t bottomFunction(SqlFunctionCtx *pCtx);
|
||||
int32_t topBotFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock);
|
||||
|
||||
int32_t getSpreadInfoSize();
|
||||
bool getSpreadFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv);
|
||||
bool spreadFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo);
|
||||
int32_t spreadFunction(SqlFunctionCtx* pCtx);
|
||||
int32_t spreadFunctionMerge(SqlFunctionCtx* pCtx);
|
||||
int32_t spreadFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock);
|
||||
int32_t spreadPartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock);
|
||||
int32_t getSpreadInfoSize();
|
||||
|
||||
bool getElapsedFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv);
|
||||
bool elapsedFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo);
|
||||
|
@ -119,7 +119,10 @@ int32_t elapsedFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock);
|
|||
bool getHistogramFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv);
|
||||
bool histogramFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo);
|
||||
int32_t histogramFunction(SqlFunctionCtx* pCtx);
|
||||
int32_t histogramFunctionMerge(SqlFunctionCtx* pCtx);
|
||||
int32_t histogramFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock);
|
||||
int32_t histogramPartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock);
|
||||
int32_t getHistogramInfoSize();
|
||||
|
||||
bool getHLLFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv);
|
||||
int32_t hllFunction(SqlFunctionCtx* pCtx);
|
||||
|
|
|
@ -503,6 +503,58 @@ static int32_t translateHistogram(SFunctionNode* pFunc, char* pErrBuf, int32_t l
|
|||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
||||
static int32_t translateHistogramImpl(SFunctionNode* pFunc, char* pErrBuf, int32_t len, bool isPartial) {
|
||||
int32_t numOfParams = LIST_LENGTH(pFunc->pParameterList);
|
||||
if (isPartial) {
|
||||
if (4 != numOfParams) {
|
||||
return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName);
|
||||
}
|
||||
|
||||
uint8_t colType = ((SExprNode*)nodesListGetNode(pFunc->pParameterList, 0))->resType.type;
|
||||
if (!IS_NUMERIC_TYPE(colType)) {
|
||||
return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName);
|
||||
}
|
||||
|
||||
// param1 ~ param3
|
||||
for (int32_t i = 1; i < numOfParams; ++i) {
|
||||
SNode* pParamNode = nodesListGetNode(pFunc->pParameterList, i);
|
||||
if (QUERY_NODE_VALUE != nodeType(pParamNode)) {
|
||||
return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName);
|
||||
}
|
||||
|
||||
SValueNode* pValue = (SValueNode*)pParamNode;
|
||||
|
||||
pValue->notReserved = true;
|
||||
}
|
||||
|
||||
if (((SExprNode*)nodesListGetNode(pFunc->pParameterList, 1))->resType.type != TSDB_DATA_TYPE_BINARY ||
|
||||
((SExprNode*)nodesListGetNode(pFunc->pParameterList, 2))->resType.type != TSDB_DATA_TYPE_BINARY ||
|
||||
((SExprNode*)nodesListGetNode(pFunc->pParameterList, 3))->resType.type != TSDB_DATA_TYPE_BIGINT) {
|
||||
return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName);
|
||||
}
|
||||
|
||||
pFunc->node.resType = (SDataType){.bytes = getHistogramInfoSize() + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY};
|
||||
} else {
|
||||
if (1 != numOfParams) {
|
||||
return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName);
|
||||
}
|
||||
|
||||
if (((SExprNode*)nodesListGetNode(pFunc->pParameterList, 0))->resType.type != TSDB_DATA_TYPE_BINARY) {
|
||||
return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName);
|
||||
}
|
||||
|
||||
pFunc->node.resType = (SDataType){.bytes = 512, .type = TSDB_DATA_TYPE_BINARY};
|
||||
}
|
||||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
||||
static int32_t translateHistogramPartial(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
|
||||
return translateHistogramImpl(pFunc, pErrBuf, len, true);
|
||||
}
|
||||
static int32_t translateHistogramMerge(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
|
||||
return translateHistogramImpl(pFunc, pErrBuf, len, false);
|
||||
}
|
||||
|
||||
static int32_t translateHLL(SFunctionNode* pFunc, char* pErrBuf, int32_t len) {
|
||||
if (1 != LIST_LENGTH(pFunc->pParameterList)) {
|
||||
return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName);
|
||||
|
@ -1394,6 +1446,28 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
|
|||
.getEnvFunc = getHistogramFuncEnv,
|
||||
.initFunc = histogramFunctionSetup,
|
||||
.processFunc = histogramFunction,
|
||||
.finalizeFunc = histogramFinalize,
|
||||
.pPartialFunc = "_histogram_partial",
|
||||
.pMergeFunc = "_histogram_merge"
|
||||
},
|
||||
{
|
||||
.name = "_histogram_partial",
|
||||
.type = FUNCTION_TYPE_HISTOGRAM_PARTIAL,
|
||||
.classification = FUNC_MGT_AGG_FUNC,
|
||||
.translateFunc = translateHistogramPartial,
|
||||
.getEnvFunc = getHistogramFuncEnv,
|
||||
.initFunc = histogramFunctionSetup,
|
||||
.processFunc = histogramFunction,
|
||||
.finalizeFunc = histogramPartialFinalize
|
||||
},
|
||||
{
|
||||
.name = "_histogram_merge",
|
||||
.type = FUNCTION_TYPE_HISTOGRAM_MERGE,
|
||||
.classification = FUNC_MGT_AGG_FUNC,
|
||||
.translateFunc = translateHistogramMerge,
|
||||
.getEnvFunc = getHistogramFuncEnv,
|
||||
.initFunc = functionSetup,
|
||||
.processFunc = histogramFunctionMerge,
|
||||
.finalizeFunc = histogramFinalize
|
||||
},
|
||||
{
|
||||
|
|
|
@ -2187,9 +2187,7 @@ int32_t apercentilePartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
|
|||
SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx);
|
||||
SAPercentileInfo* pInfo = (SAPercentileInfo*)GET_ROWCELL_INTERBUF(pResInfo);
|
||||
|
||||
int32_t bytesHist = (int32_t)(sizeof(SAPercentileInfo) + sizeof(SHistogramInfo) + sizeof(SHistBin) * (MAX_HISTOGRAM_BIN + 1));
|
||||
int32_t bytesDigest = (int32_t)(sizeof(SAPercentileInfo) + TDIGEST_SIZE(COMPRESSION));
|
||||
int32_t resultBytes = TMAX(bytesHist, bytesDigest);
|
||||
int32_t resultBytes = getApercentileMaxSize();
|
||||
char *res = taosMemoryCalloc(resultBytes + VARSTR_HEADER_SIZE, sizeof(char));
|
||||
|
||||
if (pInfo->algo == APERCT_ALGO_TDIGEST) {
|
||||
|
@ -3138,6 +3136,10 @@ int32_t elapsedFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
|
|||
return functionFinalize(pCtx, pBlock);
|
||||
}
|
||||
|
||||
int32_t getHistogramInfoSize() {
|
||||
return (int32_t)sizeof(SHistoFuncInfo) + HISTOGRAM_MAX_BINS_NUM * sizeof(SHistoFuncBin);
|
||||
}
|
||||
|
||||
bool getHistogramFuncEnv(SFunctionNode* UNUSED_PARAM(pFunc), SFuncExecEnv* pEnv) {
|
||||
pEnv->calcMemSize = sizeof(SHistoFuncInfo) + HISTOGRAM_MAX_BINS_NUM * sizeof(SHistoFuncBin);
|
||||
return true;
|
||||
|
@ -3348,6 +3350,30 @@ int32_t histogramFunction(SqlFunctionCtx *pCtx) {
|
|||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
||||
int32_t histogramFunctionMerge(SqlFunctionCtx *pCtx) {
|
||||
SInputColumnInfoData* pInput = &pCtx->input;
|
||||
SColumnInfoData* pCol = pInput->pData[0];
|
||||
ASSERT(pCol->info.type == TSDB_DATA_TYPE_BINARY);
|
||||
|
||||
SHistoFuncInfo* pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx));
|
||||
|
||||
int32_t start = pInput->startRowIndex;
|
||||
char* data = colDataGetData(pCol, start);
|
||||
SHistoFuncInfo* pInputInfo = (SHistoFuncInfo *)varDataVal(data);
|
||||
|
||||
pInfo->normalized = pInputInfo->normalized;
|
||||
pInfo->numOfBins = pInputInfo->numOfBins;
|
||||
pInfo->totalCount += pInputInfo->totalCount;
|
||||
for (int32_t k = 0; k < pInfo->numOfBins; ++k) {
|
||||
pInfo->bins[k].lower = pInputInfo->bins[k].lower;
|
||||
pInfo->bins[k].upper = pInputInfo->bins[k].upper;
|
||||
pInfo->bins[k].count += pInputInfo->bins[k].count;
|
||||
}
|
||||
|
||||
SET_VAL(GET_RES_INFO(pCtx), pInfo->numOfBins, pInfo->numOfBins);
|
||||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
||||
int32_t histogramFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
|
||||
SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx);
|
||||
SHistoFuncInfo* pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx));
|
||||
|
@ -3384,6 +3410,24 @@ int32_t histogramFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
|
|||
return pResInfo->numOfRes;
|
||||
}
|
||||
|
||||
int32_t histogramPartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
|
||||
SResultRowEntryInfo* pResInfo = GET_RES_INFO(pCtx);
|
||||
SHistoFuncInfo* pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx));
|
||||
int32_t resultBytes = getHistogramInfoSize();
|
||||
char *res = taosMemoryCalloc(resultBytes + VARSTR_HEADER_SIZE, sizeof(char));
|
||||
|
||||
memcpy(varDataVal(res), pInfo, resultBytes);
|
||||
varDataSetLen(res, resultBytes);
|
||||
|
||||
int32_t slotId = pCtx->pExpr->base.resSchema.slotId;
|
||||
SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, slotId);
|
||||
|
||||
colDataAppend(pCol, pBlock->info.rows, res, false);
|
||||
|
||||
taosMemoryFree(res);
|
||||
return 1;
|
||||
}
|
||||
|
||||
bool getHLLFuncEnv(SFunctionNode* UNUSED_PARAM(pFunc), SFuncExecEnv* pEnv) {
|
||||
pEnv->calcMemSize = sizeof(SHLLInfo);
|
||||
return true;
|
||||
|
|
Loading…
Reference in New Issue