From db48f118a13a08af695d4d9e2289fa8057c41e71 Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Mon, 25 Jul 2022 15:34:46 +0800 Subject: [PATCH 1/6] enh(query): add selectivity for diff function TD-17659 --- include/libs/function/function.h | 1 + source/libs/executor/src/executorimpl.c | 5 +++ source/libs/function/src/builtins.c | 2 +- source/libs/function/src/builtinsimpl.c | 44 +++++++++++++++++++++++-- 4 files changed, 49 insertions(+), 3 deletions(-) diff --git a/include/libs/function/function.h b/include/libs/function/function.h index 8cb48cc9f0..8fa63bbd45 100644 --- a/include/libs/function/function.h +++ b/include/libs/function/function.h @@ -143,6 +143,7 @@ typedef struct SqlFunctionCtx { struct SExprInfo *pExpr; struct SDiskbasedBuf *pBuf; struct SSDataBlock *pSrcBlock; + struct SSDataBlock *pDstBlock; // used by indifinite rows function to set selectivity int32_t curBufPage; bool increase; diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 7bac828a53..9dc6e7898e 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -651,6 +651,11 @@ int32_t projectApplyFunctions(SExprInfo* pExpr, SSDataBlock* pResult, SSDataBloc pfCtx->pTsOutput = (SColumnInfoData*)pCtx[*outputColIndex].pOutput; } + // link pDstBlock to set selectivity value + if (pfCtx->subsidiaries.num > 0) { + pfCtx->pDstBlock = pResult; + } + numOfRows = pfCtx->fpSet.process(pfCtx); } else if (fmIsAggFunc(pfCtx->functionId)) { // _group_key function for "partition by tbname" + csum(col_name) query diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index 324a17320e..78c65d40f5 100644 --- a/source/libs/function/src/builtins.c +++ b/source/libs/function/src/builtins.c @@ -2425,7 +2425,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "diff", .type = FUNCTION_TYPE_DIFF, - .classification = FUNC_MGT_INDEFINITE_ROWS_FUNC | FUNC_MGT_TIMELINE_FUNC | FUNC_MGT_FORBID_STREAM_FUNC, + .classification = FUNC_MGT_INDEFINITE_ROWS_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_TIMELINE_FUNC | FUNC_MGT_FORBID_STREAM_FUNC, .translateFunc = translateDiff, .getEnvFunc = getDiffFuncEnv, .initFunc = diffFunctionSetup, diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index b6fe5b9998..2d3f649739 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -1624,6 +1624,10 @@ int32_t minmaxFunctionFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { } void setNullSelectivityValue(SqlFunctionCtx* pCtx, SSDataBlock* pBlock, int32_t rowIndex) { + if (pCtx->subsidiaries.num <= 0) { + return; + } + for (int32_t j = 0; j < pCtx->subsidiaries.num; ++j) { SqlFunctionCtx* pc = pCtx->subsidiaries.pCtx[j]; int32_t dstSlotId = pc->pExpr->base.resSchema.slotId; @@ -1655,8 +1659,6 @@ void setSelectivityValue(SqlFunctionCtx* pCtx, SSDataBlock* pBlock, const STuple SFunctParam* pFuncParam = &pc->pExpr->base.pParam[0]; int32_t dstSlotId = pc->pExpr->base.resSchema.slotId; - int32_t ps = 0; - SColumnInfoData* pDstCol = taosArrayGet(pBlock->pDataBlock, dstSlotId); ASSERT(pc->pExpr->base.resSchema.bytes == pDstCol->info.bytes); if (nullList[j]) { @@ -1678,6 +1680,39 @@ void releaseSource(STuplePos* pPos) { // Todo(liuyao) relase row } +// This function append the selectivity to subsidiaries function context directly, without fetching data +// from intermediate disk based buf page +void appendSelectivityValue(SqlFunctionCtx* pCtx, int32_t rowIndex) { + if (pCtx->subsidiaries.num <= 0) { + return; + } + + for (int32_t j = 0; j < pCtx->subsidiaries.num; ++j) { + SqlFunctionCtx* pc = pCtx->subsidiaries.pCtx[j]; + + // get data from source col + SFunctParam* pFuncParam = &pc->pExpr->base.pParam[0]; + int32_t srcSlotId = pFuncParam->pCol->slotId; + + SColumnInfoData* pSrcCol = taosArrayGet(pCtx->pSrcBlock->pDataBlock, srcSlotId); + + char* pData = colDataGetData(pSrcCol, rowIndex); + + // append to dest col + int32_t dstSlotId = pc->pExpr->base.resSchema.slotId; + + SColumnInfoData* pDstCol = taosArrayGet(pCtx->pDstBlock->pDataBlock, dstSlotId); + ASSERT(pc->pExpr->base.resSchema.bytes == pDstCol->info.bytes); + + if (colDataIsNull_s(pSrcCol, rowIndex) == true) { + colDataAppendNULL(pDstCol, rowIndex); + } else { + colDataAppend(pDstCol, rowIndex, pData, false); + } + } + +} + void replaceTupleData(STuplePos* pDestPos, STuplePos* pSourcePos) { releaseSource(pDestPos); *pDestPos = *pSourcePos; @@ -3154,6 +3189,7 @@ static void doHandleDiff(SDiffInfo* pDiffInfo, int32_t type, const char* pv, SCo colDataAppendInt64(pOutput, pos, &delta); } pDiffInfo->prev.i64 = v; + break; } case TSDB_DATA_TYPE_BOOL: @@ -3247,6 +3283,8 @@ int32_t diffFunction(SqlFunctionCtx* pCtx) { if (pDiffInfo->hasPrev) { doHandleDiff(pDiffInfo, pInputCol->info.type, pv, pOutput, pos, pCtx->order); + // handle selectivity + appendSelectivityValue(pCtx, pos); numOfElems++; } else { @@ -3273,6 +3311,8 @@ int32_t diffFunction(SqlFunctionCtx* pCtx) { // there is a row of previous data block to be handled in the first place. if (pDiffInfo->hasPrev) { doHandleDiff(pDiffInfo, pInputCol->info.type, pv, pOutput, pos, pCtx->order); + // handle selectivity + appendSelectivityValue(pCtx, pos); numOfElems++; } else { From 8537449f9081cb850913eb4dbce97dbfadc03e1c Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Mon, 25 Jul 2022 15:54:36 +0800 Subject: [PATCH 2/6] fix diff function selectivity output index --- source/libs/function/src/builtinsimpl.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 2d3f649739..ba389180af 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -1682,7 +1682,7 @@ void releaseSource(STuplePos* pPos) { // This function append the selectivity to subsidiaries function context directly, without fetching data // from intermediate disk based buf page -void appendSelectivityValue(SqlFunctionCtx* pCtx, int32_t rowIndex) { +void appendSelectivityValue(SqlFunctionCtx* pCtx, int32_t rowIndex, int32_t pos) { if (pCtx->subsidiaries.num <= 0) { return; } @@ -1705,9 +1705,9 @@ void appendSelectivityValue(SqlFunctionCtx* pCtx, int32_t rowIndex) { ASSERT(pc->pExpr->base.resSchema.bytes == pDstCol->info.bytes); if (colDataIsNull_s(pSrcCol, rowIndex) == true) { - colDataAppendNULL(pDstCol, rowIndex); + colDataAppendNULL(pDstCol, pos); } else { - colDataAppend(pDstCol, rowIndex, pData, false); + colDataAppend(pDstCol, pos, pData, false); } } @@ -3284,7 +3284,9 @@ int32_t diffFunction(SqlFunctionCtx* pCtx) { if (pDiffInfo->hasPrev) { doHandleDiff(pDiffInfo, pInputCol->info.type, pv, pOutput, pos, pCtx->order); // handle selectivity - appendSelectivityValue(pCtx, pos); + if (pCtx->subsidiaries.num > 0) { + appendSelectivityValue(pCtx, i, pos); + } numOfElems++; } else { @@ -3312,7 +3314,9 @@ int32_t diffFunction(SqlFunctionCtx* pCtx) { if (pDiffInfo->hasPrev) { doHandleDiff(pDiffInfo, pInputCol->info.type, pv, pOutput, pos, pCtx->order); // handle selectivity - appendSelectivityValue(pCtx, pos); + if (pCtx->subsidiaries.num > 0) { + appendSelectivityValue(pCtx, i, pos); + } numOfElems++; } else { From e8da4f429c88c3eca9c6aa971f4645c4e3acb79c Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Mon, 25 Jul 2022 16:04:28 +0800 Subject: [PATCH 3/6] enh(query): add derivative function selectivity TD-17659 --- source/libs/function/src/builtins.c | 2 +- source/libs/function/src/builtinsimpl.c | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index 78c65d40f5..c55d15badb 100644 --- a/source/libs/function/src/builtins.c +++ b/source/libs/function/src/builtins.c @@ -2220,7 +2220,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "derivative", .type = FUNCTION_TYPE_DERIVATIVE, - .classification = FUNC_MGT_INDEFINITE_ROWS_FUNC | FUNC_MGT_TIMELINE_FUNC | FUNC_MGT_IMPLICIT_TS_FUNC, + .classification = FUNC_MGT_INDEFINITE_ROWS_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_TIMELINE_FUNC | FUNC_MGT_IMPLICIT_TS_FUNC, .translateFunc = translateDerivative, .getEnvFunc = getDerivativeFuncEnv, .initFunc = derivativeFuncSetup, diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index ba389180af..5ad433c9ff 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -5767,6 +5767,12 @@ int32_t derivativeFunction(SqlFunctionCtx* pCtx) { if (pTsOutput != NULL) { colDataAppendInt64(pTsOutput, pos, &tsList[i]); } + + // handle selectivity + if (pCtx->subsidiaries.num > 0) { + appendSelectivityValue(pCtx, i, pos); + } + numOfElems++; } } @@ -5799,6 +5805,12 @@ int32_t derivativeFunction(SqlFunctionCtx* pCtx) { if (pTsOutput != NULL) { colDataAppendInt64(pTsOutput, pos, &pDerivInfo->prevTs); } + + // handle selectivity + if (pCtx->subsidiaries.num > 0) { + appendSelectivityValue(pCtx, i, pos); + } + numOfElems++; } } From 292801c9f9922abbfee78b816b542a0b64effea0 Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Mon, 25 Jul 2022 17:23:56 +0800 Subject: [PATCH 4/6] fix test cases --- tests/system-test/2-query/diff.py | 74 +++++++++++++++++++++- tests/system-test/2-query/function_diff.py | 4 +- 2 files changed, 75 insertions(+), 3 deletions(-) diff --git a/tests/system-test/2-query/diff.py b/tests/system-test/2-query/diff.py index 30b588fa97..c6800d9a8a 100644 --- a/tests/system-test/2-query/diff.py +++ b/tests/system-test/2-query/diff.py @@ -95,7 +95,6 @@ class TDTestCase: tdSql.error("select diff(col12) from stb_1") tdSql.error("select diff(col13) from stb_1") tdSql.error("select diff(col14) from stb_1") - tdSql.error("select ts,diff(col1),ts from stb_1") tdSql.query("select diff(col1) from stb_1") tdSql.checkRows(10) @@ -115,6 +114,79 @@ class TDTestCase: tdSql.query("select diff(col6) from stb_1") tdSql.checkRows(10) + # check selectivity + tdSql.query("select ts, diff(col1), col2 from stb_1") + tdSql.checkRows(10) + tdSql.checkData(0, 0, "2018-09-17 09:00:00.000") + tdSql.checkData(1, 0, "2018-09-17 09:00:00.001") + tdSql.checkData(2, 0, "2018-09-17 09:00:00.002") + tdSql.checkData(3, 0, "2018-09-17 09:00:00.003") + tdSql.checkData(4, 0, "2018-09-17 09:00:00.004") + tdSql.checkData(5, 0, "2018-09-17 09:00:00.005") + tdSql.checkData(6, 0, "2018-09-17 09:00:00.006") + tdSql.checkData(7, 0, "2018-09-17 09:00:00.007") + tdSql.checkData(8, 0, "2018-09-17 09:00:00.008") + tdSql.checkData(9, 0, "2018-09-17 09:00:00.009") + + tdSql.checkData(0, 1, 1) + tdSql.checkData(1, 1, 1) + tdSql.checkData(2, 1, 1) + tdSql.checkData(3, 1, 1) + tdSql.checkData(4, 1, 1) + tdSql.checkData(5, 1, 1) + tdSql.checkData(6, 1, 1) + tdSql.checkData(7, 1, 1) + tdSql.checkData(8, 1, 1) + tdSql.checkData(9, 1, 1) + + tdSql.checkData(0, 2, 0) + tdSql.checkData(1, 2, 1) + tdSql.checkData(2, 2, 2) + tdSql.checkData(3, 2, 3) + tdSql.checkData(4, 2, 4) + tdSql.checkData(5, 2, 5) + tdSql.checkData(6, 2, 6) + tdSql.checkData(7, 2, 7) + tdSql.checkData(8, 2, 8) + tdSql.checkData(9, 2, 9) + + tdSql.query("select ts, diff(col1), col2 from stb order by ts") + tdSql.checkRows(10) + + tdSql.checkData(0, 0, "2018-09-17 09:00:00.000") + tdSql.checkData(1, 0, "2018-09-17 09:00:00.001") + tdSql.checkData(2, 0, "2018-09-17 09:00:00.002") + tdSql.checkData(3, 0, "2018-09-17 09:00:00.003") + tdSql.checkData(4, 0, "2018-09-17 09:00:00.004") + tdSql.checkData(5, 0, "2018-09-17 09:00:00.005") + tdSql.checkData(6, 0, "2018-09-17 09:00:00.006") + tdSql.checkData(7, 0, "2018-09-17 09:00:00.007") + tdSql.checkData(8, 0, "2018-09-17 09:00:00.008") + tdSql.checkData(9, 0, "2018-09-17 09:00:00.009") + + tdSql.checkData(0, 1, 1) + tdSql.checkData(1, 1, 1) + tdSql.checkData(2, 1, 1) + tdSql.checkData(3, 1, 1) + tdSql.checkData(4, 1, 1) + tdSql.checkData(5, 1, 1) + tdSql.checkData(6, 1, 1) + tdSql.checkData(7, 1, 1) + tdSql.checkData(8, 1, 1) + tdSql.checkData(9, 1, 1) + + tdSql.checkData(0, 2, 0) + tdSql.checkData(1, 2, 1) + tdSql.checkData(2, 2, 2) + tdSql.checkData(3, 2, 3) + tdSql.checkData(4, 2, 4) + tdSql.checkData(5, 2, 5) + tdSql.checkData(6, 2, 6) + tdSql.checkData(7, 2, 7) + tdSql.checkData(8, 2, 8) + tdSql.checkData(9, 2, 9) + + tdSql.execute('''create table stb1(ts timestamp, col1 tinyint, col2 smallint, col3 int, col4 bigint, col5 float, col6 double, col7 bool, col8 binary(20), col9 nchar(20), col11 tinyint unsigned, col12 smallint unsigned, col13 int unsigned, col14 bigint unsigned) tags(loc nchar(20))''') tdSql.execute("create table stb1_1 using stb tags('shanghai')") diff --git a/tests/system-test/2-query/function_diff.py b/tests/system-test/2-query/function_diff.py index 5e95510c1d..99e87e6cd6 100644 --- a/tests/system-test/2-query/function_diff.py +++ b/tests/system-test/2-query/function_diff.py @@ -283,14 +283,14 @@ class TDTestCase: tdSql.error(self.diff_query_form(alias=", diff(c1)")) # mix with calculation function 2 # tdSql.error(self.diff_query_form(alias=" + 2")) # mix with arithmetic 1 tdSql.error(self.diff_query_form(alias=" + avg(c1)")) # mix with arithmetic 2 - tdSql.error(self.diff_query_form(alias=", c2")) # mix with other 1 + tdSql.query(self.diff_query_form(alias=", c2")) # mix with other 1 # tdSql.error(self.diff_query_form(table_expr="stb1")) # select stb directly stb_join = { "col": "stb1.c1", "table_expr": "stb1, stb2", "condition": "where stb1.ts=stb2.ts and stb1.st1=stb2.st2 order by stb1.ts" } - tdSql.error(self.diff_query_form(**stb_join)) # stb join + tdSql.query(self.diff_query_form(**stb_join)) # stb join interval_sql = { "condition": "where ts>0 and ts < now interval(1h) fill(next)" } From b43903f3d4c6d21198daefa535e51dea710ad229 Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Tue, 26 Jul 2022 10:54:56 +0800 Subject: [PATCH 5/6] fix unit test --- source/libs/parser/test/parSelectTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/parser/test/parSelectTest.cpp b/source/libs/parser/test/parSelectTest.cpp index 849ba14d11..7376cc1fa3 100644 --- a/source/libs/parser/test/parSelectTest.cpp +++ b/source/libs/parser/test/parSelectTest.cpp @@ -144,7 +144,7 @@ TEST_F(ParserSelectTest, IndefiniteRowsFunc) { TEST_F(ParserSelectTest, IndefiniteRowsFuncSemanticCheck) { useDb("root", "test"); - run("SELECT DIFF(c1), c2 FROM t1", TSDB_CODE_PAR_NOT_SINGLE_GROUP); + run("SELECT DIFF(c1), c2 FROM t1"); run("SELECT DIFF(c1), tbname FROM t1", TSDB_CODE_PAR_NOT_SINGLE_GROUP); From 3b719337c6166cee6843ed4bde8d56e408a0df01 Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Tue, 26 Jul 2022 14:14:19 +0800 Subject: [PATCH 6/6] fix ut test case --- source/libs/parser/test/parSelectTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/parser/test/parSelectTest.cpp b/source/libs/parser/test/parSelectTest.cpp index 7376cc1fa3..951ca5e40d 100644 --- a/source/libs/parser/test/parSelectTest.cpp +++ b/source/libs/parser/test/parSelectTest.cpp @@ -146,7 +146,7 @@ TEST_F(ParserSelectTest, IndefiniteRowsFuncSemanticCheck) { run("SELECT DIFF(c1), c2 FROM t1"); - run("SELECT DIFF(c1), tbname FROM t1", TSDB_CODE_PAR_NOT_SINGLE_GROUP); + run("SELECT DIFF(c1), tbname FROM t1"); run("SELECT DIFF(c1), count(*) FROM t1", TSDB_CODE_PAR_NOT_ALLOWED_FUNC);