From b2c8d2161fc4fea47eeb390da968defeaa468358 Mon Sep 17 00:00:00 2001 From: slzhou Date: Sat, 6 Aug 2022 10:47:15 +0800 Subject: [PATCH 1/5] fix: all null value group with all null out --- source/libs/function/src/builtinsimpl.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index e78a78e7d8..7fd48519a3 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -3625,6 +3625,11 @@ int32_t topBotFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { // todo assign the tag value and the corresponding row data int32_t currentRow = pBlock->info.rows; + if (pEntryInfo->numOfRes <= 0) { + colDataAppendNULL(pCol, currentRow); + setNullSelectivityValue(pCtx, pBlock, currentRow); + return pEntryInfo->numOfRes; + } for (int32_t i = 0; i < pEntryInfo->numOfRes; ++i) { STopBotResItem* pItem = &pRes->pItems[i]; if (type == TSDB_DATA_TYPE_FLOAT) { @@ -4957,6 +4962,11 @@ int32_t sampleFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, slotId); int32_t currentRow = pBlock->info.rows; + if (pInfo->numSampled == 0) { + colDataAppendNULL(pCol, currentRow); + setNullSelectivityValue(pCtx, pBlock, currentRow); + return pInfo->numSampled; + } for (int32_t i = 0; i < pInfo->numSampled; ++i) { colDataAppend(pCol, currentRow + i, pInfo->data + i * pInfo->colBytes, false); setSelectivityValue(pCtx, pBlock, &pInfo->tuplePos[i], currentRow + i); From 580b2e587b65833b4bb5e37e4a09fa281b5a6086 Mon Sep 17 00:00:00 2001 From: slzhou Date: Sat, 6 Aug 2022 14:29:12 +0800 Subject: [PATCH 2/5] fix: set selectivity value for the null group --- source/libs/function/src/builtinsimpl.c | 43 ++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 7fd48519a3..e3e98a6895 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -64,6 +64,9 @@ typedef struct SMinmaxResInfo { bool assign; // assign the first value or not int64_t v; STuplePos tuplePos; + + STuplePos nullTuplePos; + bool nullTupleSaved; } SMinmaxResInfo; typedef struct STopBotResItem { @@ -75,6 +78,10 @@ typedef struct STopBotResItem { typedef struct STopBotRes { int32_t maxSize; int16_t type; + + STuplePos nullTuplePos; + bool nullTupleSaved; + STopBotResItem* pItems; } STopBotRes; @@ -221,6 +228,10 @@ typedef struct SSampleInfo { int32_t numSampled; uint8_t colType; int16_t colBytes; + + STuplePos nullTuplePos; + bool nullTupleSaved; + char* data; STuplePos* tuplePos; } SSampleInfo; @@ -1134,6 +1145,9 @@ bool minmaxFunctionSetup(SqlFunctionCtx* pCtx, SResultRowEntryInfo* pResultInfo) SMinmaxResInfo* buf = GET_ROWCELL_INTERBUF(pResultInfo); buf->assign = false; buf->tuplePos.pageId = -1; + + buf->nullTupleSaved = false; + buf->nullTuplePos.pageId = -1; return true; } @@ -1575,6 +1589,10 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { } _min_max_over: + if (numOfElems == 0 && pCtx->subsidiaries.num > 0 && !pBuf->nullTupleSaved ) { + doSaveTupleData(pCtx, pInput->startRowIndex, pCtx->pSrcBlock, &pBuf->nullTuplePos); + pBuf->nullTupleSaved = true; + } return numOfElems; } @@ -1615,7 +1633,7 @@ int32_t minmaxFunctionFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { if (pEntryInfo->numOfRes > 0) { setSelectivityValue(pCtx, pBlock, &pRes->tuplePos, currentRow); } else { - setNullSelectivityValue(pCtx, pBlock, currentRow); + setSelectivityValue(pCtx, pBlock, &pRes->nullTuplePos, currentRow); } return pEntryInfo->numOfRes; @@ -3366,6 +3384,8 @@ bool topBotFunctionSetup(SqlFunctionCtx* pCtx, SResultRowEntryInfo* pResInfo) { pRes->maxSize = pCtx->param[1].param.i; + pRes->nullTupleSaved = false; + pRes->nullTuplePos.pageId = -1; return true; } @@ -3403,6 +3423,10 @@ int32_t topFunction(SqlFunctionCtx* pCtx) { doAddIntoResult(pCtx, data, i, pCtx->pSrcBlock, pRes->type, pInput->uid, pResInfo, true); } + if (numOfElems == 0 && pCtx->subsidiaries.num > 0 && !pRes->nullTupleSaved) { + doSaveTupleData(pCtx, pInput->startRowIndex, pCtx->pSrcBlock, &pRes->nullTuplePos); + pRes->nullTupleSaved = true; + } return TSDB_CODE_SUCCESS; } @@ -3427,6 +3451,11 @@ int32_t bottomFunction(SqlFunctionCtx* pCtx) { doAddIntoResult(pCtx, data, i, pCtx->pSrcBlock, pRes->type, pInput->uid, pResInfo, false); } + if (numOfElems == 0 && pCtx->subsidiaries.num > 0 && !pRes->nullTupleSaved) { + doSaveTupleData(pCtx, pInput->startRowIndex, pCtx->pSrcBlock, &pRes->nullTuplePos); + pRes->nullTupleSaved = true; + } + return TSDB_CODE_SUCCESS; } @@ -3627,7 +3656,7 @@ int32_t topBotFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { int32_t currentRow = pBlock->info.rows; if (pEntryInfo->numOfRes <= 0) { colDataAppendNULL(pCol, currentRow); - setNullSelectivityValue(pCtx, pBlock, currentRow); + setSelectivityValue(pCtx, pBlock, &pRes->nullTuplePos, currentRow); return pEntryInfo->numOfRes; } for (int32_t i = 0; i < pEntryInfo->numOfRes; ++i) { @@ -4902,7 +4931,8 @@ bool sampleFunctionSetup(SqlFunctionCtx* pCtx, SResultRowEntryInfo* pResultInfo) pInfo->numSampled = 0; pInfo->colType = pCtx->resDataInfo.type; pInfo->colBytes = pCtx->resDataInfo.bytes; - + pInfo->nullTuplePos.pageId = -1; + pInfo->nullTupleSaved = false; pInfo->data = (char*)pInfo + sizeof(SSampleInfo); pInfo->tuplePos = (STuplePos*)((char*)pInfo + sizeof(SSampleInfo) + pInfo->samples * pInfo->colBytes); @@ -4948,6 +4978,11 @@ int32_t sampleFunction(SqlFunctionCtx* pCtx) { doReservoirSample(pCtx, pInfo, data, i); } + if (pInfo->numSampled == 0 && pCtx->subsidiaries.num > 0 && !pInfo->nullTupleSaved) { + doSaveTupleData(pCtx, pInput->startRowIndex, pCtx->pSrcBlock, &pInfo->nullTuplePos); + pInfo->nullTupleSaved = true; + } + SET_VAL(pResInfo, pInfo->numSampled, pInfo->numSampled); return TSDB_CODE_SUCCESS; } @@ -4964,7 +4999,7 @@ int32_t sampleFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { int32_t currentRow = pBlock->info.rows; if (pInfo->numSampled == 0) { colDataAppendNULL(pCol, currentRow); - setNullSelectivityValue(pCtx, pBlock, currentRow); + setSelectivityValue(pCtx, pBlock, &pInfo->nullTuplePos, currentRow); return pInfo->numSampled; } for (int32_t i = 0; i < pInfo->numSampled; ++i) { From a16a6b73c8ff73976daa63a667a339184113e4d1 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Sat, 6 Aug 2022 20:11:12 +0800 Subject: [PATCH 3/5] fix: force at least one row for agg/window group --- source/libs/executor/src/executorimpl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 7c19ad024e..b99efce106 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1448,6 +1448,10 @@ static void doUpdateNumOfRows(SResultRow* pRow, int32_t numOfExprs, const int32_ pRow->numOfRows = pResInfo->numOfRes; } } + // TODO: if all expr skips all blocks, e.g. all null inputs for max function, output one row in final result. + if (pRow->numOfRows == 0) { + pRow->numOfRows = 1; + } } // todo extract method with copytoSSDataBlock From 81d3a2c3856ee566a1a7ed6a53ecdfe53ca08b67 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Sat, 6 Aug 2022 21:39:53 +0800 Subject: [PATCH 4/5] fix: last/first must return not null values --- include/libs/function/functionMgt.h | 1 + source/libs/executor/src/executorimpl.c | 13 +++++++++---- source/libs/function/src/functionMgt.c | 12 ++++++++++++ tests/script/tsim/parser/function.sim | 6 +++--- tests/system-test/2-query/avg.py | 2 +- .../2-query/distribute_agg_apercentile.py | 2 +- tests/system-test/2-query/distribute_agg_max.py | 2 +- tests/system-test/2-query/distribute_agg_min.py | 2 +- tests/system-test/2-query/distribute_agg_spread.py | 2 +- tests/system-test/2-query/sample.py | 4 ++-- 10 files changed, 32 insertions(+), 14 deletions(-) diff --git a/include/libs/function/functionMgt.h b/include/libs/function/functionMgt.h index f6f2ceb957..e2846c5974 100644 --- a/include/libs/function/functionMgt.h +++ b/include/libs/function/functionMgt.h @@ -202,6 +202,7 @@ bool fmIsForbidStreamFunc(int32_t funcId); bool fmIsIntervalInterpoFunc(int32_t funcId); bool fmIsInterpFunc(int32_t funcId); bool fmIsLastRowFunc(int32_t funcId); +bool fmIsReturnNotNullFunc(int32_t funcId); bool fmIsSelectValueFunc(int32_t funcId); bool fmIsSystemInfoFunc(int32_t funcId); bool fmIsImplicitTsFunc(int32_t funcId); diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index b99efce106..09094b1b14 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1437,7 +1437,8 @@ static void setExecutionContext(SOperatorInfo* pOperator, int32_t numOfOutput, u pAggInfo->groupId = groupId; } -static void doUpdateNumOfRows(SResultRow* pRow, int32_t numOfExprs, const int32_t* rowCellOffset) { +static void doUpdateNumOfRows(SqlFunctionCtx* pCtx, SResultRow* pRow, int32_t numOfExprs, const int32_t* rowCellOffset) { + bool returnNotNull = false; for (int32_t j = 0; j < numOfExprs; ++j) { struct SResultRowEntryInfo* pResInfo = getResultEntryInfo(pRow, j, rowCellOffset); if (!isRowEntryInitialized(pResInfo)) { @@ -1447,9 +1448,13 @@ static void doUpdateNumOfRows(SResultRow* pRow, int32_t numOfExprs, const int32_ if (pRow->numOfRows < pResInfo->numOfRes) { pRow->numOfRows = pResInfo->numOfRes; } + + if (fmIsReturnNotNullFunc(pCtx[j].functionId)) { + returnNotNull = true; + } } // TODO: if all expr skips all blocks, e.g. all null inputs for max function, output one row in final result. - if (pRow->numOfRows == 0) { + if (pRow->numOfRows == 0 && !returnNotNull) { pRow->numOfRows = 1; } } @@ -1462,7 +1467,7 @@ int32_t finalizeResultRowIntoResultDataBlock(SDiskbasedBuf* pBuf, SResultRowPosi SFilePage* page = getBufPage(pBuf, resultRowPosition->pageId); SResultRow* pRow = (SResultRow*)((char*)page + resultRowPosition->offset); - doUpdateNumOfRows(pRow, numOfExprs, rowCellOffset); + doUpdateNumOfRows(pCtx, pRow, numOfExprs, rowCellOffset); if (pRow->numOfRows == 0) { releaseBufPage(pBuf, page); return 0; @@ -1518,7 +1523,7 @@ int32_t doCopyToSDataBlock(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprI SResultRow* pRow = (SResultRow*)((char*)page + pPos->pos.offset); - doUpdateNumOfRows(pRow, numOfExprs, rowCellOffset); + doUpdateNumOfRows(pCtx, pRow, numOfExprs, rowCellOffset); if (pRow->numOfRows == 0) { pGroupResInfo->index += 1; releaseBufPage(pBuf, page); diff --git a/source/libs/function/src/functionMgt.c b/source/libs/function/src/functionMgt.c index 339ff3808b..88efa7c2ff 100644 --- a/source/libs/function/src/functionMgt.c +++ b/source/libs/function/src/functionMgt.c @@ -221,6 +221,18 @@ bool fmIsLastRowFunc(int32_t funcId) { return FUNCTION_TYPE_LAST_ROW == funcMgtBuiltins[funcId].type; } +bool fmIsReturnNotNullFunc(int32_t funcId) { + if (funcId < 0 || funcId >= funcMgtBuiltinsNum) { + return false; + } + return FUNCTION_TYPE_LAST == funcMgtBuiltins[funcId].type || + FUNCTION_TYPE_LAST_PARTIAL == funcMgtBuiltins[funcId].type || + FUNCTION_TYPE_LAST_MERGE == funcMgtBuiltins[funcId].type || + FUNCTION_TYPE_FIRST == funcMgtBuiltins[funcId].type || + FUNCTION_TYPE_FIRST_PARTIAL == funcMgtBuiltins[funcId].type || + FUNCTION_TYPE_FIRST_MERGE == funcMgtBuiltins[funcId].type; +} + bool fmIsSelectValueFunc(int32_t funcId) { if (funcId < 0 || funcId >= funcMgtBuiltinsNum) { return false; diff --git a/tests/script/tsim/parser/function.sim b/tests/script/tsim/parser/function.sim index 110901a6e1..9423f53e3a 100644 --- a/tests/script/tsim/parser/function.sim +++ b/tests/script/tsim/parser/function.sim @@ -327,8 +327,8 @@ endi print =================>td-2610 sql select stddev(k) from tm2 where ts='2020-12-29 18:46:19.109' -if $rows != 0 then - print expect 0, actual:$rows +if $rows != 1 then + print expect 1, actual:$rows return -1 endi sql select twa(k) from tm2 where ts='2020-12-29 18:46:19.109' @@ -406,7 +406,7 @@ if $data00 != 1.378704626 then endi sql select stddev(c) from m1 -if $rows != 0 then +if $rows != 1 then return -1 endi diff --git a/tests/system-test/2-query/avg.py b/tests/system-test/2-query/avg.py index ea7c3329ea..2afcc29ac8 100644 --- a/tests/system-test/2-query/avg.py +++ b/tests/system-test/2-query/avg.py @@ -295,7 +295,7 @@ class TDTestCase: tdSql.checkData(0, 0, 4.500000000) tdSql.query(f" select avg(c1) from {dbname}.stb1 where c1 is null ") - tdSql.checkRows(0) + tdSql.checkRows(1) def avg_func_filter(self, dbname="db"): diff --git a/tests/system-test/2-query/distribute_agg_apercentile.py b/tests/system-test/2-query/distribute_agg_apercentile.py index ad754ad805..34164acfa4 100644 --- a/tests/system-test/2-query/distribute_agg_apercentile.py +++ b/tests/system-test/2-query/distribute_agg_apercentile.py @@ -86,7 +86,7 @@ class TDTestCase: def distribute_agg_query(self, dbname="testdb"): # basic filter tdSql.query(f"select apercentile(c1 , 20) from {dbname}.stb1 where c1 is null") - tdSql.checkRows(0) + tdSql.checkRows(1) tdSql.query(f"select apercentile(c1 , 20) from {dbname}.stb1 where t1=1") tdSql.checkData(0,0,2.800000000) diff --git a/tests/system-test/2-query/distribute_agg_max.py b/tests/system-test/2-query/distribute_agg_max.py index a7b31a2084..f67088d9e6 100644 --- a/tests/system-test/2-query/distribute_agg_max.py +++ b/tests/system-test/2-query/distribute_agg_max.py @@ -168,7 +168,7 @@ class TDTestCase: def distribute_agg_query(self, dbname="testdb"): # basic filter tdSql.query(f"select max(c1) from {dbname}.stb1 where c1 is null") - tdSql.checkRows(0) + tdSql.checkRows(1) tdSql.query(f"select max(c1) from {dbname}.stb1 where t1=1") tdSql.checkData(0,0,10) diff --git a/tests/system-test/2-query/distribute_agg_min.py b/tests/system-test/2-query/distribute_agg_min.py index cc50092451..29e7c5f519 100644 --- a/tests/system-test/2-query/distribute_agg_min.py +++ b/tests/system-test/2-query/distribute_agg_min.py @@ -167,7 +167,7 @@ class TDTestCase: def distribute_agg_query(self, dbname="testdb"): # basic filter tdSql.query(f"select min(c1) from {dbname}.stb1 where c1 is null") - tdSql.checkRows(0) + tdSql.checkRows(1) tdSql.query(f"select min(c1) from {dbname}.stb1 where t1=1") tdSql.checkData(0,0,2) diff --git a/tests/system-test/2-query/distribute_agg_spread.py b/tests/system-test/2-query/distribute_agg_spread.py index 842a74628d..135a61d194 100644 --- a/tests/system-test/2-query/distribute_agg_spread.py +++ b/tests/system-test/2-query/distribute_agg_spread.py @@ -195,7 +195,7 @@ class TDTestCase: def distribute_agg_query(self): # basic filter tdSql.query("select spread(c1) from stb1 where c1 is null") - tdSql.checkRows(0) + tdSql.checkRows(1) tdSql.query("select spread(c1) from stb1 where t1=1") tdSql.checkData(0,0,8.000000000) diff --git a/tests/system-test/2-query/sample.py b/tests/system-test/2-query/sample.py index b6bdd8926e..f09265c300 100644 --- a/tests/system-test/2-query/sample.py +++ b/tests/system-test/2-query/sample.py @@ -743,7 +743,7 @@ class TDTestCase: # filter data tdSql.query(" select sample(c1, 20 ) from t1 where c1 is null ") - tdSql.checkRows(0) + tdSql.checkRows(1) tdSql.query(" select sample(c1, 20 ) from t1 where c1 =6 ") tdSql.checkRows(1) @@ -891,4 +891,4 @@ class TDTestCase: tdLog.success("%s successfully executed" % __file__) tdCases.addWindows(__file__, TDTestCase()) -tdCases.addLinux(__file__, TDTestCase()) \ No newline at end of file +tdCases.addLinux(__file__, TDTestCase()) From be8c3628c5bc126d2c82dec67f453f5329138915 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Sun, 7 Aug 2022 06:08:12 +0800 Subject: [PATCH 5/5] fix: trigger ci test --- source/libs/executor/src/executorimpl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 5666dded83..cffd38c98a 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1453,7 +1453,8 @@ static void doUpdateNumOfRows(SqlFunctionCtx* pCtx, SResultRow* pRow, int32_t nu returnNotNull = true; } } - // TODO: if all expr skips all blocks, e.g. all null inputs for max function, output one row in final result. + // if all expr skips all blocks, e.g. all null inputs for max function, output one row in final result. + // except for first/last, which require not null output, output no rows if (pRow->numOfRows == 0 && !returnNotNull) { pRow->numOfRows = 1; }