diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index afe639b856..d3f03e8e9c 100644 --- a/source/libs/function/src/builtins.c +++ b/source/libs/function/src/builtins.c @@ -2734,7 +2734,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "mode", .type = FUNCTION_TYPE_MODE, - .classification = FUNC_MGT_AGG_FUNC, + .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC, .translateFunc = translateMode, .getEnvFunc = getModeFuncEnv, .initFunc = modeFunctionSetup, diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 1501bb6d67..51e5ee0e8c 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -255,8 +255,9 @@ typedef struct SUniqueInfo { } SUniqueInfo; typedef struct SModeItem { - int64_t count; - char data[]; + int64_t count; + STuplePos tuplePos; + char data[]; } SModeItem; typedef struct SModeInfo { @@ -264,6 +265,10 @@ typedef struct SModeInfo { uint8_t colType; int16_t colBytes; SHashObj* pHash; + + STuplePos nullTuplePos; + bool nullTupleSaved; + char pItems[]; } SModeInfo; @@ -5391,10 +5396,13 @@ bool modeFunctionSetup(SqlFunctionCtx* pCtx, SResultRowEntryInfo* pResInfo) { } else { pInfo->pHash = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK); } + pInfo->nullTupleSaved = false; + pInfo->nullTuplePos.pageId = -1; + return true; } -static void doModeAdd(SModeInfo* pInfo, char* data) { +static void doModeAdd(SModeInfo* pInfo, int32_t rowIndex, SqlFunctionCtx* pCtx, char* data) { int32_t hashKeyBytes = IS_STR_DATA_TYPE(pInfo->colType) ? varDataTLen(data) : pInfo->colBytes; SModeItem** pHashItem = taosHashGet(pInfo->pHash, data, hashKeyBytes); if (pHashItem == NULL) { @@ -5403,10 +5411,17 @@ static void doModeAdd(SModeInfo* pInfo, char* data) { memcpy(pItem->data, data, hashKeyBytes); pItem->count += 1; + if (pCtx->subsidiaries.num > 0) { + pItem->tuplePos = saveTupleData(pCtx, rowIndex, pCtx->pSrcBlock, NULL); + } + taosHashPut(pInfo->pHash, data, hashKeyBytes, &pItem, sizeof(SModeItem*)); pInfo->numOfPoints++; } else { (*pHashItem)->count += 1; + if (pCtx->subsidiaries.num > 0) { + updateTupleData(pCtx, rowIndex, pCtx->pSrcBlock, &((*pHashItem)->tuplePos)); + } } } @@ -5428,7 +5443,7 @@ int32_t modeFunction(SqlFunctionCtx* pCtx) { } numOfElems++; - doModeAdd(pInfo, data); + doModeAdd(pInfo, i, pCtx, data); if (sizeof(SModeInfo) + pInfo->numOfPoints * (sizeof(SModeItem) + pInfo->colBytes) >= MODE_MAX_RESULT_SIZE) { taosHashCleanup(pInfo->pHash); @@ -5436,6 +5451,11 @@ int32_t modeFunction(SqlFunctionCtx* pCtx) { } } + if (numOfElems == 0 && pCtx->subsidiaries.num > 0 && !pInfo->nullTupleSaved) { + pInfo->nullTuplePos = saveTupleData(pCtx, pInput->startRowIndex, pCtx->pSrcBlock, NULL); + pInfo->nullTupleSaved = true; + } + SET_VAL(pResInfo, numOfElems, 1); return TSDB_CODE_SUCCESS; @@ -5461,8 +5481,10 @@ int32_t modeFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { if (maxCount != 0) { SModeItem* pResItem = (SModeItem*)(pInfo->pItems + resIndex * (sizeof(SModeItem) + pInfo->colBytes)); colDataAppend(pCol, currentRow, pResItem->data, false); + setSelectivityValue(pCtx, pBlock, &pResItem->tuplePos, currentRow); } else { colDataAppendNULL(pCol, currentRow); + setSelectivityValue(pCtx, pBlock, &pInfo->nullTuplePos, currentRow); } taosHashCleanup(pInfo->pHash); diff --git a/tests/system-test/0-others/udfTest.py b/tests/system-test/0-others/udfTest.py index 1f6096dd5a..15253df0c4 100644 --- a/tests/system-test/0-others/udfTest.py +++ b/tests/system-test/0-others/udfTest.py @@ -280,7 +280,6 @@ class TDTestCase: tdSql.error("select udf1(num1) , irate(num1) from tb;") tdSql.error("select udf1(num1) , sum(num1) from tb;") tdSql.error("select udf1(num1) , stddev(num1) from tb;") - tdSql.error("select udf1(num1) , mode(num1) from tb;") tdSql.error("select udf1(num1) , HYPERLOGLOG(num1) from tb;") # stable tdSql.error("select udf1(c1) , count(c1) from stb1;") @@ -289,7 +288,6 @@ class TDTestCase: tdSql.error("select udf1(c1) , irate(c1) from stb1;") tdSql.error("select udf1(c1) , sum(c1) from stb1;") tdSql.error("select udf1(c1) , stddev(c1) from stb1;") - tdSql.error("select udf1(c1) , mode(c1) from stb1;") tdSql.error("select udf1(c1) , HYPERLOGLOG(c1) from stb1;") # regular table with select functions diff --git a/tests/system-test/0-others/udf_cfg2.py b/tests/system-test/0-others/udf_cfg2.py index b49c99de34..869cb098e2 100644 --- a/tests/system-test/0-others/udf_cfg2.py +++ b/tests/system-test/0-others/udf_cfg2.py @@ -282,7 +282,6 @@ class TDTestCase: tdSql.error("select udf1(num1) , irate(num1) from tb;") tdSql.error("select udf1(num1) , sum(num1) from tb;") tdSql.error("select udf1(num1) , stddev(num1) from tb;") - tdSql.error("select udf1(num1) , mode(num1) from tb;") tdSql.error("select udf1(num1) , HYPERLOGLOG(num1) from tb;") # stable tdSql.error("select udf1(c1) , count(c1) from stb1;") @@ -291,7 +290,6 @@ class TDTestCase: tdSql.error("select udf1(c1) , irate(c1) from stb1;") tdSql.error("select udf1(c1) , sum(c1) from stb1;") tdSql.error("select udf1(c1) , stddev(c1) from stb1;") - tdSql.error("select udf1(c1) , mode(c1) from stb1;") tdSql.error("select udf1(c1) , HYPERLOGLOG(c1) from stb1;") # regular table with select functions diff --git a/tests/system-test/0-others/udf_create.py b/tests/system-test/0-others/udf_create.py index 9426e7eacb..65dad64000 100644 --- a/tests/system-test/0-others/udf_create.py +++ b/tests/system-test/0-others/udf_create.py @@ -282,7 +282,6 @@ class TDTestCase: tdSql.error("select udf1(num1) , irate(num1) from tb;") tdSql.error("select udf1(num1) , sum(num1) from tb;") tdSql.error("select udf1(num1) , stddev(num1) from tb;") - tdSql.error("select udf1(num1) , mode(num1) from tb;") tdSql.error("select udf1(num1) , HYPERLOGLOG(num1) from tb;") # stable tdSql.error("select udf1(c1) , count(c1) from stb1;") @@ -291,7 +290,6 @@ class TDTestCase: tdSql.error("select udf1(c1) , irate(c1) from stb1;") tdSql.error("select udf1(c1) , sum(c1) from stb1;") tdSql.error("select udf1(c1) , stddev(c1) from stb1;") - tdSql.error("select udf1(c1) , mode(c1) from stb1;") tdSql.error("select udf1(c1) , HYPERLOGLOG(c1) from stb1;") # regular table with select functions diff --git a/tests/system-test/0-others/udf_restart_taosd.py b/tests/system-test/0-others/udf_restart_taosd.py index f6134909b4..dae707520f 100644 --- a/tests/system-test/0-others/udf_restart_taosd.py +++ b/tests/system-test/0-others/udf_restart_taosd.py @@ -279,7 +279,6 @@ class TDTestCase: tdSql.error("select udf1(num1) , irate(num1) from tb;") tdSql.error("select udf1(num1) , sum(num1) from tb;") tdSql.error("select udf1(num1) , stddev(num1) from tb;") - tdSql.error("select udf1(num1) , mode(num1) from tb;") tdSql.error("select udf1(num1) , HYPERLOGLOG(num1) from tb;") # stable tdSql.error("select udf1(c1) , count(c1) from stb1;") @@ -288,7 +287,6 @@ class TDTestCase: tdSql.error("select udf1(c1) , irate(c1) from stb1;") tdSql.error("select udf1(c1) , sum(c1) from stb1;") tdSql.error("select udf1(c1) , stddev(c1) from stb1;") - tdSql.error("select udf1(c1) , mode(c1) from stb1;") tdSql.error("select udf1(c1) , HYPERLOGLOG(c1) from stb1;") # regular table with select functions diff --git a/tests/system-test/2-query/mode.py b/tests/system-test/2-query/mode.py new file mode 100644 index 0000000000..72bae55261 --- /dev/null +++ b/tests/system-test/2-query/mode.py @@ -0,0 +1,208 @@ +from random import randint, random +from numpy import equal +import taos +import sys +import datetime +import inspect + +from util.log import * +from util.sql import * +from util.cases import * + +class TDTestCase: + + def init(self, conn, logSql, replicaVar=1): + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor()) + + def prepare_datas(self, dbname="db"): + tdSql.execute( + f'''create table {dbname}.stb + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + tags (t1 int) + ''' + ) + + tdSql.execute( + f''' + create table {dbname}.tb + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + ''' + ) + for i in range(2): + tdSql.execute(f'create table {dbname}.ctb{i+1} using {dbname}.stb tags ( {i+1} )') + + tdSql.execute(f"insert into {dbname}.ctb1 values ('2020-12-11 00:00:00.000', 0, 0, 0, 0, 0, 0, 0, 'binary0', 'nchar0', now()+8a )") + tdSql.execute(f"insert into {dbname}.ctb1 values ('2020-12-11 00:00:01.000', 9, -99999, -999, -99, -9.99, -99.99, 1, 'binary9', 'nchar9', now()+9a )") + tdSql.execute(f"insert into {dbname}.ctb1 values ('2020-12-11 00:00:02.000', 9, -99999, -999, -99, -9.99, NULL, 1, 'binary9', 'nchar9', now()+9a )") + tdSql.execute(f"insert into {dbname}.ctb1 values ('2020-12-11 00:00:03.000', 9, -99999, -999, NULL, -9.99, -99.99, 1, 'binary9', 'nchar9', now()+9a )") + + tdSql.execute(f"insert into {dbname}.ctb2 values ('2020-12-12 00:00:00.000', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ) ") + tdSql.execute(f"insert into {dbname}.ctb2 values ('2020-12-12 00:00:01.000', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ) ") + tdSql.execute(f"insert into {dbname}.ctb2 values ('2020-12-12 00:00:02.000', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ) ") + + tdSql.execute( + f'''insert into {dbname}.tb values + ( '2020-04-21 01:01:01.000', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ) + ( '2020-10-21 01:01:01.000', 1, 11111, 111, 11, 1.11, 11.11, 1, "binary1", "nchar1", now()+1a ) + ( '2020-12-31 01:01:01.000', 2, 22222, 222, 22, 2.22, 22.22, 0, "binary2", "nchar2", now()+2a ) + ( '2021-01-01 01:01:06.000', 3, 33333, 333, 33, 3.33, 33.33, 0, "binary3", "nchar3", now()+3a ) + ( '2021-05-07 01:01:10.000', 4, 44444, 444, 44, 4.44, 44.44, 1, "binary4", "nchar4", now()+4a ) + ( '2021-07-21 01:01:01.000', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ) + ( '2021-09-30 01:01:16.000', 5, 55555, 555, 55, 5.55, 55.55, 0, "binary5", "nchar5", now()+5a ) + ( '2022-02-01 01:01:20.000', 6, 66666, 666, 66, 6.66, 66.66, 1, "binary6", "nchar6", now()+6a ) + ( '2022-10-28 01:01:26.000', 7, 00000, 000, 00, 0.00, 00.00, 1, "binary7", "nchar7", "1970-01-01 08:00:00.000" ) + ( '2022-12-01 01:01:30.000', 8, -88888, -888, -88, -8.88, -88.88, 0, "binary8", "nchar8", "1969-01-01 01:00:00.000" ) + ( '2022-12-31 01:01:36.000', 9, -99999999999999999, -999, -99, -9.99, -999999999999999999999.99, 1, "binary9", "nchar9", "1900-01-01 00:00:00.000" ) + ( '2023-02-21 01:01:01.000', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ) + ''' + ) + + def check_mode_function(self, dbname="db"): + + # normal table + tdSql.query(f"select ts,mode(c1) from {dbname}.tb") + tdSql.checkRows(1) + ts = tdSql.getData(0, 0) + tdSql.checkEqual(ts, datetime.datetime(2022, 12, 31, 1, 1, 36)) + tdSql.checkData(0, 1, 9) + + tdSql.query(f"select ts,mode(c2) from {dbname}.tb") + tdSql.checkRows(1) + ts = tdSql.getData(0, 0) + tdSql.checkEqual(ts, datetime.datetime(2022, 12, 31, 1, 1, 36)) + tdSql.checkData(0, 1, -99999999999999999) + + tdSql.query(f"select ts,mode(c3) from {dbname}.tb") + tdSql.checkRows(1) + ts = tdSql.getData(0, 0) + tdSql.checkEqual(ts, datetime.datetime(2022, 12, 31, 1, 1, 36)) + tdSql.checkData(0, 1, -999) + + tdSql.query(f"select ts,mode(c4) from {dbname}.tb") + tdSql.checkRows(1) + ts = tdSql.getData(0, 0) + tdSql.checkEqual(ts, datetime.datetime(2022, 12, 31, 1, 1, 36)) + tdSql.checkData(0, 1, -99) + + tdSql.query(f"select ts,mode(c5) from {dbname}.tb") + tdSql.checkRows(1) + ts = tdSql.getData(0, 0) + tdSql.checkEqual(ts, datetime.datetime(2022, 12, 31, 1, 1, 36)) + tdSql.checkData(0, 1, -9.99) + + tdSql.query(f"select ts,mode(c6) from {dbname}.tb") + tdSql.checkRows(1) + ts = tdSql.getData(0, 0) + tdSql.checkEqual(ts, datetime.datetime(2022, 12, 31, 1, 1, 36)) + tdSql.checkData(0, 1, -1e+21) + + tdSql.query(f"select ts,mode(c7) from {dbname}.tb") + tdSql.checkRows(1) + ts = tdSql.getData(0, 0) + tdSql.checkEqual(ts, datetime.datetime(2022, 12, 31, 1, 1, 36)) + tdSql.checkData(0, 1, True) + + tdSql.query(f"select ts,mode(c8) from {dbname}.tb") + tdSql.checkRows(1) + ts = tdSql.getData(0, 0) + tdSql.checkEqual(ts, datetime.datetime(2022, 12, 31, 1, 1, 36)) + tdSql.checkData(0, 1, 'binary9') + + tdSql.query(f"select ts,mode(c9) from {dbname}.tb") + tdSql.checkRows(1) + ts = tdSql.getData(0, 0) + tdSql.checkEqual(ts, datetime.datetime(2022, 12, 31, 1, 1, 36)) + tdSql.checkData(0, 1, 'nchar9') + + tdSql.query(f"select ts,c3,c5,c8,mode(c1) from {dbname}.tb") + tdSql.checkRows(1) + ts = tdSql.getData(0, 0) + tdSql.checkEqual(ts, datetime.datetime(2022, 12, 31, 1, 1, 36)) + tdSql.checkData(0, 1, -999) + tdSql.checkData(0, 2, -9.99) + tdSql.checkData(0, 3, 'binary9') + tdSql.checkData(0, 4, 9) + + # super table + tdSql.query(f"select ts,mode(c1) from {dbname}.stb") + tdSql.checkRows(1) + ts = tdSql.getData(0, 0) + tdSql.checkEqual(ts, datetime.datetime(2020, 12, 11, 0, 0, 3)) + tdSql.checkData(0, 1, 9) + + tdSql.query(f"select ts,mode(c2) from {dbname}.stb") + tdSql.checkRows(1) + ts = tdSql.getData(0, 0) + tdSql.checkEqual(ts, datetime.datetime(2020, 12, 11, 0, 0, 3)) + tdSql.checkData(0, 1, -99999) + + tdSql.query(f"select ts,mode(c3) from {dbname}.stb") + tdSql.checkRows(1) + ts = tdSql.getData(0, 0) + tdSql.checkEqual(ts, datetime.datetime(2020, 12, 11, 0, 0, 3)) + tdSql.checkData(0, 1, -999) + + tdSql.query(f"select ts,mode(c4) from {dbname}.stb") + tdSql.checkRows(1) + ts = tdSql.getData(0, 0) + tdSql.checkEqual(ts, datetime.datetime(2020, 12, 11, 0, 0, 2)) + tdSql.checkData(0, 1, -99) + + tdSql.query(f"select ts,mode(c5) from {dbname}.stb") + tdSql.checkRows(1) + ts = tdSql.getData(0, 0) + tdSql.checkEqual(ts, datetime.datetime(2020, 12, 11, 0, 0, 3)) + tdSql.checkData(0, 1, -9.99) + + tdSql.query(f"select ts,mode(c6) from {dbname}.stb") + tdSql.checkRows(1) + ts = tdSql.getData(0, 0) + tdSql.checkEqual(ts, datetime.datetime(2020, 12, 11, 0, 0, 3)) + tdSql.checkData(0, 1, -99.99) + + tdSql.query(f"select ts,mode(c7) from {dbname}.stb") + tdSql.checkRows(1) + ts = tdSql.getData(0, 0) + tdSql.checkEqual(ts, datetime.datetime(2020, 12, 11, 0, 0, 3)) + tdSql.checkData(0, 1, True) + + tdSql.query(f"select ts,mode(c8) from {dbname}.stb") + tdSql.checkRows(1) + ts = tdSql.getData(0, 0) + tdSql.checkEqual(ts, datetime.datetime(2020, 12, 11, 0, 0, 3)) + tdSql.checkData(0, 1, 'binary9') + + tdSql.query(f"select ts,mode(c9) from {dbname}.stb") + tdSql.checkRows(1) + ts = tdSql.getData(0, 0) + tdSql.checkEqual(ts, datetime.datetime(2020, 12, 11, 0, 0, 3)) + tdSql.checkData(0, 1, 'nchar9') + + tdSql.query(f"select ts,mode(c1),c3,c5,c8 from {dbname}.stb") + tdSql.checkRows(1) + ts = tdSql.getData(0, 0) + tdSql.checkEqual(ts, datetime.datetime(2020, 12, 11, 0, 0, 3)) + tdSql.checkData(0, 1, 9) + tdSql.checkData(0, 2, -999) + tdSql.checkData(0, 3, -9.99) + tdSql.checkData(0, 4, 'binary9') + + def run(self): # sourcery skip: extract-duplicate-method, remove-redundant-fstring + tdSql.prepare() + + tdLog.printNoPrefix("==========step1:create table ==============") + + self.prepare_datas() + + tdLog.printNoPrefix("==========step2:test results ==============") + + self.check_mode_function() + + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/fulltest.sh b/tests/system-test/fulltest.sh index 0672c03ee8..0ff4fb9157 100755 --- a/tests/system-test/fulltest.sh +++ b/tests/system-test/fulltest.sh @@ -142,6 +142,8 @@ python3 ./test.py -f 2-query/max.py python3 ./test.py -f 2-query/max.py -R python3 ./test.py -f 2-query/min.py python3 ./test.py -f 2-query/min.py -R +python3 ./test.py -f 2-query/mode.py +python3 ./test.py -f 2-query/mode.py -R python3 ./test.py -f 2-query/Now.py python3 ./test.py -f 2-query/Now.py -R python3 ./test.py -f 2-query/percentile.py @@ -386,6 +388,7 @@ python3 ./test.py -f 2-query/Now.py -Q 2 python3 ./test.py -f 2-query/Today.py -Q 2 python3 ./test.py -f 2-query/max.py -Q 2 python3 ./test.py -f 2-query/min.py -Q 2 +python3 ./test.py -f 2-query/mode.py -Q 2 python3 ./test.py -f 2-query/count.py -Q 2 # python3 ./test.py -f 2-query/countAlwaysReturnValue.py -Q 2 python3 ./test.py -f 2-query/last.py -Q 2 @@ -482,6 +485,7 @@ python3 ./test.py -f 2-query/Now.py -Q 3 python3 ./test.py -f 2-query/Today.py -Q 3 python3 ./test.py -f 2-query/max.py -Q 3 python3 ./test.py -f 2-query/min.py -Q 3 +python3 ./test.py -f 2-query/mode.py -Q 3 python3 ./test.py -f 2-query/count.py -Q 3 # python3 ./test.py -f 2-query/countAlwaysReturnValue.py -Q 3 python3 ./test.py -f 2-query/last.py -Q 3 @@ -580,6 +584,7 @@ python3 ./test.py -f 2-query/Now.py -Q 4 python3 ./test.py -f 2-query/Today.py -Q 4 python3 ./test.py -f 2-query/max.py -Q 4 python3 ./test.py -f 2-query/min.py -Q 4 +python3 ./test.py -f 2-query/mode.py -Q 4 python3 ./test.py -f 2-query/count.py -Q 4 # python3 ./test.py -f 2-query/countAlwaysReturnValue.py -Q 4 python3 ./test.py -f 2-query/last.py -Q 4