From 512671df856d685b057ac87186f3abe450280f47 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Wed, 26 Jun 2024 02:56:49 +0000 Subject: [PATCH 1/4] Add index filtering based on column value type --- source/libs/index/src/indexFilter.c | 47 +++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/source/libs/index/src/indexFilter.c b/source/libs/index/src/indexFilter.c index cb42e60c01..d5d4592b47 100644 --- a/source/libs/index/src/indexFilter.c +++ b/source/libs/index/src/indexFilter.c @@ -624,6 +624,31 @@ static int32_t sifSetFltParam(SIFParam *left, SIFParam *right, SDataTypeBuf *typ } return 0; } + +static int8_t sifCheckNumericTypeSame(uint8_t left, uint8_t right) { + if (left != right) { + return 0; + } + return 1; +} +static int8_t sifShouldUseIndexBasedOnType(SIFParam *left, SIFParam *right) { + if (left->colValType == TSDB_DATA_TYPE_GEOMETRY || right->colValType == TSDB_DATA_TYPE_GEOMETRY) { + return 0; + } + if (IS_VAR_DATA_TYPE(left->colValType) && !IS_VAR_DATA_TYPE(right->colValType)) { + return 0; + } + if (IS_NUMERIC_TYPE(left->colValType) && !IS_NUMERIC_TYPE(right->colValType)) { + return 0; + } + if (IS_NUMERIC_TYPE(left->colValType) && IS_NUMERIC_TYPE(right->colValType)) { + if (!sifCheckNumericTypeSame(left->colValType, right->colValType)) { + return 0; + } + } + + return 1; +} static int32_t sifDoIndex(SIFParam *left, SIFParam *right, int8_t operType, SIFParam *output) { int ret = 0; SIndexMetaArg *arg = &output->arg; @@ -641,8 +666,13 @@ static int32_t sifDoIndex(SIFParam *left, SIFParam *right, int8_t operType, SIFP ret = indexJsonSearch(arg->ivtIdx, mtm, output->result); indexMultiTermQueryDestroy(mtm); } else { - if (left->colValType == TSDB_DATA_TYPE_GEOMETRY || right->colValType == TSDB_DATA_TYPE_GEOMETRY) { - return TSDB_CODE_QRY_GEO_NOT_SUPPORT_ERROR; + // if (left->colValType == TSDB_DATA_TYPE_GEOMETRY || right->colValType == TSDB_DATA_TYPE_GEOMETRY) { + // return TSDB_CODE_QRY_GEO_NOT_SUPPORT_ERROR; + 
// } + int8_t useIndex = sifShouldUseIndexBasedOnType(left, right); + if (!useIndex) { + output->status = SFLT_NOT_INDEX; + return -1; } bool reverse = false, equal = false; @@ -660,15 +690,12 @@ static int32_t sifDoIndex(SIFParam *left, SIFParam *right, int8_t operType, SIFP SDataTypeBuf typedata; memset(&typedata, 0, sizeof(typedata)); - if (IS_VAR_DATA_TYPE(left->colValType)) { - if (!IS_VAR_DATA_TYPE(right->colValType)) { - NUM_TO_STRING(right->colValType, right->condValue, sizeof(buf) - 2, buf + VARSTR_HEADER_SIZE); - varDataSetLen(buf, strlen(buf + VARSTR_HEADER_SIZE)); - param.val = buf; - } - } else { - if (sifSetFltParam(left, right, &typedata, &param) != 0) return -1; + + if (sifSetFltParam(left, right, &typedata, &param) != 0) { + output->status = SFLT_NOT_INDEX; + return -1; } + ret = left->api.metaFilterTableIds(arg->metaEx, &param, output->result); if (ret == 0) { taosArraySort(output->result, uidCompare); From 58b3dd7b99c649e6608772bf6c25da4077eb86bf Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Wed, 26 Jun 2024 08:45:14 +0000 Subject: [PATCH 2/4] Add index filtering based on column value type --- source/libs/index/src/indexFilter.c | 32 +++----- tests/script/tsim/tagindex/indexOverflow.sim | 82 ++++++++++++++++++++ 2 files changed, 93 insertions(+), 21 deletions(-) create mode 100644 tests/script/tsim/tagindex/indexOverflow.sim diff --git a/source/libs/index/src/indexFilter.c b/source/libs/index/src/indexFilter.c index d5d4592b47..80994775d5 100644 --- a/source/libs/index/src/indexFilter.c +++ b/source/libs/index/src/indexFilter.c @@ -625,28 +625,21 @@ static int32_t sifSetFltParam(SIFParam *left, SIFParam *right, SDataTypeBuf *typ return 0; } -static int8_t sifCheckNumericTypeSame(uint8_t left, uint8_t right) { - if (left != right) { - return 0; - } - return 1; -} static int8_t sifShouldUseIndexBasedOnType(SIFParam *left, SIFParam *right) { - if (left->colValType == TSDB_DATA_TYPE_GEOMETRY || right->colValType == TSDB_DATA_TYPE_GEOMETRY) { + // not compress 
+ if (left->colValType == TSDB_DATA_TYPE_FLOAT) return 0; + + if (left->colValType == TSDB_DATA_TYPE_GEOMETRY || right->colValType == TSDB_DATA_TYPE_GEOMETRY || + left->colValType == TSDB_DATA_TYPE_JSON || right->colValType == TSDB_DATA_TYPE_JSON) { return 0; } - if (IS_VAR_DATA_TYPE(left->colValType) && !IS_VAR_DATA_TYPE(right->colValType)) { - return 0; - } - if (IS_NUMERIC_TYPE(left->colValType) && !IS_NUMERIC_TYPE(right->colValType)) { - return 0; - } - if (IS_NUMERIC_TYPE(left->colValType) && IS_NUMERIC_TYPE(right->colValType)) { - if (!sifCheckNumericTypeSame(left->colValType, right->colValType)) { - return 0; - } - } + if (IS_VAR_DATA_TYPE(left->colValType)) { + if (!IS_VAR_DATA_TYPE(right->colValType)) return 0; + } else if (IS_NUMERIC_TYPE(left->colValType)) { + if (!IS_NUMERIC_TYPE(right->colValType)) return 0; + if (left->colValType != right->colValType) return 0; + } return 1; } static int32_t sifDoIndex(SIFParam *left, SIFParam *right, int8_t operType, SIFParam *output) { @@ -666,9 +659,6 @@ static int32_t sifDoIndex(SIFParam *left, SIFParam *right, int8_t operType, SIFP ret = indexJsonSearch(arg->ivtIdx, mtm, output->result); indexMultiTermQueryDestroy(mtm); } else { - // if (left->colValType == TSDB_DATA_TYPE_GEOMETRY || right->colValType == TSDB_DATA_TYPE_GEOMETRY) { - // return TSDB_CODE_QRY_GEO_NOT_SUPPORT_ERROR; - // } int8_t useIndex = sifShouldUseIndexBasedOnType(left, right); if (!useIndex) { output->status = SFLT_NOT_INDEX; diff --git a/tests/script/tsim/tagindex/indexOverflow.sim b/tests/script/tsim/tagindex/indexOverflow.sim new file mode 100644 index 0000000000..9e297099d1 --- /dev/null +++ b/tests/script/tsim/tagindex/indexOverflow.sim @@ -0,0 +1,82 @@ + +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/exec.sh -n dnode1 -s start +sql connect + +print ======== step0 +$dbPrefix = ta_3_db +$tbPrefix = ta_3_tb +$mtPrefix = ta_3_mt +$lastRowNum = 0 +$tbNum = 100000 +$rowNum = 20 +$totalNum = 200 + +print =============== 
create database +sql create database $dbPrefix +sql use $dbPrefix + + + +sql create table if not exists $mtPrefix (ts timestamp, c1 int) tags (t1 tinyint, t1c tinyint) +$i = 0 +$tinyLimit = 127 +$tinyTable = tinyTable +while $i < $tinyLimit + $tb = $tinyTable . $i + sql insert into $tb using $mtPrefix tags( $i , $i ) values( now , $i ) + $i = $i + 1 +endw + +$i = 0 +$maxTinyLimit = 200 + +# 1. compare index and no-index to verify resultset +# 2. compare resultset of index filter and scalar filter +while $i < $maxTinyLimit + sql select * from $mtPrefix where t1 <= $i + $lastRowNum = $rows + + sql select * from $mtPrefix where t1c <= $i + if $lastRowNum != $rows then + return -1 + endi + + $i = $i + 1 +endw + + +$tbPrefix = ta_3_tb_c +$mtPrefix = ta_3_mt_c +$colPrefix = 'col' +sql create table if not exists $mtPrefix (ts timestamp, c1 int) tags (t1 nchar(18), t1c nchar(18)) +$i = 0 +$tinyLimit = 127 +while $i < $tinyLimit + $tb = $tbPrefix . $i + sql insert into $tb using $mtPrefix tags( $colPrefix , $colPrefix ) values( now , $i ) + $i = $i + 1 +endw + +$i = 0 +$maxTinyLimit = 200 + +# 1. compare index and no-index to verify resultset +# 2. 
compare resultset of index filter and scalar filter +while $i < $maxTinyLimit + sql select * from $mtPrefix where t1 <= $i + $lastRowNum = $rows + + sql select * from $mtPrefix where t1c <= $i + if $lastRowNum != $rows then + return -1 + endi + + $i = $i + 1 +endw + + + + +system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file From a5441cdcdeb3145dfe17eb728d9f0c5ed4dec0f9 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Wed, 26 Jun 2024 08:49:38 +0000 Subject: [PATCH 3/4] Add index filtering based on column value type --- source/libs/index/src/indexFilter.c | 1 - 1 file changed, 1 deletion(-) diff --git a/source/libs/index/src/indexFilter.c b/source/libs/index/src/indexFilter.c index 80994775d5..323f855601 100644 --- a/source/libs/index/src/indexFilter.c +++ b/source/libs/index/src/indexFilter.c @@ -637,7 +637,6 @@ static int8_t sifShouldUseIndexBasedOnType(SIFParam *left, SIFParam *right) { if (IS_VAR_DATA_TYPE(left->colValType)) { if (!IS_VAR_DATA_TYPE(right->colValType)) return 0; } else if (IS_NUMERIC_TYPE(left->colValType)) { - if (!IS_NUMERIC_TYPE(right->colValType)) return 0; if (left->colValType != right->colValType) return 0; } return 1; From a1ee30e5fcbf79beedd61d73d95b992ab437197f Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Wed, 26 Jun 2024 09:12:26 +0000 Subject: [PATCH 4/4] Add index filtering based on column value type --- tests/parallel_test/cases.task | 2 ++ tests/script/win-test-file | 1 + 2 files changed, 3 insertions(+) diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index b96c8eb030..533923ec73 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -1488,12 +1488,14 @@ ,,y,script,./test.sh -f tmp/monitor.sim ,,y,script,./test.sh -f tsim/tagindex/add_index.sim ,,n,script,./test.sh -f tsim/tagindex/sma_and_tag_index.sim +,,y,script,./test.sh -f tsim/tagindex/indexOverflow.sim ,,y,script,./test.sh -f tsim/view/view.sim ,,y,script,./test.sh -f tsim/query/cache_last.sim 
,,y,script,./test.sh -f tsim/query/const.sim ,,y,script,./test.sh -f tsim/query/nestedJoinView.sim + #develop test ,,n,develop-test,python3 ./test.py -f 2-query/table_count_scan.py ,,n,develop-test,python3 ./test.py -f 2-query/pseudo_column.py diff --git a/tests/script/win-test-file b/tests/script/win-test-file index d51de0a61b..acc4c74d21 100644 --- a/tests/script/win-test-file +++ b/tests/script/win-test-file @@ -401,6 +401,7 @@ ./test.sh -f tsim/tag/tbNameIn.sim ./test.sh -f tmp/monitor.sim ./test.sh -f tsim/tagindex/add_index.sim +./test.sh -f tsim/tagindex/indexOverflow.sim ./test.sh -f tsim/tagindex/sma_and_tag_index.sim ./test.sh -f tsim/view/view.sim ./test.sh -f tsim/query/cache_last.sim