From a861537bc075f8b1dc2ebccd4ad4cdfaa7ceb1a6 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Mon, 29 Aug 2022 15:44:10 +0800 Subject: [PATCH] fix: fix filter group merge error --- source/libs/scalar/inc/filterInt.h | 2 +- source/libs/scalar/src/filter.c | 82 ++++++++++++++++++++++++----- tests/script/jenkins/basic.txt | 1 + tests/script/tsim/scalar/filter.sim | 38 +++++++++++++ 4 files changed, 110 insertions(+), 13 deletions(-) create mode 100644 tests/script/tsim/scalar/filter.sim diff --git a/source/libs/scalar/inc/filterInt.h b/source/libs/scalar/inc/filterInt.h index 23693c785a..23d88d1a26 100644 --- a/source/libs/scalar/inc/filterInt.h +++ b/source/libs/scalar/inc/filterInt.h @@ -276,7 +276,7 @@ struct SFilterInfo { #define FILTER_CLR_FLAG(st, f) st &= (~f) #define SIMPLE_COPY_VALUES(dst, src) *((int64_t *)dst) = *((int64_t *)src) -#define FILTER_PACKAGE_UNIT_HASH_KEY(v, optr, idx1, idx2) do { char *_t = (char *)v; _t[0] = optr; *(uint32_t *)(_t + 1) = idx1; *(uint32_t *)(_t + 3) = idx2; } while (0) +#define FLT_PACKAGE_UNIT_HASH_KEY(v, op1, op2, lidx, ridx, ridx2) do { char *_t = (char *)(v); _t[0] = (op1); _t[1] = (op2); *(uint32_t *)(_t + 2) = (lidx); *(uint32_t *)(_t + 2 + sizeof(uint32_t)) = (ridx); *(uint32_t *)(_t + 2 + 2 * sizeof(uint32_t)) = (ridx2); } while (0) /* packs op1, op2, lidx, ridx, ridx2 into v; v must provide at least 14 bytes */ #define FILTER_GREATER(cr,sflag,eflag) ((cr > 0) || ((cr == 0) && (FILTER_GET_FLAG(sflag,RANGE_FLG_EXCLUDE) || FILTER_GET_FLAG(eflag,RANGE_FLG_EXCLUDE)))) #define FILTER_COPY_RA(dst, src) do { (dst)->sflag = (src)->sflag; (dst)->eflag = (src)->eflag; (dst)->s = (src)->s; (dst)->e = (src)->e; } while (0) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 4377dbf14e..b27a61b8bd 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -910,14 +910,14 @@ int32_t filterAddFieldFromNode(SFilterInfo *info, SNode *node, SFilterFieldId *f return TSDB_CODE_SUCCESS; } -int32_t filterAddUnit(SFilterInfo *info, uint8_t optr, SFilterFieldId *left, SFilterFieldId *right, uint32_t *uidx) { +int32_t
filterAddUnitImpl(SFilterInfo *info, uint8_t optr, SFilterFieldId *left, SFilterFieldId *right, uint8_t optr2, SFilterFieldId *right2, uint32_t *uidx) { if (FILTER_GET_FLAG(info->options, FLT_OPTION_NEED_UNIQE)) { if (info->pctx.unitHash == NULL) { info->pctx.unitHash = taosHashInit(FILTER_DEFAULT_GROUP_SIZE * FILTER_DEFAULT_UNIT_SIZE, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, false); } else { - int64_t v = 0; - FILTER_PACKAGE_UNIT_HASH_KEY(&v, optr, left->idx, right ? right->idx : -1); - void *hu = taosHashGet(info->pctx.unitHash, &v, sizeof(v)); + char v[14] = {0}; + FLT_PACKAGE_UNIT_HASH_KEY(&v, optr, optr2, left->idx, (right ? right->idx : -1), (right2 ? right2->idx : -1)); + void *hu = taosHashGet(info->pctx.unitHash, v, sizeof(v)); if (hu) { *uidx = *(uint32_t *)hu; return TSDB_CODE_SUCCESS; @@ -939,7 +939,11 @@ int32_t filterAddUnit(SFilterInfo *info, uint8_t optr, SFilterFieldId *left, SFi if (right) { u->right = *right; } - + u->compare.optr2 = optr2; + if (right2) { + u->right2 = *right2; + } + if (u->right.type == FLD_TYPE_VALUE) { SFilterField *val = FILTER_UNIT_RIGHT_FIELD(info, u); assert(FILTER_GET_FLAG(val->flag, FLD_TYPE_VALUE)); @@ -960,9 +964,9 @@ int32_t filterAddUnit(SFilterInfo *info, uint8_t optr, SFilterFieldId *left, SFi *uidx = info->unitNum; if (FILTER_GET_FLAG(info->options, FLT_OPTION_NEED_UNIQE)) { - int64_t v = 0; - FILTER_PACKAGE_UNIT_HASH_KEY(&v, optr, left->idx, right ? right->idx : -1); - taosHashPut(info->pctx.unitHash, &v, sizeof(v), uidx, sizeof(*uidx)); + char v[14] = {0}; + FLT_PACKAGE_UNIT_HASH_KEY(&v, optr, optr2, left->idx, (right ? right->idx : -1), (right2 ? 
right2->idx : -1)); + taosHashPut(info->pctx.unitHash, v, sizeof(v), uidx, sizeof(*uidx)); } ++info->unitNum; @@ -971,6 +975,9 @@ int32_t filterAddUnit(SFilterInfo *info, uint8_t optr, SFilterFieldId *left, SFi } +int32_t filterAddUnit(SFilterInfo *info, uint8_t optr, SFilterFieldId *left, SFilterFieldId *right, uint32_t *uidx) { + return filterAddUnitImpl(info, optr, left, right, 0, NULL, uidx); +} int32_t filterAddUnitToGroup(SFilterGroup *group, uint32_t unitIdx) { if (group->unitNum >= group->unitSize) { @@ -1147,8 +1154,8 @@ int32_t filterAddGroupUnitFromCtx(SFilterInfo *dst, SFilterInfo *src, SFilterRan SIMPLE_COPY_VALUES(data2, &ra->e); filterAddField(dst, NULL, &data2, FLD_TYPE_VALUE, &right2, tDataTypes[type].bytes, true); - filterAddUnit(dst, FILTER_GET_FLAG(ra->sflag, RANGE_FLG_EXCLUDE) ? OP_TYPE_GREATER_THAN : OP_TYPE_GREATER_EQUAL, &left, &right, &uidx); - filterAddUnitRight(dst, FILTER_GET_FLAG(ra->eflag, RANGE_FLG_EXCLUDE) ? OP_TYPE_LOWER_THAN : OP_TYPE_LOWER_EQUAL, &right2, uidx); + filterAddUnitImpl(dst, FILTER_GET_FLAG(ra->sflag, RANGE_FLG_EXCLUDE) ? OP_TYPE_GREATER_THAN : OP_TYPE_GREATER_EQUAL, &left, &right, + FILTER_GET_FLAG(ra->eflag, RANGE_FLG_EXCLUDE) ? OP_TYPE_LOWER_THAN : OP_TYPE_LOWER_EQUAL, &right2, &uidx); filterAddUnitToGroup(g, uidx); return TSDB_CODE_SUCCESS; } @@ -1222,8 +1229,8 @@ int32_t filterAddGroupUnitFromCtx(SFilterInfo *dst, SFilterInfo *src, SFilterRan SIMPLE_COPY_VALUES(data2, &r->ra.e); filterAddField(dst, NULL, &data2, FLD_TYPE_VALUE, &right2, tDataTypes[type].bytes, true); - filterAddUnit(dst, FILTER_GET_FLAG(r->ra.sflag, RANGE_FLG_EXCLUDE) ? OP_TYPE_GREATER_THAN : OP_TYPE_GREATER_EQUAL, &left, &right, &uidx); - filterAddUnitRight(dst, FILTER_GET_FLAG(r->ra.eflag, RANGE_FLG_EXCLUDE) ? OP_TYPE_LOWER_THAN : OP_TYPE_LOWER_EQUAL, &right2, uidx); + filterAddUnitImpl(dst, FILTER_GET_FLAG(r->ra.sflag, RANGE_FLG_EXCLUDE) ? 
OP_TYPE_GREATER_THAN : OP_TYPE_GREATER_EQUAL, &left, &right, + FILTER_GET_FLAG(r->ra.eflag, RANGE_FLG_EXCLUDE) ? OP_TYPE_LOWER_THAN : OP_TYPE_LOWER_EQUAL, &right2, &uidx); filterAddUnitToGroup(g, uidx); } @@ -2073,6 +2080,44 @@ int32_t filterMergeGroupUnits(SFilterInfo *info, SFilterGroupCtx** gRes, int32_t return TSDB_CODE_SUCCESS; } +bool filterIsSameUnits(SFilterColInfo* pCol1, SFilterColInfo* pCol2) { + if (pCol1->type != pCol2->type) { + return false; + } + + if (RANGE_TYPE_MR_CTX == pCol1->type) { + SFilterRangeCtx* pCtx1 = (SFilterRangeCtx*)pCol1->info; + SFilterRangeCtx* pCtx2 = (SFilterRangeCtx*)pCol2->info; + + if ((pCtx1->isnull != pCtx2->isnull) || (pCtx1->notnull != pCtx2->notnull) || (pCtx1->isrange != pCtx2->isrange)) { + return false; + } + + + SFilterRangeNode* pNode1 = pCtx1->rs; + SFilterRangeNode* pNode2 = pCtx2->rs; + + while (true) { + if (NULL == pNode1 && NULL == pNode2) { + break; + } + + if (NULL == pNode1 || NULL == pNode2) { + return false; + } + + if (pNode1->ra.s != pNode2->ra.s || pNode1->ra.e != pNode2->ra.e || pNode1->ra.sflag != pNode2->ra.sflag || pNode1->ra.eflag != pNode2->ra.eflag) { + return false; + } + + pNode1 = pNode1->next; + pNode2 = pNode2->next; + } + } + + return true; +} + void filterCheckColConflict(SFilterGroupCtx* gRes1, SFilterGroupCtx* gRes2, bool *conflict) { uint32_t idx1 = 0, idx2 = 0, m = 0, n = 0; bool equal = false; @@ -2098,6 +2143,11 @@ void filterCheckColConflict(SFilterGroupCtx* gRes1, SFilterGroupCtx* gRes2, bool return; } + if (!filterIsSameUnits(&gRes1->colInfo[idx1], &gRes2->colInfo[idx2])) { + *conflict = true; + return; + } + // for long in operation if (gRes1->colInfo[idx1].optr == OP_TYPE_EQUAL && gRes2->colInfo[idx2].optr == OP_TYPE_EQUAL) { SFilterRangeCtx* ctx = gRes1->colInfo[idx1].info; @@ -2711,17 +2761,22 @@ int32_t filterRmUnitByRange(SFilterInfo *info, SColumnDataAgg *pDataStatis, int3 for (uint32_t g = 0; g < info->groupNum; ++g) { SFilterGroup *group = &info->groups[g]; + // first is 
block unit num for a group, following append unitNum blkUnitIdx for this group *unitNum = group->unitNum; all = 0; empty = 0; + // save group idx start pointer + uint32_t * pGroupIdx = unitIdx; for (uint32_t u = 0; u < group->unitNum; ++u) { uint32_t uidx = group->unitIdxs[u]; if (info->blkUnitRes[uidx] == 1) { + // blkUnitRes == 1 is always true, so need not compare every time, delete this unit from group --(*unitNum); all = 1; continue; } else if (info->blkUnitRes[uidx] == -1) { + // blkUnitRes == -1 is always false, so in group is always false, need delete this group from blkGroupNum *unitNum = 0; empty = 1; break; @@ -2731,6 +2786,9 @@ int32_t filterRmUnitByRange(SFilterInfo *info, SColumnDataAgg *pDataStatis, int3 } if (*unitNum == 0) { + // if unit num is zero, reset unitIdx to start on this group + unitIdx = pGroupIdx; + --info->blkGroupNum; assert(empty || all); diff --git a/tests/script/jenkins/basic.txt b/tests/script/jenkins/basic.txt index 97295d75e0..46bae734ea 100644 --- a/tests/script/jenkins/basic.txt +++ b/tests/script/jenkins/basic.txt @@ -344,6 +344,7 @@ # --- scalar ---- ./test.sh -f tsim/scalar/in.sim ./test.sh -f tsim/scalar/scalar.sim +./test.sh -f tsim/scalar/filter.sim # ---- alter ---- ./test.sh -f tsim/alter/cached_schema_after_alter.sim diff --git a/tests/script/tsim/scalar/filter.sim b/tests/script/tsim/scalar/filter.sim new file mode 100644 index 0000000000..9231662278 --- /dev/null +++ b/tests/script/tsim/scalar/filter.sim @@ -0,0 +1,38 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/exec.sh -n dnode1 -s start +sql connect + +print ======== step1 +sql drop database if exists db1; +sql create database db1 vgroups 3; +sql use db1; +sql create stable st1 (fts timestamp, fbool bool, ftiny tinyint, fsmall smallint, fint int, fbig bigint, futiny tinyint unsigned, fusmall smallint unsigned, fuint int unsigned, fubig bigint unsigned, ffloat float, fdouble double, fbin binary(10), fnchar nchar(10)) tags(tts 
timestamp, tbool bool, ttiny tinyint, tsmall smallint, tint int, tbig bigint, tutiny tinyint unsigned, tusmall smallint unsigned, tuint int unsigned, tubig bigint unsigned, tfloat float, tdouble double, tbin binary(10), tnchar nchar(10)); +sql create table tb1 using st1 tags('2022-07-10 16:31:00', true, 1, 1, 1, 1, 1, 1, 1, 1, 1.0, 1.0, 'a', 'a'); +sql create table tb2 using st1 tags('2022-07-10 16:32:00', false, 2, 2, 2, 2, 2, 2, 2, 2, 2.0, 2.0, 'b', 'b'); +sql create table tb3 using st1 tags('2022-07-10 16:33:00', true, 3, 3, 3, 3, 3, 3, 3, 3, 3.0, 3.0, 'c', 'c'); + +sql insert into tb1 values ('2022-07-10 16:31:01', false, 1, 1, 1, 1, 1, 1, 1, 1, 1.0, 1.0, 'a', 'a'); +sql insert into tb1 values ('2022-07-10 16:31:02', true, 2, 2, 2, 2, 2, 2, 2, 2, 2.0, 2.0, 'b', 'b'); +sql insert into tb1 values ('2022-07-10 16:31:03', false, 3, 3, 3, 3, 3, 3, 3, 3, 3.0, 3.0, 'c', 'c'); +sql insert into tb1 values ('2022-07-10 16:31:04', true, 4, 4, 4, 4, 4, 4, 4, 4, 4.0, 4.0, 'd', 'd'); +sql insert into tb1 values ('2022-07-10 16:31:05', false, 5, 5, 5, 5, 5, 5, 5, 5, 5.0, 5.0, 'e', 'e'); + +sql insert into tb2 values ('2022-07-10 16:32:01', false, 1, 1, 1, 1, 1, 1, 1, 1, 1.0, 1.0, 'a', 'a'); +sql insert into tb2 values ('2022-07-10 16:32:02', true, 2, 2, 2, 2, 2, 2, 2, 2, 2.0, 2.0, 'b', 'b'); +sql insert into tb2 values ('2022-07-10 16:32:03', false, 3, 3, 3, 3, 3, 3, 3, 3, 3.0, 3.0, 'c', 'c'); +sql insert into tb2 values ('2022-07-10 16:32:04', true, 4, 4, 4, 4, 4, 4, 4, 4, 4.0, 4.0, 'd', 'd'); +sql insert into tb2 values ('2022-07-10 16:32:05', false, 5, 5, 5, 5, 5, 5, 5, 5, 5.0, 5.0, 'e', 'e'); + +sql insert into tb3 values ('2022-07-10 16:33:01', false, 1, 1, 1, 1, 1, 1, 1, 1, 1.0, 1.0, 'a', 'a'); +sql insert into tb3 values ('2022-07-10 16:33:02', true, 2, 2, 2, 2, 2, 2, 2, 2, 2.0, 2.0, 'b', 'b'); +sql insert into tb3 values ('2022-07-10 16:33:03', false, 3, 3, 3, 3, 3, 3, 3, 3, 3.0, 3.0, 'c', 'c'); +sql insert into tb3 values ('2022-07-10 16:33:04', true, 4, 4, 4, 4, 4, 
4, 4, 4, 4.0, 4.0, 'd', 'd'); +sql insert into tb3 values ('2022-07-10 16:33:05', false, 5, 5, 5, 5, 5, 5, 5, 5, 5.0, 5.0, 'e', 'e'); + +sql select * from st1 where (ttiny > 2 or ftiny < 5) and ftiny > 2; +if $rows != 7 then + return -1 +endi + +system sh/exec.sh -n dnode1 -s stop -x SIGINT