From f52038dbfc0d99b333f28ac9a990d9245542729a Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sun, 10 Jul 2022 11:35:24 +0800 Subject: [PATCH 1/3] add test case --- source/libs/index/inc/indexFstSparse.h | 13 +++--- source/libs/index/src/indexFstDfa.c | 15 ++++--- source/libs/index/src/indexFstSparse.c | 44 +++++++++++++++------ source/libs/index/test/fstUtilUT.cc | 55 ++++++++++++++++++++++---- 4 files changed, 97 insertions(+), 30 deletions(-) diff --git a/source/libs/index/inc/indexFstSparse.h b/source/libs/index/inc/indexFstSparse.h index 665fb2ba5c..bd704fb427 100644 --- a/source/libs/index/inc/indexFstSparse.h +++ b/source/libs/index/inc/indexFstSparse.h @@ -23,17 +23,18 @@ extern "C" { #endif typedef struct FstSparseSet { - uint32_t *dense; - uint32_t *sparse; - int32_t size; + int32_t *dense; + int32_t *sparse; + int32_t size; + int32_t cap; } FstSparseSet; FstSparseSet *sparSetCreate(int32_t sz); void sparSetDestroy(FstSparseSet *s); uint32_t sparSetLen(FstSparseSet *ss); -uint32_t sparSetAdd(FstSparseSet *ss, uint32_t ip); -uint32_t sparSetGet(FstSparseSet *ss, uint32_t i); -bool sparSetContains(FstSparseSet *ss, uint32_t ip); +bool sparSetAdd(FstSparseSet *ss, int32_t ip, int32_t *val); +bool sparSetGet(FstSparseSet *ss, int32_t i, int32_t *val); +bool sparSetContains(FstSparseSet *ss, int32_t ip); void sparSetClear(FstSparseSet *ss); #ifdef __cplusplus diff --git a/source/libs/index/src/indexFstDfa.c b/source/libs/index/src/indexFstDfa.c index b820f16a2a..275580ebdc 100644 --- a/source/libs/index/src/indexFstDfa.c +++ b/source/libs/index/src/indexFstDfa.c @@ -105,8 +105,9 @@ bool dfaBuilderRunState(FstDfaBuilder *builder, FstSparseSet *cur, FstSparseSet sparSetClear(cur); DfaState *t = taosArrayGet(builder->dfa->states, state); for (int i = 0; i < taosArrayGetSize(t->insts); i++) { - uint32_t ip = *(int32_t *)taosArrayGet(t->insts, i); - sparSetAdd(cur, ip); + int32_t ip = *(int32_t *)taosArrayGet(t->insts, i); + bool succ = sparSetAdd(cur, ip, NULL); + assert(succ == true); } dfaRun(builder->dfa, cur, next, byte); @@ -126,7 +127,9 @@ bool dfaBuilderCachedState(FstDfaBuilder *builder, FstSparseSet *set, uint32_t * bool isMatch = false; for (int i = 0; i < sparSetLen(set); i++) { - uint32_t ip = sparSetGet(set, i); + int32_t ip; + + if (false == sparSetGet(set, i, &ip)) continue; Inst *inst = taosArrayGet(builder->dfa->insts, ip); if (inst->ty == JUMP || inst->ty == SPLIT) { @@ -186,7 +189,8 @@ void dfaAdd(FstDfa *dfa, FstSparseSet *set, uint32_t ip) { if (sparSetContains(set, ip)) { return; } - sparSetAdd(set, ip); + bool succ = sparSetAdd(set, ip, NULL); + assert(succ == true); Inst *inst = taosArrayGet(dfa->insts, ip); if (inst->ty == MATCH || inst->ty == RANGE) { // do nothing @@ -203,7 +207,8 @@ bool dfaRun(FstDfa *dfa, FstSparseSet *from, FstSparseSet *to, uint8_t byte) { bool isMatch = false; sparSetClear(to); for (int i = 0; i < sparSetLen(from); i++) { - uint32_t ip = sparSetGet(from, i); + int32_t ip; + if (false == sparSetGet(from, i, &ip)) continue; Inst *inst = taosArrayGet(dfa->insts, ip); if (inst->ty == JUMP || inst->ty == SPLIT) { diff --git a/source/libs/index/src/indexFstSparse.c b/source/libs/index/src/indexFstSparse.c index 71d8854dcc..99ed5d6429 100644 --- a/source/libs/index/src/indexFstSparse.c +++ b/source/libs/index/src/indexFstSparse.c @@ -21,8 +21,12 @@ FstSparseSet *sparSetCreate(int32_t sz) { return NULL; } - ss->dense = (uint32_t *)taosMemoryCalloc(sz, sizeof(uint32_t)); - ss->sparse = (uint32_t *)taosMemoryCalloc(sz, sizeof(uint32_t)); + ss->dense = (int32_t *)taosMemoryMalloc(sz * sizeof(int32_t)); + memset(ss->dense, -1, sz * sizeof(int32_t)); + ss->sparse = (int32_t *)taosMemoryMalloc(sz * sizeof(int32_t)); + memset(ss->sparse, -1, sz * sizeof(int32_t)); + ss->cap = sz; + ss->size = 0; return ss; } @@ -38,23 +42,39 @@ uint32_t sparSetLen(FstSparseSet *ss) { // Get occupied size return ss == NULL ? 0 : ss->size; } -uint32_t sparSetAdd(FstSparseSet *ss, uint32_t ip) { +bool sparSetAdd(FstSparseSet *ss, int32_t ip, int32_t *idx) { if (ss == NULL) { - return 0; + return false; + } + if (ip >= ss->cap) { + return false; } uint32_t i = ss->size; ss->dense[i] = ip; ss->sparse[ip] = i; ss->size += 1; - return i; + + if (idx != NULL) *idx = i; + + return true; } -uint32_t sparSetGet(FstSparseSet *ss, uint32_t i) { - // check later - return ss->dense[i]; +bool sparSetGet(FstSparseSet *ss, int32_t idx, int32_t *ip) { + if (idx >= ss->cap || idx >= ss->size) { + return false; + } + int32_t val = ss->dense[idx]; + if (ip != NULL) { + *ip = val; + } + return val == -1 ? false : true; } -bool sparSetContains(FstSparseSet *ss, uint32_t ip) { - uint32_t i = ss->sparse[ip]; - if (i < ss->size && ss->dense[i] == ip) { +bool sparSetContains(FstSparseSet *ss, int32_t ip) { + if (ip >= ss->cap) { + return false; + } + int32_t i = ss->sparse[ip]; + + if (i < ss->cap && i < ss->size && ss->dense[i] == ip) { return true; } else { return false; @@ -64,5 +84,7 @@ void sparSetClear(FstSparseSet *ss) { if (ss == NULL) { return; } + memset(ss->dense, -1, ss->cap * sizeof(int32_t)); + memset(ss->sparse, -1, ss->cap * sizeof(int32_t)); ss->size = 0; } diff --git a/source/libs/index/test/fstUtilUT.cc b/source/libs/index/test/fstUtilUT.cc index 2c29758756..22fe1a9150 100644 --- a/source/libs/index/test/fstUtilUT.cc +++ b/source/libs/index/test/fstUtilUT.cc @@ -51,10 +51,18 @@ class FstSparseSetEnv : public ::testing::Test { }; // test FstDfaBuilder -TEST_F(FstUtilEnv, test1) {} -TEST_F(FstUtilEnv, test2) {} -TEST_F(FstUtilEnv, test3) {} -TEST_F(FstUtilEnv, test4) {} +TEST_F(FstUtilEnv, test1) { + // test +} +TEST_F(FstUtilEnv, test2) { + // test +} +TEST_F(FstUtilEnv, test3) { + // test +} +TEST_F(FstUtilEnv, test4) { + // test +} // test FstRegex @@ -64,7 +72,38 @@ TEST_F(FstRegexEnv, test3) {} TEST_F(FstRegexEnv, test4) {} // test FstSparseSet -TEST_F(FstSparseSetEnv, test1) {} -TEST_F(FstSparseSetEnv, test2) {} -TEST_F(FstSparseSetEnv, test3) {} -TEST_F(FstSparseSetEnv, test4) {} +TEST_F(FstSparseSetEnv, test1) { + for (int8_t i = 0; i < 20; i++) { + int32_t val = -1; + bool succ = sparSetAdd(set, 'a' + i, &val); + } + EXPECT_EQ(sparSetLen(set), 20); + for (int8_t i = 0; i < 20; i++) { + int val = -1; + bool find = sparSetGet(set, i, &val); + EXPECT_EQ(find, true); + EXPECT_EQ(val, i + 'a'); + } + for (int8_t i = 'a'; i < 'a' + 20; i++) { + EXPECT_EQ(sparSetContains(set, i), true); + } + + for (int8_t i = 'A'; i < 20; i++) { + EXPECT_EQ(sparSetContains(set, 'A'), false); + } + + for (int i = 512; i < 1000; i++) { + EXPECT_EQ(sparSetAdd(set, i, NULL), false); + + EXPECT_EQ(sparSetGet(set, i, NULL), false); + EXPECT_EQ(sparSetContains(set, i), false); + } + sparSetClear(set); + + for (int i = 'a'; i < 'a' + 20; i++) { + EXPECT_EQ(sparSetGet(set, i, NULL), false); + } + for (int i = 1000; i < 2000; i++) { + EXPECT_EQ(sparSetGet(set, i, NULL), false); + } +} From 06fe7f6a5a853c91c55fa6a8a01c6e7d94e6d73d Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sun, 10 Jul 2022 16:17:42 +0800 Subject: [PATCH 2/3] update test case --- source/libs/index/inc/indexFstDfa.h | 2 +- source/libs/index/src/indexFstDfa.c | 12 +++++------- source/libs/index/src/indexFstRegex.c | 7 +++++-- source/libs/index/src/indexFstSparse.c | 14 ++++++++++---- source/libs/index/test/fstUtilUT.cc | 5 ++++- 5 files changed, 25 insertions(+), 15 deletions(-) diff --git a/source/libs/index/inc/indexFstDfa.h b/source/libs/index/inc/indexFstDfa.h index 9ca10897fd..5a5622e280 100644 --- a/source/libs/index/inc/indexFstDfa.h +++ b/source/libs/index/inc/indexFstDfa.h @@ -51,7 +51,7 @@ FstDfa *dfaBuilderBuild(FstDfaBuilder *builder); bool dfaBuilderRunState(FstDfaBuilder *builder, FstSparseSet *cur, FstSparseSet *next, uint32_t state, uint8_t bytes, uint32_t *result); -bool dfaBuilderCachedState(FstDfaBuilder *builder, FstSparseSet *set, uint32_t *result); +bool dfaBuilderCacheState(FstDfaBuilder *builder, FstSparseSet *set, uint32_t *result); /* * dfa related func diff --git a/source/libs/index/src/indexFstDfa.c b/source/libs/index/src/indexFstDfa.c index 275580ebdc..b8ac3bd3f5 100644 --- a/source/libs/index/src/indexFstDfa.c +++ b/source/libs/index/src/indexFstDfa.c @@ -64,7 +64,7 @@ void dfaBuilderDestroy(FstDfaBuilder *builder) { taosMemoryFree(builder); } -FstDfa *dfaBuilder(FstDfaBuilder *builder) { +FstDfa *dfaBuilderBuild(FstDfaBuilder *builder) { uint32_t sz = taosArrayGetSize(builder->dfa->insts); FstSparseSet *cur = sparSetCreate(sz); FstSparseSet *nxt = sparSetCreate(sz); @@ -73,7 +73,7 @@ FstDfa *dfaBuilder(FstDfaBuilder *builder) { SArray *states = taosArrayInit(0, sizeof(uint32_t)); uint32_t result; - if (dfaBuilderCachedState(builder, cur, &result)) { + if (dfaBuilderCacheState(builder, cur, &result)) { taosArrayPush(states, &result); } SHashObj *seen = taosHashInit(12, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK); @@ -98,8 +98,6 @@ FstDfa *dfaBuilder(FstDfaBuilder *builder) { return builder->dfa; } -FstDfa *dfaBuilderBuild(FstDfaBuilder *builer) { return NULL; } - bool dfaBuilderRunState(FstDfaBuilder *builder, FstSparseSet *cur, FstSparseSet *next, uint32_t state, uint8_t byte, uint32_t *result) { sparSetClear(cur); @@ -114,7 +112,7 @@ bool dfaBuilderRunState(FstDfaBuilder *builder, FstSparseSet *cur, FstSparseSet t = taosArrayGet(builder->dfa->states, state); uint32_t nxtState; - if (dfaBuilderCachedState(builder, next, &nxtState)) { + if (dfaBuilderCacheState(builder, next, &nxtState)) { t->next[byte] = nxtState; *result = nxtState; return true; @@ -122,7 +120,7 @@ bool dfaBuilderRunState(FstDfaBuilder *builder, FstSparseSet *cur, FstSparseSet return false; } -bool dfaBuilderCachedState(FstDfaBuilder *builder, FstSparseSet *set, uint32_t *result) { +bool dfaBuilderCacheState(FstDfaBuilder *builder, FstSparseSet *set, uint32_t *result) { SArray *tinsts = taosArrayInit(4, sizeof(uint32_t)); bool isMatch = false; @@ -190,7 +188,7 @@ void dfaAdd(FstDfa *dfa, FstSparseSet *set, uint32_t ip) { return; } bool succ = sparSetAdd(set, ip, NULL); - assert(succ == true); + // assert(succ == true); Inst *inst = taosArrayGet(dfa->insts, ip); if (inst->ty == MATCH || inst->ty == RANGE) { // do nothing diff --git a/source/libs/index/src/indexFstRegex.c b/source/libs/index/src/indexFstRegex.c index 37cb58996f..0189e9fe6e 100644 --- a/source/libs/index/src/indexFstRegex.c +++ b/source/libs/index/src/indexFstRegex.c @@ -29,8 +29,11 @@ FstRegex *regexCreate(const char *str) { regex->orig = orig; // construct insts based on str - SArray *insts = NULL; - + SArray *insts = taosArrayInit(256, sizeof(uint8_t)); + for (int i = 0; i < strlen(str); i++) { + uint8_t v = str[i]; + taosArrayPush(insts, &v); + } FstDfaBuilder *builder = dfaBuilderCreate(insts); regex->dfa = dfaBuilderBuild(builder); return regex; diff --git a/source/libs/index/src/indexFstSparse.c b/source/libs/index/src/indexFstSparse.c index 99ed5d6429..5fdd797aa8 100644 --- a/source/libs/index/src/indexFstSparse.c +++ b/source/libs/index/src/indexFstSparse.c @@ -15,6 +15,11 @@ #include "indexFstSparse.h" +static void sparSetUtil(int32_t *buf, int32_t cap) { + for (int32_t i = 0; i < cap; i++) { + buf[i] = -1; + } +} FstSparseSet *sparSetCreate(int32_t sz) { FstSparseSet *ss = taosMemoryCalloc(1, sizeof(FstSparseSet)); if (ss == NULL) { @@ -22,9 +27,10 @@ FstSparseSet *sparSetCreate(int32_t sz) { } ss->dense = (int32_t *)taosMemoryMalloc(sz * sizeof(int32_t)); - memset(ss->dense, -1, sz * sizeof(int32_t)); ss->sparse = (int32_t *)taosMemoryMalloc(sz * sizeof(int32_t)); - memset(ss->sparse, -1, sz * sizeof(int32_t)); + sparSetUtil(ss->dense, sz); + sparSetUtil(ss->sparse, sz); + ss->cap = sz; ss->size = 0; @@ -84,7 +90,7 @@ void sparSetClear(FstSparseSet *ss) { if (ss == NULL) { return; } - memset(ss->dense, -1, ss->cap * sizeof(int32_t)); - memset(ss->sparse, -1, ss->cap * sizeof(int32_t)); + sparSetUtil(ss->dense, ss->cap); + sparSetUtil(ss->sparse, ss->cap); ss->size = 0; } diff --git a/source/libs/index/test/fstUtilUT.cc b/source/libs/index/test/fstUtilUT.cc index 22fe1a9150..593a312c9e 100644 --- a/source/libs/index/test/fstUtilUT.cc +++ b/source/libs/index/test/fstUtilUT.cc @@ -66,7 +66,10 @@ TEST_F(FstUtilEnv, test4) { // test FstRegex -TEST_F(FstRegexEnv, test1) {} +TEST_F(FstRegexEnv, test1) { + // + EXPECT_EQ(regex != NULL, true); +} TEST_F(FstRegexEnv, test2) {} TEST_F(FstRegexEnv, test3) {} TEST_F(FstRegexEnv, test4) {} From bb760de1cfedd40089cdb5df7b3e9784c3d96788 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sun, 10 Jul 2022 17:08:33 +0800 Subject: [PATCH 3/3] add test case --- source/libs/index/src/indexFstDfa.c | 1 - source/libs/index/src/indexFstRegex.c | 5 +---- source/libs/index/src/indexFstSparse.c | 8 ++++---- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/source/libs/index/src/indexFstDfa.c b/source/libs/index/src/indexFstDfa.c index b8ac3bd3f5..046ed0f4f4 100644 --- a/source/libs/index/src/indexFstDfa.c +++ b/source/libs/index/src/indexFstDfa.c @@ -126,7 +126,6 @@ bool dfaBuilderCacheState(FstDfaBuilder *builder, FstSparseSet *set, uint32_t *r for (int i = 0; i < sparSetLen(set); i++) { int32_t ip; - if (false == sparSetGet(set, i, &ip)) continue; Inst *inst = taosArrayGet(builder->dfa->insts, ip); diff --git a/source/libs/index/src/indexFstRegex.c b/source/libs/index/src/indexFstRegex.c index 0189e9fe6e..e148f211f2 100644 --- a/source/libs/index/src/indexFstRegex.c +++ b/source/libs/index/src/indexFstRegex.c @@ -22,11 +22,8 @@ FstRegex *regexCreate(const char *str) { if (regex == NULL) { return NULL; } - int32_t sz = (int32_t)strlen(str); - char *orig = taosMemoryCalloc(1, sz); - memcpy(orig, str, sz); - regex->orig = orig; + regex->orig = tstrdup(str); // construct insts based on str SArray *insts = taosArrayInit(256, sizeof(uint8_t)); diff --git a/source/libs/index/src/indexFstSparse.c b/source/libs/index/src/indexFstSparse.c index 5fdd797aa8..60eb7afd90 100644 --- a/source/libs/index/src/indexFstSparse.c +++ b/source/libs/index/src/indexFstSparse.c @@ -52,7 +52,7 @@ bool sparSetAdd(FstSparseSet *ss, int32_t ip, int32_t *idx) { if (ss == NULL) { return false; } - if (ip >= ss->cap) { + if (ip >= ss->cap || ip < 0) { return false; } uint32_t i = ss->size; @@ -65,7 +65,7 @@ bool sparSetAdd(FstSparseSet *ss, int32_t ip, int32_t *idx) { return true; } bool sparSetGet(FstSparseSet *ss, int32_t idx, int32_t *ip) { - if (idx >= ss->cap || idx >= ss->size) { + if (idx >= ss->cap || idx >= ss->size || idx < 0) { return false; } int32_t val = ss->dense[idx]; @@ -75,12 +75,12 @@ bool sparSetGet(FstSparseSet *ss, int32_t idx, int32_t *ip) { return val == -1 ? false : true; } bool sparSetContains(FstSparseSet *ss, int32_t ip) { - if (ip >= ss->cap) { + if (ip >= ss->cap || ip < 0) { return false; } int32_t i = ss->sparse[ip]; - if (i < ss->cap && i < ss->size && ss->dense[i] == ip) { + if (i >= 0 && i < ss->cap && i < ss->size && ss->dense[i] == ip) { return true; } else { return false;