From 65d72a48b476db424efad471f2ace89acc119044 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Mon, 28 Mar 2022 16:22:37 +0800 Subject: [PATCH 1/5] fst query --- source/libs/index/src/index_fst.c | 2 +- source/libs/index/src/index_fst_automation.c | 12 +++++- source/libs/index/test/fstTest.cc | 43 ++++++++++++++++++-- 3 files changed, 51 insertions(+), 6 deletions(-) diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index 09f382bbdc..f35d7d39f2 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -1317,7 +1317,7 @@ StreamWithStateResult* streamWithStateNextWith(StreamWithState* sws, StreamCallb if (FST_NODE_ADDR(p->node) != fstGetRootAddr(sws->fst)) { taosArrayPop(sws->inp); } - streamStateDestroy(p); + // streamStateDestroy(p); continue; } FstTransition trn; diff --git a/source/libs/index/src/index_fst_automation.c b/source/libs/index/src/index_fst_automation.c index 668a527d4a..32623b09b4 100644 --- a/source/libs/index/src/index_fst_automation.c +++ b/source/libs/index/src/index_fst_automation.c @@ -85,10 +85,20 @@ static void* prefixStart(AutomationCtx* ctx) { }; static bool prefixIsMatch(AutomationCtx* ctx, void* sv) { StartWithStateValue* ssv = (StartWithStateValue*)sv; - return ssv->val == strlen(ctx->data); + if (ssv == NULL) { + return false; + } + if (ssv->type == FST_INT) { + return ssv->val == strlen(ctx->data); + } else { + return false; + } } static bool prefixCanMatch(AutomationCtx* ctx, void* sv) { StartWithStateValue* ssv = (StartWithStateValue*)sv; + if (ssv == NULL) { + return false; + } return ssv->val >= 0; } static bool prefixWillAlwaysMatch(AutomationCtx* ctx, void* state) { return true; } diff --git a/source/libs/index/test/fstTest.cc b/source/libs/index/test/fstTest.cc index 94923726dd..7a8ee975c4 100644 --- a/source/libs/index/test/fstTest.cc +++ b/source/libs/index/test/fstTest.cc @@ -243,8 +243,7 @@ void checkFstCheckIterator() { std::cout << "insert data count : " << count << "elapas time: " << e - s << std::endl; fw->Put("Hello world", 1); - fw->Put("hello world", 2); - fw->Put("hello worle", 3); + fw->Put("Hello worle", 2); fw->Put("hello worlf", 4); delete fw; @@ -258,7 +257,42 @@ void checkFstCheckIterator() { // prefix search std::vector result; - AutomationCtx* ctx = automCtxCreate((void*)"H", AUTOMATION_PREFIX); + AutomationCtx* ctx = automCtxCreate((void*)"He", AUTOMATION_ALWAYS); + m->Search(ctx, result); + std::cout << "size: " << result.size() << std::endl; + // assert(result.size() == count); + for (int i = 0; i < result.size(); i++) { + // assert(result[i] == i); // check result + } + + taosMemoryFree(ctx); + delete m; +} +void checkFstCheckIteratorPrefix() { + FstWriter* fw = new FstWriter; + int64_t s = taosGetTimestampUs(); + int count = 2; + // Performance_fstWriteRecords(fw); + int64_t e = taosGetTimestampUs(); + + std::cout << "insert data count : " << count << "elapas time: " << e - s << std::endl; + + fw->Put("Hello world", 1); + fw->Put("Hello worle", 2); + fw->Put("hello worlf", 4); + delete fw; + + FstReadMemory* m = new FstReadMemory(1024 * 64); + if (m->init() == false) { + std::cout << "init readMemory failed" << std::endl; + delete m; + return; + } + + // prefix search + std::vector result; + + AutomationCtx* ctx = automCtxCreate((void*)"he", AUTOMATION_PREFIX); m->Search(ctx, result); std::cout << "size: " << result.size() << std::endl; // assert(result.size() == count); @@ -332,7 +366,8 @@ int main(int argc, char* argv[]) { // path suid colName ver // iterTFileReader(argv[1], argv[2], argv[3], argv[4]); //} - checkFstCheckIterator(); + // checkFstCheckIterator(); + checkFstCheckIteratorPrefix(); // checkFstLongTerm(); // checkFstPrefixSearch(); From 33bd962894a88b30d918b27f51f37147add94ffb Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Mon, 28 Mar 2022 16:40:33 +0800 Subject: [PATCH 2/5] fst query --- source/libs/index/test/fstTest.cc | 39 +++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/source/libs/index/test/fstTest.cc b/source/libs/index/test/fstTest.cc index 7a8ee975c4..e7e5070d14 100644 --- a/source/libs/index/test/fstTest.cc +++ b/source/libs/index/test/fstTest.cc @@ -97,6 +97,7 @@ class FstReadMemory { std::string key(ch, sz); printf("key: %s, val: %" PRIu64 "\n", key.c_str(), (uint64_t)(rt->out.out)); swsResultDestroy(rt); + result.push_back(rt->out.out); } for (size_t i = 0; i < result.size(); i++) { } @@ -280,6 +281,11 @@ void checkFstCheckIteratorPrefix() { fw->Put("Hello world", 1); fw->Put("Hello worle", 2); fw->Put("hello worlf", 4); + fw->Put("ja", 4); + fw->Put("jb", 4); + fw->Put("jc", 4); + fw->Put("jddddddddd", 4); + fw->Put("jefffffff", 4); delete fw; FstReadMemory* m = new FstReadMemory(1024 * 64); @@ -288,19 +294,32 @@ void checkFstCheckIteratorPrefix() { delete m; return; } + { + // prefix search + std::vector result; - // prefix search - std::vector result; - - AutomationCtx* ctx = automCtxCreate((void*)"he", AUTOMATION_PREFIX); - m->Search(ctx, result); - std::cout << "size: " << result.size() << std::endl; - // assert(result.size() == count); - for (int i = 0; i < result.size(); i++) { - // assert(result[i] == i); // check result + AutomationCtx* ctx = automCtxCreate((void*)"he", AUTOMATION_PREFIX); + m->Search(ctx, result); + assert(result.size() == 1); + taosMemoryFree(ctx); } + { + // prefix search + std::vector result; - taosMemoryFree(ctx); + AutomationCtx* ctx = automCtxCreate((void*)"Hello", AUTOMATION_PREFIX); + m->Search(ctx, result); + assert(result.size() == 2); + taosMemoryFree(ctx); + } + { + std::vector result; + + AutomationCtx* ctx = automCtxCreate((void*)"jddd", AUTOMATION_PREFIX); + m->Search(ctx, result); + assert(result.size() == 1); + taosMemoryFree(ctx); + } delete m; } From 3f23495983061c6d25a1cdbacca6b3328fa64288 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Mon, 28 Mar 2022 16:46:21 +0800 Subject: [PATCH 3/5] fst query --- source/libs/index/src/index_fst_automation.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/source/libs/index/src/index_fst_automation.c b/source/libs/index/src/index_fst_automation.c index 32623b09b4..cf9d88b73e 100644 --- a/source/libs/index/src/index_fst_automation.c +++ b/source/libs/index/src/index_fst_automation.c @@ -164,15 +164,7 @@ AutomationCtx* automCtxCreate(void* data, AutomationType atype) { // add more search type } - char* dst = NULL; - if (data != NULL) { - char* src = (char*)data; - size_t len = strlen(src); - dst = (char*)taosMemoryCalloc(1, len * sizeof(char) + 1); - memcpy(dst, src, len); - } - - ctx->data = dst; + ctx->data = strdup((char*)data); ctx->type = atype; ctx->stdata = (void*)sv; return ctx; From 30d49687e9a7e24d22f863dbdbd3dbad29681962 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Mon, 28 Mar 2022 19:01:57 +0800 Subject: [PATCH 4/5] update index query --- source/libs/index/inc/index_fst.h | 14 +++---- source/libs/index/src/index_fst.c | 5 +-- source/libs/index/test/fstTest.cc | 67 +++++++++++++++++++++++++++++-- 3 files changed, 72 insertions(+), 14 deletions(-) diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h index cf5c3f306b..b131aa0d9d 100644 --- a/source/libs/index/inc/index_fst.h +++ b/source/libs/index/inc/index_fst.h @@ -21,9 +21,9 @@ extern "C" { #endif #include "indexInt.h" -#include "index_fst_node.h" #include "index_fst_automation.h" #include "index_fst_counting_writer.h" +#include "index_fst_node.h" #include "index_fst_registry.h" #include "index_fst_util.h" @@ -257,9 +257,9 @@ typedef struct FstMeta { } FstMeta; typedef struct Fst { - FstMeta* meta; - FstSlice* data; // - FstNode* root; // + FstMeta* meta; + FstSlice* data; // + FstNode* root; // TdThreadMutex mtx; } Fst; @@ -325,10 +325,10 @@ StreamWithStateResult* streamWithStateNextWith(StreamWithState* sws, StreamCallb FstStreamBuilder* fstStreamBuilderCreate(Fst* fst, AutomationCtx* aut); void fstStreamBuilderDestroy(FstStreamBuilder* b); -// set up bound range -// refator, simple code by marco -FstStreamBuilder* fstStreamBuilderRange(FstStreamBuilder* b, FstSlice* val, RangeType type); +// set up bound range +// refator later: to simple code by marco +void fstStreamBuilderSetRange(FstStreamBuilder* b, FstSlice* val, RangeType type); #ifdef __cplusplus } diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index f35d7d39f2..5fd8865b21 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -1425,9 +1425,9 @@ void fstStreamBuilderDestroy(FstStreamBuilder* b) { taosMemoryFreeClear(b->max); taosMemoryFree(b); } -FstStreamBuilder* fstStreamBuilderRange(FstStreamBuilder* b, FstSlice* val, RangeType type) { +void fstStreamBuilderSetRange(FstStreamBuilder* b, FstSlice* val, RangeType type) { if (b == NULL) { - return NULL; + return; } if (type == GE) { b->min->type = Included; @@ -1446,5 +1446,4 @@ FstStreamBuilder* fstStreamBuilderRange(FstStreamBuilder* b, FstSlice* val, Rang fstSliceDestroy(&(b->max->data)); b->max->data = fstSliceDeepCopy(val, 0, FST_SLICE_LEN(val) - 1); } - return b; } diff --git a/source/libs/index/test/fstTest.cc b/source/libs/index/test/fstTest.cc index e7e5070d14..1b9acbd67e 100644 --- a/source/libs/index/test/fstTest.cc +++ b/source/libs/index/test/fstTest.cc @@ -96,12 +96,36 @@ class FstReadMemory { char* ch = (char*)fstSliceData(s, &sz); std::string key(ch, sz); printf("key: %s, val: %" PRIu64 "\n", key.c_str(), (uint64_t)(rt->out.out)); - swsResultDestroy(rt); result.push_back(rt->out.out); + swsResultDestroy(rt); } - for (size_t i = 0; i < result.size(); i++) { + return true; + } + bool SearchRange(AutomationCtx* ctx, const std::string& low, const std::string& high, std::vector& result) { + FstStreamBuilder* sb = fstSearch(_fst, ctx); + + FstSlice l = fstSliceCreate((uint8_t*)low.c_str(), low.size()); + FstSlice h = fstSliceCreate((uint8_t*)high.c_str(), high.size()); + + // range [low, high); + fstStreamBuilderSetRange(sb, &l, GE); + fstStreamBuilderSetRange(sb, &h, LT); + + fstSliceDestroy(&l); + fstSliceDestroy(&h); + + StreamWithState* st = streamBuilderIntoStream(sb); + StreamWithStateResult* rt = NULL; + while ((rt = streamWithStateNextWith(st, NULL)) != NULL) { + // result.push_back((uint64_t)(rt->out.out)); + FstSlice* s = &rt->data; + int32_t sz = 0; + char* ch = (char*)fstSliceData(s, &sz); + std::string key(ch, sz); + printf("key: %s, val: %" PRIu64 "\n", key.c_str(), (uint64_t)(rt->out.out)); + result.push_back(rt->out.out); + swsResultDestroy(rt); } - std::cout << std::endl; return true; } bool SearchWithTimeCostUs(AutomationCtx* ctx, std::vector& result) { @@ -322,6 +346,40 @@ void checkFstCheckIteratorPrefix() { } delete m; } +void checkFstCheckIteratorRange() { + FstWriter* fw = new FstWriter; + int64_t s = taosGetTimestampUs(); + int count = 2; + // Performance_fstWriteRecords(fw); + int64_t e = taosGetTimestampUs(); + + std::cout << "insert data count : " << count << "elapas time: " << e - s << std::endl; + + fw->Put("a", 1); + fw->Put("b", 2); + fw->Put("c", 3); + fw->Put("d", 4); + fw->Put("e", 5); + delete fw; + + FstReadMemory* m = new FstReadMemory(1024 * 64); + if (m->init() == false) { + std::cout << "init readMemory failed" << std::endl; + delete m; + return; + } + { + // prefix search + std::vector result; + + AutomationCtx* ctx = automCtxCreate((void*)"he", AUTOMATION_ALWAYS); + + // [b, e) + m->SearchRange(ctx, "b", "e", result); + // assert(result.size() == 1); + taosMemoryFree(ctx); + } +} void fst_get(Fst* fst) { for (int i = 0; i < 10000; i++) { @@ -386,7 +444,8 @@ int main(int argc, char* argv[]) { // iterTFileReader(argv[1], argv[2], argv[3], argv[4]); //} // checkFstCheckIterator(); - checkFstCheckIteratorPrefix(); + // checkFstCheckIteratorPrefix(); + checkFstCheckIteratorRange(); // checkFstLongTerm(); // checkFstPrefixSearch(); From 0c5f2d1da95899c0b1789d4e66b47a15eac4547f Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Mon, 28 Mar 2022 21:41:41 +0800 Subject: [PATCH 5/5] update index range query --- source/libs/index/src/index_fst.c | 3 +- source/libs/index/src/index_fst_automation.c | 2 +- source/libs/index/test/fstTest.cc | 82 ++++++++++++++++++-- 3 files changed, 79 insertions(+), 8 deletions(-) diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index 5fd8865b21..3edf5fa406 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -720,7 +720,6 @@ bool fstNodeFindInput(FstNode* node, uint8_t b, uint64_t* res) { uint64_t out = fstStateFindInput(st, node, b, &null); if (null == false) { *res = out; - } else { s = false; } } @@ -1184,7 +1183,7 @@ StreamWithState* streamWithStateCreate(Fst* fst, AutomationCtx* automation, FstB sws->aut = automation; sws->inp = (SArray*)taosArrayInit(256, sizeof(uint8_t)); - sws->emptyOutput.null = false; + sws->emptyOutput.null = true; sws->emptyOutput.out = 0; sws->stack = (SArray*)taosArrayInit(256, sizeof(StreamState)); diff --git a/source/libs/index/src/index_fst_automation.c b/source/libs/index/src/index_fst_automation.c index cf9d88b73e..20e981559d 100644 --- a/source/libs/index/src/index_fst_automation.c +++ b/source/libs/index/src/index_fst_automation.c @@ -164,7 +164,7 @@ AutomationCtx* automCtxCreate(void* data, AutomationType atype) { // add more search type } - ctx->data = strdup((char*)data); + ctx->data = (data != NULL ? strdup((char*)data) : NULL); ctx->type = atype; ctx->stdata = (void*)sv; return ctx; diff --git a/source/libs/index/test/fstTest.cc b/source/libs/index/test/fstTest.cc index 1b9acbd67e..a38a7c9701 100644 --- a/source/libs/index/test/fstTest.cc +++ b/source/libs/index/test/fstTest.cc @@ -258,7 +258,7 @@ void checkFstLongTerm() { // taosMemoryFree(ctx); // delete m; } -void checkFstCheckIterator() { +void checkFstCheckIterator1() { FstWriter* fw = new FstWriter; int64_t s = taosGetTimestampUs(); int count = 2; @@ -293,6 +293,41 @@ void checkFstCheckIterator() { taosMemoryFree(ctx); delete m; } +void checkFstCheckIterator2() { + FstWriter* fw = new FstWriter; + int64_t s = taosGetTimestampUs(); + int count = 2; + // Performance_fstWriteRecords(fw); + int64_t e = taosGetTimestampUs(); + + std::cout << "insert data count : " << count << "elapas time: " << e - s << std::endl; + + fw->Put("a", 1); + fw->Put("b", 2); + fw->Put("c", 4); + delete fw; + + FstReadMemory* m = new FstReadMemory(1024 * 64); + if (m->init() == false) { + std::cout << "init readMemory failed" << std::endl; + delete m; + return; + } + + // prefix search + std::vector result; + + AutomationCtx* ctx = automCtxCreate((void*)"He", AUTOMATION_ALWAYS); + m->Search(ctx, result); + std::cout << "size: " << result.size() << std::endl; + // assert(result.size() == count); + for (int i = 0; i < result.size(); i++) { + // assert(result[i] == i); // check result + } + + taosMemoryFree(ctx); + delete m; +} void checkFstCheckIteratorPrefix() { FstWriter* fw = new FstWriter; int64_t s = taosGetTimestampUs(); @@ -346,7 +381,7 @@ void checkFstCheckIteratorPrefix() { } delete m; } -void checkFstCheckIteratorRange() { +void checkFstCheckIteratorRange1() { FstWriter* fw = new FstWriter; int64_t s = taosGetTimestampUs(); int count = 2; @@ -376,7 +411,42 @@ void checkFstCheckIteratorRange() { // [b, e) m->SearchRange(ctx, "b", "e", result); - // assert(result.size() == 1); + assert(result.size() == 3); + taosMemoryFree(ctx); + } +} +void checkFstCheckIteratorRange2() { + FstWriter* fw = new FstWriter; + int64_t s = taosGetTimestampUs(); + int count = 2; + // Performance_fstWriteRecords(fw); + int64_t e = taosGetTimestampUs(); + + std::cout << "insert data count : " << count << "elapas time: " << e - s << std::endl; + + fw->Put("ab", 1); + fw->Put("bd", 2); + fw->Put("cdd", 3); + fw->Put("cde", 3); + fw->Put("ddd", 4); + fw->Put("ed", 5); + delete fw; + + FstReadMemory* m = new FstReadMemory(1024 * 64); + if (m->init() == false) { + std::cout << "init readMemory failed" << std::endl; + delete m; + return; + } + { + // prefix search + std::vector result; + + AutomationCtx* ctx = automCtxCreate((void*)"he", AUTOMATION_ALWAYS); + + // [b, e) + m->SearchRange(ctx, "b", "ed", result); + assert(result.size() == 4); taosMemoryFree(ctx); } } @@ -443,9 +513,11 @@ int main(int argc, char* argv[]) { // path suid colName ver // iterTFileReader(argv[1], argv[2], argv[3], argv[4]); //} - // checkFstCheckIterator(); + // checkFstCheckIterator1(); + // checkFstCheckIterator2(); // checkFstCheckIteratorPrefix(); - checkFstCheckIteratorRange(); + checkFstCheckIteratorRange1(); + checkFstCheckIteratorRange2(); // checkFstLongTerm(); // checkFstPrefixSearch();