Merge pull request #11055 from taosdata/feature/index_query

fst query
This commit is contained in:
Yihao Deng 2022-03-28 22:15:47 +08:00 committed by GitHub
commit 2a012b6133
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 215 additions and 30 deletions

View File

@ -21,9 +21,9 @@ extern "C" {
#endif #endif
#include "indexInt.h" #include "indexInt.h"
#include "index_fst_node.h"
#include "index_fst_automation.h" #include "index_fst_automation.h"
#include "index_fst_counting_writer.h" #include "index_fst_counting_writer.h"
#include "index_fst_node.h"
#include "index_fst_registry.h" #include "index_fst_registry.h"
#include "index_fst_util.h" #include "index_fst_util.h"
@ -257,9 +257,9 @@ typedef struct FstMeta {
} FstMeta; } FstMeta;
typedef struct Fst { typedef struct Fst {
FstMeta* meta; FstMeta* meta;
FstSlice* data; // FstSlice* data; //
FstNode* root; // FstNode* root; //
TdThreadMutex mtx; TdThreadMutex mtx;
} Fst; } Fst;
@ -325,10 +325,10 @@ StreamWithStateResult* streamWithStateNextWith(StreamWithState* sws, StreamCallb
FstStreamBuilder* fstStreamBuilderCreate(Fst* fst, AutomationCtx* aut); FstStreamBuilder* fstStreamBuilderCreate(Fst* fst, AutomationCtx* aut);
void fstStreamBuilderDestroy(FstStreamBuilder* b); void fstStreamBuilderDestroy(FstStreamBuilder* b);
// set up bound range
// refactor: simplify this code with a macro
FstStreamBuilder* fstStreamBuilderRange(FstStreamBuilder* b, FstSlice* val, RangeType type); // set up bound range
// refactor later: simplify this code with a macro
void fstStreamBuilderSetRange(FstStreamBuilder* b, FstSlice* val, RangeType type);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -720,7 +720,6 @@ bool fstNodeFindInput(FstNode* node, uint8_t b, uint64_t* res) {
uint64_t out = fstStateFindInput(st, node, b, &null); uint64_t out = fstStateFindInput(st, node, b, &null);
if (null == false) { if (null == false) {
*res = out; *res = out;
} else {
s = false; s = false;
} }
} }
@ -1184,7 +1183,7 @@ StreamWithState* streamWithStateCreate(Fst* fst, AutomationCtx* automation, FstB
sws->aut = automation; sws->aut = automation;
sws->inp = (SArray*)taosArrayInit(256, sizeof(uint8_t)); sws->inp = (SArray*)taosArrayInit(256, sizeof(uint8_t));
sws->emptyOutput.null = false; sws->emptyOutput.null = true;
sws->emptyOutput.out = 0; sws->emptyOutput.out = 0;
sws->stack = (SArray*)taosArrayInit(256, sizeof(StreamState)); sws->stack = (SArray*)taosArrayInit(256, sizeof(StreamState));
@ -1317,7 +1316,7 @@ StreamWithStateResult* streamWithStateNextWith(StreamWithState* sws, StreamCallb
if (FST_NODE_ADDR(p->node) != fstGetRootAddr(sws->fst)) { if (FST_NODE_ADDR(p->node) != fstGetRootAddr(sws->fst)) {
taosArrayPop(sws->inp); taosArrayPop(sws->inp);
} }
streamStateDestroy(p); // streamStateDestroy(p);
continue; continue;
} }
FstTransition trn; FstTransition trn;
@ -1425,9 +1424,9 @@ void fstStreamBuilderDestroy(FstStreamBuilder* b) {
taosMemoryFreeClear(b->max); taosMemoryFreeClear(b->max);
taosMemoryFree(b); taosMemoryFree(b);
} }
FstStreamBuilder* fstStreamBuilderRange(FstStreamBuilder* b, FstSlice* val, RangeType type) { void fstStreamBuilderSetRange(FstStreamBuilder* b, FstSlice* val, RangeType type) {
if (b == NULL) { if (b == NULL) {
return NULL; return;
} }
if (type == GE) { if (type == GE) {
b->min->type = Included; b->min->type = Included;
@ -1446,5 +1445,4 @@ FstStreamBuilder* fstStreamBuilderRange(FstStreamBuilder* b, FstSlice* val, Rang
fstSliceDestroy(&(b->max->data)); fstSliceDestroy(&(b->max->data));
b->max->data = fstSliceDeepCopy(val, 0, FST_SLICE_LEN(val) - 1); b->max->data = fstSliceDeepCopy(val, 0, FST_SLICE_LEN(val) - 1);
} }
return b;
} }

View File

@ -85,10 +85,20 @@ static void* prefixStart(AutomationCtx* ctx) {
}; };
static bool prefixIsMatch(AutomationCtx* ctx, void* sv) { static bool prefixIsMatch(AutomationCtx* ctx, void* sv) {
StartWithStateValue* ssv = (StartWithStateValue*)sv; StartWithStateValue* ssv = (StartWithStateValue*)sv;
return ssv->val == strlen(ctx->data); if (ssv == NULL) {
return false;
}
if (ssv->type == FST_INT) {
return ssv->val == strlen(ctx->data);
} else {
return false;
}
} }
static bool prefixCanMatch(AutomationCtx* ctx, void* sv) { static bool prefixCanMatch(AutomationCtx* ctx, void* sv) {
StartWithStateValue* ssv = (StartWithStateValue*)sv; StartWithStateValue* ssv = (StartWithStateValue*)sv;
if (ssv == NULL) {
return false;
}
return ssv->val >= 0; return ssv->val >= 0;
} }
static bool prefixWillAlwaysMatch(AutomationCtx* ctx, void* state) { return true; } static bool prefixWillAlwaysMatch(AutomationCtx* ctx, void* state) { return true; }
@ -154,15 +164,7 @@ AutomationCtx* automCtxCreate(void* data, AutomationType atype) {
// add more search type // add more search type
} }
char* dst = NULL; ctx->data = (data != NULL ? strdup((char*)data) : NULL);
if (data != NULL) {
char* src = (char*)data;
size_t len = strlen(src);
dst = (char*)taosMemoryCalloc(1, len * sizeof(char) + 1);
memcpy(dst, src, len);
}
ctx->data = dst;
ctx->type = atype; ctx->type = atype;
ctx->stdata = (void*)sv; ctx->stdata = (void*)sv;
return ctx; return ctx;

View File

@ -96,11 +96,36 @@ class FstReadMemory {
char* ch = (char*)fstSliceData(s, &sz); char* ch = (char*)fstSliceData(s, &sz);
std::string key(ch, sz); std::string key(ch, sz);
printf("key: %s, val: %" PRIu64 "\n", key.c_str(), (uint64_t)(rt->out.out)); printf("key: %s, val: %" PRIu64 "\n", key.c_str(), (uint64_t)(rt->out.out));
result.push_back(rt->out.out);
swsResultDestroy(rt); swsResultDestroy(rt);
} }
for (size_t i = 0; i < result.size(); i++) { return true;
}
bool SearchRange(AutomationCtx* ctx, const std::string& low, const std::string& high, std::vector<uint64_t>& result) {
FstStreamBuilder* sb = fstSearch(_fst, ctx);
FstSlice l = fstSliceCreate((uint8_t*)low.c_str(), low.size());
FstSlice h = fstSliceCreate((uint8_t*)high.c_str(), high.size());
// range [low, high);
fstStreamBuilderSetRange(sb, &l, GE);
fstStreamBuilderSetRange(sb, &h, LT);
fstSliceDestroy(&l);
fstSliceDestroy(&h);
StreamWithState* st = streamBuilderIntoStream(sb);
StreamWithStateResult* rt = NULL;
while ((rt = streamWithStateNextWith(st, NULL)) != NULL) {
// result.push_back((uint64_t)(rt->out.out));
FstSlice* s = &rt->data;
int32_t sz = 0;
char* ch = (char*)fstSliceData(s, &sz);
std::string key(ch, sz);
printf("key: %s, val: %" PRIu64 "\n", key.c_str(), (uint64_t)(rt->out.out));
result.push_back(rt->out.out);
swsResultDestroy(rt);
} }
std::cout << std::endl;
return true; return true;
} }
bool SearchWithTimeCostUs(AutomationCtx* ctx, std::vector<uint64_t>& result) { bool SearchWithTimeCostUs(AutomationCtx* ctx, std::vector<uint64_t>& result) {
@ -233,7 +258,7 @@ void checkFstLongTerm() {
// taosMemoryFree(ctx); // taosMemoryFree(ctx);
// delete m; // delete m;
} }
void checkFstCheckIterator() { void checkFstCheckIterator1() {
FstWriter* fw = new FstWriter; FstWriter* fw = new FstWriter;
int64_t s = taosGetTimestampUs(); int64_t s = taosGetTimestampUs();
int count = 2; int count = 2;
@ -243,8 +268,7 @@ void checkFstCheckIterator() {
std::cout << "insert data count : " << count << "elapas time: " << e - s << std::endl; std::cout << "insert data count : " << count << "elapas time: " << e - s << std::endl;
fw->Put("Hello world", 1); fw->Put("Hello world", 1);
fw->Put("hello world", 2); fw->Put("Hello worle", 2);
fw->Put("hello worle", 3);
fw->Put("hello worlf", 4); fw->Put("hello worlf", 4);
delete fw; delete fw;
@ -258,7 +282,7 @@ void checkFstCheckIterator() {
// prefix search // prefix search
std::vector<uint64_t> result; std::vector<uint64_t> result;
AutomationCtx* ctx = automCtxCreate((void*)"H", AUTOMATION_PREFIX); AutomationCtx* ctx = automCtxCreate((void*)"He", AUTOMATION_ALWAYS);
m->Search(ctx, result); m->Search(ctx, result);
std::cout << "size: " << result.size() << std::endl; std::cout << "size: " << result.size() << std::endl;
// assert(result.size() == count); // assert(result.size() == count);
@ -269,6 +293,163 @@ void checkFstCheckIterator() {
taosMemoryFree(ctx); taosMemoryFree(ctx);
delete m; delete m;
} }
void checkFstCheckIterator2() {
FstWriter* fw = new FstWriter;
int64_t s = taosGetTimestampUs();
int count = 2;
// Performance_fstWriteRecords(fw);
int64_t e = taosGetTimestampUs();
std::cout << "insert data count : " << count << "elapas time: " << e - s << std::endl;
fw->Put("a", 1);
fw->Put("b", 2);
fw->Put("c", 4);
delete fw;
FstReadMemory* m = new FstReadMemory(1024 * 64);
if (m->init() == false) {
std::cout << "init readMemory failed" << std::endl;
delete m;
return;
}
// prefix search
std::vector<uint64_t> result;
AutomationCtx* ctx = automCtxCreate((void*)"He", AUTOMATION_ALWAYS);
m->Search(ctx, result);
std::cout << "size: " << result.size() << std::endl;
// assert(result.size() == count);
for (int i = 0; i < result.size(); i++) {
// assert(result[i] == i); // check result
}
taosMemoryFree(ctx);
delete m;
}
void checkFstCheckIteratorPrefix() {
FstWriter* fw = new FstWriter;
int64_t s = taosGetTimestampUs();
int count = 2;
// Performance_fstWriteRecords(fw);
int64_t e = taosGetTimestampUs();
std::cout << "insert data count : " << count << "elapas time: " << e - s << std::endl;
fw->Put("Hello world", 1);
fw->Put("Hello worle", 2);
fw->Put("hello worlf", 4);
fw->Put("ja", 4);
fw->Put("jb", 4);
fw->Put("jc", 4);
fw->Put("jddddddddd", 4);
fw->Put("jefffffff", 4);
delete fw;
FstReadMemory* m = new FstReadMemory(1024 * 64);
if (m->init() == false) {
std::cout << "init readMemory failed" << std::endl;
delete m;
return;
}
{
// prefix search
std::vector<uint64_t> result;
AutomationCtx* ctx = automCtxCreate((void*)"he", AUTOMATION_PREFIX);
m->Search(ctx, result);
assert(result.size() == 1);
taosMemoryFree(ctx);
}
{
// prefix search
std::vector<uint64_t> result;
AutomationCtx* ctx = automCtxCreate((void*)"Hello", AUTOMATION_PREFIX);
m->Search(ctx, result);
assert(result.size() == 2);
taosMemoryFree(ctx);
}
{
std::vector<uint64_t> result;
AutomationCtx* ctx = automCtxCreate((void*)"jddd", AUTOMATION_PREFIX);
m->Search(ctx, result);
assert(result.size() == 1);
taosMemoryFree(ctx);
}
delete m;
}
// Puts the keys "a".."e" through an FstWriter, opens an FstReadMemory, and runs
// a range search over [b, e) with an AUTOMATION_ALWAYS context. Three values
// ("b", "c", "d") are expected back.
void checkFstCheckIteratorRange1() {
  FstWriter* fw = new FstWriter;
  int64_t    s = taosGetTimestampUs();
  int        count = 2;
  // Performance_fstWriteRecords(fw);
  int64_t e = taosGetTimestampUs();
  std::cout << "insert data count : " << count << "elapas time: " << e - s << std::endl;

  fw->Put("a", 1);
  fw->Put("b", 2);
  fw->Put("c", 3);
  fw->Put("d", 4);
  fw->Put("e", 5);
  // The writer is released before the reader is constructed.
  delete fw;

  FstReadMemory* m = new FstReadMemory(1024 * 64);
  if (m->init() == false) {
    std::cout << "init readMemory failed" << std::endl;
    delete m;
    return;
  }
  {
    // range search
    std::vector<uint64_t> result;
    AutomationCtx*        ctx = automCtxCreate((void*)"he", AUTOMATION_ALWAYS);
    // [b, e): lower bound included, upper bound excluded
    m->SearchRange(ctx, "b", "e", result);
    assert(result.size() == 3);
    taosMemoryFree(ctx);
  }
  // Release the reader on the success path as well; previously it was only
  // deleted when init() failed.
  delete m;
}
// Puts six multi-character keys through an FstWriter, opens an FstReadMemory,
// and runs a range search over [b, ed) with an AUTOMATION_ALWAYS context. Four
// values ("bd", "cdd", "cde", "ddd") are expected back.
void checkFstCheckIteratorRange2() {
  FstWriter* fw = new FstWriter;
  int64_t    s = taosGetTimestampUs();
  int        count = 2;
  // Performance_fstWriteRecords(fw);
  int64_t e = taosGetTimestampUs();
  std::cout << "insert data count : " << count << "elapas time: " << e - s << std::endl;

  fw->Put("ab", 1);
  fw->Put("bd", 2);
  fw->Put("cdd", 3);
  fw->Put("cde", 3);
  fw->Put("ddd", 4);
  fw->Put("ed", 5);
  // The writer is released before the reader is constructed.
  delete fw;

  FstReadMemory* m = new FstReadMemory(1024 * 64);
  if (m->init() == false) {
    std::cout << "init readMemory failed" << std::endl;
    delete m;
    return;
  }
  {
    // range search
    std::vector<uint64_t> result;
    AutomationCtx*        ctx = automCtxCreate((void*)"he", AUTOMATION_ALWAYS);
    // [b, ed): lower bound included, upper bound excluded, so "ed" is not returned
    m->SearchRange(ctx, "b", "ed", result);
    assert(result.size() == 4);
    taosMemoryFree(ctx);
  }
  // Release the reader on the success path as well; previously it was only
  // deleted when init() failed.
  delete m;
}
void fst_get(Fst* fst) { void fst_get(Fst* fst) {
for (int i = 0; i < 10000; i++) { for (int i = 0; i < 10000; i++) {
@ -332,7 +513,11 @@ int main(int argc, char* argv[]) {
// path suid colName ver // path suid colName ver
// iterTFileReader(argv[1], argv[2], argv[3], argv[4]); // iterTFileReader(argv[1], argv[2], argv[3], argv[4]);
//} //}
checkFstCheckIterator(); // checkFstCheckIterator1();
// checkFstCheckIterator2();
// checkFstCheckIteratorPrefix();
checkFstCheckIteratorRange1();
checkFstCheckIteratorRange2();
// checkFstLongTerm(); // checkFstLongTerm();
// checkFstPrefixSearch(); // checkFstPrefixSearch();