Merge pull request #11055 from taosdata/feature/index_query

fst query
This commit is contained in:
Yihao Deng 2022-03-28 22:15:47 +08:00 committed by GitHub
commit 2a012b6133
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 215 additions and 30 deletions

View File

@ -21,9 +21,9 @@ extern "C" {
#endif
#include "indexInt.h"
#include "index_fst_node.h"
#include "index_fst_automation.h"
#include "index_fst_counting_writer.h"
#include "index_fst_node.h"
#include "index_fst_registry.h"
#include "index_fst_util.h"
@ -257,9 +257,9 @@ typedef struct FstMeta {
} FstMeta;
// In-memory handle for one FST instance.
// Each member is declared exactly once; declaring meta/data/root twice
// would be a redefinition error.
typedef struct Fst {
  FstMeta*      meta;  // FST metadata (see FstMeta)
  FstSlice*     data;  // presumably the serialized FST bytes -- TODO confirm
  FstNode*      root;  // root node of the FST
  TdThreadMutex mtx;   // NOTE(review): what this mutex guards is not visible here -- confirm
} Fst;
@ -325,10 +325,10 @@ StreamWithStateResult* streamWithStateNextWith(StreamWithState* sws, StreamCallb
// Create a stream builder over `fst` driven by the automaton `aut`.
FstStreamBuilder* fstStreamBuilderCreate(Fst* fst, AutomationCtx* aut);
// Release a builder obtained from fstStreamBuilderCreate.
void fstStreamBuilderDestroy(FstStreamBuilder* b);
// Set up a bound of the range on builder `b` and return the builder.
// TODO: refactor -- simplify this code with a macro.
FstStreamBuilder* fstStreamBuilderRange(FstStreamBuilder* b, FstSlice* val, RangeType type);
// Set up a bound of the range on builder `b`; `type` selects which bound
// (e.g. GE, LT) and `val` supplies its key. Does nothing when `b` is NULL.
// TODO: refactor later -- simplify this code with a macro.
void fstStreamBuilderSetRange(FstStreamBuilder* b, FstSlice* val, RangeType type);
#ifdef __cplusplus
}

View File

@ -720,7 +720,6 @@ bool fstNodeFindInput(FstNode* node, uint8_t b, uint64_t* res) {
uint64_t out = fstStateFindInput(st, node, b, &null);
if (null == false) {
*res = out;
} else {
s = false;
}
}
@ -1184,7 +1183,7 @@ StreamWithState* streamWithStateCreate(Fst* fst, AutomationCtx* automation, FstB
sws->aut = automation;
sws->inp = (SArray*)taosArrayInit(256, sizeof(uint8_t));
sws->emptyOutput.null = false;
sws->emptyOutput.null = true;
sws->emptyOutput.out = 0;
sws->stack = (SArray*)taosArrayInit(256, sizeof(StreamState));
@ -1317,7 +1316,7 @@ StreamWithStateResult* streamWithStateNextWith(StreamWithState* sws, StreamCallb
if (FST_NODE_ADDR(p->node) != fstGetRootAddr(sws->fst)) {
taosArrayPop(sws->inp);
}
streamStateDestroy(p);
// streamStateDestroy(p);
continue;
}
FstTransition trn;
@ -1425,9 +1424,9 @@ void fstStreamBuilderDestroy(FstStreamBuilder* b) {
taosMemoryFreeClear(b->max);
taosMemoryFree(b);
}
FstStreamBuilder* fstStreamBuilderRange(FstStreamBuilder* b, FstSlice* val, RangeType type) {
void fstStreamBuilderSetRange(FstStreamBuilder* b, FstSlice* val, RangeType type) {
if (b == NULL) {
return NULL;
return;
}
if (type == GE) {
b->min->type = Included;
@ -1446,5 +1445,4 @@ FstStreamBuilder* fstStreamBuilderRange(FstStreamBuilder* b, FstSlice* val, Rang
fstSliceDestroy(&(b->max->data));
b->max->data = fstSliceDeepCopy(val, 0, FST_SLICE_LEN(val) - 1);
}
return b;
}

View File

@ -85,10 +85,20 @@ static void* prefixStart(AutomationCtx* ctx) {
};
/**
 * Reports whether the prefix automaton has consumed its whole pattern.
 *
 * @param ctx automaton context; ctx->data is the NUL-terminated pattern
 *            (it may be NULL when the context was created without data).
 * @param sv  current state, interpreted as a StartWithStateValue* (may be NULL).
 * @return true only when the state is of type FST_INT and its value equals the
 *         pattern length; false for a NULL state, a NULL pattern, or any
 *         other state type.
 */
static bool prefixIsMatch(AutomationCtx* ctx, void* sv) {
  StartWithStateValue* ssv = (StartWithStateValue*)sv;
  // Validate before touching the state: an unconditional early return here
  // would dereference a NULL state and make the type check unreachable.
  if (ssv == NULL) {
    return false;
  }
  if (ssv->type != FST_INT) {
    return false;
  }
  // strlen(NULL) is undefined behaviour; a context without a pattern never matches.
  if (ctx == NULL || ctx->data == NULL) {
    return false;
  }
  return ssv->val == strlen(ctx->data);
}
/**
 * Reports whether the prefix automaton can still reach a match from `sv`.
 *
 * @param ctx automaton context (not consulted).
 * @param sv  current state, interpreted as a StartWithStateValue* (may be NULL).
 * @return false for a NULL state; otherwise true iff the state's value is
 *         non-negative.
 */
static bool prefixCanMatch(AutomationCtx* ctx, void* sv) {
  StartWithStateValue* state = (StartWithStateValue*)sv;
  return state != NULL && state->val >= 0;
}
/**
 * "Will always match" predicate of the prefix automaton.
 *
 * Neither argument is consulted; the answer is unconditionally true.
 */
static bool prefixWillAlwaysMatch(AutomationCtx* ctx, void* state) {
  return true;
}
@ -154,15 +164,7 @@ AutomationCtx* automCtxCreate(void* data, AutomationType atype) {
// add more search type
}
char* dst = NULL;
if (data != NULL) {
char* src = (char*)data;
size_t len = strlen(src);
dst = (char*)taosMemoryCalloc(1, len * sizeof(char) + 1);
memcpy(dst, src, len);
}
ctx->data = dst;
ctx->data = (data != NULL ? strdup((char*)data) : NULL);
ctx->type = atype;
ctx->stdata = (void*)sv;
return ctx;

View File

@ -96,11 +96,36 @@ class FstReadMemory {
char* ch = (char*)fstSliceData(s, &sz);
std::string key(ch, sz);
printf("key: %s, val: %" PRIu64 "\n", key.c_str(), (uint64_t)(rt->out.out));
result.push_back(rt->out.out);
swsResultDestroy(rt);
}
for (size_t i = 0; i < result.size(); i++) {
return true;
}
// Streams every key in the half-open range [low, high) that `ctx` accepts.
// Each hit is printed as "key: <key>, val: <value>" and its value is appended
// to `result`. Always returns true.
bool SearchRange(AutomationCtx* ctx, const std::string& low, const std::string& high, std::vector<uint64_t>& result) {
  FstStreamBuilder* builder = fstSearch(_fst, ctx);

  // The bound slices are only needed while they are handed to the builder.
  FstSlice lower = fstSliceCreate((uint8_t*)low.c_str(), low.size());
  FstSlice upper = fstSliceCreate((uint8_t*)high.c_str(), high.size());
  fstStreamBuilderSetRange(builder, &lower, GE);  // lower bound, inclusive
  fstStreamBuilderSetRange(builder, &upper, LT);  // upper bound, exclusive
  fstSliceDestroy(&lower);
  fstSliceDestroy(&upper);

  StreamWithState* stream = streamBuilderIntoStream(builder);
  for (StreamWithStateResult* hit = streamWithStateNextWith(stream, NULL); hit != NULL;
       hit = streamWithStateNextWith(stream, NULL)) {
    // result.push_back((uint64_t)(rt->out.out));
    int32_t           keyLen = 0;
    const char*       keyBytes = (char*)fstSliceData(&hit->data, &keyLen);
    const std::string key(keyBytes, keyLen);
    printf("key: %s, val: %" PRIu64 "\n", key.c_str(), (uint64_t)(hit->out.out));
    result.push_back(hit->out.out);
    swsResultDestroy(hit);
  }
  std::cout << std::endl;
  return true;
}
bool SearchWithTimeCostUs(AutomationCtx* ctx, std::vector<uint64_t>& result) {
@ -233,7 +258,7 @@ void checkFstLongTerm() {
// taosMemoryFree(ctx);
// delete m;
}
void checkFstCheckIterator() {
void checkFstCheckIterator1() {
FstWriter* fw = new FstWriter;
int64_t s = taosGetTimestampUs();
int count = 2;
@ -243,8 +268,7 @@ void checkFstCheckIterator() {
std::cout << "insert data count : " << count << "elapas time: " << e - s << std::endl;
fw->Put("Hello world", 1);
fw->Put("hello world", 2);
fw->Put("hello worle", 3);
fw->Put("Hello worle", 2);
fw->Put("hello worlf", 4);
delete fw;
@ -258,7 +282,7 @@ void checkFstCheckIterator() {
// prefix search
std::vector<uint64_t> result;
AutomationCtx* ctx = automCtxCreate((void*)"H", AUTOMATION_PREFIX);
AutomationCtx* ctx = automCtxCreate((void*)"He", AUTOMATION_ALWAYS);
m->Search(ctx, result);
std::cout << "size: " << result.size() << std::endl;
// assert(result.size() == count);
@ -269,6 +293,163 @@ void checkFstCheckIterator() {
taosMemoryFree(ctx);
delete m;
}
// Writes the keys "a", "b" and "c" into a fresh FST, reads it back through an
// AUTOMATION_ALWAYS automaton and prints how many values the stream returned.
void checkFstCheckIterator2() {
  FstWriter* fw = new FstWriter;
  int64_t    s = taosGetTimestampUs();
  int        count = 2;
  // Performance_fstWriteRecords(fw);
  int64_t e = taosGetTimestampUs();
  std::cout << "insert data count : " << count << "elapas time: " << e - s << std::endl;

  fw->Put("a", 1);
  fw->Put("b", 2);
  fw->Put("c", 4);
  delete fw;

  FstReadMemory* m = new FstReadMemory(1024 * 64);
  if (m->init() == false) {
    std::cout << "init readMemory failed" << std::endl;
    delete m;
    return;
  }

  // Unfiltered scan: the automaton type is AUTOMATION_ALWAYS, not a prefix search.
  std::vector<uint64_t> result;
  AutomationCtx*        ctx = automCtxCreate((void*)"He", AUTOMATION_ALWAYS);
  m->Search(ctx, result);
  std::cout << "size: " << result.size() << std::endl;
  // No per-element verification yet (the former loop over `result` had an empty
  // body and compared a signed index against size_t, so it was dropped).
  // assert(result.size() == count);

  // NOTE(review): automCtxCreate duplicates the pattern string; freeing only the
  // context struct may leak that copy -- confirm whether a dedicated destroy
  // function should be used instead.
  taosMemoryFree(ctx);
  delete m;
}
void checkFstCheckIteratorPrefix() {
FstWriter* fw = new FstWriter;
int64_t s = taosGetTimestampUs();
int count = 2;
// Performance_fstWriteRecords(fw);
int64_t e = taosGetTimestampUs();
std::cout << "insert data count : " << count << "elapas time: " << e - s << std::endl;
fw->Put("Hello world", 1);
fw->Put("Hello worle", 2);
fw->Put("hello worlf", 4);
fw->Put("ja", 4);
fw->Put("jb", 4);
fw->Put("jc", 4);
fw->Put("jddddddddd", 4);
fw->Put("jefffffff", 4);
delete fw;
FstReadMemory* m = new FstReadMemory(1024 * 64);
if (m->init() == false) {
std::cout << "init readMemory failed" << std::endl;
delete m;
return;
}
{
// prefix search
std::vector<uint64_t> result;
AutomationCtx* ctx = automCtxCreate((void*)"he", AUTOMATION_PREFIX);
m->Search(ctx, result);
assert(result.size() == 1);
taosMemoryFree(ctx);
}
{
// prefix search
std::vector<uint64_t> result;
AutomationCtx* ctx = automCtxCreate((void*)"Hello", AUTOMATION_PREFIX);
m->Search(ctx, result);
assert(result.size() == 2);
taosMemoryFree(ctx);
}
{
std::vector<uint64_t> result;
AutomationCtx* ctx = automCtxCreate((void*)"jddd", AUTOMATION_PREFIX);
m->Search(ctx, result);
assert(result.size() == 1);
taosMemoryFree(ctx);
}
delete m;
}
// Range-search check over single-character keys: writes "a".."e" into a fresh
// FST and asserts that the half-open range [b, e) yields exactly three values.
void checkFstCheckIteratorRange1() {
  FstWriter* fw = new FstWriter;
  int64_t    s = taosGetTimestampUs();
  int        count = 2;
  // Performance_fstWriteRecords(fw);
  int64_t e = taosGetTimestampUs();
  std::cout << "insert data count : " << count << "elapas time: " << e - s << std::endl;

  fw->Put("a", 1);
  fw->Put("b", 2);
  fw->Put("c", 3);
  fw->Put("d", 4);
  fw->Put("e", 5);
  delete fw;

  FstReadMemory* m = new FstReadMemory(1024 * 64);
  if (m->init() == false) {
    std::cout << "init readMemory failed" << std::endl;
    delete m;
    return;
  }
  {
    // Range search with an AUTOMATION_ALWAYS automaton; presumably the pattern
    // "he" plays no role for this automaton type -- TODO confirm.
    std::vector<uint64_t> result;
    AutomationCtx*        ctx = automCtxCreate((void*)"he", AUTOMATION_ALWAYS);
    // [b, e)
    m->SearchRange(ctx, "b", "e", result);
    assert(result.size() == 3);
    taosMemoryFree(ctx);
  }
  // Release the reader on the success path as well; it was previously leaked.
  delete m;
}
// Range-search check over multi-character keys: writes six keys into a fresh
// FST and asserts that the half-open range [b, ed) yields exactly four values
// ("bd", "cdd", "cde", "ddd"; "ab" sorts below the range and "ed" is excluded).
void checkFstCheckIteratorRange2() {
  FstWriter* fw = new FstWriter;
  int64_t    s = taosGetTimestampUs();
  int        count = 2;
  // Performance_fstWriteRecords(fw);
  int64_t e = taosGetTimestampUs();
  std::cout << "insert data count : " << count << "elapas time: " << e - s << std::endl;

  fw->Put("ab", 1);
  fw->Put("bd", 2);
  fw->Put("cdd", 3);
  fw->Put("cde", 3);
  fw->Put("ddd", 4);
  fw->Put("ed", 5);
  delete fw;

  FstReadMemory* m = new FstReadMemory(1024 * 64);
  if (m->init() == false) {
    std::cout << "init readMemory failed" << std::endl;
    delete m;
    return;
  }
  {
    // Range search with an AUTOMATION_ALWAYS automaton; presumably the pattern
    // "he" plays no role for this automaton type -- TODO confirm.
    std::vector<uint64_t> result;
    AutomationCtx*        ctx = automCtxCreate((void*)"he", AUTOMATION_ALWAYS);
    // [b, ed)
    m->SearchRange(ctx, "b", "ed", result);
    assert(result.size() == 4);
    taosMemoryFree(ctx);
  }
  // Release the reader on the success path as well; it was previously leaked.
  delete m;
}
void fst_get(Fst* fst) {
for (int i = 0; i < 10000; i++) {
@ -332,7 +513,11 @@ int main(int argc, char* argv[]) {
// path suid colName ver
// iterTFileReader(argv[1], argv[2], argv[3], argv[4]);
//}
checkFstCheckIterator();
// checkFstCheckIterator1();
// checkFstCheckIterator2();
// checkFstCheckIteratorPrefix();
checkFstCheckIteratorRange1();
checkFstCheckIteratorRange2();
// checkFstLongTerm();
// checkFstPrefixSearch();