From 06fe44cabdb27c57ca57e8f04f7f1d358904dcd4 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Mon, 22 Nov 2021 17:43:03 +0800 Subject: [PATCH 01/18] add compare --- source/libs/index/inc/index_fst.h | 2 +- source/libs/index/inc/index_fst_util.h | 2 ++ source/libs/index/src/index_fst.c | 26 +++++++++++++++++++++----- source/libs/index/src/index_fst_util.c | 18 ++++++++++++++++++ 4 files changed, 42 insertions(+), 6 deletions(-) diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h index 5a8138b126..cb60ef7cb8 100644 --- a/source/libs/index/inc/index_fst.h +++ b/source/libs/index/inc/index_fst.h @@ -94,7 +94,7 @@ typedef struct FstBuilderNodeUnfinished { } FstBuilderNodeUnfinished; void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr); -void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, CompiledAddr addr); +void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Output out); /* * FstNode and helper function diff --git a/source/libs/index/inc/index_fst_util.h b/source/libs/index/inc/index_fst_util.h index fc7dd44637..6410df6d61 100644 --- a/source/libs/index/inc/index_fst_util.h +++ b/source/libs/index/inc/index_fst_util.h @@ -78,5 +78,7 @@ FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end); FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen); bool fstSliceEmpty(FstSlice *slice); +int fstSliceCompare(FstSlice *a, FstSlice *b); + #endif diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index a7ae3f2fb6..22cfe76de4 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -307,10 +307,10 @@ FstBuilder *fstBuilderCreate(void *w, FstType ty) { } -void fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDupe) { - return; +bool fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDupe) { + + return true; } - CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn) { if (FST_BUILDER_NODE_IS_FINAL(bn) && FST_BUILDER_NODE_TRANS_ISEMPTY(bn) @@ -336,6 +336,8 @@ CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn) { } + + FstSlice fstNodeAsSlice(FstNode *node) { FstSlice *slice = &node->data; FstSlice s = fstSliceCopy(slice, slice->end, slice->dLen - 1); @@ -354,11 +356,25 @@ FstLastTransition *fstLastTransitionCreate(uint8_t inp, Output out) { void fstLastTransitionDestroy(FstLastTransition *trn) { free(trn); } -void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr) { +void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *unNode, CompiledAddr addr) { + FstLastTransition *trn = unNode->last; + FstTransition t = {.inp = trn->inp, .out = trn->out, .addr = addr}; + taosArrayPush(unNode->node->trans, &t); return; } -void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, CompiledAddr addr) { +void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *unNode, Output out) { + if (FST_BUILDER_NODE_IS_FINAL(unNode->node)) { + unNode->node->finalOutput += out; + } + size_t sz = taosArrayGetSize(unNode->node->trans); + for (size_t i = 0; i < sz; i++) { + FstTransition *trn = taosArrayGet(unNode->node->trans, i); + trn->out += out; + } + if (unNode->last) { + unNode->last->out += out; + } return; } diff --git a/source/libs/index/src/index_fst_util.c b/source/libs/index/src/index_fst_util.c index 20751baf5f..23e7056c64 100644 --- a/source/libs/index/src/index_fst_util.c +++ b/source/libs/index/src/index_fst_util.c @@ -113,3 +113,21 @@ bool fstSliceEmpty(FstSlice *slice) { +int fstSliceCompare(FstSlice *a, FstSlice *b) { + uint32_t aLen = (a->end - a->start + 1); + uint32_t bLen = (b->end - b->start + 1); + uint32_t mLen = (aLen < bLen ? aLen : bLen); + for (int i = 0; i < mLen; i++) { + uint8_t x = a->data[i + a->start]; + uint8_t y = b->data[i + b->start]; + if (x == y) { continue; } + else if (x < y) { return -1; } + else { return 1; } + } + if (aLen == bLen) { return 0; } + else if (aLen < bLen) { return -1; } + else { return 1; } +} + + + From e769d0a00230266eb9f140128840aae911dee1cc Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Mon, 22 Nov 2021 18:40:44 +0800 Subject: [PATCH 02/18] refactor builder struct --- source/libs/index/inc/index_fst.h | 9 ++++++++- source/libs/index/inc/index_fst_util.h | 6 +++--- source/libs/index/src/index_fst.c | 22 ++++++++++++++++++---- source/libs/index/src/index_fst_util.c | 10 ++++------ 4 files changed, 33 insertions(+), 14 deletions(-) diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h index cb60ef7cb8..86346f64ff 100644 --- a/source/libs/index/inc/index_fst.h +++ b/source/libs/index/inc/index_fst.h @@ -36,6 +36,8 @@ typedef struct FstRange { typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal} State; typedef enum { Included, Excluded, Unbounded} FstBound; +typedef enum {Ordered, OutOfOrdered, DuplicateKey} OrderType; + /* @@ -66,11 +68,16 @@ typedef struct FstBuilder { FstCountingWriter *wrt; // The FST raw data is written directly to `wtr`. FstUnFinishedNodes *unfinished; // The stack of unfinished nodes FstRegistry* registry; // A map of finished nodes. - SArray* last; // The last word added + FstSlice last; // The last word added CompiledAddr lastAddr; // The address of the last compiled node uint64_t len; // num of keys added } FstBuilder; +FstBuilder *fstBuilderCreate(void *w, FstType ty); +OrderType fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup); +CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn); + + typedef struct FstTransitions { FstNode *node; diff --git a/source/libs/index/inc/index_fst_util.h b/source/libs/index/inc/index_fst_util.h index 6410df6d61..36639af6f3 100644 --- a/source/libs/index/inc/index_fst_util.h +++ b/source/libs/index/inc/index_fst_util.h @@ -70,11 +70,11 @@ CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr); typedef struct FstSlice { uint8_t *data; uint64_t dLen; - uint32_t start; - uint32_t end; + int32_t start; + int32_t end; } FstSlice; -FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end); +FstSlice fstSliceCopy(FstSlice *slice, int32_t start, int32_t end); FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen); bool fstSliceEmpty(FstSlice *slice); diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index 22cfe76de4..6206180668 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -300,16 +300,30 @@ FstBuilder *fstBuilderCreate(void *w, FstType ty) { b->wrt = fstCountingWriterCreate(w); b->unfinished = fstUnFinishedNodesCreate(); b->registry = fstRegistryCreate(10000, 2) ; - b->last = NULL; + b->last = fstSliceCreate(NULL, 0); b->lastAddr = NONE_ADDRESS; b->len = 0; return b; } -bool fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDupe) { - - return true; + +OrderType fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup) { + FstSlice *input = &bs; + if (fstSliceEmpty(&b->last)) { + // deep copy or not + b->last = fstSliceCopy(&bs, input->start, input->end); + } else { + int comp = fstSliceCompare(&b->last, &bs); + if (comp == 0 && ckDup) { + return DuplicateKey; + } else if (comp == 1) { + return OutOfOrdered; + } + // deep copy or not + b->last = fstSliceCopy(&bs, input->start, input->end); + } + return Ordered; } CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn) { if (FST_BUILDER_NODE_IS_FINAL(bn) diff --git a/source/libs/index/src/index_fst_util.c b/source/libs/index/src/index_fst_util.c index 23e7056c64..8e0a104b5f 100644 --- a/source/libs/index/src/index_fst_util.c +++ b/source/libs/index/src/index_fst_util.c @@ -94,7 +94,7 @@ FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen) { FstSlice slice = {.data = data, .dLen = dLen, .start = 0, .end = dLen - 1}; return slice; } -FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end) { +FstSlice fstSliceCopy(FstSlice *slice, int32_t start, int32_t end) { FstSlice t; if (start >= slice->dLen || end >= slice->dLen || start > end) { t.data = NULL; @@ -111,12 +111,10 @@ bool fstSliceEmpty(FstSlice *slice) { return slice->data == NULL || slice->dLen <= 0; } - - int fstSliceCompare(FstSlice *a, FstSlice *b) { - uint32_t aLen = (a->end - a->start + 1); - uint32_t bLen = (b->end - b->start + 1); - uint32_t mLen = (aLen < bLen ? aLen : bLen); + int32_t aLen = (a->end - a->start + 1); + int32_t bLen = (b->end - b->start + 1); + int32_t mLen = (aLen < bLen ? aLen : bLen); for (int i = 0; i < mLen; i++) { uint8_t x = a->data[i + a->start]; uint8_t y = b->data[i + b->start]; From 5441bab5dc111a15ad166336163d8ce4958dab13 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Mon, 22 Nov 2021 19:27:41 +0800 Subject: [PATCH 03/18] refactor builder struct --- source/libs/index/inc/index_fst.h | 2 ++ source/libs/index/src/index_fst.c | 17 ++++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h index 86346f64ff..6fbf9ded57 100644 --- a/source/libs/index/inc/index_fst.h +++ b/source/libs/index/inc/index_fst.h @@ -73,8 +73,10 @@ typedef struct FstBuilder { uint64_t len; // num of keys added } FstBuilder; + FstBuilder *fstBuilderCreate(void *w, FstType ty); OrderType fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup); +void fstBuilderCompileFrom(FstBuilder *b, uint64_t istate); CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn); diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index 6206180668..4b6fdeb079 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -307,7 +307,6 @@ FstBuilder *fstBuilderCreate(void *w, FstType ty) { } - OrderType fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup) { FstSlice *input = &bs; if (fstSliceEmpty(&b->last)) { @@ -325,6 +324,22 @@ OrderType fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup) { } return Ordered; } +void fstBuilderCompileFrom(FstBuilder *b, uint64_t istate) { + CompiledAddr addr = NONE_ADDRESS; + while (istate + 1 < FST_UNFINISHED_NODES_LEN(b->unfinished)) { + FstBuilderNode *n = NULL; + if (addr == NONE_ADDRESS) { + n = fstUnFinishedNodesPopEmpty(b->unfinished); + } else { + n = fstUnFinishedNodesPopFreeze(b->unfinished, addr); + } + addr = fstBuilderCompile(b, n); + assert(addr != NONE_ADDRESS); + fstBuilderNodeDestroy(n); + } + fstUnFinishedNodesTopLastFreeze(b->unfinished, addr); + return; +} CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn) { if (FST_BUILDER_NODE_IS_FINAL(bn) && FST_BUILDER_NODE_TRANS_ISEMPTY(bn) From 260588b6922a1251503cc23386731dc0abee873c Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Mon, 22 Nov 2021 23:07:07 +0800 Subject: [PATCH 04/18] update fst build struct --- source/libs/index/inc/index_fst.h | 2 ++ source/libs/index/inc/index_fst_util.h | 3 ++- source/libs/index/src/index_fst.c | 37 +++++++++++++++++++++++++- 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h index 6fbf9ded57..0a75f33bac 100644 --- a/source/libs/index/inc/index_fst.h +++ b/source/libs/index/inc/index_fst.h @@ -75,6 +75,8 @@ typedef struct FstBuilder { FstBuilder *fstBuilderCreate(void *w, FstType ty); +void fstBuilderDestroy(FstBuilder *b); +void fstBuilderInsertOutput(FstBuilder *b, FstSlice bs, Output in); OrderType fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup); void fstBuilderCompileFrom(FstBuilder *b, uint64_t istate); CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn); diff --git a/source/libs/index/inc/index_fst_util.h b/source/libs/index/inc/index_fst_util.h index 36639af6f3..6490054b91 100644 --- a/source/libs/index/inc/index_fst_util.h +++ b/source/libs/index/inc/index_fst_util.h @@ -77,8 +77,9 @@ typedef struct FstSlice { FstSlice fstSliceCopy(FstSlice *slice, int32_t start, int32_t end); FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen); bool fstSliceEmpty(FstSlice *slice); - int fstSliceCompare(FstSlice *a, FstSlice *b); +#define FST_SLICE_LEN(s) (s->end - s->start + 1) + #endif diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index 4b6fdeb079..4b96915941 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -29,7 +29,7 @@ void unFinishedNodeDestroyElem(void* elem) { fstBuilderNodeDestroy(b->node); free(b->last); } -void fstUnFinishedNodeDestroy(FstUnFinishedNodes *nodes) { +void fstUnFinishedNodesDestroy(FstUnFinishedNodes *nodes) { if (nodes == NULL) { return; } taosArrayDestroyEx(nodes->stack, unFinishedNodeDestroyElem); @@ -305,7 +305,42 @@ FstBuilder *fstBuilderCreate(void *w, FstType ty) { b->len = 0; return b; } +void fstBuilderDestroy(FstBuilder *b) { + if (b == NULL) { return; } + fstCountingWriterDestroy(b->wrt); + fstUnFinishedNodesDestroy(b->unfinished); + fstRegistryDestroy(b->registry); + free(b); +} +void fstBuilderInsertOutput(FstBuilder *b, FstSlice bs, Output in) { + FstSlice *s = &bs; + if (fstSliceEmpty(s)) { + b->len = 1; + fstUnFinishedNodesSetRootOutput(b->unfinished, in); + return; + } + Output out; + uint64_t prefixLen; + if (in != 0) { //if let Some(in) = in + prefixLen = fstUnFinishedNodesFindCommPrefixAndSetOutput(b->unfinished, bs, in, &out); + } else { + prefixLen = fstUnFinishedNodesFindCommPrefix(b->unfinished, bs); + out = 0; + } + + if (prefixLen == FST_SLICE_LEN(s)) { + assert(out != 0); + return; + } + + b->len += 1; + fstBuilderCompileFrom(b, prefixLen); + + FstSlice sub = fstSliceCopy(s, prefixLen, s->end); + fstUnFinishedNodesAddSuffix(b->unfinished, sub, out); + return; + } OrderType fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup) { FstSlice *input = &bs; From ccca561d11fdf75d26a3eac9bba6a3262c90d250 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 23 Nov 2021 09:52:29 +0800 Subject: [PATCH 05/18] update fst build struct --- source/libs/index/src/index_fst.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index 4b96915941..8cb318bbce 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -313,6 +313,18 @@ void fstBuilderDestroy(FstBuilder *b) { fstRegistryDestroy(b->registry); free(b); } + + +bool fstBuilderInsert(FstBuilder *b, FstSlice bs, Output in) { + OrderType t = fstBuilderCheckLastKey(b, bs, true); + if (t == Ordered) { + // add log info + fstBuilderInsertOutput(b, bs, in); + return true; + } + return false; +} + void fstBuilderInsertOutput(FstBuilder *b, FstSlice bs, Output in) { FstSlice *s = &bs; if (fstSliceEmpty(s)) { @@ -422,6 +434,8 @@ void fstLastTransitionDestroy(FstLastTransition *trn) { } void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *unNode, CompiledAddr addr) { FstLastTransition *trn = unNode->last; + if (trn == NULL) { return; } + FstTransition t = {.inp = trn->inp, .out = trn->out, .addr = addr}; taosArrayPush(unNode->node->trans, &t); return; From dc9163a29a31968cea12371b080268286d2896c2 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 23 Nov 2021 12:13:44 +0800 Subject: [PATCH 06/18] update fst build struct --- source/libs/index/inc/index_fst.h | 19 +++++- source/libs/index/src/index_fst.c | 100 ++++++++++++++++++++---------- 2 files changed, 84 insertions(+), 35 deletions(-) diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h index 0a75f33bac..4e2bee3f97 100644 --- a/source/libs/index/inc/index_fst.h +++ b/source/libs/index/inc/index_fst.h @@ -88,6 +88,19 @@ typedef struct FstTransitions { FstRange range; } FstTransitions; +//FstState and relation function + +typedef struct FstState { + State state; + uint8_t val; +} FstState; + +FstState fstStateCreate(FstSlice* data, CompiledAddr addr); + +#define FST_STATE_ONE_TRNAS_NEXT(node) (node->state.state == OneTransNext) +#define FST_STATE_ONE_TRNAS(node) (node->state.state == OneTrans) +#define FST_STATE_ANY_TRANS(node) (node->state.state == AnyTrans) +#define FST_STATE_EMPTY_FINAL(node) (node->state.state == EmptyFinal) typedef struct FstLastTransition { @@ -104,6 +117,8 @@ typedef struct FstBuilderNodeUnfinished { FstLastTransition* last; } FstBuilderNodeUnfinished; + + void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr); void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Output out); @@ -113,7 +128,7 @@ void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Out typedef struct FstNode { FstSlice data; uint64_t version; - State state; + FstState state; CompiledAddr start; CompiledAddr end; bool isFinal; @@ -171,6 +186,4 @@ void fstLastTransitionDestroy(FstLastTransition *trn); - - #endif diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index 8cb318bbce..a18c5d6479 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -155,65 +155,86 @@ uint64_t fstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, return res; } + +FstState fstStateCreate(FstSlice* date, CompiledAddr addr) { + FstState fs = {.state = EmptyFinal, .val = 0}; + if (addr == EMPTY_ADDRESS) { + fs.state = EmptyFinal; + fs.val = 0; + return fs; + } + + FstSlice *s = date; + uint8_t v = s->data[addr]; + uint8_t t = (v & 0b11000000) >> 6; + if (t == 0b11) { + fs.state = OneTransNext; + fs.val = v; + } else if (t == 0b10) { + fs.state = OneTrans; + fs.val = v; + } else { + fs.state = AnyTrans; + fs.val = v; + } + return fs; +} + // fst node function - - FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *slice) { FstNode *n = (FstNode *)malloc(sizeof(FstNode)); if (n == NULL) { return NULL; } - if (addr == EMPTY_ADDRESS) { + FstState st = fstStateCreate(slice, addr); + + if (st.state == EmptyFinal) { n->data = fstSliceCreate(NULL, 0); n->version = version; - n->state = EmptyFinal; + n->state = st; n->start = EMPTY_ADDRESS; n->end = EMPTY_ADDRESS; n->isFinal = true; n->nTrans = 0; n->sizes = 0; n->finalOutput = 0; - } - uint8_t v = slice->data[addr]; - uint8_t s = (v & 0b11000000) >> 6; - if (s == 0b11) { // oneTransNext + } else if (st.state == OneTransNext) { n->data = fstSliceCopy(slice, 0, addr); n->version = version; - n->state = OneTransNext; + n->state = st; n->start = addr; n->end = addr; //? s.end_addr(data); n->isFinal = false; n->sizes = 0; n->nTrans = 0; n->finalOutput = 0; - } else if (v == 0b10) { // oneTrans + } else if (st.state == OneTrans) { uint64_t sz; // fetch sz from addr n->data = fstSliceCopy(slice, 0, addr); n->version = version; - n->state = OneTrans; + n->state = st; n->start = addr; n->end = addr; // s.end_addr(data, sz); n->isFinal = false; n->nTrans = 1; n->sizes = sz; n->finalOutput = 0; - } else { // anyTrans + } else { uint64_t sz; // s.sizes(data) uint32_t nTrans; // s.ntrans(data) n->data = *slice; n->version = version; - n->state = AnyTrans; + n->state = st; n->start = addr; n->end = addr; // s.end_addr(version, data, sz, ntrans); n->isFinal = false; // s.is_final_state(); n->nTrans = nTrans; n->sizes = sz; n->finalOutput = 0; // s.final_output(version, data, sz, ntrans); - } + } return n; } void fstNodeDestroy(FstNode *node) { - if (node == NULL) { return; } free(node); } FstTransitions* fstNodeTransitions(FstNode *node) { @@ -222,50 +243,65 @@ FstTransitions* fstNodeTransitions(FstNode *node) { return NULL; } FstRange range = {.start = 0, .end = FST_NODE_LEN(node)}; - t->node = node; t->range = range; + t->node = node; return t; } -bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *res) { + +// Returns the transition at index `i`. +bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *trn) { bool s = true; - if (node->state == OneTransNext) { - - } else if (node->state == OneTrans) { - - } else if (node->state == AnyTrans) { - + //FstState st = node->state; + if (FST_STATE_ONE_TRNAS_NEXT(node)) { + trn->inp = 0; + trn->out = 0; + trn->addr = 0; + } else if (FST_STATE_ONE_TRNAS(node)) { + trn->inp = 0; + trn->out = 0; + trn->addr = 0; + } else if (FST_STATE_ANY_TRANS(node)) { + trn->inp = 0; + trn->out = 0; + trn->addr = 0; } else { s = false; } return s; } +// Returns the transition address of the `i`th transition bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res) { bool s = true; - if (node->state == OneTransNext) { + if (FST_STATE_ONE_TRNAS_NEXT(node)) { - } else if (node->state == OneTrans) { + } else if (FST_STATE_ONE_TRNAS(node)) { - } else if (node->state == AnyTrans) { + } else if (FST_STATE_ANY_TRANS(node)) { - } else if (node->state == EmptyFinal){ + } else if (FST_STATE_EMPTY_FINAL(node)){ s = false; + } else { + assert(0); } return s; } +// Finds the `i`th transition corresponding to the given input byte. +// If no transition for this byte exists, then `false` is returned. bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res) { bool s = true; - uint8_t input; // s.input - if (node->state == OneTransNext) { + uint8_t input; // get s.input + FstState fs = node->state; + if (FST_STATE_ONE_TRNAS_NEXT(node)) { if (b == input) { *res = 0; } else { return s ; } - } else if (node->state == OneTrans) { + } else if (FST_STATE_ONE_TRNAS(node)) { if (b == input) { *res = 0; } else {return s;} - } else if (node->state == AnyTrans) { + } else if (FST_STATE_ANY_TRANS(node)) { - } else if (node->state == EmptyFinal) { + } else if (FST_STATE_EMPTY_FINAL(node)) { s = false; } return s; From 93c102e2940e35d7a350e1f205ae5bb8a97b9f68 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 23 Nov 2021 14:15:23 +0800 Subject: [PATCH 07/18] update fst build struct --- source/libs/index/src/index_fst.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index a18c5d6479..3362dbbad7 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -156,27 +156,22 @@ uint64_t fstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, } -FstState fstStateCreate(FstSlice* date, CompiledAddr addr) { - FstState fs = {.state = EmptyFinal, .val = 0}; +FstState fstStateCreate(FstSlice* slice, CompiledAddr addr) { + FstState fs = {.state = EmptyFinal, .val = 0}; if (addr == EMPTY_ADDRESS) { - fs.state = EmptyFinal; - fs.val = 0; return fs; } - - FstSlice *s = date; - uint8_t v = s->data[addr]; + + uint8_t v = slice->data[addr]; uint8_t t = (v & 0b11000000) >> 6; if (t == 0b11) { fs.state = OneTransNext; - fs.val = v; } else if (t == 0b10) { - fs.state = OneTrans; - fs.val = v; + fs.state = OneTrans; } else { - fs.state = AnyTrans; - fs.val = v; + fs.state = AnyTrans; } + fs.val = v; return fs; } From 0aa47daf89c409b869a2539a6a98092a2cca5815 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 23 Nov 2021 20:01:19 +0800 Subject: [PATCH 08/18] update fst build struct --- source/libs/index/inc/index_fst.h | 27 ++++++++++++++++++++++++++- source/libs/index/src/index_fst.c | 17 +++++++++++++++-- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h index 4e2bee3f97..40e79b716e 100644 --- a/source/libs/index/inc/index_fst.h +++ b/source/libs/index/inc/index_fst.h @@ -95,7 +95,32 @@ typedef struct FstState { uint8_t val; } FstState; -FstState fstStateCreate(FstSlice* data, CompiledAddr addr); +FstState fstStateCreateFrom(FstSlice* data, CompiledAddr addr); +FstState fstStateCreate(State state); + +//compile +void fstStateCompileForOneTransNext(FstState state, FstCountingWriter *w, CompiledAddr addr, uint8_t inp); +void fstStateCompileForOneTrans(FstState state, FstCountingWriter *w, CompiledAddr addr, FstTransition trn); +void fstStateCompileForAnyTrans(FstState state, FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node); + +// set_comm_input +void fstStateSetCommInputForOneTransNext(FstState state, uint8_t inp); +void fstStateSetCommInputForOneTrans(FstState state, uint8_t inp); + +// comm_input +uint8_t fstStateCommInputForOneTransNext(FstState state); +uint8_t fstStateCommInputForOneTrans(FstState state); + +// input_len + +uint64_t fstStateInputLenForOneTransNext(FstState state); +uint64_t fstStateInputLenForOneTrans(FstState state); + + + + + + #define FST_STATE_ONE_TRNAS_NEXT(node) (node->state.state == OneTransNext) #define FST_STATE_ONE_TRNAS(node) (node->state.state == OneTrans) diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index 3362dbbad7..5031c071fa 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -156,7 +156,7 @@ uint64_t fstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, } -FstState fstStateCreate(FstSlice* slice, CompiledAddr addr) { +FstState fstStateCreateFrom(FstSlice* slice, CompiledAddr addr) { FstState fs = {.state = EmptyFinal, .val = 0}; if (addr == EMPTY_ADDRESS) { return fs; @@ -175,13 +175,26 @@ FstState fstStateCreate(FstSlice* slice, CompiledAddr addr) { return fs; } +static FstState stateDict[] = { + {.state = OneTransNext, .val = 0b11000000}, + {.state = OneTrans, .val = 0b10000000}, + {.state = AnyTrans, .val = 0b00000000}, + {.state = EmptyFinal, .val = 0b00000000} +}; + +FstState fstStateCreate(State state){ + uint8_t idx = (uint8_t)state; + return stateDict[idx]; + +} + // fst node function FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *slice) { FstNode *n = (FstNode *)malloc(sizeof(FstNode)); if (n == NULL) { return NULL; } - FstState st = fstStateCreate(slice, addr); + FstState st = fstStateCreateFrom(slice, addr); if (st.state == EmptyFinal) { n->data = fstSliceCreate(NULL, 0); From 169f6b3ad8c148aa576585d79c3428da6da822dc Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 23 Nov 2021 23:53:45 +0800 Subject: [PATCH 09/18] update fst build struct --- source/libs/index/inc/index_fst.h | 50 ++++++++-- source/libs/index/inc/index_fst_common.h | 7 ++ source/libs/index/inc/index_fst_util.h | 6 +- source/libs/index/src/index_fst.c | 113 ++++++++++++++++++++++- source/libs/index/src/index_fst_common.c | 3 +- source/libs/index/src/index_fst_util.c | 1 + 6 files changed, 167 insertions(+), 13 deletions(-) create mode 100644 source/libs/index/inc/index_fst_common.h diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h index 40e79b716e..97f3232867 100644 --- a/source/libs/index/inc/index_fst.h +++ b/source/libs/index/inc/index_fst.h @@ -99,22 +99,54 @@ FstState fstStateCreateFrom(FstSlice* data, CompiledAddr addr); FstState fstStateCreate(State state); //compile -void fstStateCompileForOneTransNext(FstState state, FstCountingWriter *w, CompiledAddr addr, uint8_t inp); -void fstStateCompileForOneTrans(FstState state, FstCountingWriter *w, CompiledAddr addr, FstTransition trn); -void fstStateCompileForAnyTrans(FstState state, FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node); +void fstStateCompileForOneTransNext(FstState* state, FstCountingWriter *w, CompiledAddr addr, uint8_t inp); +void fstStateCompileForOneTrans(FstState* state, FstCountingWriter *w, CompiledAddr addr, FstTransition trn); +void fstStateCompileForAnyTrans(FstState* state, FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node); // set_comm_input -void fstStateSetCommInputForOneTransNext(FstState state, uint8_t inp); -void fstStateSetCommInputForOneTrans(FstState state, uint8_t inp); +void fstStateSetCommInput(FstState* state, uint8_t inp); // comm_input -uint8_t fstStateCommInputForOneTransNext(FstState state); -uint8_t fstStateCommInputForOneTrans(FstState state); +uint8_t fstStateCommInput(FstState* state); // input_len -uint64_t fstStateInputLenForOneTransNext(FstState state); -uint64_t fstStateInputLenForOneTrans(FstState state); +uint64_t fstStateInputLen(FstState* state); + + +// end_addr +uint64_t fstStateEndAddrForOneTransNext(FstState* state, FstSlice *data); +uint64_t fstStateEndAddrForOneTrans(FstState *state, FstSlice *data, PackSizes sizes); +uint64_t fstStateEndAddrForAnyTrans(FstState *state, uint64_t version, FstSlice *date, PackSizes sizes, uint64_t nTrans); +// input +uint8_t fstStateInput(FstState *state, FstNode *node); +uint8_t fstStateInputForAnyTrans(FstState *state, FstNode *node, uint64_t i); + +// trans_addr +CompiledAddr fstStateTransAddr(FstState *state, FstNode *node); +CompiledAddr fstStateTransAddrForAnyTrans(FstState *state, FstNode *node, uint64_t i); + +// sizes +PackSizes fstStateSizes(FstState *state, FstSlice *data); +// Output +Output fstStateOutput(FstState *state, FstNode *node); +Output fstStateOutputForAnyTrans(FstState *state, FstNode *node, uint64_t i); + +// anyTrans specify function + +void fstStateSetFinalState(FstState *state, bool yes); +bool fstStateIsFinalState(FstState *state); +void fstStateSetStateNtrans(FstState *state, uint8_t n); +// state_ntrans +void fstStateStateNtrans(FstState *state); +uint64_t fstStateTotalTransSize(FstState *state, uint64_t version, PackSizes size, uint64_t nTrans); +uint64_t fstStateTransIndexSize(FstState *state, uint64_t version, uint64_t nTrans); +uint64_t fstStateNtransLen(FstState *state); +uint64_t fstStateNtrans(FstState *state); +Output fstStateFinalOutput(FstState *state, uint64_t version, FstSlice *date, PackSizes sizes, uint64_t nTrans); +uint64_t fstStateFindInput(FstState *state, FstNode *node, uint8_t b); + + diff --git a/source/libs/index/inc/index_fst_common.h b/source/libs/index/inc/index_fst_common.h new file mode 100644 index 0000000000..b261f4090c --- /dev/null +++ b/source/libs/index/inc/index_fst_common.h @@ -0,0 +1,7 @@ +#ifndef __INDEX_FST_COMM_H__ +#define __INDEX_FST_COMM_H__ + +extern const uint8_t COMMON_INPUTS[]; +extern char const COMMON_INPUTS_INV[]; + +#endif diff --git a/source/libs/index/inc/index_fst_util.h b/source/libs/index/inc/index_fst_util.h index 6490054b91..ad2e5510d7 100644 --- a/source/libs/index/inc/index_fst_util.h +++ b/source/libs/index/inc/index_fst_util.h @@ -18,7 +18,7 @@ #define __INDEX_FST_UTIL_H__ #include "tarray.h" - +#include "index_fst_common.h" typedef uint64_t FstType; typedef uint64_t CompiledAddr; @@ -44,6 +44,8 @@ extern const uint64_t TRANS_INDEX_THRESHOLD; // // `0` is a legal value which means there are no transitions/outputs + +extern #define FST_SET_TRANSITION_PACK_SIZE(v, sz) do {v = (v & 0b00001111) | (sz << 4} while(0) #define FST_GET_TRANSITION_PACK_SIZE(v) (((v) & 0b11110000) >> 4) #define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { v = (v & 0b11110000) | sz } while(0) @@ -79,7 +81,7 @@ FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen); bool fstSliceEmpty(FstSlice *slice); int fstSliceCompare(FstSlice *a, FstSlice *b); -#define FST_SLICE_LEN(s) (s->end - s->start + 1) +#define FST_SLICE_LEN(s) ((s)->end - (s)->start + 1) #endif diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index 5031c071fa..9198fadbe4 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -185,8 +185,119 @@ static FstState stateDict[] = { FstState fstStateCreate(State state){ uint8_t idx = (uint8_t)state; return stateDict[idx]; - } +//compile +void fstStateCompileForOneTransNext(FstState* state, FstCountingWriter *w, CompiledAddr addr, uint8_t inp) { + return ; +} +void fstStateCompileForOneTrans(FstState* state, FstCountingWriter *w, CompiledAddr addr, FstTransition trn) { + return ; + +} +void fstStateCompileForAnyTrans(FstState* state, FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node) { + return; +} + +// set_comm_input +void fstStateSetCommInput(FstState* s, uint8_t inp) { + assert(s->state == OneTransNext || s->state == OneTrans); + + uint8_t val; + COMMON_INDEX(inp, 0x111111, val); + s->val = (s->val & stateDict[s->state].val) | val; +} + +// comm_input +uint8_t fstStateCommInput(FstState* s) { + assert(s->state == OneTransNext || s->state == OneTrans); + uint8_t v = s->val & 0b00111111; + //v = 0 indicate that common_input is None + return v == 0 ? 0 : COMMON_INPUT(v); +} + +// input_len + +uint64_t fstStateInputLen(FstState* s) { + assert(s->state == OneTransNext || s->state == OneTrans); + return fstStateCommInput(s) == 0 ? 1 : 0; +} + + +// end_addr +uint64_t fstStateEndAddrForOneTransNext(FstState* s, FstSlice *data) { + return FST_SLICE_LEN(data) - 1 - fstStateInputLen(s); +} +uint64_t fstStateEndAddrForOneTrans(FstState *s, FstSlice *data, PackSizes sizes) { + return FST_SLICE_LEN(data) - 1 - fstStateInputLen(s) - 1 - FST_GET_TRANSITION_PACK_SIZE(sizes) - FST_GET_OUTPUT_PACK_SIZE(sizes); +} +uint64_t fstStateEndAddrForAnyTrans(FstState *state, uint64_t version, FstSlice *date, PackSizes sizes, uint64_t nTrans) { + return 1; +} +// input +uint8_t fstStateInput(FstState *state, FstNode *node) { + return 1; +} +uint8_t fstStateInputForAnyTrans(FstState *state, FstNode *node, uint64_t i) { + return 1; +} + +// trans_addr +CompiledAddr fstStateTransAddr(FstState *state, FstNode *node) { + return 1; +} +CompiledAddr fstStateTransAddrForAnyTrans(FstState *state, FstNode *node, uint64_t i) { + return 1; +} + +// sizes +PackSizes fstStateSizes(FstState *state, FstSlice *data) { + return 1; +} +// Output +Output fstStateOutput(FstState *state, FstNode *node) { + return 1; + +} +Output fstStateOutputForAnyTrans(FstState *state, FstNode *node, uint64_t i) { + return 1; +} + +// anyTrans specify function + +void fstStateSetFinalState(FstState *state, bool yes) { + return; +} +bool fstStateIsFinalState(FstState *state) { + return false; +} +void fstStateSetStateNtrans(FstState *state, uint8_t n) { + return; +} +// state_ntrans +void fstStateStateNtrans(FstState *state) { + return ; +} +uint64_t fstStateTotalTransSize(FstState *state, uint64_t version, PackSizes size, uint64_t nTrans) { + return 1; +} +uint64_t fstStateTransIndexSize(FstState *state, uint64_t version, uint64_t nTrans) { + return 1; +} +uint64_t fstStateNtransLen(FstState *state) { + return 1; +} +uint64_t fstStateNtrans(FstState *state) { + return 1; +} +Output fstStateFinalOutput(FstState *state, uint64_t version, FstSlice *date, PackSizes sizes, uint64_t nTrans) { + return 1; + +} +uint64_t fstStateFindInput(FstState *state, FstNode *node, uint8_t b) { + return 1; + +} + // fst node function diff --git a/source/libs/index/src/index_fst_common.c b/source/libs/index/src/index_fst_common.c index 4ab78cddc5..97fb88d60e 100644 --- a/source/libs/index/src/index_fst_common.c +++ b/source/libs/index/src/index_fst_common.c @@ -14,6 +14,7 @@ */ #include "tutil.h" + const uint8_t COMMON_INPUTS[] = { 84, // '\x00' 85, // '\x01' @@ -273,7 +274,7 @@ const uint8_t COMMON_INPUTS[] = { 255, // 'ΓΏ' }; -char const COMMON_INPUTS_INV[] = { +const char COMMON_INPUTS_INV[] = { 't', 'e', '/', 'o', 'a', 's', 'r', 'i', 'p', 'c', 'n', 'w', '.', 'h', 'l', 'm', '-', 'd', 'u', '0', '1', '2', 'g', '=', ':', 'b', 'f', '3', 'y', '5', '&', '_', '4', 'v', '9', '6', diff --git a/source/libs/index/src/index_fst_util.c b/source/libs/index/src/index_fst_util.c index 8e0a104b5f..c4499f8e0d 100644 --- a/source/libs/index/src/index_fst_util.c +++ b/source/libs/index/src/index_fst_util.c @@ -13,6 +13,7 @@ * along with this program. If not, see . */ #include "index_fst_util.h" +#include "index_fst_common.h" From 169d9e17ed863a02c594919cbd88593222d05726 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 24 Nov 2021 14:05:25 +0800 Subject: [PATCH 10/18] update fst build struct --- source/libs/index/inc/index_fst.h | 2 +- source/libs/index/inc/index_fst_util.h | 1 - source/libs/index/src/index_fst.c | 89 +++++++++++++++++++++----- 3 files changed, 75 insertions(+), 17 deletions(-) diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h index 97f3232867..706a4bfec5 100644 --- a/source/libs/index/inc/index_fst.h +++ b/source/libs/index/inc/index_fst.h @@ -151,7 +151,6 @@ uint64_t fstStateFindInput(FstState *state, FstNode *node, uint8_t b); - #define FST_STATE_ONE_TRNAS_NEXT(node) (node->state.state == OneTransNext) @@ -205,6 +204,7 @@ typedef struct FstNode { // Return the address of this node. #define FST_NODE_ADDR(node) node->start + FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *data); void fstNodeDestroy(FstNode *fstNode); diff --git a/source/libs/index/inc/index_fst_util.h b/source/libs/index/inc/index_fst_util.h index ad2e5510d7..416abf1307 100644 --- a/source/libs/index/inc/index_fst_util.h +++ b/source/libs/index/inc/index_fst_util.h @@ -45,7 +45,6 @@ extern const uint64_t TRANS_INDEX_THRESHOLD; // `0` is a legal value which means there are no transitions/outputs -extern #define FST_SET_TRANSITION_PACK_SIZE(v, sz) do {v = (v & 0b00001111) | (sz << 4} while(0) #define FST_GET_TRANSITION_PACK_SIZE(v) (((v) & 0b11110000) >> 4) #define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { v = (v & 0b11110000) | sz } while(0) diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index 9198fadbe4..29c4ed423a 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -228,35 +228,92 @@ uint64_t fstStateEndAddrForOneTransNext(FstState* s, FstSlice *data) { return FST_SLICE_LEN(data) - 1 - fstStateInputLen(s); } uint64_t fstStateEndAddrForOneTrans(FstState *s, FstSlice *data, PackSizes sizes) { - return FST_SLICE_LEN(data) - 1 - fstStateInputLen(s) - 1 - FST_GET_TRANSITION_PACK_SIZE(sizes) - FST_GET_OUTPUT_PACK_SIZE(sizes); + return FST_SLICE_LEN(data) + - 1 + - fstStateInputLen(s) + - 1 // pack size + - FST_GET_TRANSITION_PACK_SIZE(sizes) + - FST_GET_OUTPUT_PACK_SIZE(sizes); } uint64_t fstStateEndAddrForAnyTrans(FstState *state, uint64_t version, FstSlice *date, PackSizes sizes, uint64_t nTrans) { - return 1; + uint8_t oSizes = FST_GET_OUTPUT_PACK_SIZE(sizes); + uint8_t finalOsize = !fstStateIsFinalState(state) ? 0 : oSizes; + return FST_SLICE_LEN(date) + - 1 + - fstStateNtransLen(state) + - 1 //pack size + - fstStateTotalTransSize(state, version, sizes, nTrans) + - nTrans * oSizes // output values + - finalOsize; // final output } // input -uint8_t fstStateInput(FstState *state, FstNode *node) { - return 1; +uint8_t fstStateInput(FstState *s, FstNode *node) { + assert(s->state == OneTransNext || s->state == OneTrans); + FstSlice *slice = &node->data; + uint8_t inp = fstStateCommInput(s); + return inp != 0 ? inp : slice->data[slice->start - 1]; } -uint8_t fstStateInputForAnyTrans(FstState *state, FstNode *node, uint64_t i) { - return 1; +uint8_t fstStateInputForAnyTrans(FstState *s, FstNode *node, uint64_t i) { + assert(s->state == AnyTrans); + FstSlice *slice = &node->data; + + uint64_t at = node->start + - fstStateNtransLen(s) + - 1 // pack size + - fstStateTransIndexSize(s, node->version, node->nTrans) + - i + - 1; // the output size + return slice->data[at]; } // trans_addr -CompiledAddr fstStateTransAddr(FstState *state, FstNode *node) { - return 1; +CompiledAddr fstStateTransAddr(FstState *s, FstNode *node) { + assert(s->state == OneTransNext || s->state == OneTrans); + FstSlice *slice = &node->data; + if (s->state == OneTransNext) { + return (CompiledAddr)(node->end); + } else { + PackSizes sizes = node->sizes; + uint8_t tSizes = FST_GET_TRANSITION_PACK_SIZE(sizes); + uint64_t i = node->start + - fstStateInputLen(s) + - 1 // PackSizes + - tSizes; + + // refactor error logic + return unpackDelta(slice->data + i, tSizes, node->end); + } } -CompiledAddr fstStateTransAddrForAnyTrans(FstState *state, FstNode *node, uint64_t i) { - return 1; +CompiledAddr fstStateTransAddrForAnyTrans(FstState *s, FstNode *node, uint64_t i) { + assert(s->state == AnyTrans); + + FstSlice *slice = &node->data; + uint8_t tSizes = FST_GET_TRANSITION_PACK_SIZE(node->sizes); + uint64_t at = node->start + - fstStateNtransLen(s) + - 1 + - fstStateTransIndexSize(s, node->version, node->nTrans) + - node->nTrans + - (i * tSizes) + - tSizes; + return unpackDelta(slice->data + at, tSizes, node->end); } // sizes -PackSizes fstStateSizes(FstState *state, FstSlice *data) { - return 1; +PackSizes fstStateSizes(FstState *s, FstSlice *slice) { + assert(s->state == OneTrans || s->state == AnyTrans) ; + uint64_t i; + if (s->state == OneTrans) { + i = FST_SLICE_LEN(slice) - 1 - fstStateInputLen(s) - 1; + } else { + i = FST_SLICE_LEN(slice) - 1 - fstStateNtransLen(s) - 1; + } + + return (PackSizes)(slice->data[slice->start + i]); } // Output Output fstStateOutput(FstState *state, FstNode *node) { - return 1; - + } Output fstStateOutputForAnyTrans(FstState *state, FstNode *node, uint64_t i) { return 1; @@ -281,7 +338,9 @@ uint64_t fstStateTotalTransSize(FstState *state, uint64_t version, PackSizes siz return 1; } uint64_t fstStateTransIndexSize(FstState *state, uint64_t version, uint64_t nTrans) { - return 1; + if (version >= 2 && nTrans > TRANS_INDEX_THRESHOLD) + return 256; + return 0; } uint64_t fstStateNtransLen(FstState *state) { return 1; From c5d53978743be3800a0cb95df6bb68cddd4f783a Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 24 Nov 2021 18:41:00 +0800 Subject: [PATCH 11/18] update fst struct --- source/libs/index/inc/index_fst.h | 6 +- source/libs/index/src/index_fst.c | 140 ++++++++++++++++++++++++------ 2 files changed, 117 insertions(+), 29 deletions(-) diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h index 706a4bfec5..2fa8b2ebba 100644 --- a/source/libs/index/inc/index_fst.h +++ b/source/libs/index/inc/index_fst.h @@ -138,13 +138,13 @@ void fstStateSetFinalState(FstState *state, bool yes); bool fstStateIsFinalState(FstState *state); void fstStateSetStateNtrans(FstState *state, uint8_t n); // state_ntrans -void fstStateStateNtrans(FstState *state); +uint8_t fstStateStateNtrans(FstState *state, bool *null); uint64_t fstStateTotalTransSize(FstState *state, uint64_t version, PackSizes size, uint64_t nTrans); uint64_t fstStateTransIndexSize(FstState *state, uint64_t version, uint64_t nTrans); uint64_t fstStateNtransLen(FstState *state); -uint64_t fstStateNtrans(FstState *state); +uint64_t fstStateNtrans(FstState *state, FstSlice *slice); Output fstStateFinalOutput(FstState *state, uint64_t version, FstSlice *date, PackSizes sizes, uint64_t nTrans); -uint64_t fstStateFindInput(FstState *state, FstNode *node, uint8_t b); +uint64_t fstStateFindInput(FstState *state, FstNode *node, uint8_t b, bool *null); diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index 29c4ed423a..8d3e1a0551 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -281,7 +281,7 @@ CompiledAddr fstStateTransAddr(FstState *s, FstNode *node) { - tSizes; // refactor error logic - return unpackDelta(slice->data + i, tSizes, node->end); + return unpackDelta(slice->data + slice->start + i, tSizes, node->end); } } CompiledAddr fstStateTransAddrForAnyTrans(FstState *s, FstNode *node, uint64_t i) { @@ -296,7 +296,7 @@ CompiledAddr fstStateTransAddrForAnyTrans(FstState *s, FstNode *node, uint64_t i - node->nTrans - (i * tSizes) - tSizes; - return unpackDelta(slice->data + at, tSizes, node->end); + return unpackDelta(slice->data + slice->start + at, tSizes, node->end); } // sizes @@ -312,49 +312,137 @@ PackSizes fstStateSizes(FstState *s, FstSlice *slice) { return (PackSizes)(slice->data[slice->start + i]); } // Output -Output fstStateOutput(FstState *state, FstNode *node) { +Output fstStateOutput(FstState *s, FstNode *node) { + assert(s->state == OneTrans); + + uint8_t oSizes = FST_GET_OUTPUT_PACK_SIZE(node->sizes); + if (oSizes == 0) { + return 0; + } + FstSlice *slice = &node->data; + uint8_t tSizes = FST_GET_TRANSITION_PACK_SIZE(node->sizes); + + uint64_t i = node->start + - fstStateInputLen(s); + - 1 + - tSizes + - oSizes; + return unpackUint64(slice->data + slice->start + i, oSizes); } -Output fstStateOutputForAnyTrans(FstState *state, FstNode *node, uint64_t i) { - return 1; +Output fstStateOutputForAnyTrans(FstState *s, FstNode *node, uint64_t i) { + assert(s->state == AnyTrans); + + uint8_t oSizes = FST_GET_OUTPUT_PACK_SIZE(node->sizes); + if (oSizes == 0) { + return 0; + } + FstSlice *slice = &node->data; + uint64_t at = node->start + - fstStateNtransLen(s) + - 1 // pack size + - fstStateTotalTransSize(s, node->version, node->sizes, node->nTrans) + - (i * oSizes) + - oSizes; + return unpackUint64(slice->data + slice->start + at, oSizes); } // anyTrans specify function -void fstStateSetFinalState(FstState *state, bool yes) { +void fstStateSetFinalState(FstState *s, bool yes) { + assert(s->state == AnyTrans); + if (yes) { s->val |= 0b01000000; } return; } -bool fstStateIsFinalState(FstState *state) { - return false; +bool fstStateIsFinalState(FstState *s) { + assert(s->state == AnyTrans); + return (s->val & 0b01000000) == 0b01000000; } -void fstStateSetStateNtrans(FstState *state, uint8_t n) { + +void fstStateSetStateNtrans(FstState *s, uint8_t n) { + assert(s->state == AnyTrans); + if (n <= 0b00111111) { + s->val = (s->val & 0b11000000) | n; + } return; } // state_ntrans -void fstStateStateNtrans(FstState *state) { - return ; +uint8_t fstStateStateNtrans(FstState *s, bool *null) { + assert(s->state == AnyTrans); + *null = false; + uint8_t n = s->val & 0b00111111; + + if (n == 0) { + *null = true; // None + } + return n; } -uint64_t fstStateTotalTransSize(FstState *state, uint64_t version, PackSizes size, uint64_t nTrans) { - return 1; +uint64_t fstStateTotalTransSize(FstState *s, uint64_t version, PackSizes sizes, uint64_t nTrans) { + assert(s->state == AnyTrans); + uint64_t idxSize = fstStateTransIndexSize(s, version, nTrans); + return nTrans + (nTrans * FST_GET_TRANSITION_PACK_SIZE(sizes)) + idxSize; } -uint64_t fstStateTransIndexSize(FstState *state, uint64_t version, uint64_t nTrans) { - if (version >= 2 && nTrans > TRANS_INDEX_THRESHOLD) - return 256; - return 0; +uint64_t fstStateTransIndexSize(FstState *s, uint64_t version, uint64_t nTrans) { + assert(s->state == AnyTrans); + return (version >= 2 &&nTrans > TRANS_INDEX_THRESHOLD) ? 256 : 0; } -uint64_t fstStateNtransLen(FstState *state) { - return 1; +uint64_t fstStateNtransLen(FstState *s) { + assert(s->state == AnyTrans); + bool null = false; + fstStateStateNtrans(s, &null); + return null == true ? 1 : 0; } -uint64_t fstStateNtrans(FstState *state) { - return 1; +uint64_t fstStateNtrans(FstState *s, FstSlice *slice) { + bool null = false; + uint8_t n = fstStateStateNtrans(s, &null); + if (null != true) { + return n; + } + n = slice->data[slice->end - 1]; // data[data.len() - 2] + return n == 1 ? 256: n; // // "1" is never a normal legal value here, because if there, // is only 1 transition, then it is encoded in the state byte } -Output fstStateFinalOutput(FstState *state, uint64_t version, FstSlice *date, PackSizes sizes, uint64_t nTrans) { - return 1; +Output fstStateFinalOutput(FstState *s, uint64_t version, FstSlice *slice, PackSizes sizes, uint64_t nTrans) { + uint8_t oSizes = FST_GET_OUTPUT_PACK_SIZE(sizes); + if (oSizes == 0 || !fstStateIsFinalState(s)) { + return 0; + } + + uint64_t at = FST_SLICE_LEN(slice) + - 1 + - fstStateNtransLen(s) + - fstStateTotalTransSize(s, version, sizes, nTrans) + - (nTrans * oSizes) + - oSizes; + return unpackUint64(slice->data + slice->start + at, (uint8_t)oSizes); } -uint64_t fstStateFindInput(FstState *state, FstNode *node, uint8_t b) { - return 1; - +uint64_t fstStateFindInput(FstState *s, FstNode *node, uint8_t b, bool *null) { + assert(s->state == AnyTrans); + FstSlice *slice = &node->data; + if (node->version >= 2 && node->nTrans > TRANS_INDEX_THRESHOLD) { + uint64_t at = node->start + - fstStateNtransLen(s) + - 1 // pack size + - fstStateTransIndexSize(s, node->version, node->nTrans); + uint64_t i = slice->data[slice->start + at + b]; + if (i >= node->nTrans) { + *null = true; + } + return i; + } else { + uint64_t start = node->start + - fstStateNtransLen(s) + - 1 // pack size + - node->nTrans; + uint64_t end = start + node->nTrans; + uint64_t len = end - start; + for(int i = 0; i < len; i++) { + uint8_t v = slice->data[slice->start + i]; + if (v == b) { + return node->nTrans - i - 1; // bug + } + } + } } From d674fcc8babb404eb3f97f9a666f669b937d9e18 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 24 Nov 2021 20:03:20 +0800 Subject: [PATCH 12/18] update fst struct --- source/libs/index/inc/index_fst.h | 8 ++++---- source/libs/index/src/index_fst.c | 31 ++++++++++++++++++++++++------- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h index 2fa8b2ebba..b137690a05 100644 --- a/source/libs/index/inc/index_fst.h +++ b/source/libs/index/inc/index_fst.h @@ -99,15 +99,15 @@ FstState fstStateCreateFrom(FstSlice* data, CompiledAddr addr); FstState fstStateCreate(State state); //compile -void fstStateCompileForOneTransNext(FstState* state, FstCountingWriter *w, CompiledAddr addr, uint8_t inp); -void fstStateCompileForOneTrans(FstState* state, FstCountingWriter *w, CompiledAddr addr, FstTransition trn); -void fstStateCompileForAnyTrans(FstState* state, FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node); +void fstStateCompileForOneTransNext(FstCountingWriter *w, CompiledAddr addr, uint8_t inp); +void fstStateCompileForOneTrans(FstCountingWriter *w, CompiledAddr addr, FstTransition trn); +void fstStateCompileForAnyTrans(FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node); // set_comm_input void fstStateSetCommInput(FstState* state, uint8_t inp); // comm_input -uint8_t fstStateCommInput(FstState* state); +uint8_t fstStateCommInput(FstState* state, bool *null); // input_len diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index 8d3e1a0551..31b7668156 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -187,14 +187,24 @@ FstState fstStateCreate(State state){ return stateDict[idx]; } //compile -void fstStateCompileForOneTransNext(FstState* state, FstCountingWriter *w, CompiledAddr addr, uint8_t inp) { +void fstStateCompileForOneTransNext(FstCountingWriter *w, CompiledAddr addr, uint8_t inp) { + FstState s = fstStateCreate(OneTransNext); + fstStateSetCommInput(&s, inp); + + bool null = false; + uint8_t v = fstStateCommInput(&s, &null); + if (null) { + // w->write_all(&[inp]) + } + // w->write_all(&[s.val]) return ; } -void fstStateCompileForOneTrans(FstState* state, FstCountingWriter *w, CompiledAddr addr, FstTransition trn) { +void fstStateCompileForOneTrans(FstCountingWriter *w, CompiledAddr addr, FstTransition trn) { + return ; } -void fstStateCompileForAnyTrans(FstState* state, FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node) { +void fstStateCompileForAnyTrans(FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node) { return; } @@ -208,9 +218,13 @@ void fstStateSetCommInput(FstState* s, uint8_t inp) { } // comm_input -uint8_t fstStateCommInput(FstState* s) { +uint8_t fstStateCommInput(FstState* s, bool *null) { assert(s->state == OneTransNext || s->state == OneTrans); uint8_t v = s->val & 0b00111111; + if (v == 0) { + *null = true; + return v; + } //v = 0 indicate that common_input is None return v == 0 ? 0 : COMMON_INPUT(v); } @@ -219,7 +233,9 @@ uint8_t fstStateCommInput(FstState* s) { uint64_t fstStateInputLen(FstState* s) { assert(s->state == OneTransNext || s->state == OneTrans); - return fstStateCommInput(s) == 0 ? 1 : 0; + bool null = false; + fstStateCommInput(s, &null); + return null ? 1 : 0 ; } @@ -250,8 +266,9 @@ uint64_t fstStateEndAddrForAnyTrans(FstState *state, uint64_t version, FstSlice uint8_t fstStateInput(FstState *s, FstNode *node) { assert(s->state == OneTransNext || s->state == OneTrans); FstSlice *slice = &node->data; - uint8_t inp = fstStateCommInput(s); - return inp != 0 ? inp : slice->data[slice->start - 1]; + bool null = false; + uint8_t inp = fstStateCommInput(s, &null); + return null == false ? inp : slice->data[slice->start - 1]; } uint8_t fstStateInputForAnyTrans(FstState *s, FstNode *node, uint64_t i) { assert(s->state == AnyTrans); From 7b62d02f95e734c9a5e70cdd2b6e4ce18ede3a27 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 24 Nov 2021 20:17:37 +0800 Subject: [PATCH 13/18] update fst struct --- source/libs/index/inc/index_fst.h | 2 +- source/libs/index/src/index_fst.c | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h index b137690a05..7ab9358cd1 100644 --- a/source/libs/index/inc/index_fst.h +++ b/source/libs/index/inc/index_fst.h @@ -100,7 +100,7 @@ FstState fstStateCreate(State state); //compile void fstStateCompileForOneTransNext(FstCountingWriter *w, CompiledAddr addr, uint8_t inp); -void fstStateCompileForOneTrans(FstCountingWriter *w, CompiledAddr addr, FstTransition trn); +void fstStateCompileForOneTrans(FstCountingWriter *w, CompiledAddr addr, FstTransition *trn); void fstStateCompileForAnyTrans(FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node); // set_comm_input diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index 31b7668156..ac51a6f117 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -195,12 +195,14 @@ void fstStateCompileForOneTransNext(FstCountingWriter *w, CompiledAddr addr, uin uint8_t v = fstStateCommInput(&s, &null); if (null) { // w->write_all(&[inp]) - } + fstCountingWriterWrite(w, &inp, 1); + } + fstCountingWriterWrite(w, &(s.val), 1); // w->write_all(&[s.val]) - return ; + return; } -void fstStateCompileForOneTrans(FstCountingWriter *w, CompiledAddr addr, FstTransition trn) { - +void fstStateCompileForOneTrans(FstCountingWriter *w, CompiledAddr addr, FstTransition* trn) { + Output val = trn->out; return ; } From 4e9fba6dd317229f087afac910b4eb2d303ee71a Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 25 Nov 2021 14:37:32 +0800 Subject: [PATCH 14/18] update fst struct --- source/libs/index/src/index_fst.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index ac51a6f117..8df04e9273 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -240,12 +240,13 @@ uint64_t fstStateInputLen(FstState* s) { return null ? 1 : 0 ; } - // end_addr uint64_t fstStateEndAddrForOneTransNext(FstState* s, FstSlice *data) { + assert(s->state == OneTransNext); return FST_SLICE_LEN(data) - 1 - fstStateInputLen(s); } uint64_t fstStateEndAddrForOneTrans(FstState *s, FstSlice *data, PackSizes sizes) { + assert(s->state == OneTrans); return FST_SLICE_LEN(data) - 1 - fstStateInputLen(s) @@ -488,34 +489,36 @@ FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *slice) { n->version = version; n->state = st; n->start = addr; - n->end = addr; //? s.end_addr(data); + n->end = fstStateEndAddrForOneTransNext(&st, slice); //? s.end_addr(data); n->isFinal = false; n->sizes = 0; - n->nTrans = 0; + n->nTrans = 1; n->finalOutput = 0; } else if (st.state == OneTrans) { - uint64_t sz; // fetch sz from addr - n->data = fstSliceCopy(slice, 0, addr); + FstSlice data = fstSliceCopy(slice, 0, addr); + PackSizes sz = fstStateSizes(&st, &data); + n->data = fstSliceCopy(slice, 0, addr); n->version = version; n->state = st; n->start = addr; - n->end = addr; // s.end_addr(data, sz); + n->end = fstStateEndAddrForOneTrans(&st, slice, sz); // s.end_addr(data, sz); n->isFinal = false; n->nTrans = 1; n->sizes = sz; n->finalOutput = 0; } else { - uint64_t sz; // s.sizes(data) - uint32_t nTrans; // s.ntrans(data) + + uint64_t sz = fstStateSizes(&st, slice); // s.sizes(data) + uint32_t nTrans = fstStateNtrans(&st, slice); // s.ntrans(data) n->data = *slice; n->version = version; n->state = st; n->start = addr; - n->end = addr; // s.end_addr(version, data, sz, ntrans); - n->isFinal = false; // s.is_final_state(); + n->end = fstStateEndAddrForAnyTrans(&st, version, slice, sz, nTrans); // s.end_addr(version, data, sz, ntrans); + n->isFinal = fstStateIsFinalState(&st); // s.is_final_state(); n->nTrans = nTrans; n->sizes = sz; - n->finalOutput = 0; // s.final_output(version, data, sz, ntrans); + n->finalOutput = fstStateFinalOutput(&st, version, slice, sz, nTrans); // s.final_output(version, data, sz, ntrans); } return n; } @@ -598,13 +601,16 @@ bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr if (sz == 0 && builderNode->isFinal && builderNode->finalOutput == 0) { return true; } else if (sz != 1 || builderNode->isFinal) { + fstStateCompileForAnyTrans(w, addr, builderNode); // AnyTrans->Compile(w, addr, node); } else { FstTransition *tran = taosArrayGet(builderNode->trans, 0); if (tran->addr == lastAddr && tran->out == 0) { + fstStateCompileForOneTransNext(w, addr, tran->inp); //OneTransNext::compile(w, lastAddr, tran->inp); return true; } else { + fstStateCompileForOneTrans(w, addr, tran); //OneTrans::Compile(w, lastAddr, *tran); return true; } From 21592f572e5dc4430288c938b6ec65c086d924a9 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 25 Nov 2021 17:39:53 +0800 Subject: [PATCH 15/18] update fst struct --- source/libs/index/src/index_fst.c | 68 ++++++++++++++++--------------- 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index 8df04e9273..00375a261b 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -181,6 +181,8 @@ static FstState stateDict[] = { {.state = AnyTrans, .val = 0b00000000}, {.state = EmptyFinal, .val = 0b00000000} }; +// debug +static char *fStStateStr[] = {"ONE_TRANS_NEXT", "ONE_TRANS", "ANY_TRANS", "EMPTY_FINAL"}; FstState fstStateCreate(State state){ uint8_t idx = (uint8_t)state; @@ -507,7 +509,6 @@ FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *slice) { n->sizes = sz; n->finalOutput = 0; } else { - uint64_t sz = fstStateSizes(&st, slice); // s.sizes(data) uint32_t nTrans = fstStateNtrans(&st, slice); // s.ntrans(data) n->data = *slice; @@ -539,19 +540,19 @@ FstTransitions* fstNodeTransitions(FstNode *node) { // Returns the transition at index `i`. bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *trn) { bool s = true; - //FstState st = node->state; - if (FST_STATE_ONE_TRNAS_NEXT(node)) { - trn->inp = 0; + FstState *st = &node->state; + if (st->state == OneTransNext) { + trn->inp = fstStateInput(st, node); trn->out = 0; - trn->addr = 0; - } else if (FST_STATE_ONE_TRNAS(node)) { - trn->inp = 0; - trn->out = 0; - trn->addr = 0; - } else if (FST_STATE_ANY_TRANS(node)) { - trn->inp = 0; - trn->out = 0; - trn->addr = 0; + trn->addr = fstStateTransAddr(st, node); + } else if (st->state == OneTrans) { + trn->inp = fstStateInput(st, node); + trn->out = fstStateOutput(st, node); + trn->addr = fstStateTransAddr(st, node); + } else if (st->state == AnyTrans) { + trn->inp = fstStateInputForAnyTrans(st, node, i); + trn->out = fstStateOutputForAnyTrans(st, node, i); + trn->addr = fstStateTransAddrForAnyTrans(st, node, i); } else { s = false; } @@ -561,12 +562,15 @@ bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *trn) { // Returns the transition address of the `i`th transition bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res) { bool s = true; - if (FST_STATE_ONE_TRNAS_NEXT(node)) { - - } else if (FST_STATE_ONE_TRNAS(node)) { - - } else if (FST_STATE_ANY_TRANS(node)) { - + FstState *st = &node->state; + if (st->state == OneTransNext) { + assert(i == 0); + fstStateTransAddr(st, node); + } else if (st->state == OneTrans) { + assert(i == 0); + fstStateTransAddr(st, node); + } else if (st->state == AnyTrans) { + fstStateTransAddrForAnyTrans(st, node, i); } else if (FST_STATE_EMPTY_FINAL(node)){ s = false; } else { @@ -579,19 +583,19 @@ bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res) { // If no transition for this byte exists, then `false` is returned. bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res) { bool s = true; - uint8_t input; // get s.input - FstState fs = node->state; - if (FST_STATE_ONE_TRNAS_NEXT(node)) { - if (b == input) { *res = 0; } - else { return s ; } - } else if (FST_STATE_ONE_TRNAS(node)) { - if (b == input) { *res = 0; } - else {return s;} - } else if (FST_STATE_ANY_TRANS(node)) { - - } else if (FST_STATE_EMPTY_FINAL(node)) { - s = false; - } + FstState *st = &node->state; + if (st->state == OneTransNext) { + if (fstStateInput(st,node) == b) { *res = 0; } + else { s = false; } } + else if (st->state == OneTrans) { + if (fstStateInput(st, node) == b) { *res = 0 ;} + else { s = false; } + } else if (st->state == AnyTrans) { + bool null = false; + uint64_t out = fstStateFindInput(st, node, b, &null); + if (null == false) { *res = out; } + else { s = false;} + } return s; } From 55282bbfa2f1dd9a841d22d51e5f479c09089a5f Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 25 Nov 2021 19:35:31 +0800 Subject: [PATCH 16/18] add index test example --- .../index/inc/index_fst_counting_writer.h | 3 + source/libs/index/inc/index_fst_util.h | 4 +- source/libs/index/src/index_fst.c | 93 ++++++++++++++++++- .../index/src/index_fst_counting_writer.c | 5 +- 4 files changed, 98 insertions(+), 7 deletions(-) diff --git a/source/libs/index/inc/index_fst_counting_writer.h b/source/libs/index/inc/index_fst_counting_writer.h index 0eba963239..dbfbe80494 100644 --- a/source/libs/index/inc/index_fst_counting_writer.h +++ b/source/libs/index/inc/index_fst_counting_writer.h @@ -34,6 +34,9 @@ FstCountingWriter *fstCountingWriterCreate(void *wtr); void fstCountingWriterDestroy(FstCountingWriter *w); +void fstCountingWriterPackUintIn(FstCountingWriter *writer, uint64_t n, uint8_t nBytes); + + #define FST_WRITER_COUNT(writer) (writer->count) #define FST_WRITER_INTER_WRITER(writer) (writer->wtr) #define FST_WRITE_CHECK_SUMMER(writer) (writer->summer) diff --git a/source/libs/index/inc/index_fst_util.h b/source/libs/index/inc/index_fst_util.h index 416abf1307..5b84632418 100644 --- a/source/libs/index/inc/index_fst_util.h +++ b/source/libs/index/inc/index_fst_util.h @@ -45,9 +45,9 @@ extern const uint64_t TRANS_INDEX_THRESHOLD; // `0` is a legal value which means there are no transitions/outputs -#define FST_SET_TRANSITION_PACK_SIZE(v, sz) do {v = (v & 0b00001111) | (sz << 4} while(0) +#define FST_SET_TRANSITION_PACK_SIZE(v, sz) do {v = (v & 0b00001111) | (sz << 4); } while(0) #define FST_GET_TRANSITION_PACK_SIZE(v) (((v) & 0b11110000) >> 4) -#define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { v = (v & 0b11110000) | sz } while(0) +#define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { v = (v & 0b11110000) | sz; } while(0) #define FST_GET_OUTPUT_PACK_SIZE(v) ((v) & 0b00001111) #define COMMON_INPUT(idx) COMMON_INPUTS_INV[(idx) - 1] diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index 00375a261b..ddefb29061 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -16,6 +16,12 @@ #include "index_fst.h" + +static void fstPackDeltaIn(FstCountingWriter *wrt, CompiledAddr nodeAddr, CompiledAddr transAddr, uint8_t nBytes) { + CompiledAddr deltaAddr = (transAddr == EMPTY_ADDRESS) ? EMPTY_ADDRESS : nodeAddr - transAddr; + fstCountingWriterPackUintIn(wrt, deltaAddr, nBytes); +} + FstUnFinishedNodes *fstUnFinishedNodesCreate() { FstUnFinishedNodes *nodes = malloc(sizeof(FstUnFinishedNodes)); if (nodes == NULL) { return NULL; } @@ -175,18 +181,18 @@ FstState fstStateCreateFrom(FstSlice* slice, CompiledAddr addr) { return fs; } -static FstState stateDict[] = { +static FstState fstStateDict[] = { {.state = OneTransNext, .val = 0b11000000}, {.state = OneTrans, .val = 0b10000000}, {.state = AnyTrans, .val = 0b00000000}, {.state = EmptyFinal, .val = 0b00000000} }; // debug -static char *fStStateStr[] = {"ONE_TRANS_NEXT", "ONE_TRANS", "ANY_TRANS", "EMPTY_FINAL"}; +static const char *fstStateStr[] = {"ONE_TRANS_NEXT", "ONE_TRANS", "ANY_TRANS", "EMPTY_FINAL"}; FstState fstStateCreate(State state){ uint8_t idx = (uint8_t)state; - return stateDict[idx]; + return fstStateDict[idx]; } //compile void fstStateCompileForOneTransNext(FstCountingWriter *w, CompiledAddr addr, uint8_t inp) { @@ -209,6 +215,77 @@ void fstStateCompileForOneTrans(FstCountingWriter *w, CompiledAddr addr, FstTran } void fstStateCompileForAnyTrans(FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node) { + size_t sz = taosArrayGetSize(node->trans); + assert(sz <= 256); + + uint8_t tSize = 0; + uint8_t oSize = packSize(node->finalOutput) ; + + // finalOutput.is_zero() + bool anyOuts = (node->finalOutput != 0) ; + for (size_t i = 0; i < sz; i++) { + FstTransition *t = taosArrayGet(node->trans, i); + tSize = MAX(tSize, packDeltaSize(addr, t->addr)); + oSize = MAX(oSize, packSize(t->out)); + anyOuts = anyOuts || (t->out != 0); + } + + PackSizes packSizes = 0; + if (anyOuts) { FST_SET_OUTPUT_PACK_SIZE(packSizes, oSize); } + else { FST_SET_OUTPUT_PACK_SIZE(packSizes, 0); } + + FST_SET_TRANSITION_PACK_SIZE(packSizes, tSize); + + FstState st = fstStateCreate(AnyTrans); + fstStateSetFinalState(&st, node->isFinal); + fstStateSetStateNtrans(&st, (uint8_t)sz); + + if (anyOuts) { + if (FST_BUILDER_NODE_IS_FINAL(node)) { + fstCountingWriterPackUintIn(w, node->finalOutput, oSize); + } + for (size_t i = 0; i < sz; i++) { + FstTransition *t = taosArrayGet(node->trans, i); + fstCountingWriterPackUintIn(w, t->out, oSize); + } + } + for (size_t i = 0; i < sz; i++) { + FstTransition *t = taosArrayGet(node->trans, i); + fstPackDeltaIn(w, addr, t->addr, tSize); + } + for (size_t i = 0; i < sz; i++) { + FstTransition *t = taosArrayGet(node->trans, i); + fstCountingWriterWrite(w, (char *)&t->inp, 1); + //fstPackDeltaIn(w, addr, t->addr, tSize); + } + if (sz > TRANS_INDEX_THRESHOLD) { + // A value of 255 indicates that no transition exists for the byte + // at that index. (Except when there are 256 transitions.) Namely, + // any value greater than or equal to the number of transitions in + // this node indicates an absent transition. + uint8_t *index = malloc(sizeof(uint8_t) * 256); + for (uint8_t i = 0; i < 256; i++) { + index[i] = 255; + } + for (size_t i = 0; i < sz; i++) { + FstTransition *t = taosArrayGet(node->trans, i); + index[t->inp] = i; + fstCountingWriterWrite(w, (char *)index, sizeof(index)); + //fstPackDeltaIn(w, addr, t->addr, tSize); + } + } + fstCountingWriterWrite(w, (char *)&packSizes, 1); + bool null = false; + fstStateStateNtrans(&st, &null); + if (null == true) { + // 256 can't be represented in a u8, so we abuse the fact that + // the # of transitions can never be 1 here, since 1 is always + // encoded in the state byte. + uint8_t v = 1; + if (sz == 256) { fstCountingWriterWrite(w, (char *)&v, 1); } + else { fstCountingWriterWrite(w, (char *)&sz, 1); } + } + fstCountingWriterWrite(w, (char *)(&(st.val)), 1); return; } @@ -218,7 +295,7 @@ void fstStateSetCommInput(FstState* s, uint8_t inp) { uint8_t val; COMMON_INDEX(inp, 0x111111, val); - s->val = (s->val & stateDict[s->state].val) | val; + s->val = (s->val & fstStateDict[s->state].val) | val; } // comm_input @@ -523,6 +600,14 @@ FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *slice) { } return n; } + +// debug state transition +static const char *fstNodeState(FstNode *node) { + FstState *st = &node->state; + return fstStateStr[st->state]; +} + + void fstNodeDestroy(FstNode *node) { free(node); } diff --git a/source/libs/index/src/index_fst_counting_writer.c b/source/libs/index/src/index_fst_counting_writer.c index 1486b9b203..23bf4d37c8 100644 --- a/source/libs/index/src/index_fst_counting_writer.c +++ b/source/libs/index/src/index_fst_counting_writer.c @@ -36,10 +36,13 @@ uint64_t fstCountingWriterWrite(FstCountingWriter *write, uint8_t *buf, uint32_t return bufLen; } -int FstCountingWriterFlush(FstCountingWriter *write) { +int fstCountingWriterFlush(FstCountingWriter *write) { //write->wtr->flush return 1; } +void fstCountingWriterPackUintIn(FstCountingWriter *writer, uint64_t n, uint8_t nBytes) { + return; +} From 20203e47eb5d9991a8f272a62c3fac8d24df74cc Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 25 Nov 2021 19:57:50 +0800 Subject: [PATCH 17/18] add fst core example --- source/libs/index/src/index_fst.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index ddefb29061..38ea7a4c95 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -21,6 +21,10 @@ static void fstPackDeltaIn(FstCountingWriter *wrt, CompiledAddr nodeAddr, Compil CompiledAddr deltaAddr = (transAddr == EMPTY_ADDRESS) ? EMPTY_ADDRESS : nodeAddr - transAddr; fstCountingWriterPackUintIn(wrt, deltaAddr, nBytes); } +static void fstPackDetla(FstCountingWriter *wrt, CompiledAddr nodeAddr, CompiledAddr transAddr) { + uint8_t nBytes = packDeltaSize(nodeAddr, transAddr); + fstPackDeltaIn(wrt, nodeAddr, transAddr, nBytes); +} FstUnFinishedNodes *fstUnFinishedNodesCreate() { FstUnFinishedNodes *nodes = malloc(sizeof(FstUnFinishedNodes)); @@ -167,7 +171,6 @@ FstState fstStateCreateFrom(FstSlice* slice, CompiledAddr addr) { if (addr == EMPTY_ADDRESS) { return fs; } - uint8_t v = slice->data[addr]; uint8_t t = (v & 0b11000000) >> 6; if (t == 0b11) { @@ -211,6 +214,7 @@ void fstStateCompileForOneTransNext(FstCountingWriter *w, CompiledAddr addr, uin } void fstStateCompileForOneTrans(FstCountingWriter *w, CompiledAddr addr, FstTransition* trn) { Output val = trn->out; + return ; } From d39f80185f70837646bb9494a9da08b77c81a1be Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 25 Nov 2021 20:42:19 +0800 Subject: [PATCH 18/18] update fst core struct --- .../index/inc/index_fst_counting_writer.h | 1 + source/libs/index/src/index_fst.c | 26 ++++++++++++++++--- .../index/src/index_fst_counting_writer.c | 15 +++++++++++ 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/source/libs/index/inc/index_fst_counting_writer.h b/source/libs/index/inc/index_fst_counting_writer.h index dbfbe80494..fbb2f1cff7 100644 --- a/source/libs/index/inc/index_fst_counting_writer.h +++ b/source/libs/index/inc/index_fst_counting_writer.h @@ -35,6 +35,7 @@ void fstCountingWriterDestroy(FstCountingWriter *w); void fstCountingWriterPackUintIn(FstCountingWriter *writer, uint64_t n, uint8_t nBytes); +uint8_t fstCountingWriterPackUint(FstCountingWriter *writer, uint64_t n); #define FST_WRITER_COUNT(writer) (writer->count) diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index 38ea7a4c95..8b9aa22fc6 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -21,9 +21,10 @@ static void fstPackDeltaIn(FstCountingWriter *wrt, CompiledAddr nodeAddr, Compil CompiledAddr deltaAddr = (transAddr == EMPTY_ADDRESS) ? EMPTY_ADDRESS : nodeAddr - transAddr; fstCountingWriterPackUintIn(wrt, deltaAddr, nBytes); } -static void fstPackDetla(FstCountingWriter *wrt, CompiledAddr nodeAddr, CompiledAddr transAddr) { +static uint8_t fstPackDetla(FstCountingWriter *wrt, CompiledAddr nodeAddr, CompiledAddr transAddr) { uint8_t nBytes = packDeltaSize(nodeAddr, transAddr); fstPackDeltaIn(wrt, nodeAddr, transAddr, nBytes); + return nBytes; } FstUnFinishedNodes *fstUnFinishedNodesCreate() { @@ -213,8 +214,24 @@ void fstStateCompileForOneTransNext(FstCountingWriter *w, CompiledAddr addr, uin return; } void fstStateCompileForOneTrans(FstCountingWriter *w, CompiledAddr addr, FstTransition* trn) { - Output val = trn->out; - + Output out = trn->out; + uint8_t outPackSize = (out == 0 ? 0 : fstCountingWriterPackUint(w, out)); + uint8_t transPackSize = fstPackDetla(w, addr, trn->addr); + PackSizes packSizes = 0; + + FST_SET_OUTPUT_PACK_SIZE(packSizes, outPackSize); + FST_SET_TRANSITION_PACK_SIZE(packSizes, transPackSize); + fstCountingWriterWrite(w, (char *)&packSizes, sizeof(packSizes)); + + FstState st = fstStateCreate(OneTrans); + + fstStateSetCommInput(&st, trn->inp); + bool null = false; + uint8_t inp = fstStateCommInput(&st, &null); + if (null == true) { + fstCountingWriterWrite(w, (char *)&trn->inp, sizeof(trn->inp)); + } + fstCountingWriterWrite(w, (char *)(&(st.val)), sizeof(st.val)); return ; } @@ -267,7 +284,7 @@ void fstStateCompileForAnyTrans(FstCountingWriter *w, CompiledAddr addr, FstBuil // at that index. (Except when there are 256 transitions.) Namely, // any value greater than or equal to the number of transitions in // this node indicates an absent transition. - uint8_t *index = malloc(sizeof(uint8_t) * 256); + uint8_t *index = (uint8_t *)malloc(sizeof(uint8_t) * 256); for (uint8_t i = 0; i < 256; i++) { index[i] = 255; } @@ -277,6 +294,7 @@ void fstStateCompileForAnyTrans(FstCountingWriter *w, CompiledAddr addr, FstBuil fstCountingWriterWrite(w, (char *)index, sizeof(index)); //fstPackDeltaIn(w, addr, t->addr, tSize); } + free(index); } fstCountingWriterWrite(w, (char *)&packSizes, 1); bool null = false; diff --git a/source/libs/index/src/index_fst_counting_writer.c b/source/libs/index/src/index_fst_counting_writer.c index 23bf4d37c8..b253db986a 100644 --- a/source/libs/index/src/index_fst_counting_writer.c +++ b/source/libs/index/src/index_fst_counting_writer.c @@ -13,6 +13,7 @@ * along with this program. If not, see . */ #include "tutil.h" +#include "index_fst_util.h" #include "index_fst_counting_writer.h" FstCountingWriter *fstCountingWriterCreate(void *wrt) { @@ -42,7 +43,21 @@ int fstCountingWriterFlush(FstCountingWriter *write) { } void fstCountingWriterPackUintIn(FstCountingWriter *writer, uint64_t n, uint8_t nBytes) { + assert(1 <= nBytes && nBytes <= 8); + uint8_t *buf = calloc(8, sizeof(uint8_t)); + for (uint8_t i = 0; i < nBytes; i++) { + buf[i] = (uint8_t)n; + n = n >> 8; + } + fstCountingWriterWrite(writer, buf, nBytes); + free(buf); return; } +uint8_t fstCountingWriterPackUint(FstCountingWriter *writer, uint64_t n) { + uint8_t nBytes = packSize(n); + fstCountingWriterPackUintIn(writer, n, nBytes); + return nBytes; +} +