diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h
index 5a8138b126..7ab9358cd1 100644
--- a/source/libs/index/inc/index_fst.h
+++ b/source/libs/index/inc/index_fst.h
@@ -36,6 +36,8 @@ typedef struct FstRange {
typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal} State;
typedef enum { Included, Excluded, Unbounded} FstBound;
+typedef enum {Ordered, OutOfOrdered, DuplicateKey} OrderType;
+
/*
@@ -66,17 +68,95 @@ typedef struct FstBuilder {
FstCountingWriter *wrt; // The FST raw data is written directly to `wtr`.
FstUnFinishedNodes *unfinished; // The stack of unfinished nodes
FstRegistry* registry; // A map of finished nodes.
- SArray* last; // The last word added
+ FstSlice last; // The last word added
CompiledAddr lastAddr; // The address of the last compiled node
uint64_t len; // num of keys added
} FstBuilder;
+FstBuilder *fstBuilderCreate(void *w, FstType ty);
+void fstBuilderDestroy(FstBuilder *b);
+void fstBuilderInsertOutput(FstBuilder *b, FstSlice bs, Output in);
+OrderType fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup);
+void fstBuilderCompileFrom(FstBuilder *b, uint64_t istate);
+CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn);
+
+
+
typedef struct FstTransitions {
FstNode *node;
FstRange range;
} FstTransitions;
+//FstState and relation function
+
+typedef struct FstState {
+ State state;
+ uint8_t val;
+} FstState;
+
+FstState fstStateCreateFrom(FstSlice* data, CompiledAddr addr);
+FstState fstStateCreate(State state);
+
+//compile
+void fstStateCompileForOneTransNext(FstCountingWriter *w, CompiledAddr addr, uint8_t inp);
+void fstStateCompileForOneTrans(FstCountingWriter *w, CompiledAddr addr, FstTransition *trn);
+void fstStateCompileForAnyTrans(FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node);
+
+// set_comm_input
+void fstStateSetCommInput(FstState* state, uint8_t inp);
+
+// comm_input
+uint8_t fstStateCommInput(FstState* state, bool *null);
+
+// input_len
+
+uint64_t fstStateInputLen(FstState* state);
+
+
+// end_addr
+uint64_t fstStateEndAddrForOneTransNext(FstState* state, FstSlice *data);
+uint64_t fstStateEndAddrForOneTrans(FstState *state, FstSlice *data, PackSizes sizes);
+uint64_t fstStateEndAddrForAnyTrans(FstState *state, uint64_t version, FstSlice *date, PackSizes sizes, uint64_t nTrans);
+// input
+uint8_t fstStateInput(FstState *state, FstNode *node);
+uint8_t fstStateInputForAnyTrans(FstState *state, FstNode *node, uint64_t i);
+
+// trans_addr
+CompiledAddr fstStateTransAddr(FstState *state, FstNode *node);
+CompiledAddr fstStateTransAddrForAnyTrans(FstState *state, FstNode *node, uint64_t i);
+
+// sizes
+PackSizes fstStateSizes(FstState *state, FstSlice *data);
+// Output
+Output fstStateOutput(FstState *state, FstNode *node);
+Output fstStateOutputForAnyTrans(FstState *state, FstNode *node, uint64_t i);
+
+// anyTrans specify function
+
+void fstStateSetFinalState(FstState *state, bool yes);
+bool fstStateIsFinalState(FstState *state);
+void fstStateSetStateNtrans(FstState *state, uint8_t n);
+// state_ntrans
+uint8_t fstStateStateNtrans(FstState *state, bool *null);
+uint64_t fstStateTotalTransSize(FstState *state, uint64_t version, PackSizes size, uint64_t nTrans);
+uint64_t fstStateTransIndexSize(FstState *state, uint64_t version, uint64_t nTrans);
+uint64_t fstStateNtransLen(FstState *state);
+uint64_t fstStateNtrans(FstState *state, FstSlice *slice);
+Output fstStateFinalOutput(FstState *state, uint64_t version, FstSlice *date, PackSizes sizes, uint64_t nTrans);
+uint64_t fstStateFindInput(FstState *state, FstNode *node, uint8_t b, bool *null);
+
+
+
+
+
+
+
+
+#define FST_STATE_ONE_TRNAS_NEXT(node) (node->state.state == OneTransNext)
+#define FST_STATE_ONE_TRNAS(node) (node->state.state == OneTrans)
+#define FST_STATE_ANY_TRANS(node) (node->state.state == AnyTrans)
+#define FST_STATE_EMPTY_FINAL(node) (node->state.state == EmptyFinal)
typedef struct FstLastTransition {
@@ -93,8 +173,10 @@ typedef struct FstBuilderNodeUnfinished {
FstLastTransition* last;
} FstBuilderNodeUnfinished;
+
+
void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr);
-void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, CompiledAddr addr);
+void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Output out);
/*
* FstNode and helper function
@@ -102,7 +184,7 @@ void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Com
typedef struct FstNode {
FstSlice data;
uint64_t version;
- State state;
+ FstState state;
CompiledAddr start;
CompiledAddr end;
bool isFinal;
@@ -122,6 +204,7 @@ typedef struct FstNode {
// Return the address of this node.
#define FST_NODE_ADDR(node) node->start
+
FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *data);
void fstNodeDestroy(FstNode *fstNode);
@@ -160,6 +243,4 @@ void fstLastTransitionDestroy(FstLastTransition *trn);
-
-
#endif
diff --git a/source/libs/index/inc/index_fst_common.h b/source/libs/index/inc/index_fst_common.h
new file mode 100644
index 0000000000..b261f4090c
--- /dev/null
+++ b/source/libs/index/inc/index_fst_common.h
@@ -0,0 +1,7 @@
+#ifndef __INDEX_FST_COMM_H__
+#define __INDEX_FST_COMM_H__
+
+extern const uint8_t COMMON_INPUTS[];
+extern char const COMMON_INPUTS_INV[];
+
+#endif
diff --git a/source/libs/index/inc/index_fst_counting_writer.h b/source/libs/index/inc/index_fst_counting_writer.h
index 0eba963239..fbb2f1cff7 100644
--- a/source/libs/index/inc/index_fst_counting_writer.h
+++ b/source/libs/index/inc/index_fst_counting_writer.h
@@ -34,6 +34,10 @@ FstCountingWriter *fstCountingWriterCreate(void *wtr);
void fstCountingWriterDestroy(FstCountingWriter *w);
+void fstCountingWriterPackUintIn(FstCountingWriter *writer, uint64_t n, uint8_t nBytes);
+uint8_t fstCountingWriterPackUint(FstCountingWriter *writer, uint64_t n);
+
+
#define FST_WRITER_COUNT(writer) (writer->count)
#define FST_WRITER_INTER_WRITER(writer) (writer->wtr)
#define FST_WRITE_CHECK_SUMMER(writer) (writer->summer)
diff --git a/source/libs/index/inc/index_fst_util.h b/source/libs/index/inc/index_fst_util.h
index fc7dd44637..5b84632418 100644
--- a/source/libs/index/inc/index_fst_util.h
+++ b/source/libs/index/inc/index_fst_util.h
@@ -18,7 +18,7 @@
#define __INDEX_FST_UTIL_H__
#include "tarray.h"
-
+#include "index_fst_common.h"
typedef uint64_t FstType;
typedef uint64_t CompiledAddr;
@@ -44,9 +44,10 @@ extern const uint64_t TRANS_INDEX_THRESHOLD;
//
// `0` is a legal value which means there are no transitions/outputs
-#define FST_SET_TRANSITION_PACK_SIZE(v, sz) do {v = (v & 0b00001111) | (sz << 4} while(0)
+
+#define FST_SET_TRANSITION_PACK_SIZE(v, sz) do {v = (v & 0b00001111) | (sz << 4); } while(0)
#define FST_GET_TRANSITION_PACK_SIZE(v) (((v) & 0b11110000) >> 4)
-#define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { v = (v & 0b11110000) | sz } while(0)
+#define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { v = (v & 0b11110000) | sz; } while(0)
#define FST_GET_OUTPUT_PACK_SIZE(v) ((v) & 0b00001111)
#define COMMON_INPUT(idx) COMMON_INPUTS_INV[(idx) - 1]
@@ -70,13 +71,16 @@ CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr);
typedef struct FstSlice {
uint8_t *data;
uint64_t dLen;
- uint32_t start;
- uint32_t end;
+ int32_t start;
+ int32_t end;
} FstSlice;
-FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end);
+FstSlice fstSliceCopy(FstSlice *slice, int32_t start, int32_t end);
FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen);
bool fstSliceEmpty(FstSlice *slice);
+int fstSliceCompare(FstSlice *a, FstSlice *b);
+
+#define FST_SLICE_LEN(s) ((s)->end - (s)->start + 1)
#endif
diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c
index a7ae3f2fb6..8b9aa22fc6 100644
--- a/source/libs/index/src/index_fst.c
+++ b/source/libs/index/src/index_fst.c
@@ -16,6 +16,17 @@
#include "index_fst.h"
+
+static void fstPackDeltaIn(FstCountingWriter *wrt, CompiledAddr nodeAddr, CompiledAddr transAddr, uint8_t nBytes) {
+ CompiledAddr deltaAddr = (transAddr == EMPTY_ADDRESS) ? EMPTY_ADDRESS : nodeAddr - transAddr;
+ fstCountingWriterPackUintIn(wrt, deltaAddr, nBytes);
+}
+static uint8_t fstPackDetla(FstCountingWriter *wrt, CompiledAddr nodeAddr, CompiledAddr transAddr) {
+ uint8_t nBytes = packDeltaSize(nodeAddr, transAddr);
+ fstPackDeltaIn(wrt, nodeAddr, transAddr, nBytes);
+ return nBytes;
+}
+
FstUnFinishedNodes *fstUnFinishedNodesCreate() {
FstUnFinishedNodes *nodes = malloc(sizeof(FstUnFinishedNodes));
if (nodes == NULL) { return NULL; }
@@ -29,7 +40,7 @@ void unFinishedNodeDestroyElem(void* elem) {
fstBuilderNodeDestroy(b->node);
free(b->last);
}
-void fstUnFinishedNodeDestroy(FstUnFinishedNodes *nodes) {
+void fstUnFinishedNodesDestroy(FstUnFinishedNodes *nodes) {
if (nodes == NULL) { return; }
taosArrayDestroyEx(nodes->stack, unFinishedNodeDestroyElem);
@@ -155,65 +166,471 @@ uint64_t fstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node,
return res;
}
+
+FstState fstStateCreateFrom(FstSlice* slice, CompiledAddr addr) {
+ FstState fs = {.state = EmptyFinal, .val = 0};
+ if (addr == EMPTY_ADDRESS) {
+ return fs;
+ }
+ uint8_t v = slice->data[addr];
+ uint8_t t = (v & 0b11000000) >> 6;
+ if (t == 0b11) {
+ fs.state = OneTransNext;
+ } else if (t == 0b10) {
+ fs.state = OneTrans;
+ } else {
+ fs.state = AnyTrans;
+ }
+ fs.val = v;
+ return fs;
+}
+
+static FstState fstStateDict[] = {
+ {.state = OneTransNext, .val = 0b11000000},
+ {.state = OneTrans, .val = 0b10000000},
+ {.state = AnyTrans, .val = 0b00000000},
+ {.state = EmptyFinal, .val = 0b00000000}
+};
+// debug
+static const char *fstStateStr[] = {"ONE_TRANS_NEXT", "ONE_TRANS", "ANY_TRANS", "EMPTY_FINAL"};
+
+FstState fstStateCreate(State state){
+ uint8_t idx = (uint8_t)state;
+ return fstStateDict[idx];
+}
+//compile
+void fstStateCompileForOneTransNext(FstCountingWriter *w, CompiledAddr addr, uint8_t inp) {
+ FstState s = fstStateCreate(OneTransNext);
+ fstStateSetCommInput(&s, inp);
+
+ bool null = false;
+ uint8_t v = fstStateCommInput(&s, &null);
+ if (null) {
+ // w->write_all(&[inp])
+ fstCountingWriterWrite(w, &inp, 1);
+ }
+ fstCountingWriterWrite(w, &(s.val), 1);
+ // w->write_all(&[s.val])
+ return;
+}
+void fstStateCompileForOneTrans(FstCountingWriter *w, CompiledAddr addr, FstTransition* trn) {
+ Output out = trn->out;
+ uint8_t outPackSize = (out == 0 ? 0 : fstCountingWriterPackUint(w, out));
+ uint8_t transPackSize = fstPackDetla(w, addr, trn->addr);
+ PackSizes packSizes = 0;
+
+ FST_SET_OUTPUT_PACK_SIZE(packSizes, outPackSize);
+ FST_SET_TRANSITION_PACK_SIZE(packSizes, transPackSize);
+ fstCountingWriterWrite(w, (char *)&packSizes, sizeof(packSizes));
+
+ FstState st = fstStateCreate(OneTrans);
+
+ fstStateSetCommInput(&st, trn->inp);
+ bool null = false;
+ uint8_t inp = fstStateCommInput(&st, &null);
+ if (null == true) {
+ fstCountingWriterWrite(w, (char *)&trn->inp, sizeof(trn->inp));
+ }
+ fstCountingWriterWrite(w, (char *)(&(st.val)), sizeof(st.val));
+ return ;
+
+}
+void fstStateCompileForAnyTrans(FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node) {
+ size_t sz = taosArrayGetSize(node->trans);
+ assert(sz <= 256);
+
+ uint8_t tSize = 0;
+ uint8_t oSize = packSize(node->finalOutput) ;
+
+ // finalOutput.is_zero()
+ bool anyOuts = (node->finalOutput != 0) ;
+ for (size_t i = 0; i < sz; i++) {
+ FstTransition *t = taosArrayGet(node->trans, i);
+ tSize = MAX(tSize, packDeltaSize(addr, t->addr));
+ oSize = MAX(oSize, packSize(t->out));
+ anyOuts = anyOuts || (t->out != 0);
+ }
+
+ PackSizes packSizes = 0;
+ if (anyOuts) { FST_SET_OUTPUT_PACK_SIZE(packSizes, oSize); }
+ else { FST_SET_OUTPUT_PACK_SIZE(packSizes, 0); }
+
+ FST_SET_TRANSITION_PACK_SIZE(packSizes, tSize);
+
+ FstState st = fstStateCreate(AnyTrans);
+ fstStateSetFinalState(&st, node->isFinal);
+ fstStateSetStateNtrans(&st, (uint8_t)sz);
+
+ if (anyOuts) {
+ if (FST_BUILDER_NODE_IS_FINAL(node)) {
+ fstCountingWriterPackUintIn(w, node->finalOutput, oSize);
+ }
+ for (size_t i = 0; i < sz; i++) {
+ FstTransition *t = taosArrayGet(node->trans, i);
+ fstCountingWriterPackUintIn(w, t->out, oSize);
+ }
+ }
+ for (size_t i = 0; i < sz; i++) {
+ FstTransition *t = taosArrayGet(node->trans, i);
+ fstPackDeltaIn(w, addr, t->addr, tSize);
+ }
+ for (size_t i = 0; i < sz; i++) {
+ FstTransition *t = taosArrayGet(node->trans, i);
+ fstCountingWriterWrite(w, (char *)&t->inp, 1);
+ //fstPackDeltaIn(w, addr, t->addr, tSize);
+ }
+ if (sz > TRANS_INDEX_THRESHOLD) {
+ // A value of 255 indicates that no transition exists for the byte
+ // at that index. (Except when there are 256 transitions.) Namely,
+ // any value greater than or equal to the number of transitions in
+ // this node indicates an absent transition.
+ uint8_t *index = (uint8_t *)malloc(sizeof(uint8_t) * 256);
+ for (uint8_t i = 0; i < 256; i++) {
+ index[i] = 255;
+ }
+ for (size_t i = 0; i < sz; i++) {
+ FstTransition *t = taosArrayGet(node->trans, i);
+ index[t->inp] = i;
+ fstCountingWriterWrite(w, (char *)index, sizeof(index));
+ //fstPackDeltaIn(w, addr, t->addr, tSize);
+ }
+ free(index);
+ }
+ fstCountingWriterWrite(w, (char *)&packSizes, 1);
+ bool null = false;
+ fstStateStateNtrans(&st, &null);
+ if (null == true) {
+ // 256 can't be represented in a u8, so we abuse the fact that
+ // the # of transitions can never be 1 here, since 1 is always
+ // encoded in the state byte.
+ uint8_t v = 1;
+ if (sz == 256) { fstCountingWriterWrite(w, (char *)&v, 1); }
+ else { fstCountingWriterWrite(w, (char *)&sz, 1); }
+ }
+ fstCountingWriterWrite(w, (char *)(&(st.val)), 1);
+ return;
+}
+
+// set_comm_input
+void fstStateSetCommInput(FstState* s, uint8_t inp) {
+ assert(s->state == OneTransNext || s->state == OneTrans);
+
+ uint8_t val;
+ COMMON_INDEX(inp, 0x111111, val);
+ s->val = (s->val & fstStateDict[s->state].val) | val;
+}
+
+// comm_input
+uint8_t fstStateCommInput(FstState* s, bool *null) {
+ assert(s->state == OneTransNext || s->state == OneTrans);
+ uint8_t v = s->val & 0b00111111;
+ if (v == 0) {
+ *null = true;
+ return v;
+ }
+ //v = 0 indicate that common_input is None
+ return v == 0 ? 0 : COMMON_INPUT(v);
+}
+
+// input_len
+
+uint64_t fstStateInputLen(FstState* s) {
+ assert(s->state == OneTransNext || s->state == OneTrans);
+ bool null = false;
+ fstStateCommInput(s, &null);
+ return null ? 1 : 0 ;
+}
+
+// end_addr
+uint64_t fstStateEndAddrForOneTransNext(FstState* s, FstSlice *data) {
+ assert(s->state == OneTransNext);
+ return FST_SLICE_LEN(data) - 1 - fstStateInputLen(s);
+}
+uint64_t fstStateEndAddrForOneTrans(FstState *s, FstSlice *data, PackSizes sizes) {
+ assert(s->state == OneTrans);
+ return FST_SLICE_LEN(data)
+ - 1
+ - fstStateInputLen(s)
+ - 1 // pack size
+ - FST_GET_TRANSITION_PACK_SIZE(sizes)
+ - FST_GET_OUTPUT_PACK_SIZE(sizes);
+}
+uint64_t fstStateEndAddrForAnyTrans(FstState *state, uint64_t version, FstSlice *date, PackSizes sizes, uint64_t nTrans) {
+ uint8_t oSizes = FST_GET_OUTPUT_PACK_SIZE(sizes);
+ uint8_t finalOsize = !fstStateIsFinalState(state) ? 0 : oSizes;
+ return FST_SLICE_LEN(date)
+ - 1
+ - fstStateNtransLen(state)
+ - 1 //pack size
+ - fstStateTotalTransSize(state, version, sizes, nTrans)
+ - nTrans * oSizes // output values
+ - finalOsize; // final output
+}
+// input
+uint8_t fstStateInput(FstState *s, FstNode *node) {
+ assert(s->state == OneTransNext || s->state == OneTrans);
+ FstSlice *slice = &node->data;
+ bool null = false;
+ uint8_t inp = fstStateCommInput(s, &null);
+ return null == false ? inp : slice->data[slice->start - 1];
+}
+uint8_t fstStateInputForAnyTrans(FstState *s, FstNode *node, uint64_t i) {
+ assert(s->state == AnyTrans);
+ FstSlice *slice = &node->data;
+
+ uint64_t at = node->start
+ - fstStateNtransLen(s)
+ - 1 // pack size
+ - fstStateTransIndexSize(s, node->version, node->nTrans)
+ - i
+ - 1; // the output size
+ return slice->data[at];
+}
+
+// trans_addr
+CompiledAddr fstStateTransAddr(FstState *s, FstNode *node) {
+ assert(s->state == OneTransNext || s->state == OneTrans);
+ FstSlice *slice = &node->data;
+ if (s->state == OneTransNext) {
+ return (CompiledAddr)(node->end);
+ } else {
+ PackSizes sizes = node->sizes;
+ uint8_t tSizes = FST_GET_TRANSITION_PACK_SIZE(sizes);
+ uint64_t i = node->start
+ - fstStateInputLen(s)
+ - 1 // PackSizes
+ - tSizes;
+
+ // refactor error logic
+ return unpackDelta(slice->data + slice->start + i, tSizes, node->end);
+ }
+}
+CompiledAddr fstStateTransAddrForAnyTrans(FstState *s, FstNode *node, uint64_t i) {
+ assert(s->state == AnyTrans);
+
+ FstSlice *slice = &node->data;
+ uint8_t tSizes = FST_GET_TRANSITION_PACK_SIZE(node->sizes);
+ uint64_t at = node->start
+ - fstStateNtransLen(s)
+ - 1
+ - fstStateTransIndexSize(s, node->version, node->nTrans)
+ - node->nTrans
+ - (i * tSizes)
+ - tSizes;
+ return unpackDelta(slice->data + slice->start + at, tSizes, node->end);
+}
+
+// sizes
+PackSizes fstStateSizes(FstState *s, FstSlice *slice) {
+ assert(s->state == OneTrans || s->state == AnyTrans) ;
+ uint64_t i;
+ if (s->state == OneTrans) {
+ i = FST_SLICE_LEN(slice) - 1 - fstStateInputLen(s) - 1;
+ } else {
+ i = FST_SLICE_LEN(slice) - 1 - fstStateNtransLen(s) - 1;
+ }
+
+ return (PackSizes)(slice->data[slice->start + i]);
+}
+// Output
+Output fstStateOutput(FstState *s, FstNode *node) {
+ assert(s->state == OneTrans);
+
+ uint8_t oSizes = FST_GET_OUTPUT_PACK_SIZE(node->sizes);
+ if (oSizes == 0) {
+ return 0;
+ }
+ FstSlice *slice = &node->data;
+ uint8_t tSizes = FST_GET_TRANSITION_PACK_SIZE(node->sizes);
+
+ uint64_t i = node->start
+ - fstStateInputLen(s);
+ - 1
+ - tSizes
+ - oSizes;
+ return unpackUint64(slice->data + slice->start + i, oSizes);
+
+}
+Output fstStateOutputForAnyTrans(FstState *s, FstNode *node, uint64_t i) {
+ assert(s->state == AnyTrans);
+
+ uint8_t oSizes = FST_GET_OUTPUT_PACK_SIZE(node->sizes);
+ if (oSizes == 0) {
+ return 0;
+ }
+ FstSlice *slice = &node->data;
+ uint64_t at = node->start
+ - fstStateNtransLen(s)
+ - 1 // pack size
+ - fstStateTotalTransSize(s, node->version, node->sizes, node->nTrans)
+ - (i * oSizes)
+ - oSizes;
+ return unpackUint64(slice->data + slice->start + at, oSizes);
+}
+
+// anyTrans specify function
+
+void fstStateSetFinalState(FstState *s, bool yes) {
+ assert(s->state == AnyTrans);
+ if (yes) { s->val |= 0b01000000; }
+ return;
+}
+bool fstStateIsFinalState(FstState *s) {
+ assert(s->state == AnyTrans);
+ return (s->val & 0b01000000) == 0b01000000;
+}
+
+void fstStateSetStateNtrans(FstState *s, uint8_t n) {
+ assert(s->state == AnyTrans);
+ if (n <= 0b00111111) {
+ s->val = (s->val & 0b11000000) | n;
+ }
+ return;
+}
+// state_ntrans
+uint8_t fstStateStateNtrans(FstState *s, bool *null) {
+ assert(s->state == AnyTrans);
+ *null = false;
+ uint8_t n = s->val & 0b00111111;
+
+ if (n == 0) {
+ *null = true; // None
+ }
+ return n;
+}
+uint64_t fstStateTotalTransSize(FstState *s, uint64_t version, PackSizes sizes, uint64_t nTrans) {
+ assert(s->state == AnyTrans);
+ uint64_t idxSize = fstStateTransIndexSize(s, version, nTrans);
+ return nTrans + (nTrans * FST_GET_TRANSITION_PACK_SIZE(sizes)) + idxSize;
+}
+uint64_t fstStateTransIndexSize(FstState *s, uint64_t version, uint64_t nTrans) {
+ assert(s->state == AnyTrans);
+ return (version >= 2 &&nTrans > TRANS_INDEX_THRESHOLD) ? 256 : 0;
+}
+uint64_t fstStateNtransLen(FstState *s) {
+ assert(s->state == AnyTrans);
+ bool null = false;
+ fstStateStateNtrans(s, &null);
+ return null == true ? 1 : 0;
+}
+uint64_t fstStateNtrans(FstState *s, FstSlice *slice) {
+ bool null = false;
+ uint8_t n = fstStateStateNtrans(s, &null);
+ if (null != true) {
+ return n;
+ }
+ n = slice->data[slice->end - 1]; // data[data.len() - 2]
+ return n == 1 ? 256: n; // // "1" is never a normal legal value here, because if there, // is only 1 transition, then it is encoded in the state byte
+}
+Output fstStateFinalOutput(FstState *s, uint64_t version, FstSlice *slice, PackSizes sizes, uint64_t nTrans) {
+ uint8_t oSizes = FST_GET_OUTPUT_PACK_SIZE(sizes);
+ if (oSizes == 0 || !fstStateIsFinalState(s)) {
+ return 0;
+ }
+
+ uint64_t at = FST_SLICE_LEN(slice)
+ - 1
+ - fstStateNtransLen(s)
+ - fstStateTotalTransSize(s, version, sizes, nTrans)
+ - (nTrans * oSizes)
+ - oSizes;
+ return unpackUint64(slice->data + slice->start + at, (uint8_t)oSizes);
+
+}
+uint64_t fstStateFindInput(FstState *s, FstNode *node, uint8_t b, bool *null) {
+ assert(s->state == AnyTrans);
+ FstSlice *slice = &node->data;
+ if (node->version >= 2 && node->nTrans > TRANS_INDEX_THRESHOLD) {
+ uint64_t at = node->start
+ - fstStateNtransLen(s)
+ - 1 // pack size
+ - fstStateTransIndexSize(s, node->version, node->nTrans);
+ uint64_t i = slice->data[slice->start + at + b];
+ if (i >= node->nTrans) {
+ *null = true;
+ }
+ return i;
+ } else {
+ uint64_t start = node->start
+ - fstStateNtransLen(s)
+ - 1 // pack size
+ - node->nTrans;
+ uint64_t end = start + node->nTrans;
+ uint64_t len = end - start;
+ for(int i = 0; i < len; i++) {
+ uint8_t v = slice->data[slice->start + i];
+ if (v == b) {
+ return node->nTrans - i - 1; // bug
+ }
+ }
+ }
+}
+
+
// fst node function
-
-
FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *slice) {
FstNode *n = (FstNode *)malloc(sizeof(FstNode));
if (n == NULL) { return NULL; }
- if (addr == EMPTY_ADDRESS) {
+ FstState st = fstStateCreateFrom(slice, addr);
+
+ if (st.state == EmptyFinal) {
n->data = fstSliceCreate(NULL, 0);
n->version = version;
- n->state = EmptyFinal;
+ n->state = st;
n->start = EMPTY_ADDRESS;
n->end = EMPTY_ADDRESS;
n->isFinal = true;
n->nTrans = 0;
n->sizes = 0;
n->finalOutput = 0;
- }
- uint8_t v = slice->data[addr];
- uint8_t s = (v & 0b11000000) >> 6;
- if (s == 0b11) { // oneTransNext
+ } else if (st.state == OneTransNext) {
n->data = fstSliceCopy(slice, 0, addr);
n->version = version;
- n->state = OneTransNext;
+ n->state = st;
n->start = addr;
- n->end = addr; //? s.end_addr(data);
+ n->end = fstStateEndAddrForOneTransNext(&st, slice); //? s.end_addr(data);
n->isFinal = false;
n->sizes = 0;
- n->nTrans = 0;
+ n->nTrans = 1;
n->finalOutput = 0;
- } else if (v == 0b10) { // oneTrans
- uint64_t sz; // fetch sz from addr
- n->data = fstSliceCopy(slice, 0, addr);
+ } else if (st.state == OneTrans) {
+ FstSlice data = fstSliceCopy(slice, 0, addr);
+ PackSizes sz = fstStateSizes(&st, &data);
+ n->data = fstSliceCopy(slice, 0, addr);
n->version = version;
- n->state = OneTrans;
+ n->state = st;
n->start = addr;
- n->end = addr; // s.end_addr(data, sz);
+ n->end = fstStateEndAddrForOneTrans(&st, slice, sz); // s.end_addr(data, sz);
n->isFinal = false;
n->nTrans = 1;
n->sizes = sz;
n->finalOutput = 0;
- } else { // anyTrans
- uint64_t sz; // s.sizes(data)
- uint32_t nTrans; // s.ntrans(data)
+ } else {
+ uint64_t sz = fstStateSizes(&st, slice); // s.sizes(data)
+ uint32_t nTrans = fstStateNtrans(&st, slice); // s.ntrans(data)
n->data = *slice;
n->version = version;
- n->state = AnyTrans;
+ n->state = st;
n->start = addr;
- n->end = addr; // s.end_addr(version, data, sz, ntrans);
- n->isFinal = false; // s.is_final_state();
+ n->end = fstStateEndAddrForAnyTrans(&st, version, slice, sz, nTrans); // s.end_addr(version, data, sz, ntrans);
+ n->isFinal = fstStateIsFinalState(&st); // s.is_final_state();
n->nTrans = nTrans;
n->sizes = sz;
- n->finalOutput = 0; // s.final_output(version, data, sz, ntrans);
- }
+ n->finalOutput = fstStateFinalOutput(&st, version, slice, sz, nTrans); // s.final_output(version, data, sz, ntrans);
+ }
return n;
}
+
+// debug state transition
+static const char *fstNodeState(FstNode *node) {
+ FstState *st = &node->state;
+ return fstStateStr[st->state];
+}
+
+
void fstNodeDestroy(FstNode *node) {
- if (node == NULL) { return; }
free(node);
}
FstTransitions* fstNodeTransitions(FstNode *node) {
@@ -222,52 +639,70 @@ FstTransitions* fstNodeTransitions(FstNode *node) {
return NULL;
}
FstRange range = {.start = 0, .end = FST_NODE_LEN(node)};
- t->node = node;
t->range = range;
+ t->node = node;
return t;
}
-bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *res) {
+
+// Returns the transition at index `i`.
+bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *trn) {
bool s = true;
- if (node->state == OneTransNext) {
-
- } else if (node->state == OneTrans) {
-
- } else if (node->state == AnyTrans) {
-
+ FstState *st = &node->state;
+ if (st->state == OneTransNext) {
+ trn->inp = fstStateInput(st, node);
+ trn->out = 0;
+ trn->addr = fstStateTransAddr(st, node);
+ } else if (st->state == OneTrans) {
+ trn->inp = fstStateInput(st, node);
+ trn->out = fstStateOutput(st, node);
+ trn->addr = fstStateTransAddr(st, node);
+ } else if (st->state == AnyTrans) {
+ trn->inp = fstStateInputForAnyTrans(st, node, i);
+ trn->out = fstStateOutputForAnyTrans(st, node, i);
+ trn->addr = fstStateTransAddrForAnyTrans(st, node, i);
} else {
s = false;
}
return s;
}
+// Returns the transition address of the `i`th transition
bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res) {
bool s = true;
- if (node->state == OneTransNext) {
-
- } else if (node->state == OneTrans) {
-
- } else if (node->state == AnyTrans) {
-
- } else if (node->state == EmptyFinal){
+ FstState *st = &node->state;
+ if (st->state == OneTransNext) {
+ assert(i == 0);
+ fstStateTransAddr(st, node);
+ } else if (st->state == OneTrans) {
+ assert(i == 0);
+ fstStateTransAddr(st, node);
+ } else if (st->state == AnyTrans) {
+ fstStateTransAddrForAnyTrans(st, node, i);
+ } else if (FST_STATE_EMPTY_FINAL(node)){
s = false;
+ } else {
+ assert(0);
}
return s;
}
+// Finds the `i`th transition corresponding to the given input byte.
+// If no transition for this byte exists, then `false` is returned.
bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res) {
bool s = true;
- uint8_t input; // s.input
- if (node->state == OneTransNext) {
- if (b == input) { *res = 0; }
- else { return s ; }
- } else if (node->state == OneTrans) {
- if (b == input) { *res = 0; }
- else {return s;}
- } else if (node->state == AnyTrans) {
-
- } else if (node->state == EmptyFinal) {
- s = false;
- }
+ FstState *st = &node->state;
+ if (st->state == OneTransNext) {
+ if (fstStateInput(st,node) == b) { *res = 0; }
+ else { s = false; } }
+ else if (st->state == OneTrans) {
+ if (fstStateInput(st, node) == b) { *res = 0 ;}
+ else { s = false; }
+ } else if (st->state == AnyTrans) {
+ bool null = false;
+ uint64_t out = fstStateFindInput(st, node, b, &null);
+ if (null == false) { *res = out; }
+ else { s = false;}
+ }
return s;
}
@@ -277,13 +712,16 @@ bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr
if (sz == 0 && builderNode->isFinal && builderNode->finalOutput == 0) {
return true;
} else if (sz != 1 || builderNode->isFinal) {
+ fstStateCompileForAnyTrans(w, addr, builderNode);
// AnyTrans->Compile(w, addr, node);
} else {
FstTransition *tran = taosArrayGet(builderNode->trans, 0);
if (tran->addr == lastAddr && tran->out == 0) {
+ fstStateCompileForOneTransNext(w, addr, tran->inp);
//OneTransNext::compile(w, lastAddr, tran->inp);
return true;
} else {
+ fstStateCompileForOneTrans(w, addr, tran);
//OneTrans::Compile(w, lastAddr, *tran);
return true;
}
@@ -300,17 +738,93 @@ FstBuilder *fstBuilderCreate(void *w, FstType ty) {
b->wrt = fstCountingWriterCreate(w);
b->unfinished = fstUnFinishedNodesCreate();
b->registry = fstRegistryCreate(10000, 2) ;
- b->last = NULL;
+ b->last = fstSliceCreate(NULL, 0);
b->lastAddr = NONE_ADDRESS;
b->len = 0;
return b;
}
+void fstBuilderDestroy(FstBuilder *b) {
+ if (b == NULL) { return; }
+
+ fstCountingWriterDestroy(b->wrt);
+ fstUnFinishedNodesDestroy(b->unfinished);
+ fstRegistryDestroy(b->registry);
+ free(b);
+}
-void fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDupe) {
- return;
+bool fstBuilderInsert(FstBuilder *b, FstSlice bs, Output in) {
+ OrderType t = fstBuilderCheckLastKey(b, bs, true);
+ if (t == Ordered) {
+ // add log info
+ fstBuilderInsertOutput(b, bs, in);
+ return true;
+ }
+ return false;
+}
+
+void fstBuilderInsertOutput(FstBuilder *b, FstSlice bs, Output in) {
+ FstSlice *s = &bs;
+ if (fstSliceEmpty(s)) {
+ b->len = 1;
+ fstUnFinishedNodesSetRootOutput(b->unfinished, in);
+ return;
+ }
+ Output out;
+ uint64_t prefixLen;
+ if (in != 0) { //if let Some(in) = in
+ prefixLen = fstUnFinishedNodesFindCommPrefixAndSetOutput(b->unfinished, bs, in, &out);
+ } else {
+ prefixLen = fstUnFinishedNodesFindCommPrefix(b->unfinished, bs);
+ out = 0;
+ }
+
+ if (prefixLen == FST_SLICE_LEN(s)) {
+ assert(out != 0);
+ return;
+ }
+
+ b->len += 1;
+ fstBuilderCompileFrom(b, prefixLen);
+
+ FstSlice sub = fstSliceCopy(s, prefixLen, s->end);
+ fstUnFinishedNodesAddSuffix(b->unfinished, sub, out);
+ return;
+ }
+
+OrderType fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup) {
+ FstSlice *input = &bs;
+ if (fstSliceEmpty(&b->last)) {
+ // deep copy or not
+ b->last = fstSliceCopy(&bs, input->start, input->end);
+ } else {
+ int comp = fstSliceCompare(&b->last, &bs);
+ if (comp == 0 && ckDup) {
+ return DuplicateKey;
+ } else if (comp == 1) {
+ return OutOfOrdered;
+ }
+ // deep copy or not
+ b->last = fstSliceCopy(&bs, input->start, input->end);
+ }
+ return Ordered;
}
-
+void fstBuilderCompileFrom(FstBuilder *b, uint64_t istate) {
+ CompiledAddr addr = NONE_ADDRESS;
+ while (istate + 1 < FST_UNFINISHED_NODES_LEN(b->unfinished)) {
+ FstBuilderNode *n = NULL;
+ if (addr == NONE_ADDRESS) {
+ n = fstUnFinishedNodesPopEmpty(b->unfinished);
+ } else {
+ n = fstUnFinishedNodesPopFreeze(b->unfinished, addr);
+ }
+ addr = fstBuilderCompile(b, n);
+ assert(addr != NONE_ADDRESS);
+ fstBuilderNodeDestroy(n);
+ }
+ fstUnFinishedNodesTopLastFreeze(b->unfinished, addr);
+ return;
+}
CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn) {
if (FST_BUILDER_NODE_IS_FINAL(bn)
&& FST_BUILDER_NODE_TRANS_ISEMPTY(bn)
@@ -336,6 +850,8 @@ CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn) {
}
+
+
FstSlice fstNodeAsSlice(FstNode *node) {
FstSlice *slice = &node->data;
FstSlice s = fstSliceCopy(slice, slice->end, slice->dLen - 1);
@@ -354,11 +870,27 @@ FstLastTransition *fstLastTransitionCreate(uint8_t inp, Output out) {
void fstLastTransitionDestroy(FstLastTransition *trn) {
free(trn);
}
-void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr) {
+void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *unNode, CompiledAddr addr) {
+ FstLastTransition *trn = unNode->last;
+ if (trn == NULL) { return; }
+
+ FstTransition t = {.inp = trn->inp, .out = trn->out, .addr = addr};
+ taosArrayPush(unNode->node->trans, &t);
return;
}
-void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, CompiledAddr addr) {
+void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *unNode, Output out) {
+ if (FST_BUILDER_NODE_IS_FINAL(unNode->node)) {
+ unNode->node->finalOutput += out;
+ }
+ size_t sz = taosArrayGetSize(unNode->node->trans);
+ for (size_t i = 0; i < sz; i++) {
+ FstTransition *trn = taosArrayGet(unNode->node->trans, i);
+ trn->out += out;
+ }
+ if (unNode->last) {
+ unNode->last->out += out;
+ }
return;
}
diff --git a/source/libs/index/src/index_fst_common.c b/source/libs/index/src/index_fst_common.c
index 4ab78cddc5..97fb88d60e 100644
--- a/source/libs/index/src/index_fst_common.c
+++ b/source/libs/index/src/index_fst_common.c
@@ -14,6 +14,7 @@
*/
#include "tutil.h"
+
const uint8_t COMMON_INPUTS[] = {
84, // '\x00'
85, // '\x01'
@@ -273,7 +274,7 @@ const uint8_t COMMON_INPUTS[] = {
255, // 'ΓΏ'
};
-char const COMMON_INPUTS_INV[] = {
+const char COMMON_INPUTS_INV[] = {
't', 'e', '/', 'o', 'a', 's', 'r', 'i', 'p', 'c', 'n', 'w',
'.', 'h', 'l', 'm', '-', 'd', 'u', '0', '1', '2', 'g', '=',
':', 'b', 'f', '3', 'y', '5', '&', '_', '4', 'v', '9', '6',
diff --git a/source/libs/index/src/index_fst_counting_writer.c b/source/libs/index/src/index_fst_counting_writer.c
index 1486b9b203..b253db986a 100644
--- a/source/libs/index/src/index_fst_counting_writer.c
+++ b/source/libs/index/src/index_fst_counting_writer.c
@@ -13,6 +13,7 @@
* along with this program. If not, see .
*/
#include "tutil.h"
+#include "index_fst_util.h"
#include "index_fst_counting_writer.h"
FstCountingWriter *fstCountingWriterCreate(void *wrt) {
@@ -36,10 +37,27 @@ uint64_t fstCountingWriterWrite(FstCountingWriter *write, uint8_t *buf, uint32_t
return bufLen;
}
-int FstCountingWriterFlush(FstCountingWriter *write) {
+int fstCountingWriterFlush(FstCountingWriter *write) {
//write->wtr->flush
return 1;
}
+void fstCountingWriterPackUintIn(FstCountingWriter *writer, uint64_t n, uint8_t nBytes) {
+ assert(1 <= nBytes && nBytes <= 8);
+ uint8_t *buf = calloc(8, sizeof(uint8_t));
+ for (uint8_t i = 0; i < nBytes; i++) {
+ buf[i] = (uint8_t)n;
+ n = n >> 8;
+ }
+ fstCountingWriterWrite(writer, buf, nBytes);
+ free(buf);
+ return;
+}
+
+uint8_t fstCountingWriterPackUint(FstCountingWriter *writer, uint64_t n) {
+ uint8_t nBytes = packSize(n);
+ fstCountingWriterPackUintIn(writer, n, nBytes);
+ return nBytes;
+}
diff --git a/source/libs/index/src/index_fst_util.c b/source/libs/index/src/index_fst_util.c
index 20751baf5f..c4499f8e0d 100644
--- a/source/libs/index/src/index_fst_util.c
+++ b/source/libs/index/src/index_fst_util.c
@@ -13,6 +13,7 @@
* along with this program. If not, see .
*/
#include "index_fst_util.h"
+#include "index_fst_common.h"
@@ -94,7 +95,7 @@ FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen) {
FstSlice slice = {.data = data, .dLen = dLen, .start = 0, .end = dLen - 1};
return slice;
}
-FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end) {
+FstSlice fstSliceCopy(FstSlice *slice, int32_t start, int32_t end) {
FstSlice t;
if (start >= slice->dLen || end >= slice->dLen || start > end) {
t.data = NULL;
@@ -111,5 +112,21 @@ bool fstSliceEmpty(FstSlice *slice) {
return slice->data == NULL || slice->dLen <= 0;
}
+int fstSliceCompare(FstSlice *a, FstSlice *b) {
+ int32_t aLen = (a->end - a->start + 1);
+ int32_t bLen = (b->end - b->start + 1);
+ int32_t mLen = (aLen < bLen ? aLen : bLen);
+ for (int i = 0; i < mLen; i++) {
+ uint8_t x = a->data[i + a->start];
+ uint8_t y = b->data[i + b->start];
+ if (x == y) { continue; }
+ else if (x < y) { return -1; }
+ else { return 1; }
+ }
+ if (aLen == bLen) { return 0; }
+ else if (aLen < bLen) { return -1; }
+ else { return 1; }
+}
+