diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h
index de4c957e29..61c857ed74 100644
--- a/source/libs/index/inc/index_fst.h
+++ b/source/libs/index/inc/index_fst.h
@@ -13,58 +13,73 @@
* along with this program. If not, see .
*/
-#ifndef _INDEX_FST_H_
-#define _INDEX_FST_H_
-#include "index_fst.h"
+#ifndef __INDEX_FST_H__
+#define __INDEX_FST_H__
+
+
#include "tarray.h"
-
-typedef FstType uint64_t;
-typedef CompiledAddr uint64_t;
-typedef Output uint64_t;
-typedef PackSizes uint8_t;
+#include "index_fst_util.h"
+#include "index_fst_registry.h"
-//A sentinel value used to indicate an empty final state
-const CompileAddr EMPTY_ADDRESS = 0;
-/// A sentinel value used to indicate an invalid state.
-const CompileAddr NONE_ADDRESS = 1;
+typedef struct FstNode FstNode;
+#define OUTPUT_PREFIX(a, b) ((a) > (b) ? (b) : (a))  // smaller of the two outputs; each argument is evaluated twice
-// This version number is written to every finite state transducer created by
-// this crate. When a finite state transducer is read, its version number is
-// checked against this value.
-const uint64_t version = 3;
-// The threshold (in number of transitions) at which an index is created for
-// a node's transitions. This speeds up lookup time at the expense of FST size
-
-const uint64_t TRANS_INDEX_THRESHOLD = 32;
typedef struct FstRange {
uint64_t start;
uint64_t end;
} FstRange;
-enum State { OneTransNext, OneTrans, AnyTrans, EmptyFinal};
-enum FstBound { Included, Excluded, Unbounded};
-typedef struct CheckSummer {
- uint32_t sum;
-};
+// Builder-side node: a final flag, a final output value and an array of transitions.
+typedef struct FstBuilderNode {
+ bool isFinal; // set from the isFinal argument of fstUnFinishedNodesPushEmpty; forced to true by fstUnFinishedNodesSetRootOutput
+ Output finalOutput; // initialised to 0 by the push helpers; fstUnFinishedNodesSetRootOutput stores the root output here
+ SArray *trans; // NULL for freshly pushed nodes; fstNodeCompile reads FstTransition entries from it
+} FstBuilderNode;
+
+typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal} State;
+typedef enum { Included, Excluded, Unbounded} FstBound;
+
+typedef uint32_t CheckSummer;
-typedef struct FstBuilder {
- FstCountingWriter wtr; // The FST raw data is written directly to `wtr`.
- FstUnFinishedNodes unfinished // The stack of unfinished nodes
- Registry registry // A map of finished nodes.
- SArray* last // The last word added
- CompiledAddr lastAddr // The address of the last compiled node
- uint64_t len // num of keys added
-} FstBuilder;
+/*
+ *
+ * UnFinished node and helper function
+ * TODO: simple function name
+ */
+typedef struct FstUnFinishedNodes {
+ SArray *stack; // array of FstBuilderNodeUnfinished entries, created in fstUnFinishedNodesCreate and used as a stack
+} FstUnFinishedNodes;
+
+#define FST_UNFINISHED_NODES_LEN(nodes) taosArrayGetSize((nodes)->stack)  // number of entries currently on the stack
+
+// Stack helpers for FstUnFinishedNodes; the names below match the definitions in index_fst.c.
+FstUnFinishedNodes *fstUnFinishedNodesCreate(void);
+void fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal);
+FstBuilderNode *fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes);
+FstBuilderNode *fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr);
+FstBuilderNode *fstUnFinishedNodesPopEmpty(FstUnFinishedNodes *nodes);
+void fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes *node, Output out);
+void fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *node, CompiledAddr addr);
+void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *node, FstSlice bs, Output out);
+// Returns how many leading bytes of `bs` match the pending transitions on the stack.
+uint64_t fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs);
+// Same prefix walk, additionally taking the input output `in` and an out-parameter `out`.
+uint64_t FstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, FstSlice bs, Output in, Output *out);
typedef struct FstCountingWriter {
void* wtr; // wrap any writer that counts and checksum bytes written
uint64_t count;
CheckSummer summer;
-};
+} FstCountingWriter;
+
+typedef struct FstBuilder {
+ FstCountingWriter wtr; // The FST raw data is written directly to `wtr`.
+ FstUnFinishedNodes *unfinished; // The stack of unfinished nodes
+ FstRegistry registry; // A map of finished nodes.
+ SArray* last; // The last word added
+ CompiledAddr lastAddr; // The address of the last compiled node
+ uint64_t len; // num of keys added
+} FstBuilder;
@@ -80,16 +95,6 @@ typedef struct FstTransitions {
FstRange range;
} FstTransitions;
-typedef struct FstUnFinishedNodes {
- SArray *stack; //
-} FstUnFinishedNodes;
-
-typedef struct FstBuilderNode {
- bool isFinal;
- Output finalOutput;
- SArray *trans; //
-} FstBuilderNode;
-
typedef struct FstLastTransition {
@@ -97,13 +102,23 @@ typedef struct FstLastTransition {
Output out;
} FstLastTransition;
+/*
+ * FstBuilderNodeUnfinished and helper function
+ * TODO: simple function name
+ */
typedef struct FstBuilderNodeUnfinished {
- FstBuilderNode node;
- FstLastTransition last;
+ FstBuilderNode *node;
+ FstLastTransition* last;
} FstBuilderNodeUnfinished;
+// Called by the pop/freeze helpers with the address of the node that was just compiled.
+void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr);
+// The second argument is an output prefix (callers pass an Output), not an address.
+void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Output out);
+
+/*
+ * FstNode and helper function
+ */
typedef struct FstNode {
- uint8_t* data;
+ FstSlice data;
uint64_t version;
State state;
CompiledAddr start;
@@ -114,6 +129,28 @@ typedef struct FstNode {
Output finalOutput;
} FstNode;
+// If this node is final and has a terminal output value, then it is, returned. Otherwise, a zero output is returned
+#define FST_NODE_FINAL_OUTPUT(node) ((node)->finalOutput)
+// Returns true if and only if this node corresponds to a final, or "match", state in the finite state transducer.
+#define FST_NODE_IS_FINAL(node) ((node)->isFinal)
+// Returns the number of transitions in this node. The maximum number of transitions is 256.
+#define FST_NODE_LEN(node) ((node)->nTrans)
+// Returns true if and only if this node has zero transitions.
+#define FST_NODE_IS_EMPTYE(node) ((node)->nTrans == 0)
+// Returns the address of this node.
+#define FST_NODE_ADDR(node) ((node)->start)
+
+FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *data);
+FstTransitions fstNodeTransitionIter(FstNode *node);
+FstTransitions* fstNodeTransitions(FstNode *node);
+bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *res);
+bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res);
+bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res);
+bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode);
+FstSlice fstNodeAsSlice(FstNode *node);
+
+
+
typedef struct FstMeta {
uint64_t version;
CompiledAddr rootAddr;
@@ -125,42 +162,21 @@ typedef struct FstMeta {
typedef struct Fst {
FstMeta meta;
void *data; //
-};
+} Fst;
-// ops
+// ops
typedef struct FstIndexedValue {
uint64_t index;
uint64_t value;
-};
+} FstIndexedValue;
-// relate to Regist
-typedef struct FstRegistry {
- SArray *table; //
- uint64_t tableSize; // num of rows
- uint64_t mruSize; // num of columns
-} FstRegistry;
-
-typedef struct FstRegistryCache {
- SArray *cells; //
-} FstRegistryCache;
typedef struct FstRegistryCell {
CompiledAddr addr;
FstBuilderNode *node;
} FstRegistryCell;
-enum FstRegistryEntry {Found, NotFound, Rejected};
-
-FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, uint8_t *data);
-FstTransitions fstNodeTransitionIter(FstNode *node);
-FstTransition fstNodeGetTransitionAt(FstNode *node, uint64_t i);
-CompiledAddr fstNodeGetTransitionAddr(FstNode *node, uint64_t i);
-int64_t fstNodeFindInput(FstNode *node, int8_t b);
-Output fstNodeGetFinalOutput(FstNode *node);
-void* fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledArr addr, FstBuilderNode *builderNode);
-
-
#endif
diff --git a/source/libs/index/inc/index_fst_automation.h b/source/libs/index/inc/index_fst_automation.h
new file mode 100644
index 0000000000..7ad9a500cc
--- /dev/null
+++ b/source/libs/index/inc/index_fst_automation.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2019 TAOS Data, Inc.
+ *
+ * This program is free software: you can use, redistribute, and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3
+ * or later ("AGPL"), as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+#ifndef __INDEX_FST_AUTAOMATION_H__
+#define __INDEX_FST_AUTAOMATION_H__
+
+#include <stdbool.h>
+#include <stdint.h>
+
+// Forward typedef so the wrapper structs below can hold AutomationCtx pointers
+// before the struct body is defined.
+typedef struct AutomationCtx AutomationCtx;
+
+typedef struct StartWith {
+  AutomationCtx *autoSelf;
+} StartWith;
+
+typedef struct Complement {
+  AutomationCtx *autoSelf;
+} Complement;
+
+// automation
+struct AutomationCtx {
+  void *data;
+};
+
+// automation interface
+// NOTE(review): these are function-pointer objects defined in a header, so every
+// translation unit that includes it gets its own tentative definition — confirm
+// whether they should become members of AutomationCtx or `extern` declarations.
+void  (*start)(AutomationCtx *ctx);
+bool  (*isMatch)(AutomationCtx *ctx);
+bool  (*canMatch)(AutomationCtx *ctx, void *data);
+bool  (*willAlwaysMatch)(AutomationCtx *ctx, void *state);
+void* (*accpet)(AutomationCtx *ctx, void *state, uint8_t byte);
+void* (*accpetEof)(AutomationCtx *ctx, void *state);
+
+
+#endif
diff --git a/source/libs/index/inc/index_fst_node.h b/source/libs/index/inc/index_fst_node.h
new file mode 100644
index 0000000000..ba2d2ccd02
--- /dev/null
+++ b/source/libs/index/inc/index_fst_node.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2019 TAOS Data, Inc.
+ *
+ * This program is free software: you can use, redistribute, and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3
+ * or later ("AGPL"), as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+#ifndef __INDEX_FST_NODE_H__
+#define __INDEX_FST_NODE_H__
+
+
+
+
+#endif
diff --git a/source/libs/index/inc/index_fst_registry.h b/source/libs/index/inc/index_fst_registry.h
new file mode 100644
index 0000000000..6dcb236f29
--- /dev/null
+++ b/source/libs/index/inc/index_fst_registry.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2019 TAOS Data, Inc.
+ *
+ * This program is free software: you can use, redistribute, and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3
+ * or later ("AGPL"), as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+#ifndef __FST_REGISTRY_H__
+#define __FST_REGISTRY_H__
+
+#include "index_fst_util.h"
+
+
+typedef struct FstRegistry {
+ // NOTE(review): no members yet; a struct without members is a compiler extension, not ISO C — confirm the intended fields
+} FstRegistry;
+#endif
diff --git a/source/libs/index/inc/index_fst_util.h b/source/libs/index/inc/index_fst_util.h
new file mode 100644
index 0000000000..fc7dd44637
--- /dev/null
+++ b/source/libs/index/inc/index_fst_util.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2019 TAOS Data, Inc.
+ *
+ * This program is free software: you can use, redistribute, and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3
+ * or later ("AGPL"), as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+
+#ifndef __INDEX_FST_UTIL_H__
+#define __INDEX_FST_UTIL_H__
+
+#include "tarray.h"
+
+
+typedef uint64_t FstType;
+typedef uint64_t CompiledAddr;
+typedef uint64_t Output;
+typedef uint8_t PackSizes;
+
+
+//A sentinel value used to indicate an empty final state
+extern const CompiledAddr EMPTY_ADDRESS;
+/// A sentinel value used to indicate an invalid state.
+extern const CompiledAddr NONE_ADDRESS;
+
+// This version number is written to every finite state transducer created by
+// this crate. When a finite state transducer is read, its version number is
+// checked against this value.
+extern const uint64_t version;
+// The threshold (in number of transitions) at which an index is created for
+// a node's transitions. This speeds up lookup time at the expense of FST size
+
+extern const uint64_t TRANS_INDEX_THRESHOLD;
+// high 4 bits is transition address packed size.
+// low 4 bits is output value packed size.
+//
+// `0` is a legal value which means there are no transitions/outputs
+
+#define FST_SET_TRANSITION_PACK_SIZE(v, sz) do { (v) = ((v) & 0b00001111) | ((sz) << 4); } while(0)
+#define FST_GET_TRANSITION_PACK_SIZE(v) (((v) & 0b11110000) >> 4)
+#define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { (v) = ((v) & 0b11110000) | (sz); } while(0)
+#define FST_GET_OUTPUT_PACK_SIZE(v) ((v) & 0b00001111)
+
+// Lookup tables defined in index_fst_common.c.
+extern const uint8_t COMMON_INPUTS[];
+extern char const COMMON_INPUTS_INV[];
+
+// Maps a 1-based common-input index back to its byte; idx must not be 0.
+#define COMMON_INPUT(idx) COMMON_INPUTS_INV[(idx) - 1]
+
+// Stores into `val` the 1-based common-input index of byte `v`, or 0 when that index exceeds `max`.
+#define COMMON_INDEX(v, max, val) do { \
+  (val) = ((uint16_t)COMMON_INPUTS[(v)] + 1) % 256; \
+  (val) = (val) > (max) ? 0 : (val); \
+} while(0)
+
+
+//uint8_t commonInput(uint8_t idx);
+//uint8_t commonIdx(uint8_t v, uint8_t max);
+
+uint8_t packSize(uint64_t n);
+uint64_t unpackUint64(uint8_t *ch, uint8_t sz);
+uint8_t packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr);
+CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr);
+
+
+
+// A view [start, end] over a byte buffer; copies made by fstSliceCopy share the same data pointer.
+typedef struct FstSlice {
+ uint8_t *data; // underlying bytes (not owned by the view as far as this file shows)
+ uint64_t dLen; // length handed to fstSliceCreate
+ uint32_t start; // first index of the view; fstSliceCreate sets 0
+ uint32_t end; // last index of the view, inclusive; fstSliceCreate sets dLen - 1
+} FstSlice;
+
+FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end);
+FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen);
+bool fstSliceEmpty(FstSlice *slice);
+
+
+#endif
diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c
index 4c6e20a7d5..2974e7f9b5 100644
--- a/source/libs/index/src/index_fst.c
+++ b/source/libs/index/src/index_fst.c
@@ -15,13 +15,143 @@
#include "index_fst.h"
+
+// Creates an unfinished-node stack that starts with one empty, non-final node.
+// Returns NULL when the container or its backing array cannot be allocated.
+FstUnFinishedNodes *fstUnFinishedNodesCreate() {
+  FstUnFinishedNodes *nodes = malloc(sizeof(FstUnFinishedNodes));
+  if (nodes == NULL) { return NULL; }
+
+  nodes->stack = (SArray *)taosArrayInit(64, sizeof(FstBuilderNodeUnfinished));
+  if (nodes->stack == NULL) {
+    // Without a backing array the push below would dereference NULL.
+    free(nodes);
+    return NULL;
+  }
+  fstUnFinishedNodesPushEmpty(nodes, false);
+  return nodes;
+}
+// Pushes a fresh builder node (no transitions, zero final output, no pending
+// last transition) onto the stack. `isFinal` becomes the node's final flag.
+// If the node cannot be allocated the stack is left unchanged.
+void fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal) {
+  FstBuilderNode *node = malloc(sizeof(FstBuilderNode));
+  if (node == NULL) { return; }
+
+  node->isFinal     = isFinal;
+  node->finalOutput = 0;
+  node->trans       = NULL;
+
+  FstBuilderNodeUnfinished un = {.node = node, .last = NULL};
+  taosArrayPush(nodes->stack, &un);
+}
+// Pops the only remaining entry (the root) and hands back its builder node.
+// Asserts that exactly one entry is left and that it has no pending last transition.
+FstBuilderNode *fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes) {
+  SArray *stk = nodes->stack;
+  assert(taosArrayGetSize(stk) == 1);
+
+  FstBuilderNodeUnfinished *root = taosArrayPop(stk);
+  assert(root->last == NULL);
+
+  FstBuilderNode *result = root->node;
+  return result;
+}
+
+// Pops the top entry, records `addr` for its pending last transition via
+// fstBuilderNodeUnfinishedLastCompiled, releases that transition and returns
+// the entry's builder node. Returns NULL when nothing could be popped.
+FstBuilderNode *fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr) {
+  FstBuilderNodeUnfinished *un = taosArrayPop(nodes->stack);
+  if (un == NULL) { return NULL; }
+
+  fstBuilderNodeUnfinishedLastCompiled(un, addr);
+  free(un->last);   // TODO add func FstLastTransitionFree()
+  un->last = NULL;  // do not leave a dangling pointer in the popped slot
+  return un->node;
+}
+
+// Pops the top entry, which must not carry a pending last transition, and
+// returns its builder node.
+FstBuilderNode *fstUnFinishedNodesPopEmpty(FstUnFinishedNodes *nodes) {
+  FstBuilderNodeUnfinished *top = taosArrayPop(nodes->stack);
+  assert(top->last == NULL);
+
+  FstBuilderNode *result = top->node;
+  return result;
+}
+// Marks the bottom-most stack entry (index 0) as final and stores `out` as its final output.
+void fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes *nodes, Output out) {
+  FstBuilderNodeUnfinished *root = taosArrayGet(nodes->stack, 0);
+  FstBuilderNode *rootNode = root->node;
+
+  rootNode->isFinal     = true;
+  rootNode->finalOutput = out;
+}
+// Records `addr` for the pending last transition of the entry on top of the stack.
+// The stack must not be empty; `size - 1` would otherwise wrap around.
+void fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr) {
+  size_t sz = taosArrayGetSize(nodes->stack);
+  assert(sz > 0);
+
+  FstBuilderNodeUnfinished *un = taosArrayGet(nodes->stack, sz - 1);
+  fstBuilderNodeUnfinishedLastCompiled(un, addr);
+}
+// Appends the key suffix `bs` (bytes start..end, inclusive) to the stack.
+//  - The current top entry receives the first byte as its pending transition,
+//    carrying the whole output `out`.
+//  - Each remaining byte gets its own new entry whose pending transition has a
+//    zero output, so `out` is emitted once along the path.
+//  - Finally an empty final node is pushed to terminate the key.
+// An empty slice is a no-op. If an allocation fails the function returns early
+// and the suffix is only partially added.
+void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *nodes, FstSlice bs, Output out) {
+  FstSlice *s = &bs;
+  if (s->data == NULL || s->dLen == 0 || s->start > s->end) {
+    return;
+  }
+  size_t sz = taosArrayGetSize(nodes->stack);
+  assert(sz > 0);
+  FstBuilderNodeUnfinished *top = taosArrayGet(nodes->stack, sz - 1);
+  assert(top->last == NULL);
+
+  FstLastTransition *first = malloc(sizeof(FstLastTransition));
+  if (first == NULL) { return; }
+  first->inp = s->data[s->start];
+  first->out = out;
+  // Set before any push below: pushing may move the array storage `top` points into.
+  top->last = first;
+
+  // Start after the first byte; it already lives on the previous top entry.
+  for (uint64_t i = (uint64_t)s->start + 1; i <= s->end; i++) {
+    FstBuilderNode *n = malloc(sizeof(FstBuilderNode));
+    FstLastTransition *trn = malloc(sizeof(FstLastTransition));
+    if (n == NULL || trn == NULL) {
+      free(n);
+      free(trn);
+      return;
+    }
+    n->isFinal     = false;
+    n->finalOutput = 0;
+    n->trans       = NULL;
+
+    trn->inp = s->data[i];
+    trn->out = 0;
+
+    FstBuilderNodeUnfinished entry = {.node = n, .last = trn};
+    taosArrayPush(nodes->stack, &entry);
+  }
+  fstUnFinishedNodesPushEmpty(nodes, true);
+}
+
+
+// Counts how many leading bytes of `bs` equal the pending last-transition inputs
+// of the stack entries, walking from the bottom of the stack upwards. The walk
+// stops at the first mismatch or at the first entry without a pending transition.
+uint64_t fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs) {
+  FstSlice *s = &bs;
+
+  size_t lsz = (size_t)(s->end - s->start + 1);  // data len
+  size_t ssz = taosArrayGetSize(node->stack);    // stack size
+
+  uint64_t count = 0;
+  for (size_t i = 0; i < ssz && i < lsz; i++) {
+    FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, i);
+    // The top entry has no pending transition yet, so `last` can be NULL.
+    if (un->last == NULL || un->last->inp != s->data[s->start + i]) {
+      break;
+    }
+    count++;
+  }
+  return count;
+}
+// Walks the stack from the bottom while the pending transition inputs match the
+// bytes of `bs`, redistributing outputs along the shared path:
+//  - each matched transition keeps only the part of its output that is common
+//    with the remaining input output (the smaller of the two values);
+//  - what the transition gives up is pushed down as an output prefix onto the
+//    following stack entry;
+//  - the remaining input output shrinks by the common part.
+// `in` is the output of the key being inserted. On return `*out` (when `out` is
+// not NULL) holds the part of `in` that was not absorbed by the shared prefix.
+// Returns the number of matched bytes.
+uint64_t FstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, FstSlice bs, Output in, Output *out) {
+  FstSlice *s = &bs;
+
+  size_t lsz = (size_t)(s->end - s->start + 1);  // data len
+  size_t ssz = taosArrayGetSize(node->stack);    // stack size
+
+  Output   remain  = in;
+  uint64_t matched = 0;
+  while (matched < lsz && matched < ssz) {
+    FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, matched);
+    FstLastTransition *last = un->last;
+    if (last == NULL || last->inp != s->data[s->start + matched]) {
+      break;
+    }
+    matched++;
+
+    Output commPrefix = (last->out < remain) ? last->out : remain;
+    Output addPrefix  = last->out - commPrefix;
+    remain   -= commPrefix;
+    last->out = commPrefix;
+
+    // The surplus belongs to the node the matched transition leads to, i.e. the next entry.
+    if (addPrefix != 0 && matched < ssz) {
+      FstBuilderNodeUnfinished *next = taosArrayGet(node->stack, matched);
+      fstBuilderNodeUnfinishedAddOutputPrefix(next, addPrefix);
+    }
+  }
+  if (out != NULL) { *out = remain; }
+  return matched;
+}
+
// fst node function
-FstNode *fstNodeCreate(int64_t version, ComiledAddr addr, uint8_t *data) {
+
+
+
+// Builds an FstNode describing the node stored at `addr` inside `slice`.
+//  - addr == EMPTY_ADDRESS yields the empty final node; `slice` is not read.
+//  - otherwise the top two bits of the byte at `addr` select the encoding:
+//    0b11 -> OneTransNext, 0b10 -> OneTrans, anything else -> AnyTrans.
+// Size and transition-count decoding is still TODO; those fields are zeroed
+// instead of being read uninitialised. Returns NULL when allocation fails.
+FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *slice) {
FstNode *n = (FstNode *)malloc(sizeof(FstNode));
if (n == NULL) { return NULL; }
if (addr == EMPTY_ADDRESS) {
- n->date = NULL;
+    n->data = fstSliceCreate(NULL, 0);
n->version = version;
n->state = EmptyFinal;
n->start = EMPTY_ADDRESS;
@@ -29,20 +159,138 @@ FstNode *fstNodeCreate(int64_t version, ComiledAddr addr, uint8_t *data) {
n->isFinal = true;
n->nTrans = 0;
n->sizes = 0;
- n->finalOutpu = 0;
- return n;
+    n->finalOutput = 0;
+    return n;  // must not fall through: the code below would overwrite the empty node
+  }
+  uint8_t v = slice->data[addr];
+  uint8_t s = (v & 0b11000000) >> 6;
+  if (s == 0b11) { // oneTransNext
+    n->data = fstSliceCopy(slice, 0, addr);
+    n->version = version;
+    n->state = OneTransNext;
+    n->start = addr;
+    n->end = addr; //? s.end_addr(data);
+    n->isFinal = false;
+    n->sizes = 0;
+    n->nTrans = 0;
+    n->finalOutput = 0;
+  } else if (s == 0b10) { // oneTrans
+    uint64_t sz = 0; // TODO: fetch sz from addr
+    n->data = fstSliceCopy(slice, 0, addr);
+    n->version = version;
+    n->state = OneTrans;
+    n->start = addr;
+    n->end = addr; // s.end_addr(data, sz);
+    n->isFinal = false;
+    n->nTrans = 1;
+    n->sizes = sz;
+    n->finalOutput = 0;
+  } else { // anyTrans
+    uint64_t sz = 0;     // TODO: s.sizes(data)
+    uint32_t nTrans = 0; // TODO: s.ntrans(data)
+    n->data = *slice;
+    n->version = version;
+    n->state = AnyTrans;
+    n->start = addr;
+    n->end = addr; // s.end_addr(version, data, sz, ntrans);
+    n->isFinal = false; // s.is_final_state();
+    n->nTrans = nTrans;
+    n->sizes = sz;
+    n->finalOutput = 0; // s.final_output(version, data, sz, ntrans);
}
- uint8_t v = (data[addr] & 0b1100000) >> 6;
- if (v == 0b11) {
-
- } else if (v == 0b10) {
-
- } else {
+  return n;
+}
+// Allocates a transition iterator for `node` whose range starts at 0 and whose
+// end is the node's transition count. Returns NULL when allocation fails.
+FstTransitions* fstNodeTransitions(FstNode *node) {
+  FstTransitions *iter = malloc(sizeof(FstTransitions));
+  if (iter != NULL) {
+    iter->node        = node;
+    iter->range.start = 0;
+    iter->range.end   = FST_NODE_LEN(node);
+  }
+  return iter;
+}
+// Skeleton for fetching the i-th transition of `node` into `res`.
+// NOTE(review): none of the branches writes `res` yet; the function currently
+// only reports true for the three known transition states and false otherwise.
+bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *res) {
+ bool s = true;
+ if (node->state == OneTransNext) {
- }
-
+ } else if (node->state == OneTrans) {
+ // TODO: not implemented
+ } else if (node->state == AnyTrans) {
+ // TODO: not implemented
+ } else {
+ s = false;
+ }
+ return s;
+}
+
+// Skeleton for fetching the address of the i-th transition of `node`.
+// Reports false only for the EmptyFinal state; `res` is not written yet.
+bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res) {
+  bool found = true;
+  switch (node->state) {
+    case OneTransNext:
+      break;
+    case OneTrans:
+      break;
+    case AnyTrans:
+      break;
+    case EmptyFinal:
+      found = false;
+      break;
+    default:
+      break;
+  }
+  return found;
+}
+
+// Looks up the transition of `node` whose input byte is `b`.
+// Returns true and stores the transition index in `*res` on a match, false when
+// the byte does not match or the node is EmptyFinal.
+// NOTE(review): decoding of the node's input byte is not implemented; `input`
+// is a zero placeholder so the comparison no longer reads an indeterminate value.
+// NOTE(review): the AnyTrans lookup is not implemented; it still returns true
+// without writing `*res`.
+bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res) {
+  bool s = true;
+  uint8_t input = 0;  // TODO: s.input
+  if (node->state == OneTransNext) {
+    if (b == input) { *res = 0; }
+    else { return false; }
+  } else if (node->state == OneTrans) {
+    if (b == input) { *res = 0; }
+    else { return false; }
+  } else if (node->state == AnyTrans) {
+    // TODO: not implemented
+  } else if (node->state == EmptyFinal) {
+    s = false;
+  }
+  return s;
+}
+
+// Skeleton that selects the encoding for `builderNode`; the encoders themselves
+// are still commented out, so every path currently returns true.
+//  - no transitions, final, zero final output -> nothing to write
+//  - exactly one transition on a non-final node -> OneTransNext / OneTrans
+//  - everything else                           -> AnyTrans
+bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode) {
+  size_t nTrans = taosArrayGetSize(builderNode->trans);
+  assert(nTrans < 256);
+
+  bool emptyFinal = (nTrans == 0) && builderNode->isFinal && (builderNode->finalOutput == 0);
+  if (emptyFinal) {
+    return true;
+  }
+
+  if (nTrans == 1 && !builderNode->isFinal) {
+    FstTransition *tran = taosArrayGet(builderNode->trans, 0);
+    if (tran->addr == lastAddr && tran->out == 0) {
+      //OneTransNext::compile(w, lastAddr, tran->inp);
+    } else {
+      //OneTrans::Compile(w, lastAddr, *tran);
+    }
+    return true;
+  }
+
+  // AnyTrans->Compile(w, addr, node);
+  return true;
+}
+
+
+
+
+// Allocates a builder that writes through `w`.
+// The unfinished-node stack is created with fstUnFinishedNodesCreate so it
+// starts with a usable array and its root node; the remaining bookkeeping
+// fields start out empty. Returns NULL when an allocation fails.
+// `ty` is not used yet.
+FstBuilder *fstBuilderCreate(void *w, FstType ty) {
+  FstBuilder *b = malloc(sizeof(FstBuilder));
+  if (NULL == b) { return b; }
+
+  FstCountingWriter wtr = {.wtr = w, .count = 0, .summer = 0};
+  b->wtr = wtr;
+  b->unfinished = fstUnFinishedNodesCreate();
+  if (b->unfinished == NULL) {
+    free(b);
+    return NULL;
+  }
+  b->last     = NULL;
+  b->lastAddr = NONE_ADDRESS;
+  b->len      = 0;
+  return b;
}
+// Returns a view over the node's data running from the slice's `end` index to
+// the last byte of the underlying buffer.
+FstSlice fstNodeAsSlice(FstNode *node) {
+  FstSlice *src = &node->data;
+  return fstSliceCopy(src, src->end, src->dLen - 1);
+}
-
diff --git a/source/libs/index/src/index_fst_automation.c b/source/libs/index/src/index_fst_automation.c
new file mode 100644
index 0000000000..f2f48bbc8a
--- /dev/null
+++ b/source/libs/index/src/index_fst_automation.c
@@ -0,0 +1,14 @@
+/*
+ * Copyright (c) 2019 TAOS Data, Inc.
+ *
+ * This program is free software: you can use, redistribute, and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3
+ * or later ("AGPL"), as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
diff --git a/source/libs/index/src/index_fst_common.c b/source/libs/index/src/index_fst_common.c
index 39e5f89b35..4ab78cddc5 100644
--- a/source/libs/index/src/index_fst_common.c
+++ b/source/libs/index/src/index_fst_common.c
@@ -12,6 +12,8 @@
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*/
+
+#include "tutil.h"
const uint8_t COMMON_INPUTS[] = {
84, // '\x00'
85, // '\x01'
@@ -271,7 +273,7 @@ const uint8_t COMMON_INPUTS[] = {
255, // 'ΓΏ'
};
-char const COMMON_INPUTS_INV[] = [
+char const COMMON_INPUTS_INV[] = {
't', 'e', '/', 'o', 'a', 's', 'r', 'i', 'p', 'c', 'n', 'w',
'.', 'h', 'l', 'm', '-', 'd', 'u', '0', '1', '2', 'g', '=',
':', 'b', 'f', '3', 'y', '5', '&', '_', '4', 'v', '9', '6',
@@ -300,5 +302,5 @@ char const COMMON_INPUTS_INV[] = [
'\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef', '\xf0',
'\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7', '\xf8',
'\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
-];
+};
diff --git a/source/libs/index/src/index_fst_node.c b/source/libs/index/src/index_fst_node.c
new file mode 100644
index 0000000000..3d5efd30f3
--- /dev/null
+++ b/source/libs/index/src/index_fst_node.c
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2019 TAOS Data, Inc.
+ *
+ * This program is free software: you can use, redistribute, and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3
+ * or later ("AGPL"), as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
diff --git a/source/libs/index/src/index_fst_registry.c b/source/libs/index/src/index_fst_registry.c
new file mode 100644
index 0000000000..940c5863f4
--- /dev/null
+++ b/source/libs/index/src/index_fst_registry.c
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2019 TAOS Data, Inc.
+ *
+ * This program is free software: you can use, redistribute, and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3
+ * or later ("AGPL"), as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+#include "index_fst_registry.h"
+
diff --git a/source/libs/index/src/index_fst_util.c b/source/libs/index/src/index_fst_util.c
new file mode 100644
index 0000000000..20751baf5f
--- /dev/null
+++ b/source/libs/index/src/index_fst_util.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2019 TAOS Data, Inc.
+ *
+ * This program is free software: you can use, redistribute, and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3
+ * or later ("AGPL"), as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+#include "index_fst_util.h"
+
+
+
+//A sentinel value used to indicate an empty final state
+const CompiledAddr EMPTY_ADDRESS = 0;
+/// A sentinel value used to indicate an invalid state.
+const CompiledAddr NONE_ADDRESS = 1;
+
+// This version number is written to every finite state transducer created by
+// this crate. When a finite state transducer is read, its version number is
+// checked against this value.
+const uint64_t version = 3;
+// The threshold (in number of transitions) at which an index is created for
+// a node's transitions. This speeds up lookup time at the expense of FST size
+
+const uint64_t TRANS_INDEX_THRESHOLD = 32;
+
+
+//uint8_t commonInput(uint8_t idx) {
+// if (idx == 0) { return -1; }
+// else {
+// return COMMON_INPUTS_INV[idx - 1];
+// }
+//}
+//
+//uint8_t commonIdx(uint8_t v, uint8_t max) {
+// uint8_t v = ((uint16_t)tCOMMON_INPUTS[v] + 1)%256;
+// return v > max ? 0: v;
+//}
+
+
+
+// Returns the number of bytes (1..8) needed to store `n`.
+uint8_t packSize(uint64_t n) {
+  for (uint8_t bytes = 1; bytes < 8; bytes++) {
+    // 2^(8 * bytes) is the first value that no longer fits in `bytes` bytes.
+    if (n < ((uint64_t)1 << (8 * bytes))) {
+      return bytes;
+    }
+  }
+  return 8;
+}
+
+// Assembles up to 8 bytes from `ch` into a 64-bit value, least significant byte first.
+// `sz` values above 8 are treated as 8, since further bytes cannot fit.
+uint64_t unpackUint64(uint8_t *ch, uint8_t sz) {
+  uint64_t n = 0;
+  for (uint8_t i = 0; i < sz && i < 8; i++) {
+    // Widen before shifting: ch[i] promotes to int, and shifting an int by 32 or more is undefined.
+    n |= (uint64_t)ch[i] << (8 * i);
+  }
+  return n;
+}
+// Returns the packed size of the distance from `nodeAddr` back to `transAddr`;
+// a transition to EMPTY_ADDRESS is sized as EMPTY_ADDRESS itself.
+uint8_t packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr) {
+  CompiledAddr delta = (transAddr == EMPTY_ADDRESS) ? EMPTY_ADDRESS : (nodeAddr - transAddr);
+  return packSize(delta);
+}
+// Decodes a packed delta of `len` bytes from `data` and turns it back into an
+// address relative to `nodeAddr`. A delta equal to EMPTY_ADDRESS maps to EMPTY_ADDRESS.
+CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr) {
+  // unpackUint64 takes unsigned bytes and an 8-bit count; a 64-bit value never spans more than 8 bytes.
+  uint8_t  sz    = (len > 8) ? 8 : (uint8_t)len;
+  uint64_t delta = unpackUint64((uint8_t *)data, sz);
+  // delta_add = u64_to_usize
+  if (delta == EMPTY_ADDRESS) {
+    return EMPTY_ADDRESS;
+  } else {
+    return nodeAddr - delta;
+  }
+}
+
+// fst slice func
+// Wraps `data` (of length `dLen`) in a view covering the whole buffer.
+// For dLen == 0 the computation dLen - 1 wraps, so `end` becomes the largest
+// uint32_t value.
+FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen) {
+  FstSlice view;
+  view.data  = data;
+  view.dLen  = dLen;
+  view.start = 0;
+  view.end   = (uint32_t)(dLen - 1);
+  return view;
+}
+// Returns a view [start, end] (inclusive) sharing `slice`'s buffer.
+// When the range is out of bounds or reversed, a fully initialised empty slice
+// (the same value fstSliceCreate(NULL, 0) produces) is returned instead of a
+// struct with indeterminate fields.
+FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end) {
+  if (start >= slice->dLen || end >= slice->dLen || start > end) {
+    return fstSliceCreate(NULL, 0);
+  }
+
+  FstSlice t;
+  t.data  = slice->data;
+  t.dLen  = slice->dLen;
+  t.start = start;
+  t.end   = end;
+  return t;
+}
+// True when the slice has no buffer or a zero length.
+bool fstSliceEmpty(FstSlice *slice) {
+  if (slice->data == NULL) {
+    return true;
+  }
+  return slice->dLen == 0;
+}
+
+
+