update fst build struct
This commit is contained in:
parent
ccca561d11
commit
dc9163a29a
|
@ -88,6 +88,19 @@ typedef struct FstTransitions {
|
||||||
FstRange range;
|
FstRange range;
|
||||||
} FstTransitions;
|
} FstTransitions;
|
||||||
|
|
||||||
|
// FstState and related functions
|
||||||
|
|
||||||
|
typedef struct FstState {
|
||||||
|
State state;
|
||||||
|
uint8_t val;
|
||||||
|
} FstState;
|
||||||
|
|
||||||
|
// Decodes the state of the node located at `addr` inside `data`.
// Returns an EmptyFinal state with val == 0 when `addr` is EMPTY_ADDRESS;
// otherwise the state kind is taken from the two high bits of the byte at
// `addr`, and that byte is stored in `val`.
FstState fstStateCreate(FstSlice* data, CompiledAddr addr);
|
||||||
|
|
||||||
|
// State-kind predicates for a pointer to a node whose `state` member is an FstState.
// The argument is parenthesized so that expressions such as `p + 1` or `&n`
// expand correctly.
#define FST_STATE_ONE_TRNAS_NEXT(node) ((node)->state.state == OneTransNext)
#define FST_STATE_ONE_TRNAS(node)      ((node)->state.state == OneTrans)
#define FST_STATE_ANY_TRANS(node)      ((node)->state.state == AnyTrans)
#define FST_STATE_EMPTY_FINAL(node)    ((node)->state.state == EmptyFinal)
|
||||||
|
|
||||||
|
|
||||||
typedef struct FstLastTransition {
|
typedef struct FstLastTransition {
|
||||||
|
@ -104,6 +117,8 @@ typedef struct FstBuilderNodeUnfinished {
|
||||||
FstLastTransition* last;
|
FstLastTransition* last;
|
||||||
} FstBuilderNodeUnfinished;
|
} FstBuilderNodeUnfinished;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr);
|
void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr);
|
||||||
void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Output out);
|
void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Output out);
|
||||||
|
|
||||||
|
@ -113,7 +128,7 @@ void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Out
|
||||||
typedef struct FstNode {
|
typedef struct FstNode {
|
||||||
FstSlice data;
|
FstSlice data;
|
||||||
uint64_t version;
|
uint64_t version;
|
||||||
State state;
|
FstState state;
|
||||||
CompiledAddr start;
|
CompiledAddr start;
|
||||||
CompiledAddr end;
|
CompiledAddr end;
|
||||||
bool isFinal;
|
bool isFinal;
|
||||||
|
@ -171,6 +186,4 @@ void fstLastTransitionDestroy(FstLastTransition *trn);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -155,65 +155,86 @@ uint64_t fstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node,
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Decodes the state of the node located at `addr` inside `data`.
//
// data: slice holding the encoded nodes; only read when addr != EMPTY_ADDRESS.
// addr: offset of the node's state byte inside data->data.
//
// Returns {EmptyFinal, 0} for EMPTY_ADDRESS. Otherwise the two high bits of
// the byte at `addr` select the kind (11 -> OneTransNext, 10 -> OneTrans,
// anything else -> AnyTrans) and `val` holds that byte unchanged.
FstState fstStateCreate(FstSlice* data, CompiledAddr addr) {
  FstState fs = {.state = EmptyFinal, .val = 0};
  if (addr == EMPTY_ADDRESS) {
    return fs;
  }

  uint8_t v = data->data[addr];
  // Hex constants instead of 0b... literals, which are not standard C11.
  uint8_t tag = (uint8_t)((v & 0xC0) >> 6);
  if (tag == 0x3) {
    fs.state = OneTransNext;
  } else if (tag == 0x2) {
    fs.state = OneTrans;
  } else {
    fs.state = AnyTrans;
  }
  fs.val = v;
  return fs;
}
|
||||||
|
|
||||||
// fst node function
|
// fst node function
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Allocates an FstNode describing the node stored at `addr` in `slice`.
//
// version: format version recorded in the node.
// addr:    address of the node; EMPTY_ADDRESS yields an empty final node.
// slice:   encoded node data, decoded through fstStateCreate().
//
// Returns NULL when the allocation fails. The caller owns the returned node
// and releases it with fstNodeDestroy().
//
// Decoding of sizes, transition count, end address and final output is still
// a placeholder (see the TODO notes). Those placeholders are set to 0 instead
// of being copied from uninitialized locals.
FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *slice) {
  FstNode *n = (FstNode *)malloc(sizeof(FstNode));
  if (n == NULL) { return NULL; }

  FstState st = fstStateCreate(slice, addr);

  if (st.state == EmptyFinal) {
    n->data = fstSliceCreate(NULL, 0);
    n->version = version;
    n->state = st;
    n->start = EMPTY_ADDRESS;
    n->end = EMPTY_ADDRESS;
    n->isFinal = true;
    n->nTrans = 0;
    n->sizes = 0;
    n->finalOutput = 0;
  } else if (st.state == OneTransNext) {
    n->data = fstSliceCopy(slice, 0, addr);
    n->version = version;
    n->state = st;
    n->start = addr;
    n->end = addr;  // TODO: s.end_addr(data)
    n->isFinal = false;
    n->sizes = 0;
    n->nTrans = 0;
    n->finalOutput = 0;
  } else if (st.state == OneTrans) {
    uint64_t sz = 0;  // TODO: fetch sz from addr
    n->data = fstSliceCopy(slice, 0, addr);
    n->version = version;
    n->state = st;
    n->start = addr;
    n->end = addr;  // TODO: s.end_addr(data, sz)
    n->isFinal = false;
    n->nTrans = 1;
    n->sizes = sz;
    n->finalOutput = 0;
  } else {
    // AnyTrans
    uint64_t sz = 0;      // TODO: s.sizes(data)
    uint32_t nTrans = 0;  // TODO: s.ntrans(data)
    n->data = *slice;
    n->version = version;
    n->state = st;
    n->start = addr;
    n->end = addr;          // TODO: s.end_addr(version, data, sz, ntrans)
    n->isFinal = false;     // TODO: s.is_final_state()
    n->nTrans = nTrans;
    n->sizes = sz;
    n->finalOutput = 0;     // TODO: s.final_output(version, data, sz, ntrans)
  }
  return n;
}
|
||||||
// Releases a node obtained from fstNodeCreate(). Only the FstNode allocation
// itself is freed. Passing NULL is harmless because free(NULL) does nothing.
void fstNodeDestroy(FstNode *node) {
  free(node);
}
|
||||||
FstTransitions* fstNodeTransitions(FstNode *node) {
|
FstTransitions* fstNodeTransitions(FstNode *node) {
|
||||||
|
@ -222,50 +243,65 @@ FstTransitions* fstNodeTransitions(FstNode *node) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
FstRange range = {.start = 0, .end = FST_NODE_LEN(node)};
|
FstRange range = {.start = 0, .end = FST_NODE_LEN(node)};
|
||||||
t->node = node;
|
|
||||||
t->range = range;
|
t->range = range;
|
||||||
|
t->node = node;
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
// Returns the transition at index `i`.
//
// For OneTransNext, OneTrans and AnyTrans nodes the function writes `*trn`
// and returns true; for any other state it returns false and leaves `*trn`
// untouched. Decoding is not implemented yet: `i` is unused and every field
// of `*trn` is set to 0.
bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *trn) {
  bool hasTransitions = FST_STATE_ONE_TRNAS_NEXT(node) ||
                        FST_STATE_ONE_TRNAS(node) ||
                        FST_STATE_ANY_TRANS(node);
  if (!hasTransitions) {
    return false;
  }

  trn->inp = 0;
  trn->out = 0;
  trn->addr = 0;
  return true;
}
|
||||||
|
|
||||||
|
// Returns the transition address of the `i`th transition
|
||||||
bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res) {
|
bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res) {
|
||||||
bool s = true;
|
bool s = true;
|
||||||
if (node->state == OneTransNext) {
|
if (FST_STATE_ONE_TRNAS_NEXT(node)) {
|
||||||
|
|
||||||
} else if (node->state == OneTrans) {
|
} else if (FST_STATE_ONE_TRNAS(node)) {
|
||||||
|
|
||||||
} else if (node->state == AnyTrans) {
|
} else if (FST_STATE_ANY_TRANS(node)) {
|
||||||
|
|
||||||
} else if (node->state == EmptyFinal){
|
} else if (FST_STATE_EMPTY_FINAL(node)){
|
||||||
s = false;
|
s = false;
|
||||||
|
} else {
|
||||||
|
assert(0);
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Finds the `i`th transition corresponding to the given input byte.
|
||||||
|
// If no transition for this byte exists, then `false` is returned.
|
||||||
bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res) {
|
bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res) {
|
||||||
bool s = true;
|
bool s = true;
|
||||||
uint8_t input; // s.input
|
uint8_t input; // get s.input
|
||||||
if (node->state == OneTransNext) {
|
FstState fs = node->state;
|
||||||
|
if (FST_STATE_ONE_TRNAS_NEXT(node)) {
|
||||||
if (b == input) { *res = 0; }
|
if (b == input) { *res = 0; }
|
||||||
else { return s ; }
|
else { return s ; }
|
||||||
} else if (node->state == OneTrans) {
|
} else if (FST_STATE_ONE_TRNAS(node)) {
|
||||||
if (b == input) { *res = 0; }
|
if (b == input) { *res = 0; }
|
||||||
else {return s;}
|
else {return s;}
|
||||||
} else if (node->state == AnyTrans) {
|
} else if (FST_STATE_ANY_TRANS(node)) {
|
||||||
|
|
||||||
} else if (node->state == EmptyFinal) {
|
} else if (FST_STATE_EMPTY_FINAL(node)) {
|
||||||
s = false;
|
s = false;
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
|
|
Loading…
Reference in New Issue