update fst build struct

This commit is contained in:
yihaoDeng 2021-11-23 12:13:44 +08:00
parent ccca561d11
commit dc9163a29a
2 changed files with 84 additions and 35 deletions

View File

@ -88,6 +88,19 @@ typedef struct FstTransitions {
FstRange range; FstRange range;
} FstTransitions; } FstTransitions;
// FstState and related functions.
// Decoded state of a node: which transition layout it uses, plus the byte
// that the layout was decoded from.
typedef struct FstState {
State state;  // layout kind; the code in this file uses OneTransNext, OneTrans, AnyTrans and EmptyFinal
uint8_t val;  // byte read at the node address by fstStateCreate(); 0 for EmptyFinal
} FstState;
// Decodes the state of the node located at `addr` inside `data`.
// Passing EMPTY_ADDRESS yields an EmptyFinal state with val == 0.
FstState fstStateCreate(FstSlice* data, CompiledAddr addr);
// State predicates for any pointer `node` whose `state` member is an FstState.
// The argument is parenthesized so that expressions such as `p + 1` or `&n`
// expand correctly. The "TRNAS" spelling is kept because callers use these names.
#define FST_STATE_ONE_TRNAS_NEXT(node) ((node)->state.state == OneTransNext)
#define FST_STATE_ONE_TRNAS(node) ((node)->state.state == OneTrans)
#define FST_STATE_ANY_TRANS(node) ((node)->state.state == AnyTrans)
#define FST_STATE_EMPTY_FINAL(node) ((node)->state.state == EmptyFinal)
typedef struct FstLastTransition { typedef struct FstLastTransition {
@ -104,6 +117,8 @@ typedef struct FstBuilderNodeUnfinished {
FstLastTransition* last; FstLastTransition* last;
} FstBuilderNodeUnfinished; } FstBuilderNodeUnfinished;
void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr); void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr);
void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Output out); void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Output out);
@ -113,7 +128,7 @@ void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, Out
typedef struct FstNode { typedef struct FstNode {
FstSlice data; FstSlice data;
uint64_t version; uint64_t version;
State state; FstState state;
CompiledAddr start; CompiledAddr start;
CompiledAddr end; CompiledAddr end;
bool isFinal; bool isFinal;
@ -171,6 +186,4 @@ void fstLastTransitionDestroy(FstLastTransition *trn);
#endif #endif

View File

@ -155,65 +155,86 @@ uint64_t fstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node,
return res; return res;
} }
// Decodes the state of the node stored at `addr` in `data`.
//
// EMPTY_ADDRESS yields {EmptyFinal, 0} without reading `data`. Otherwise the
// byte at `addr` is read and its two most significant bits select the state:
//   11 -> OneTransNext, 10 -> OneTrans, anything else -> AnyTrans.
// The whole byte is stored in `val`.
//
// NOTE(review): `addr` is not range-checked against the slice here; the
// slice's length field is not visible from this function -- confirm callers
// only pass valid addresses.
FstState fstStateCreate(FstSlice* data, CompiledAddr addr) {
  FstState fs = {.state = EmptyFinal, .val = 0};
  if (addr == EMPTY_ADDRESS) {
    return fs;
  }

  uint8_t v = data->data[addr];
  // Hex masks instead of 0b... literals, which are an extension before C23.
  uint8_t tag = (uint8_t)((v & 0xC0) >> 6);
  if (tag == 0x3) {
    fs.state = OneTransNext;
  } else if (tag == 0x2) {
    fs.state = OneTrans;
  } else {
    fs.state = AnyTrans;
  }
  fs.val = v;
  return fs;
}
// fst node function // fst node function
// Allocates an FstNode describing the node stored at `addr` in `slice`.
//
// The node kind comes from fstStateCreate(); the remaining fields are filled
// in per kind. Returns NULL if the allocation fails. The returned node is
// released with fstNodeDestroy().
FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *slice) {
  FstNode *n = (FstNode *)malloc(sizeof(FstNode));
  if (n == NULL) { return NULL; }

  FstState st = fstStateCreate(slice, addr);

  // Fields that get the same value for every node kind.
  n->version     = version;
  n->state       = st;
  n->finalOutput = 0;  // TODO: s.final_output(version, data, sz, ntrans) for AnyTrans

  if (st.state == EmptyFinal) {
    n->data    = fstSliceCreate(NULL, 0);
    n->start   = EMPTY_ADDRESS;
    n->end     = EMPTY_ADDRESS;
    n->isFinal = true;
    n->nTrans  = 0;
    n->sizes   = 0;
  } else if (st.state == OneTransNext) {
    n->data    = fstSliceCopy(slice, 0, addr);
    n->start   = addr;
    n->end     = addr;  // TODO: s.end_addr(data)
    n->isFinal = false;
    n->sizes   = 0;
    n->nTrans  = 0;
  } else if (st.state == OneTrans) {
    // Size decoding is not implemented yet; use 0 rather than reading an
    // uninitialized local (undefined behaviour).
    uint64_t sz = 0;  // TODO: fetch sz from addr
    n->data    = fstSliceCopy(slice, 0, addr);
    n->start   = addr;
    n->end     = addr;  // TODO: s.end_addr(data, sz)
    n->isFinal = false;
    n->nTrans  = 1;
    n->sizes   = sz;
  } else {
    // AnyTrans: size and transition-count decoding are not implemented yet;
    // both default to 0 instead of being left uninitialized.
    uint64_t sz     = 0;  // TODO: s.sizes(data)
    uint32_t nTrans = 0;  // TODO: s.ntrans(data)
    n->data    = *slice;
    n->start   = addr;
    n->end     = addr;   // TODO: s.end_addr(version, data, sz, ntrans)
    n->isFinal = false;  // TODO: s.is_final_state()
    n->nTrans  = nTrans;
    n->sizes   = sz;
  }
  return n;
}
// Releases a node allocated by fstNodeCreate(). Passing NULL is allowed.
//
// NOTE(review): only the node itself is freed. Whether node->data owns memory
// that also needs releasing is not visible here -- confirm against the
// FstSlice implementation.
void fstNodeDestroy(FstNode *node) {
  free(node);  // free(NULL) is a no-op, so no explicit NULL guard is needed
}
FstTransitions* fstNodeTransitions(FstNode *node) { FstTransitions* fstNodeTransitions(FstNode *node) {
@ -222,50 +243,65 @@ FstTransitions* fstNodeTransitions(FstNode *node) {
return NULL; return NULL;
} }
FstRange range = {.start = 0, .end = FST_NODE_LEN(node)}; FstRange range = {.start = 0, .end = FST_NODE_LEN(node)};
t->node = node;
t->range = range; t->range = range;
t->node = node;
return t; return t;
} }
// Returns the transition at index `i`.
//
// For OneTransNext, OneTrans and AnyTrans nodes `*trn` is filled in and true
// is returned; for any other state `*trn` is left untouched and false is
// returned. Decoding is not implemented yet: every field is currently set to
// 0 and `i` is ignored.
bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *trn) {
  (void)i;  // unused until per-index decoding is implemented

  bool hasTransitions = FST_STATE_ONE_TRNAS_NEXT(node) ||
                        FST_STATE_ONE_TRNAS(node) ||
                        FST_STATE_ANY_TRANS(node);
  if (!hasTransitions) {
    return false;
  }

  trn->inp  = 0;
  trn->out  = 0;
  trn->addr = 0;
  return true;
}
// Returns the transition address of the `i`th transition.
//
// Returns false for an EmptyFinal node and true for OneTransNext, OneTrans
// and AnyTrans nodes. Any other state trips an assertion.
//
// NOTE(review): the address lookup itself is not implemented yet -- `*res` is
// never written and `i` is ignored, even when true is returned.
bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res) {
  (void)i;
  (void)res;

  if (FST_STATE_EMPTY_FINAL(node)) {
    return false;
  }
  if (FST_STATE_ONE_TRNAS_NEXT(node) || FST_STATE_ONE_TRNAS(node) || FST_STATE_ANY_TRANS(node)) {
    return true;
  }

  assert(0);    // unknown state
  return true;  // reached only when assertions are compiled out
}
// Finds the `i`th transition corresponding to the given input byte.
// If no transition for this byte exists, then `false` is returned.
bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res) { bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res) {
bool s = true; bool s = true;
uint8_t input; // s.input uint8_t input; // get s.input
if (node->state == OneTransNext) { FstState fs = node->state;
if (FST_STATE_ONE_TRNAS_NEXT(node)) {
if (b == input) { *res = 0; } if (b == input) { *res = 0; }
else { return s ; } else { return s ; }
} else if (node->state == OneTrans) { } else if (FST_STATE_ONE_TRNAS(node)) {
if (b == input) { *res = 0; } if (b == input) { *res = 0; }
else {return s;} else {return s;}
} else if (node->state == AnyTrans) { } else if (FST_STATE_ANY_TRANS(node)) {
} else if (node->state == EmptyFinal) { } else if (FST_STATE_EMPTY_FINAL(node)) {
s = false; s = false;
} }
return s; return s;