Merge remote-tracking branch 'origin/3.0' into feature/dnode3
This commit is contained in:
commit
8acbcb7cdd
|
@ -80,6 +80,9 @@ void fstBuilderInsertOutput(FstBuilder *b, FstSlice bs, Output in);
|
||||||
OrderType fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup);
|
OrderType fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDup);
|
||||||
void fstBuilderCompileFrom(FstBuilder *b, uint64_t istate);
|
void fstBuilderCompileFrom(FstBuilder *b, uint64_t istate);
|
||||||
CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn);
|
CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn);
|
||||||
|
void* fstBuilerIntoInner(FstBuilder *b);
|
||||||
|
void fstBuilderFinish(FstBuilder *b);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -216,21 +219,6 @@ bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res);
|
||||||
bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode);
|
bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode);
|
||||||
FstSlice fstNodeAsSlice(FstNode *node);
|
FstSlice fstNodeAsSlice(FstNode *node);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
typedef struct FstMeta {
|
|
||||||
uint64_t version;
|
|
||||||
CompiledAddr rootAddr;
|
|
||||||
FstType ty;
|
|
||||||
uint64_t len;
|
|
||||||
uint32_t checkSum;
|
|
||||||
} FstMeta;
|
|
||||||
|
|
||||||
typedef struct Fst {
|
|
||||||
FstMeta meta;
|
|
||||||
void *data; //
|
|
||||||
} Fst;
|
|
||||||
|
|
||||||
// ops
|
// ops
|
||||||
|
|
||||||
typedef struct FstIndexedValue {
|
typedef struct FstIndexedValue {
|
||||||
|
@ -242,5 +230,29 @@ FstLastTransition *fstLastTransitionCreate(uint8_t inp, Output out);
|
||||||
void fstLastTransitionDestroy(FstLastTransition *trn);
|
void fstLastTransitionDestroy(FstLastTransition *trn);
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct FstMeta {
|
||||||
|
uint64_t version;
|
||||||
|
CompiledAddr rootAddr;
|
||||||
|
FstType ty;
|
||||||
|
uint64_t len;
|
||||||
|
uint32_t checkSum;
|
||||||
|
} FstMeta;
|
||||||
|
|
||||||
|
typedef struct Fst {
|
||||||
|
FstMeta *meta;
|
||||||
|
FstSlice *data; //
|
||||||
|
FstNode *root; //
|
||||||
|
} Fst;
|
||||||
|
|
||||||
|
// refactor simple function
|
||||||
|
|
||||||
|
Fst* fstCreate(FstSlice *data);
|
||||||
|
void fstDestroy(Fst *fst);
|
||||||
|
|
||||||
|
bool fstGet(Fst *fst, FstSlice *b, Output *out);
|
||||||
|
FstNode* fstGetNode(Fst *fst, CompiledAddr);
|
||||||
|
FstType fstGetType(Fst *fst);
|
||||||
|
CompiledAddr fstGetRootAddr(Fst *fst);
|
||||||
|
Output fstEmptyFinalOutput(Fst *fst, bool *null);
|
||||||
|
bool fstVerify(Fst *fst);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -27,9 +27,11 @@ typedef struct FstCountingWriter {
|
||||||
|
|
||||||
uint64_t fstCountingWriterWrite(FstCountingWriter *write, uint8_t *buf, uint32_t bufLen);
|
uint64_t fstCountingWriterWrite(FstCountingWriter *write, uint8_t *buf, uint32_t bufLen);
|
||||||
|
|
||||||
int FstCountingWriterFlush(FstCountingWriter *write);
|
int fstCountingWriterFlush(FstCountingWriter *write);
|
||||||
|
|
||||||
|
|
||||||
|
uint32_t fstCountingWriterMaskedCheckSum(FstCountingWriter *write);
|
||||||
|
|
||||||
FstCountingWriter *fstCountingWriterCreate(void *wtr);
|
FstCountingWriter *fstCountingWriterCreate(void *wtr);
|
||||||
void fstCountingWriterDestroy(FstCountingWriter *w);
|
void fstCountingWriterDestroy(FstCountingWriter *w);
|
||||||
|
|
||||||
|
|
|
@ -32,9 +32,9 @@ extern const CompiledAddr EMPTY_ADDRESS;
|
||||||
extern const CompiledAddr NONE_ADDRESS;
|
extern const CompiledAddr NONE_ADDRESS;
|
||||||
|
|
||||||
// This version number is written to every finite state transducer created by
|
// This version number is written to every finite state transducer created by
|
||||||
// this crate. When a finite state transducer is read, its version number is
|
// this version When a finite state transducer is read, its version number is
|
||||||
// checked against this value.
|
// checked against this value.
|
||||||
extern const uint64_t version;
|
extern const uint64_t VERSION;
|
||||||
// The threshold (in number of transitions) at which an index is created for
|
// The threshold (in number of transitions) at which an index is created for
|
||||||
// a node's transitions. This speeds up lookup time at the expense of FST size
|
// a node's transitions. This speeds up lookup time at the expense of FST size
|
||||||
|
|
||||||
|
|
|
@ -14,7 +14,8 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "index_fst.h"
|
#include "index_fst.h"
|
||||||
|
#include "tcoding.h"
|
||||||
|
#include "tchecksum.h"
|
||||||
|
|
||||||
|
|
||||||
static void fstPackDeltaIn(FstCountingWriter *wrt, CompiledAddr nodeAddr, CompiledAddr transAddr, uint8_t nBytes) {
|
static void fstPackDeltaIn(FstCountingWriter *wrt, CompiledAddr nodeAddr, CompiledAddr transAddr, uint8_t nBytes) {
|
||||||
|
@ -98,7 +99,7 @@ void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *nodes, FstSlice bs, Output
|
||||||
FstBuilderNodeUnfinished *un = taosArrayGet(nodes->stack, sz);
|
FstBuilderNodeUnfinished *un = taosArrayGet(nodes->stack, sz);
|
||||||
assert(un->last == NULL);
|
assert(un->last == NULL);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//FstLastTransition *trn = malloc(sizeof(FstLastTransition));
|
//FstLastTransition *trn = malloc(sizeof(FstLastTransition));
|
||||||
//trn->inp = s->data[s->start];
|
//trn->inp = s->data[s->start];
|
||||||
|
@ -146,24 +147,27 @@ uint64_t fstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node,
|
||||||
size_t lsz = (size_t)(s->end - s->start + 1); // data len
|
size_t lsz = (size_t)(s->end - s->start + 1); // data len
|
||||||
size_t ssz = taosArrayGetSize(node->stack); // stack size
|
size_t ssz = taosArrayGetSize(node->stack); // stack size
|
||||||
|
|
||||||
uint64_t res = 0;
|
uint64_t i = 0;
|
||||||
for (size_t i = 0; i < lsz && i < ssz; i++) {
|
for (i = 0; i < lsz && i < ssz; i++) {
|
||||||
FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, i);
|
FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, i);
|
||||||
|
|
||||||
FstLastTransition *last = un->last;
|
FstLastTransition *t = un->last;
|
||||||
if (last->inp == s->data[s->start + i]) {
|
uint64_t addPrefix = 0;
|
||||||
uint64_t commPrefix = last->out;
|
if (t && t->inp == s->data[s->start + i]) {
|
||||||
uint64_t addPrefix = last->out - commPrefix;
|
uint64_t commPrefix = MIN(t->out, *out);
|
||||||
out = out - commPrefix;
|
uint64_t tAddPrefix = t->out - commPrefix;
|
||||||
last->out = commPrefix;
|
(*out) = (*out) - commPrefix;
|
||||||
if (addPrefix != 0) {
|
t->out = commPrefix;
|
||||||
fstBuilderNodeUnfinishedAddOutputPrefix(un, addPrefix);
|
addPrefix = tAddPrefix;
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
if (addPrefix != 0) {
|
||||||
|
fstBuilderNodeUnfinishedAddOutputPrefix(un, addPrefix);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return res;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -771,16 +775,16 @@ void fstBuilderInsertOutput(FstBuilder *b, FstSlice bs, Output in) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Output out;
|
Output out;
|
||||||
uint64_t prefixLen;
|
//if (in != 0) { //if let Some(in) = in
|
||||||
if (in != 0) { //if let Some(in) = in
|
// prefixLen = fstUnFinishedNodesFindCommPrefixAndSetOutput(b->unfinished, bs, in, &out);
|
||||||
prefixLen = fstUnFinishedNodesFindCommPrefixAndSetOutput(b->unfinished, bs, in, &out);
|
//} else {
|
||||||
} else {
|
// prefixLen = fstUnFinishedNodesFindCommPrefix(b->unfinished, bs);
|
||||||
prefixLen = fstUnFinishedNodesFindCommPrefix(b->unfinished, bs);
|
// out = 0;
|
||||||
out = 0;
|
//}
|
||||||
}
|
uint64_t prefixLen = fstUnFinishedNodesFindCommPrefixAndSetOutput(b->unfinished, bs, in, &out);
|
||||||
|
|
||||||
if (prefixLen == FST_SLICE_LEN(s)) {
|
if (prefixLen == FST_SLICE_LEN(s)) {
|
||||||
assert(out != 0);
|
assert(out == 0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -849,6 +853,31 @@ CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn) {
|
||||||
return b->lastAddr;
|
return b->lastAddr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void* fstBuilderInsertInner(FstBuilder *b) {
|
||||||
|
fstBuilderCompileFrom(b, 0);
|
||||||
|
FstBuilderNode *rootNode = fstUnFinishedNodesPopRoot(b->unfinished);
|
||||||
|
CompiledAddr rootAddr = fstBuilderCompile(b, rootNode);
|
||||||
|
|
||||||
|
uint8_t buf64[8] = {0};
|
||||||
|
|
||||||
|
taosEncodeFixedU64((void **)&buf64, b->len);
|
||||||
|
fstCountingWriterWrite(b->wrt, buf64, sizeof(buf64));
|
||||||
|
|
||||||
|
taosEncodeFixedU64((void **)&buf64, rootAddr);
|
||||||
|
fstCountingWriterWrite(b->wrt, buf64, sizeof(buf64));
|
||||||
|
|
||||||
|
uint8_t buf32[4] = {0};
|
||||||
|
uint32_t sum = fstCountingWriterMaskedCheckSum(b->wrt);
|
||||||
|
taosEncodeFixedU32((void **)&buf32, sum);
|
||||||
|
fstCountingWriterWrite(b->wrt, buf32, sizeof(buf32));
|
||||||
|
|
||||||
|
fstCountingWriterFlush(b->wrt);
|
||||||
|
return b->wrt;
|
||||||
|
|
||||||
|
}
|
||||||
|
void fstBuilderFinish(FstBuilder *b) {
|
||||||
|
fstBuilderInsertInner(b);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -894,4 +923,108 @@ void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *unNode, O
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Fst* fstCreate(FstSlice *slice) {
|
||||||
|
char *buf = slice->data;
|
||||||
|
uint64_t skip = 0;
|
||||||
|
uint64_t len = slice->dLen;
|
||||||
|
if (len < 36) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t version;
|
||||||
|
taosDecodeFixedU64(buf, &version);
|
||||||
|
skip += sizeof(version);
|
||||||
|
if (version == 0 || version > VERSION) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t type;
|
||||||
|
taosDecodeFixedU64(buf + skip, &type);
|
||||||
|
skip += sizeof(type);
|
||||||
|
|
||||||
|
uint32_t checkSum = 0;
|
||||||
|
len -= sizeof(checkSum);
|
||||||
|
taosDecodeFixedU32(buf + len, &checkSum);
|
||||||
|
|
||||||
|
CompiledAddr rootAddr;
|
||||||
|
len -= sizeof(rootAddr);
|
||||||
|
taosDecodeFixedU64(buf + len, &rootAddr);
|
||||||
|
|
||||||
|
uint64_t fstLen;
|
||||||
|
len -= sizeof(fstLen);
|
||||||
|
taosDecodeFixedU64(buf + len, &fstLen);
|
||||||
|
//TODO(validat root addr)
|
||||||
|
//
|
||||||
|
Fst *fst= (Fst *)calloc(1, sizeof(Fst));
|
||||||
|
if (fst == NULL) { return NULL; }
|
||||||
|
|
||||||
|
fst->meta = (FstMeta *)malloc(sizeof(FstMeta));
|
||||||
|
if (NULL == fst->meta) {
|
||||||
|
goto FST_CREAT_FAILED;
|
||||||
|
}
|
||||||
|
|
||||||
|
fst->meta->version = version;
|
||||||
|
fst->meta->rootAddr = rootAddr;
|
||||||
|
fst->meta->ty = type;
|
||||||
|
fst->meta->len = fstLen;
|
||||||
|
fst->meta->checkSum = checkSum;
|
||||||
|
fst->data = slice;
|
||||||
|
return fst;
|
||||||
|
|
||||||
|
FST_CREAT_FAILED:
|
||||||
|
free(fst->meta);
|
||||||
|
free(fst);
|
||||||
|
|
||||||
|
}
|
||||||
|
void fstDestroy(Fst *fst) {
|
||||||
|
if (fst) {
|
||||||
|
free(fst->meta);
|
||||||
|
fstNodeDestroy(fst->root);
|
||||||
|
}
|
||||||
|
free(fst);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool fstGet(Fst *fst, FstSlice *b, Output *out) {
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
FstNode* fstGetNode(Fst *fst, CompiledAddr addr) {
|
||||||
|
if (fst->root != NULL) {
|
||||||
|
return fst->root;
|
||||||
|
}
|
||||||
|
fst->root = fstNodeCreate(fst->meta->version, addr, fst->data);
|
||||||
|
return fst->root;
|
||||||
|
|
||||||
|
}
|
||||||
|
FstType fstGetType(Fst *fst) {
|
||||||
|
return fst->meta->ty;
|
||||||
|
}
|
||||||
|
CompiledAddr fstGetRootAddr(Fst *fst) {
|
||||||
|
return fst->meta->rootAddr;
|
||||||
|
}
|
||||||
|
|
||||||
|
Output fstEmptyFinalOutput(Fst *fst, bool *null) {
|
||||||
|
Output res = 0;
|
||||||
|
FstNode *node = fst->root;
|
||||||
|
if (FST_NODE_IS_FINAL(node)) {
|
||||||
|
*null = false;
|
||||||
|
res = FST_NODE_FINAL_OUTPUT(node);
|
||||||
|
} else {
|
||||||
|
*null = true;
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool fstVerify(Fst *fst) {
|
||||||
|
uint32_t checkSum = fst->meta->checkSum;
|
||||||
|
FstSlice *data = fst->data;
|
||||||
|
TSCKSUM initSum = 0;
|
||||||
|
if (taosCheckChecksumWhole(data->data, data->dLen)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -37,6 +37,9 @@ uint64_t fstCountingWriterWrite(FstCountingWriter *write, uint8_t *buf, uint32_t
|
||||||
return bufLen;
|
return bufLen;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint32_t fstCountingWriterMaskedCheckSum(FstCountingWriter *write) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
int fstCountingWriterFlush(FstCountingWriter *write) {
|
int fstCountingWriterFlush(FstCountingWriter *write) {
|
||||||
//write->wtr->flush
|
//write->wtr->flush
|
||||||
return 1;
|
return 1;
|
||||||
|
|
|
@ -25,7 +25,7 @@ const CompiledAddr NONE_ADDRESS = 1;
|
||||||
// This version number is written to every finite state transducer created by
|
// This version number is written to every finite state transducer created by
|
||||||
// this crate. When a finite state transducer is read, its version number is
|
// this crate. When a finite state transducer is read, its version number is
|
||||||
// checked against this value.
|
// checked against this value.
|
||||||
const uint64_t version = 3;
|
const uint64_t VERSION = 3;
|
||||||
// The threshold (in number of transitions) at which an index is created for
|
// The threshold (in number of transitions) at which an index is created for
|
||||||
// a node's transitions. This speeds up lookup time at the expense of FST size
|
// a node's transitions. This speeds up lookup time at the expense of FST size
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue