refactor idx code
This commit is contained in:
parent
856990c64b
commit
d00158ff69
|
@ -53,7 +53,7 @@ typedef struct FstRange {
|
|||
} FstRange;
|
||||
|
||||
typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal } State;
|
||||
typedef enum { Ordered, OutOfOrdered, DuplicateKey } OrderType;
|
||||
typedef enum { Ordered, OutOfOrdered, DuplicateKey } FstOrderType;
|
||||
|
||||
FstBoundWithData* fstBoundStateCreate(FstBound type, FstSlice* data);
|
||||
bool fstBoundWithDataExceededBy(FstBoundWithData* bound, FstSlice* slice);
|
||||
|
@ -106,7 +106,7 @@ bool fstBuilderInsert(FstBuilder* b, FstSlice bs, Output in);
|
|||
void fstBuilderCompileFrom(FstBuilder* b, uint64_t istate);
|
||||
void* fstBuilerIntoInner(FstBuilder* b);
|
||||
void fstBuilderFinish(FstBuilder* b);
|
||||
OrderType fstBuilderCheckLastKey(FstBuilder* b, FstSlice bs, bool ckDup);
|
||||
FstOrderType fstBuilderCheckLastKey(FstBuilder* b, FstSlice bs, bool ckDup);
|
||||
CompiledAddr fstBuilderCompile(FstBuilder* b, FstBuilderNode* bn);
|
||||
|
||||
typedef struct FstTransitions {
|
||||
|
@ -213,14 +213,18 @@ typedef struct FstNode {
|
|||
// If this node is final and has a terminal output value, then it is returned.
// Otherwise, a zero output is returned.
// The argument and the expansion are parenthesized so that expression
// arguments (e.g. `p + 1`) and surrounding operators bind as expected.
#define FST_NODE_FINAL_OUTPUT(node) ((node)->finalOutput)
|
||||
|
||||
// Returns true if and only if this node corresponds to a final or "match"
// state in the finite state transducer.
// Fully parenthesized so it is safe with expression arguments and inside
// larger expressions.
#define FST_NODE_IS_FINAL(node) ((node)->isFinal)
|
||||
|
||||
// Returns the number of transitions in this node. The maximum number of
// transitions is 256.
// Fully parenthesized so it is safe with expression arguments and inside
// larger expressions.
#define FST_NODE_LEN(node) ((node)->nTrans)
|
||||
|
||||
// Returns true if and only if this node has zero transitions.
// The argument is parenthesized so expression arguments expand correctly.
#define FST_NODE_IS_EMPTYE(node) ((node)->nTrans == 0)
|
||||
|
||||
// Returns the address of this node (its `start` field).
// Fully parenthesized so it is safe with expression arguments and inside
// larger expressions.
#define FST_NODE_ADDR(node) ((node)->start)
|
||||
|
||||
|
@ -277,6 +281,8 @@ FStmBuilder* fstSearch(Fst* fst, FAutoCtx* ctx);
|
|||
|
||||
FStmStBuilder* fstSearchWithState(Fst* fst, FAutoCtx* ctx);
|
||||
// into stream to expand later
|
||||
//
|
||||
|
||||
FStmSt* stmBuilderIntoStm(FStmBuilder* sb);
|
||||
|
||||
bool fstVerify(Fst* fst);
|
||||
|
@ -325,7 +331,8 @@ FStmBuilder* stmBuilderCreate(Fst* fst, FAutoCtx* aut);
|
|||
void stmBuilderDestroy(FStmBuilder* b);
|
||||
|
||||
// set up bound range
|
||||
// refator later: to simple code by marco
|
||||
// refactor later
|
||||
// simplify the code with a macro
|
||||
void stmBuilderSetRange(FStmBuilder* b, FstSlice* val, RangeType type);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -289,22 +289,14 @@ void fstStateCompileForAnyTrans(IdxFstFile* w, CompiledAddr addr, FstBuilderNode
|
|||
for (int32_t i = sz - 1; i >= 0; i--) {
|
||||
FstTransition* t = taosArrayGet(node->trans, i);
|
||||
idxFileWrite(w, (char*)&t->inp, 1);
|
||||
// fstPackDeltaIn(w, addr, t->addr, tSize);
|
||||
}
|
||||
if (sz > TRANS_INDEX_THRESHOLD) {
|
||||
// A value of 255 indicates that no transition exists for the byte
|
||||
// at that index. (Except when there are 256 transitions.) Namely,
|
||||
// any value greater than or equal to the number of transitions in
|
||||
// this node indicates an absent transition.
|
||||
// A value of 255 indicates that no transition exists for the byte at that idx
|
||||
uint8_t* index = (uint8_t*)taosMemoryMalloc(sizeof(uint8_t) * 256);
|
||||
memset(index, 255, sizeof(uint8_t) * 256);
|
||||
/// for (uint8_t i = 0; i < 256; i++) {
|
||||
// index[i] = 255;
|
||||
///}
|
||||
for (int32_t i = 0; i < sz; i++) {
|
||||
FstTransition* t = taosArrayGet(node->trans, i);
|
||||
index[t->inp] = i;
|
||||
// fstPackDeltaIn(w, addr, t->addr, tSize);
|
||||
}
|
||||
idxFileWrite(w, (char*)index, 256);
|
||||
taosMemoryFree(index);
|
||||
|
@ -344,7 +336,7 @@ uint8_t fstStateCommInput(FstState* s, bool* null) {
|
|||
*null = true;
|
||||
return v;
|
||||
}
|
||||
// v = 0 indicate that common_input is None
|
||||
// 0 indicate that common_input is None
|
||||
return v == 0 ? 0 : COMMON_INPUT(v);
|
||||
}
|
||||
|
||||
|
@ -522,7 +514,6 @@ uint64_t fstStateNtrans(FstState* s, FstSlice* slice) {
|
|||
int32_t len;
|
||||
uint8_t* data = fstSliceData(slice, &len);
|
||||
n = data[len - 2];
|
||||
// n = data[slice->end - 1]; // data[data.len() - 2]
|
||||
return n == 1 ? 256 : n; // // "1" is never a normal legal value here, because if there, // is only 1 transition,
|
||||
// then it is encoded in the state byte
|
||||
}
|
||||
|
@ -546,7 +537,6 @@ uint64_t fstStateFindInput(FstState* s, FstNode* node, uint8_t b, bool* null) {
|
|||
int32_t dlen = 0;
|
||||
uint8_t* data = fstSliceData(slice, &dlen);
|
||||
uint64_t i = data[at + b];
|
||||
// uint64_t i = slice->data[slice->start + at + b];
|
||||
if (i >= node->nTrans) {
|
||||
*null = true;
|
||||
}
|
||||
|
@ -558,17 +548,16 @@ uint64_t fstStateFindInput(FstState* s, FstNode* node, uint8_t b, bool* null) {
|
|||
FstSlice t = fstSliceCopy(slice, start, end - 1);
|
||||
int32_t len = 0;
|
||||
uint8_t* data = fstSliceData(&t, &len);
|
||||
int i = 0;
|
||||
for (; i < len; i++) {
|
||||
for (int i = 0; i < len; i++) {
|
||||
uint8_t v = data[i];
|
||||
if (v == b) {
|
||||
fstSliceDestroy(&t);
|
||||
return node->nTrans - i - 1; // bug
|
||||
}
|
||||
}
|
||||
if (i == len) {
|
||||
if (i + 1 == len) {
|
||||
*null = true;
|
||||
}
|
||||
}
|
||||
fstSliceDestroy(&t);
|
||||
}
|
||||
|
||||
|
@ -737,16 +726,13 @@ bool fstNodeCompile(FstNode* node, void* w, CompiledAddr lastAddr, CompiledAddr
|
|||
return true;
|
||||
} else if (sz != 1 || builderNode->isFinal) {
|
||||
fstStateCompileForAnyTrans(w, addr, builderNode);
|
||||
// AnyTrans->Compile(w, addr, node);
|
||||
} else {
|
||||
FstTransition* tran = taosArrayGet(builderNode->trans, 0);
|
||||
if (tran->addr == lastAddr && tran->out == 0) {
|
||||
fstStateCompileForOneTransNext(w, addr, tran->inp);
|
||||
// OneTransNext::compile(w, lastAddr, tran->inp);
|
||||
return true;
|
||||
} else {
|
||||
fstStateCompileForOneTrans(w, addr, tran);
|
||||
// OneTrans::Compile(w, lastAddr, *tran);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -795,7 +781,7 @@ void fstBuilderDestroy(FstBuilder* b) {
|
|||
}
|
||||
|
||||
bool fstBuilderInsert(FstBuilder* b, FstSlice bs, Output in) {
|
||||
OrderType t = fstBuilderCheckLastKey(b, bs, true);
|
||||
FstOrderType t = fstBuilderCheckLastKey(b, bs, true);
|
||||
if (t == Ordered) {
|
||||
// add log info
|
||||
fstBuilderInsertOutput(b, bs, in);
|
||||
|
@ -812,12 +798,6 @@ void fstBuilderInsertOutput(FstBuilder* b, FstSlice bs, Output in) {
|
|||
fstUnFinishedNodesSetRootOutput(b->unfinished, in);
|
||||
return;
|
||||
}
|
||||
// if (in != 0) { //if let Some(in) = in
|
||||
// prefixLen = fstUnFinishedNodesFindCommPrefixAndSetOutput(b->unfinished, bs, in, &out);
|
||||
//} else {
|
||||
// prefixLen = fstUnFinishedNodesFindCommPrefix(b->unfinished, bs);
|
||||
// out = 0;
|
||||
//}
|
||||
Output out;
|
||||
uint64_t prefixLen = fstUnFinishedNodesFindCommPrefixAndSetOutput(b->unfinished, bs, in, &out);
|
||||
|
||||
|
@ -835,7 +815,7 @@ void fstBuilderInsertOutput(FstBuilder* b, FstSlice bs, Output in) {
|
|||
return;
|
||||
}
|
||||
|
||||
OrderType fstBuilderCheckLastKey(FstBuilder* b, FstSlice bs, bool ckDup) {
|
||||
FstOrderType fstBuilderCheckLastKey(FstBuilder* b, FstSlice bs, bool ckDup) {
|
||||
FstSlice* input = &bs;
|
||||
if (fstSliceIsEmpty(&b->last)) {
|
||||
fstSliceDestroy(&b->last);
|
||||
|
@ -867,7 +847,6 @@ void fstBuilderCompileFrom(FstBuilder* b, uint64_t istate) {
|
|||
|
||||
fstBuilderNodeDestroy(bn);
|
||||
assert(addr != NONE_ADDRESS);
|
||||
// fstBuilderNodeDestroy(n);
|
||||
}
|
||||
fstUnFinishedNodesTopLastFreeze(b->unfinished, addr);
|
||||
return;
|
||||
|
@ -1044,8 +1023,6 @@ void fstDestroy(Fst* fst) {
|
|||
}
|
||||
|
||||
bool fstGet(Fst* fst, FstSlice* b, Output* out) {
|
||||
// dec lock range
|
||||
// taosThreadMutexLock(&fst->mtx);
|
||||
FstNode* root = fstGetRoot(fst);
|
||||
Output tOut = 0;
|
||||
int32_t len;
|
||||
|
@ -1058,7 +1035,6 @@ bool fstGet(Fst* fst, FstSlice* b, Output* out) {
|
|||
uint8_t inp = data[i];
|
||||
Output res = 0;
|
||||
if (false == fstNodeFindInput(root, inp, &res)) {
|
||||
// taosThreadMutexUnlock(&fst->mtx);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1069,7 +1045,6 @@ bool fstGet(Fst* fst, FstSlice* b, Output* out) {
|
|||
taosArrayPush(nodes, &root);
|
||||
}
|
||||
if (!FST_NODE_IS_FINAL(root)) {
|
||||
// taosThreadMutexUnlock(&fst->mtx);
|
||||
return false;
|
||||
} else {
|
||||
tOut = tOut + FST_NODE_FINAL_OUTPUT(root);
|
||||
|
@ -1080,8 +1055,6 @@ bool fstGet(Fst* fst, FstSlice* b, Output* out) {
|
|||
fstNodeDestroy(*node);
|
||||
}
|
||||
taosArrayDestroy(nodes);
|
||||
// fst->root = NULL;
|
||||
// taosThreadMutexUnlock(&fst->mtx);
|
||||
*out = tOut;
|
||||
return true;
|
||||
}
|
||||
|
@ -1231,7 +1204,6 @@ bool stmStSeekMin(FStmSt* sws, FstBoundWithData* min) {
|
|||
|
||||
FstNode* node = fstGetRoot(sws->fst);
|
||||
Output out = 0;
|
||||
// void* autState = sws->aut->start();
|
||||
void* autState = automFuncs[aut->type].start(aut);
|
||||
|
||||
int32_t len;
|
||||
|
@ -1239,12 +1211,10 @@ bool stmStSeekMin(FStmSt* sws, FstBoundWithData* min) {
|
|||
for (uint32_t i = 0; i < len; i++) {
|
||||
uint8_t b = data[i];
|
||||
uint64_t res = 0;
|
||||
bool find = fstNodeFindInput(node, b, &res);
|
||||
if (find == true) {
|
||||
if (fstNodeFindInput(node, b, &res)) {
|
||||
FstTransition trn;
|
||||
fstNodeGetTransitionAt(node, res, &trn);
|
||||
void* preState = autState;
|
||||
// autState = sws->aut->accept(preState, b);
|
||||
autState = automFuncs[aut->type].accept(aut, preState, b);
|
||||
taosArrayPush(sws->inp, &b);
|
||||
|
||||
|
@ -1379,14 +1349,14 @@ FStmStRslt* stmStNextWith(FStmSt* sws, StreamCallback callback) {
|
|||
return NULL;
|
||||
}
|
||||
|
||||
// Builds a stream result record.
//   data  - slice passed to fstSliceCopy(data, 0, FST_SLICE_LEN(data) - 1);
//           the returned slice is stored in the result.
//   out   - output value stored in the result.
//   state - opaque pointer stored as-is in the result.
// Returns a zero-initialized-then-filled FStmStRslt, or NULL when the
// allocation fails.
FStmStRslt* swsResultCreate(FstSlice* data, FstOutput out, void* state) {
  FStmStRslt* rslt = taosMemoryCalloc(1, sizeof(FStmStRslt));
  if (rslt != NULL) {
    rslt->data = fstSliceCopy(data, 0, FST_SLICE_LEN(data) - 1);
    rslt->out = out;
    rslt->state = state;
  }
  return rslt;
}
|
||||
|
|
|
@ -21,12 +21,12 @@ const CompiledAddr EMPTY_ADDRESS = 0;
|
|||
const CompiledAddr NONE_ADDRESS = 1;
|
||||
|
||||
// This version number is written to every finite state transducer created by
|
||||
// this crate. When a finite state transducer is read, its version number is
|
||||
// this version. When a finite state transducer is read, its version number is
|
||||
// checked against this value.
|
||||
const uint64_t VERSION = 3;
|
||||
|
||||
// The threshold (in number of transitions) at which an index is created for
|
||||
// a node's transitions. This speeds up lookup time at the expense of FST size
|
||||
|
||||
const uint64_t TRANS_INDEX_THRESHOLD = 32;
|
||||
|
||||
uint8_t packSize(uint64_t n) {
|
||||
|
@ -52,7 +52,6 @@ uint8_t packSize(uint64_t n) {
|
|||
uint64_t unpackUint64(uint8_t* ch, uint8_t sz) {
|
||||
uint64_t n = 0;
|
||||
for (uint8_t i = 0; i < sz; i++) {
|
||||
//
|
||||
n = n | (ch[i] << (8 * i));
|
||||
}
|
||||
return n;
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
#include "tcoding.h"
|
||||
#include "tcompare.h"
|
||||
|
||||
const static uint64_t tfileMagicNumber = 0xdb4775248b80fb57ull;
|
||||
const static uint64_t FILE_MAGIC_NUMBER = 0xdb4775248b80fb57ull;
|
||||
|
||||
typedef struct TFileFstIter {
|
||||
FStmBuilder* fb;
|
||||
|
@ -548,9 +548,6 @@ int tfileWriterPut(TFileWriter* tw, void* data, bool order) {
|
|||
taosArraySortPWithExt((SArray*)(data), tfileValueCompare, &fn);
|
||||
}
|
||||
|
||||
int32_t bufLimit = 64 * 4096, offset = 0;
|
||||
// char* buf = taosMemoryCalloc(1, sizeof(char) * bufLimit);
|
||||
// char* p = buf;
|
||||
int32_t sz = taosArrayGetSize((SArray*)data);
|
||||
int32_t fstOffset = tw->offset;
|
||||
|
||||
|
@ -564,6 +561,9 @@ int tfileWriterPut(TFileWriter* tw, void* data, bool order) {
|
|||
}
|
||||
tfileWriteFstOffset(tw, fstOffset);
|
||||
|
||||
int32_t bufCap = 8 * 1024;
|
||||
char* buf = taosMemoryCalloc(1, bufCap);
|
||||
|
||||
for (size_t i = 0; i < sz; i++) {
|
||||
TFileValue* v = taosArrayGetP((SArray*)data, i);
|
||||
|
||||
|
@ -571,14 +571,18 @@ int tfileWriterPut(TFileWriter* tw, void* data, bool order) {
|
|||
// check buf has enough space or not
|
||||
int32_t ttsz = TF_TABLE_TATOAL_SIZE(tbsz);
|
||||
|
||||
char* buf = taosMemoryCalloc(1, ttsz * sizeof(char));
|
||||
if (bufCap < ttsz) {
|
||||
bufCap = ttsz;
|
||||
buf = taosMemoryRealloc(buf, bufCap);
|
||||
}
|
||||
char* p = buf;
|
||||
tfileSerialTableIdsToBuf(p, v->tableId);
|
||||
tw->ctx->write(tw->ctx, buf, ttsz);
|
||||
v->offset = tw->offset;
|
||||
tw->offset += ttsz;
|
||||
taosMemoryFree(buf);
|
||||
memset(buf, 0, sizeof(buf));
|
||||
}
|
||||
taosMemoryFree(buf);
|
||||
|
||||
tw->fb = fstBuilderCreate(tw->ctx, 0);
|
||||
if (tw->fb == NULL) {
|
||||
|
@ -869,13 +873,13 @@ static int tfileWriteData(TFileWriter* write, TFileValue* tval) {
|
|||
//}
|
||||
}
|
||||
// Writes the file footer: FILE_MAGIC_NUMBER encoded with taosEncodeFixedU64.
// Returns the byte count reported by the context's write callback, which is
// asserted to equal sizeof(FILE_MAGIC_NUMBER).
static int tfileWriteFooter(TFileWriter* write) {
  char  buf[sizeof(FILE_MAGIC_NUMBER) + 1] = {0};
  void* pBuf = (void*)buf;
  taosEncodeFixedU64((void**)(void*)&pBuf, FILE_MAGIC_NUMBER);

  // The footer is binary data of fixed size. Deriving the length with strlen()
  // would silently truncate the write if any encoded byte were zero.
  int nwrite = write->ctx->write(write->ctx, buf, (int32_t)sizeof(FILE_MAGIC_NUMBER));

  indexInfo("tfile write footer size: %d", write->ctx->size(write->ctx));
  assert(nwrite == sizeof(FILE_MAGIC_NUMBER));
  return nwrite;
}
|
||||
static int tfileReaderLoadHeader(TFileReader* reader) {
|
||||
|
@ -899,7 +903,7 @@ static int tfileReaderLoadFst(TFileReader* reader) {
|
|||
int size = ctx->size(ctx);
|
||||
|
||||
// current load fst into memory, refactor it later
|
||||
int fstSize = size - reader->header.fstOffset - sizeof(tfileMagicNumber);
|
||||
int fstSize = size - reader->header.fstOffset - sizeof(FILE_MAGIC_NUMBER);
|
||||
char* buf = taosMemoryCalloc(1, fstSize);
|
||||
if (buf == NULL) {
|
||||
return -1;
|
||||
|
@ -959,7 +963,6 @@ static int tfileReaderVerify(TFileReader* reader) {
|
|||
IFileCtx* ctx = reader->ctx;
|
||||
|
||||
uint64_t tMagicNumber = 0;
|
||||
|
||||
char buf[sizeof(tMagicNumber) + 1] = {0};
|
||||
int size = ctx->size(ctx);
|
||||
|
||||
|
@ -970,25 +973,25 @@ static int tfileReaderVerify(TFileReader* reader) {
|
|||
}
|
||||
|
||||
taosDecodeFixedU64(buf, &tMagicNumber);
|
||||
return tMagicNumber == tfileMagicNumber ? 0 : -1;
|
||||
return tMagicNumber == FILE_MAGIC_NUMBER ? 0 : -1;
|
||||
}
|
||||
|
||||
// Takes one reference on `rd` via T_REF_INC. A NULL reader is ignored.
void tfileReaderRef(TFileReader* rd) {
  if (rd != NULL) {
    int cnt = T_REF_INC(rd);
    UNUSED(cnt);
  }
}
|
||||
|
||||
// Drops one reference on `rd` via T_REF_DEC. A NULL reader is ignored.
// When the value returned by T_REF_DEC is zero, the reader is destroyed.
void tfileReaderUnRef(TFileReader* rd) {
  if (rd != NULL && T_REF_DEC(rd) == 0) {
    // last reference released
    tfileReaderDestroy(rd);
  }
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue