From 55282bbfa2f1dd9a841d22d51e5f479c09089a5f Mon Sep 17 00:00:00 2001
From: yihaoDeng
Date: Thu, 25 Nov 2021 19:35:31 +0800
Subject: [PATCH] add index test example

---
Review notes (text between the "---" line and the diff is not part of the
commit):

* The line structure of this patch was restored from a copy whose line
  breaks had been collapsed. Blank context lines and indentation are
  inferred from the hunk line counts; check them against the target tree
  (or apply with whitespace tolerance) before relying on them.
* Only added ("+") lines were changed, and every hunk keeps its original
  number of added lines, so the hunk headers and diffstat are unchanged.
* FST_SET_TRANSITION_PACK_SIZE / FST_SET_OUTPUT_PACK_SIZE: macro arguments
  are now parenthesized.
* fstStateCompileForAnyTrans:
  - the fill loop used a uint8_t counter compared against 256, which can
    never become false; the counter is now size_t;
  - the index table was heap-allocated, never checked and never freed; it
    is now a 256-byte stack array;
  - the table was written inside the per-transition loop with
    sizeof(pointer) bytes; it is now written once, after it is filled,
    with its real size;
  - the transition count was written by reading the first byte of a
    size_t, which depends on host byte order; it is narrowed to uint8_t
    first;
  - buffers are cast to uint8_t *, the parameter type shown for
    fstCountingWriterWrite in the last hunk header of this patch;
  - commented-out calls were removed.
* NOTE(review): fstCountingWriterPackUintIn is still a placeholder that
  writes nothing, so outputs and address deltas are not serialized yet.
* NOTE(review): the unchanged context line
  COMMON_INDEX(inp, 0x111111, val) uses a hexadecimal mask that looks like
  it was meant to be binary (0b111111) - confirm.

 .../index/inc/index_fst_counting_writer.h     |  3 +
 source/libs/index/inc/index_fst_util.h        |  4 +-
 source/libs/index/src/index_fst.c             | 93 ++++++++++++++++++-
 .../index/src/index_fst_counting_writer.c     |  5 +-
 4 files changed, 98 insertions(+), 7 deletions(-)

diff --git a/source/libs/index/inc/index_fst_counting_writer.h b/source/libs/index/inc/index_fst_counting_writer.h
index 0eba963239..dbfbe80494 100644
--- a/source/libs/index/inc/index_fst_counting_writer.h
+++ b/source/libs/index/inc/index_fst_counting_writer.h
@@ -34,6 +34,9 @@ FstCountingWriter *fstCountingWriterCreate(void *wtr);
 
 void fstCountingWriterDestroy(FstCountingWriter *w);
 
+void fstCountingWriterPackUintIn(FstCountingWriter *writer, uint64_t n, uint8_t nBytes);
+
+
 #define FST_WRITER_COUNT(writer) (writer->count)
 #define FST_WRITER_INTER_WRITER(writer) (writer->wtr)
 #define FST_WRITE_CHECK_SUMMER(writer) (writer->summer)
diff --git a/source/libs/index/inc/index_fst_util.h b/source/libs/index/inc/index_fst_util.h
index 416abf1307..5b84632418 100644
--- a/source/libs/index/inc/index_fst_util.h
+++ b/source/libs/index/inc/index_fst_util.h
@@ -45,9 +45,9 @@ extern const uint64_t TRANS_INDEX_THRESHOLD;
 
 
 // `0` is a legal value which means there are no transitions/outputs
-#define FST_SET_TRANSITION_PACK_SIZE(v, sz) do {v = (v & 0b00001111) | (sz << 4} while(0)
+#define FST_SET_TRANSITION_PACK_SIZE(v, sz) do { (v) = ((v) & 0b00001111) | ((sz) << 4); } while (0)
 #define FST_GET_TRANSITION_PACK_SIZE(v) (((v) & 0b11110000) >> 4)
-#define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { v = (v & 0b11110000) | sz } while(0)
+#define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { (v) = ((v) & 0b11110000) | (sz); } while (0)
 #define FST_GET_OUTPUT_PACK_SIZE(v) ((v) & 0b00001111)
 
 #define COMMON_INPUT(idx) COMMON_INPUTS_INV[(idx) - 1]
diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c
index 00375a261b..ddefb29061 100644
--- a/source/libs/index/src/index_fst.c
+++ b/source/libs/index/src/index_fst.c
@@ -16,6 +16,12 @@
 
 #include "index_fst.h"
 
+// Passes (nodeAddr - transAddr), or EMPTY_ADDRESS unchanged, to fstCountingWriterPackUintIn with nBytes.
+static void fstPackDeltaIn(FstCountingWriter *wrt, CompiledAddr nodeAddr, CompiledAddr transAddr, uint8_t nBytes) {
+  CompiledAddr deltaAddr = (transAddr == EMPTY_ADDRESS) ? EMPTY_ADDRESS : nodeAddr - transAddr;
+  fstCountingWriterPackUintIn(wrt, deltaAddr, nBytes);
+}
+
 FstUnFinishedNodes *fstUnFinishedNodesCreate() {
   FstUnFinishedNodes *nodes = malloc(sizeof(FstUnFinishedNodes));
   if (nodes == NULL) { return NULL; }
@@ -175,18 +181,18 @@ FstState fstStateCreateFrom(FstSlice* slice, CompiledAddr addr) {
   return fs;
 }
 
-static FstState stateDict[] = {
+static FstState fstStateDict[] = {
   {.state = OneTransNext, .val = 0b11000000},
   {.state = OneTrans, .val = 0b10000000},
   {.state = AnyTrans, .val = 0b00000000},
   {.state = EmptyFinal, .val = 0b00000000}
 };
 // debug
-static char *fStStateStr[] = {"ONE_TRANS_NEXT", "ONE_TRANS", "ANY_TRANS", "EMPTY_FINAL"};
+static const char *fstStateStr[] = {"ONE_TRANS_NEXT", "ONE_TRANS", "ANY_TRANS", "EMPTY_FINAL"};
 
 FstState fstStateCreate(State state){
   uint8_t idx = (uint8_t)state;
-  return stateDict[idx];
+  return fstStateDict[idx];
 }
 //compile
 void fstStateCompileForOneTransNext(FstCountingWriter *w, CompiledAddr addr, uint8_t inp) {
@@ -209,6 +215,77 @@ void fstStateCompileForOneTrans(FstCountingWriter *w, CompiledAddr addr, FstTran
 }
 
 void fstStateCompileForAnyTrans(FstCountingWriter *w, CompiledAddr addr, FstBuilderNode *node) {
+  size_t sz = taosArrayGetSize(node->trans);
+  assert(sz <= 256);
+
+  uint8_t tSize = 0;
+  uint8_t oSize = packSize(node->finalOutput);
+
+  // Outputs are emitted only when the final output or some transition output is non-zero.
+  bool anyOuts = (node->finalOutput != 0);
+  for (size_t i = 0; i < sz; i++) {
+    FstTransition *t = taosArrayGet(node->trans, i);
+    tSize = MAX(tSize, packDeltaSize(addr, t->addr));
+    oSize = MAX(oSize, packSize(t->out));
+    anyOuts = anyOuts || (t->out != 0);
+  }
+
+  PackSizes packSizes = 0;
+  if (anyOuts) { FST_SET_OUTPUT_PACK_SIZE(packSizes, oSize); }
+  else { FST_SET_OUTPUT_PACK_SIZE(packSizes, 0); }
+
+  FST_SET_TRANSITION_PACK_SIZE(packSizes, tSize);
+
+  FstState st = fstStateCreate(AnyTrans);
+  fstStateSetFinalState(&st, node->isFinal);
+  fstStateSetStateNtrans(&st, (uint8_t)sz);
+
+  if (anyOuts) {
+    if (FST_BUILDER_NODE_IS_FINAL(node)) {
+      fstCountingWriterPackUintIn(w, node->finalOutput, oSize);
+    }
+    for (size_t i = 0; i < sz; i++) {
+      FstTransition *t = taosArrayGet(node->trans, i);
+      fstCountingWriterPackUintIn(w, t->out, oSize);
+    }
+  }
+  for (size_t i = 0; i < sz; i++) {
+    FstTransition *t = taosArrayGet(node->trans, i);
+    fstPackDeltaIn(w, addr, t->addr, tSize);
+  }
+  for (size_t i = 0; i < sz; i++) {
+    FstTransition *t = taosArrayGet(node->trans, i);
+    // One input byte per transition.
+    fstCountingWriterWrite(w, (uint8_t *)&t->inp, 1);
+  }
+  if (sz > TRANS_INDEX_THRESHOLD) {
+    // A value of 255 indicates that no transition exists for the byte
+    // at that index. (Except when there are 256 transitions.) Namely,
+    // any value greater than or equal to the number of transitions in
+    // this node indicates an absent transition.
+    uint8_t index[256];
+    for (size_t i = 0; i < sizeof(index); i++) {
+      index[i] = 255;
+    }
+    for (size_t i = 0; i < sz; i++) {
+      FstTransition *t = taosArrayGet(node->trans, i);
+      index[t->inp] = (uint8_t)i;
+    }
+    // Emit the table once, after every slot has been filled.
+    fstCountingWriterWrite(w, index, sizeof(index));
+  }
+  fstCountingWriterWrite(w, (uint8_t *)&packSizes, 1);
+  bool null = false;
+  fstStateStateNtrans(&st, &null);
+  if (null) {
+    // 256 can't be represented in a u8, so we abuse the fact that
+    // the # of transitions can never be 1 here, since 1 is always
+    // encoded in the state byte.
+    // Narrow explicitly so the byte written does not depend on host byte order.
+    uint8_t v = (sz == 256) ? 1 : (uint8_t)sz;
+    fstCountingWriterWrite(w, &v, 1);
+  }
+  fstCountingWriterWrite(w, (uint8_t *)(&(st.val)), 1);
   return;
 }
 
@@ -218,7 +295,7 @@ void fstStateSetCommInput(FstState* s, uint8_t inp) {
 
   uint8_t val;
   COMMON_INDEX(inp, 0x111111, val);
-  s->val = (s->val & stateDict[s->state].val) | val;
+  s->val = (s->val & fstStateDict[s->state].val) | val;
 }
 
 // comm_input
@@ -523,6 +600,14 @@ FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *slice) {
   }
   return n;
 }
+
+// debug state transition
+static const char *fstNodeState(FstNode *node) {
+  FstState *st = &node->state;
+  return fstStateStr[st->state];
+}
+
+
 void fstNodeDestroy(FstNode *node) {
   free(node);
 }
diff --git a/source/libs/index/src/index_fst_counting_writer.c b/source/libs/index/src/index_fst_counting_writer.c
index 1486b9b203..23bf4d37c8 100644
--- a/source/libs/index/src/index_fst_counting_writer.c
+++ b/source/libs/index/src/index_fst_counting_writer.c
@@ -36,10 +36,13 @@ uint64_t fstCountingWriterWrite(FstCountingWriter *write, uint8_t *buf, uint32_t
   return bufLen;
 }
 
-int FstCountingWriterFlush(FstCountingWriter *write) {
+int fstCountingWriterFlush(FstCountingWriter *write) {
   //write->wtr->flush
   return 1;
 }
 
+void fstCountingWriterPackUintIn(FstCountingWriter *writer, uint64_t n, uint8_t nBytes) {
+  // NOTE(review): placeholder - nothing is packed or written yet.
+}
 
 