From d56dbb15ff326f62ffde6ae54210397daced9dbe Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sun, 21 Nov 2021 18:39:35 +0800 Subject: [PATCH 1/2] add fst_registry --- source/libs/index/inc/index_fst.h | 18 --- source/libs/index/inc/index_fst_node.h | 14 ++ source/libs/index/inc/index_fst_registry.h | 36 ++++- source/libs/index/src/index_fst_node.c | 9 ++ source/libs/index/src/index_fst_registry.c | 147 +++++++++++++++++++++ 5 files changed, 205 insertions(+), 19 deletions(-) diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h index 61c857ed74..1230fe17ff 100644 --- a/source/libs/index/inc/index_fst.h +++ b/source/libs/index/inc/index_fst.h @@ -32,12 +32,6 @@ typedef struct FstRange { } FstRange; -typedef struct FstBuilderNode { - bool isFinal; - Output finalOutput; - SArray *trans; // -} FstBuilderNode; - typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal} State; typedef enum { Included, Excluded, Unbounded} FstBound; @@ -82,14 +76,6 @@ typedef struct FstBuilder { } FstBuilder; - - -typedef struct FstTransition { - uint8_t inp; //The byte input associated with this transition. - Output out; //The output associated with this transition - CompiledAddr addr; //The address of the node that this transition points to -} FstTransition; - typedef struct FstTransitions { FstNode *node; FstRange range; @@ -172,10 +158,6 @@ typedef struct FstIndexedValue { } FstIndexedValue; -typedef struct FstRegistryCell { - CompiledAddr addr; - FstBuilderNode *node; -} FstRegistryCell; diff --git a/source/libs/index/inc/index_fst_node.h b/source/libs/index/inc/index_fst_node.h index ba2d2ccd02..3eec97e3d8 100644 --- a/source/libs/index/inc/index_fst_node.h +++ b/source/libs/index/inc/index_fst_node.h @@ -16,7 +16,21 @@ #ifndef __INDEX_FST_NODE_H__ #define __INDEX_FST_NODE_H__ +#include "index_fst_util.h" +typedef struct FstTransition { + uint8_t inp; //The byte input associated with this transition. 
+ Output out; //The output associated with this transition + CompiledAddr addr; //The address of the node that this transition points to +} FstTransition; + +typedef struct FstBuilderNode { + bool isFinal; + Output finalOutput; + SArray *trans; // +} FstBuilderNode; + +FstBuilderNode *fstBuilderNodeDefault(); #endif diff --git a/source/libs/index/inc/index_fst_registry.h b/source/libs/index/inc/index_fst_registry.h index 6dcb236f29..80c0194f00 100644 --- a/source/libs/index/inc/index_fst_registry.h +++ b/source/libs/index/inc/index_fst_registry.h @@ -16,9 +16,43 @@ #define __FST_REGISTRY_H__ #include "index_fst_util.h" +#include "tarray.h" +#include "index_fst_node.h" + +typedef struct FstRegistryCell { + CompiledAddr addr; + FstBuilderNode *node; +} FstRegistryCell; + +typedef struct FstRegistryCache { + SArray *cells; + uint32_t start; + uint32_t end; +} FstRegistryCache; + +typedef enum {FOUND, NOTFOUND, REJECTED} FstRegistryEntryState; + +typedef struct FstRegistryEntry { + FstRegistryEntryState state; + CompiledAddr addr; + FstRegistryCell *cell; +} FstRegistryEntry; + + + +// Registry relation function typedef struct FstRegistry { - + SArray *table; + uint64_t tableSize; // num of rows + uint64_t mruSize; // num of columns } FstRegistry; + +// +FstRegistry* fstRegistryCreate(uint64_t tableSize, uint64_t mruSize); + +FstRegistryEntry* fstRegistryGetEntry(FstRegistry *registry, FstBuilderNode *bNode); + +uint64_t fstRegistryHash(FstRegistry *registry, FstBuilderNode *node); #endif diff --git a/source/libs/index/src/index_fst_node.c b/source/libs/index/src/index_fst_node.c index 3d5efd30f3..3e8e7c12a2 100644 --- a/source/libs/index/src/index_fst_node.c +++ b/source/libs/index/src/index_fst_node.c @@ -12,4 +12,13 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ +#include "index_fst_node.h" + +FstBuilderNode *fstBuilderNodeDefault() { + FstBuilderNode *bn = malloc(sizeof(FstBuilderNode)); + bn->isFinal = false; + bn->finalOutput = 0; + bn->trans = NULL; + return bn; +} diff --git a/source/libs/index/src/index_fst_registry.c b/source/libs/index/src/index_fst_registry.c index 940c5863f4..7b4ef9da19 100644 --- a/source/libs/index/src/index_fst_registry.c +++ b/source/libs/index/src/index_fst_registry.c @@ -15,3 +15,150 @@ #include "index_fst_registry.h" + +static void fstRegistryCellSwap(SArray *arr, uint32_t a, uint32_t b) { + size_t sz = taosArrayGetSize(arr); + if (a >= sz || b >= sz) { return; } + + FstRegistryCell *cell1 = (FstRegistryCell *)taosArrayGet(arr, a); + FstRegistryCell *cell2 = (FstRegistryCell *)taosArrayGet(arr, b); + + FstRegistryCell t = {.addr = cell1->addr, .node = cell1->node}; + + cell1->addr = cell2->addr; + cell1->node = cell2->node; + + cell2->addr = t.addr; + cell2->node = t.node; + return; +} + +static void fstRegistryCellPromote(SArray *arr, uint32_t start, uint32_t end) { + size_t sz = taosArrayGetSize(arr); + if (start >= sz && end >= sz) {return; } + + assert(start >= end); + + int32_t s = (int32_t)start; + int32_t e = (int32_t)end; + while(s > e) { + fstRegistryCellSwap(arr, s - 1, s); + s -= 1; + } +} +FstRegistry* fstRegistryCreate(uint64_t tableSize, uint64_t mruSize) { + FstRegistry *registry = malloc(sizeof(FstRegistry)); + if (registry == NULL) { return NULL ;} + + uint64_t nCells = tableSize * mruSize; + SArray* tb = (SArray *)taosArrayInit(nCells, sizeof(FstRegistryCell)); + for (uint64_t i = 0; i < nCells; i++) { + FstRegistryCell *cell = taosArrayGet(tb, i); + cell->addr = NONE_ADDRESS; + cell->node = fstBuilderNodeDefault(); + } + + registry->table = tb; + registry->tableSize = tableSize; + registry->mruSize = mruSize; + return registry; +} + +FstRegistryEntry *fstRegistryGetEntry(FstRegistry *registry, FstBuilderNode *bNode) { + if (taosArrayGetSize(registry->table) <= 0) { + 
return NULL; + } + uint64_t bucket = fstRegistryHash(registry, bNode); + uint64_t start = registry->mruSize * bucket; + uint64_t end = start + registry->mruSize; + + FstRegistryEntry *entry = malloc(sizeof(FstRegistryEntry)); + if (end - start == 1) { + FstRegistryCell *cell = taosArrayGet(registry->table, start); + //cell->isNode && + if (cell->addr != NONE_ADDRESS && cell->node == bNode) { + entry->state = FOUND; + entry->addr = cell->addr ; + return entry; + } else { + // clone from bNode, refactor later + cell->node->isFinal = bNode->isFinal; + cell->node->finalOutput = bNode->finalOutput; + cell->node->trans = bNode->trans; + bNode->trans = NULL; + + entry->state = NOTFOUND; + entry->cell = cell; // copy or not + } + } else if (end - start == 2) { + FstRegistryCell *cell1 = taosArrayGet(registry->table, start); + if (cell1->addr != NONE_ADDRESS && cell1->node == bNode) { + entry->state = FOUND; + entry->addr = cell1->addr; + return entry; + } + FstRegistryCell *cell2 = taosArrayGet(registry->table, start + 1); + if (cell2->addr != NONE_ADDRESS && cell2->node == bNode) { + entry->state = FOUND; + entry->addr = cell2->addr; + // must swap here + fstRegistryCellSwap(registry->table, start, start + 1); + return entry; + } + //clone from bNode, refactor later + cell1->node->isFinal = bNode->isFinal; + cell1->node->finalOutput = bNode->finalOutput; + cell1->node->trans = bNode->trans; + bNode->trans = NULL; + + fstRegistryCellSwap(registry->table, start, start + 1); + FstRegistryCell *cCell = taosArrayGet(registry->table, start); + entry->state = NOTFOUND; + entry->cell = cCell; + } else { + uint32_t i = start; + for (; i < end; i++) { + FstRegistryCell *cell = (FstRegistryCell *)taosArrayGet(registry->table, i); + if (cell->addr != NONE_ADDRESS && cell->node == bNode) { + entry->state = FOUND; + entry->addr = cell->addr; + fstRegistryCellPromote(registry->table, i, start); + break; + } + } + if (i >= end) { + uint64_t last = end - 1; + FstRegistryCell *cell = 
(FstRegistryCell *)taosArrayGet(registry->table, last); + //clone from bNode, refactor later + cell->node->isFinal = bNode->isFinal; + cell->node->finalOutput = bNode->finalOutput; + cell->node->trans = bNode->trans; + bNode->trans = NULL; + + fstRegistryCellPromote(registry->table, last, start); + FstRegistryCell *cCell = taosArrayGet(registry->table, start); + entry->state = NOTFOUND; + entry->cell = cCell; + } + } + return entry; +} + +uint64_t fstRegistryHash(FstRegistry *registry, FstBuilderNode *bNode) { + //TODO(yihaoDeng): refactor later + const uint64_t FNV_PRIME = 1099511628211; + uint64_t h = 14695981039346656037u; + + h = (h ^ (uint64_t)bNode->isFinal) * FNV_PRIME; + h = (h ^ (bNode)->finalOutput) * FNV_PRIME; + + uint32_t sz = (uint32_t)taosArrayGetSize(bNode->trans); + for (uint32_t i = 0; i < sz; i++) { + FstTransition *trn = taosArrayGet(bNode->trans, i); + h = (h ^ (uint64_t)(trn->inp)) * FNV_PRIME; + h = (h ^ (uint64_t)(trn->out)) * FNV_PRIME; + h = (h ^ (uint64_t)(trn->addr))* FNV_PRIME; + } + return h %(registry->tableSize); +} + From 077b89db174fc4a9f77b61269f6d663a2ac642b3 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sun, 21 Nov 2021 19:01:08 +0800 Subject: [PATCH 2/2] add fst_registry --- source/libs/index/inc/index_fst_node.h | 2 + source/libs/index/inc/index_fst_registry.h | 10 ++-- source/libs/index/src/index_fst_node.c | 11 +++++ source/libs/index/src/index_fst_registry.c | 54 ++++++++++------------ 4 files changed, 42 insertions(+), 35 deletions(-) diff --git a/source/libs/index/inc/index_fst_node.h b/source/libs/index/inc/index_fst_node.h index 3eec97e3d8..09dcd223b1 100644 --- a/source/libs/index/inc/index_fst_node.h +++ b/source/libs/index/inc/index_fst_node.h @@ -33,4 +33,6 @@ typedef struct FstBuilderNode { FstBuilderNode *fstBuilderNodeDefault(); +void fstBuilderNodeCloneFrom(FstBuilderNode *dst, FstBuilderNode *src); + #endif diff --git a/source/libs/index/inc/index_fst_registry.h b/source/libs/index/inc/index_fst_registry.h 
index 80c0194f00..d54d73683f 100644 --- a/source/libs/index/inc/index_fst_registry.h +++ b/source/libs/index/inc/index_fst_registry.h @@ -26,11 +26,11 @@ typedef struct FstRegistryCell { -typedef struct FstRegistryCache { - SArray *cells; - uint32_t start; - uint32_t end; -} FstRegistryCache; +//typedef struct FstRegistryCache { +// SArray *cells; +// uint32_t start; +// uint32_t end; +//} FstRegistryCache; typedef enum {FOUND, NOTFOUND, REJECTED} FstRegistryEntryState; diff --git a/source/libs/index/src/index_fst_node.c b/source/libs/index/src/index_fst_node.c index 3e8e7c12a2..1511781719 100644 --- a/source/libs/index/src/index_fst_node.c +++ b/source/libs/index/src/index_fst_node.c @@ -22,3 +22,14 @@ FstBuilderNode *fstBuilderNodeDefault() { return bn; } +// Does not destroy src; freeing src remains the caller's business +void fstBuilderNodeCloneFrom(FstBuilderNode *dst, FstBuilderNode *src) { + if (dst == NULL || src == NULL) { return; } + + dst->isFinal = src->isFinal; + dst->finalOutput = src->finalOutput ; + dst->trans = src->trans; + + src->trans = NULL; +} + diff --git a/source/libs/index/src/index_fst_registry.c b/source/libs/index/src/index_fst_registry.c index 7b4ef9da19..5d6c7b1712 100644 --- a/source/libs/index/src/index_fst_registry.c +++ b/source/libs/index/src/index_fst_registry.c @@ -16,6 +16,23 @@ #include "index_fst_registry.h" +uint64_t fstRegistryHash(FstRegistry *registry, FstBuilderNode *bNode) { + //TODO(yihaoDeng): refactor later + const uint64_t FNV_PRIME = 1099511628211; + uint64_t h = 14695981039346656037u; + + h = (h ^ (uint64_t)bNode->isFinal) * FNV_PRIME; + h = (h ^ (bNode)->finalOutput) * FNV_PRIME; + + uint32_t sz = (uint32_t)taosArrayGetSize(bNode->trans); + for (uint32_t i = 0; i < sz; i++) { + FstTransition *trn = taosArrayGet(bNode->trans, i); + h = (h ^ (uint64_t)(trn->inp)) * FNV_PRIME; + h = (h ^ (uint64_t)(trn->out)) * FNV_PRIME; + h = (h ^ (uint64_t)(trn->addr))* FNV_PRIME; + } + return h %(registry->tableSize); +} static void 
fstRegistryCellSwap(SArray *arr, uint32_t a, uint32_t b) { size_t sz = taosArrayGetSize(arr); if (a >= sz || b >= sz) { return; } @@ -46,6 +63,9 @@ static void fstRegistryCellPromote(SArray *arr, uint32_t start, uint32_t end) { s -= 1; } } +#define FST_REGISTRY_CELL_IS_EMPTY(cell) (cell->addr == NONE_ADDRESS) +#define FST_REGISTRY_CELL_INSERT(cell, addr) do {cell->addr = addr;} while(0) + FstRegistry* fstRegistryCreate(uint64_t tableSize, uint64_t mruSize) { FstRegistry *registry = malloc(sizeof(FstRegistry)); if (registry == NULL) { return NULL ;} @@ -82,11 +102,8 @@ FstRegistryEntry *fstRegistryGetEntry(FstRegistry *registry, FstBuilderNode *bNo return entry; } else { // clone from bNode, refactor later - cell->node->isFinal = bNode->isFinal; - cell->node->finalOutput = bNode->finalOutput; - cell->node->trans = bNode->trans; - bNode->trans = NULL; - + // + fstBuilderNodeCloneFrom(cell->node, bNode); entry->state = NOTFOUND; entry->cell = cell; // copy or not } @@ -106,10 +123,7 @@ FstRegistryEntry *fstRegistryGetEntry(FstRegistry *registry, FstBuilderNode *bNo return entry; } //clone from bNode, refactor later - cell1->node->isFinal = bNode->isFinal; - cell1->node->finalOutput = bNode->finalOutput; - cell1->node->trans = bNode->trans; - bNode->trans = NULL; + fstBuilderNodeCloneFrom(cell2->node, bNode); fstRegistryCellSwap(registry->table, start, start + 1); FstRegistryCell *cCell = taosArrayGet(registry->table, start); @@ -130,10 +144,7 @@ FstRegistryEntry *fstRegistryGetEntry(FstRegistry *registry, FstBuilderNode *bNo uint64_t last = end - 1; FstRegistryCell *cell = (FstRegistryCell *)taosArrayGet(registry->table, last); //clone from bNode, refactor later - cell->node->isFinal = bNode->isFinal; - cell->node->finalOutput = bNode->finalOutput; - cell->node->trans = bNode->trans; - bNode->trans = NULL; + fstBuilderNodeCloneFrom(cell->node, bNode); fstRegistryCellPromote(registry->table, last, start); FstRegistryCell *cCell = taosArrayGet(registry->table, start); 
@@ -144,21 +155,4 @@ FstRegistryEntry *fstRegistryGetEntry(FstRegistry *registry, FstBuilderNode *bNo return entry; } -uint64_t fstRegistryHash(FstRegistry *registry, FstBuilderNode *bNode) { - //TODO(yihaoDeng): refactor later - const uint64_t FNV_PRIME = 1099511628211; - uint64_t h = 14695981039346656037u; - - h = (h ^ (uint64_t)bNode->isFinal) * FNV_PRIME; - h = (h ^ (bNode)->finalOutput) * FNV_PRIME; - - uint32_t sz = (uint32_t)taosArrayGetSize(bNode->trans); - for (uint32_t i = 0; i < sz; i++) { - FstTransition *trn = taosArrayGet(bNode->trans, i); - h = (h ^ (uint64_t)(trn->inp)) * FNV_PRIME; - h = (h ^ (uint64_t)(trn->out)) * FNV_PRIME; - h = (h ^ (uint64_t)(trn->addr))* FNV_PRIME; - } - return h %(registry->tableSize); -}