From 0cd12fd353bad33210fec72b0f788998f796c92a Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Mon, 22 Nov 2021 12:05:03 +0800 Subject: [PATCH] update builde node compile --- source/libs/index/inc/index_fst.h | 6 +-- .../index/inc/index_fst_counting_writer.h | 42 ++++++++++++++++++ source/libs/index/inc/index_fst_node.h | 6 +++ source/libs/index/inc/index_fst_registry.h | 2 + source/libs/index/src/index_fst.c | 39 +++++++++++++++-- .../index/src/index_fst_counting_writer.c | 43 +++++++++++++++++++ source/libs/index/src/index_fst_node.c | 22 ++++++++++ source/libs/index/src/index_fst_registry.c | 2 - 8 files changed, 153 insertions(+), 9 deletions(-) create mode 100644 source/libs/index/inc/index_fst_counting_writer.h create mode 100644 source/libs/index/src/index_fst_counting_writer.c diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h index 04e9b8b4ac..4d124e2abf 100644 --- a/source/libs/index/inc/index_fst.h +++ b/source/libs/index/inc/index_fst.h @@ -49,7 +49,7 @@ typedef struct FstUnFinishedNodes { #define FST_UNFINISHED_NODES_LEN(nodes) taosArrayGetSize(nodes->stack) -FstUnFinishedNodes *FstUnFinishedNodesCreate(); +FstUnFinishedNodes *fstUnFinishedNodesCreate(); void fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal); FstBuilderNode *fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes); FstBuilderNode *fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr); @@ -62,9 +62,9 @@ uint64_t FstUnFinishedNodesFindCommPreifxAndSetOutput(FstUnFinishedNodes *node, typedef struct FstBuilder { - FstCountingWriter wtr; // The FST raw data is written directly to `wtr`. + FstCountingWriter *wrt; // The FST raw data is written directly to `wrt`. FstUnFinishedNodes *unfinished; // The stack of unfinished nodes - FstRegistry registry; // A map of finished nodes. + FstRegistry* registry; // A map of finished nodes. 
SArray* last; // The last word added CompiledAddr lastAddr; // The address of the last compiled node uint64_t len; // num of keys added diff --git a/source/libs/index/inc/index_fst_counting_writer.h b/source/libs/index/inc/index_fst_counting_writer.h new file mode 100644 index 0000000000..bd8ad13348 --- /dev/null +++ b/source/libs/index/inc/index_fst_counting_writer.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __INDEX_FST_COUNTING_WRITER_H__ +#define __INDEX_FST_COUNTING_WRITER_H__ + +#include <stdint.h> + +typedef uint32_t CheckSummer; +typedef struct FstCountingWriter { + void* wrt; // wrap any writer that counts and checksum bytes written + uint64_t count; + CheckSummer summer; +} FstCountingWriter; + +uint64_t fstCountingWriterWrite(FstCountingWriter *write, uint8_t *buf, uint32_t bufLen); + +int FstCountingWriterFlush(FstCountingWriter *write); + +void FstCountingWriterDestroy(FstCountingWriter *cw); + +FstCountingWriter *fstCountingWriterCreate(void *wrt); + +#define FST_WRITER_COUNT(writer) ((writer)->count) +#define FST_WRITER_INTER_WRITER(writer) ((writer)->wrt) +#define FST_WRITE_CHECK_SUMMER(writer) ((writer)->summer) + +#endif + + diff --git a/source/libs/index/inc/index_fst_node.h b/source/libs/index/inc/index_fst_node.h index 631c7026c5..88b72c4b48 100644 --- a/source/libs/index/inc/index_fst_node.h +++ b/source/libs/index/inc/index_fst_node.h @@ -17,7 +17,11 @@ #define __INDEX_FST_NODE_H__ #include "index_fst_util.h" +#include "index_fst_counting_writer.h" 
+#define FST_BUILDER_NODE_IS_FINAL(bn) ((bn)->isFinal) +#define FST_BUILDER_NODE_TRANS_ISEMPTY(bn) (taosArrayGetSize((bn)->trans) == 0) +#define FST_BUILDER_NODE_FINALOUTPUT_ISZERO(bn) ((bn)->finalOutput == 0) typedef struct FstTransition { uint8_t inp; //The byte input associated with this transition. @@ -37,4 +41,6 @@ FstBuilderNode *fstBuilderNodeClone(FstBuilderNode *src); void fstBuilderNodeCloneFrom(FstBuilderNode *dst, FstBuilderNode *src); +bool fstBuilderNodeCompileTo(FstBuilderNode *b, FstCountingWriter *wrt, CompiledAddr lastAddr, CompiledAddr startAddr); + #endif diff --git a/source/libs/index/inc/index_fst_registry.h b/source/libs/index/inc/index_fst_registry.h index f19bb750c2..3f3a690324 100644 --- a/source/libs/index/inc/index_fst_registry.h +++ b/source/libs/index/inc/index_fst_registry.h @@ -24,6 +24,8 @@ typedef struct FstRegistryCell { FstBuilderNode *node; } FstRegistryCell; +#define FST_REGISTRY_CELL_IS_EMPTY(cell) ((cell)->addr == NONE_ADDRESS) +#define FST_REGISTRY_CELL_INSERT(cell, tAddr) do {(cell)->addr = (tAddr);} while(0) //typedef struct FstRegistryCache { diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index cf8a6eabfe..11b44891f3 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -279,12 +279,43 @@ FstBuilder *fstBuilderCreate(void *w, FstType ty) { FstBuilder *b = malloc(sizeof(FstBuilder)); if (NULL == b) { return b; } - FstCountingWriter wtr = {.wtr = w, .count = 0, .summer = 0}; - b->wtr = wtr; - b->unfinished = malloc(sizeof(FstUnFinishedNodes)); + + b->wrt = fstCountingWriterCreate(w); + b->unfinished = fstUnFinishedNodesCreate(); + b->registry = fstRegistryCreate(10000, 2) ; + b->last = NULL; + b->lastAddr = NONE_ADDRESS; + b->len = 0; return b; - } + + +void fstBuilderCheckLastKey(FstBuilder *b, FstSlice bs, bool ckDupe) { +} + +CompiledAddr fstBuilderCompile(FstBuilder *b, FstBuilderNode *bn) { + if (FST_BUILDER_NODE_IS_FINAL(bn) + && 
FST_BUILDER_NODE_TRANS_ISEMPTY(bn) + && FST_BUILDER_NODE_FINALOUTPUT_ISZERO(bn)) { + return EMPTY_ADDRESS; + } + FstRegistryEntry *entry = fstRegistryGetEntry(b->registry, bn); + if (entry->state == FOUND) { + CompiledAddr ret = entry->addr; + tfree(entry); + return ret; + } + CompiledAddr startAddr = (CompiledAddr)(FST_WRITER_COUNT(b->wrt)); + + fstBuilderNodeCompileTo(bn, b->wrt, b->lastAddr, startAddr); + b->lastAddr = (CompiledAddr)(FST_WRITER_COUNT(b->wrt)) - 1; + if (entry->state == NOTFOUND) { + FST_REGISTRY_CELL_INSERT(entry->cell, b->lastAddr); + } + free(entry); + return b->lastAddr; +} + FstSlice fstNodeAsSlice(FstNode *node) { FstSlice *slice = &node->data; FstSlice s = fstSliceCopy(slice, slice->end, slice->dLen - 1); diff --git a/source/libs/index/src/index_fst_counting_writer.c b/source/libs/index/src/index_fst_counting_writer.c new file mode 100644 index 0000000000..91da63f600 --- /dev/null +++ b/source/libs/index/src/index_fst_counting_writer.c @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdint.h> +#include <stdlib.h> +#include "index_fst_counting_writer.h" + +FstCountingWriter *fstCountingWriterCreate(void *wrt) { + FstCountingWriter *cw = calloc(1, sizeof(FstCountingWriter)); + if (cw == NULL) { return NULL; } + cw->wrt = wrt; + return cw; +} +void FstCountingWriterDestroy(FstCountingWriter *cw) { + // free wrt object: close fd or free mem + free(cw); +} + +uint64_t fstCountingWriterWrite(FstCountingWriter *write, uint8_t *buf, uint32_t bufLen) { + if (write == NULL) { return 0; } + // update checksum + // write data to file/socket or mem + + write->count += bufLen; + return bufLen; +} + +int FstCountingWriterFlush(FstCountingWriter *write) { + //write->wrt->flush + return 1; +} diff --git a/source/libs/index/src/index_fst_node.c b/source/libs/index/src/index_fst_node.c index d7aa1e5041..86f9eb868b 100644 --- a/source/libs/index/src/index_fst_node.c +++ b/source/libs/index/src/index_fst_node.c @@ -54,3 +54,25 @@ void fstBuilderNodeCloneFrom(FstBuilderNode *dst, FstBuilderNode *src) { src->trans = NULL; } +bool fstBuilderNodeCompileTo(FstBuilderNode *b, FstCountingWriter *wrt, CompiledAddr lastAddr, CompiledAddr startAddr) { + size_t sz = taosArrayGetSize(b->trans); + assert(sz < 256); + if (FST_BUILDER_NODE_IS_FINAL(b) + && FST_BUILDER_NODE_TRANS_ISEMPTY(b) + && FST_BUILDER_NODE_FINALOUTPUT_ISZERO(b)) { + return true; + } else if (sz != 1 || b->isFinal) { + // AnyTrans->Compile(w, addr, node); + } else { + FstTransition *tran = taosArrayGet(b->trans, 0); + if (tran->addr == lastAddr && tran->out == 0) { + //OneTransNext::compile(w, lastAddr, tran->inp); + return true; + } else { + //OneTrans::Compile(w, lastAddr, *tran); + return true; + } + } + return true; + +} diff --git a/source/libs/index/src/index_fst_registry.c b/source/libs/index/src/index_fst_registry.c index 718832c803..900453bdb7 100644 --- a/source/libs/index/src/index_fst_registry.c +++ b/source/libs/index/src/index_fst_registry.c @@ -64,8 +64,6 @@ static void fstRegistryCellPromote(SArray *arr, uint32_t start, uint32_t end) { s -= 1; } } 
-#define FST_REGISTRY_CELL_IS_EMPTY(cell) (cell->addr == NONE_ADDRESS) -#define FST_REGISTRY_CELL_INSERT(cell, addr) do {cell->addr = addr;} while(0) FstRegistry* fstRegistryCreate(uint64_t tableSize, uint64_t mruSize) { FstRegistry *registry = malloc(sizeof(FstRegistry));