merge develop
This commit is contained in:
commit
09f3e8e19a
|
@ -48,6 +48,7 @@ endif(${BUILD_WITH_ROCKSDB})
|
||||||
## lucene
|
## lucene
|
||||||
if(${BUILD_WITH_LUCENE})
|
if(${BUILD_WITH_LUCENE})
|
||||||
cat("${CMAKE_SUPPORT_DIR}/lucene_CMakeLists.txt.in" ${DEPS_TMP_FILE})
|
cat("${CMAKE_SUPPORT_DIR}/lucene_CMakeLists.txt.in" ${DEPS_TMP_FILE})
|
||||||
|
add_definitions(-DUSE_LUCENE)
|
||||||
endif(${BUILD_WITH_LUCENE})
|
endif(${BUILD_WITH_LUCENE})
|
||||||
|
|
||||||
## NuRaft
|
## NuRaft
|
||||||
|
|
|
@ -22,7 +22,7 @@ option(
|
||||||
option(
|
option(
|
||||||
BUILD_WITH_LUCENE
|
BUILD_WITH_LUCENE
|
||||||
"If build with lucene"
|
"If build with lucene"
|
||||||
OFF
|
off
|
||||||
)
|
)
|
||||||
|
|
||||||
option(
|
option(
|
||||||
|
@ -41,4 +41,4 @@ option(
|
||||||
BUILD_DOCS
|
BUILD_DOCS
|
||||||
"If use doxygen build documents"
|
"If use doxygen build documents"
|
||||||
ON
|
ON
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,8 +1,7 @@
|
||||||
|
|
||||||
# lucene
|
# lucene
|
||||||
ExternalProject_Add(lucene
|
ExternalProject_Add(lucene
|
||||||
GIT_REPOSITORY https://github.com/taosdata-contrib/LucenePlusPlus.git
|
GIT_REPOSITORY https://github.com/yihaoDeng/LucenePlusPlus.git
|
||||||
GIT_TAG rel_3.0.8_td
|
|
||||||
SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/lucene"
|
SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/lucene"
|
||||||
BINARY_DIR ""
|
BINARY_DIR ""
|
||||||
#BUILD_IN_SOURCE TRUE
|
#BUILD_IN_SOURCE TRUE
|
||||||
|
@ -10,4 +9,4 @@ ExternalProject_Add(lucene
|
||||||
BUILD_COMMAND ""
|
BUILD_COMMAND ""
|
||||||
INSTALL_COMMAND ""
|
INSTALL_COMMAND ""
|
||||||
TEST_COMMAND ""
|
TEST_COMMAND ""
|
||||||
)
|
)
|
||||||
|
|
|
@ -68,6 +68,11 @@ endif(${BUILD_WITH_ROCKSDB})
|
||||||
if(${BUILD_WITH_LUCENE})
|
if(${BUILD_WITH_LUCENE})
|
||||||
option(ENABLE_TEST "Enable the tests" OFF)
|
option(ENABLE_TEST "Enable the tests" OFF)
|
||||||
add_subdirectory(lucene)
|
add_subdirectory(lucene)
|
||||||
|
target_include_directories(
|
||||||
|
lucene++
|
||||||
|
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/lucene/include>
|
||||||
|
)
|
||||||
|
|
||||||
endif(${BUILD_WITH_LUCENE})
|
endif(${BUILD_WITH_LUCENE})
|
||||||
|
|
||||||
# NuRaft
|
# NuRaft
|
||||||
|
|
|
@ -16,12 +16,53 @@
|
||||||
#ifndef _TD_INDEX_H_
|
#ifndef _TD_INDEX_H_
|
||||||
#define _TD_INDEX_H_
|
#define _TD_INDEX_H_
|
||||||
|
|
||||||
|
#include "os.h"
|
||||||
|
#include "tarray.h"
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
typedef struct SIndex SIndex;
|
||||||
|
typedef struct SIndexOpts SIndexOpts;
|
||||||
|
typedef struct SIndexMultiTermQuery SIndexMultiTermQuery;
|
||||||
|
typedef struct SArray SIndexMultiTerm;
|
||||||
|
|
||||||
|
typedef enum { MUST = 0, SHOULD = 1, NOT = 2 } EIndexOperatorType;
|
||||||
|
typedef enum { QUERY_TERM = 0, QUERY_PREFIX = 1, QUERY_SUFFIX = 2,QUERY_REGEX = 3} EIndexQueryType;
|
||||||
|
/*
|
||||||
|
* @param: oper
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
SIndexMultiTermQuery *indexMultiTermQueryCreate(EIndexOperatorType oper);
|
||||||
|
void indexMultiTermQueryDestroy(SIndexMultiTermQuery *pQuery);
|
||||||
|
int indexMultiTermQueryAdd(SIndexMultiTermQuery *pQuery, const char *field, int32_t nFields, const char *value, int32_t nValue, EIndexQueryType type);
|
||||||
|
/*
|
||||||
|
* @param:
|
||||||
|
* @param:
|
||||||
|
*/
|
||||||
|
SIndex* indexOpen(SIndexOpts *opt, const char *path);
|
||||||
|
void indexClose(SIndex *index);
|
||||||
|
int indexPut(SIndex *index, SIndexMultiTerm *terms, int uid);
|
||||||
|
int indexDelete(SIndex *index, SIndexMultiTermQuery *query);
|
||||||
|
int indexSearch(SIndex *index, SIndexMultiTermQuery *query, SArray *result);
|
||||||
|
int indexRebuild(SIndex *index, SIndexOpts *opt);
|
||||||
|
/*
|
||||||
|
* @param
|
||||||
|
* @param
|
||||||
|
*/
|
||||||
|
SIndexMultiTerm *indexMultiTermCreate();
|
||||||
|
int indexMultiTermAdd(SIndexMultiTerm *terms, const char *field, int32_t nFields, const char *value, int32_t nValue);
|
||||||
|
void indexMultiTermDestroy(SIndexMultiTerm *terms);
|
||||||
|
/*
|
||||||
|
* @param:
|
||||||
|
* @param:
|
||||||
|
*/
|
||||||
|
SIndexOpts *indexOptsCreate();
|
||||||
|
void indexOptsDestroy(SIndexOpts *opts);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif /*_TD_INDEX_H_*/
|
#endif /*_TD_INDEX_H_*/
|
||||||
|
|
|
@ -4,4 +4,27 @@ target_include_directories(
|
||||||
index
|
index
|
||||||
PUBLIC "${CMAKE_SOURCE_DIR}/include/libs/index"
|
PUBLIC "${CMAKE_SOURCE_DIR}/include/libs/index"
|
||||||
PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc"
|
PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc"
|
||||||
)
|
)
|
||||||
|
target_link_libraries(
|
||||||
|
index
|
||||||
|
PUBLIC os
|
||||||
|
PUBLIC util
|
||||||
|
)
|
||||||
|
|
||||||
|
if (${BUILD_WITH_LUCENE})
|
||||||
|
target_include_directories(
|
||||||
|
index
|
||||||
|
PUBLIC "${CMAKE_SOURCE_DIR}/deps/lucene/include"
|
||||||
|
)
|
||||||
|
LINK_DIRECTORIES("${CMAKE_SOURCE_DIR}/deps/lucene/debug/src/core")
|
||||||
|
target_link_libraries(
|
||||||
|
index
|
||||||
|
PUBLIC lucene++
|
||||||
|
)
|
||||||
|
|
||||||
|
endif(${BUILD_WITH_LUCENE})
|
||||||
|
|
||||||
|
if (${BUILD_TEST})
|
||||||
|
add_subdirectory(test)
|
||||||
|
endif(${BUILD_TEST})
|
||||||
|
|
||||||
|
|
|
@ -16,12 +16,52 @@
|
||||||
#ifndef _TD_INDEX_INT_H_
|
#ifndef _TD_INDEX_INT_H_
|
||||||
#define _TD_INDEX_INT_H_
|
#define _TD_INDEX_INT_H_
|
||||||
|
|
||||||
|
#include "index.h"
|
||||||
|
|
||||||
|
#ifdef USE_LUCENE
|
||||||
|
#include <lucene++/Lucene_c.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
struct SIndex {
|
||||||
|
#ifdef USE_LUCENE
|
||||||
|
index_t *index;
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
struct SIndexOpts {
|
||||||
|
#ifdef USE_LUCENE
|
||||||
|
void *opts;
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
struct SIndexMultiTermQuery {
|
||||||
|
EIndexOperatorType opera;
|
||||||
|
SArray *query;
|
||||||
|
};
|
||||||
|
|
||||||
|
// field and key;
|
||||||
|
typedef struct SIndexTerm {
|
||||||
|
char *key;
|
||||||
|
int32_t nKey;
|
||||||
|
char *val;
|
||||||
|
int32_t nVal;
|
||||||
|
} SIndexTerm;
|
||||||
|
|
||||||
|
typedef struct SIndexTermQuery {
|
||||||
|
SIndexTerm* field_value;
|
||||||
|
EIndexQueryType type;
|
||||||
|
} SIndexTermQuery;
|
||||||
|
|
||||||
|
|
||||||
|
SIndexTerm *indexTermCreate(const char *key, int32_t nKey, const char *val, int32_t nVal);
|
||||||
|
void indexTermDestroy(SIndexTerm *p);
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif /*_TD_INDEX_INT_H_*/
|
#endif /*_TD_INDEX_INT_H_*/
|
||||||
|
|
|
@ -0,0 +1,182 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can use, redistribute, and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License, version 3
|
||||||
|
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __INDEX_FST_H__
|
||||||
|
#define __INDEX_FST_H__
|
||||||
|
|
||||||
|
|
||||||
|
#include "tarray.h"
|
||||||
|
#include "index_fst_util.h"
|
||||||
|
#include "index_fst_registry.h"
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct FstNode FstNode;
|
||||||
|
// Evaluates to the smaller of a and b. Each argument may be evaluated twice,
// so do not pass expressions with side effects.
#define OUTPUT_PREFIX(a, b) ((a) > (b) ? (b) : (a))
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct FstRange {
|
||||||
|
uint64_t start;
|
||||||
|
uint64_t end;
|
||||||
|
} FstRange;
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct FstBuilderNode {
|
||||||
|
bool isFinal;
|
||||||
|
Output finalOutput;
|
||||||
|
SArray *trans; // <FstTransition>
|
||||||
|
} FstBuilderNode;
|
||||||
|
|
||||||
|
typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal} State;
|
||||||
|
typedef enum { Included, Excluded, Unbounded} FstBound;
|
||||||
|
|
||||||
|
typedef uint32_t CheckSummer;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
* UnFinished node and helper function
|
||||||
|
* TODO: simple function name
|
||||||
|
*/
|
||||||
|
typedef struct FstUnFinishedNodes {
|
||||||
|
SArray *stack; // array of FstBuilderNodeUnfinished, used as a stack
|
||||||
|
} FstUnFinishedNodes;
|
||||||
|
|
||||||
|
#define FST_UNFINISHED_NODES_LEN(nodes) taosArrayGetSize(nodes->stack)
|
||||||
|
|
||||||
|
// Allocates a FstUnFinishedNodes object; the name matches the definition in index_fst.c.
FstUnFinishedNodes *fstUnFinishedNodesCreate();
|
||||||
|
void fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal);
|
||||||
|
FstBuilderNode *fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes);
|
||||||
|
FstBuilderNode *fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr);
|
||||||
|
FstBuilderNode *fstUnFinishedNodesPopEmpty(FstUnFinishedNodes *nodes);
|
||||||
|
void fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes *node, Output out);
|
||||||
|
void fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *node, CompiledAddr addr);
|
||||||
|
void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *node, FstSlice bs, Output out);
|
||||||
|
uint64_t fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs);
|
||||||
|
// Spelling matches the definition in index_fst.c ("Prefix", not "Preifx").
uint64_t FstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, FstSlice bs, Output in, Output *out);
|
||||||
|
|
||||||
|
typedef struct FstCountingWriter {
|
||||||
|
void* wtr; // wrap any writer that counts and checksum bytes written
|
||||||
|
uint64_t count;
|
||||||
|
CheckSummer summer;
|
||||||
|
} FstCountingWriter;
|
||||||
|
|
||||||
|
typedef struct FstBuilder {
|
||||||
|
FstCountingWriter wtr; // The FST raw data is written directly to `wtr`.
|
||||||
|
FstUnFinishedNodes *unfinished; // The stack of unfinished nodes
|
||||||
|
FstRegistry registry; // A map of finished nodes.
|
||||||
|
SArray* last; // The last word added
|
||||||
|
CompiledAddr lastAddr; // The address of the last compiled node
|
||||||
|
uint64_t len; // num of keys added
|
||||||
|
} FstBuilder;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct FstTransition {
|
||||||
|
uint8_t inp; //The byte input associated with this transition.
|
||||||
|
Output out; //The output associated with this transition
|
||||||
|
CompiledAddr addr; //The address of the node that this transition points to
|
||||||
|
} FstTransition;
|
||||||
|
|
||||||
|
typedef struct FstTransitions {
|
||||||
|
FstNode *node;
|
||||||
|
FstRange range;
|
||||||
|
} FstTransitions;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct FstLastTransition {
|
||||||
|
uint8_t inp;
|
||||||
|
Output out;
|
||||||
|
} FstLastTransition;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* FstBuilderNodeUnfinished and helper function
|
||||||
|
* TODO: simple function name
|
||||||
|
*/
|
||||||
|
typedef struct FstBuilderNodeUnfinished {
|
||||||
|
FstBuilderNode *node;
|
||||||
|
FstLastTransition* last;
|
||||||
|
} FstBuilderNodeUnfinished;
|
||||||
|
|
||||||
|
void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr);
|
||||||
|
void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, CompiledAddr addr);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* FstNode and helper function
|
||||||
|
*/
|
||||||
|
typedef struct FstNode {
|
||||||
|
FstSlice data;
|
||||||
|
uint64_t version;
|
||||||
|
State state;
|
||||||
|
CompiledAddr start;
|
||||||
|
CompiledAddr end;
|
||||||
|
bool isFinal;
|
||||||
|
uint64_t nTrans;
|
||||||
|
PackSizes sizes;
|
||||||
|
Output finalOutput;
|
||||||
|
} FstNode;
|
||||||
|
|
||||||
|
// If this node is final and has a terminal output value, it is returned; otherwise a zero output is returned.
|
||||||
|
#define FST_NODE_FINAL_OUTPUT(node) node->finalOutput
|
||||||
|
// Returns true if and only if this node corresponds to a final, or "match", state in the finite state transducer.
|
||||||
|
#define FST_NODE_IS_FINAL(node) node->isFinal
|
||||||
|
// Returns the number of transitions in this node. The maximum number of transitions is 256.
|
||||||
|
#define FST_NODE_LEN(node) node->nTrans
|
||||||
|
// Returns true if and only if this node has zero transitions.
|
||||||
|
#define FST_NODE_IS_EMPTYE(node) (node->nTrans == 0)
|
||||||
|
// Return the address of this node.
|
||||||
|
#define FST_NODE_ADDR(node) node->start
|
||||||
|
|
||||||
|
FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *data);
|
||||||
|
FstTransitions fstNodeTransitionIter(FstNode *node);
|
||||||
|
FstTransitions* fstNodeTransitions(FstNode *node);
|
||||||
|
bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *res);
|
||||||
|
bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res);
|
||||||
|
bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res);
|
||||||
|
bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode);
|
||||||
|
FstSlice fstNodeAsSlice(FstNode *node);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct FstMeta {
|
||||||
|
uint64_t version;
|
||||||
|
CompiledAddr rootAddr;
|
||||||
|
FstType ty;
|
||||||
|
uint64_t len;
|
||||||
|
uint32_t checkSum;
|
||||||
|
} FstMeta;
|
||||||
|
|
||||||
|
typedef struct Fst {
|
||||||
|
FstMeta meta;
|
||||||
|
void *data; //
|
||||||
|
} Fst;
|
||||||
|
|
||||||
|
// ops
|
||||||
|
|
||||||
|
typedef struct FstIndexedValue {
|
||||||
|
uint64_t index;
|
||||||
|
uint64_t value;
|
||||||
|
} FstIndexedValue;
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct FstRegistryCell {
|
||||||
|
CompiledAddr addr;
|
||||||
|
FstBuilderNode *node;
|
||||||
|
} FstRegistryCell;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,42 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can use, redistribute, and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License, version 3
|
||||||
|
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
#ifndef __INDEX_FST_AUTAOMATION_H__
|
||||||
|
#define __INDEX_FST_AUTAOMATION_H__
|
||||||
|
|
||||||
|
// Forward declaration. The typedef is required in C so that the bare name
// `AutomationCtx` can be used by the structs declared before its definition.
typedef struct AutomationCtx AutomationCtx;
|
||||||
|
|
||||||
|
typedef struct StartWith {
|
||||||
|
AutomationCtx *autoSelf;
|
||||||
|
} StartWith;
|
||||||
|
|
||||||
|
typedef struct Complement {
|
||||||
|
AutomationCtx *autoSelf;
|
||||||
|
} Complement;
|
||||||
|
|
||||||
|
// automation
|
||||||
|
typedef struct AutomationCtx {
|
||||||
|
void *data;
|
||||||
|
} AutomationCtx;
|
||||||
|
|
||||||
|
// automation interface
|
||||||
|
void (*start)(AutomationCtx *ctx);
|
||||||
|
bool (*isMatch)(AutomationCtx *ctx);
|
||||||
|
bool (*canMatch)(AutomationCtx *ctx, void *data);
|
||||||
|
bool (*willAlwaysMatch)(AutomationCtx *ctx, void *state);
|
||||||
|
void* (*accpet)(AutomationCtx *ctx, void *state, uint8_t byte);
|
||||||
|
void* (*accpetEof)(AutomationCtx *ctx, void *state);  // `state` is an opaque pointer, as in the sibling callbacks
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,22 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can use, redistribute, and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License, version 3
|
||||||
|
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __INDEX_FST_NODE_H__
|
||||||
|
#define __INDEX_FST_NODE_H__
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,24 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can use, redistribute, and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License, version 3
|
||||||
|
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
#ifndef __FST_REGISTRY_H__
|
||||||
|
#define __FST_REGISTRY_H__
|
||||||
|
|
||||||
|
#include "index_fst_util.h"
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct FstRegistry {
|
||||||
|
|
||||||
|
} FstRegistry;
|
||||||
|
#endif
|
|
@ -0,0 +1,82 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can use, redistribute, and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License, version 3
|
||||||
|
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef __INDEX_FST_UTIL_H__
|
||||||
|
#define __INDEX_FST_UTIL_H__
|
||||||
|
|
||||||
|
#include "tarray.h"
|
||||||
|
|
||||||
|
|
||||||
|
typedef uint64_t FstType;
|
||||||
|
typedef uint64_t CompiledAddr;
|
||||||
|
typedef uint64_t Output;
|
||||||
|
typedef uint8_t PackSizes;
|
||||||
|
|
||||||
|
|
||||||
|
//A sentinel value used to indicate an empty final state
|
||||||
|
extern const CompiledAddr EMPTY_ADDRESS;
|
||||||
|
/// A sentinel value used to indicate an invalid state.
|
||||||
|
extern const CompiledAddr NONE_ADDRESS;
|
||||||
|
|
||||||
|
// This version number is written to every finite state transducer created by
|
||||||
|
// this crate. When a finite state transducer is read, its version number is
|
||||||
|
// checked against this value.
|
||||||
|
extern const uint64_t version;
|
||||||
|
// The threshold (in number of transitions) at which an index is created for
|
||||||
|
// a node's transitions. This speeds up lookup time at the expense of FST size
|
||||||
|
|
||||||
|
extern const uint64_t TRANS_INDEX_THRESHOLD;
|
||||||
|
// high 4 bits is transition address packed size.
|
||||||
|
// low 4 bits is output value packed size.
|
||||||
|
//
|
||||||
|
// `0` is a legal value which means there are no transitions/outputs
|
||||||
|
|
||||||
|
// Stores sz in the high 4 bits of v, keeping the low 4 bits unchanged.
#define FST_SET_TRANSITION_PACK_SIZE(v, sz) do { (v) = ((v) & 0b00001111) | ((sz) << 4); } while (0)
|
||||||
|
#define FST_GET_TRANSITION_PACK_SIZE(v) (((v) & 0b11110000) >> 4)
|
||||||
|
// Stores sz in the low 4 bits of v, keeping the high 4 bits unchanged.
// sz is OR-ed in as given, so it must already fit in 4 bits.
#define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { (v) = ((v) & 0b11110000) | (sz); } while (0)
|
||||||
|
#define FST_GET_OUTPUT_PACK_SIZE(v) ((v) & 0b00001111)
|
||||||
|
|
||||||
|
#define COMMON_INPUT(idx) COMMON_INPUTS_INV[(idx) - 1]
|
||||||
|
|
||||||
|
#define COMMON_INDEX(v, max, val) do { \
|
||||||
|
val = ((uint16_t)COMMON_INPUTS[v] + 1)%256; \
|
||||||
|
val = val > max ? 0: val; \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
|
||||||
|
//uint8_t commonInput(uint8_t idx);
|
||||||
|
//uint8_t commonIdx(uint8_t v, uint8_t max);
|
||||||
|
|
||||||
|
uint8_t packSize(uint64_t n);
|
||||||
|
uint64_t unpackUint64(uint8_t *ch, uint8_t sz);
|
||||||
|
uint8_t packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr);
|
||||||
|
CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct FstSlice {
|
||||||
|
uint8_t *data;
|
||||||
|
uint64_t dLen;
|
||||||
|
uint32_t start;
|
||||||
|
uint32_t end;
|
||||||
|
} FstSlice;
|
||||||
|
|
||||||
|
FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end);
|
||||||
|
FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen);
|
||||||
|
bool fstSliceEmpty(FstSlice *slice);
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
|
@ -13,15 +13,176 @@
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef _TD_INDEX_H_
|
#include "index.h"
|
||||||
#define _TD_INDEX_H_
|
#include "indexInt.h"
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef USE_LUCENE
|
||||||
extern "C" {
|
#include "lucene++/Lucene_c.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
static pthread_once_t isInit = PTHREAD_ONCE_INIT;
|
||||||
|
|
||||||
|
static void indexInit();
|
||||||
|
|
||||||
|
SIndex *indexOpen(SIndexOpts *opts, const char *path) {
|
||||||
|
pthread_once(&isInit, indexInit);
|
||||||
|
#ifdef USE_LUCENE
|
||||||
|
index_t *index = index_open(path);
|
||||||
|
SIndex *p = malloc(sizeof(SIndex));
|
||||||
|
p->index = index;
|
||||||
|
return p;
|
||||||
|
#endif
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /*_TD_INDEX_H_*/
|
/*
 * Close an index handle and free it.
 *
 * @param index handle returned by indexOpen(); NULL is accepted and ignored,
 *              because indexOpen() itself can return NULL.
 *
 * With USE_LUCENE the wrapped Lucene handle is closed first.
 */
void indexClose(SIndex *index) {
  if (index == NULL) {
    return;
  }
#ifdef USE_LUCENE
  index_close(index->index);
  index->index = NULL;
#endif
  free(index);
}
|
||||||
|
|
||||||
|
#ifdef USE_LUCENE
|
||||||
|
#endif
|
||||||
|
/*
 * Add one document to the index.
 *
 * @param index      open index handle
 * @param field_vals array of SIndexTerm pointers (key/value pairs) to store
 * @param uid        identifier stored with the document as decimal text
 * @return 1 on success (and always when built without USE_LUCENE);
 *         -1 if a required argument is NULL or the document cannot be created
 *
 * NOTE(review): the meaning of the last argument of index_document_add()
 * (1 for terms, 0 for the uid entry) is defined by Lucene_c.h - confirm there.
 */
int indexPut(SIndex *index, SArray* field_vals, int uid) {
#ifdef USE_LUCENE
  if (index == NULL || field_vals == NULL) {
    return -1;
  }

  index_document_t *doc = index_document_create();
  if (doc == NULL) {
    return -1;
  }

  // Bounded formatting: the buffer size is enforced rather than assumed.
  char buf[16] = {0};
  snprintf(buf, sizeof(buf), "%d", uid);

  size_t nTerms = taosArrayGetSize(field_vals);
  for (size_t i = 0; i < nTerms; i++) {
    SIndexTerm *p = taosArrayGetP(field_vals, i);
    index_document_add(doc, (const char *)(p->key), p->nKey, (const char *)(p->val), p->nVal, 1);
  }
  index_document_add(doc, NULL, 0, buf, strlen(buf), 0);

  index_put(index->index, doc);
  index_document_destroy(doc);
#endif
  return 1;
}
|
||||||
|
/*
 * Run a multi-term query and append the matching ids to `result`.
 *
 * @param index       open index handle
 * @param multiQuerys query built with indexMultiTermQueryCreate/Add
 * @param result      array that receives one int per hit
 * @return 1 on success (and always when built without USE_LUCENE);
 *         -1 if a temporary buffer cannot be allocated
 *
 * Field names and values are copied into NUL-terminated buffers because the
 * terms carry explicit lengths, while index_multi_search() takes C strings.
 */
int indexSearch(SIndex *index, SIndexMultiTermQuery *multiQuerys, SArray *result) {
#ifdef USE_LUCENE
  EIndexOperatorType opera = multiQuerys->opera;

  int nQuery = taosArrayGetSize(multiQuerys->query);
  // Zero-initialised tables, so a partially filled state can be freed uniformly.
  char **fields = calloc(nQuery, sizeof(char *));
  char **keys = calloc(nQuery, sizeof(char *));
  int *types = calloc(nQuery, sizeof(int));

  int ok = (nQuery == 0) || (fields != NULL && keys != NULL && types != NULL);

  for (int i = 0; ok && i < nQuery; i++) {
    SIndexTermQuery *p = taosArrayGet(multiQuerys->query, i);
    SIndexTerm *term = p->field_value;

    fields[i] = calloc(1, term->nKey + 1);
    keys[i] = calloc(1, term->nVal + 1);
    if (fields[i] == NULL || keys[i] == NULL) {
      ok = 0;
      break;
    }

    memcpy(fields[i], term->key, term->nKey);
    memcpy(keys[i], term->val, term->nVal);
    types[i] = (int)(p->type);
  }

  if (ok) {
    int *tResult = NULL;
    int tsz = 0;
    index_multi_search(index->index, (const char **)fields, (const char **)keys, types, nQuery, opera, &tResult, &tsz);

    for (int i = 0; i < tsz; i++) {
      taosArrayPush(result, &tResult[i]);
    }
    // NOTE(review): tResult is not released here; confirm in Lucene_c.h
    // whether the caller owns the buffer filled in by index_multi_search().
  }

  for (int i = 0; i < nQuery; i++) {
    if (fields != NULL) { free(fields[i]); }
    if (keys != NULL) { free(keys[i]); }
  }
  free(fields);
  free(keys);
  free(types);

  if (!ok) {
    return -1;
  }
#endif
  return 1;
}
|
||||||
|
|
||||||
|
int indexDelete(SIndex *index, SIndexMultiTermQuery *query) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
int indexRebuild(SIndex *index, SIndexOpts *opts);
|
||||||
|
|
||||||
|
|
||||||
|
SIndexOpts *indexOptsCreate() {
|
||||||
|
#ifdef USE_LUCENE
|
||||||
|
#endif
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
void indexOptsDestroy(SIndexOpts *opts) {
|
||||||
|
#ifdef USE_LUCENE
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* @param: oper
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Create an empty multi-term query.
 *
 * @param opera operator stored in the query (MUST / SHOULD / NOT)
 * @return new query, or NULL if either allocation fails
 */
SIndexMultiTermQuery *indexMultiTermQueryCreate(EIndexOperatorType opera) {
  SIndexMultiTermQuery *p = (SIndexMultiTermQuery *)malloc(sizeof(SIndexMultiTermQuery));
  if (p == NULL) { return NULL; }

  p->opera = opera;
  p->query = taosArrayInit(1, sizeof(SIndexTermQuery));
  if (p->query == NULL) {
    // Do not hand out a query whose term array is missing.
    free(p);
    return NULL;
  }
  return p;
}
|
||||||
|
/*
 * Destroy a multi-term query together with every term it owns.
 *
 * @param pQuery query to destroy; NULL is ignored
 */
void indexMultiTermQueryDestroy(SIndexMultiTermQuery *pQuery) {
  if (pQuery == NULL) {
    return;
  }
  for (int i = 0; i < taosArrayGetSize(pQuery->query); i++) {
    SIndexTermQuery *p = (SIndexTermQuery *)taosArrayGet(pQuery->query, i);
    indexTermDestroy(p->field_value);
  }
  taosArrayDestroy(pQuery->query);
  free(pQuery);
}
|
||||||
|
/*
 * Append one term query to a multi-term query.
 *
 * @param pQuery  query to extend
 * @param field   field name bytes, nFields long
 * @param value   value bytes, nValue long
 * @param type    how the value is matched (term / prefix / suffix / regex)
 * @return 0 on success, -1 if the term cannot be created or stored
 */
int indexMultiTermQueryAdd(SIndexMultiTermQuery *pQuery, const char *field, int32_t nFields, const char *value, int32_t nValue, EIndexQueryType type){
  SIndexTerm *t = indexTermCreate(field, nFields, value, nValue);
  if (t == NULL) {return -1;}

  SIndexTermQuery q = {.type = type, .field_value = t};
  // NOTE(review): assumes taosArrayPush() returns NULL on failure - confirm in tarray.h.
  if (taosArrayPush(pQuery->query, &q) == NULL) {
    indexTermDestroy(t);  // the array did not take the term, so release it here
    return -1;
  }
  return 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
SIndexTerm *indexTermCreate(const char *key, int32_t nKey, const char *val, int32_t nVal) {
|
||||||
|
SIndexTerm *t = (SIndexTerm *)malloc(sizeof(SIndexTerm));
|
||||||
|
t->key = (char *)calloc(nKey + 1, 1);
|
||||||
|
memcpy(t->key, key, nKey);
|
||||||
|
t->nKey = nKey;
|
||||||
|
|
||||||
|
t->val = (char *)calloc(nVal + 1, 1);
|
||||||
|
memcpy(t->val, val, nVal);
|
||||||
|
t->nVal = nVal;
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
/*
 * Free a term and the key/value copies it owns.
 *
 * @param p term created by indexTermCreate(); NULL is ignored
 */
void indexTermDestroy(SIndexTerm *p) {
  if (p == NULL) {
    return;
  }
  free(p->key);
  free(p->val);
  free(p);
}
|
||||||
|
|
||||||
|
SArray *indexMultiTermCreate() {
|
||||||
|
return taosArrayInit(4, sizeof(SIndexTerm *));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Append one field/value term to a term list.
 *
 * @param array  list created by indexMultiTermCreate() (stores SIndexTerm pointers)
 * @param field  field name bytes, nField long
 * @param val    value bytes, nVal long
 * @return 0 on success, -1 if the term cannot be created or stored
 */
int indexMultiTermAdd(SArray *array, const char *field, int32_t nField, const char *val, int32_t nVal) {
  SIndexTerm *term = indexTermCreate(field, nField, val, nVal);
  if (term == NULL) { return -1; }

  // NOTE(review): assumes taosArrayPush() returns NULL on failure - confirm in tarray.h.
  if (taosArrayPush(array, &term) == NULL) {
    indexTermDestroy(term);  // the list did not take the term, so release it here
    return -1;
  }
  return 0;
}
|
||||||
|
/*
 * Destroy a term list and every term stored in it.
 *
 * @param array list created by indexMultiTermCreate(); NULL is ignored
 */
void indexMultiTermDestroy(SArray *array) {
  if (array == NULL) {
    return;
  }
  for (int32_t i = 0; i < taosArrayGetSize(array); i++) {
    SIndexTerm *p = taosArrayGetP(array, i);
    indexTermDestroy(p);
  }
  taosArrayDestroy(array);
}
|
||||||
|
void indexInit() {
|
||||||
|
//do nothing
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,296 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can use, redistribute, and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License, version 3
|
||||||
|
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "index_fst.h"
|
||||||
|
|
||||||
|
|
||||||
|
FstUnFinishedNodes *fstUnFinishedNodesCreate() {
|
||||||
|
FstUnFinishedNodes *nodes = malloc(sizeof(FstUnFinishedNodes));
|
||||||
|
if (nodes == NULL) { return NULL; }
|
||||||
|
|
||||||
|
nodes->stack = (SArray *)taosArrayInit(64, sizeof(FstBuilderNodeUnfinished));
|
||||||
|
fstUnFinishedNodesPushEmpty(nodes, false);
|
||||||
|
return nodes;
|
||||||
|
}
|
||||||
|
/*
 * Push a new node with no transitions and no pending last transition.
 *
 * @param nodes   stack to push onto
 * @param isFinal value stored in the new node's isFinal flag
 *
 * If the node cannot be allocated or stored, the stack is left unchanged.
 */
void fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal) {
  FstBuilderNode *node = malloc(sizeof(FstBuilderNode));
  if (node == NULL) {
    return;
  }
  node->isFinal = isFinal;
  node->finalOutput = 0;
  node->trans = NULL;

  FstBuilderNodeUnfinished un = {.node = node, .last = NULL};
  // NOTE(review): assumes taosArrayPush() returns NULL on failure - confirm in tarray.h.
  if (taosArrayPush(nodes->stack, &un) == NULL) {
    free(node);
  }
}
|
||||||
|
FstBuilderNode *fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes) {
|
||||||
|
assert(taosArrayGetSize(nodes->stack) == 1);
|
||||||
|
|
||||||
|
FstBuilderNodeUnfinished *un = taosArrayPop(nodes->stack);
|
||||||
|
assert(un->last == NULL);
|
||||||
|
return un->node;
|
||||||
|
}
|
||||||
|
|
||||||
|
FstBuilderNode *fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr) {
|
||||||
|
FstBuilderNodeUnfinished *un = taosArrayPop(nodes->stack);
|
||||||
|
fstBuilderNodeUnfinishedLastCompiled(un, addr);
|
||||||
|
free(un->last); // TODO add func FstLastTransitionFree()
|
||||||
|
return un->node;
|
||||||
|
}
|
||||||
|
|
||||||
|
FstBuilderNode *fstUnFinishedNodesPopEmpty(FstUnFinishedNodes *nodes) {
|
||||||
|
FstBuilderNodeUnfinished *un = taosArrayPop(nodes->stack);
|
||||||
|
assert(un->last == NULL);
|
||||||
|
return un->node;
|
||||||
|
|
||||||
|
}
|
||||||
|
void fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes *nodes, Output out) {
|
||||||
|
FstBuilderNodeUnfinished *un = taosArrayGet(nodes->stack, 0);
|
||||||
|
un->node->isFinal = true;
|
||||||
|
un->node->finalOutput = out;
|
||||||
|
//un->node->trans = NULL;
|
||||||
|
}
|
||||||
|
void fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr) {
|
||||||
|
size_t sz = taosArrayGetSize(nodes->stack) - 1;
|
||||||
|
FstBuilderNodeUnfinished *un = taosArrayGet(nodes->stack, sz);
|
||||||
|
fstBuilderNodeUnfinishedLastCompiled(un, addr);
|
||||||
|
}
|
||||||
|
// Append the bytes of `bs` below the current top of the unfinished-node stack.
//
// nodes - stack to extend.
// bs    - suffix bytes, read from bs.data[bs.start .. bs.end] (inclusive).
// out   - output value attached to the first transition of the suffix.
//
// Nothing happens for an empty/invalid slice. Otherwise:
//   * the top entry (asserted to have no pending transition) gets a last
//     transition for the first byte, carrying `out`;
//   * every remaining byte pushes one new non-final node whose pending
//     transition carries a zero output, so `out` is emitted once per key;
//   * a final empty node is pushed to terminate the key.
//
// The previous version restarted the loop at bs.start (pushing the first byte
// twice) and copied `out` onto every transition.
void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *nodes, FstSlice bs, Output out) {
  FstSlice *s = &bs;
  if (s->data == NULL || s->dLen == 0 || s->start > s->end) {
    return;
  }

  size_t                    sz = taosArrayGetSize(nodes->stack) - 1;
  FstBuilderNodeUnfinished *top = taosArrayGet(nodes->stack, sz);
  assert(top->last == NULL);

  // First byte: becomes the pending transition of the existing top node.
  FstLastTransition *first = malloc(sizeof(FstLastTransition));
  first->inp = s->data[s->start];
  first->out = out;
  top->last = first;
  // `top` points into the array storage; it is not used after the pushes below.

  // Remaining bytes: one new node per byte.
  for (uint64_t i = (uint64_t)s->start + 1; i <= s->end; i++) {
    FstBuilderNode *n = malloc(sizeof(FstBuilderNode));
    n->isFinal = false;
    n->finalOutput = 0;
    n->trans = NULL;

    FstLastTransition *trn = malloc(sizeof(FstLastTransition));
    trn->inp = s->data[i];
    trn->out = 0;  // the key's output already sits on the first transition

    FstBuilderNodeUnfinished entry = {.node = n, .last = trn};
    taosArrayPush(nodes->stack, &entry);
  }

  fstUnFinishedNodesPushEmpty(nodes, true);
}
|
||||||
|
|
||||||
|
|
||||||
|
// Count how many leading bytes of `bs` match the pending last transitions of
// the stack entries, starting from stack slot 0.
//
// node - unfinished-node stack to compare against.
// bs   - key bytes, read from bs.data[bs.start .. bs.end] (inclusive).
//
// Returns the number of consecutive matches. Counting stops at the first
// mismatch, at the end of either sequence, or at an entry that has no pending
// transition (entries pushed by fstUnFinishedNodesPushEmpty carry last == NULL,
// which the previous version dereferenced).
uint64_t fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs) {
  FstSlice *s = &bs;

  size_t lsz = (size_t)(s->end - s->start + 1);  // data len
  size_t ssz = taosArrayGetSize(node->stack);    // stack size

  uint64_t count = 0;
  for (size_t i = 0; i < ssz && i < lsz; i++) {
    FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, i);
    if (un->last == NULL || un->last->inp != s->data[s->start + i]) {
      break;
    }
    count++;
  }
  return count;
}
|
||||||
|
// Find the common prefix between `bs` and the pending transitions on the
// stack while redistributing outputs along that prefix.
//
// node - unfinished-node stack.
// bs   - key bytes, read from bs.data[bs.start .. bs.end] (inclusive).
// in   - output value of the key being inserted.
// out  - receives the part of `in` not absorbed by the shared prefix
//        (may be NULL when the caller does not need it).
//
// For every matching transition the shared part of the two outputs
// (their minimum) stays on the transition; whatever the transition carried
// beyond that is pushed down to the next node on the stack, and the shared
// part is subtracted from the remaining key output.
//
// Returns the number of matched bytes.
//
// Fixes over the previous version: the remaining output is written through
// `out` instead of doing pointer arithmetic on it, `in` is actually used, the
// match count is returned, a missing pending transition ends the scan instead
// of being dereferenced, and the pushed-down output goes to the following node.
uint64_t FstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, FstSlice bs, Output in, Output *out) {
  FstSlice *s = &bs;

  size_t lsz = (size_t)(s->end - s->start + 1);  // data len
  size_t ssz = taosArrayGetSize(node->stack);    // stack size

  Output   remain = in;
  uint64_t matched = 0;
  while (matched < lsz && matched < ssz) {
    FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, matched);
    FstLastTransition        *last = un->last;
    if (last == NULL || last->inp != s->data[s->start + matched]) {
      break;
    }
    matched++;

    uint64_t commPrefix = (last->out < remain) ? last->out : remain;
    uint64_t addPrefix = last->out - commPrefix;
    remain = remain - commPrefix;
    last->out = commPrefix;

    // Output that no longer fits on this transition moves to the node it leads to.
    if (addPrefix != 0 && matched < ssz) {
      FstBuilderNodeUnfinished *next = taosArrayGet(node->stack, matched);
      fstBuilderNodeUnfinishedAddOutputPrefix(next, addPrefix);
    }
  }

  if (out != NULL) {
    *out = remain;
  }
  return matched;
}
|
||||||
|
|
||||||
|
// fst node function
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Allocate an FstNode describing the node stored at `addr` inside `slice`.
//
// version - format version recorded in the node.
// addr    - address of the node; EMPTY_ADDRESS yields the empty final node.
// slice   - encoded data; not read when addr == EMPTY_ADDRESS.
//
// Returns NULL when allocation fails. The node kind is chosen from the top
// two bits of the byte at `addr`: 0b11 -> OneTransNext, 0b10 -> OneTrans,
// anything else -> AnyTrans.
//
// Fixes over the previous version: the empty-node case returns immediately
// instead of falling through and reading slice->data[addr]; the OneTrans test
// uses the 2-bit tag rather than the whole byte; the not-yet-decoded size and
// transition-count fields are zeroed instead of being read uninitialized.
FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *slice) {
  FstNode *n = (FstNode *)malloc(sizeof(FstNode));
  if (n == NULL) {
    return NULL;
  }

  if (addr == EMPTY_ADDRESS) {
    n->data = fstSliceCreate(NULL, 0);
    n->version = version;
    n->state = EmptyFinal;
    n->start = EMPTY_ADDRESS;
    n->end = EMPTY_ADDRESS;
    n->isFinal = true;
    n->nTrans = 0;
    n->sizes = 0;
    n->finalOutput = 0;
    return n;
  }

  uint8_t v = slice->data[addr];
  uint8_t tag = (v & 0xC0) >> 6;  // top two bits select the node kind

  if (tag == 0x3) {  // oneTransNext
    n->data = fstSliceCopy(slice, 0, addr);
    n->version = version;
    n->state = OneTransNext;
    n->start = addr;
    n->end = addr;  //? s.end_addr(data);
    n->isFinal = false;
    n->sizes = 0;
    n->nTrans = 0;
    n->finalOutput = 0;
  } else if (tag == 0x2) {  // oneTrans
    uint64_t sz = 0;  // TODO fetch sz from addr
    n->data = fstSliceCopy(slice, 0, addr);
    n->version = version;
    n->state = OneTrans;
    n->start = addr;
    n->end = addr;  // s.end_addr(data, sz);
    n->isFinal = false;
    n->nTrans = 1;
    n->sizes = sz;
    n->finalOutput = 0;
  } else {  // anyTrans
    uint64_t sz = 0;      // TODO s.sizes(data)
    uint32_t nTrans = 0;  // TODO s.ntrans(data)
    n->data = *slice;
    n->version = version;
    n->state = AnyTrans;
    n->start = addr;
    n->end = addr;       // s.end_addr(version, data, sz, ntrans);
    n->isFinal = false;  // s.is_final_state();
    n->nTrans = nTrans;
    n->sizes = sz;
    n->finalOutput = 0;  // s.final_output(version, data, sz, ntrans);
  }
  return n;
}
|
||||||
|
// Allocate a transition cursor for `node` covering the range
// [0, FST_NODE_LEN(node)]. Returns NULL when allocation fails.
FstTransitions* fstNodeTransitions(FstNode *node) {
  FstTransitions *cursor = malloc(sizeof(*cursor));
  if (cursor != NULL) {
    FstRange whole = {.start = 0, .end = FST_NODE_LEN(node)};
    cursor->node = node;
    cursor->range = whole;
  }
  return cursor;
}
|
||||||
|
// Placeholder: reports whether `node` is of a kind that has transitions
// (OneTransNext, OneTrans or AnyTrans). `i` and `res` are not used yet, so
// nothing is written to `res`.
bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *res) {
  const bool hasTransitions = node->state == OneTransNext
                           || node->state == OneTrans
                           || node->state == AnyTrans;
  return hasTransitions;
}
|
||||||
|
|
||||||
|
// Placeholder: returns false only for an EmptyFinal node and true for every
// other state. `i` and `res` are not used yet, so nothing is written to `res`.
bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res) {
  if (node->state == EmptyFinal) {
    return false;
  }
  return true;
}
|
||||||
|
|
||||||
|
// Look up the transition of `node` labelled with byte `b`.
//
// node - node to search.
// b    - input byte to look for.
// res  - receives the transition index when the lookup succeeds.
//
// Returns true when a transition was found. For single-transition nodes a
// mismatch now returns false (previously true was returned with *res unset).
//
// NOTE(review): decoding the stored input byte is not implemented yet; the
// zero below is only a placeholder so the comparison no longer reads an
// indeterminate value. The AnyTrans lookup is also still missing and keeps
// its previous result (true, *res untouched).
bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res) {
  uint8_t input = 0;  // TODO s.input

  if (node->state == OneTransNext || node->state == OneTrans) {
    if (b != input) {
      return false;
    }
    *res = 0;
    return true;
  }
  if (node->state == EmptyFinal) {
    return false;
  }
  // AnyTrans (and any other state): TODO search the transition table.
  return true;
}
|
||||||
|
|
||||||
|
// Placeholder for serialising `builderNode`; the three encodings are selected
// but not written yet, so every path returns true.
//
// Asserts that the node has fewer than 256 transitions.
bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode) {
  const size_t nTrans = taosArrayGetSize(builderNode->trans);
  assert(nTrans < 256);

  // Final node without transitions or output: nothing to emit.
  const bool emptyFinal = (nTrans == 0) && builderNode->isFinal && (builderNode->finalOutput == 0);
  if (emptyFinal) {
    return true;
  }

  // Exactly one transition on a non-final node: one of the compact encodings.
  if (nTrans == 1 && !builderNode->isFinal) {
    FstTransition *only = taosArrayGet(builderNode->trans, 0);
    if (only->addr == lastAddr && only->out == 0) {
      //OneTransNext::compile(w, lastAddr, tran->inp);
    } else {
      //OneTrans::Compile(w, lastAddr, *tran);
    }
    return true;
  }

  // General case.
  // AnyTrans->Compile(w, addr, node);
  return true;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Allocate an FST builder that writes through `w`.
//
// w  - underlying writer handle, stored in the builder's counting writer.
// ty - FST type; currently unused.
//
// Returns NULL when either allocation fails (the second allocation used to go
// unchecked, handing out a builder with a NULL `unfinished` member).
//
// NOTE(review): the FstUnFinishedNodes block is only allocated here, not
// initialised — confirm whether its stack should be created at this point.
FstBuilder *fstBuilderCreate(void *w, FstType ty) {
  FstBuilder *b = malloc(sizeof(FstBuilder));
  if (NULL == b) {
    return b;
  }

  FstCountingWriter wtr = {.wtr = w, .count = 0, .summer = 0};
  b->wtr = wtr;

  b->unfinished = malloc(sizeof(FstUnFinishedNodes));
  if (NULL == b->unfinished) {
    free(b);
    return NULL;
  }
  return b;
}
|
||||||
|
// Return the sub-slice of the node's data running from the data slice's `end`
// index up to its last byte (dLen - 1), as produced by fstSliceCopy().
FstSlice fstNodeAsSlice(FstNode *node) {
  FstSlice *whole = &node->data;
  return fstSliceCopy(whole, whole->end, whole->dLen - 1);
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can use, redistribute, and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License, version 3
|
||||||
|
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
|
@ -0,0 +1,306 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can use, redistribute, and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License, version 3
|
||||||
|
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "tutil.h"
|
||||||
|
const uint8_t COMMON_INPUTS[] = {
|
||||||
|
84, // '\x00'
|
||||||
|
85, // '\x01'
|
||||||
|
86, // '\x02'
|
||||||
|
87, // '\x03'
|
||||||
|
88, // '\x04'
|
||||||
|
89, // '\x05'
|
||||||
|
90, // '\x06'
|
||||||
|
91, // '\x07'
|
||||||
|
92, // '\x08'
|
||||||
|
93, // '\t'
|
||||||
|
94, // '\n'
|
||||||
|
95, // '\x0b'
|
||||||
|
96, // '\x0c'
|
||||||
|
97, // '\r'
|
||||||
|
98, // '\x0e'
|
||||||
|
99, // '\x0f'
|
||||||
|
100, // '\x10'
|
||||||
|
101, // '\x11'
|
||||||
|
102, // '\x12'
|
||||||
|
103, // '\x13'
|
||||||
|
104, // '\x14'
|
||||||
|
105, // '\x15'
|
||||||
|
106, // '\x16'
|
||||||
|
107, // '\x17'
|
||||||
|
108, // '\x18'
|
||||||
|
109, // '\x19'
|
||||||
|
110, // '\x1a'
|
||||||
|
111, // '\x1b'
|
||||||
|
112, // '\x1c'
|
||||||
|
113, // '\x1d'
|
||||||
|
114, // '\x1e'
|
||||||
|
115, // '\x1f'
|
||||||
|
116, // ' '
|
||||||
|
80, // '!'
|
||||||
|
117, // '"'
|
||||||
|
118, // '#'
|
||||||
|
79, // '$'
|
||||||
|
39, // '%'
|
||||||
|
30, // '&'
|
||||||
|
81, // "'"
|
||||||
|
75, // '('
|
||||||
|
74, // ')'
|
||||||
|
82, // '*'
|
||||||
|
57, // '+'
|
||||||
|
66, // ','
|
||||||
|
16, // '-'
|
||||||
|
12, // '.'
|
||||||
|
2, // '/'
|
||||||
|
19, // '0'
|
||||||
|
20, // '1'
|
||||||
|
21, // '2'
|
||||||
|
27, // '3'
|
||||||
|
32, // '4'
|
||||||
|
29, // '5'
|
||||||
|
35, // '6'
|
||||||
|
36, // '7'
|
||||||
|
37, // '8'
|
||||||
|
34, // '9'
|
||||||
|
24, // ':'
|
||||||
|
73, // ';'
|
||||||
|
119, // '<'
|
||||||
|
23, // '='
|
||||||
|
120, // '>'
|
||||||
|
40, // '?'
|
||||||
|
83, // '@'
|
||||||
|
44, // 'A'
|
||||||
|
48, // 'B'
|
||||||
|
42, // 'C'
|
||||||
|
43, // 'D'
|
||||||
|
49, // 'E'
|
||||||
|
46, // 'F'
|
||||||
|
62, // 'G'
|
||||||
|
61, // 'H'
|
||||||
|
47, // 'I'
|
||||||
|
69, // 'J'
|
||||||
|
68, // 'K'
|
||||||
|
58, // 'L'
|
||||||
|
56, // 'M'
|
||||||
|
55, // 'N'
|
||||||
|
59, // 'O'
|
||||||
|
51, // 'P'
|
||||||
|
72, // 'Q'
|
||||||
|
54, // 'R'
|
||||||
|
45, // 'S'
|
||||||
|
52, // 'T'
|
||||||
|
64, // 'U'
|
||||||
|
65, // 'V'
|
||||||
|
63, // 'W'
|
||||||
|
71, // 'X'
|
||||||
|
67, // 'Y'
|
||||||
|
70, // 'Z'
|
||||||
|
77, // '['
|
||||||
|
121, // '\\'
|
||||||
|
78, // ']'
|
||||||
|
122, // '^'
|
||||||
|
31, // '_'
|
||||||
|
123, // '`'
|
||||||
|
4, // 'a'
|
||||||
|
25, // 'b'
|
||||||
|
9, // 'c'
|
||||||
|
17, // 'd'
|
||||||
|
1, // 'e'
|
||||||
|
26, // 'f'
|
||||||
|
22, // 'g'
|
||||||
|
13, // 'h'
|
||||||
|
7, // 'i'
|
||||||
|
50, // 'j'
|
||||||
|
38, // 'k'
|
||||||
|
14, // 'l'
|
||||||
|
15, // 'm'
|
||||||
|
10, // 'n'
|
||||||
|
3, // 'o'
|
||||||
|
8, // 'p'
|
||||||
|
60, // 'q'
|
||||||
|
6, // 'r'
|
||||||
|
5, // 's'
|
||||||
|
0, // 't'
|
||||||
|
18, // 'u'
|
||||||
|
33, // 'v'
|
||||||
|
11, // 'w'
|
||||||
|
41, // 'x'
|
||||||
|
28, // 'y'
|
||||||
|
53, // 'z'
|
||||||
|
124, // '{'
|
||||||
|
125, // '|'
|
||||||
|
126, // '}'
|
||||||
|
76, // '~'
|
||||||
|
127, // '\x7f'
|
||||||
|
128, // '\x80'
|
||||||
|
129, // '\x81'
|
||||||
|
130, // '\x82'
|
||||||
|
131, // '\x83'
|
||||||
|
132, // '\x84'
|
||||||
|
133, // '\x85'
|
||||||
|
134, // '\x86'
|
||||||
|
135, // '\x87'
|
||||||
|
136, // '\x88'
|
||||||
|
137, // '\x89'
|
||||||
|
138, // '\x8a'
|
||||||
|
139, // '\x8b'
|
||||||
|
140, // '\x8c'
|
||||||
|
141, // '\x8d'
|
||||||
|
142, // '\x8e'
|
||||||
|
143, // '\x8f'
|
||||||
|
144, // '\x90'
|
||||||
|
145, // '\x91'
|
||||||
|
146, // '\x92'
|
||||||
|
147, // '\x93'
|
||||||
|
148, // '\x94'
|
||||||
|
149, // '\x95'
|
||||||
|
150, // '\x96'
|
||||||
|
151, // '\x97'
|
||||||
|
152, // '\x98'
|
||||||
|
153, // '\x99'
|
||||||
|
154, // '\x9a'
|
||||||
|
155, // '\x9b'
|
||||||
|
156, // '\x9c'
|
||||||
|
157, // '\x9d'
|
||||||
|
158, // '\x9e'
|
||||||
|
159, // '\x9f'
|
||||||
|
160, // '\xa0'
|
||||||
|
161, // '¡'
|
||||||
|
162, // '¢'
|
||||||
|
163, // '£'
|
||||||
|
164, // '¤'
|
||||||
|
165, // '¥'
|
||||||
|
166, // '¦'
|
||||||
|
167, // '§'
|
||||||
|
168, // '¨'
|
||||||
|
169, // '©'
|
||||||
|
170, // 'ª'
|
||||||
|
171, // '«'
|
||||||
|
172, // '¬'
|
||||||
|
173, // '\xad'
|
||||||
|
174, // '®'
|
||||||
|
175, // '¯'
|
||||||
|
176, // '°'
|
||||||
|
177, // '±'
|
||||||
|
178, // '²'
|
||||||
|
179, // '³'
|
||||||
|
180, // '´'
|
||||||
|
181, // 'µ'
|
||||||
|
182, // '¶'
|
||||||
|
183, // '·'
|
||||||
|
184, // '¸'
|
||||||
|
185, // '¹'
|
||||||
|
186, // 'º'
|
||||||
|
187, // '»'
|
||||||
|
188, // '¼'
|
||||||
|
189, // '½'
|
||||||
|
190, // '¾'
|
||||||
|
191, // '¿'
|
||||||
|
192, // 'À'
|
||||||
|
193, // 'Á'
|
||||||
|
194, // 'Â'
|
||||||
|
195, // 'Ã'
|
||||||
|
196, // 'Ä'
|
||||||
|
197, // 'Å'
|
||||||
|
198, // 'Æ'
|
||||||
|
199, // 'Ç'
|
||||||
|
200, // 'È'
|
||||||
|
201, // 'É'
|
||||||
|
202, // 'Ê'
|
||||||
|
203, // 'Ë'
|
||||||
|
204, // 'Ì'
|
||||||
|
205, // 'Í'
|
||||||
|
206, // 'Î'
|
||||||
|
207, // 'Ï'
|
||||||
|
208, // 'Ð'
|
||||||
|
209, // 'Ñ'
|
||||||
|
210, // 'Ò'
|
||||||
|
211, // 'Ó'
|
||||||
|
212, // 'Ô'
|
||||||
|
213, // 'Õ'
|
||||||
|
214, // 'Ö'
|
||||||
|
215, // '×'
|
||||||
|
216, // 'Ø'
|
||||||
|
217, // 'Ù'
|
||||||
|
218, // 'Ú'
|
||||||
|
219, // 'Û'
|
||||||
|
220, // 'Ü'
|
||||||
|
221, // 'Ý'
|
||||||
|
222, // 'Þ'
|
||||||
|
223, // 'ß'
|
||||||
|
224, // 'à'
|
||||||
|
225, // 'á'
|
||||||
|
226, // 'â'
|
||||||
|
227, // 'ã'
|
||||||
|
228, // 'ä'
|
||||||
|
229, // 'å'
|
||||||
|
230, // 'æ'
|
||||||
|
231, // 'ç'
|
||||||
|
232, // 'è'
|
||||||
|
233, // 'é'
|
||||||
|
234, // 'ê'
|
||||||
|
235, // 'ë'
|
||||||
|
236, // 'ì'
|
||||||
|
237, // 'í'
|
||||||
|
238, // 'î'
|
||||||
|
239, // 'ï'
|
||||||
|
240, // 'ð'
|
||||||
|
241, // 'ñ'
|
||||||
|
242, // 'ò'
|
||||||
|
243, // 'ó'
|
||||||
|
244, // 'ô'
|
||||||
|
245, // 'õ'
|
||||||
|
246, // 'ö'
|
||||||
|
247, // '÷'
|
||||||
|
248, // 'ø'
|
||||||
|
249, // 'ù'
|
||||||
|
250, // 'ú'
|
||||||
|
251, // 'û'
|
||||||
|
252, // 'ü'
|
||||||
|
253, // 'ý'
|
||||||
|
254, // 'þ'
|
||||||
|
255, // 'ÿ'
|
||||||
|
};
|
||||||
|
|
||||||
|
char const COMMON_INPUTS_INV[] = {
|
||||||
|
't', 'e', '/', 'o', 'a', 's', 'r', 'i', 'p', 'c', 'n', 'w',
|
||||||
|
'.', 'h', 'l', 'm', '-', 'd', 'u', '0', '1', '2', 'g', '=',
|
||||||
|
':', 'b', 'f', '3', 'y', '5', '&', '_', '4', 'v', '9', '6',
|
||||||
|
'7', '8', 'k', '%', '?', 'x', 'C', 'D', 'A', 'S', 'F', 'I',
|
||||||
|
'B', 'E', 'j', 'P', 'T', 'z', 'R', 'N', 'M', '+', 'L', 'O',
|
||||||
|
'q', 'H', 'G', 'W', 'U', 'V', ',', 'Y', 'K', 'J', 'Z', 'X',
|
||||||
|
'Q', ';', ')', '(', '~', '[', ']', '$', '!', '\'', '*', '@',
|
||||||
|
'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
|
||||||
|
'\x08', '\t', '\n', '\x0b', '\x0c', '\r', '\x0e', '\x0f', '\x10',
|
||||||
|
'\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', '\x18',
|
||||||
|
'\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', ' ', '"',
|
||||||
|
'#', '<', '>', '\\', '^', '`', '{', '|', '}','\x7f','\x80',
|
||||||
|
'\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87', '\x88',
|
||||||
|
'\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f', '\x90',
|
||||||
|
'\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97', '\x98',
|
||||||
|
'\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f', '\xa0',
|
||||||
|
'\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7', '\xa8',
|
||||||
|
'\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf', '\xb0',
|
||||||
|
'\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7', '\xb8',
|
||||||
|
'\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf', '\xc0',
|
||||||
|
'\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7', '\xc8',
|
||||||
|
'\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf', '\xd0',
|
||||||
|
'\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7', '\xd8',
|
||||||
|
'\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf', '\xe0',
|
||||||
|
'\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7', '\xe8',
|
||||||
|
'\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef', '\xf0',
|
||||||
|
'\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7', '\xf8',
|
||||||
|
'\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
|
||||||
|
};
|
||||||
|
|
|
@ -0,0 +1,15 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can use, redistribute, and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License, version 3
|
||||||
|
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
|
@ -0,0 +1,17 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can use, redistribute, and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License, version 3
|
||||||
|
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "index_fst_registry.h"
|
||||||
|
|
|
@ -0,0 +1,115 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can use, redistribute, and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License, version 3
|
||||||
|
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
#include "index_fst_util.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//A sentinel value used to indicate an empty final state
|
||||||
|
const CompiledAddr EMPTY_ADDRESS = 0;
|
||||||
|
/// A sentinel value used to indicate an invalid state.
|
||||||
|
const CompiledAddr NONE_ADDRESS = 1;
|
||||||
|
|
||||||
|
// This version number is written to every finite state transducer created by
|
||||||
|
// this crate. When a finite state transducer is read, its version number is
|
||||||
|
// checked against this value.
|
||||||
|
const uint64_t version = 3;
|
||||||
|
// The threshold (in number of transitions) at which an index is created for
|
||||||
|
// a node's transitions. This speeds up lookup time at the expense of FST size
|
||||||
|
|
||||||
|
const uint64_t TRANS_INDEX_THRESHOLD = 32;
|
||||||
|
|
||||||
|
|
||||||
|
//uint8_t commonInput(uint8_t idx) {
|
||||||
|
// if (idx == 0) { return -1; }
|
||||||
|
// else {
|
||||||
|
// return COMMON_INPUTS_INV[idx - 1];
|
||||||
|
// }
|
||||||
|
//}
|
||||||
|
//
|
||||||
|
//uint8_t commonIdx(uint8_t v, uint8_t max) {
|
||||||
|
// uint8_t v = ((uint16_t)tCOMMON_INPUTS[v] + 1)%256;
|
||||||
|
// return v > max ? 0: v;
|
||||||
|
//}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Return the smallest number of bytes (1..8) able to hold `n`.
// Zero still needs one byte.
uint8_t packSize(uint64_t n) {
  uint8_t nbytes = 1;
  // Grow while bits remain above the low `nbytes` bytes; the shift is at most 56.
  while (nbytes < 8 && (n >> (8 * nbytes)) != 0) {
    nbytes++;
  }
  return nbytes;
}
|
||||||
|
|
||||||
|
// Decode a little-endian unsigned integer from the first `sz` bytes of `ch`.
//
// ch - source bytes, least significant byte first.
// sz - number of bytes to read; at most 8 are consumed.
//
// Returns the decoded value (0 when sz is 0).
//
// The accumulator now starts at zero (it was uninitialized) and every byte is
// widened to 64 bits before shifting, so bytes 4..7 no longer overflow `int`.
uint64_t unpackUint64(uint8_t *ch, uint8_t sz) {
  uint64_t n = 0;
  for (uint8_t i = 0; i < sz && i < sizeof(uint64_t); i++) {
    n |= (uint64_t)ch[i] << (8 * i);
  }
  return n;
}
|
||||||
|
// Number of bytes needed to store the delta between a node address and the
// address one of its transitions points to. A transition to EMPTY_ADDRESS is
// sized as EMPTY_ADDRESS itself.
uint8_t packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr) {
  const uint64_t delta = (transAddr == EMPTY_ADDRESS) ? EMPTY_ADDRESS : (nodeAddr - transAddr);
  return packSize(delta);
}
|
||||||
|
CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr) {
|
||||||
|
uint64_t delta = unpackUint64(data, len);
|
||||||
|
// delta_add = u64_to_usize
|
||||||
|
if (delta == EMPTY_ADDRESS) {
|
||||||
|
return EMPTY_ADDRESS;
|
||||||
|
} else {
|
||||||
|
return nodeAddr - delta;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// fst slice func
|
||||||
|
FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen) {
|
||||||
|
FstSlice slice = {.data = data, .dLen = dLen, .start = 0, .end = dLen - 1};
|
||||||
|
return slice;
|
||||||
|
}
|
||||||
|
// Create a view of `slice` restricted to the index range [start, end].
//
// slice - source slice; its data pointer and dLen are shared, not copied.
// start - first index of the view.
// end   - last index of the view (inclusive).
//
// When either index is outside the data or start > end, the result has
// data == NULL and all other fields zero (previously those fields were left
// indeterminate on this path).
FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end) {
  FstSlice t = {.data = NULL, .dLen = 0, .start = 0, .end = 0};
  if (start >= slice->dLen || end >= slice->dLen || start > end) {
    return t;
  }

  t.data = slice->data;
  t.dLen = slice->dLen;
  t.start = start;
  t.end = end;
  return t;
}
|
||||||
|
// A slice is empty when it has no data pointer or its dLen is not positive.
bool fstSliceEmpty(FstSlice *slice) {
  const bool hasBytes = (slice->data != NULL) && (slice->dLen > 0);
  return !hasBytes;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
# Test executable for the index library; sources are attached below.
add_executable(indexTest "")

target_sources(indexTest
    PRIVATE
        "../src/index.c"
        "indexTests.cpp"
)

# Public index headers plus the library's internal headers.
target_include_directories(indexTest
    PUBLIC
        "${CMAKE_SOURCE_DIR}/include/libs/index"
        "${CMAKE_CURRENT_SOURCE_DIR}/../inc"
)

target_link_libraries(indexTest
    os
    util
    common
    gtest_main
    index
)

# Register the executable with CTest.
add_test(
    NAME index_test
    COMMAND indexTest
)
|
|
@ -0,0 +1,59 @@
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <string>
|
||||||
|
#include <iostream>
|
||||||
|
#include "index.h"
|
||||||
|
#include "indexInt.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Smoke test for the index API: open an index, write 100000 documents with
// four tags each, then run a two-term MUST query and print the matches.
//
// Changes over the previous version: the test fails immediately when the
// index cannot be opened (it used to print a message and then use the NULL
// handle), the result array is released, and the result loop uses an unsigned
// counter to match taosArrayGetSize().
TEST(IndexTest, index_create_test) {
  SIndexOpts *opts = indexOptsCreate();
  SIndex     *index = indexOpen(opts, "./test");
  if (index == NULL) {
    indexOptsDestroy(opts);
    FAIL() << "index open failed";
  }

  // write
  for (int i = 0; i < 100000; i++) {
    SIndexMultiTerm *terms = indexMultiTermCreate();
    std::string      val = "field";

    indexMultiTermAdd(terms, "tag1", strlen("tag1"), val.c_str(), val.size());

    val.append(std::to_string(i));  // "field<i>"
    indexMultiTermAdd(terms, "tag2", strlen("tag2"), val.c_str(), val.size());

    val.insert(0, std::to_string(i));  // "<i>field<i>"
    indexMultiTermAdd(terms, "tag3", strlen("tag3"), val.c_str(), val.size());

    val.append("const");  // "<i>field<i>const"
    indexMultiTermAdd(terms, "tag4", strlen("tag4"), val.c_str(), val.size());

    indexPut(index, terms, i);
    indexMultiTermDestroy(terms);
  }

  // query
  SIndexMultiTermQuery *multiQuery = indexMultiTermQueryCreate(MUST);
  indexMultiTermQueryAdd(multiQuery, "tag1", strlen("tag1"), "field", strlen("field"), QUERY_PREFIX);
  indexMultiTermQueryAdd(multiQuery, "tag3", strlen("tag3"), "0field0", strlen("0field0"), QUERY_TERM);

  SArray *result = (SArray *)taosArrayInit(10, sizeof(int));
  indexSearch(index, multiQuery, result);

  std::cout << "taos'size : " << taosArrayGetSize(result) << std::endl;
  for (size_t i = 0; i < taosArrayGetSize(result); i++) {
    int *v = (int *)taosArrayGet(result, i);
    std::cout << "value --->" << *v << std::endl;
  }

  // cleanup
  taosArrayDestroy(result);
  indexMultiTermQueryDestroy(multiQuery);
  indexOptsDestroy(opts);
  indexClose(index);
}
|
|
@ -158,6 +158,8 @@ static void tkvInit() {
|
||||||
#ifdef USE_ROCKSDB
|
#ifdef USE_ROCKSDB
|
||||||
defaultReadOpts.ropts = rocksdb_readoptions_create();
|
defaultReadOpts.ropts = rocksdb_readoptions_create();
|
||||||
defaultWriteOpts.wopts = rocksdb_writeoptions_create();
|
defaultWriteOpts.wopts = rocksdb_writeoptions_create();
|
||||||
|
rocksdb_writeoptions_disable_WAL(defaultWriteOpts.wopts, true);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -166,4 +168,4 @@ static void tkvClear() {
|
||||||
rocksdb_readoptions_destroy(defaultReadOpts.ropts);
|
rocksdb_readoptions_destroy(defaultReadOpts.ropts);
|
||||||
rocksdb_writeoptions_destroy(defaultWriteOpts.wopts);
|
rocksdb_writeoptions_destroy(defaultWriteOpts.wopts);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue