merge develop
This commit is contained in:
commit
09f3e8e19a
|
@ -48,6 +48,7 @@ endif(${BUILD_WITH_ROCKSDB})
|
|||
## lucene
|
||||
if(${BUILD_WITH_LUCENE})
|
||||
cat("${CMAKE_SUPPORT_DIR}/lucene_CMakeLists.txt.in" ${DEPS_TMP_FILE})
|
||||
add_definitions(-DUSE_LUCENE)
|
||||
endif(${BUILD_WITH_LUCENE})
|
||||
|
||||
## NuRaft
|
||||
|
|
|
@ -22,7 +22,7 @@ option(
|
|||
option(
|
||||
BUILD_WITH_LUCENE
|
||||
"If build with lucene"
|
||||
OFF
|
||||
off
|
||||
)
|
||||
|
||||
option(
|
||||
|
@ -41,4 +41,4 @@ option(
|
|||
BUILD_DOCS
|
||||
"If use doxygen build documents"
|
||||
ON
|
||||
)
|
||||
)
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
|
||||
# lucene
|
||||
ExternalProject_Add(lucene
|
||||
GIT_REPOSITORY https://github.com/taosdata-contrib/LucenePlusPlus.git
|
||||
GIT_TAG rel_3.0.8_td
|
||||
GIT_REPOSITORY https://github.com/yihaoDeng/LucenePlusPlus.git
|
||||
SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/lucene"
|
||||
BINARY_DIR ""
|
||||
#BUILD_IN_SOURCE TRUE
|
||||
|
@ -10,4 +9,4 @@ ExternalProject_Add(lucene
|
|||
BUILD_COMMAND ""
|
||||
INSTALL_COMMAND ""
|
||||
TEST_COMMAND ""
|
||||
)
|
||||
)
|
||||
|
|
|
@ -68,6 +68,11 @@ endif(${BUILD_WITH_ROCKSDB})
|
|||
if(${BUILD_WITH_LUCENE})
|
||||
option(ENABLE_TEST "Enable the tests" OFF)
|
||||
add_subdirectory(lucene)
|
||||
target_include_directories(
|
||||
lucene++
|
||||
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/lucene/include>
|
||||
)
|
||||
|
||||
endif(${BUILD_WITH_LUCENE})
|
||||
|
||||
# NuRaft
|
||||
|
|
|
@ -16,12 +16,53 @@
|
|||
#ifndef _TD_INDEX_H_
|
||||
#define _TD_INDEX_H_
|
||||
|
||||
#include "os.h"
|
||||
#include "tarray.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct SIndex SIndex;
|
||||
typedef struct SIndexOpts SIndexOpts;
|
||||
typedef struct SIndexMultiTermQuery SIndexMultiTermQuery;
|
||||
typedef struct SArray SIndexMultiTerm;
|
||||
|
||||
typedef enum { MUST = 0, SHOULD = 1, NOT = 2 } EIndexOperatorType;
|
||||
typedef enum { QUERY_TERM = 0, QUERY_PREFIX = 1, QUERY_SUFFIX = 2,QUERY_REGEX = 3} EIndexQueryType;
|
||||
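/*
 * Multi-term query API.
 * @param oper: operator (MUST, SHOULD or NOT) stored in the query and handed
 *              to the search backend together with the added terms
 */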
|
||||
SIndexMultiTermQuery *indexMultiTermQueryCreate(EIndexOperatorType oper);
|
||||
void indexMultiTermQueryDestroy(SIndexMultiTermQuery *pQuery);
|
||||
int indexMultiTermQueryAdd(SIndexMultiTermQuery *pQuery, const char *field, int32_t nFields, const char *value, int32_t nValue, EIndexQueryType type);
|
||||
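/*
 * Index handle API.
 * @param opt:  index options (not read by the current indexOpen implementation)
 * @param path: location passed to the backend when the index is opened
 */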
|
||||
SIndex* indexOpen(SIndexOpts *opt, const char *path);
|
||||
void indexClose(SIndex *index);
|
||||
int indexPut(SIndex *index, SIndexMultiTerm *terms, int uid);
|
||||
int indexDelete(SIndex *index, SIndexMultiTermQuery *query);
|
||||
int indexSearch(SIndex *index, SIndexMultiTermQuery *query, SArray *result);
|
||||
int indexRebuild(SIndex *index, SIndexOpts *opt);
|
||||
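/*
 * Multi-term container API: an SArray of SIndexTerm pointers.
 * @param field/nFields: field name bytes and their length (copied)
 * @param value/nValue:  value bytes and their length (copied)
 */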
|
||||
SIndexMultiTerm *indexMultiTermCreate();
|
||||
int indexMultiTermAdd(SIndexMultiTerm *terms, const char *field, int32_t nFields, const char *value, int32_t nValue);
|
||||
void indexMultiTermDestroy(SIndexMultiTerm *terms);
|
||||
/*
 * Index options API.
 * indexOptsCreate() currently returns NULL and indexOptsDestroy() is a no-op.
 */
|
||||
SIndexOpts *indexOptsCreate();
|
||||
void indexOptsDestroy(SIndexOpts *opts);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /*_TD_INDEX_H_*/
|
||||
#endif /*_TD_INDEX_H_*/
|
||||
|
|
|
@ -4,4 +4,27 @@ target_include_directories(
|
|||
index
|
||||
PUBLIC "${CMAKE_SOURCE_DIR}/include/libs/index"
|
||||
PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc"
|
||||
)
|
||||
)
|
||||
target_link_libraries(
|
||||
index
|
||||
PUBLIC os
|
||||
PUBLIC util
|
||||
)
|
||||
|
||||
if (${BUILD_WITH_LUCENE})
|
||||
target_include_directories(
|
||||
index
|
||||
PUBLIC "${CMAKE_SOURCE_DIR}/deps/lucene/include"
|
||||
)
|
||||
LINK_DIRECTORIES("${CMAKE_SOURCE_DIR}/deps/lucene/debug/src/core")
|
||||
target_link_libraries(
|
||||
index
|
||||
PUBLIC lucene++
|
||||
)
|
||||
|
||||
endif(${BUILD_WITH_LUCENE})
|
||||
|
||||
if (${BUILD_TEST})
|
||||
add_subdirectory(test)
|
||||
endif(${BUILD_TEST})
|
||||
|
||||
|
|
|
@ -16,12 +16,52 @@
|
|||
#ifndef _TD_INDEX_INT_H_
|
||||
#define _TD_INDEX_INT_H_
|
||||
|
||||
#include "index.h"
|
||||
|
||||
#ifdef USE_LUCENE
|
||||
#include <lucene++/Lucene_c.h>
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct SIndex {
|
||||
#ifdef USE_LUCENE
|
||||
index_t *index;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct SIndexOpts {
|
||||
#ifdef USE_LUCENE
|
||||
void *opts;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct SIndexMultiTermQuery {
|
||||
EIndexOperatorType opera;
|
||||
SArray *query;
|
||||
};
|
||||
|
||||
// field and key;
|
||||
typedef struct SIndexTerm {
|
||||
char *key;
|
||||
int32_t nKey;
|
||||
char *val;
|
||||
int32_t nVal;
|
||||
} SIndexTerm;
|
||||
|
||||
typedef struct SIndexTermQuery {
|
||||
SIndexTerm* field_value;
|
||||
EIndexQueryType type;
|
||||
} SIndexTermQuery;
|
||||
|
||||
|
||||
SIndexTerm *indexTermCreate(const char *key, int32_t nKey, const char *val, int32_t nVal);
|
||||
void indexTermDestroy(SIndexTerm *p);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /*_TD_INDEX_INT_H_*/
|
||||
#endif /*_TD_INDEX_INT_H_*/
|
||||
|
|
|
@ -0,0 +1,182 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __INDEX_FST_H__
|
||||
#define __INDEX_FST_H__
|
||||
|
||||
|
||||
#include "tarray.h"
|
||||
#include "index_fst_util.h"
|
||||
#include "index_fst_registry.h"
|
||||
|
||||
|
||||
typedef struct FstNode FstNode;
|
||||
// Evaluates to the smaller of `a` and `b`. Each argument may be evaluated twice,
// so do not pass expressions with side effects.
#define OUTPUT_PREFIX(a, b) ((a) > (b) ? (b) : (a))
|
||||
|
||||
|
||||
typedef struct FstRange {
|
||||
uint64_t start;
|
||||
uint64_t end;
|
||||
} FstRange;
|
||||
|
||||
|
||||
typedef struct FstBuilderNode {
|
||||
bool isFinal;
|
||||
Output finalOutput;
|
||||
SArray *trans; // <FstTransition>
|
||||
} FstBuilderNode;
|
||||
|
||||
typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal} State;
|
||||
typedef enum { Included, Excluded, Unbounded} FstBound;
|
||||
|
||||
typedef uint32_t CheckSummer;
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
* UnFinished node and helper function
|
||||
* TODO: simple function name
|
||||
*/
|
||||
typedef struct FstUnFinishedNodes {
|
||||
SArray *stack; // array of FstBuilderNodeUnfinished entries, used as a stack
|
||||
} FstUnFinishedNodes;
|
||||
|
||||
// Number of entries on the unfinished-node stack; `nodes` may be any pointer expression.
#define FST_UNFINISHED_NODES_LEN(nodes) taosArrayGetSize((nodes)->stack)
|
||||
|
||||
// Allocates an unfinished-node stack; the name matches the definition in index_fst.c.
FstUnFinishedNodes *fstUnFinishedNodesCreate();
|
||||
void fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal);
|
||||
FstBuilderNode *fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes);
|
||||
FstBuilderNode *fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr);
|
||||
FstBuilderNode *fstUnFinishedNodesPopEmpty(FstUnFinishedNodes *nodes);
|
||||
void fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes *node, Output out);
|
||||
void fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *node, CompiledAddr addr);
|
||||
void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *node, FstSlice bs, Output out);
|
||||
uint64_t fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs);
|
||||
// Name spelled as in the definition in index_fst.c ("Prefix", not "Preifx").
uint64_t FstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, FstSlice bs, Output in, Output *out);
|
||||
|
||||
typedef struct FstCountingWriter {
|
||||
void* wtr; // wrap any writer that counts and checksum bytes written
|
||||
uint64_t count;
|
||||
CheckSummer summer;
|
||||
} FstCountingWriter;
|
||||
|
||||
typedef struct FstBuilder {
|
||||
FstCountingWriter wtr; // The FST raw data is written directly to `wtr`.
|
||||
FstUnFinishedNodes *unfinished; // The stack of unfinished nodes
|
||||
FstRegistry registry; // A map of finished nodes.
|
||||
SArray* last; // The last word added
|
||||
CompiledAddr lastAddr; // The address of the last compiled node
|
||||
uint64_t len; // num of keys added
|
||||
} FstBuilder;
|
||||
|
||||
|
||||
|
||||
|
||||
typedef struct FstTransition {
|
||||
uint8_t inp; //The byte input associated with this transition.
|
||||
Output out; //The output associated with this transition
|
||||
CompiledAddr addr; //The address of the node that this transition points to
|
||||
} FstTransition;
|
||||
|
||||
typedef struct FstTransitions {
|
||||
FstNode *node;
|
||||
FstRange range;
|
||||
} FstTransitions;
|
||||
|
||||
|
||||
|
||||
typedef struct FstLastTransition {
|
||||
uint8_t inp;
|
||||
Output out;
|
||||
} FstLastTransition;
|
||||
|
||||
/*
|
||||
* FstBuilderNodeUnfinished and helper function
|
||||
* TODO: simple function name
|
||||
*/
|
||||
typedef struct FstBuilderNodeUnfinished {
|
||||
FstBuilderNode *node;
|
||||
FstLastTransition* last;
|
||||
} FstBuilderNodeUnfinished;
|
||||
|
||||
void fstBuilderNodeUnfinishedLastCompiled(FstBuilderNodeUnfinished *node, CompiledAddr addr);
|
||||
void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *node, CompiledAddr addr);
|
||||
|
||||
/*
|
||||
* FstNode and helper function
|
||||
*/
|
||||
typedef struct FstNode {
|
||||
FstSlice data;
|
||||
uint64_t version;
|
||||
State state;
|
||||
CompiledAddr start;
|
||||
CompiledAddr end;
|
||||
bool isFinal;
|
||||
uint64_t nTrans;
|
||||
PackSizes sizes;
|
||||
Output finalOutput;
|
||||
} FstNode;
|
||||
|
||||
// If this node is final and has a terminal output value, then it is, returned. Otherwise, a zero output is returned
|
||||
#define FST_NODE_FINAL_OUTPUT(node) node->finalOutput
|
||||
// Returns true if and only if this node corresponds to a final or "match", state in the finite state transducer.
|
||||
#define FST_NODE_IS_FINAL(node) node->isFinal
|
||||
// Returns the number of transitions in this node, The maximum number of transitions is 256.
|
||||
#define FST_NODE_LEN(node) node->nTrans
|
||||
// Returns true if and only if this node has zero transitions.
|
||||
#define FST_NODE_IS_EMPTYE(node) (node->nTrans == 0)
|
||||
// Return the address of this node.
|
||||
#define FST_NODE_ADDR(node) node->start
|
||||
|
||||
FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *data);
|
||||
FstTransitions fstNodeTransitionIter(FstNode *node);
|
||||
FstTransitions* fstNodeTransitions(FstNode *node);
|
||||
bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *res);
|
||||
bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res);
|
||||
bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res);
|
||||
bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode);
|
||||
FstSlice fstNodeAsSlice(FstNode *node);
|
||||
|
||||
|
||||
|
||||
typedef struct FstMeta {
|
||||
uint64_t version;
|
||||
CompiledAddr rootAddr;
|
||||
FstType ty;
|
||||
uint64_t len;
|
||||
uint32_t checkSum;
|
||||
} FstMeta;
|
||||
|
||||
typedef struct Fst {
|
||||
FstMeta meta;
|
||||
void *data; //
|
||||
} Fst;
|
||||
|
||||
// ops
|
||||
|
||||
typedef struct FstIndexedValue {
|
||||
uint64_t index;
|
||||
uint64_t value;
|
||||
} FstIndexedValue;
|
||||
|
||||
|
||||
typedef struct FstRegistryCell {
|
||||
CompiledAddr addr;
|
||||
FstBuilderNode *node;
|
||||
} FstRegistryCell;
|
||||
|
||||
|
||||
|
||||
#endif
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef __INDEX_FST_AUTAOMATION_H__
|
||||
#define __INDEX_FST_AUTAOMATION_H__
|
||||
|
||||
// Forward typedef: in C a bare `struct AutomationCtx;` does not make the plain
// name `AutomationCtx` usable, yet the structs below refer to it that way.
// NOTE(review): relies on C11 (or GNU C) allowing the identical typedef to be repeated later.
typedef struct AutomationCtx AutomationCtx;
|
||||
|
||||
typedef struct StartWith {
|
||||
AutomationCtx *autoSelf;
|
||||
} StartWith;
|
||||
|
||||
typedef struct Complement {
|
||||
AutomationCtx *autoSelf;
|
||||
} Complement;
|
||||
|
||||
// automation
|
||||
typedef struct AutomationCtx {
|
||||
void *data;
|
||||
} AutomationCtx;
|
||||
|
||||
// automation interface
|
||||
void (*start)(AutomationCtx *ctx);
|
||||
bool (*isMatch)(AutomationCtx *ctx);
|
||||
bool (*canMatch)(AutomationCtx *ctx, void *data);
|
||||
bool (*willAlwaysMatch)(AutomationCtx *ctx, void *state);
|
||||
void* (*accpet)(AutomationCtx *ctx, void *state, uint8_t byte);
|
||||
// End-of-input hook; `state` is an opaque pointer like in the other hooks above
// (the original parameter was missing its type).
void* (*accpetEof)(AutomationCtx *ctx, void *state);
|
||||
|
||||
|
||||
#endif
|
|
@ -0,0 +1,22 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __INDEX_FST_NODE_H__
|
||||
#define __INDEX_FST_NODE_H__
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
|
@ -0,0 +1,24 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef __FST_REGISTRY_H__
|
||||
#define __FST_REGISTRY_H__
|
||||
|
||||
#include "index_fst_util.h"
|
||||
|
||||
|
||||
typedef struct FstRegistry {
|
||||
|
||||
} FstRegistry;
|
||||
#endif
|
|
@ -0,0 +1,82 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef __INDEX_FST_UTIL_H__
|
||||
#define __INDEX_FST_UTIL_H__
|
||||
|
||||
#include "tarray.h"
|
||||
|
||||
|
||||
typedef uint64_t FstType;
|
||||
typedef uint64_t CompiledAddr;
|
||||
typedef uint64_t Output;
|
||||
typedef uint8_t PackSizes;
|
||||
|
||||
|
||||
//A sentinel value used to indicate an empty final state
|
||||
extern const CompiledAddr EMPTY_ADDRESS;
|
||||
/// A sentinel value used to indicate an invalid state.
|
||||
extern const CompiledAddr NONE_ADDRESS;
|
||||
|
||||
// This version number is written to every finite state transducer created by
|
||||
// this crate. When a finite state transducer is read, its version number is
|
||||
// checked against this value.
|
||||
extern const uint64_t version;
|
||||
// The threshold (in number of transitions) at which an index is created for
|
||||
// a node's transitions. This speeds up lookup time at the expense of FST size
|
||||
|
||||
extern const uint64_t TRANS_INDEX_THRESHOLD;
|
||||
// high 4 bits is transition address packed size.
|
||||
// low 4 bits is output value packed size.
|
||||
//
|
||||
// `0` is a legal value which means there are no transitions/outputs
|
||||
|
||||
// Stores `sz` in the high 4 bits of `v`, keeping the low 4 bits.
#define FST_SET_TRANSITION_PACK_SIZE(v, sz) do { (v) = ((v) & 0b00001111) | ((sz) << 4); } while(0)
|
||||
#define FST_GET_TRANSITION_PACK_SIZE(v) (((v) & 0b11110000) >> 4)
|
||||
// Stores `sz` in the low 4 bits of `v`, keeping the high 4 bits.
#define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { (v) = ((v) & 0b11110000) | (sz); } while(0)
|
||||
#define FST_GET_OUTPUT_PACK_SIZE(v) ((v) & 0b00001111)
|
||||
|
||||
#define COMMON_INPUT(idx) COMMON_INPUTS_INV[(idx) - 1]
|
||||
|
||||
// Looks up COMMON_INPUTS[v], adds one modulo 256 and stores the result in `val`;
// a result greater than `max` is replaced by 0. Arguments are parenthesized so
// that compound expressions (e.g. a conditional passed as `max`) expand correctly.
#define COMMON_INDEX(v, max, val) do {                     \
  (val) = ((uint16_t)COMMON_INPUTS[(v)] + 1) % 256;        \
  (val) = ((val) > (max)) ? 0 : (val);                     \
} while(0)
|
||||
|
||||
|
||||
//uint8_t commonInput(uint8_t idx);
|
||||
//uint8_t commonIdx(uint8_t v, uint8_t max);
|
||||
|
||||
uint8_t packSize(uint64_t n);
|
||||
uint64_t unpackUint64(uint8_t *ch, uint8_t sz);
|
||||
uint8_t packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr);
|
||||
CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr);
|
||||
|
||||
|
||||
|
||||
typedef struct FstSlice {
|
||||
uint8_t *data;
|
||||
uint64_t dLen;
|
||||
uint32_t start;
|
||||
uint32_t end;
|
||||
} FstSlice;
|
||||
|
||||
FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end);
|
||||
FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen);
|
||||
bool fstSliceEmpty(FstSlice *slice);
|
||||
|
||||
|
||||
#endif
|
|
@ -13,15 +13,176 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _TD_INDEX_H_
|
||||
#define _TD_INDEX_H_
|
||||
#include "index.h"
|
||||
#include "indexInt.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#ifdef USE_LUCENE
|
||||
#include "lucene++/Lucene_c.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
static pthread_once_t isInit = PTHREAD_ONCE_INIT;
|
||||
|
||||
static void indexInit();
|
||||
|
||||
/*
 * Open the index at `path` and wrap the backend handle in a newly allocated SIndex.
 *
 * @param opts  index options; not read by this implementation
 * @param path  location handed to index_open()
 * @return the new handle (release with indexClose), or NULL when the wrapper cannot
 *         be allocated or when the library is built without USE_LUCENE
 */
SIndex *indexOpen(SIndexOpts *opts, const char *path) {
  pthread_once(&isInit, indexInit);
#ifdef USE_LUCENE
  index_t *index = index_open(path);
  SIndex  *p = malloc(sizeof(SIndex));
  if (p == NULL) {
    // do not leak the backend handle when the wrapper cannot be allocated
    if (index != NULL) { index_close(index); }
    return NULL;
  }
  p->index = index;
  return p;
#else
  return NULL;
#endif
}
|
||||
#endif
|
||||
|
||||
#endif /*_TD_INDEX_H_*/
|
||||
/*
 * Close the backend index (when built with USE_LUCENE) and free the SIndex wrapper.
 * A NULL handle is ignored.
 */
void indexClose(SIndex *index) {
  if (index == NULL) { return; }
#ifdef USE_LUCENE
  index_close(index->index);
  index->index = NULL;
#endif
  free(index);
}
|
||||
|
||||
#ifdef USE_LUCENE
|
||||
#endif
|
||||
/*
 * Build one backend document from the terms in `field_vals` plus the decimal
 * text of `uid`, and store it in the index. Without USE_LUCENE nothing is stored.
 *
 * @param index       open index handle
 * @param field_vals  array of SIndexTerm pointers
 * @param uid         id written as an extra, field-less entry of the document
 * @return always 1
 */
int indexPut(SIndex *index, SArray* field_vals, int uid) {
#ifdef USE_LUCENE
  char uidText[16] = {0};
  sprintf(uidText, "%d", uid);

  index_document_t *doc = index_document_create();

  size_t nTerms = taosArrayGetSize(field_vals);
  for (size_t k = 0; k < nTerms; k++) {
    SIndexTerm *term = taosArrayGetP(field_vals, k);
    // NOTE(review): the trailing 1 / 0 flag of index_document_add is backend-defined — confirm its meaning
    index_document_add(doc, (const char *)(term->key), term->nKey, (const char *)(term->val), term->nVal, 1);
  }
  index_document_add(doc, NULL, 0, uidText, strlen(uidText), 0);

  index_put(index->index, doc);
  index_document_destroy(doc);
#endif
  return 1;
}
|
||||
/*
 * Run a multi-term query against the backend and append every returned int to `result`.
 * Field names and values are copied into temporary NUL-terminated strings for the call.
 *
 * @param index        open index handle
 * @param multiQuerys  query holding the operator and the SIndexTermQuery entries
 * @param result       array that receives the ids returned by the backend
 * @return 1 on completion (also when built without USE_LUCENE), -1 when a
 *         temporary buffer cannot be allocated
 */
int indexSearch(SIndex *index, SIndexMultiTermQuery *multiQuerys, SArray *result) {
#ifdef USE_LUCENE
  EIndexOperatorType opera = multiQuerys->opera;

  int    code = 1;
  int    nQuery = taosArrayGetSize(multiQuerys->query);
  int   *tResult = NULL;
  int    tsz = 0;
  // calloc keeps unused slots NULL so the cleanup below can free unconditionally
  char **fields = calloc(nQuery, sizeof(char *));
  char **keys = calloc(nQuery, sizeof(char *));
  int   *types = calloc(nQuery, sizeof(int));
  if (nQuery > 0 && (fields == NULL || keys == NULL || types == NULL)) {
    code = -1;
    goto _cleanup;
  }

  for (int i = 0; i < nQuery; i++) {
    SIndexTermQuery *p = taosArrayGet(multiQuerys->query, i);
    SIndexTerm      *term = p->field_value;

    fields[i] = calloc(1, term->nKey + 1);
    keys[i] = calloc(1, term->nVal + 1);
    if (fields[i] == NULL || keys[i] == NULL) {
      code = -1;
      goto _cleanup;
    }

    memcpy(fields[i], term->key, term->nKey);
    memcpy(keys[i], term->val, term->nVal);
    types[i] = (int)(p->type);
  }

  index_multi_search(index->index, (const char **)fields, (const char **)keys, types, nQuery, opera, &tResult, &tsz);

  // NOTE(review): ownership of tResult is not visible here — confirm whether the caller must release it
  for (int i = 0; i < tsz; i++) {
    taosArrayPush(result, &tResult[i]);
  }

_cleanup:
  if (fields != NULL) {
    for (int i = 0; i < nQuery; i++) { free(fields[i]); }
  }
  if (keys != NULL) {
    for (int i = 0; i < nQuery; i++) { free(keys[i]); }
  }
  free(fields);
  free(keys);
  free(types);
  return code;
#else
  return 1;
#endif
}
|
||||
|
||||
/*
 * Placeholder: deletion is not implemented; both arguments are ignored.
 * @return always 1
 */
int indexDelete(SIndex *index, SIndexMultiTermQuery *query) {
  (void)index;
  (void)query;
  return 1;
}
|
||||
int indexRebuild(SIndex *index, SIndexOpts *opts);
|
||||
|
||||
|
||||
/*
 * Placeholder: no options object exists yet for any backend.
 * @return always NULL
 */
SIndexOpts *indexOptsCreate() {
  SIndexOpts *none = NULL;
  return none;
}
|
||||
/*
 * Placeholder counterpart of indexOptsCreate(): nothing is released.
 */
void indexOptsDestroy(SIndexOpts *opts) {
  (void)opts;
}
|
||||
/*
|
||||
* @param: oper
|
||||
*
|
||||
*/
|
||||
|
||||
SIndexMultiTermQuery *indexMultiTermQueryCreate(EIndexOperatorType opera) {
|
||||
SIndexMultiTermQuery *p = (SIndexMultiTermQuery *)malloc(sizeof(SIndexMultiTermQuery));
|
||||
if (p == NULL) { return NULL; }
|
||||
p->opera = opera;
|
||||
p->query = taosArrayInit(1, sizeof(SIndexTermQuery));
|
||||
return p;
|
||||
}
|
||||
/*
 * Destroy every term owned by the query, then the term array and the query itself.
 * A NULL query is ignored.
 */
void indexMultiTermQueryDestroy(SIndexMultiTermQuery *pQuery) {
  if (pQuery == NULL) { return; }
  for (int i = 0; i < taosArrayGetSize(pQuery->query); i++) {
    SIndexTermQuery *p = (SIndexTermQuery *)taosArrayGet(pQuery->query, i);
    indexTermDestroy(p->field_value);
  }
  taosArrayDestroy(pQuery->query);
  free(pQuery);
}
|
||||
/*
 * Append one field/value term with its match type to the query. Both byte ranges are copied.
 *
 * @return 0 on success, -1 when the term cannot be created or appended
 */
int indexMultiTermQueryAdd(SIndexMultiTermQuery *pQuery, const char *field, int32_t nFields, const char *value, int32_t nValue, EIndexQueryType type){
  SIndexTerm *t = indexTermCreate(field, nFields, value, nValue);
  if (t == NULL) {return -1;}
  SIndexTermQuery q = {.type = type, .field_value = t};
  if (taosArrayPush(pQuery->query, &q) == NULL) {
    // the array did not take the entry, so the term is still ours to release
    indexTermDestroy(t);
    return -1;
  }
  return 0;
}
|
||||
|
||||
|
||||
SIndexTerm *indexTermCreate(const char *key, int32_t nKey, const char *val, int32_t nVal) {
|
||||
SIndexTerm *t = (SIndexTerm *)malloc(sizeof(SIndexTerm));
|
||||
t->key = (char *)calloc(nKey + 1, 1);
|
||||
memcpy(t->key, key, nKey);
|
||||
t->nKey = nKey;
|
||||
|
||||
t->val = (char *)calloc(nVal + 1, 1);
|
||||
memcpy(t->val, val, nVal);
|
||||
t->nVal = nVal;
|
||||
return t;
|
||||
}
|
||||
/*
 * Free a term together with its key and value copies. A NULL term is ignored.
 */
void indexTermDestroy(SIndexTerm *p) {
  if (p == NULL) { return; }
  free(p->key);
  free(p->val);
  free(p);
}
|
||||
|
||||
SArray *indexMultiTermCreate() {
|
||||
return taosArrayInit(4, sizeof(SIndexTerm *));
|
||||
}
|
||||
|
||||
/*
 * Create a term from the field/value bytes and append its pointer to `array`.
 *
 * @return 0 on success, -1 when the term cannot be created or appended
 */
int indexMultiTermAdd(SArray *array, const char *field, int32_t nField, const char *val, int32_t nVal) {
  SIndexTerm *term = indexTermCreate(field, nField, val, nVal);
  if (term == NULL) { return -1; }
  if (taosArrayPush(array, &term) == NULL) {
    // the array did not take the pointer, so release the term here
    indexTermDestroy(term);
    return -1;
  }
  return 0;
}
|
||||
/*
 * Destroy every term referenced by the array, then the array itself.
 */
void indexMultiTermDestroy(SArray *array) {
  int32_t remaining = (int32_t)taosArrayGetSize(array);
  int32_t pos = 0;
  while (pos < remaining) {
    indexTermDestroy((SIndexTerm *)taosArrayGetP(array, pos));
    pos++;
  }
  taosArrayDestroy(array);
}
|
||||
/*
 * One-time initialisation hook run through pthread_once() from indexOpen();
 * there is currently nothing to set up.
 */
void indexInit() {
  return;
}
|
||||
|
|
|
@ -0,0 +1,296 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "index_fst.h"
|
||||
|
||||
|
||||
FstUnFinishedNodes *fstUnFinishedNodesCreate() {
|
||||
FstUnFinishedNodes *nodes = malloc(sizeof(FstUnFinishedNodes));
|
||||
if (nodes == NULL) { return NULL; }
|
||||
|
||||
nodes->stack = (SArray *)taosArrayInit(64, sizeof(FstBuilderNodeUnfinished));
|
||||
fstUnFinishedNodesPushEmpty(nodes, false);
|
||||
return nodes;
|
||||
}
|
||||
/*
 * Push a fresh node with no transitions, zero final output and no pending
 * last transition onto the stack.
 *
 * @param isFinal  final flag of the new node
 */
void fstUnFinishedNodesPushEmpty(FstUnFinishedNodes *nodes, bool isFinal) {
  // NOTE(review): the allocation result is not checked
  FstBuilderNode *fresh = malloc(sizeof(FstBuilderNode));
  fresh->trans = NULL;
  fresh->finalOutput = 0;
  fresh->isFinal = isFinal;

  FstBuilderNodeUnfinished entry;
  entry.node = fresh;
  entry.last = NULL;
  taosArrayPush(nodes->stack, &entry);
}
|
||||
/*
 * Pop the only remaining entry of the stack and return its builder node.
 * Asserts that exactly one entry is left and that it has no pending last transition.
 */
FstBuilderNode *fstUnFinishedNodesPopRoot(FstUnFinishedNodes *nodes) {
  assert(taosArrayGetSize(nodes->stack) == 1);

  FstBuilderNodeUnfinished *root = taosArrayPop(nodes->stack);
  assert(root->last == NULL);

  FstBuilderNode *rootNode = root->node;
  return rootNode;
}
|
||||
|
||||
/*
 * Pop the top entry, pass it with `addr` to fstBuilderNodeUnfinishedLastCompiled(),
 * free its pending last transition and return its builder node.
 */
FstBuilderNode *fstUnFinishedNodesPopFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr) {
  FstBuilderNodeUnfinished *top = taosArrayPop(nodes->stack);

  fstBuilderNodeUnfinishedLastCompiled(top, addr);
  free(top->last);  // TODO add func FstLastTransitionFree()

  FstBuilderNode *frozen = top->node;
  return frozen;
}
|
||||
|
||||
/*
 * Pop the top entry, which must have no pending last transition, and return its builder node.
 */
FstBuilderNode *fstUnFinishedNodesPopEmpty(FstUnFinishedNodes *nodes) {
  FstBuilderNodeUnfinished *top = taosArrayPop(nodes->stack);
  assert(top->last == NULL);

  FstBuilderNode *popped = top->node;
  return popped;
}
|
||||
/*
 * Mark the bottom entry of the stack (index 0) as final and set its final output to `out`.
 * The node's transitions are left untouched.
 */
void fstUnFinishedNodesSetRootOutput(FstUnFinishedNodes *nodes, Output out) {
  FstBuilderNodeUnfinished *root = taosArrayGet(nodes->stack, 0);
  FstBuilderNode *rootNode = root->node;
  rootNode->finalOutput = out;
  rootNode->isFinal = true;
}
|
||||
/*
 * Pass the top entry of the stack with `addr` to fstBuilderNodeUnfinishedLastCompiled().
 * The stack must not be empty.
 */
void fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *nodes, CompiledAddr addr) {
  size_t depth = taosArrayGetSize(nodes->stack);
  assert(depth > 0);  // `depth - 1` would wrap around on an empty stack
  FstBuilderNodeUnfinished *un = taosArrayGet(nodes->stack, depth - 1);
  fstBuilderNodeUnfinishedLastCompiled(un, addr);
}
|
||||
/*
 * Extend the stack with the bytes of `bs` (inclusive range start..end).
 *
 * The first byte becomes the pending last transition of the current top entry and
 * carries `out`. Every following byte gets a new empty node whose pending transition
 * has a zero output. A final empty node is pushed at the end. An empty or invalid
 * slice is ignored.
 *
 * The original started its loop at the first byte (pushing it twice) and repeated
 * `out` on every transition; this follows the Rust `fst` builder this code is ported from.
 */
void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *nodes, FstSlice bs, Output out) {
  FstSlice *s = &bs;
  if (s->data == NULL || s->dLen == 0 || s->start > s->end) {
    return;
  }
  size_t depth = taosArrayGetSize(nodes->stack);
  assert(depth > 0);
  FstBuilderNodeUnfinished *top = taosArrayGet(nodes->stack, depth - 1);
  assert(top->last == NULL);

  // NOTE(review): allocation results below are not checked, as in the original
  FstLastTransition *first = malloc(sizeof(FstLastTransition));
  first->inp = s->data[s->start];
  first->out = out;
  top->last = first;

  for (uint64_t i = (uint64_t)s->start + 1; i <= s->end; i++) {
    FstBuilderNode *n = malloc(sizeof(FstBuilderNode));
    n->isFinal = false;
    n->finalOutput = 0;
    n->trans = NULL;

    FstLastTransition *trn = malloc(sizeof(FstLastTransition));
    trn->inp = s->data[i];
    trn->out = 0;  // only the first transition of the suffix carries the output

    FstBuilderNodeUnfinished next = {.node = n, .last = trn};
    taosArrayPush(nodes->stack, &next);
  }
  fstUnFinishedNodesPushEmpty(nodes, true);
}
|
||||
|
||||
|
||||
/*
 * Count how many leading bytes of `bs` match the pending last transitions of the
 * stack entries, walking from the bottom of the stack.
 *
 * Counting stops at the first mismatch or at the first entry that has no pending
 * transition; the original dereferenced a NULL `last` in that case.
 */
uint64_t fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs) {
  FstSlice *s = &bs;

  size_t lsz = (size_t)(s->end - s->start + 1);  // data len
  size_t ssz = taosArrayGetSize(node->stack);    // stack size

  uint64_t count = 0;
  for (size_t i = 0; i < ssz && i < lsz; i++) {
    FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, i);
    if (un->last == NULL || un->last->inp != s->data[s->start + i]) {
      break;
    }
    count++;
  }
  return count;
}
|
||||
/*
 * Walk the stack from the bottom while the pending transition bytes match `bs`, and
 * split each matching transition's output against the incoming output `in`.
 *
 * For every match the transition keeps min(its output, remaining `in`). The rest of
 * its old output is pushed down to the next stack entry, and the kept part is
 * subtracted from the remaining `in`.
 *
 * The original always returned 0, never read `in`, did pointer arithmetic on `out`
 * and dereferenced a NULL `last`; this follows the Rust `fst` builder this code is
 * ported from.
 *
 * @param in   output value of the key being inserted
 * @param out  receives what is left of `in` after the shared prefix (may be NULL)
 * @return number of matching leading bytes
 */
uint64_t FstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, FstSlice bs, Output in, Output *out) {
  FstSlice *s = &bs;

  size_t lsz = (size_t)(s->end - s->start + 1);  // data len
  size_t ssz = taosArrayGetSize(node->stack);    // stack size

  uint64_t matched = 0;
  while (matched < lsz && matched < ssz) {
    FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, matched);
    FstLastTransition        *last = un->last;
    if (last == NULL || last->inp != s->data[s->start + matched]) {
      break;
    }
    Output commPrefix = (last->out < in) ? last->out : in;
    Output addPrefix = last->out - commPrefix;
    in = in - commPrefix;
    last->out = commPrefix;
    matched++;

    // the part of the old output that is no longer shared moves to the next entry
    if (addPrefix != 0 && matched < ssz) {
      FstBuilderNodeUnfinished *next = taosArrayGet(node->stack, matched);
      fstBuilderNodeUnfinishedAddOutputPrefix(next, addPrefix);
    }
  }
  if (out != NULL) { *out = in; }
  return matched;
}
|
||||
|
||||
// fst node function
|
||||
|
||||
|
||||
|
||||
/*
 * Allocate an FstNode describing the node at `addr` inside `slice`.
 *
 * EMPTY_ADDRESS yields an empty final node without touching `slice`. Otherwise the
 * top two bits of the byte at `addr` select the node state: 0b11 OneTransNext,
 * 0b10 OneTrans, anything else AnyTrans.
 *
 * Fixes over the original: the empty case returns instead of falling through to
 * `slice->data[addr]`; the OneTrans test uses the state bits rather than the whole
 * byte; the not-yet-decoded sizes and transition count are zero instead of uninitialized.
 *
 * @return the new node, or NULL when allocation fails
 */
FstNode *fstNodeCreate(int64_t version, CompiledAddr addr, FstSlice *slice) {
  FstNode *n = (FstNode *)malloc(sizeof(FstNode));
  if (n == NULL) { return NULL; }

  if (addr == EMPTY_ADDRESS) {
    n->data = fstSliceCreate(NULL, 0);
    n->version = version;
    n->state = EmptyFinal;
    n->start = EMPTY_ADDRESS;
    n->end = EMPTY_ADDRESS;
    n->isFinal = true;
    n->nTrans = 0;
    n->sizes = 0;
    n->finalOutput = 0;
    return n;
  }

  uint8_t v = slice->data[addr];
  uint8_t s = (v & 0b11000000) >> 6;
  if (s == 0b11) {  // oneTransNext
    n->data = fstSliceCopy(slice, 0, addr);
    n->version = version;
    n->state = OneTransNext;
    n->start = addr;
    n->end = addr;  //? s.end_addr(data);
    n->isFinal = false;
    n->sizes = 0;
    n->nTrans = 0;
    n->finalOutput = 0;
  } else if (s == 0b10) {  // oneTrans
    PackSizes sz = 0;  // TODO: fetch sz from addr
    n->data = fstSliceCopy(slice, 0, addr);
    n->version = version;
    n->state = OneTrans;
    n->start = addr;
    n->end = addr;  // s.end_addr(data, sz);
    n->isFinal = false;
    n->nTrans = 1;
    n->sizes = sz;
    n->finalOutput = 0;
  } else {  // anyTrans
    PackSizes sz = 0;      // TODO: s.sizes(data)
    uint32_t  nTrans = 0;  // TODO: s.ntrans(data)
    n->data = *slice;
    n->version = version;
    n->state = AnyTrans;
    n->start = addr;
    n->end = addr;       // s.end_addr(version, data, sz, ntrans);
    n->isFinal = false;  // s.is_final_state();
    n->nTrans = nTrans;
    n->sizes = sz;
    n->finalOutput = 0;  // s.final_output(version, data, sz, ntrans);
  }
  return n;
}
|
||||
/*
 * Allocate a transition cursor for `node` covering the range [0, node->nTrans).
 *
 * @return the new cursor, or NULL when allocation fails
 */
FstTransitions* fstNodeTransitions(FstNode *node) {
  FstTransitions *cursor = malloc(sizeof(FstTransitions));
  if (cursor != NULL) {
    cursor->node = node;
    cursor->range.start = 0;
    cursor->range.end = FST_NODE_LEN(node);
  }
  return cursor;
}
|
||||
/*
 * Skeleton: reports true for the OneTransNext, OneTrans and AnyTrans states and
 * false for any other state. `i` is not read and `res` is not written yet.
 */
bool fstNodeGetTransitionAt(FstNode *node, uint64_t i, FstTransition *res) {
  switch (node->state) {
    case OneTransNext:
    case OneTrans:
    case AnyTrans:
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/*
 * Fetch the target address of the i-th transition of `node` into `res`.
 *
 * Skeleton only: no address is decoded yet, so `i` and `res` are unused.
 * Returns false for the EmptyFinal state and true for every other state.
 */
bool fstNodeGetTransitionAddrAt(FstNode *node, uint64_t i, CompiledAddr *res) {
  switch (node->state) {
    case EmptyFinal:
      return false;
    case OneTransNext:  // TODO: decode address
    case OneTrans:      // TODO: decode address
    case AnyTrans:      // TODO: decode address
    default:
      return true;
  }
}
|
||||
|
||||
/*
 * Look up the transition of `node` whose input byte equals `b`.
 *
 * On a match in a single-transition state (OneTransNext / OneTrans) the
 * transition index 0 is stored in `*res` and true is returned; a mismatch
 * returns false and leaves `*res` untouched.  EmptyFinal has no transitions
 * and always returns false.
 *
 * NOTE(review): the input byte of the node's transition is not decoded from
 * the node data yet.  It is initialised to 0 here so the comparison no longer
 * reads an indeterminate value; replace it with the real decoded input.
 * NOTE(review): the AnyTrans search is not implemented; that branch (and any
 * unrecognised state) still returns true without writing `*res`, exactly as
 * before.
 */
bool fstNodeFindInput(FstNode *node, uint8_t b, uint64_t *res) {
  uint8_t input = 0;  // TODO: decode from node (s.input)

  if (node->state == OneTransNext || node->state == OneTrans) {
    if (b != input) {
      return false;  // no transition on this byte
    }
    *res = 0;  // the only transition lives at index 0
    return true;
  } else if (node->state == AnyTrans) {
    // TODO: search the transition table for `b`
    return true;
  } else if (node->state == EmptyFinal) {
    return false;
  }
  return true;
}
|
||||
|
||||
/*
 * Serialise `builderNode` through writer `w`.
 *
 * The encoding is chosen from the builder node's shape:
 *   - no transitions, final, zero final output -> nothing is written;
 *   - exactly one transition and not final     -> OneTransNext when the
 *     transition targets `lastAddr` with zero output, otherwise OneTrans;
 *   - everything else                          -> AnyTrans.
 *
 * Skeleton only: the per-state encoders are not wired in yet, so `node`, `w`
 * and `addr` are unused and every path returns true.
 *
 * A node may carry one transition per possible input byte, i.e. up to 256
 * inclusive, so the sanity check accepts sz == 256.
 */
bool fstNodeCompile(FstNode *node, void *w, CompiledAddr lastAddr, CompiledAddr addr, FstBuilderNode *builderNode) {
  size_t sz = taosArrayGetSize(builderNode->trans);
  assert(sz <= 256);

  if (sz == 0 && builderNode->isFinal && builderNode->finalOutput == 0) {
    // empty final state: has no encoded form
    return true;
  }

  if (sz != 1 || builderNode->isFinal) {
    // TODO: AnyTrans->Compile(w, addr, node);
    return true;
  }

  FstTransition *tran = taosArrayGet(builderNode->trans, 0);
  if (tran->addr == lastAddr && tran->out == 0) {
    // TODO: OneTransNext::compile(w, lastAddr, tran->inp);
  } else {
    // TODO: OneTrans::Compile(w, lastAddr, *tran);
  }
  return true;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Allocate an FST builder that writes through `w`.
 *
 * The builder is zero-initialised, its counting writer is bound to `w` with
 * count and summer reset to 0, and storage for the unfinished-node state is
 * allocated (also zeroed).  `ty` is currently unused.
 *
 * Returns NULL if either allocation fails; nothing is leaked in that case.
 */
FstBuilder *fstBuilderCreate(void *w, FstType ty) {
  (void)ty;

  // calloc so that fields not set below do not hold indeterminate values
  FstBuilder *b = calloc(1, sizeof(FstBuilder));
  if (NULL == b) { return NULL; }

  FstCountingWriter wtr = {.wtr = w, .count = 0, .summer = 0};
  b->wtr = wtr;

  b->unfinished = calloc(1, sizeof(FstUnFinishedNodes));
  if (NULL == b->unfinished) {
    free(b);
    return NULL;
  }
  return b;
}
|
||||
/*
 * Return a view over the node's backing slice that runs from that slice's
 * `end` offset to its last byte (dLen - 1).  The view is produced by
 * fstSliceCopy, so it shares the underlying data pointer.
 */
FstSlice fstNodeAsSlice(FstNode *node) {
  FstSlice *backing = &node->data;
  return fstSliceCopy(backing, backing->end, backing->dLen - 1);
}
|
||||
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
|
@ -0,0 +1,306 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "tutil.h"
|
||||
/*
 * Lookup table indexed by byte value (256 entries).  Each row below covers 16
 * consecutive byte values; the row comment gives the first byte of the row.
 * COMMON_INPUTS_INV is the reverse mapping.
 */
const uint8_t COMMON_INPUTS[] = {
    /* 0x00 */  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  98,  99,
    /* 0x10 */ 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
    /* ' '  */ 116,  80, 117, 118,  79,  39,  30,  81,  75,  74,  82,  57,  66,  16,  12,   2,
    /* '0'  */  19,  20,  21,  27,  32,  29,  35,  36,  37,  34,  24,  73, 119,  23, 120,  40,
    /* '@'  */  83,  44,  48,  42,  43,  49,  46,  62,  61,  47,  69,  68,  58,  56,  55,  59,
    /* 'P'  */  51,  72,  54,  45,  52,  64,  65,  63,  71,  67,  70,  77, 121,  78, 122,  31,
    /* '`'  */ 123,   4,  25,   9,  17,   1,  26,  22,  13,   7,  50,  38,  14,  15,  10,   3,
    /* 'p'  */   8,  60,   6,   5,   0,  18,  33,  11,  41,  28,  53, 124, 125, 126,  76, 127,
    /* 0x80 */ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
    /* 0x90 */ 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    /* 0xa0 */ 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    /* 0xb0 */ 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    /* 0xc0 */ 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    /* 0xd0 */ 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    /* 0xe0 */ 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    /* 0xf0 */ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
};
|
||||
|
||||
/*
 * Reverse of COMMON_INPUTS: entry k is the byte whose COMMON_INPUTS value is
 * k (256 entries, 8 per row; the row comment gives the first index).
 */
char const COMMON_INPUTS_INV[] = {
    /*   0 */ 't',    'e',    '/',    'o',    'a',    's',    'r',    'i',
    /*   8 */ 'p',    'c',    'n',    'w',    '.',    'h',    'l',    'm',
    /*  16 */ '-',    'd',    'u',    '0',    '1',    '2',    'g',    '=',
    /*  24 */ ':',    'b',    'f',    '3',    'y',    '5',    '&',    '_',
    /*  32 */ '4',    'v',    '9',    '6',    '7',    '8',    'k',    '%',
    /*  40 */ '?',    'x',    'C',    'D',    'A',    'S',    'F',    'I',
    /*  48 */ 'B',    'E',    'j',    'P',    'T',    'z',    'R',    'N',
    /*  56 */ 'M',    '+',    'L',    'O',    'q',    'H',    'G',    'W',
    /*  64 */ 'U',    'V',    ',',    'Y',    'K',    'J',    'Z',    'X',
    /*  72 */ 'Q',    ';',    ')',    '(',    '~',    '[',    ']',    '$',
    /*  80 */ '!',    '\'',   '*',    '@',    '\x00', '\x01', '\x02', '\x03',
    /*  88 */ '\x04', '\x05', '\x06', '\x07', '\x08', '\t',   '\n',   '\x0b',
    /*  96 */ '\x0c', '\r',   '\x0e', '\x0f', '\x10', '\x11', '\x12', '\x13',
    /* 104 */ '\x14', '\x15', '\x16', '\x17', '\x18', '\x19', '\x1a', '\x1b',
    /* 112 */ '\x1c', '\x1d', '\x1e', '\x1f', ' ',    '"',    '#',    '<',
    /* 120 */ '>',    '\\',   '^',    '`',    '{',    '|',    '}',    '\x7f',
    /* 128 */ '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
    /* 136 */ '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
    /* 144 */ '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
    /* 152 */ '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
    /* 160 */ '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
    /* 168 */ '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
    /* 176 */ '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
    /* 184 */ '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
    /* 192 */ '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
    /* 200 */ '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
    /* 208 */ '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
    /* 216 */ '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
    /* 224 */ '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
    /* 232 */ '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
    /* 240 */ '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
    /* 248 */ '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
};
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "index_fst_registry.h"
|
||||
|
|
@ -0,0 +1,115 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include "index_fst_util.h"
|
||||
|
||||
|
||||
|
||||
//A sentinel value used to indicate an empty final state
|
||||
const CompiledAddr EMPTY_ADDRESS = 0;
|
||||
/// A sentinel value used to indicate an invalid state.
|
||||
const CompiledAddr NONE_ADDRESS = 1;
|
||||
|
||||
// This version number is written to every finite state transducer created by
|
||||
// this crate. When a finite state transducer is read, its version number is
|
||||
// checked against this value.
|
||||
const uint64_t version = 3;
|
||||
// The threshold (in number of transitions) at which an index is created for
|
||||
// a node's transitions. This speeds up lookup time at the expense of FST size
|
||||
|
||||
const uint64_t TRANS_INDEX_THRESHOLD = 32;
|
||||
|
||||
|
||||
//uint8_t commonInput(uint8_t idx) {
|
||||
// if (idx == 0) { return -1; }
|
||||
// else {
|
||||
// return COMMON_INPUTS_INV[idx - 1];
|
||||
// }
|
||||
//}
|
||||
//
|
||||
//uint8_t commonIdx(uint8_t v, uint8_t max) {
|
||||
// uint8_t v = ((uint16_t)tCOMMON_INPUTS[v] + 1)%256;
|
||||
// return v > max ? 0: v;
|
||||
//}
|
||||
|
||||
|
||||
|
||||
/*
 * Return the minimum number of bytes (1..8) needed to store `n`.
 * Zero still occupies one byte.
 */
uint8_t packSize(uint64_t n) {
  // try 1..7 bytes; anything that does not fit in 56 bits needs all 8
  for (uint8_t nbytes = 1; nbytes < 8; nbytes++) {
    const uint64_t limit = (uint64_t)1 << (8 * nbytes);
    if (n < limit) {
      return nbytes;
    }
  }
  return 8;
}
|
||||
|
||||
/*
 * Decode a little-endian unsigned integer from the first `sz` bytes of `ch`.
 *
 * ch: source bytes, least-significant byte first.
 * sz: number of bytes to read; at most 8 bytes fit in the result, so larger
 *     values are treated as 8.  sz == 0 yields 0.
 *
 * Returns the decoded value.
 */
uint64_t unpackUint64(uint8_t *ch, uint8_t sz) {
  uint64_t n = 0;  // accumulator must start from zero
  if (sz > 8) {
    sz = 8;  // shifting a 64-bit value by >= 64 bits is undefined
  }
  for (uint8_t i = 0; i < sz; i++) {
    // widen before shifting: a plain `ch[i] << k` is computed in int, which
    // overflows / sign-extends once the byte reaches bit 31 and above
    n |= (uint64_t)ch[i] << (8 * i);
  }
  return n;
}
|
||||
/*
 * Number of bytes needed to store a transition target relative to its node.
 * A target equal to EMPTY_ADDRESS is sized as EMPTY_ADDRESS itself; any other
 * target is sized as the difference nodeAddr - transAddr.
 */
uint8_t packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr) {
  if (transAddr != EMPTY_ADDRESS) {
    return packSize(nodeAddr - transAddr);
  }
  return packSize(EMPTY_ADDRESS);
}
|
||||
CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr) {
|
||||
uint64_t delta = unpackUint64(data, len);
|
||||
// delta_add = u64_to_usize
|
||||
if (delta == EMPTY_ADDRESS) {
|
||||
return EMPTY_ADDRESS;
|
||||
} else {
|
||||
return nodeAddr - delta;
|
||||
}
|
||||
}
|
||||
|
||||
// fst slice func
|
||||
FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen) {
|
||||
FstSlice slice = {.data = data, .dLen = dLen, .start = 0, .end = dLen - 1};
|
||||
return slice;
|
||||
}
|
||||
/*
 * Create a sub-view [start, end] of `slice`.
 *
 * The returned slice shares the source's data pointer and dLen; only the
 * start/end offsets differ.  When the requested range is invalid (either
 * bound is >= dLen, or start > end) a fully zeroed slice with data == NULL is
 * returned, which fstSliceEmpty() reports as empty.
 */
FstSlice fstSliceCopy(FstSlice *slice, uint32_t start, uint32_t end) {
  // start from a fully defined value so the error path never hands back
  // indeterminate fields
  FstSlice t = {.data = NULL, .dLen = 0, .start = 0, .end = 0};
  if (start >= slice->dLen || end >= slice->dLen || start > end) {
    return t;
  }

  t.data  = slice->data;
  t.dLen  = slice->dLen;
  t.start = start;
  t.end   = end;
  return t;
}
|
||||
/* A slice is empty when it has no data pointer or its dLen is <= 0. */
bool fstSliceEmpty(FstSlice *slice) {
  if (slice->data == NULL) {
    return true;
  }
  return slice->dLen <= 0;
}
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
# Unit-test executable for the index library: builds the test driver together
# with index.c and registers it with CTest as `index_test`.
add_executable(indexTest
    "../src/index.c"
    "indexTests.cpp"
)

target_include_directories(indexTest
    PUBLIC
        "${CMAKE_SOURCE_DIR}/include/libs/index"
        "${CMAKE_CURRENT_SOURCE_DIR}/../inc"
)

target_link_libraries(indexTest
    os
    util
    common
    gtest_main
    index
)

add_test(NAME index_test COMMAND indexTest)
|
|
@ -0,0 +1,59 @@
|
|||
#include <gtest/gtest.h>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include "index.h"
|
||||
#include "indexInt.h"
|
||||
|
||||
|
||||
|
||||
|
||||
// Smoke test for the index API: opens an index at "./test", writes 100000
// documents with four tags each, then runs a two-term MUST query and prints
// the returned ids.
TEST(IndexTest, index_create_test) {
  SIndexOpts *opts = indexOptsCreate();
  SIndex *index = indexOpen(opts, "./test");
  if (index == NULL) {
    // Nothing below can work without an index: fail the test instead of
    // carrying on with a NULL handle.
    indexOptsDestroy(opts);
    FAIL() << "index open failed";
  }

  // write
  for (int i = 0; i < 100000; i++) {
    SIndexMultiTerm* terms = indexMultiTermCreate();
    std::string val = "field";

    // tag1 = "field"
    indexMultiTermAdd(terms, "tag1", strlen("tag1"), val.c_str(), val.size());

    // tag2 = "field<i>"
    val.append(std::to_string(i));
    indexMultiTermAdd(terms, "tag2", strlen("tag2"), val.c_str(), val.size());

    // tag3 = "<i>field<i>"
    val.insert(0, std::to_string(i));
    indexMultiTermAdd(terms, "tag3", strlen("tag3"), val.c_str(), val.size());

    // tag4 = "<i>field<i>const"
    val.append("const");
    indexMultiTermAdd(terms, "tag4", strlen("tag4"), val.c_str(), val.size());

    indexPut(index, terms, i);
    indexMultiTermDestroy(terms);
  }

  // query
  SIndexMultiTermQuery *multiQuery = indexMultiTermQueryCreate(MUST);

  indexMultiTermQueryAdd(multiQuery, "tag1", strlen("tag1"), "field", strlen("field"), QUERY_PREFIX);
  indexMultiTermQueryAdd(multiQuery, "tag3", strlen("tag3"), "0field0", strlen("0field0"), QUERY_TERM);

  SArray *result = (SArray *)taosArrayInit(10, sizeof(int));
  indexSearch(index, multiQuery, result);

  const size_t hits = taosArrayGetSize(result);
  std::cout << "taos'size : " << hits << '\n';
  for (size_t i = 0; i < hits; i++) {
    int *v = (int *)taosArrayGet(result, i);
    std::cout << "value --->" << *v << '\n';
  }
  std::cout.flush();

  // release everything the test allocated, including the result array
  taosArrayDestroy(result);
  indexMultiTermQueryDestroy(multiQuery);

  indexOptsDestroy(opts);
  indexClose(index);
}
|
|
@ -158,6 +158,8 @@ static void tkvInit() {
|
|||
#ifdef USE_ROCKSDB
|
||||
defaultReadOpts.ropts = rocksdb_readoptions_create();
|
||||
defaultWriteOpts.wopts = rocksdb_writeoptions_create();
|
||||
rocksdb_writeoptions_disable_WAL(defaultWriteOpts.wopts, true);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -166,4 +168,4 @@ static void tkvClear() {
|
|||
rocksdb_readoptions_destroy(defaultReadOpts.ropts);
|
||||
rocksdb_writeoptions_destroy(defaultWriteOpts.wopts);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue