Merge pull request #11137 from taosdata/feature/fst_update_query
add fuzzy search
This commit is contained in:
commit
6ce2556391
|
@ -0,0 +1,74 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can use, redistribute, and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License, version 3
|
||||||
|
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __INDEX_FST_DFA_H__
|
||||||
|
#define __INDEX_FST_DFA_H__
|
||||||
|
|
||||||
|
#include "indexFstRegex.h"
|
||||||
|
#include "indexFstSparse.h"
|
||||||
|
#include "tarray.h"
|
||||||
|
#include "thash.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef struct FstDfa FstDfa;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
SArray * insts;
|
||||||
|
uint32_t next[256];
|
||||||
|
bool isMatch;
|
||||||
|
} State;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* dfa builder related func
|
||||||
|
**/
|
||||||
|
typedef struct FstDfaBuilder {
|
||||||
|
FstDfa * dfa;
|
||||||
|
SHashObj *cache;
|
||||||
|
} FstDfaBuilder;
|
||||||
|
|
||||||
|
FstDfaBuilder *dfaBuilderCreate(SArray *insts);
|
||||||
|
|
||||||
|
void dfaBuilderDestroy(FstDfaBuilder *builder);
|
||||||
|
|
||||||
|
FstDfa *dfaBuilderBuild(FstDfaBuilder *builder);
|
||||||
|
|
||||||
|
bool dfaBuilderRunState(FstDfaBuilder *builder, FstSparseSet *cur, FstSparseSet *next, uint32_t state, uint8_t bytes,
|
||||||
|
uint32_t *result);
|
||||||
|
|
||||||
|
bool dfaBuilderCachedState(FstDfaBuilder *builder, FstSparseSet *set, uint32_t *result);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* dfa related func
|
||||||
|
**/
|
||||||
|
typedef struct FstDfa {
|
||||||
|
SArray *insts;
|
||||||
|
SArray *states;
|
||||||
|
} FstDfa;
|
||||||
|
|
||||||
|
FstDfa *dfaCreate(SArray *insts, SArray *states);
|
||||||
|
bool dfaIsMatch(FstDfa *dfa, uint32_t si);
|
||||||
|
bool dfaAccept(FstDfa *dfa, uint32_t si, uint8_t byte, uint32_t *result);
|
||||||
|
void dfaAdd(FstDfa *dfa, FstSparseSet *set, uint32_t ip);
|
||||||
|
bool dfaRun(FstDfa *dfa, FstSparseSet *from, FstSparseSet *to, uint8_t byte);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,74 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can use, redistribute, and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License, version 3
|
||||||
|
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _TD_INDEX_FST_REGEX_H_
|
||||||
|
#define _TD_INDEX_FST_REGEX_H_
|
||||||
|
|
||||||
|
//#include "indexFstDfa.h"
|
||||||
|
#include "taos.h"
|
||||||
|
#include "tarray.h"
|
||||||
|
#include "tchecksum.h"
|
||||||
|
#include "thash.h"
|
||||||
|
#include "tlog.h"
|
||||||
|
#include "tutil.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef enum { MATCH, JUMP, SPLIT, RANGE } InstType;
|
||||||
|
|
||||||
|
typedef struct MatchValue {
|
||||||
|
} MatchValue;
|
||||||
|
typedef struct JumpValue {
|
||||||
|
uint32_t step;
|
||||||
|
} JumpValue;
|
||||||
|
|
||||||
|
typedef struct SplitValue {
|
||||||
|
uint32_t len1;
|
||||||
|
uint32_t len2;
|
||||||
|
} SplitValue;
|
||||||
|
|
||||||
|
typedef struct RangeValue {
|
||||||
|
uint8_t start;
|
||||||
|
uint8_t end;
|
||||||
|
} RangeValue;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
InstType ty;
|
||||||
|
union {
|
||||||
|
MatchValue mv;
|
||||||
|
JumpValue jv;
|
||||||
|
SplitValue sv;
|
||||||
|
RangeValue rv;
|
||||||
|
};
|
||||||
|
} Inst;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
char *orig;
|
||||||
|
void *dfa;
|
||||||
|
} FstRegex;
|
||||||
|
|
||||||
|
FstRegex *regexCreate(const char *str);
|
||||||
|
|
||||||
|
void regexSetup(FstRegex *regex, uint32_t size, const char *str);
|
||||||
|
|
||||||
|
// uint32_t regexStart()
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
|
@ -13,8 +13,8 @@
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef _TD_INDEX_SPARSE_H_
|
#ifndef _TD_INDEX_FST_SPARSE_H_
|
||||||
#define _TD_INDEX_SPARSE_H_
|
#define _TD_INDEX_FST_SPARSE_H_
|
||||||
|
|
||||||
#include "tarray.h"
|
#include "tarray.h"
|
||||||
|
|
|
@ -0,0 +1,218 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can use, redistribute, and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License, version 3
|
||||||
|
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "indexFstDfa.h"
|
||||||
|
#include "thash.h"
|
||||||
|
|
||||||
|
const static uint32_t STATE_LIMIT = 1000;
|
||||||
|
|
||||||
|
static int dfaInstsEqual(const void *a, const void *b, size_t size) {
|
||||||
|
SArray *ar = (SArray *)a;
|
||||||
|
SArray *br = (SArray *)b;
|
||||||
|
size_t al = ar != NULL ? taosArrayGetSize(ar) : 0;
|
||||||
|
size_t bl = br != NULL ? taosArrayGetSize(br) : 0;
|
||||||
|
if (al != bl) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < al; i++) {
|
||||||
|
uint32_t v1 = *(uint32_t *)taosArrayGet(ar, i);
|
||||||
|
uint32_t v2 = *(uint32_t *)taosArrayGet(br, i);
|
||||||
|
if (v1 != v2) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
FstDfaBuilder *dfaBuilderCreate(SArray *insts) {
|
||||||
|
FstDfaBuilder *builder = taosMemoryCalloc(1, sizeof(FstDfaBuilder));
|
||||||
|
if (builder == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
SArray *states = taosArrayInit(4, sizeof(State));
|
||||||
|
|
||||||
|
builder->dfa = dfaCreate(insts, states);
|
||||||
|
builder->cache = taosHashInit(
|
||||||
|
4, taosGetDefaultHashFunction(POINTER_BYTES == sizeof(int64_t) ? TSDB_DATA_TYPE_BIGINT : TSDB_DATA_TYPE_INT),
|
||||||
|
false, HASH_NO_LOCK);
|
||||||
|
taosHashSetEqualFp(builder->cache, dfaInstsEqual);
|
||||||
|
return builder;
|
||||||
|
}
|
||||||
|
void dfaBuilderDestroy(FstDfaBuilder *builder) {
|
||||||
|
if (builder == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
void *pIter = builder->cache != NULL ? taosHashIterate(builder->cache, NULL) : NULL;
|
||||||
|
while (pIter) {
|
||||||
|
SArray **key = pIter;
|
||||||
|
taosArrayDestroy(*key);
|
||||||
|
pIter = taosHashIterate(builder->cache, pIter);
|
||||||
|
}
|
||||||
|
taosHashCleanup(builder->cache);
|
||||||
|
}
|
||||||
|
|
||||||
|
FstDfa *dfaBuilderBuild(FstDfaBuilder *builder) {
|
||||||
|
uint32_t sz = taosArrayGetSize(builder->dfa->insts);
|
||||||
|
FstSparseSet *cur = sparSetCreate(sz);
|
||||||
|
FstSparseSet *nxt = sparSetCreate(sz);
|
||||||
|
|
||||||
|
dfaAdd(builder->dfa, cur, 0);
|
||||||
|
|
||||||
|
SArray * states = taosArrayInit(0, sizeof(uint32_t));
|
||||||
|
uint32_t result;
|
||||||
|
if (dfaBuilderCachedState(builder, cur, &result)) {
|
||||||
|
taosArrayPush(states, &result);
|
||||||
|
}
|
||||||
|
SHashObj *seen = taosHashInit(12, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK);
|
||||||
|
while (taosArrayGetSize(states) != 0) {
|
||||||
|
result = *(uint32_t *)taosArrayPop(states);
|
||||||
|
for (int i = 0; i < 256; i++) {
|
||||||
|
uint32_t ns, dummpy = 0;
|
||||||
|
if (dfaBuilderRunState(builder, cur, nxt, result, i, &ns)) {
|
||||||
|
if (taosHashGet(seen, &ns, sizeof(ns)) == NULL) {
|
||||||
|
taosHashPut(seen, &ns, sizeof(ns), &dummpy, sizeof(dummpy));
|
||||||
|
taosArrayPush(states, &ns);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (taosArrayGetSize(builder->dfa->states) > STATE_LIMIT) {
|
||||||
|
// Too many state;
|
||||||
|
//
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
taosArrayDestroy(states);
|
||||||
|
taosHashCleanup(seen);
|
||||||
|
return builder->dfa;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool dfaBuilderRunState(FstDfaBuilder *builder, FstSparseSet *cur, FstSparseSet *next, uint32_t state, uint8_t byte,
|
||||||
|
uint32_t *result) {
|
||||||
|
sparSetClear(cur);
|
||||||
|
State *t = taosArrayGet(builder->dfa->states, state);
|
||||||
|
for (int i = 0; i < taosArrayGetSize(t->insts); i++) {
|
||||||
|
uint32_t ip = *(int32_t *)taosArrayGet(t->insts, i);
|
||||||
|
sparSetAdd(cur, ip);
|
||||||
|
}
|
||||||
|
dfaRun(builder->dfa, cur, next, byte);
|
||||||
|
|
||||||
|
t = taosArrayGet(builder->dfa->states, state);
|
||||||
|
|
||||||
|
uint32_t nxtState;
|
||||||
|
if (dfaBuilderCachedState(builder, next, &nxtState)) {
|
||||||
|
t->next[byte] = nxtState;
|
||||||
|
*result = nxtState;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool dfaBuilderCachedState(FstDfaBuilder *builder, FstSparseSet *set, uint32_t *result) {
|
||||||
|
SArray *tinsts = taosArrayInit(4, sizeof(uint32_t));
|
||||||
|
bool isMatch = false;
|
||||||
|
|
||||||
|
for (int i = 0; i < sparSetLen(set); i++) {
|
||||||
|
uint32_t ip = sparSetGet(set, i);
|
||||||
|
|
||||||
|
Inst *inst = taosArrayGet(builder->dfa->insts, ip);
|
||||||
|
if (inst->ty == JUMP || inst->ty == SPLIT) {
|
||||||
|
continue;
|
||||||
|
} else if (inst->ty == RANGE) {
|
||||||
|
taosArrayPush(tinsts, &ip);
|
||||||
|
} else if (inst->ty == MATCH) {
|
||||||
|
isMatch = true;
|
||||||
|
taosArrayPush(tinsts, &ip);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (taosArrayGetSize(tinsts) == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
uint32_t *v = taosHashGet(builder->cache, &tinsts, sizeof(POINTER_BYTES));
|
||||||
|
if (v != NULL) {
|
||||||
|
*result = *v;
|
||||||
|
taosArrayDestroy(tinsts);
|
||||||
|
} else {
|
||||||
|
State st;
|
||||||
|
st.insts = tinsts;
|
||||||
|
st.isMatch = isMatch;
|
||||||
|
taosArrayPush(builder->dfa->states, &st);
|
||||||
|
int32_t sz = taosArrayGetSize(builder->dfa->states) - 1;
|
||||||
|
taosHashPut(builder->cache, &tinsts, sizeof(POINTER_BYTES), &sz, sizeof(sz));
|
||||||
|
*result = sz;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
FstDfa *dfaCreate(SArray *insts, SArray *states) {
|
||||||
|
FstDfa *dfa = taosMemoryCalloc(1, sizeof(FstDfa));
|
||||||
|
if (dfa == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
dfa->insts = insts;
|
||||||
|
dfa->states = states;
|
||||||
|
return dfa;
|
||||||
|
}
|
||||||
|
bool dfaIsMatch(FstDfa *dfa, uint32_t si) {
|
||||||
|
if (dfa->states == NULL || si < taosArrayGetSize(dfa->states)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
State *st = taosArrayGet(dfa->states, si);
|
||||||
|
return st != NULL ? st->isMatch : false;
|
||||||
|
}
|
||||||
|
bool dfaAccept(FstDfa *dfa, uint32_t si, uint8_t byte, uint32_t *result) {
|
||||||
|
if (dfa->states == NULL || si < taosArrayGetSize(dfa->states)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
State *st = taosArrayGet(dfa->states, si);
|
||||||
|
*result = st->next[byte];
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
void dfaAdd(FstDfa *dfa, FstSparseSet *set, uint32_t ip) {
|
||||||
|
if (sparSetContains(set, ip)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
sparSetAdd(set, ip);
|
||||||
|
Inst *inst = taosArrayGet(dfa->insts, ip);
|
||||||
|
if (inst->ty == MATCH || inst->ty == RANGE) {
|
||||||
|
// do nothing
|
||||||
|
} else if (inst->ty == JUMP) {
|
||||||
|
dfaAdd(dfa, set, inst->jv.step);
|
||||||
|
} else if (inst->ty == SPLIT) {
|
||||||
|
dfaAdd(dfa, set, inst->sv.len1);
|
||||||
|
dfaAdd(dfa, set, inst->sv.len2);
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
bool dfaRun(FstDfa *dfa, FstSparseSet *from, FstSparseSet *to, uint8_t byte) {
|
||||||
|
bool isMatch = false;
|
||||||
|
sparSetClear(to);
|
||||||
|
for (int i = 0; i < sparSetLen(from); i++) {
|
||||||
|
uint32_t ip = sparSetGet(from, i);
|
||||||
|
|
||||||
|
Inst *inst = taosArrayGet(dfa->insts, ip);
|
||||||
|
if (inst->ty == JUMP || inst->ty == SPLIT) {
|
||||||
|
continue;
|
||||||
|
} else if (inst->ty == MATCH) {
|
||||||
|
isMatch = true;
|
||||||
|
} else if (inst->ty == RANGE) {
|
||||||
|
if (inst->rv.start <= byte && byte <= inst->rv.end) {
|
||||||
|
dfaAdd(dfa, to, ip + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return isMatch;
|
||||||
|
}
|
|
@ -0,0 +1,34 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can use, redistribute, and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License, version 3
|
||||||
|
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "indexFstRegex.h"
|
||||||
|
#include "indexFstSparse.h"
|
||||||
|
|
||||||
|
FstRegex *regexCreate(const char *str) {
|
||||||
|
FstRegex *regex = taosMemoryCalloc(1, sizeof(FstRegex));
|
||||||
|
if (regex == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
int32_t sz = (int32_t)strlen(str);
|
||||||
|
char * orig = taosMemoryCalloc(1, sz);
|
||||||
|
memcpy(orig, str, sz);
|
||||||
|
|
||||||
|
regex->orig = orig;
|
||||||
|
}
|
||||||
|
|
||||||
|
void regexSetup(FstRegex *regex, uint32_t size, const char *str) {
|
||||||
|
// return
|
||||||
|
// return;
|
||||||
|
}
|
|
@ -13,7 +13,7 @@
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "indexSparse.h"
|
#include "indexFstSparse.h"
|
||||||
|
|
||||||
FstSparseSet *sparSetCreate(int32_t sz) {
|
FstSparseSet *sparSetCreate(int32_t sz) {
|
||||||
FstSparseSet *ss = taosMemoryCalloc(1, sizeof(FstSparseSet));
|
FstSparseSet *ss = taosMemoryCalloc(1, sizeof(FstSparseSet));
|
||||||
|
|
Loading…
Reference in New Issue