From a1d40669dced293cb289bcfd608b2cb3cd20c52c Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 31 Mar 2022 11:21:53 +0800 Subject: [PATCH 1/4] add fuzzy search --- source/libs/index/src/index.c | 2 +- source/libs/index/src/indexCache.c | 4 ++-- source/libs/index/src/indexFstCommon.c | 2 +- source/libs/index/src/{indexSparse.c => indexFstSparse.c} | 0 4 files changed, 4 insertions(+), 4 deletions(-) rename source/libs/index/src/{indexSparse.c => indexFstSparse.c} (100%) diff --git a/source/libs/index/src/index.c b/source/libs/index/src/index.c index d3ca3a1acf..7d52abcd1b 100644 --- a/source/libs/index/src/index.c +++ b/source/libs/index/src/index.c @@ -27,7 +27,7 @@ #endif #define INDEX_NUM_OF_THREADS 4 -#define INDEX_QUEUE_SIZE 200 +#define INDEX_QUEUE_SIZE 200 void* indexQhandle = NULL; diff --git a/source/libs/index/src/indexCache.c b/source/libs/index/src/indexCache.c index ca26cf38e5..df3c0b6e7b 100644 --- a/source/libs/index/src/indexCache.c +++ b/source/libs/index/src/indexCache.c @@ -21,8 +21,8 @@ #define MAX_INDEX_KEY_LEN 256 // test only, change later -#define MEM_TERM_LIMIT 10 * 10000 -#define MEM_THRESHOLD 1024 * 1024 +#define MEM_TERM_LIMIT 10 * 10000 +#define MEM_THRESHOLD 1024 * 1024 #define MEM_ESTIMATE_RADIO 1.5 static void indexMemRef(MemTable* tbl); diff --git a/source/libs/index/src/indexFstCommon.c b/source/libs/index/src/indexFstCommon.c index e2544c7ac3..902e68ce09 100644 --- a/source/libs/index/src/indexFstCommon.c +++ b/source/libs/index/src/indexFstCommon.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 TAOS Data, Inc. +YAML:9:25: error: unknown key 'AlignConsecutiveMacros' * Copyright (c) 2019 TAOS Data, Inc. * * This program is free software: you can use, redistribute, and/or modify * it under the terms of the GNU Affero General Public License, version 3 diff --git a/source/libs/index/src/indexSparse.c b/source/libs/index/src/indexFstSparse.c similarity index 100% rename from source/libs/index/src/indexSparse.c rename to source/libs/index/src/indexFstSparse.c From 9da56a412ab99239ae60fa02c0d85c64a5d0f55d Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 31 Mar 2022 13:56:09 +0800 Subject: [PATCH 2/4] add fuzzy search --- source/libs/index/inc/indexFstRegex.h | 2 +- source/libs/index/src/indexFstRegex.c | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/source/libs/index/inc/indexFstRegex.h b/source/libs/index/inc/indexFstRegex.h index 50b9cae7ff..a58906c242 100644 --- a/source/libs/index/inc/indexFstRegex.h +++ b/source/libs/index/inc/indexFstRegex.h @@ -63,7 +63,7 @@ typedef struct { FstRegex *regexCreate(const char *str); -void regexSetup(FstRegex *regex, uint32_t size, const char *str); +// void regexSetup(FstRegex *regex, uint32_t size, const char *str); // uint32_t regexStart() diff --git a/source/libs/index/src/indexFstRegex.c b/source/libs/index/src/indexFstRegex.c index ec41a7f58e..2b8c8b6708 100644 --- a/source/libs/index/src/indexFstRegex.c +++ b/source/libs/index/src/indexFstRegex.c @@ -14,6 +14,7 @@ */ #include "indexFstRegex.h" +#include "indexFstDfa.h" #include "indexFstSparse.h" FstRegex *regexCreate(const char *str) { @@ -26,9 +27,11 @@ FstRegex *regexCreate(const char *str) { memcpy(orig, str, sz); regex->orig = orig; -} -void regexSetup(FstRegex *regex, uint32_t size, const char *str) { - // return - // return; + // construct insts based on str + SArray *insts = NULL; + + FstDfaBuilder *builder = dfaBuilderCreate(insts); + regex->dfa = dfaBuilderBuild(builder); + return regex; } From aae1ba31ac617b8639f9d51097f0302fe49ebbc6 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 31 Mar 2022 14:45:26 +0800 Subject: [PATCH 3/4] add fuzzy search --- source/libs/index/inc/indexFstRegex.h | 4 ++++ source/libs/index/src/indexFstRegex.c | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/source/libs/index/inc/indexFstRegex.h b/source/libs/index/inc/indexFstRegex.h index a58906c242..a80e768feb 100644 --- a/source/libs/index/inc/indexFstRegex.h +++ b/source/libs/index/inc/indexFstRegex.h @@ -63,6 +63,10 @@ typedef struct { FstRegex *regexCreate(const char *str); +uint32_t regexAutomStart(FstRegex *regex); +bool regexAutomIsMatch(FstRegex *regex, uint32_t state); +bool regexAutomCanMatch(FstRegex *regex, uint32_t state, bool null); +bool regexAutomAccept(FstRegex *regex, uint32_t state, uint8_t byte, uint32_t *result); // void regexSetup(FstRegex *regex, uint32_t size, const char *str); // uint32_t regexStart() diff --git a/source/libs/index/src/indexFstRegex.c b/source/libs/index/src/indexFstRegex.c index 2b8c8b6708..33eeae802e 100644 --- a/source/libs/index/src/indexFstRegex.c +++ b/source/libs/index/src/indexFstRegex.c @@ -35,3 +35,27 @@ FstRegex *regexCreate(const char *str) { regex->dfa = dfaBuilderBuild(builder); return regex; } + +uint32_t regexAutomStart(FstRegex *regex) { + ///// no nothing + return 0; +} +bool regexAutomIsMatch(FstRegex *regex, uint32_t state) { + if (regex->dfa != NULL && dfaIsMatch(regex->dfa, state)) { + return true; + } else { + return false; + } +} + +bool regexAutomCanMatch(FstRegex *regex, uint32_t state, bool null) { + // make frame happy + return null; +} + +bool regexAutomAccept(FstRegex *regex, uint32_t state, uint8_t byte, uint32_t *result) { + if (regex->dfa == NULL) { + return false; + } + return dfaAccept(regex->dfa, state, byte, result); +} From 6256e6f288cdeb56b6a3de8087739633dd2390cf Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 31 Mar 2022 18:05:13 +0800 Subject: [PATCH 4/4] add fuzzy search --- source/libs/index/inc/indexFstRegex.h | 3 --- source/libs/index/inc/indexFstSparse.h | 6 ++--- source/libs/index/src/indexFstSparse.c | 37 ++++++++++++-------------- 3 files changed, 20 insertions(+), 26 deletions(-) diff --git a/source/libs/index/inc/indexFstRegex.h b/source/libs/index/inc/indexFstRegex.h index a80e768feb..8fb5455336 100644 --- a/source/libs/index/inc/indexFstRegex.h +++ b/source/libs/index/inc/indexFstRegex.h @@ -67,9 +67,6 @@ uint32_t regexAutomStart(FstRegex *regex); bool regexAutomIsMatch(FstRegex *regex, uint32_t state); bool regexAutomCanMatch(FstRegex *regex, uint32_t state, bool null); bool regexAutomAccept(FstRegex *regex, uint32_t state, uint8_t byte, uint32_t *result); -// void regexSetup(FstRegex *regex, uint32_t size, const char *str); - -// uint32_t regexStart() #ifdef __cplusplus } diff --git a/source/libs/index/inc/indexFstSparse.h b/source/libs/index/inc/indexFstSparse.h index 69b33c82d9..665fb2ba5c 100644 --- a/source/libs/index/inc/indexFstSparse.h +++ b/source/libs/index/inc/indexFstSparse.h @@ -23,9 +23,9 @@ extern "C" { #endif typedef struct FstSparseSet { - SArray *dense; - SArray *sparse; - int32_t size; + uint32_t *dense; + uint32_t *sparse; + int32_t size; } FstSparseSet; FstSparseSet *sparSetCreate(int32_t sz); diff --git a/source/libs/index/src/indexFstSparse.c b/source/libs/index/src/indexFstSparse.c index 9d228e71ff..e8ab3be2fe 100644 --- a/source/libs/index/src/indexFstSparse.c +++ b/source/libs/index/src/indexFstSparse.c @@ -21,47 +21,44 @@ FstSparseSet *sparSetCreate(int32_t sz) { return NULL; } - ss->dense = taosArrayInit(sz, sizeof(uint32_t)); - ss->sparse = taosArrayInit(sz, sizeof(uint32_t)); - ss->size = sz; + ss->dense = (uint32_t *)taosMemoryCalloc(sz, sizeof(uint32_t)); + ss->sparse = (uint32_t *)taosMemoryCalloc(sz, sizeof(uint32_t)); + ss->size = 0; return ss; } void sparSetDestroy(FstSparseSet *ss) { if (ss == NULL) { return; } - taosArrayDestroy(ss->dense); - taosArrayDestroy(ss->sparse); + taosMemoryFree(ss->dense); + taosMemoryFree(ss->sparse); taosMemoryFree(ss); } -uint32_t sparSetLen(FstSparseSet *ss) { return ss == NULL ? 0 : ss->size; } +uint32_t sparSetLen(FstSparseSet *ss) { + // Get occupied size + return ss == NULL ? 0 : ss->size; +} uint32_t sparSetAdd(FstSparseSet *ss, uint32_t ip) { if (ss == NULL) { return 0; } uint32_t i = ss->size; - taosArraySet(ss->dense, i, &ip); - taosArraySet(ss->sparse, ip, &i); + ss->dense[i] = ip; + ss->sparse[ip] = i; ss->size += 1; return i; } uint32_t sparSetGet(FstSparseSet *ss, uint32_t i) { - if (i >= taosArrayGetSize(ss->dense)) { - return 0; - } - uint32_t *v = taosArrayGet(ss->dense, i); - return *v; + // check later + return ss->dense[i]; } bool sparSetContains(FstSparseSet *ss, uint32_t ip) { - if (ip >= taosArrayGetSize(ss->sparse)) { + uint32_t i = ss->sparse[ip]; + if (i < ss->size && ss->dense[i] == ip) { + return true; + } else { return false; } - uint32_t i = *(uint32_t *)taosArrayGet(ss->sparse, ip); - if (i >= taosArrayGetSize(ss->dense)) { - return false; - } - uint32_t v = *(uint32_t *)taosArrayGet(ss->dense, i); - return v == ip; } void sparSetClear(FstSparseSet *ss) { if (ss == NULL) {