diff --git a/source/libs/index/inc/indexFstDfa.h b/source/libs/index/inc/indexFstDfa.h
new file mode 100644
index 0000000000..72a265c123
--- /dev/null
+++ b/source/libs/index/inc/indexFstDfa.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2019 TAOS Data, Inc.
+ *
+ * This program is free software: you can use, redistribute, and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3
+ * or later ("AGPL"), as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _TD_INDEX_FST_DFA_H_
+#define _TD_INDEX_FST_DFA_H_
+
+#include "indexFstRegex.h"
+#include "indexFstSparse.h"
+#include "tarray.h"
+#include "thash.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct FstDfa FstDfa;
+
+/*
+ * A single dfa state.
+ **/
+typedef struct {
+  SArray  *insts;      // instruction array kept with this state
+  uint32_t next[256];  // one slot per byte value; presumably the next state index (TODO confirm)
+  bool     isMatch;    // match flag of this state
+} State;
+
+/*
+ * dfa builder related func
+ **/
+typedef struct FstDfaBuilder {
+  FstDfa   *dfa;    // dfa created by dfaBuilderCreate from the given instructions
+  SHashObj *cache;  // hash table set up by dfaBuilderCreate
+} FstDfaBuilder;
+
+// Allocates a builder together with its dfa and cache; returns NULL if the builder cannot be allocated.
+FstDfaBuilder *dfaBuilderCreate(SArray *insts);
+
+// Drives construction of builder->dfa (implementation still in progress).
+FstDfa *dfaBuilderBuild(FstDfaBuilder *builder);
+
+bool dfaBuilderRunState(FstDfaBuilder *builder, FstSparseSet *cur, FstSparseSet *next, uint32_t state, uint8_t bytes,
+                        uint32_t *result);
+
+bool dfaBuilderCachedState(FstDfaBuilder *builder, FstSparseSet *set, uint32_t *result);
+
+/*
+ * dfa related func
+ * (the FstDfa typedef is declared once above; repeating it here is a redefinition before C11)
+ **/
+struct FstDfa {
+  SArray *insts;   // stored as passed to dfaCreate (pointer only, no copy)
+  SArray *states;  // stored as passed to dfaCreate (pointer only, no copy)
+};
+
+// Allocates a dfa that references insts and states; returns NULL on allocation failure.
+FstDfa *dfaCreate(SArray *insts, SArray *states);
+bool    dfaIsMatch(FstDfa *dfa, uint32_t si);
+bool    dfaAccept(FstDfa *dfa, uint32_t si, uint8_t byte, uint32_t *result);
+void    dfaAdd(FstDfa *dfa, FstSparseSet *set, uint32_t ip);
+bool    dfaRun(FstDfa *dfa, FstSparseSet *from, FstSparseSet *to, uint8_t byte);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/source/libs/index/inc/indexFstRegex.h b/source/libs/index/inc/indexFstRegex.h
new file mode 100644
index 0000000000..50b9cae7ff
--- /dev/null
+++ b/source/libs/index/inc/indexFstRegex.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2019 TAOS Data, Inc.
+ *
+ * This program is free software: you can use, redistribute, and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3
+ * or later ("AGPL"), as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _TD_INDEX_FST_REGEX_H_
+#define _TD_INDEX_FST_REGEX_H_
+
+// indexFstDfa.h includes this header, so it cannot be included back here; FstRegex keeps its dfa as a void pointer.
+#include "taos.h"
+#include "tarray.h"
+#include "tchecksum.h"
+#include "thash.h"
+#include "tlog.h"
+#include "tutil.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Instruction kinds stored in Inst.ty.
+typedef enum { MATCH, JUMP, SPLIT, RANGE } InstType;
+
+typedef struct MatchValue {
+  char unused;  // placeholder only: ISO C does not allow a struct without members
+} MatchValue;
+
+typedef struct JumpValue {
+  uint32_t step;
+} JumpValue;
+
+typedef struct SplitValue {
+  uint32_t len1;
+  uint32_t len2;
+} SplitValue;
+
+typedef struct RangeValue {
+  uint8_t start;
+  uint8_t end;
+} RangeValue;
+
+typedef struct {
+  InstType ty;
+  union {
+    MatchValue mv;
+    JumpValue  jv;
+    SplitValue sv;
+    RangeValue rv;
+  };
+} Inst;
+
+typedef struct {
+  char *orig;  // copy of the pattern text made by regexCreate
+  void *dfa;   // opaque dfa handle; not assigned by regexCreate
+} FstRegex;
+
+// Allocates a regex object holding a private copy of str.
+FstRegex *regexCreate(const char *str);
+
+void regexSetup(FstRegex *regex, uint32_t size, const char *str);
+
+// uint32_t regexStart()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/source/libs/index/inc/indexSparse.h b/source/libs/index/inc/indexFstSparse.h
similarity index 94%
rename from source/libs/index/inc/indexSparse.h
rename to 
source/libs/index/inc/indexFstSparse.h
index 8035f6e08d..69b33c82d9 100644
--- a/source/libs/index/inc/indexSparse.h
+++ b/source/libs/index/inc/indexFstSparse.h
@@ -13,8 +13,8 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
-#ifndef _TD_INDEX_SPARSE_H_
-#define _TD_INDEX_SPARSE_H_
+#ifndef _TD_INDEX_FST_SPARSE_H_
+#define _TD_INDEX_FST_SPARSE_H_
 
 #include "tarray.h"
 
diff --git a/source/libs/index/src/indexFstDfa.c b/source/libs/index/src/indexFstDfa.c
new file mode 100644
index 0000000000..765c5f08eb
--- /dev/null
+++ b/source/libs/index/src/indexFstDfa.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2019 TAOS Data, Inc.
+ *
+ * This program is free software: you can use, redistribute, and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3
+ * or later ("AGPL"), as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "indexFstDfa.h"
+#include "thash.h"
+
+/*
+ * Equality callback installed on the builder cache. Both keys are read as
+ * SArray pointers holding uint32_t elements and compared element by element;
+ * a NULL array counts as empty. Returns 0 when equal, -1 otherwise. The size
+ * argument is not used.
+ **/
+static int dfaInstsEqual(const void *a, const void *b, size_t size) {
+  SArray *ar = (SArray *)a;
+  SArray *br = (SArray *)b;
+  size_t  al = ar != NULL ? taosArrayGetSize(ar) : 0;
+  size_t  bl = br != NULL ? taosArrayGetSize(br) : 0;
+  if (al != bl) {
+    return -1;
+  }
+  // index with size_t so the loop bound comparison is not mixed-sign
+  for (size_t i = 0; i < al; i++) {
+    uint32_t v1 = *(uint32_t *)taosArrayGet(ar, i);
+    uint32_t v2 = *(uint32_t *)taosArrayGet(br, i);
+    if (v1 != v2) {
+      return -1;
+    }
+  }
+  return 0;
+}
+
+/*
+ * Allocates a builder for insts together with an empty state array, the dfa
+ * and the state cache. insts is only referenced; it is neither copied nor
+ * freed here. Returns NULL if any allocation fails, after releasing whatever
+ * was allocated up to that point.
+ **/
+FstDfaBuilder *dfaBuilderCreate(SArray *insts) {
+  FstDfaBuilder *builder = taosMemoryCalloc(1, sizeof(FstDfaBuilder));
+  if (builder == NULL) {
+    return NULL;
+  }
+
+  SArray *states = taosArrayInit(4, sizeof(State));
+  if (states == NULL) {
+    goto _err;
+  }
+
+  builder->dfa = dfaCreate(insts, states);
+  if (builder->dfa == NULL) {
+    goto _err;
+  }
+
+  builder->cache = taosHashInit(
+      4, taosGetDefaultHashFunction(POINTER_BYTES == sizeof(int64_t) ? TSDB_DATA_TYPE_BIGINT : TSDB_DATA_TYPE_INT),
+      false, HASH_NO_LOCK);
+  if (builder->cache == NULL) {
+    goto _err;
+  }
+  taosHashSetEqualFp(builder->cache, dfaInstsEqual);
+  return builder;
+
+_err:
+  // builder was calloc'ed, so builder->dfa is NULL unless dfaCreate succeeded
+  if (builder->dfa != NULL) {
+    taosMemoryFree(builder->dfa);
+  }
+  if (states != NULL) {
+    taosArrayDestroy(states);
+  }
+  taosMemoryFree(builder);
+  return NULL;
+}
+
+/*
+ * Runs dfa construction for builder and returns builder->dfa (the pointer the
+ * builder itself keeps). Only the seeding step, dfaAdd for instruction 0, is
+ * implemented so far. Returns NULL if builder has no dfa or a scratch set
+ * cannot be allocated.
+ **/
+FstDfa *dfaBuilderBuild(FstDfaBuilder *builder) {
+  if (builder == NULL || builder->dfa == NULL) {
+    return NULL;
+  }
+
+  FstDfa       *dfa = builder->dfa;
+  uint32_t      sz = taosArrayGetSize(dfa->insts);
+  FstSparseSet *cur = sparSetCreate(sz);
+  FstSparseSet *nxt = sparSetCreate(sz);
+  if (cur != NULL && nxt != NULL) {
+    dfaAdd(dfa, cur, 0);
+  } else {
+    dfa = NULL;
+  }
+
+  // cur and nxt are scratch sets that only live for the duration of the build
+  if (cur != NULL) {
+    sparSetDestroy(cur);
+  }
+  if (nxt != NULL) {
+    sparSetDestroy(nxt);
+  }
+  return dfa;
+}
+
+/*
+ * Placeholder: the state transition is not implemented yet; always returns
+ * true and leaves *result untouched.
+ **/
+bool dfaBuilderRunState(FstDfaBuilder *builder, FstSparseSet *cur, FstSparseSet *next, uint32_t state, uint8_t bytes,
+                        uint32_t *result) {
+  // impl run state
+  return true;
+}
+
+/*
+ * Placeholder: the cache lookup is not implemented yet; always returns true
+ * and leaves *result untouched.
+ **/
+bool dfaBuilderCachedState(FstDfaBuilder *builder, FstSparseSet *set, uint32_t *result) {
+  // impl cache state
+  return true;
+}
+
+/*
+ * Allocates a dfa that stores the insts and states pointers as given (no
+ * copy is made). Returns NULL on allocation failure.
+ **/
+FstDfa *dfaCreate(SArray *insts, SArray *states) {
+  FstDfa *dfa = taosMemoryCalloc(1, sizeof(FstDfa));
+  if (dfa == NULL) {
+    return NULL;
+  }
+
+  dfa->insts = insts;
+  dfa->states = states;
+  return dfa;
+}
+
+// Placeholder: always reports a match.
+bool dfaIsMatch(FstDfa *dfa, uint32_t si) {
+  // impl match
+  return true;
+}
+
+// Placeholder: always returns true and leaves *result untouched.
+bool dfaAccept(FstDfa *dfa, uint32_t si, uint8_t byte, uint32_t *result) {
+  // impl accept
+  return true;
+}
+
+// Placeholder: does nothing yet.
+void dfaAdd(FstDfa *dfa, FstSparseSet *set, uint32_t ip) {
+  // impl add
+  return;
+}
+
+// Placeholder: always returns true.
+bool dfaRun(FstDfa *dfa, FstSparseSet *from, FstSparseSet *to, uint8_t byte) {
+  // impl run
+  return true;
+}
diff --git a/source/libs/index/src/indexFstRegex.c b/source/libs/index/src/indexFstRegex.c
new file 
mode 100644
index 0000000000..ec41a7f58e
--- /dev/null
+++ b/source/libs/index/src/indexFstRegex.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2019 TAOS Data, Inc.
+ *
+ * This program is free software: you can use, redistribute, and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3
+ * or later ("AGPL"), as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "indexFstRegex.h"
+#include "indexFstSparse.h"
+
+/*
+ * Allocates a regex object that owns a NUL-terminated copy of str; the dfa
+ * member is left NULL. Returns NULL if str is NULL or an allocation fails.
+ **/
+FstRegex *regexCreate(const char *str) {
+  if (str == NULL) {
+    return NULL;
+  }
+
+  FstRegex *regex = taosMemoryCalloc(1, sizeof(FstRegex));
+  if (regex == NULL) {
+    return NULL;
+  }
+
+  size_t sz = strlen(str);
+  // one extra zeroed byte keeps orig usable as a C string, also for an empty pattern
+  char  *orig = taosMemoryCalloc(1, sz + 1);
+  if (orig == NULL) {
+    taosMemoryFree(regex);
+    return NULL;
+  }
+  memcpy(orig, str, sz);
+
+  regex->orig = orig;
+  return regex;
+}
+
+/*
+ * Placeholder: no setup work is implemented yet, so the arguments are ignored.
+ **/
+void regexSetup(FstRegex *regex, uint32_t size, const char *str) {
+  // not implemented yet
+}
diff --git a/source/libs/index/src/indexSparse.c b/source/libs/index/src/indexSparse.c
index 8bcf04602f..9d228e71ff 100644
--- a/source/libs/index/src/indexSparse.c
+++ b/source/libs/index/src/indexSparse.c
@@ -13,7 +13,7 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "indexSparse.h"
+#include "indexFstSparse.h"
 
 FstSparseSet *sparSetCreate(int32_t sz) {
   FstSparseSet *ss = taosMemoryCalloc(1, sizeof(FstSparseSet));