diff --git a/source/libs/index/inc/index_fst.h b/source/libs/index/inc/index_fst.h index bcb7070755..fe669c8567 100644 --- a/source/libs/index/inc/index_fst.h +++ b/source/libs/index/inc/index_fst.h @@ -34,6 +34,7 @@ typedef struct FstRange { } FstRange; +typedef enum {GE, GT, LE, LT} RangeType; typedef enum { OneTransNext, OneTrans, AnyTrans, EmptyFinal} State; typedef enum {Ordered, OutOfOrdered, DuplicateKey} OrderType; @@ -169,11 +170,6 @@ uint64_t fstStateFindInput(FstState *state, FstNode *node, uint8_t b, bool *null - - - - - #define FST_STATE_ONE_TRNAS_NEXT(node) (node->state.state == OneTransNext) #define FST_STATE_ONE_TRNAS(node) (node->state.state == OneTrans) #define FST_STATE_ANY_TRANS(node) (node->state.state == AnyTrans) @@ -273,6 +269,8 @@ FstNode* fstGetRoot(Fst *fst); FstType fstGetType(Fst *fst); CompiledAddr fstGetRootAddr(Fst *fst); + + Output fstEmptyFinalOutput(Fst *fst, bool *null); bool fstVerify(Fst *fst); @@ -280,10 +278,6 @@ bool fstVerify(Fst *fst); //refactor this function bool fstBuilderNodeCompileTo(FstBuilderNode *b, FstCountingWriter *wrt, CompiledAddr lastAddr, CompiledAddr startAddr); - - - - typedef struct StreamState { FstNode *node; uint64_t trans; @@ -316,4 +310,18 @@ StreamWithState *streamWithStateCreate(Fst *fst, Automation *automation, FstBoun void streamWithStateDestroy(StreamWithState *sws); bool streamWithStateSeekMin(StreamWithState *sws, FstBoundWithData *min); StreamWithStateResult* streamWithStateNextWith(StreamWithState *sws, StreamCallback callback); + +typedef struct FstStreamBuilder { + Fst *fst; + Automation *aut; + FstBoundWithData *min; + FstBoundWithData *max; +} FstStreamBuilder; + +FstStreamBuilder *fstStreamBuilderCreate(Fst *fst, Automation *aut); +// set up bound range +// refator, simple code by marco + +FstStreamBuilder *fstStreamBuilderRange(FstStreamBuilder *b, FstSlice *val, RangeType type); + #endif diff --git a/source/libs/index/inc/index_fst_util.h b/source/libs/index/inc/index_fst_util.h index ff0946063d..a3cef493eb 100644 --- a/source/libs/index/inc/index_fst_util.h +++ b/source/libs/index/inc/index_fst_util.h @@ -67,20 +67,27 @@ uint8_t packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr); CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr); +typedef struct FstString { + uint8_t *data; + uint32_t len; + int32_t ref; +} FstString; typedef struct FstSlice { - uint8_t *data; - uint64_t dLen; - int32_t start; - int32_t end; + FstString *str; + int32_t start; + int32_t end; } FstSlice; -FstSlice fstSliceCopy(FstSlice *slice, int32_t start, int32_t end); -FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen); -bool fstSliceEmpty(FstSlice *slice); -int fstSliceCompare(FstSlice *a, FstSlice *b); +FstSlice fstSliceCreate(uint8_t *data, uint64_t len); +FstSlice fstSliceCopy(FstSlice *s, int32_t start, int32_t end); +FstSlice fstSliceDeepCopy(FstSlice *s, int32_t start, int32_t end); +bool fstSliceEmpty(FstSlice *s); +int fstSliceCompare(FstSlice *s1, FstSlice *s2); +void fstSliceDestroy(FstSlice *s); +uint8_t *fstSliceData(FstSlice *s, int32_t *sz); -#define FST_SLICE_LEN(s) ((s)->end - (s)->start + 1) +#define FST_SLICE_LEN(s) (s->end - s->start + 1) #endif diff --git a/source/libs/index/src/index_fst.c b/source/libs/index/src/index_fst.c index 1d6e26e612..f7ed124f43 100644 --- a/source/libs/index/src/index_fst.c +++ b/source/libs/index/src/index_fst.c @@ -92,7 +92,7 @@ void fstUnFinishedNodesTopLastFreeze(FstUnFinishedNodes *nodes, CompiledAddr add } void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *nodes, FstSlice bs, Output out) { FstSlice *s = &bs; - if (s->data == NULL || s->dLen == 0 || s->start > s->end) { + if (fstSliceEmpty(s)) { return; } size_t sz = taosArrayGetSize(nodes->stack) - 1; @@ -104,9 +104,11 @@ void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *nodes, FstSlice bs, Output //FstLastTransition *trn = malloc(sizeof(FstLastTransition)); //trn->inp = s->data[s->start]; //trn->out = out; - un->last = fstLastTransitionCreate(s->data[s->start], out); + int32_t len = 0; + uint8_t *data = fstSliceData(s, &len); + un->last = fstLastTransitionCreate(data[0], out); - for (uint64_t i = s->start; i <= s->end; i++) { + for (uint64_t i = 0; i < len; i++) { FstBuilderNode *n = malloc(sizeof(FstBuilderNode)); n->isFinal = false; n->finalOutput = 0; @@ -115,7 +117,7 @@ void fstUnFinishedNodesAddSuffix(FstUnFinishedNodes *nodes, FstSlice bs, Output //FstLastTransition *trn = malloc(sizeof(FstLastTransition)); //trn->inp = s->data[i]; //trn->out = out; - FstLastTransition *trn = fstLastTransitionCreate(s->data[i], out); + FstLastTransition *trn = fstLastTransitionCreate(data[i], out); FstBuilderNodeUnfinished un = {.node = n, .last = trn}; taosArrayPush(nodes->stack, &un); @@ -133,7 +135,8 @@ uint64_t fstUnFinishedNodesFindCommPrefix(FstUnFinishedNodes *node, FstSlice bs) uint64_t count = 0; for (size_t i = 0; i < ssz && i < lsz; i++) { FstBuilderNodeUnfinished *un = taosArrayGet(node->stack, i); - if (un->last->inp == s->data[s->start + i]) { + uint8_t *data = fstSliceData(s, NULL); + if (un->last->inp == data[i]) { count++; } else { break; @@ -153,7 +156,8 @@ uint64_t fstUnFinishedNodesFindCommPrefixAndSetOutput(FstUnFinishedNodes *node, FstLastTransition *t = un->last; uint64_t addPrefix = 0; - if (t && t->inp == s->data[s->start + i]) { + uint8_t *data = fstSliceData(s, NULL); + if (t && t->inp == data[i]) { uint64_t commPrefix = MIN(t->out, *out); uint64_t tAddPrefix = t->out - commPrefix; (*out) = (*out) - commPrefix; @@ -176,7 +180,9 @@ FstState fstStateCreateFrom(FstSlice* slice, CompiledAddr addr) { if (addr == EMPTY_ADDRESS) { return fs; } - uint8_t v = slice->data[addr]; + + uint8_t *data = fstSliceData(slice, NULL); + uint8_t v = data[addr]; uint8_t t = (v & 0b11000000) >> 6; if (t == 0b11) { fs.state = OneTransNext; @@ -376,7 +382,8 @@ uint8_t fstStateInput(FstState *s, FstNode *node) { FstSlice *slice = &node->data; bool null = false; uint8_t inp = fstStateCommInput(s, &null); - return null == false ? inp : slice->data[slice->start - 1]; + uint8_t *data = fstSliceData(slice, NULL); + return null == false ? inp : data[-1]; } uint8_t fstStateInputForAnyTrans(FstState *s, FstNode *node, uint64_t i) { assert(s->state == AnyTrans); @@ -388,7 +395,9 @@ uint8_t fstStateInputForAnyTrans(FstState *s, FstNode *node, uint64_t i) { - fstStateTransIndexSize(s, node->version, node->nTrans) - i - 1; // the output size - return slice->data[at]; + + uint8_t *data = fstSliceData(slice, NULL); + return data[at]; } // trans_addr @@ -406,7 +415,8 @@ CompiledAddr fstStateTransAddr(FstState *s, FstNode *node) { - tSizes; // refactor error logic - return unpackDelta(slice->data + slice->start + i, tSizes, node->end); + uint8_t *data = fstSliceData(slice, NULL); + return unpackDelta(data +i, tSizes, node->end); } } CompiledAddr fstStateTransAddrForAnyTrans(FstState *s, FstNode *node, uint64_t i) { @@ -421,7 +431,8 @@ CompiledAddr fstStateTransAddrForAnyTrans(FstState *s, FstNode *node, uint64_t i - node->nTrans - (i * tSizes) - tSizes; - return unpackDelta(slice->data + slice->start + at, tSizes, node->end); + uint8_t *data = fstSliceData(slice, NULL); + return unpackDelta(data + at, tSizes, node->end); } // sizes @@ -434,7 +445,8 @@ PackSizes fstStateSizes(FstState *s, FstSlice *slice) { i = FST_SLICE_LEN(slice) - 1 - fstStateNtransLen(s) - 1; } - return (PackSizes)(slice->data[slice->start + i]); + uint8_t *data = fstSliceData(slice, NULL); + return (PackSizes)(*(data +i)); } // Output Output fstStateOutput(FstState *s, FstNode *node) { @@ -452,7 +464,8 @@ Output fstStateOutput(FstState *s, FstNode *node) { - 1 - tSizes - oSizes; - return unpackUint64(slice->data + slice->start + i, oSizes); + uint8_t *data = fstSliceData(slice, NULL); + return unpackUint64(data + i, oSizes); } Output fstStateOutputForAnyTrans(FstState *s, FstNode *node, uint64_t i) { @@ -469,7 +482,9 @@ Output fstStateOutputForAnyTrans(FstState *s, FstNode *node, uint64_t i) { - fstStateTotalTransSize(s, node->version, node->sizes, node->nTrans) - (i * oSizes) - oSizes; - return unpackUint64(slice->data + slice->start + at, oSizes); + + uint8_t *data = fstSliceData(slice, NULL); + return unpackUint64(data + at, oSizes); } // anyTrans specify function @@ -523,7 +538,10 @@ uint64_t fstStateNtrans(FstState *s, FstSlice *slice) { if (null != true) { return n; } - n = slice->data[slice->end - 1]; // data[data.len() - 2] + int32_t len; + uint8_t *data = fstSliceData(slice, &len); + n = data[len - 2]; + //n = data[slice->end - 1]; // data[data.len() - 2] return n == 1 ? 256: n; // // "1" is never a normal legal value here, because if there, // is only 1 transition, then it is encoded in the state byte } Output fstStateFinalOutput(FstState *s, uint64_t version, FstSlice *slice, PackSizes sizes, uint64_t nTrans) { @@ -538,7 +556,8 @@ Output fstStateFinalOutput(FstState *s, uint64_t version, FstSlice *slice, Pack - fstStateTotalTransSize(s, version, sizes, nTrans) - (nTrans * oSizes) - oSizes; - return unpackUint64(slice->data + slice->start + at, (uint8_t)oSizes); + uint8_t *data = fstSliceData(slice, NULL); + return unpackUint64(data + at, (uint8_t)oSizes); } uint64_t fstStateFindInput(FstState *s, FstNode *node, uint8_t b, bool *null) { @@ -549,7 +568,10 @@ uint64_t fstStateFindInput(FstState *s, FstNode *node, uint8_t b, bool *null) { - fstStateNtransLen(s) - 1 // pack size - fstStateTransIndexSize(s, node->version, node->nTrans); - uint64_t i = slice->data[slice->start + at + b]; + int32_t dlen = 0; + uint8_t *data = fstSliceData(slice, &dlen); + uint64_t i = data[at + b]; + //uint64_t i = slice->data[slice->start + at + b]; if (i >= node->nTrans) { *null = true; } @@ -561,8 +583,13 @@ uint64_t fstStateFindInput(FstState *s, FstNode *node, uint8_t b, bool *null) { - node->nTrans; uint64_t end = start + node->nTrans; uint64_t len = end - start; + int32_t dlen = 0; + uint8_t *data = fstSliceData(slice, &dlen); for(int i = 0; i < len; i++) { - uint8_t v = slice->data[slice->start + i]; + //uint8_t v = slice->data[slice->start + i]; + ////slice->data[slice->start + i]; + uint8_t v = data[i]; + if (v == b) { return node->nTrans - i - 1; // bug } @@ -888,7 +915,7 @@ void fstBuilderFinish(FstBuilder *b) { FstSlice fstNodeAsSlice(FstNode *node) { FstSlice *slice = &node->data; - FstSlice s = fstSliceCopy(slice, slice->end, slice->dLen - 1); + FstSlice s = fstSliceCopy(slice, slice->end, FST_SLICE_LEN(slice) - 1); return s; } @@ -929,12 +956,13 @@ void fstBuilderNodeUnfinishedAddOutputPrefix(FstBuilderNodeUnfinished *unNode, O } Fst* fstCreate(FstSlice *slice) { - char *buf = slice->data; - uint64_t skip = 0; - uint64_t len = slice->dLen; - if (len < 36) { + int32_t slen; + char *buf = fstSliceData(slice, &slen); + if (slen < 36) { return NULL; } + uint64_t len = slen; + uint64_t skip = 0; uint64_t version; taosDecodeFixedU64(buf, &version); @@ -992,8 +1020,10 @@ void fstDestroy(Fst *fst) { bool fstGet(Fst *fst, FstSlice *b, Output *out) { FstNode *root = fstGetRoot(fst); Output tOut = 0; - for (uint32_t i = 0; i < b->dLen; i++) { - uint8_t inp = b->data[i]; + int32_t len; + uint8_t *data = fstSliceData(b, &len); + for (uint32_t i = 0; i < len; i++) { + uint8_t inp = data[i]; Output res = 0; bool null = fstNodeFindInput(root, inp, &res); if (null) { return false; } @@ -1046,9 +1076,10 @@ Output fstEmptyFinalOutput(Fst *fst, bool *null) { bool fstVerify(Fst *fst) { uint32_t checkSum = fst->meta->checkSum; - FstSlice *data = fst->data; + int32_t len; + uint8_t *data = fstSliceData(fst->data, &len); TSCKSUM initSum = 0; - if (!taosCheckChecksumWhole(data->data, data->dLen)) { + if (!taosCheckChecksumWhole(data, len)) { return false; } return true; @@ -1058,9 +1089,14 @@ bool fstVerify(Fst *fst) { FstBoundWithData* fstBoundStateCreate(FstBound type, FstSlice *data) { FstBoundWithData *b = calloc(1, sizeof(FstBoundWithData)); if (b == NULL) { return NULL; } - + + if (data != NULL) { + b->data = fstSliceCopy(data, data->start, data->end); + } else { + b->data = fstSliceCreate(NULL, 0); + } b->type = type; - b->data = fstSliceCopy(data, data->start, data->end); + return b; } @@ -1145,8 +1181,10 @@ bool streamWithStateSeekMin(StreamWithState *sws, FstBoundWithData *min) { Output out = 0; void* autState = sws->aut->start(); - for (uint32_t i = 0; i < key->dLen; i++) { - uint8_t b = key->data[i]; + int32_t len; + uint8_t *data = fstSliceData(key, &len); + for (uint32_t i = 0; i < len; i++) { + uint8_t b = data[i]; uint64_t res = 0; bool null = fstNodeFindInput(node, b, &res); if (null == false) { @@ -1277,8 +1315,8 @@ StreamWithStateResult *swsResultCreate(FstSlice *data, FstOutput fOut, void *sta StreamWithStateResult *result = calloc(1, sizeof(StreamWithStateResult)); if (result == NULL) { return NULL; } - FstSlice slice = fstSliceCopy(data, 0, data->dLen - 1); - result->data = slice; + FstSlice s = fstSliceCopy(data, 0, FST_SLICE_LEN(data) - 1); + result->data = s; result->out = fOut; result->state = state; @@ -1293,5 +1331,42 @@ void streamStateDestroy(void *s) { //free(s->autoState); } +FstStreamBuilder *fstStreamBuilderCreate(Fst *fst, Automation *aut) { + FstStreamBuilder *b = calloc(1, sizeof(FstStreamBuilder)); + if (NULL == b) { return NULL; } + + b->fst = fst; + b->aut = aut; + b->min = fstBoundStateCreate(Unbounded, NULL); + b->max = fstBoundStateCreate(Unbounded, NULL); + return b; +} +void fstStreamBuilderDestroy(FstStreamBuilder *b) { + free(b); +} +FstStreamBuilder *fstStreamBuilderRange(FstStreamBuilder *b, FstSlice *val, RangeType type) { + if (b == NULL) { return NULL; } + + if (type == GE) { + b->min->type = Included; + fstSliceDestroy(&(b->min->data)); + b->min->data = fstSliceDeepCopy(val, 0, FST_SLICE_LEN(val) - 1); + } else if (type == GT) { + b->min->type = Excluded; + fstSliceDestroy(&(b->min->data)); + b->min->data = fstSliceDeepCopy(val, 0, FST_SLICE_LEN(val) - 1); + } else if (type == LE) { + b->max->type = Included; + fstSliceDestroy(&(b->max->data)); + b->max->data = fstSliceDeepCopy(val, 0, FST_SLICE_LEN(val) - 1); + } else if (type == LT) { + b->max->type = Excluded; + fstSliceDestroy(&(b->max->data)); + b->max->data = fstSliceDeepCopy(val, 0, FST_SLICE_LEN(val) - 1); + } + return b; +} + + diff --git a/source/libs/index/src/index_fst_automation.c b/source/libs/index/src/index_fst_automation.c index f2f48bbc8a..3d5efd30f3 100644 --- a/source/libs/index/src/index_fst_automation.c +++ b/source/libs/index/src/index_fst_automation.c @@ -12,3 +12,4 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ + diff --git a/source/libs/index/src/index_fst_util.c b/source/libs/index/src/index_fst_util.c index 952c710676..92949d1ff2 100644 --- a/source/libs/index/src/index_fst_util.c +++ b/source/libs/index/src/index_fst_util.c @@ -91,42 +91,88 @@ CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr) { } // fst slice func -FstSlice fstSliceCreate(uint8_t *data, uint64_t dLen) { - FstSlice slice = {.data = data, .dLen = dLen, .start = 0, .end = dLen - 1}; - return slice; +// + +FstSlice fstSliceCreate(uint8_t *data, uint64_t len) { + FstString *str = (FstString *)malloc(sizeof(FstString)); + str->ref = 1; + str->len = len; + str->data = malloc(len * sizeof(uint8_t)); + memcpy(str->data, data, len); + + FstSlice s = {.str = str, .start = 0, .end = len - 1}; + return s; } // just shallow copy -FstSlice fstSliceCopy(FstSlice *slice, int32_t start, int32_t end) { - FstSlice t; - if (start >= slice->dLen || end >= slice->dLen || start > end) { - t.data = NULL; - return t; - }; - - t.data = slice->data; - t.dLen = slice->dLen; - t.start = start; - t.end = end; +FstSlice fstSliceCopy(FstSlice *s, int32_t start, int32_t end) { + FstString *str = s->str; + str->ref++; + int32_t alen; + //uint8_t *buf = fstSliceData(s, &alen); + //start = buf + start - (buf - s->start); + //end = buf + end - (buf - s->start); + + FstSlice t = {.str = str, .start = start + s->start, .end = end + s->start}; return t; } -bool fstSliceEmpty(FstSlice *slice) { - return slice->data == NULL || slice->dLen <= 0; +FstSlice fstSliceDeepCopy(FstSlice *s, int32_t start, int32_t end) { + + int32_t alen, tlen = end - start + 1; + uint8_t *data = fstSliceData(s, &alen); + assert(tlen <= alen); + + uint8_t *buf = malloc(sizeof(uint8_t) * tlen); + memcpy(buf, data, tlen); + + FstString *str = malloc(sizeof(FstString)); + str->data = buf; + str->len = tlen; + str->ref = 1; + + FstSlice ans; + ans.str = str; + ans.start = 0; + ans.end = tlen - 1; + return ans; +} +bool fstSliceEmpty(FstSlice *s) { + return s->str == NULL || s->start < 0 || s->end < 0; +} + +uint8_t *fstSliceData(FstSlice *s, int32_t *size) { + FstString *str = s->str; + if (size != NULL) { + *size = s->end - s->start + 1; + } + return str->data + s->start; +} +void fstSliceDestroy(FstSlice *s) { + FstString *str = s->str; + str->ref--; + if (str->ref <= 0) { + free(str->data); + free(str); + s->str = NULL; + } } int fstSliceCompare(FstSlice *a, FstSlice *b) { - int32_t aLen = (a->end - a->start + 1); - int32_t bLen = (b->end - b->start + 1); - int32_t mLen = (aLen < bLen ? aLen : bLen); - for (int i = 0; i < mLen; i++) { - uint8_t x = a->data[i + a->start]; - uint8_t y = b->data[i + b->start]; - if (x == y) { continue; } + int32_t alen, blen; + uint8_t *aBuf = fstSliceData(a, &alen); + uint8_t *bBuf = fstSliceData(b, &blen); + + + uint32_t i, j; + for (i = 0, j = 0; i < alen && j < blen; i++, j++) { + uint8_t x = aBuf[i]; + uint8_t y = bBuf[j]; + if (x == y) { continue;} else if (x < y) { return -1; } - else { return 1; } - } - if (aLen == bLen) { return 0; } - else if (aLen < bLen) { return -1; } - else { return 1; } + else { return 1; }; + } + if (i < alen) { return 1; } + else if (j < blen) { return -1; } + else { return 0; } }