Merge pull request #12324 from taosdata/enh/queryIndexInter

enh(query): index inter
This commit is contained in:
Yihao Deng 2022-05-11 00:01:05 +08:00 committed by GitHub
commit 9d2ba0a360
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 370 additions and 107 deletions

View File

@ -23,18 +23,18 @@
#pragma GCC diagnostic ignored "-Wsign-compare"
#include "os.h"
#include "tglobal.h"
#include "executor.h"
#include "executorimpl.h"
#include "function.h"
#include "taos.h"
#include "tdef.h"
#include "tvariant.h"
#include "tdatablock.h"
#include "trpc.h"
#include "stub.h"
#include "executor.h"
#include "taos.h"
#include "tdatablock.h"
#include "tdef.h"
#include "tglobal.h"
#include "tmsg.h"
#include "tname.h"
#include "trpc.h"
#include "tvariant.h"
namespace {
@ -191,10 +191,10 @@ SSDataBlock* get2ColsDummyBlock(SOperatorInfo* pOperator) {
blockDataUpdateTsWindow(pBlock);
return pBlock;
}
SOperatorInfo* createDummyOperator(int32_t startVal, int32_t numOfBlocks, int32_t rowsPerPage, int32_t type, int32_t numOfCols) {
SOperatorInfo* createDummyOperator(int32_t startVal, int32_t numOfBlocks, int32_t rowsPerPage, int32_t type,
int32_t numOfCols) {
SOperatorInfo* pOperator = static_cast<SOperatorInfo*>(taosMemoryCalloc(1, sizeof(SOperatorInfo)));
pOperator->name = "dummyInputOpertor4Test";
@ -214,14 +214,15 @@ SOperatorInfo* createDummyOperator(int32_t startVal, int32_t numOfBlocks, int32_
pOperator->info = pInfo;
return pOperator;
}
}
} // namespace
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
TEST(testCase, build_executor_tree_Test) {
const char* msg = "{\n"
const char* msg =
"{\n"
" \"NodeType\": \"48\",\n"
" \"Name\": \"PhysiSubplan\",\n"
" \"PhysiSubplan\": {\n"
@ -947,7 +948,10 @@ TEST(testCase, build_executor_tree_Test) {
code = qCreateExecTask(&handle, 2, 1, plan, (void**)&pTaskInfo, &sinkHandle, OPTR_EXEC_MODEL_BATCH);
ASSERT_EQ(code, 0);
}
TEST(testCase, index_plan_test) {
// add later
EXPECT_EQ(0, 0);
}
#if 0
TEST(testCase, inMem_sort_Test) {

View File

@ -278,9 +278,9 @@ static int32_t cacheSearchCompareFunc_JSON(void* cache, SIndexTerm* term, SIdxTe
break;
}
CacheTerm* c = (CacheTerm*)SL_GET_NODE_DATA(node);
printf("json val: %s\n", c->colVal);
if (0 != strncmp(c->colVal, term->colName, term->nColName)) {
continue;
// printf("json val: %s\n", c->colVal);
if (0 != strncmp(c->colVal, pCt->colVal, skip)) {
break;
}
TExeCond cond = cmpFn(c->colVal + skip, term->colVal, dType);
@ -640,30 +640,30 @@ static int indexFindCh(char* a, char c) {
return p - a;
}
static int indexCacheJsonTermCompareImpl(char* a, char* b) {
int alen = indexFindCh(a, '&');
int blen = indexFindCh(b, '&');
// int alen = indexFindCh(a, '&');
// int blen = indexFindCh(b, '&');
int cmp = strncmp(a, b, MIN(alen, blen));
if (cmp == 0) {
cmp = alen - blen;
if (cmp != 0) {
return cmp;
}
cmp = *(a + alen) - *(b + blen);
if (cmp != 0) {
return cmp;
}
alen += 2;
blen += 2;
cmp = strcmp(a + alen, b + blen);
}
return cmp;
// int cmp = strncmp(a, b, MIN(alen, blen));
// if (cmp == 0) {
// cmp = alen - blen;
// if (cmp != 0) {
// return cmp;
// }
// cmp = *(a + alen) - *(b + blen);
// if (cmp != 0) {
// return cmp;
// }
// alen += 2;
// blen += 2;
// cmp = strcmp(a + alen, b + blen);
//}
return 0;
}
static int32_t indexCacheJsonTermCompare(const void* l, const void* r) {
CacheTerm* lt = (CacheTerm*)l;
CacheTerm* rt = (CacheTerm*)r;
// compare colVal
int cmp = indexCacheJsonTermCompareImpl(lt->colVal, rt->colVal);
int32_t cmp = strcmp(lt->colVal, rt->colVal);
if (cmp == 0) {
return rt->version - lt->version;
}
@ -704,6 +704,8 @@ static bool indexCacheIteratorNext(Iterate* itera) {
iv->type = ct->operaType;
iv->ver = ct->version;
iv->colVal = tstrdup(ct->colVal);
// printf("col Val: %s\n", iv->colVal);
// iv->colType = cv->colType;
taosArrayPush(iv->val, &ct->uid);
}

View File

@ -334,6 +334,11 @@ static int32_t tfSearchCompareFunc(void* reader, SIndexTerm* tem, SIdxTempResult
while ((rt = streamWithStateNextWith(st, NULL)) != NULL) {
FstSlice* s = &rt->data;
char* ch = (char*)fstSliceData(s, NULL);
// if (0 != strncmp(ch, tem->colName, tem->nColName)) {
// swsResultDestroy(rt);
// break;
//}
TExeCond cond = cmpFn(ch, p, tem->colType);
if (MATCH == cond) {
tfileReaderLoadTableIds((TFileReader*)reader, rt->out.out, tr->total);
@ -455,16 +460,22 @@ static int32_t tfSearchCompareFunc_JSON(void* reader, SIndexTerm* tem, SIdxTempR
AutomationCtx* ctx = automCtxCreate((void*)p, AUTOMATION_PREFIX);
FstStreamBuilder* sb = fstSearch(((TFileReader*)reader)->fst, ctx);
FstSlice h = fstSliceCreate((uint8_t*)p, skip);
fstStreamBuilderSetRange(sb, &h, ctype);
fstSliceDestroy(&h);
// FstSlice h = fstSliceCreate((uint8_t*)p, skip);
// fstStreamBuilderSetRange(sb, &h, ctype);
// fstSliceDestroy(&h);
StreamWithState* st = streamBuilderIntoStream(sb);
StreamWithStateResult* rt = NULL;
while ((rt = streamWithStateNextWith(st, NULL)) != NULL) {
FstSlice* s = &rt->data;
char* ch = (char*)fstSliceData(s, NULL);
TExeCond cond = cmpFn(ch, p, tem->colType);
if (0 != strncmp(ch, p, skip)) {
swsResultDestroy(rt);
break;
}
TExeCond cond = cmpFn(ch + skip, tem->colVal, tem->colType);
if (MATCH == cond) {
tfileReaderLoadTableIds((TFileReader*)reader, rt->out.out, tr->total);
} else if (CONTINUE == cond) {
@ -594,13 +605,16 @@ int tfileWriterPut(TFileWriter* tw, void* data, bool order) {
if (tfileWriteData(tw, v) != 0) {
indexError("failed to write data: %s, offset: %d len: %d", v->colVal, v->offset,
(int)taosArrayGetSize(v->tableId));
// printf("write faile\n");
} else {
// printf("write sucee\n");
// indexInfo("success to write data: %s, offset: %d len: %d", v->colVal, v->offset,
// (int)taosArrayGetSize(v->tableId));
// indexInfo("tfile write data size: %d", tw->ctx->size(tw->ctx));
}
}
fstBuilderFinish(tw->fb);
fstBuilderDestroy(tw->fb);
tw->fb = NULL;
@ -845,18 +859,24 @@ static int tfileWriteData(TFileWriter* write, TFileValue* tval) {
uint8_t colType = header->colType;
colType = INDEX_TYPE_GET_TYPE(colType);
if (colType == TSDB_DATA_TYPE_BINARY || colType == TSDB_DATA_TYPE_NCHAR) {
FstSlice key = fstSliceCreate((uint8_t*)(tval->colVal), (size_t)strlen(tval->colVal));
if (fstBuilderInsert(write->fb, key, tval->offset)) {
fstSliceDestroy(&key);
return 0;
}
fstSliceDestroy(&key);
return -1;
} else {
// handle other type later
}
return 0;
// if (colType == TSDB_DATA_TYPE_BINARY || colType == TSDB_DATA_TYPE_NCHAR) {
// FstSlice key = fstSliceCreate((uint8_t*)(tval->colVal), (size_t)strlen(tval->colVal));
// if (fstBuilderInsert(write->fb, key, tval->offset)) {
// fstSliceDestroy(&key);
// return 0;
// }
// fstSliceDestroy(&key);
// return -1;
//} else {
// // handle other type later
//}
}
static int tfileWriteFooter(TFileWriter* write) {
char buf[sizeof(tfileMagicNumber) + 1] = {0};
@ -913,6 +933,7 @@ static int tfileReaderLoadFst(TFileReader* reader) {
static int tfileReaderLoadTableIds(TFileReader* reader, int32_t offset, SArray* result) {
// TODO(yihao): opt later
WriterCtx* ctx = reader->ctx;
// add block cache
char block[1024] = {0};
int32_t nread = ctx->readFrom(ctx, block, sizeof(block), offset);
assert(nread >= sizeof(uint32_t));

View File

@ -272,9 +272,8 @@ void checkFstCheckIterator1() {
std::cout << "insert data count : " << count << "elapas time: " << e - s << std::endl;
fw->Put("Hello world", 1);
fw->Put("Hello worle", 2);
fw->Put("hello worlf", 4);
fw->Put("test1&^D&10", 1);
fw->Put("test2&^D&10", 2);
delete fw;
FstReadMemory* m = new FstReadMemory(1024 * 64);
@ -645,11 +644,11 @@ int main(int argc, char* argv[]) {
// iterTFileReader(argv[1], argv[2], argv[3], argv[4]);
//}
checkFstCheckIterator1();
checkFstCheckIterator2();
checkFstCheckIteratorPrefix();
checkFstCheckIteratorRange1();
checkFstCheckIteratorRange2();
checkFstCheckIteratorRange3();
// checkFstCheckIterator2();
// checkFstCheckIteratorPrefix();
// checkFstCheckIteratorRange1();
// checkFstCheckIteratorRange2();
// checkFstCheckIteratorRange3();
// checkFstLongTerm();
// checkFstPrefixSearch();

View File

@ -181,3 +181,240 @@ TEST_F(JsonEnv, testWriteMillonData) {
}
}
}
TEST_F(JsonEnv, testWriteJsonNumberData) {
{
std::string colName("test");
std::string colVal("10");
SIndexTerm* term = indexTermCreate(1, ADD_VALUE, TSDB_DATA_TYPE_INT, colName.c_str(), colName.size(),
colVal.c_str(), colVal.size());
SIndexMultiTerm* terms = indexMultiTermCreate();
indexMultiTermAdd(terms, term);
for (size_t i = 0; i < 1000; i++) {
tIndexJsonPut(index, terms, i);
}
indexMultiTermDestroy(terms);
}
{
std::string colName("test2");
std::string colVal("20");
SIndexTerm* term = indexTermCreate(1, ADD_VALUE, TSDB_DATA_TYPE_INT, colName.c_str(), colName.size(),
colVal.c_str(), colVal.size());
SIndexMultiTerm* terms = indexMultiTermCreate();
indexMultiTermAdd(terms, term);
for (size_t i = 0; i < 1000; i++) {
tIndexJsonPut(index, terms, i);
}
indexMultiTermDestroy(terms);
}
{
std::string colName("test2");
std::string colVal("15");
SIndexTerm* term = indexTermCreate(1, ADD_VALUE, TSDB_DATA_TYPE_INT, colName.c_str(), colName.size(),
colVal.c_str(), colVal.size());
SIndexMultiTerm* terms = indexMultiTermCreate();
indexMultiTermAdd(terms, term);
for (size_t i = 0; i < 1000; i++) {
tIndexJsonPut(index, terms, i);
}
indexMultiTermDestroy(terms);
}
{
std::string colName("test2");
std::string colVal("15");
SIndexTerm* term = indexTermCreate(1, ADD_VALUE, TSDB_DATA_TYPE_BINARY, colName.c_str(), colName.size(),
colVal.c_str(), colVal.size());
SIndexMultiTerm* terms = indexMultiTermCreate();
indexMultiTermAdd(terms, term);
for (size_t i = 0; i < 1000; i++) {
tIndexJsonPut(index, terms, i);
}
indexMultiTermDestroy(terms);
}
{
std::string colName("test");
std::string colVal("10");
SIndexMultiTermQuery* mq = indexMultiTermQueryCreate(MUST);
SIndexTerm* q = indexTermCreate(1, ADD_VALUE, TSDB_DATA_TYPE_INT, colName.c_str(), colName.size(), colVal.c_str(),
colVal.size());
SArray* result = taosArrayInit(1, sizeof(uint64_t));
indexMultiTermQueryAdd(mq, q, QUERY_TERM);
tIndexJsonSearch(index, mq, result);
EXPECT_EQ(1000, taosArrayGetSize(result));
indexMultiTermQueryDestroy(mq);
}
{
std::string colName("test");
std::string colVal("10");
SIndexMultiTermQuery* mq = indexMultiTermQueryCreate(MUST);
SIndexTerm* q = indexTermCreate(1, ADD_VALUE, TSDB_DATA_TYPE_INT, colName.c_str(), colName.size(), colVal.c_str(),
colVal.size());
SArray* result = taosArrayInit(1, sizeof(uint64_t));
indexMultiTermQueryAdd(mq, q, QUERY_GREATER_THAN);
tIndexJsonSearch(index, mq, result);
EXPECT_EQ(0, taosArrayGetSize(result));
indexMultiTermQueryDestroy(mq);
}
{
std::string colName("test");
std::string colVal("10");
SIndexMultiTermQuery* mq = indexMultiTermQueryCreate(MUST);
SIndexTerm* q = indexTermCreate(1, ADD_VALUE, TSDB_DATA_TYPE_INT, colName.c_str(), colName.size(), colVal.c_str(),
colVal.size());
SArray* result = taosArrayInit(1, sizeof(uint64_t));
indexMultiTermQueryAdd(mq, q, QUERY_GREATER_EQUAL);
tIndexJsonSearch(index, mq, result);
EXPECT_EQ(1000, taosArrayGetSize(result));
indexMultiTermQueryDestroy(mq);
}
{
std::string colName("test");
std::string colVal("10");
SIndexMultiTermQuery* mq = indexMultiTermQueryCreate(MUST);
SIndexTerm* q = indexTermCreate(1, ADD_VALUE, TSDB_DATA_TYPE_INT, colName.c_str(), colName.size(), colVal.c_str(),
colVal.size());
SArray* result = taosArrayInit(1, sizeof(uint64_t));
indexMultiTermQueryAdd(mq, q, QUERY_LESS_THAN);
tIndexJsonSearch(index, mq, result);
EXPECT_EQ(0, taosArrayGetSize(result));
indexMultiTermQueryDestroy(mq);
}
{
std::string colName("test");
std::string colVal("10");
SIndexMultiTermQuery* mq = indexMultiTermQueryCreate(MUST);
SIndexTerm* q = indexTermCreate(1, ADD_VALUE, TSDB_DATA_TYPE_INT, colName.c_str(), colName.size(), colVal.c_str(),
colVal.size());
SArray* result = taosArrayInit(1, sizeof(uint64_t));
indexMultiTermQueryAdd(mq, q, QUERY_LESS_EQUAL);
tIndexJsonSearch(index, mq, result);
EXPECT_EQ(1000, taosArrayGetSize(result));
indexMultiTermQueryDestroy(mq);
}
}
TEST_F(JsonEnv, testWriteJsonTfileAndCache) {
{
std::string colName("test1");
std::string colVal("10");
SIndexTerm* term = indexTermCreate(1, ADD_VALUE, TSDB_DATA_TYPE_INT, colName.c_str(), colName.size(),
colVal.c_str(), colVal.size());
SIndexMultiTerm* terms = indexMultiTermCreate();
indexMultiTermAdd(terms, term);
for (size_t i = 0; i < 1000; i++) {
tIndexJsonPut(index, terms, i);
}
indexMultiTermDestroy(terms);
}
{
std::string colName("test");
std::string colVal("xxxxxxxxxxxxxxxxxxx");
SIndexTerm* term = indexTermCreate(1, ADD_VALUE, TSDB_DATA_TYPE_BINARY, colName.c_str(), colName.size(),
colVal.c_str(), colVal.size());
SIndexMultiTerm* terms = indexMultiTermCreate();
indexMultiTermAdd(terms, term);
for (size_t i = 0; i < 100000; i++) {
tIndexJsonPut(index, terms, i);
}
indexMultiTermDestroy(terms);
}
{
std::string colName("test1");
std::string colVal("10");
SIndexMultiTermQuery* mq = indexMultiTermQueryCreate(MUST);
SIndexTerm* q = indexTermCreate(1, ADD_VALUE, TSDB_DATA_TYPE_INT, colName.c_str(), colName.size(), colVal.c_str(),
colVal.size());
SArray* result = taosArrayInit(1, sizeof(uint64_t));
indexMultiTermQueryAdd(mq, q, QUERY_TERM);
tIndexJsonSearch(index, mq, result);
EXPECT_EQ(1000, taosArrayGetSize(result));
indexMultiTermQueryDestroy(mq);
}
{
std::string colName("test1");
std::string colVal("10");
SIndexMultiTermQuery* mq = indexMultiTermQueryCreate(MUST);
SIndexTerm* q = indexTermCreate(1, ADD_VALUE, TSDB_DATA_TYPE_INT, colName.c_str(), colName.size(), colVal.c_str(),
colVal.size());
SArray* result = taosArrayInit(1, sizeof(uint64_t));
indexMultiTermQueryAdd(mq, q, QUERY_GREATER_THAN);
tIndexJsonSearch(index, mq, result);
EXPECT_EQ(0, taosArrayGetSize(result));
indexMultiTermQueryDestroy(mq);
}
{
std::string colName("test1");
std::string colVal("10");
SIndexMultiTermQuery* mq = indexMultiTermQueryCreate(MUST);
SIndexTerm* q = indexTermCreate(1, ADD_VALUE, TSDB_DATA_TYPE_INT, colName.c_str(), colName.size(), colVal.c_str(),
colVal.size());
SArray* result = taosArrayInit(1, sizeof(uint64_t));
indexMultiTermQueryAdd(mq, q, QUERY_GREATER_EQUAL);
tIndexJsonSearch(index, mq, result);
EXPECT_EQ(1000, taosArrayGetSize(result));
indexMultiTermQueryDestroy(mq);
}
{
std::string colName("test1");
std::string colVal("10");
SIndexMultiTermQuery* mq = indexMultiTermQueryCreate(MUST);
SIndexTerm* q = indexTermCreate(1, ADD_VALUE, TSDB_DATA_TYPE_INT, colName.c_str(), colName.size(), colVal.c_str(),
colVal.size());
SArray* result = taosArrayInit(1, sizeof(uint64_t));
indexMultiTermQueryAdd(mq, q, QUERY_GREATER_THAN);
tIndexJsonSearch(index, mq, result);
EXPECT_EQ(0, taosArrayGetSize(result));
indexMultiTermQueryDestroy(mq);
}
{
std::string colName("test1");
std::string colVal("10");
SIndexMultiTermQuery* mq = indexMultiTermQueryCreate(MUST);
SIndexTerm* q = indexTermCreate(1, ADD_VALUE, TSDB_DATA_TYPE_INT, colName.c_str(), colName.size(), colVal.c_str(),
colVal.size());
SArray* result = taosArrayInit(1, sizeof(uint64_t));
indexMultiTermQueryAdd(mq, q, QUERY_LESS_EQUAL);
tIndexJsonSearch(index, mq, result);
EXPECT_EQ(1000, taosArrayGetSize(result));
indexMultiTermQueryDestroy(mq);
}
{
std::string colName("test1");
std::string colVal("10");
SIndexMultiTermQuery* mq = indexMultiTermQueryCreate(MUST);
SIndexTerm* q = indexTermCreate(1, ADD_VALUE, TSDB_DATA_TYPE_INT, colName.c_str(), colName.size(), colVal.c_str(),
colVal.size());
SArray* result = taosArrayInit(1, sizeof(uint64_t));
indexMultiTermQueryAdd(mq, q, QUERY_LESS_THAN);
tIndexJsonSearch(index, mq, result);
EXPECT_EQ(0, taosArrayGetSize(result));
indexMultiTermQueryDestroy(mq);
}
}

View File

@ -105,8 +105,8 @@ typedef void* queue[2];
/* Return the structure holding the given element. */
#define QUEUE_DATA(e, type, field) ((type*)((void*)((char*)(e)-offsetof(type, field))))
#define TRANS_RETRY_COUNT_LIMIT 10 // retry count limit
#define TRANS_RETRY_INTERVAL 5 // ms retry interval
#define TRANS_RETRY_COUNT_LIMIT 20 // retry count limit
#define TRANS_RETRY_INTERVAL 15 // ms retry interval
#define TRANS_CONN_TIMEOUT 3 // connect timeout
typedef struct {