From cee5a2ecbe92212ee1b3bba2b39069973055069a Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 4 Jan 2022 18:01:31 +0800 Subject: [PATCH 1/7] optimize tfile --- include/util/tfile.h | 2 +- .../index/inc/index_fst_counting_writer.h | 5 +++++ .../index/src/index_fst_counting_writer.c | 20 ++++++++++++++++++- source/libs/index/test/indexTests.cc | 2 +- source/util/src/tfile.c | 10 ++++++++++ 5 files changed, 36 insertions(+), 3 deletions(-) diff --git a/include/util/tfile.h b/include/util/tfile.h index b3d141c443..d3813051a4 100644 --- a/include/util/tfile.h +++ b/include/util/tfile.h @@ -43,7 +43,7 @@ int32_t tfFsync(int64_t tfd); bool tfValid(int64_t tfd); int64_t tfLseek(int64_t tfd, int64_t offset, int32_t whence); int32_t tfFtruncate(int64_t tfd, int64_t length); - +void * tfMmapReadOnly(int64_t tfd, int64_t length); #ifdef __cplusplus } #endif diff --git a/source/libs/index/inc/index_fst_counting_writer.h b/source/libs/index/inc/index_fst_counting_writer.h index fcc0d5a0b3..d7363f2f4c 100644 --- a/source/libs/index/inc/index_fst_counting_writer.h +++ b/source/libs/index/inc/index_fst_counting_writer.h @@ -22,6 +22,8 @@ extern "C" { #include "tfile.h" +//#define USE_MMAP 1 + #define DefaultMem 1024 * 1024 static char tmpFile[] = "./index"; @@ -39,6 +41,9 @@ typedef struct WriterCtx { bool readOnly; char buf[256]; int size; +#ifdef USE_MMAP + char* ptr; +#endif } file; struct { int32_t capa; diff --git a/source/libs/index/src/index_fst_counting_writer.c b/source/libs/index/src/index_fst_counting_writer.c index c3e1aab381..2b64d65e46 100644 --- a/source/libs/index/src/index_fst_counting_writer.c +++ b/source/libs/index/src/index_fst_counting_writer.c @@ -31,7 +31,12 @@ static int writeCtxDoWrite(WriterCtx* ctx, uint8_t* buf, int len) { static int writeCtxDoRead(WriterCtx* ctx, uint8_t* buf, int len) { int nRead = 0; if (ctx->type == TFile) { +#ifdef USE_MMAP + nRead = len < ctx->file.size ? len : ctx->file.size; + memcpy(buf, ctx->file.ptr, nRead); +#else nRead = tfRead(ctx->file.fd, buf, len); +#endif } else { memcpy(buf, ctx->mem.buf + ctx->offset, len); } @@ -43,7 +48,13 @@ static int writeCtxDoReadFrom(WriterCtx* ctx, uint8_t* buf, int len, int32_t off int nRead = 0; if (ctx->type == TFile) { // tfLseek(ctx->file.fd, offset, 0); +#ifdef USE_MMAP + int32_t last = ctx->file.size - offset; + nRead = last >= len ? len : last; + memcpy(buf, ctx->file.ptr + offset, nRead); +#else nRead = tfPread(ctx->file.fd, buf, len, offset); +#endif } else { // refactor later assert(0); @@ -83,6 +94,9 @@ WriterCtx* writerCtxCreate(WriterType type, const char* path, bool readOnly, int struct stat fstat; stat(path, &fstat); ctx->file.size = fstat.st_size; +#ifdef USE_MMAP + ctx->file.ptr = (char*)tfMmapReadOnly(ctx->file.fd, ctx->file.size); +#endif } memcpy(ctx->file.buf, path, strlen(path)); if (ctx->file.fd < 0) { @@ -111,8 +125,12 @@ void writerCtxDestroy(WriterCtx* ctx, bool remove) { if (ctx->type == TMemory) { free(ctx->mem.buf); } else { - // ctx->flush(ctx); tfClose(ctx->file.fd); + if (ctx->file.readOnly) { +#ifdef USE_MMAP + munmap(ctx->file.ptr, ctx->file.size); +#endif + } if (remove) { unlink(ctx->file.buf); } } free(ctx); diff --git a/source/libs/index/test/indexTests.cc b/source/libs/index/test/indexTests.cc index c3d6e5541f..badc510b19 100644 --- a/source/libs/index/test/indexTests.cc +++ b/source/libs/index/test/indexTests.cc @@ -28,7 +28,7 @@ #include "tutil.h" using namespace std; -#define NUM_OF_THREAD 10 +#define NUM_OF_THREAD 5 class DebugInfo { public: diff --git a/source/util/src/tfile.c b/source/util/src/tfile.c index 4cb20802c7..0f68e9204d 100644 --- a/source/util/src/tfile.c +++ b/source/util/src/tfile.c @@ -158,3 +158,13 @@ int32_t tfFtruncate(int64_t tfd, int64_t length) { taosReleaseRef(tsFileRsetId, tfd); return code; } + +void *tfMmapReadOnly(int64_t tfd, int64_t length) { + void *p = taosAcquireRef(tsFileRsetId, tfd); + if (p == NULL) return NULL; + int32_t fd = (int32_t)(uintptr_t)p; + + void *ptr = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0); + taosReleaseRef(tsFileRsetId, tfd); + return ptr; +} From 8279e49cb0ed6d9de62106095aff00f1280db9fb Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 4 Jan 2022 20:18:15 +0800 Subject: [PATCH 2/7] optimize read performace case --- source/libs/index/test/indexTests.cc | 76 ++++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 10 deletions(-) diff --git a/source/libs/index/test/indexTests.cc b/source/libs/index/test/indexTests.cc index badc510b19..ef41855c33 100644 --- a/source/libs/index/test/indexTests.cc +++ b/source/libs/index/test/indexTests.cc @@ -679,6 +679,17 @@ class IndexObj { } return numOfTable; } + int ReadMultiMillonData(const std::string& colName, const std::string& colVal = "Hello world", + size_t numOfTable = 100 * 10000) { + std::string tColVal = colVal; + + int colValSize = tColVal.size(); + for (int i = 0; i < numOfTable; i++) { + tColVal[i % colValSize] = 'a' + i % 26; + SearchOne(colName, tColVal); + } + return 0; + } int Put(SIndexMultiTerm* fvs, uint64_t uid) { numOfWrite += taosArrayGetSize(fvs); @@ -701,8 +712,8 @@ class IndexObj { int64_t s = taosGetTimestampUs(); if (Search(mq, result) == 0) { int64_t e = taosGetTimestampUs(); - std::cout << "search one successfully and time cost:" << e - s << "\tquery col:" << colName - << "\t val: " << colVal << "\t size:" << taosArrayGetSize(result) << std::endl; + std::cout << "search and time cost:" << e - s << "\tquery col:" << colName << "\t val: " << colVal + << "\t size:" << taosArrayGetSize(result) << std::endl; } else { } int sz = taosArrayGetSize(result); @@ -711,6 +722,31 @@ class IndexObj { return sz; // assert(taosArrayGetSize(result) == targetSize); } + int SearchOneTarget(const std::string& colName, const std::string& colVal, uint64_t val) { + SIndexMultiTermQuery* mq = indexMultiTermQueryCreate(MUST); + SIndexTerm* term = indexTermCreate(0, ADD_VALUE, TSDB_DATA_TYPE_BINARY, colName.c_str(), colName.size(), + colVal.c_str(), colVal.size()); + indexMultiTermQueryAdd(mq, term, QUERY_TERM); + + SArray* result = (SArray*)taosArrayInit(1, sizeof(uint64_t)); + + int64_t s = taosGetTimestampUs(); + if (Search(mq, result) == 0) { + int64_t e = taosGetTimestampUs(); + std::cout << "search one successfully and time cost:" << e - s << "\tquery col:" << colName + << "\t val: " << colVal << "\t size:" << taosArrayGetSize(result) << std::endl; + } else { + } + int sz = taosArrayGetSize(result); + indexMultiTermQueryDestroy(mq); + taosArrayDestroy(result); + assert(sz == 1); + uint64_t* ret = (uint64_t*)taosArrayGet(result, 0); + assert(val = *ret); + + return sz; + } + void PutOne(const std::string& colName, const std::string& colVal) { SIndexMultiTerm* terms = indexMultiTermCreate(); SIndexTerm* term = indexTermCreate(0, ADD_VALUE, TSDB_DATA_TYPE_BINARY, colName.c_str(), colName.size(), @@ -831,12 +867,15 @@ TEST_F(IndexEnv2, testIndex_TrigeFlush) { assert(numOfTable == target); } -static void write_and_search(IndexObj* idx) { - std::string colName("tag1"), colVal("Hello"); - +static void single_write_and_search(IndexObj* idx) { int target = idx->SearchOne("tag1", "Hello"); target = idx->SearchOne("tag2", "Test"); - // idx->PutOne(colName, colVal); +} +static void multi_write_and_search(IndexObj* idx) { + int target = idx->SearchOne("tag1", "Hello"); + target = idx->SearchOne("tag2", "Test"); + idx->WriteMultiMillonData("tag1", "Hello", 100 * 10000); + idx->WriteMultiMillonData("tag2", "Test", 100 * 10000); } TEST_F(IndexEnv2, testIndex_serarch_cache_and_tfile) { std::string path = "/tmp/cache_and_tfile"; @@ -851,7 +890,21 @@ TEST_F(IndexEnv2, testIndex_serarch_cache_and_tfile) { for (int i = 0; i < NUM_OF_THREAD; i++) { // - threads[i] = std::thread(write_and_search, index); + threads[i] = std::thread(single_write_and_search, index); + } + for (int i = 0; i < NUM_OF_THREAD; i++) { + // TOD + threads[i].join(); + } +} +TEST_F(IndexEnv2, testIndex_MultiWrite_and_MultiRead) { + std::string path = "/tmp/cache_and_tfile"; + if (index->Init(path) != 0) {} + + std::thread threads[NUM_OF_THREAD]; + for (int i = 0; i < NUM_OF_THREAD; i++) { + // + threads[i] = std::thread(multi_write_and_search, index); } for (int i = 0; i < NUM_OF_THREAD; i++) { // TOD @@ -860,13 +913,16 @@ TEST_F(IndexEnv2, testIndex_serarch_cache_and_tfile) { } TEST_F(IndexEnv2, testIndex_restart) { - std::string path = "/tmp/test1"; + std::string path = "/tmp/cache_and_tfile"; if (index->Init(path) != 0) {} + index->SearchOneTarget("tag1", "Hello", 10); + index->SearchOneTarget("tag2", "Test", 10); } -TEST_F(IndexEnv2, testIndex_performance) { - std::string path = "/tmp/test2"; +TEST_F(IndexEnv2, testIndex_read_performance) { + std::string path = "/tmp/cache_and_tfile"; if (index->Init(path) != 0) {} + index->ReadMultiMillonData("tag1", "Hello"); } TEST_F(IndexEnv2, testIndexMultiTag) { std::string path = "/tmp/test3"; From 99379fd5a61c0ab536d8b43e701b697c86b5ed13 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 4 Jan 2022 20:56:55 +0800 Subject: [PATCH 3/7] add test case --- source/libs/index/test/indexTests.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/source/libs/index/test/indexTests.cc b/source/libs/index/test/indexTests.cc index ef41855c33..0ce48bcec4 100644 --- a/source/libs/index/test/indexTests.cc +++ b/source/libs/index/test/indexTests.cc @@ -755,6 +755,14 @@ class IndexObj { Put(terms, 10); indexMultiTermDestroy(terms); } + void PutOneTarge(const std::string& colName, const std::string& colVal, uint64_t val) { + SIndexMultiTerm* terms = indexMultiTermCreate(); + SIndexTerm* term = indexTermCreate(0, ADD_VALUE, TSDB_DATA_TYPE_BINARY, colName.c_str(), colName.size(), + colVal.c_str(), colVal.size()); + indexMultiTermAdd(terms, term); + Put(terms, val); + indexMultiTermDestroy(terms); + } void Debug() { std::cout << "numOfWrite:" << numOfWrite << std::endl; std::cout << "numOfRead:" << numOfRead << std::endl; @@ -922,7 +930,10 @@ TEST_F(IndexEnv2, testIndex_restart) { TEST_F(IndexEnv2, testIndex_read_performance) { std::string path = "/tmp/cache_and_tfile"; if (index->Init(path) != 0) {} + index->PutOneTarge("tag1", "Hello", 12); + index->PutOneTarge("tag1", "Hello", 15); index->ReadMultiMillonData("tag1", "Hello"); + std::cout << "reader sz: " << index->SearchOne("tag1", "Hello") << std::endl; } TEST_F(IndexEnv2, testIndexMultiTag) { std::string path = "/tmp/test3"; From 49ef94fb6972b3fd1d5409c5399f41d3fea9292f Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 4 Jan 2022 22:58:52 +0800 Subject: [PATCH 4/7] refactor code --- source/libs/index/src/index.c | 3 ++- source/libs/index/test/indexTests.cc | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/source/libs/index/src/index.c b/source/libs/index/src/index.c index 44d3066589..b5a65a9fda 100644 --- a/source/libs/index/src/index.c +++ b/source/libs/index/src/index.c @@ -401,7 +401,7 @@ int indexFlushCacheTFile(SIndex* sIdx, void* cache) { IndexCache* pCache = (IndexCache*)cache; TFileReader* pReader = tfileGetReaderByCol(sIdx->tindex, pCache->suid, pCache->colName); - if (pReader == NULL) { indexWarn("empty pReader found"); } + if (pReader == NULL) { indexWarn("empty tfile reader found"); } // handle flush Iterate* cacheIter = indexCacheIteratorCreate(pCache); Iterate* tfileIter = tfileIteratorCreate(pReader); @@ -512,6 +512,7 @@ static int indexGenTFile(SIndex* sIdx, IndexCache* cache, SArray* batch) { return ret; END: tfileWriterClose(tw); + return -1; } int32_t indexSerialCacheKey(ICacheKey* key, char* buf) { diff --git a/source/libs/index/test/indexTests.cc b/source/libs/index/test/indexTests.cc index 0ce48bcec4..2ba6b505c5 100644 --- a/source/libs/index/test/indexTests.cc +++ b/source/libs/index/test/indexTests.cc @@ -934,6 +934,7 @@ TEST_F(IndexEnv2, testIndex_read_performance) { index->PutOneTarge("tag1", "Hello", 15); index->ReadMultiMillonData("tag1", "Hello"); std::cout << "reader sz: " << index->SearchOne("tag1", "Hello") << std::endl; + assert(3 == index->SearchOne("tag1", "Hello")); } TEST_F(IndexEnv2, testIndexMultiTag) { std::string path = "/tmp/test3"; From a75899024c8d04f2966b12b6fa9dc98bd4168a66 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 4 Jan 2022 23:09:30 +0800 Subject: [PATCH 5/7] add test case --- source/libs/index/test/indexTests.cc | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/source/libs/index/test/indexTests.cc b/source/libs/index/test/indexTests.cc index 2ba6b505c5..9c92af26a2 100644 --- a/source/libs/index/test/indexTests.cc +++ b/source/libs/index/test/indexTests.cc @@ -937,6 +937,17 @@ TEST_F(IndexEnv2, testIndex_read_performance) { assert(3 == index->SearchOne("tag1", "Hello")); } TEST_F(IndexEnv2, testIndexMultiTag) { - std::string path = "/tmp/test3"; + std::string path = "/tmp/multi_tag"; if (index->Init(path) != 0) {} + index->WriteMultiMillonData("tag1", "Hello", 100 * 10000); + index->WriteMultiMillonData("tag2", "Test", 100 * 10000); + index->WriteMultiMillonData("tag3", "Test", 100 * 10000); + index->WriteMultiMillonData("tag4", "Test", 100 * 10000); +} +TEST_F(IndexEnv2, testLongComVal) { + std::string path = "/tmp/long_colVal"; + if (index->Init(path) != 0) {} + // gen colVal by randstr + std::string randstr = "xxxxxxxxxxxxxxxxx"; + index->WriteMultiMillonData("tag1", randstr, 100 * 10000); } From 82745dea8062c9ef15f22ba85aa6d4b485c5a0a3 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 4 Jan 2022 23:13:08 +0800 Subject: [PATCH 6/7] refactor code --- include/os/os.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/include/os/os.h b/include/os/os.h index 53a6cef96a..0135865461 100644 --- a/include/os/os.h +++ b/include/os/os.h @@ -22,11 +22,13 @@ extern "C" { #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -36,18 +38,14 @@ extern "C" { #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include #include #include -#include +#include #include +#include +#include + +#include #include "osAtomic.h" #include "osDef.h" From a84af557023f947cf2708aa90435a27e85b68fd1 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Tue, 4 Jan 2022 23:24:37 +0800 Subject: [PATCH 7/7] refactor code --- include/os/os.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/os/os.h b/include/os/os.h index 0135865461..972880da9c 100644 --- a/include/os/os.h +++ b/include/os/os.h @@ -40,7 +40,7 @@ extern "C" { #include #include #include -#include +#include #include #include #include