fix tfile bug and add unit test

This commit is contained in:
yihaoDeng 2021-12-23 18:18:51 +08:00
parent 6e7df831ce
commit aedd147359
6 changed files with 333 additions and 258 deletions

View File

@ -42,7 +42,12 @@ typedef struct TFileHeader {
#define TFILE_HEADER_SIZE (sizeof(TFileHeader)) #define TFILE_HEADER_SIZE (sizeof(TFileHeader))
#define TFILE_HEADER_NO_FST (TFILE_HEADER_SIZE - sizeof(int32_t)) #define TFILE_HEADER_NO_FST (TFILE_HEADER_SIZE - sizeof(int32_t))
//#define TFILE_HADER_PRE_SIZE (sizeof(uint64_t) + sizeof(int32_t) + sizeof(int32_t))
typedef struct TFileValue {
char* colVal; // null terminated
SArray* tableId;
int32_t offset;
} TFileValue;
typedef struct TFileCacheKey { typedef struct TFileCacheKey {
uint64_t suid; uint64_t suid;

View File

@ -41,7 +41,7 @@ static pthread_once_t isInit = PTHREAD_ONCE_INIT;
static void indexInit(); static void indexInit();
static int indexTermSearch(SIndex* sIdx, SIndexTermQuery* term, SArray** result); static int indexTermSearch(SIndex* sIdx, SIndexTermQuery* term, SArray** result);
static int indexMergeCacheIntoTindex(SIndex* sIdx); static int indexFlushCacheToTindex(SIndex* sIdx);
static void indexInterResultsDestroy(SArray* results); static void indexInterResultsDestroy(SArray* results);
static int indexMergeFinalResults(SArray* interResults, EIndexOperatorType oType, SArray* finalResult); static int indexMergeFinalResults(SArray* interResults, EIndexOperatorType oType, SArray* finalResult);
@ -49,9 +49,7 @@ static int indexMergeFinalResults(SArray* interResults, EIndexOperatorType oTyp
int indexOpen(SIndexOpts* opts, const char* path, SIndex** index) { int indexOpen(SIndexOpts* opts, const char* path, SIndex** index) {
pthread_once(&isInit, indexInit); pthread_once(&isInit, indexInit);
SIndex* sIdx = calloc(1, sizeof(SIndex)); SIndex* sIdx = calloc(1, sizeof(SIndex));
if (sIdx == NULL) { if (sIdx == NULL) { return -1; }
return -1;
}
#ifdef USE_LUCENE #ifdef USE_LUCENE
index_t* index = index_open(path); index_t* index = index_open(path);
@ -131,9 +129,7 @@ int indexPut(SIndex* index, SIndexMultiTerm* fVals, uint64_t uid) {
int32_t colId = fi->colId; int32_t colId = fi->colId;
int32_t version = index->cVersion; int32_t version = index->cVersion;
int ret = indexCachePut(index->cache, p, colId, version, uid); int ret = indexCachePut(index->cache, p, colId, version, uid);
if (ret != 0) { if (ret != 0) { return ret; }
return ret;
}
} }
#endif #endif
@ -221,9 +217,7 @@ void indexOptsDestroy(SIndexOpts* opts){
SIndexMultiTermQuery* indexMultiTermQueryCreate(EIndexOperatorType opera) { SIndexMultiTermQuery* indexMultiTermQueryCreate(EIndexOperatorType opera) {
SIndexMultiTermQuery* p = (SIndexMultiTermQuery*)malloc(sizeof(SIndexMultiTermQuery)); SIndexMultiTermQuery* p = (SIndexMultiTermQuery*)malloc(sizeof(SIndexMultiTermQuery));
if (p == NULL) { if (p == NULL) { return NULL; }
return NULL;
}
p->opera = opera; p->opera = opera;
p->query = taosArrayInit(4, sizeof(SIndexTermQuery)); p->query = taosArrayInit(4, sizeof(SIndexTermQuery));
return p; return p;
@ -250,9 +244,7 @@ SIndexTerm* indexTermCreate(int64_t suid,
const char* colVal, const char* colVal,
int32_t nColVal) { int32_t nColVal) {
SIndexTerm* t = (SIndexTerm*)calloc(1, (sizeof(SIndexTerm))); SIndexTerm* t = (SIndexTerm*)calloc(1, (sizeof(SIndexTerm)));
if (t == NULL) { if (t == NULL) { return NULL; }
return NULL;
}
t->suid = suid; t->suid = suid;
t->operType = oper; t->operType = oper;
@ -332,9 +324,7 @@ static int indexTermSearch(SIndex* sIdx, SIndexTermQuery* query, SArray** result
return 0; return 0;
} }
static void indexInterResultsDestroy(SArray* results) { static void indexInterResultsDestroy(SArray* results) {
if (results == NULL) { if (results == NULL) { return; }
return;
}
size_t sz = taosArrayGetSize(results); size_t sz = taosArrayGetSize(results);
for (size_t i = 0; i < sz; i++) { for (size_t i = 0; i < sz; i++) {
@ -363,10 +353,10 @@ static int indexMergeFinalResults(SArray* interResults, EIndexOperatorType oType
} }
return 0; return 0;
} }
static int indexMergeCacheIntoTindex(SIndex* sIdx) { static int indexFlushCacheToTindex(SIndex* sIdx) {
if (sIdx == NULL) { if (sIdx == NULL) { return -1; }
return -1;
}
indexWarn("suid %" PRIu64 " merge cache into tindex", sIdx->suid); indexWarn("suid %" PRIu64 " merge cache into tindex", sIdx->suid);
return 0; return 0;
} }

View File

@ -151,7 +151,6 @@ int indexCacheSearch(void* cache, SIndexTermQuery* query, int16_t colId, int32_t
EIndexQueryType qtype = query->qType; EIndexQueryType qtype = query->qType;
int32_t keyLen = CACHE_KEY_LEN(term); int32_t keyLen = CACHE_KEY_LEN(term);
char* buf = calloc(1, keyLen); char* buf = calloc(1, keyLen);
if (qtype == QUERY_TERM) { if (qtype == QUERY_TERM) {
// //

View File

@ -69,9 +69,9 @@ WriterCtx* writerCtxCreate(WriterType type, const char* path, bool readOnly, int
// ugly code, refactor later // ugly code, refactor later
ctx->file.readOnly = readOnly; ctx->file.readOnly = readOnly;
if (readOnly == false) { if (readOnly == false) {
ctx->file.fd = tfOpenCreateWriteAppend(tmpFile); ctx->file.fd = tfOpenCreateWriteAppend(path);
} else { } else {
ctx->file.fd = tfOpenReadWrite(tmpFile); ctx->file.fd = tfOpenReadWrite(path);
} }
if (ctx->file.fd < 0) { if (ctx->file.fd < 0) {
indexError("open file error %d", errno); indexError("open file error %d", errno);
@ -93,6 +93,7 @@ WriterCtx* writerCtxCreate(WriterType type, const char* path, bool readOnly, int
END: END:
if (ctx->type == TMemory) { free(ctx->mem.buf); } if (ctx->type == TMemory) { free(ctx->mem.buf); }
free(ctx); free(ctx);
return NULL;
} }
void writerCtxDestroy(WriterCtx* ctx) { void writerCtxDestroy(WriterCtx* ctx) {
if (ctx->type == TMemory) { if (ctx->type == TMemory) {

View File

@ -25,12 +25,7 @@
#define TF_TABLE_TATOAL_SIZE(sz) (sizeof(sz) + sz * sizeof(uint64_t)) #define TF_TABLE_TATOAL_SIZE(sz) (sizeof(sz) + sz * sizeof(uint64_t))
typedef struct TFileValue { static int tfileStrCompare(const void* a, const void* b);
char* colVal; // null terminated
SArray* tableId;
int32_t offset;
} TFileValue;
static int tfileValueCompare(const void* a, const void* b, const void* param); static int tfileValueCompare(const void* a, const void* b, const void* param);
static void tfileSerialTableIdsToBuf(char* buf, SArray* tableIds); static void tfileSerialTableIdsToBuf(char* buf, SArray* tableIds);
@ -49,6 +44,7 @@ static int tfileRmExpireFile(SArray* result);
static void tfileDestroyFileName(void* elem); static void tfileDestroyFileName(void* elem);
static int tfileCompare(const void* a, const void* b); static int tfileCompare(const void* a, const void* b);
static int tfileParseFileName(const char* filename, uint64_t* suid, int* colId, int* version); static int tfileParseFileName(const char* filename, uint64_t* suid, int* colId, int* version);
static void tfileGenFileName(char* filename, uint64_t suid, int colId, int version);
static void tfileSerialCacheKey(TFileCacheKey* key, char* buf); static void tfileSerialCacheKey(TFileCacheKey* key, char* buf);
TFileCache* tfileCacheCreate(const char* path) { TFileCache* tfileCacheCreate(const char* path) {
@ -209,16 +205,28 @@ TFileWriter* tfileWriterCreate(WriterCtx* ctx, TFileHeader* header) {
tw->ctx = ctx; tw->ctx = ctx;
tw->header = *header; tw->header = *header;
tfileWriteHeader(tw); tfileWriteHeader(tw);
tw->fb = fstBuilderCreate(ctx, 0);
if (tw->fb == NULL) {
tfileWriterDestroy(tw);
return NULL;
}
return tw; return tw;
} }
int tfileWriterPut(TFileWriter* tw, void* data) { int tfileWriterPut(TFileWriter* tw, void* data) {
// sort by coltype and write to tindex // sort by coltype and write to tindex
__compar_fn_t fn = getComparFunc(tw->header.colType, 0); __compar_fn_t fn;
int8_t colType = tw->header.colType;
if (colType == TSDB_DATA_TYPE_BINARY || colType == TSDB_DATA_TYPE_NCHAR) {
fn = tfileStrCompare;
} else {
fn = getComparFunc(colType, 0);
}
taosArraySortPWithExt((SArray*)(data), tfileValueCompare, &fn); taosArraySortPWithExt((SArray*)(data), tfileValueCompare, &fn);
int32_t bufLimit = 4096, offset = 0; int32_t bufLimit = 4096, offset = 0;
char* buf = calloc(1, sizeof(bufLimit)); char* buf = calloc(1, sizeof(char) * bufLimit);
char* p = buf; char* p = buf;
int32_t sz = taosArrayGetSize((SArray*)data); int32_t sz = taosArrayGetSize((SArray*)data);
int32_t fstOffset = tw->offset; int32_t fstOffset = tw->offset;
@ -267,6 +275,9 @@ int tfileWriterPut(TFileWriter* tw, void* data) {
// //
} }
} }
fstBuilderFinish(tw->fb);
fstBuilderDestroy(tw->fb);
tw->fb = NULL;
return 0; return 0;
} }
void tfileWriterDestroy(TFileWriter* tw) { void tfileWriterDestroy(TFileWriter* tw) {
@ -305,6 +316,12 @@ int indexTFilePut(void* tfile, SIndexTerm* term, uint64_t uid) {
return 0; return 0;
} }
static int tfileStrCompare(const void* a, const void* b) {
int ret = strcmp((char*)a, (char*)b);
if (ret == 0) { return ret; }
return ret < 0 ? -1 : 1;
}
static int tfileValueCompare(const void* a, const void* b, const void* param) { static int tfileValueCompare(const void* a, const void* b, const void* param) {
__compar_fn_t fn = *(__compar_fn_t*)param; __compar_fn_t fn = *(__compar_fn_t*)param;
@ -445,6 +462,10 @@ static int tfileCompare(const void* a, const void* b) {
return strncmp(aName, bName, aLen > bLen ? aLen : bLen); return strncmp(aName, bName, aLen > bLen ? aLen : bLen);
} }
// tfile name suid-colId-version.tindex // tfile name suid-colId-version.tindex
static void tfileGenFileName(char* filename, uint64_t suid, int colId, int version) {
sprintf(filename, "%" PRIu64 "-%d-%d.tindex", suid, colId, version);
return;
}
static int tfileParseFileName(const char* filename, uint64_t* suid, int* colId, int* version) { static int tfileParseFileName(const char* filename, uint64_t* suid, int* colId, int* version) {
if (3 == sscanf(filename, "%" PRIu64 "-%d-%d.tindex", suid, colId, version)) { if (3 == sscanf(filename, "%" PRIu64 "-%d-%d.tindex", suid, colId, version)) {
// read suid & colid & version success // read suid & colid & version success

View File

@ -2,8 +2,7 @@
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com> * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
* *
* This program is free software: you can use, redistribute, and/or modify * This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3 * it under the terms of the GNU Affero General Public License, version 3 * or later ("AGPL"), as published by the Free Software Foundation.
* or later ("AGPL"), as published by the Free Software Foundation.
* *
* This program is distributed in the hope that it will be useful, but WITHOUT * This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
@ -13,20 +12,20 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <string>
#include <iostream> #include <iostream>
#include <string>
#include "index.h" #include "index.h"
#include "tutil.h"
#include "indexInt.h" #include "indexInt.h"
#include "index_fst.h" #include "index_fst.h"
#include "index_fst_util.h"
#include "index_fst_counting_writer.h" #include "index_fst_counting_writer.h"
#include "index_fst_util.h"
#include "index_tfile.h"
#include "tutil.h"
class FstWriter { class FstWriter {
public: public:
FstWriter() { FstWriter() {
_wc = writerCtxCreate(TFile, "/tmp/tindex", false, 0); _wc = writerCtxCreate(TFile, "/tmp/tindex", false, 64 * 1024 * 1024);
_b = fstBuilderCreate(NULL, 0); _b = fstBuilderCreate(NULL, 0);
} }
bool Put(const std::string& key, uint64_t val) { bool Put(const std::string& key, uint64_t val) {
@ -41,6 +40,7 @@ class FstWriter {
writerCtxDestroy(_wc); writerCtxDestroy(_wc);
} }
private: private:
FstBuilder* _b; FstBuilder* _b;
WriterCtx* _wc; WriterCtx* _wc;
@ -49,7 +49,7 @@ class FstWriter {
class FstReadMemory { class FstReadMemory {
public: public:
FstReadMemory(size_t size) { FstReadMemory(size_t size) {
_wc = writerCtxCreate(TFile, "/tmp/tindex", true, 0); _wc = writerCtxCreate(TFile, "/tmp/tindex", true, 64 * 1024);
_w = fstCountingWriterCreate(_wc); _w = fstCountingWriterCreate(_wc);
_size = size; _size = size;
memset((void*)&_s, 0, sizeof(_s)); memset((void*)&_s, 0, sizeof(_s));
@ -108,7 +108,6 @@ class FstReadMemory {
FstSlice _s; FstSlice _s;
WriterCtx* _wc; WriterCtx* _wc;
size_t _size; size_t _size;
}; };
// TEST(IndexTest, index_create_test) { // TEST(IndexTest, index_create_test) {
@ -163,7 +162,6 @@ class FstReadMemory {
// // // //
//} //}
#define L 100 #define L 100
#define M 100 #define M 100
#define N 100 #define N 100
@ -216,9 +214,7 @@ void checkFstPerf() {
delete fw; delete fw;
FstReadMemory* m = new FstReadMemory(1024 * 64); FstReadMemory* m = new FstReadMemory(1024 * 64);
if (m->init()) { if (m->init()) { printf("success to init fst read"); }
printf("success to init fst read");
}
Performance_fstReadRecords(m); Performance_fstReadRecords(m);
delete m; delete m;
} }
@ -247,6 +243,7 @@ void checkFstPrefixSearch() {
// prefix search // prefix search
std::vector<uint64_t> result; std::vector<uint64_t> result;
AutomationCtx* ctx = automCtxCreate((void*)"ab", AUTOMATION_PREFIX); AutomationCtx* ctx = automCtxCreate((void*)"ab", AUTOMATION_PREFIX);
m->Search(ctx, result); m->Search(ctx, result);
assert(result.size() == count); assert(result.size() == count);
@ -318,53 +315,115 @@ class IndexEnv : public ::testing::Test {
SIndex* index; SIndex* index;
}; };
TEST_F(IndexEnv, testPut) { // TEST_F(IndexEnv, testPut) {
// // single index column
// single index column // {
{ // std::string colName("tag1"), colVal("Hello world");
// SIndexTerm* term = indexTermCreate(0, ADD_VALUE, TSDB_DATA_TYPE_BINARY, colName.c_str(), colName.size(), colVal.c_str(),
std::string colName("tag1"), colVal("Hello world"); // colVal.size()); SIndexMultiTerm* terms = indexMultiTermCreate(); indexMultiTermAdd(terms, term);
SIndexTerm *term = indexTermCreate(0, ADD_VALUE, TSDB_DATA_TYPE_BINARY, colName.c_str(), colName.size(), colVal.c_str(), colVal.size());
SIndexMultiTerm *terms = indexMultiTermCreate();
indexMultiTermAdd(terms, term);
for (size_t i = 0; i < 100; i++) {
int tableId = i;
int ret = indexPut(index, terms, tableId);
assert(ret == 0);
}
indexMultiTermDestroy(terms);
}
// multi index column
{
SIndexMultiTerm *terms = indexMultiTermCreate();
{
std::string colName("tag1"), colVal("Hello world");
SIndexTerm *term = indexTermCreate(0, ADD_VALUE, TSDB_DATA_TYPE_BINARY, colName.c_str(), colName.size(), colVal.c_str(), colVal.size());
indexMultiTermAdd(terms, term);
}
{
std::string colName("tag2"), colVal("Hello world");
SIndexTerm *term = indexTermCreate(0, ADD_VALUE, TSDB_DATA_TYPE_BINARY, colName.c_str(), colName.size(), colVal.c_str(), colVal.size());
indexMultiTermAdd(terms, term);
}
for (int i = 0; i < 100; i++) {
int tableId = i;
int ret = indexPut(index, terms, tableId);
assert(ret == 0);
}
indexMultiTermDestroy(terms);
}
// //
// for (size_t i = 0; i < 100; i++) {
// int tableId = i;
// int ret = indexPut(index, terms, tableId);
// assert(ret == 0);
// }
// indexMultiTermDestroy(terms);
// }
// // multi index column
// {
// SIndexMultiTerm* terms = indexMultiTermCreate();
// {
// std::string colName("tag1"), colVal("Hello world");
// SIndexTerm* term =
// indexTermCreate(0, ADD_VALUE, TSDB_DATA_TYPE_BINARY, colName.c_str(), colName.size(), colVal.c_str(), colVal.size());
// indexMultiTermAdd(terms, term);
// }
// {
// std::string colName("tag2"), colVal("Hello world");
// SIndexTerm* term =
// indexTermCreate(0, ADD_VALUE, TSDB_DATA_TYPE_BINARY, colName.c_str(), colName.size(), colVal.c_str(), colVal.size());
// indexMultiTermAdd(terms, term);
// }
//
// for (int i = 0; i < 100; i++) {
// int tableId = i;
// int ret = indexPut(index, terms, tableId);
// assert(ret == 0);
// }
// indexMultiTermDestroy(terms);
// }
// //
//}
class IndexTFileEnv : public ::testing::Test {
protected:
virtual void SetUp() {
taosRemoveDir(dir);
taosMkDir(dir);
tfInit();
std::string colName("voltage");
header.suid = 1;
header.version = 1;
memcpy(header.colName, colName.c_str(), colName.size());
header.colType = TSDB_DATA_TYPE_BINARY;
std::string path(dir);
int colId = 2;
char buf[64] = {0};
sprintf(buf, "%" PRIu64 "-%d-%d.tindex", header.suid, colId, header.version);
path.append("/").append(buf);
ctx = writerCtxCreate(TFile, path.c_str(), false, 64 * 1024 * 1024);
twrite = tfileWriterCreate(ctx, &header);
} }
TEST_F(IndexEnv, testDel) { virtual void TearDown() {
// indexClose(index);
// indexeptsDestroy(opts);
tfCleanup();
tfileWriterDestroy(twrite);
}
const char* dir = "/tmp/tindex";
WriterCtx* ctx = NULL;
TFileHeader header;
TFileWriter* twrite = NULL;
};
static TFileValue* genTFileValue(const char* val) {
TFileValue* tv = (TFileValue*)calloc(1, sizeof(TFileValue));
int32_t vlen = strlen(val) + 1;
tv->colVal = (char*)calloc(1, vlen);
memcpy(tv->colVal, val, vlen);
tv->tableId = (SArray*)taosArrayInit(1, sizeof(uint64_t));
for (size_t i = 0; i < 10; i++) {
uint64_t v = i;
taosArrayPush(tv->tableId, &v);
}
return tv;
}
static void destroyTFileValue(void* val) {
TFileValue* tv = (TFileValue*)val;
free(tv->colVal);
taosArrayDestroy(tv->tableId);
free(tv);
} }
TEST_F(IndexTFileEnv, test_tfile_write) {
TFileValue* v1 = genTFileValue("c");
TFileValue* v2 = genTFileValue("a");
SArray* data = (SArray*)taosArrayInit(4, sizeof(void*));
taosArrayPush(data, &v1);
taosArrayPush(data, &v2);
tfileWriterPut(twrite, data);
// tfileWriterDestroy(twrite);
for (size_t i = 0; i < taosArrayGetSize(data); i++) {
destroyTFileValue(taosArrayGetP(data, i));
}
taosArrayDestroy(data);
}