homework-jianmu/source/libs/index/src/index.c

561 lines
16 KiB
C

/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3 * or later ("AGPL"), as published by the Free
* Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "index.h"
#include "indexCache.h"
#include "indexComm.h"
#include "indexInt.h"
#include "indexTfile.h"
#include "indexUtil.h"
#include "tdef.h"
#include "tsched.h"
#ifdef USE_LUCENE
#include "lucene++/Lucene_c.h"
#endif
#define INDEX_NUM_OF_THREADS 4
#define INDEX_QUEUE_SIZE 200
void* indexQhandle = NULL;
void indexInit() {
// refactor later
indexQhandle = taosInitScheduler(INDEX_QUEUE_SIZE, INDEX_NUM_OF_THREADS, "index");
}
void indexCleanUp() {
// refacto later
taosCleanUpScheduler(indexQhandle);
}
typedef struct SIdxColInfo {
int colId; // generated by index internal
int cVersion;
} SIdxColInfo;
static TdThreadOnce isInit = PTHREAD_ONCE_INIT;
// static void indexInit();
static int indexTermSearch(SIndex* sIdx, SIndexTermQuery* term, SArray** result);
static void indexInterResultsDestroy(SArray* results);
static int indexMergeFinalResults(SArray* interResults, EIndexOperatorType oType, SArray* finalResult);
static int indexGenTFile(SIndex* index, IndexCache* cache, SArray* batch);
// merge cache and tfile by opera type
static void indexMergeCacheAndTFile(SArray* result, IterateValue* icache, IterateValue* iTfv, SIdxTempResult* helper);
// static int32_t indexSerialTermKey(SIndexTerm* itm, char* buf);
// int32_t indexSerialKey(ICacheKey* key, char* buf);
int indexOpen(SIndexOpts* opts, const char* path, SIndex** index) {
taosThreadOnce(&isInit, indexInit);
SIndex* sIdx = taosMemoryCalloc(1, sizeof(SIndex));
if (sIdx == NULL) {
return -1;
}
#ifdef USE_LUCENE
index_t* index = index_open(path);
sIdx->index = index;
#endif
#ifdef USE_INVERTED_INDEX
// sIdx->cache = (void*)indexCacheCreate(sIdx);
sIdx->tindex = indexTFileCreate(path);
if (sIdx->tindex == NULL) {
goto END;
}
sIdx->colObj = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK);
sIdx->cVersion = 1;
sIdx->path = tstrdup(path);
taosThreadMutexInit(&sIdx->mtx, NULL);
*index = sIdx;
return 0;
#endif
END:
if (sIdx != NULL) {
indexClose(sIdx);
}
*index = NULL;
return -1;
}
void indexClose(SIndex* sIdx) {
#ifdef USE_LUCENE
index_close(sIdex->index);
sIdx->index = NULL;
#endif
#ifdef USE_INVERTED_INDEX
void* iter = taosHashIterate(sIdx->colObj, NULL);
while (iter) {
IndexCache** pCache = iter;
if (*pCache) {
indexCacheUnRef(*pCache);
}
iter = taosHashIterate(sIdx->colObj, iter);
}
taosHashCleanup(sIdx->colObj);
taosThreadMutexDestroy(&sIdx->mtx);
indexTFileDestroy(sIdx->tindex);
#endif
taosMemoryFree(sIdx->path);
taosMemoryFree(sIdx);
return;
}
int indexPut(SIndex* index, SIndexMultiTerm* fVals, uint64_t uid) {
#ifdef USE_LUCENE
index_document_t* doc = index_document_create();
char buf[16] = {0};
sprintf(buf, "%d", uid);
for (int i = 0; i < taosArrayGetSize(fVals); i++) {
SIndexTerm* p = taosArrayGetP(fVals, i);
index_document_add(doc, (const char*)(p->key), p->nKey, (const char*)(p->val), p->nVal, 1);
}
index_document_add(doc, NULL, 0, buf, strlen(buf), 0);
index_put(index->index, doc);
index_document_destroy(doc);
#endif
#ifdef USE_INVERTED_INDEX
// TODO(yihao): reduce the lock range
taosThreadMutexLock(&index->mtx);
for (int i = 0; i < taosArrayGetSize(fVals); i++) {
SIndexTerm* p = taosArrayGetP(fVals, i);
char buf[128] = {0};
ICacheKey key = {.suid = p->suid, .colName = p->colName, .nColName = strlen(p->colName), .colType = p->colType};
int32_t sz = indexSerialCacheKey(&key, buf);
IndexCache** cache = taosHashGet(index->colObj, buf, sz);
if (cache == NULL) {
IndexCache* pCache = indexCacheCreate(index, p->suid, p->colName, p->colType);
taosHashPut(index->colObj, buf, sz, &pCache, sizeof(void*));
}
}
taosThreadMutexUnlock(&index->mtx);
for (int i = 0; i < taosArrayGetSize(fVals); i++) {
SIndexTerm* p = taosArrayGetP(fVals, i);
char buf[128] = {0};
ICacheKey key = {.suid = p->suid, .colName = p->colName, .nColName = strlen(p->colName), .colType = p->colType};
int32_t sz = indexSerialCacheKey(&key, buf);
IndexCache** cache = taosHashGet(index->colObj, buf, sz);
assert(*cache != NULL);
int ret = indexCachePut(*cache, p, uid);
if (ret != 0) {
return ret;
}
}
#endif
return 0;
}
int indexSearch(SIndex* index, SIndexMultiTermQuery* multiQuerys, SArray* result) {
#ifdef USE_INVERTED_INDEX
EIndexOperatorType opera = multiQuerys->opera; // relation of querys
SArray* iRslts = taosArrayInit(4, POINTER_BYTES);
int nQuery = taosArrayGetSize(multiQuerys->query);
for (size_t i = 0; i < nQuery; i++) {
SIndexTermQuery* qterm = taosArrayGet(multiQuerys->query, i);
SArray* trslt = NULL;
indexTermSearch(index, qterm, &trslt);
taosArrayPush(iRslts, (void*)&trslt);
}
indexMergeFinalResults(iRslts, opera, result);
indexInterResultsDestroy(iRslts);
#endif
return 0;
}
int indexDelete(SIndex* index, SIndexMultiTermQuery* query) {
#ifdef USE_INVERTED_INDEX
#endif
return 1;
}
int indexRebuild(SIndex* index, SIndexOpts* opts) {
#ifdef USE_INVERTED_INDEX
#endif
return 0;
}
SIndexOpts* indexOptsCreate() {
#ifdef USE_LUCENE
#endif
return NULL;
}
void indexOptsDestroy(SIndexOpts* opts) {
#ifdef USE_LUCENE
#endif
return;
}
/*
* @param: oper
*
*/
SIndexMultiTermQuery* indexMultiTermQueryCreate(EIndexOperatorType opera) {
SIndexMultiTermQuery* mtq = (SIndexMultiTermQuery*)taosMemoryMalloc(sizeof(SIndexMultiTermQuery));
if (mtq == NULL) {
return NULL;
}
mtq->opera = opera;
mtq->query = taosArrayInit(4, sizeof(SIndexTermQuery));
return mtq;
}
void indexMultiTermQueryDestroy(SIndexMultiTermQuery* pQuery) {
for (int i = 0; i < taosArrayGetSize(pQuery->query); i++) {
SIndexTermQuery* p = (SIndexTermQuery*)taosArrayGet(pQuery->query, i);
indexTermDestroy(p->term);
}
taosArrayDestroy(pQuery->query);
taosMemoryFree(pQuery);
};
int indexMultiTermQueryAdd(SIndexMultiTermQuery* pQuery, SIndexTerm* term, EIndexQueryType qType) {
SIndexTermQuery q = {.qType = qType, .term = term};
taosArrayPush(pQuery->query, &q);
return 0;
}
SIndexTerm* indexTermCreate(int64_t suid, SIndexOperOnColumn oper, int8_t queryType, uint8_t colType,
const char* colName, int32_t nColName, const char* colVal, int32_t nColVal) {
SIndexTerm* tm = (SIndexTerm*)taosMemoryCalloc(1, (sizeof(SIndexTerm)));
if (tm == NULL) {
return NULL;
}
tm->suid = suid;
tm->operType = oper;
tm->colType = colType;
tm->colName = (char*)taosMemoryCalloc(1, nColName + 1);
memcpy(tm->colName, colName, nColName);
tm->nColName = nColName;
tm->colVal = (char*)taosMemoryCalloc(1, nColVal + 1);
memcpy(tm->colVal, colVal, nColVal);
tm->nColVal = nColVal;
tm->qType = queryType;
return tm;
}
void indexTermDestroy(SIndexTerm* p) {
taosMemoryFree(p->colName);
taosMemoryFree(p->colVal);
taosMemoryFree(p);
}
SIndexMultiTerm* indexMultiTermCreate() { return taosArrayInit(4, sizeof(SIndexTerm*)); }
int indexMultiTermAdd(SIndexMultiTerm* terms, SIndexTerm* term) {
taosArrayPush(terms, &term);
return 0;
}
void indexMultiTermDestroy(SIndexMultiTerm* terms) {
for (int32_t i = 0; i < taosArrayGetSize(terms); i++) {
SIndexTerm* p = taosArrayGetP(terms, i);
indexTermDestroy(p);
}
taosArrayDestroy(terms);
}
static int indexTermSearch(SIndex* sIdx, SIndexTermQuery* query, SArray** result) {
SIndexTerm* term = query->term;
const char* colName = term->colName;
int32_t nColName = term->nColName;
// Get col info
IndexCache* cache = NULL;
char buf[128] = {0};
ICacheKey key = {
.suid = term->suid, .colName = term->colName, .nColName = strlen(term->colName), .colType = term->colType};
int32_t sz = indexSerialCacheKey(&key, buf);
taosThreadMutexLock(&sIdx->mtx);
IndexCache** pCache = taosHashGet(sIdx->colObj, buf, sz);
cache = (pCache == NULL) ? NULL : *pCache;
taosThreadMutexUnlock(&sIdx->mtx);
*result = taosArrayInit(4, sizeof(uint64_t));
// TODO: iterator mem and tidex
STermValueType s = kTypeValue;
int64_t st = taosGetTimestampUs();
SIdxTempResult* tr = sIdxTempResultCreate();
if (0 == indexCacheSearch(cache, query, tr, &s)) {
if (s == kTypeDeletion) {
indexInfo("col: %s already drop by", term->colName);
// coloum already drop by other oper, no need to query tindex
return 0;
} else {
st = taosGetTimestampUs();
if (0 != indexTFileSearch(sIdx->tindex, query, tr)) {
indexError("corrupt at index(TFile) col:%s val: %s", term->colName, term->colVal);
goto END;
}
int64_t tfCost = taosGetTimestampUs() - st;
indexInfo("tfile search cost: %" PRIu64 "us", tfCost);
}
} else {
indexError("corrupt at index(cache) col:%s val: %s", term->colName, term->colVal);
goto END;
}
int64_t cost = taosGetTimestampUs() - st;
indexInfo("search cost: %" PRIu64 "us", cost);
sIdxTempResultMergeTo(*result, tr);
sIdxTempResultDestroy(tr);
return 0;
END:
sIdxTempResultDestroy(tr);
return -1;
}
static void indexInterResultsDestroy(SArray* results) {
if (results == NULL) {
return;
}
size_t sz = taosArrayGetSize(results);
for (size_t i = 0; i < sz; i++) {
SArray* p = taosArrayGetP(results, i);
taosArrayDestroy(p);
}
taosArrayDestroy(results);
}
static int indexMergeFinalResults(SArray* interResults, EIndexOperatorType oType, SArray* fResults) {
// refactor, merge interResults into fResults by oType
for (int i = 0; i < taosArrayGetSize(interResults); i--) {
SArray* t = taosArrayGetP(interResults, i);
taosArraySort(t, uidCompare);
taosArrayRemoveDuplicate(t, uidCompare, NULL);
}
if (oType == MUST) {
iIntersection(interResults, fResults);
} else if (oType == SHOULD) {
iUnion(interResults, fResults);
} else if (oType == NOT) {
// just one column index, enhance later
taosArrayAddAll(fResults, interResults);
// not use currently
}
return 0;
}
static void indexMayMergeTempToFinalResult(SArray* result, TFileValue* tfv, SIdxTempResult* tr) {
int32_t sz = taosArrayGetSize(result);
if (sz > 0) {
TFileValue* lv = taosArrayGetP(result, sz - 1);
if (tfv != NULL && strcmp(lv->colVal, tfv->colVal) != 0) {
sIdxTempResultMergeTo(lv->tableId, tr);
sIdxTempResultClear(tr);
taosArrayPush(result, &tfv);
} else if (tfv == NULL) {
// handle last iterator
sIdxTempResultMergeTo(lv->tableId, tr);
} else {
// temp result saved in help
tfileValueDestroy(tfv);
}
} else {
taosArrayPush(result, &tfv);
}
}
static void indexMergeCacheAndTFile(SArray* result, IterateValue* cv, IterateValue* tv, SIdxTempResult* tr) {
char* colVal = (cv != NULL) ? cv->colVal : tv->colVal;
TFileValue* tfv = tfileValueCreate(colVal);
indexMayMergeTempToFinalResult(result, tfv, tr);
if (cv != NULL) {
uint64_t id = *(uint64_t*)taosArrayGet(cv->val, 0);
uint32_t ver = cv->ver;
if (cv->type == ADD_VALUE) {
INDEX_MERGE_ADD_DEL(tr->deled, tr->added, id)
} else if (cv->type == DEL_VALUE) {
INDEX_MERGE_ADD_DEL(tr->added, tr->deled, id)
}
}
if (tv != NULL) {
taosArrayAddAll(tr->total, tv->val);
}
}
static void indexDestroyFinalResult(SArray* result) {
int32_t sz = result ? taosArrayGetSize(result) : 0;
for (size_t i = 0; i < sz; i++) {
TFileValue* tv = taosArrayGetP(result, i);
tfileValueDestroy(tv);
}
taosArrayDestroy(result);
}
int indexFlushCacheToTFile(SIndex* sIdx, void* cache) {
if (sIdx == NULL) {
return -1;
}
indexInfo("suid %" PRIu64 " merge cache into tindex", sIdx->suid);
int64_t st = taosGetTimestampUs();
IndexCache* pCache = (IndexCache*)cache;
TFileReader* pReader = tfileGetReaderByCol(sIdx->tindex, pCache->suid, pCache->colName);
if (pReader == NULL) {
indexWarn("empty tfile reader found");
}
// handle flush
Iterate* cacheIter = indexCacheIteratorCreate(pCache);
Iterate* tfileIter = tfileIteratorCreate(pReader);
if (tfileIter == NULL) {
indexWarn("empty tfile reader iterator");
}
SArray* result = taosArrayInit(1024, sizeof(void*));
bool cn = cacheIter ? cacheIter->next(cacheIter) : false;
bool tn = tfileIter ? tfileIter->next(tfileIter) : false;
SIdxTempResult* tr = sIdxTempResultCreate();
while (cn == true || tn == true) {
IterateValue* cv = (cn == true) ? cacheIter->getValue(cacheIter) : NULL;
IterateValue* tv = (tn == true) ? tfileIter->getValue(tfileIter) : NULL;
int comp = 0;
if (cn == true && tn == true) {
comp = strcmp(cv->colVal, tv->colVal);
} else if (cn == true) {
comp = -1;
} else {
comp = 1;
}
if (comp == 0) {
indexMergeCacheAndTFile(result, cv, tv, tr);
cn = cacheIter->next(cacheIter);
tn = tfileIter->next(tfileIter);
} else if (comp < 0) {
indexMergeCacheAndTFile(result, cv, NULL, tr);
cn = cacheIter->next(cacheIter);
} else {
indexMergeCacheAndTFile(result, NULL, tv, tr);
tn = tfileIter->next(tfileIter);
}
}
indexMayMergeTempToFinalResult(result, NULL, tr);
sIdxTempResultDestroy(tr);
int ret = indexGenTFile(sIdx, pCache, result);
indexDestroyFinalResult(result);
indexCacheDestroyImm(pCache);
indexCacheIteratorDestroy(cacheIter);
tfileIteratorDestroy(tfileIter);
tfileReaderUnRef(pReader);
indexCacheUnRef(pCache);
int64_t cost = taosGetTimestampUs() - st;
if (ret != 0) {
indexError("failed to merge, time cost: %" PRId64 "ms", cost / 1000);
} else {
indexInfo("success to merge , time cost: %" PRId64 "ms", cost / 1000);
}
return ret;
}
void iterateValueDestroy(IterateValue* value, bool destroy) {
if (destroy) {
taosArrayDestroy(value->val);
value->val = NULL;
} else {
if (value->val != NULL) {
taosArrayClear(value->val);
}
}
taosMemoryFree(value->colVal);
value->colVal = NULL;
}
static int indexGenTFile(SIndex* sIdx, IndexCache* cache, SArray* batch) {
int32_t version = CACHE_VERSION(cache);
uint8_t colType = cache->type;
TFileWriter* tw = tfileWriterOpen(sIdx->path, cache->suid, version, cache->colName, colType);
if (tw == NULL) {
indexError("failed to open file to write");
return -1;
}
int ret = tfileWriterPut(tw, batch, true);
if (ret != 0) {
indexError("failed to write into tindex ");
goto END;
}
tfileWriterClose(tw);
TFileReader* reader = tfileReaderOpen(sIdx->path, cache->suid, version, cache->colName);
if (reader == NULL) {
return -1;
}
TFileHeader* header = &reader->header;
ICacheKey key = {.suid = cache->suid, .colName = header->colName, .nColName = strlen(header->colName)};
taosThreadMutexLock(&sIdx->mtx);
IndexTFile* ifile = (IndexTFile*)sIdx->tindex;
tfileCachePut(ifile->cache, &key, reader);
taosThreadMutexUnlock(&sIdx->mtx);
return ret;
END:
if (tw != NULL) {
writerCtxDestroy(tw->ctx, true);
taosMemoryFree(tw);
}
return -1;
}
int32_t indexSerialCacheKey(ICacheKey* key, char* buf) {
bool hasJson = INDEX_TYPE_CONTAIN_EXTERN_TYPE(key->colType, TSDB_DATA_TYPE_JSON);
char* p = buf;
SERIALIZE_MEM_TO_BUF(buf, key, suid);
SERIALIZE_VAR_TO_BUF(buf, '_', char);
// SERIALIZE_MEM_TO_BUF(buf, key, colType);
// SERIALIZE_VAR_TO_BUF(buf, '_', char);
if (hasJson) {
SERIALIZE_STR_VAR_TO_BUF(buf, JSON_COLUMN, strlen(JSON_COLUMN));
} else {
SERIALIZE_STR_MEM_TO_BUF(buf, key, colName, key->nColName);
}
return buf - p;
}