enh: add json index

This commit is contained in:
yihaoDeng 2022-06-04 22:33:36 +08:00
parent 0ef1a19b23
commit 7c9d76a6a6
7 changed files with 170 additions and 40 deletions

View File

@ -603,6 +603,9 @@ typedef struct {
} SIdxCursor;
int32_t metaFilteTableIds(SMeta *pMeta, SMetaFltParam *param, SArray *pUids) {
#ifdef USE_INVERTED_INDEX
return -1;
#else
SIdxCursor *pCursor = NULL;
int32_t ret = 0, valid = 0;
@ -678,4 +681,5 @@ END:
taosMemoryFree(pCursor);
return ret;
#endif
}

View File

@ -15,6 +15,7 @@
#include "meta.h"
static int metaSaveJsonVarToIdx(SMeta *pMeta, const SMetaEntry *pCtbEntry, const SSchema *pSchema);
static int metaHandleEntry(SMeta *pMeta, const SMetaEntry *pME);
static int metaSaveToTbDb(SMeta *pMeta, const SMetaEntry *pME);
static int metaUpdateUidIdx(SMeta *pMeta, const SMetaEntry *pME);
@ -43,6 +44,75 @@ static int metaUpdateMetaRsp(tb_uid_t uid, char* tbName, SSchemaWrapper *pSchema
return 0;
}
static int metaSaveJsonVarToIdx(SMeta *pMeta, const SMetaEntry *pCtbEntry, const SSchema *pSchema) {
#ifdef USE_INVERTED_INDEX
if (pMeta->pTagIvtIdx == NULL || pCtbEntry == NULL) {
return -1;
}
void * data = pCtbEntry->ctbEntry.pTags;
const char *tagName = pSchema->name;
tb_uid_t suid = pCtbEntry->ctbEntry.suid;
tb_uid_t tuid = pCtbEntry->uid;
const void *pTagData = pCtbEntry->ctbEntry.pTags;
int32_t nTagData = 0;
SArray *pTagVals = NULL;
if (tTagToValArray((const STag *)data, &pTagVals) != 0) {
return -1;
}
SIndexMultiTerm *terms = indexMultiTermCreate();
int16_t nCols = taosArrayGetSize(pTagVals);
for (int i = 0; i < nCols; i++) {
STagVal *pTagVal = (STagVal *)taosArrayGet(pTagVals, i);
char type = pTagVal->type;
char *Key = pTagVal->pKey;
char *key = taosMemoryCalloc(1, strlen(Key) + 2 + strlen(tagName));
sprintf(key, "%s_%s", tagName, Key);
int32_t nKey = strlen(key);
SIndexTerm *term = NULL;
if (type == TSDB_DATA_TYPE_NULL) {
} else if (type == TSDB_DATA_TYPE_NCHAR) {
if (pTagVal->nData > 0) {
char * val = taosMemoryCalloc(1, pTagVal->nData);
int32_t len = taosUcs4ToMbs((TdUcs4 *)pTagVal->pData, pTagVal->nData, val);
// printf("val: %s, len: %d", val, len);
char *tval = taosMemoryCalloc(1, len + VARSTR_HEADER_SIZE);
memcpy(tval, (uint16_t *)&len, VARSTR_HEADER_SIZE);
memcpy(tval + VARSTR_HEADER_SIZE, val, len);
type = TSDB_DATA_TYPE_VARCHAR;
term = indexTermCreate(suid, ADD_VALUE, type, key, nKey, tval, len + 2);
} else if (pTagVal->nData == 0) {
char * val = NULL;
int32_t len = 0;
// handle NULL key
}
} else if (type == TSDB_DATA_TYPE_DOUBLE) {
double val = *(double *)(&pTagVal->i64);
int len = 0;
term = indexTermCreate(suid, ADD_VALUE, type, key, nKey, (const char *)&val, len);
} else if (type == TSDB_DATA_TYPE_BOOL) {
int val = *(int *)(&pTagVal->i64);
int len = 0;
term = indexTermCreate(suid, ADD_VALUE, type, key, nKey, (const char *)&val, len);
}
if (term == NULL) {
// handle except later
} else {
indexMultiTermAdd(terms, term);
}
taosMemoryFree(key);
}
tIndexJsonPut(pMeta->pTagIvtIdx, terms, tuid);
indexMultiTermDestroy(terms);
#endif
return -1;
}
int metaCreateSTable(SMeta *pMeta, int64_t version, SVCreateStbReq *pReq) {
SMetaEntry me = {0};
int kLen = 0;
@ -341,7 +411,6 @@ static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type) {
return 0;
}
static int metaAlterTableColumn(SMeta *pMeta, int64_t version, SVAlterTbReq *pAlterTbReq, STableMetaRsp *pMetaRsp) {
void * pVal = NULL;
int nVal = 0;
@ -824,6 +893,9 @@ static int metaUpdateTagIdx(SMeta *pMeta, const SMetaEntry *pCtbEntry) {
} else {
// pTagData = pCtbEntry->ctbEntry.pTags;
// nTagData = ((const STag *)pCtbEntry->ctbEntry.pTags)->len;
pTagData = pCtbEntry->ctbEntry.pTags;
nTagData = ((const STag *)pCtbEntry->ctbEntry.pTags)->len;
return metaSaveJsonVarToIdx(pMeta, pCtbEntry, pTagColumn);
}
// update tag index

View File

@ -202,7 +202,7 @@ int indexPut(SIndex* index, SIndexMultiTerm* fVals, uint64_t uid) {
char buf[128] = {0};
ICacheKey key = {.suid = p->suid, .colName = p->colName, .nColName = strlen(p->colName), .colType = p->colType};
int32_t sz = indexSerialCacheKey(&key, buf);
indexDebug("suid: %" PRIu64 ", colName: %s, colType: %d", key.suid, key.colName, key.colType);
indexDebug("w suid: %" PRIu64 ", colName: %s, colType: %d", key.suid, key.colName, key.colType);
IndexCache** cache = taosHashGet(index->colObj, buf, sz);
assert(*cache != NULL);
@ -330,7 +330,7 @@ static int indexTermSearch(SIndex* sIdx, SIndexTermQuery* query, SArray** result
char buf[128] = {0};
ICacheKey key = {
.suid = term->suid, .colName = term->colName, .nColName = strlen(term->colName), .colType = term->colType};
indexDebug("suid: %" PRIu64 ", colName: %s, colType: %d", key.suid, key.colName, key.colType);
indexDebug("r suid: %" PRIu64 ", colName: %s, colType: %d", key.suid, key.colName, key.colType);
int32_t sz = indexSerialCacheKey(&key, buf);
taosThreadMutexLock(&sIdx->mtx);

View File

@ -402,16 +402,16 @@ int32_t indexConvertDataToStr(void* src, int8_t type, void** dst) {
break;
}
case TSDB_DATA_TYPE_VARCHAR: { // TSDB_DATA_TYPE_BINARY
tlen = taosEncodeBinary(NULL, src, strlen(src));
tlen = taosEncodeBinary(NULL, varDataVal(src), varDataLen(src));
*dst = taosMemoryCalloc(1, tlen + 1);
tlen = taosEncodeBinary(dst, src, strlen(src));
tlen = taosEncodeBinary(dst, varDataVal(src), varDataLen(src));
*dst = (char*)*dst - tlen;
break;
}
case TSDB_DATA_TYPE_VARBINARY:
tlen = taosEncodeBinary(NULL, src, strlen(src));
tlen = taosEncodeBinary(NULL, varDataVal(src), varDataLen(src));
*dst = taosMemoryCalloc(1, tlen + 1);
tlen = taosEncodeBinary(dst, src, strlen(src));
tlen = taosEncodeBinary(dst, varDataVal(src), varDataLen(src));
*dst = (char*)*dst - tlen;
break;
default:

View File

@ -162,12 +162,27 @@ static int32_t sifGetValueFromNode(SNode *node, char **value) {
return TSDB_CODE_SUCCESS;
}
static int32_t sifInitJsonParam(SNode *node, SIFParam *param, SIFCtx *ctx) {
SOperatorNode *nd = (SOperatorNode *)node;
assert(nodeType(node) == QUERY_NODE_OPERATOR);
SColumnNode *l = (SColumnNode *)nd->pLeft;
SValueNode * r = (SValueNode *)nd->pRight;
param->colId = l->colId;
param->colValType = l->node.resType.type;
memcpy(param->dbName, l->dbName, sizeof(l->dbName));
sprintf(param->colName, "%s_%s", l->colName, r->literal);
param->colValType = r->typeData;
return 0;
// memcpy(param->colName, l->colName, sizeof(l->colName));
}
static int32_t sifInitParam(SNode *node, SIFParam *param, SIFCtx *ctx) {
switch (nodeType(node)) {
case QUERY_NODE_VALUE: {
SValueNode *vn = (SValueNode *)node;
SIF_ERR_RET(sifGetValueFromNode(node, &param->condValue));
param->colId = -1;
param->colValType = (uint8_t)(vn->node.resType.type);
break;
}
case QUERY_NODE_COLUMN: {
@ -219,17 +234,31 @@ static int32_t sifInitOperParams(SIFParam **params, SOperatorNode *node, SIFCtx
indexError("invalid operation node, left: %p, rigth: %p", node->pLeft, node->pRight);
SIF_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT);
}
if (node->opType == OP_TYPE_JSON_GET_VALUE || node->opType == OP_TYPE_JSON_CONTAINS) {
return code;
}
SIFParam *paramList = taosMemoryCalloc(nParam, sizeof(SIFParam));
if (NULL == paramList) {
SIF_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
}
if (nodeType(node->pLeft) == QUERY_NODE_OPERATOR) {
SNode *interNode = (node->pLeft);
SIF_ERR_JRET(sifInitJsonParam(interNode, &paramList[0], ctx));
if (nParam > 1) {
SIF_ERR_JRET(sifInitParam(node->pRight, &paramList[1], ctx));
}
paramList[0].colValType = TSDB_DATA_TYPE_JSON;
*params = paramList;
return TSDB_CODE_SUCCESS;
} else {
SIF_ERR_JRET(sifInitParam(node->pLeft, &paramList[0], ctx));
if (nParam > 1) {
SIF_ERR_JRET(sifInitParam(node->pRight, &paramList[1], ctx));
}
*params = paramList;
return TSDB_CODE_SUCCESS;
}
_return:
taosMemoryFree(paramList);
SIF_RET(code);
@ -307,18 +336,23 @@ static Filter sifGetFilterFunc(EIndexQueryType type, bool *reverse) {
static int32_t sifDoIndex(SIFParam *left, SIFParam *right, int8_t operType, SIFParam *output) {
SIndexMetaArg *arg = &output->arg;
#ifdef USE_INVERTED_INDEX
SIndexTerm *tm = indexTermCreate(arg->suid, DEFAULT, left->colValType, left->colName, strlen(left->colName),
SIndexTerm *tm = indexTermCreate(arg->suid, DEFAULT, right->colValType, left->colName, strlen(left->colName),
right->condValue, strlen(right->condValue));
if (tm == NULL) {
return TSDB_CODE_QRY_OUT_OF_MEMORY;
}
int ret = 0;
EIndexQueryType qtype = 0;
SIF_ERR_RET(sifGetFuncFromSql(operType, &qtype));
SIndexMultiTermQuery *mtm = indexMultiTermQueryCreate(MUST);
indexMultiTermQueryAdd(mtm, tm, qtype);
int ret = indexSearch(arg->metaHandle, mtm, output->result);
if (left->colValType == TSDB_DATA_TYPE_JSON) {
ret = tIndexJsonSearch(arg->metaHandle, mtm, output->result);
} else {
ret = indexSearch(arg->metaHandle, mtm, output->result);
}
indexDebug("index filter data size: %d", (int)taosArrayGetSize(output->result));
indexMultiTermQueryDestroy(mtm);
return ret;
@ -392,6 +426,14 @@ static int32_t sifNotMatchFunc(SIFParam *left, SIFParam *right, SIFParam *output
int id = OP_TYPE_NMATCH;
return sifDoIndex(left, right, id, output);
}
static int32_t sifJsonContains(SIFParam *left, SIFParam *right, SIFParam *output) {
// return 0
return 0;
}
static int32_t sifJsonGetValue(SIFParam *left, SIFParam *rigth, SIFParam *output) {
// return 0
return 0;
}
static int32_t sifDefaultFunc(SIFParam *left, SIFParam *right, SIFParam *output) {
// add more except
@ -445,6 +487,14 @@ static int32_t sifGetOperFn(int32_t funcId, sif_func_t *func, SIdxFltStatus *sta
*status = SFLT_NOT_INDEX;
*func = sifNotMatchFunc;
return 0;
case OP_TYPE_JSON_CONTAINS:
*status = SFLT_ACCURATE_INDEX;
*func = sifJsonContains;
return 0;
case OP_TYPE_JSON_GET_VALUE:
*status = SFLT_ACCURATE_INDEX;
*func = sifJsonGetValue;
return 0;
default:
*status = SFLT_NOT_INDEX;
*func = sifNullFunc;
@ -460,13 +510,15 @@ static int32_t sifExecOper(SOperatorNode *node, SIFCtx *ctx, SIFParam *output) {
if (nParam <= 1) {
SIF_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT);
}
if (node->opType == OP_TYPE_JSON_GET_VALUE || node->opType == OP_TYPE_JSON_CONTAINS) {
return code;
}
SIFParam *params = NULL;
SIF_ERR_RET(sifInitOperParams(&params, node, ctx));
SIF_ERR_RET(sifInitOperParams(&params, node, ctx));
// ugly code, refactor later
output->arg = ctx->arg;
sif_func_t operFn = sifNullFunc;
code = sifGetOperFn(node->opType, &operFn, &output->status);
if (ctx->noExec) {
@ -573,7 +625,9 @@ EDealRes sifCalcWalker(SNode *node, void *context) {
if (QUERY_NODE_LOGIC_CONDITION == nodeType(node)) {
return sifWalkLogic(node, ctx);
}
if (QUERY_NODE_OPERATOR == nodeType(node)) {
indexInfo("node type for index filter, type: %d", nodeType(node));
return sifWalkOper(node, ctx);
}