From d4c33fba2f3433932e72f17233d8de129910b526 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Tue, 7 Jun 2022 20:49:27 +0800 Subject: [PATCH 01/16] feat: create tsma result stable --- include/common/taosdef.h | 1 + include/common/tmsg.h | 61 +++---- source/common/src/tdatablock.c | 40 +++-- source/common/src/tmsg.c | 36 ++-- source/dnode/mnode/impl/inc/mndDef.h | 53 +++--- source/dnode/mnode/impl/src/mndSma.c | 87 +++++++++- source/dnode/mnode/impl/src/mndStream.c | 4 + source/dnode/vnode/src/meta/metaSma.c | 4 +- source/dnode/vnode/src/sma/smaRollup.c | 4 +- source/dnode/vnode/src/sma/smaTimeRange2.c | 188 ++------------------- source/dnode/vnode/src/tq/tqPush.c | 12 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 8 +- 12 files changed, 229 insertions(+), 269 deletions(-) diff --git a/include/common/taosdef.h b/include/common/taosdef.h index 516df71b0b..60b1dc6c10 100644 --- a/include/common/taosdef.h +++ b/include/common/taosdef.h @@ -98,6 +98,7 @@ extern char *qtypeStr[]; #undef TD_DEBUG_PRINT_ROW #undef TD_DEBUG_PRINT_TSDB_LOAD_DCOLS #undef TD_DEBUG_PRINT_TAG +#define TD_DEBUG_SMA_ID 123456 #ifdef __cplusplus } diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 4a3c4b0c3f..72f36a6995 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -2329,24 +2329,28 @@ typedef struct { } SVgEpSet; typedef struct { - int8_t version; // for compatibility(default 0) - int8_t intervalUnit; // MACRO: TIME_UNIT_XXX - int8_t slidingUnit; // MACRO: TIME_UNIT_XXX - int8_t timezoneInt; // sma data expired if timezone changes. - int32_t dstVgId; - char indexName[TSDB_INDEX_NAME_LEN]; - int32_t exprLen; - int32_t tagsFilterLen; - int32_t numOfVgroups; - int64_t indexUid; - tb_uid_t tableUid; // super/child/common table uid - int64_t interval; - int64_t offset; // use unit by precision of DB - int64_t sliding; - char* expr; // sma expression - char* tagsFilter; - SVgEpSet* pVgEpSet; -} STSma; // Time-range-wise SMA + int8_t version; // for compatibility(default 0) + int8_t intervalUnit; // MACRO: TIME_UNIT_XXX + int8_t slidingUnit; // MACRO: TIME_UNIT_XXX + int8_t timezoneInt; // sma data expired if timezone changes. + int32_t dstVgId; + char indexName[TSDB_INDEX_NAME_LEN]; + int32_t exprLen; + int32_t tagsFilterLen; + int32_t numOfVgroups; // for dstVgroup + int64_t indexUid; + tb_uid_t tableUid; // super/child/common table uid + tb_uid_t dstTbUid; // for dstVgroup + int64_t interval; + int64_t offset; // use unit by precision of DB + int64_t sliding; + char* dstTbName; // for dstVgroup + char* expr; // sma expression + char* tagsFilter; + SVgEpSet* pVgEpSet; // for dstVgroup + SSchemaWrapper schemaRow; // for dstVgroup + SSchemaWrapper schemaTag; // for dstVgroup +} STSma; // Time-range-wise SMA typedef STSma SVCreateTSmaReq; @@ -2493,14 +2497,14 @@ int32_t tSerializeSTableIndexReq(void* buf, int32_t bufLen, STableIndexReq* pReq int32_t tDeserializeSTableIndexReq(void* buf, int32_t bufLen, STableIndexReq* pReq); typedef struct { - int8_t intervalUnit; - int8_t slidingUnit; - int64_t interval; - int64_t offset; - int64_t sliding; - int64_t dstTbUid; - int32_t dstVgId; // for stream - char* expr; + int8_t intervalUnit; + int8_t slidingUnit; + int64_t interval; + int64_t offset; + int64_t sliding; + int64_t dstTbUid; + int32_t dstVgId; // for stream + char* expr; } STableIndexInfo; typedef struct { @@ -2510,7 +2514,6 @@ typedef struct { int32_t tSerializeSTableIndexRsp(void* buf, int32_t bufLen, const STableIndexRsp* pRsp); int32_t tDeserializeSTableIndexRsp(void* buf, int32_t bufLen, STableIndexRsp* pRsp); - typedef struct { int8_t mqMsgType; int32_t code; @@ -2751,8 +2754,8 @@ typedef struct { char* msg; } SVDeleteReq; -int32_t tSerializeSVDeleteReq(void *buf, int32_t bufLen, SVDeleteReq *pReq); -int32_t tDeserializeSVDeleteReq(void *buf, int32_t bufLen, SVDeleteReq *pReq); +int32_t tSerializeSVDeleteReq(void* buf, int32_t bufLen, SVDeleteReq* pReq); +int32_t tDeserializeSVDeleteReq(void* buf, int32_t bufLen, SVDeleteReq* pReq); typedef struct { int64_t affectedRows; diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 9caa9a73a5..2202b9579d 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -1624,25 +1624,31 @@ int32_t buildSubmitReqFromDataBlock(SSubmitReq** pReq, const SArray* pDataBlocks break; default: if (pColInfoData->info.type < TSDB_DATA_TYPE_MAX && pColInfoData->info.type > TSDB_DATA_TYPE_NULL) { - char tv[8] = {0}; - if (pColInfoData->info.type == TSDB_DATA_TYPE_FLOAT) { - float v = 0; - GET_TYPED_DATA(v, float, pColInfoData->info.type, var); - SET_TYPED_DATA(&tv, pCol->type, v); - } else if (pColInfoData->info.type == TSDB_DATA_TYPE_DOUBLE) { - double v = 0; - GET_TYPED_DATA(v, double, pColInfoData->info.type, var); - SET_TYPED_DATA(&tv, pCol->type, v); - } else if (IS_SIGNED_NUMERIC_TYPE(pColInfoData->info.type)) { - int64_t v = 0; - GET_TYPED_DATA(v, int64_t, pColInfoData->info.type, var); - SET_TYPED_DATA(&tv, pCol->type, v); + if (pCol->type == pColInfoData->info.type) { + tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pCol->type, TD_VTYPE_NORM, var, true, offset, + k); } else { - uint64_t v = 0; - GET_TYPED_DATA(v, uint64_t, pColInfoData->info.type, var); - SET_TYPED_DATA(&tv, pCol->type, v); + char tv[8] = {0}; + if (pColInfoData->info.type == TSDB_DATA_TYPE_FLOAT) { + float v = 0; + GET_TYPED_DATA(v, float, pColInfoData->info.type, var); + SET_TYPED_DATA(&tv, pCol->type, v); + } else if (pColInfoData->info.type == TSDB_DATA_TYPE_DOUBLE) { + double v = 0; + GET_TYPED_DATA(v, double, pColInfoData->info.type, var); + SET_TYPED_DATA(&tv, pCol->type, v); + } else if (IS_SIGNED_NUMERIC_TYPE(pColInfoData->info.type)) { + int64_t v = 0; + GET_TYPED_DATA(v, int64_t, pColInfoData->info.type, var); + SET_TYPED_DATA(&tv, pCol->type, v); + } else { + uint64_t v = 0; + GET_TYPED_DATA(v, uint64_t, pColInfoData->info.type, var); + SET_TYPED_DATA(&tv, pCol->type, v); + } + tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pCol->type, TD_VTYPE_NORM, tv, true, offset, + k); } - tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pCol->type, TD_VTYPE_NORM, tv, true, offset, k); } else { uError("the column type %" PRIi16 " is undefined\n", pColInfoData->info.type); TASSERT(0); diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 51c40827b5..bbd7c2e7a0 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -3844,6 +3844,8 @@ int32_t tEncodeTSma(SEncoder *pCoder, const STSma *pSma) { if (tEncodeI32(pCoder, pSma->numOfVgroups) < 0) return -1; if (tEncodeI64(pCoder, pSma->indexUid) < 0) return -1; if (tEncodeI64(pCoder, pSma->tableUid) < 0) return -1; + if (tEncodeI64(pCoder, pSma->dstTbUid) < 0) return -1; + if (tEncodeCStr(pCoder, pSma->dstTbName) < 0) return -1; if (tEncodeI64(pCoder, pSma->interval) < 0) return -1; if (tEncodeI64(pCoder, pSma->offset) < 0) return -1; if (tEncodeI64(pCoder, pSma->sliding) < 0) return -1; @@ -3853,17 +3855,24 @@ int32_t tEncodeTSma(SEncoder *pCoder, const STSma *pSma) { if (pSma->tagsFilterLen > 0) { if (tEncodeCStr(pCoder, pSma->tagsFilter) < 0) return -1; } - for (int32_t v = 0; v < pSma->numOfVgroups; ++v) { - if (tEncodeI32(pCoder, pSma->pVgEpSet[v].vgId) < 0) return -1; - if (tEncodeI8(pCoder, pSma->pVgEpSet[v].epSet.inUse) < 0) return -1; - int8_t numOfEps = pSma->pVgEpSet[v].epSet.numOfEps; - if (tEncodeI8(pCoder, numOfEps) < 0) return -1; - for (int32_t n = 0; n < numOfEps; ++n) { - const SEp *pEp = &pSma->pVgEpSet[v].epSet.eps[n]; - if (tEncodeCStr(pCoder, pEp->fqdn) < 0) return -1; - if (tEncodeU16(pCoder, pEp->port) < 0) return -1; + + if (pSma->numOfVgroups) { // only needed in dstVgroup + for (int32_t v = 0; v < pSma->numOfVgroups; ++v) { + if (tEncodeI32(pCoder, pSma->pVgEpSet[v].vgId) < 0) return -1; + if (tEncodeI8(pCoder, pSma->pVgEpSet[v].epSet.inUse) < 0) return -1; + int8_t numOfEps = pSma->pVgEpSet[v].epSet.numOfEps; + if (tEncodeI8(pCoder, numOfEps) < 0) return -1; + for (int32_t n = 0; n < numOfEps; ++n) { + const SEp *pEp = &pSma->pVgEpSet[v].epSet.eps[n]; + if (tEncodeCStr(pCoder, pEp->fqdn) < 0) return -1; + if (tEncodeU16(pCoder, pEp->port) < 0) return -1; + } } + + tEncodeSSchemaWrapper(pCoder, &pSma->schemaRow); + tEncodeSSchemaWrapper(pCoder, &pSma->schemaTag); } + return 0; } @@ -3871,14 +3880,16 @@ int32_t tDecodeTSma(SDecoder *pCoder, STSma *pSma) { if (tDecodeI8(pCoder, &pSma->version) < 0) return -1; if (tDecodeI8(pCoder, &pSma->intervalUnit) < 0) return -1; if (tDecodeI8(pCoder, &pSma->slidingUnit) < 0) return -1; - if (tDecodeI32(pCoder, &pSma->dstVgId) < 0) return -1; if (tDecodeI8(pCoder, &pSma->timezoneInt) < 0) return -1; + if (tDecodeI32(pCoder, &pSma->dstVgId) < 0) return -1; if (tDecodeCStrTo(pCoder, pSma->indexName) < 0) return -1; if (tDecodeI32(pCoder, &pSma->exprLen) < 0) return -1; if (tDecodeI32(pCoder, &pSma->tagsFilterLen) < 0) return -1; if (tDecodeI32(pCoder, &pSma->numOfVgroups) < 0) return -1; if (tDecodeI64(pCoder, &pSma->indexUid) < 0) return -1; if (tDecodeI64(pCoder, &pSma->tableUid) < 0) return -1; + if (tDecodeI64(pCoder, &pSma->dstTbUid) < 0) return -1; + if (tDecodeCStr(pCoder, &pSma->dstTbName) < 0) return -1; if (tDecodeI64(pCoder, &pSma->interval) < 0) return -1; if (tDecodeI64(pCoder, &pSma->offset) < 0) return -1; if (tDecodeI64(pCoder, &pSma->sliding) < 0) return -1; @@ -3892,7 +3903,7 @@ int32_t tDecodeTSma(SDecoder *pCoder, STSma *pSma) { } else { pSma->tagsFilter = NULL; } - if (pSma->numOfVgroups > 0) { + if (pSma->numOfVgroups > 0) { // only needed in dstVgroup pSma->pVgEpSet = (SVgEpSet *)tDecoderMalloc(pCoder, pSma->numOfVgroups * sizeof(SVgEpSet)); if (!pSma->pVgEpSet) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -3912,6 +3923,9 @@ int32_t tDecodeTSma(SDecoder *pCoder, STSma *pSma) { if (tDecodeU16(pCoder, &pEp->port) < 0) return -1; } } + + tDecodeSSchemaWrapperEx(pCoder, &pSma->schemaRow); + tDecodeSSchemaWrapperEx(pCoder, &pSma->schemaTag); } return 0; diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index 382f8dd55f..d21af87067 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -298,31 +298,34 @@ typedef struct { } SVgObj; typedef struct { - char name[TSDB_TABLE_FNAME_LEN]; - char stb[TSDB_TABLE_FNAME_LEN]; - char db[TSDB_DB_FNAME_LEN]; - int64_t createdTime; - int64_t uid; - int64_t stbUid; - int64_t dbUid; - int8_t intervalUnit; - int8_t slidingUnit; - int8_t timezone; - int32_t dstVgId; // for stream - int64_t dstTbUid; - int64_t interval; - int64_t offset; - int64_t sliding; - int32_t exprLen; // strlen + 1 - int32_t tagsFilterLen; - int32_t sqlLen; - int32_t astLen; - int32_t numOfVgroups; - char* expr; - char* tagsFilter; - char* sql; - char* ast; - SVgEpSet* pVgEpSet; + char name[TSDB_TABLE_FNAME_LEN]; + char stb[TSDB_TABLE_FNAME_LEN]; + char db[TSDB_DB_FNAME_LEN]; + char dstTbName[TSDB_TABLE_FNAME_LEN]; + int64_t createdTime; + int64_t uid; + int64_t stbUid; + int64_t dbUid; + int64_t dstTbUid; + int8_t intervalUnit; + int8_t slidingUnit; + int8_t timezone; + int32_t dstVgId; // for stream + int64_t interval; + int64_t offset; + int64_t sliding; + int32_t exprLen; // strlen + 1 + int32_t tagsFilterLen; + int32_t sqlLen; + int32_t astLen; + int32_t numOfVgroups; // for dstVgroup + char* expr; + char* tagsFilter; + char* sql; + char* ast; + SVgEpSet* pVgEpSet; // for dstVgroup + SSchemaWrapper schemaRow; // for dstVgroup + SSchemaWrapper schemaTag; // for dstVgroup } SSmaObj; typedef struct { diff --git a/source/dnode/mnode/impl/src/mndSma.c b/source/dnode/mnode/impl/src/mndSma.c index 1d47f8fc7a..ef9f5c3352 100644 --- a/source/dnode/mnode/impl/src/mndSma.c +++ b/source/dnode/mnode/impl/src/mndSma.c @@ -26,6 +26,7 @@ #include "mndTrans.h" #include "mndUser.h" #include "mndVgroup.h" +#include "parser.h" #include "tname.h" #define TSDB_SMA_VER_NUMBER 1 @@ -82,10 +83,12 @@ static SSdbRaw *mndSmaActionEncode(SSmaObj *pSma) { SDB_SET_BINARY(pRaw, dataPos, pSma->name, TSDB_TABLE_FNAME_LEN, _OVER) SDB_SET_BINARY(pRaw, dataPos, pSma->stb, TSDB_TABLE_FNAME_LEN, _OVER) SDB_SET_BINARY(pRaw, dataPos, pSma->db, TSDB_DB_FNAME_LEN, _OVER) + SDB_SET_BINARY(pRaw, dataPos, pSma->dstTbName, TSDB_DB_FNAME_LEN, _OVER) SDB_SET_INT64(pRaw, dataPos, pSma->createdTime, _OVER) SDB_SET_INT64(pRaw, dataPos, pSma->uid, _OVER) SDB_SET_INT64(pRaw, dataPos, pSma->stbUid, _OVER) SDB_SET_INT64(pRaw, dataPos, pSma->dbUid, _OVER) + SDB_SET_INT64(pRaw, dataPos, pSma->dstTbUid, _OVER) SDB_SET_INT8(pRaw, dataPos, pSma->intervalUnit, _OVER) SDB_SET_INT8(pRaw, dataPos, pSma->slidingUnit, _OVER) SDB_SET_INT8(pRaw, dataPos, pSma->timezone, _OVER) @@ -147,10 +150,12 @@ static SSdbRow *mndSmaActionDecode(SSdbRaw *pRaw) { SDB_GET_BINARY(pRaw, dataPos, pSma->name, TSDB_TABLE_FNAME_LEN, _OVER) SDB_GET_BINARY(pRaw, dataPos, pSma->stb, TSDB_TABLE_FNAME_LEN, _OVER) SDB_GET_BINARY(pRaw, dataPos, pSma->db, TSDB_DB_FNAME_LEN, _OVER) + SDB_GET_BINARY(pRaw, dataPos, pSma->dstTbName, TSDB_DB_FNAME_LEN, _OVER) SDB_GET_INT64(pRaw, dataPos, &pSma->createdTime, _OVER) SDB_GET_INT64(pRaw, dataPos, &pSma->uid, _OVER) SDB_GET_INT64(pRaw, dataPos, &pSma->stbUid, _OVER) SDB_GET_INT64(pRaw, dataPos, &pSma->dbUid, _OVER) + SDB_GET_INT64(pRaw, dataPos, &pSma->dstTbUid, _OVER) SDB_GET_INT8(pRaw, dataPos, &pSma->intervalUnit, _OVER) SDB_GET_INT8(pRaw, dataPos, &pSma->slidingUnit, _OVER) SDB_GET_INT8(pRaw, dataPos, &pSma->timezone, _OVER) @@ -260,6 +265,8 @@ static void *mndBuildVCreateSmaReq(SMnode *pMnode, SVgObj *pVgroup, SSmaObj *pSm req.tagsFilterLen = pSma->tagsFilterLen; req.indexUid = pSma->uid; req.tableUid = pSma->stbUid; + req.dstVgId = pSma->dstVgId; + req.dstTbUid = pSma->dstTbUid; req.interval = pSma->interval; req.offset = pSma->offset; req.sliding = pSma->sliding; @@ -267,7 +274,10 @@ static void *mndBuildVCreateSmaReq(SMnode *pMnode, SVgObj *pVgroup, SSmaObj *pSm req.tagsFilter = pSma->tagsFilter; req.numOfVgroups = pSma->numOfVgroups; req.pVgEpSet = pSma->pVgEpSet; - + req.schemaRow = pSma->schemaRow; + req.schemaTag = pSma->schemaTag; + req.dstTbName = pSma->dstTbName; + // get length int32_t ret = 0; tEncodeSize(tEncodeSVCreateTSmaReq, &req, contLen, ret); @@ -425,15 +435,43 @@ static int32_t mndSetCreateSmaVgroupRedoActions(SMnode *pMnode, STrans *pTrans, mndReleaseDnode(pMnode, pDnode); // todo add sma info here +#if 1 + SNode *pAst = NULL; + if (nodesStringToNode(pSma->ast, &pAst) < 0) { + return -1; + } + if (qExtractResultSchema(pAst, &pSma->schemaRow.nCols, &pSma->schemaRow.pSchema) != 0) { + nodesDestroyNode(pAst); + return -1; + } + nodesDestroyNode(pAst); + pSma->schemaRow.version = 1; + + // TODO: the schemaTag generated by qExtractResultXXX later. + pSma->schemaTag.nCols = 1; + pSma->schemaTag.version = 1; + pSma->schemaTag.pSchema = taosMemoryCalloc(1, sizeof(SSchema)); + if (!pSma->schemaTag.pSchema) { + nodesDestroyNode(pAst); + return -1; + } + pSma->schemaTag.pSchema[0].type = TSDB_DATA_TYPE_BIGINT; + pSma->schemaTag.pSchema[0].bytes = TYPE_BYTES[TSDB_DATA_TYPE_BIGINT]; + pSma->schemaTag.pSchema[0].colId = pSma->schemaRow.nCols + PRIMARYKEY_TIMESTAMP_COL_ID; + pSma->schemaTag.pSchema[0].flags = 0; + snprintf(pSma->schemaTag.pSchema[0].name, TSDB_COL_NAME_LEN, "groupId"); + SVgEpSet *pVgEpSet = NULL; int32_t numOfVgroups = 0; if (mndSmaGetVgEpSet(pMnode, pDb, &pVgEpSet, &numOfVgroups) != 0) { + nodesDestroyNode(pAst); return -1; } + nodesDestroyNode(pAst); pSma->pVgEpSet = pVgEpSet; pSma->numOfVgroups = numOfVgroups; - +#endif int32_t smaContLen = 0; void *pSmaReq = mndBuildVCreateSmaReq(pMnode, pVgroup, pSma, &smaContLen); if (pSmaReq == NULL) return -1; @@ -463,13 +501,20 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea memcpy(smaObj.stb, pStb->name, TSDB_TABLE_FNAME_LEN); memcpy(smaObj.db, pDb->name, TSDB_DB_FNAME_LEN); smaObj.createdTime = taosGetTimestampMs(); +#if 0 smaObj.uid = mndGenerateUid(pCreate->name, TSDB_TABLE_FNAME_LEN); +#endif + smaObj.uid = TD_DEBUG_SMA_ID; + char resultTbName[TSDB_TABLE_FNAME_LEN + 16] = {0}; + snprintf(resultTbName, TSDB_TABLE_FNAME_LEN + 16, "td.tsma.rst.tb.%s", pCreate->name); + memcpy(smaObj.dstTbName, resultTbName, TSDB_TABLE_FNAME_LEN); + smaObj.dstTbUid = mndGenerateUid(smaObj.dstTbName, TSDB_TABLE_FNAME_LEN); smaObj.stbUid = pStb->uid; smaObj.dbUid = pStb->dbUid; smaObj.intervalUnit = pCreate->intervalUnit; smaObj.slidingUnit = pCreate->slidingUnit; smaObj.timezone = pCreate->timezone; - smaObj.dstVgId = pCreate->dstVgId; + // smaObj.dstVgId = pCreate->dstVgId; smaObj.interval = pCreate->interval; smaObj.offset = pCreate->offset; smaObj.sliding = pCreate->sliding; @@ -502,6 +547,42 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea memcpy(smaObj.ast, pCreate->ast, smaObj.astLen); } +#if 1 // only for debugging, not needed in common vgroups, only needed in dstVgroup. + SNode *pAst = NULL; + if (nodesStringToNode(smaObj.ast, &pAst) < 0) { + return -1; + } + if (qExtractResultSchema(pAst, &smaObj.schemaRow.nCols, &smaObj.schemaRow.pSchema) != 0) { + nodesDestroyNode(pAst); + return -1; + } + smaObj.schemaRow.version = 1; + + smaObj.schemaTag.nCols = 1; + smaObj.schemaTag.version = 1; + smaObj.schemaTag.pSchema = taosMemoryCalloc(1, sizeof(SSchema)); + if (!smaObj.schemaTag.pSchema) { + nodesDestroyNode(pAst); + return -1; + } + smaObj.schemaTag.pSchema[0].type = TSDB_DATA_TYPE_BIGINT; + smaObj.schemaTag.pSchema[0].bytes = TYPE_BYTES[TSDB_DATA_TYPE_BIGINT]; + smaObj.schemaTag.pSchema[0].colId = smaObj.schemaRow.nCols + PRIMARYKEY_TIMESTAMP_COL_ID; + smaObj.schemaTag.pSchema[0].flags = 0; + snprintf(smaObj.schemaTag.pSchema[0].name, TSDB_COL_NAME_LEN, "groupId"); + + nodesDestroyNode(pAst); + + SVgEpSet *pVgEpSet = NULL; + int32_t numOfVgroups = 0; + if (mndSmaGetVgEpSet(pMnode, pDb, &pVgEpSet, &numOfVgroups) != 0) { + return -1; + } + + smaObj.pVgEpSet = pVgEpSet; + smaObj.numOfVgroups = numOfVgroups; +#endif + SStreamObj streamObj = {0}; tstrncpy(streamObj.name, pCreate->name, TSDB_STREAM_FNAME_LEN); tstrncpy(streamObj.sourceDb, pDb->name, TSDB_DB_FNAME_LEN); diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 7abe9e3c0d..a331534a93 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -252,8 +252,12 @@ int32_t mndAddStreamToTrans(SMnode *pMnode, SStreamObj *pStream, const char *ast } if (qExtractResultSchema(pAst, (int32_t *)&pStream->outputSchema.nCols, &pStream->outputSchema.pSchema) != 0) { + nodesDestroyNode(pAst); return -1; } + // free + nodesDestroyNode(pAst); + #if 0 printf("|"); diff --git a/source/dnode/vnode/src/meta/metaSma.c b/source/dnode/vnode/src/meta/metaSma.c index 689cd511c4..910a0835bb 100644 --- a/source/dnode/vnode/src/meta/metaSma.c +++ b/source/dnode/vnode/src/meta/metaSma.c @@ -34,13 +34,13 @@ int32_t metaCreateTSma(SMeta *pMeta, int64_t version, SSmaCfg *pCfg) { SMetaReader mr = {0}; // validate req + // save smaIndex metaReaderInit(&mr, pMeta, 0); if (metaGetTableEntryByUid(&mr, pCfg->indexUid) == 0) { -// TODO: just for pass case #if 1 terrno = TSDB_CODE_TDB_TSMA_ALREADY_EXIST; metaReaderClear(&mr); - return -1; + return -1; // don't goto _err; #else metaReaderClear(&mr); return 0; diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index e738d3a408..05ad5c1cb2 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -167,13 +167,13 @@ int32_t tdFetchTbUidList(SSma *pSma, STbUidStore **ppStore, tb_uid_t suid, tb_ui */ int32_t tdProcessRSmaCreate(SVnode *pVnode, SVCreateStbReq *pReq) { SSma *pSma = pVnode->pSma; - SMeta *pMeta = pVnode->pMeta; - SMsgCb *pMsgCb = &pVnode->msgCb; if (!pReq->rollup) { smaTrace("vgId:%d, return directly since no rollup for stable %s %" PRIi64, SMA_VID(pSma), pReq->name, pReq->suid); return TSDB_CODE_SUCCESS; } + SMeta *pMeta = pVnode->pMeta; + SMsgCb *pMsgCb = &pVnode->msgCb; SRSmaParam *param = &pReq->pRSmaParam; if ((param->qmsg1Len == 0) && (param->qmsg2Len == 0)) { diff --git a/source/dnode/vnode/src/sma/smaTimeRange2.c b/source/dnode/vnode/src/sma/smaTimeRange2.c index 09adc1a6a2..df89ec1795 100644 --- a/source/dnode/vnode/src/sma/smaTimeRange2.c +++ b/source/dnode/vnode/src/sma/smaTimeRange2.c @@ -363,8 +363,8 @@ static int32_t tdGetSmaStorageLevel(STSmaKeepCfg *pCfg, int64_t interval) { */ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { STsdbCfg *pCfg = SMA_TSDB_CFG(pSma); +#if 0 const SArray *pDataBlocks = (const SArray *)msg; - int64_t testSkey = TSKEY_INITIAL_VAL; // TODO: destroy SSDataBlocks(msg) @@ -386,6 +386,7 @@ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { smaWarn("vgId:%d insert tSma data failed since pDataBlocks is empty", SMA_VID(pSma)); return TSDB_CODE_FAILED; } +#endif SSmaEnv *pEnv = SMA_TSMA_ENV(pSma); SSmaStat *pStat = SMA_ENV_STAT(pEnv); @@ -403,178 +404,8 @@ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { return TSDB_CODE_FAILED; } - STSma *pTSma = pItem->pTSma; - STSmaWriteH tSmaH = {0}; + STSma *pTSma = pItem->pTSma; - if (tdInitTSmaWriteH(&tSmaH, pSma, pDataBlocks, pTSma->interval, pTSma->intervalUnit) != 0) { - return TSDB_CODE_FAILED; - } - - char rPath[TSDB_FILENAME_LEN] = {0}; - char aPath[TSDB_FILENAME_LEN] = {0}; - snprintf(rPath, TSDB_FILENAME_LEN, "%s%s%" PRIi64, SMA_ENV_PATH(pEnv), TD_DIRSEP, indexUid); - tfsAbsoluteName(SMA_TFS(pSma), SMA_ENV_DID(pEnv), rPath, aPath); - if (!taosCheckExistFile(aPath)) { - if (tfsMkdirRecurAt(SMA_TFS(pSma), rPath, SMA_ENV_DID(pEnv)) != TSDB_CODE_SUCCESS) { - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - } - - // Step 1: Judge the storage level and days - int32_t storageLevel = tdGetSmaStorageLevel(pCfg, tSmaH.interval); - int32_t minutePerFile = tdGetTSmaDays(pSma, tSmaH.interval, storageLevel); - - char smaKey[SMA_KEY_LEN] = {0}; // key: skey + groupId - char dataBuf[512] = {0}; // val: aggr data // TODO: handle 512 buffer? - void *pDataBuf = NULL; - int32_t sz = taosArrayGetSize(pDataBlocks); - for (int32_t i = 0; i < sz; ++i) { - SSDataBlock *pDataBlock = taosArrayGet(pDataBlocks, i); - int32_t colNum = pDataBlock->info.numOfCols; - int32_t rows = pDataBlock->info.rows; - int32_t rowSize = pDataBlock->info.rowSize; - int64_t groupId = pDataBlock->info.groupId; - for (int32_t j = 0; j < rows; ++j) { - printf("|"); - TSKEY skey = TSKEY_INITIAL_VAL; // the start key of TS window by interval - void *pSmaKey = &smaKey; - bool isStartKey = false; - - int32_t tlen = 0; // reset the len - pDataBuf = &dataBuf; // reset the buf - for (int32_t k = 0; k < colNum; ++k) { - SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock->pDataBlock, k); - void *var = POINTER_SHIFT(pColInfoData->pData, j * pColInfoData->info.bytes); - switch (pColInfoData->info.type) { - case TSDB_DATA_TYPE_TIMESTAMP: - if (!isStartKey) { - isStartKey = true; - skey = *(TSKEY *)var; - testSkey = skey; - printf("= skey %" PRIi64 " groupId = %" PRIi64 "|", skey, groupId); - tdEncodeTSmaKey(groupId, skey, &pSmaKey); - } else { - printf(" %" PRIi64 " |", *(int64_t *)var); - tlen += taosEncodeFixedI64(&pDataBuf, *(int64_t *)var); - break; - } - break; - case TSDB_DATA_TYPE_BOOL: - case TSDB_DATA_TYPE_UTINYINT: - printf(" %15d |", *(uint8_t *)var); - tlen += taosEncodeFixedU8(&pDataBuf, *(uint8_t *)var); - break; - case TSDB_DATA_TYPE_TINYINT: - printf(" %15d |", *(int8_t *)var); - tlen += taosEncodeFixedI8(&pDataBuf, *(int8_t *)var); - break; - case TSDB_DATA_TYPE_SMALLINT: - printf(" %15d |", *(int16_t *)var); - tlen += taosEncodeFixedI16(&pDataBuf, *(int16_t *)var); - break; - case TSDB_DATA_TYPE_USMALLINT: - printf(" %15d |", *(uint16_t *)var); - tlen += taosEncodeFixedU16(&pDataBuf, *(uint16_t *)var); - break; - case TSDB_DATA_TYPE_INT: - printf(" %15d |", *(int32_t *)var); - tlen += taosEncodeFixedI32(&pDataBuf, *(int32_t *)var); - break; - case TSDB_DATA_TYPE_FLOAT: - printf(" %15f |", *(float *)var); - tlen += taosEncodeBinary(&pDataBuf, var, sizeof(float)); - break; - case TSDB_DATA_TYPE_UINT: - printf(" %15u |", *(uint32_t *)var); - tlen += taosEncodeFixedU32(&pDataBuf, *(uint32_t *)var); - break; - case TSDB_DATA_TYPE_BIGINT: - printf(" %15ld |", *(int64_t *)var); - tlen += taosEncodeFixedI64(&pDataBuf, *(int64_t *)var); - break; - case TSDB_DATA_TYPE_DOUBLE: - printf(" %15lf |", *(double *)var); - tlen += taosEncodeBinary(&pDataBuf, var, sizeof(double)); - case TSDB_DATA_TYPE_UBIGINT: - printf(" %15lu |", *(uint64_t *)var); - tlen += taosEncodeFixedU64(&pDataBuf, *(uint64_t *)var); - break; - case TSDB_DATA_TYPE_NCHAR: { - char tmpChar[100] = {0}; - strncpy(tmpChar, varDataVal(var), varDataLen(var)); - printf(" %s |", tmpChar); - tlen += taosEncodeBinary(&pDataBuf, varDataVal(var), varDataLen(var)); - break; - } - case TSDB_DATA_TYPE_VARCHAR: { // TSDB_DATA_TYPE_BINARY - char tmpChar[100] = {0}; - strncpy(tmpChar, varDataVal(var), varDataLen(var)); - printf(" %s |", tmpChar); - tlen += taosEncodeBinary(&pDataBuf, varDataVal(var), varDataLen(var)); - break; - } - case TSDB_DATA_TYPE_VARBINARY: - // TODO: add binary/varbinary - TASSERT(0); - default: - printf("the column type %" PRIi16 " is undefined\n", pColInfoData->info.type); - TASSERT(0); - break; - } - } - printf("\n"); - // if ((tlen > 0) && (skey != TSKEY_INITIAL_VAL)) { - if (tlen > 0) { - int32_t fid = (int32_t)(TSDB_KEY_FID(skey, minutePerFile, pCfg->precision)); - - // Step 2: Set the DFile for storage of SMA index, and iterate/split the TSma data and store to B+Tree index - // file - // - Set and open the DFile or the B+Tree file - // TODO: tsdbStartTSmaCommit(); - if (fid != tSmaH.dFile.fid) { - if (tSmaH.dFile.fid != SMA_IVLD_FID) { - tdSmaEndCommit(pEnv); - smaCloseDBF(&tSmaH.dFile); - } - tdSetTSmaDataFile(&tSmaH, indexUid, fid); - smaDebug("@@@ vgId:%d write to DBF %s, days:%d, interval:%" PRIi64 ", storageLevel:%" PRIi32 - " queryKey:%" PRIi64, - SMA_VID(pSma), tSmaH.dFile.path, minutePerFile, tSmaH.interval, storageLevel, testSkey); - if (smaOpenDBF(pEnv->dbEnv, &tSmaH.dFile) != 0) { - smaWarn("vgId:%d open DB file %s failed since %s", SMA_VID(pSma), - tSmaH.dFile.path ? tSmaH.dFile.path : "path is NULL", tstrerror(terrno)); - tdDestroyTSmaWriteH(&tSmaH); - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - tdSmaBeginCommit(pEnv); - } - - if (tdInsertTSmaBlocks(&tSmaH, &smaKey, SMA_KEY_LEN, dataBuf, tlen, &pEnv->txn) != 0) { - smaWarn("vgId:%d insert tsma data blocks fail for index %" PRIi64 ", skey %" PRIi64 ", groupId %" PRIi64 - " since %s", - SMA_VID(pSma), indexUid, skey, groupId, tstrerror(terrno)); - tdSmaEndCommit(pEnv); - tdDestroyTSmaWriteH(&tSmaH); - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - - smaDebug("vgId:%d insert tsma data blocks success for index %" PRIi64 ", skey %" PRIi64 ", groupId %" PRIi64, - SMA_VID(pSma), indexUid, skey, groupId); - // TODO:tsdbEndTSmaCommit(); - - // Step 3: reset the SSmaStat - tdResetExpiredWindow(pSma, pStat, indexUid, skey); - } else { - smaWarn("vgId:%d invalid data skey:%" PRIi64 ", tlen %" PRIi32 " during insert tSma data for %" PRIi64, - SMA_VID(pSma), skey, tlen, indexUid); - } - } - } - tdSmaEndCommit(pEnv); // TODO: not commit for every insert - tdDestroyTSmaWriteH(&tSmaH); tdUnRefSmaStat(pSma, pStat); return TSDB_CODE_SUCCESS; @@ -865,6 +696,19 @@ int32_t tdProcessTSmaCreateImpl(SSma *pSma, int64_t version, const char *pMsg) { return -1; } + if (TD_VID(pSma->pVnode) == pCfg->dstVgId) { + // create stable to save tsma result in dstVgId + SVCreateStbReq pReq = {0}; + pReq.name = pCfg->dstTbName; + pReq.suid = pCfg->dstTbUid; + pReq.schemaRow = pCfg->schemaRow; + pReq.schemaTag = pCfg->schemaTag; + + if (metaCreateSTable(SMA_META(pSma), version, &pReq) < 0) { + return -1; + } + } + tdTSmaAdd(pSma, 1); return 0; } diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index 9be94eb5b6..2f26fba50a 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -241,13 +241,13 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) if (tdUpdateExpireWindow(pTq->pVnode->pSma, msg, ver) != 0) { // TODO handle sma error } - void* data = taosMemoryMalloc(msgLen); - if (data == NULL) { - return -1; - } - memcpy(data, msg, msgLen); + // void* data = taosMemoryMalloc(msgLen); + // if (data == NULL) { + // return -1; + // } + // memcpy(data, msg, msgLen); - tqProcessStreamTrigger(pTq, data); + // tqProcessStreamTrigger(pTq, data); } return 0; diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 7a9c9ef393..dd705011c4 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -174,11 +174,12 @@ int32_t vnodeProcessWriteReq(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp } vTrace("vgId:%d, process %s request success, version: %" PRId64, TD_VID(pVnode), TMSG_INFO(pMsg->msgType), version); - +#if 1 if (tqPushMsg(pVnode->pTq, pMsg->pCont, pMsg->contLen, pMsg->msgType, version) < 0) { vError("vgId:%d, failed to push msg to TQ since %s", TD_VID(pVnode), tstrerror(terrno)); return -1; } +#endif // commit if need if (vnodeShouldCommit(pVnode)) { @@ -278,8 +279,11 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, int64_t version, SRpcMsg *pMsg, SRp void smaHandleRes(void *pVnode, int64_t smaId, const SArray *data) { // TODO - // blockDebugShowData(data); + // blockDebugShowData(data, __func__); +#if 0 tdProcessTSmaInsert(((SVnode *)pVnode)->pSma, smaId, (const char *)data); +#endif + tdProcessTSmaInsert(((SVnode *)pVnode)->pSma, TD_DEBUG_SMA_ID, NULL); } void vnodeUpdateMetaRsp(SVnode *pVnode, STableMetaRsp *pMetaRsp) { From 75182bb0ec20d38452718eec80f2abbdd699bb0f Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Wed, 8 Jun 2022 15:20:29 +0800 Subject: [PATCH 02/16] feat: tsma exp wnd clear --- include/common/tmsg.h | 22 +++++++++ include/common/tmsgdef.h | 1 + source/common/src/tmsg.c | 52 +++++++++++++++++++-- source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 1 + source/dnode/vnode/src/vnd/vnodeSvr.c | 49 +++++++++++++++++-- 5 files changed, 116 insertions(+), 9 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 72f36a6995..de64c5157c 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -2455,6 +2455,28 @@ int32_t tDecodeSVGetTsmaExpWndsReq(SDecoder* pCoder, SVGetTsmaExpWndsReq* pReq); int32_t tEncodeSVGetTSmaExpWndsRsp(SEncoder* pCoder, const SVGetTsmaExpWndsRsp* pReq); int32_t tDecodeSVGetTsmaExpWndsRsp(SDecoder* pCoder, SVGetTsmaExpWndsRsp* pReq); +typedef struct { + int64_t nKeys; // n consecutive keys since skey + int64_t skey; +} SVTsmaExpWndItem; + +typedef struct { + int64_t indexUid; + int64_t version; // tsma result version + int64_t nItems; + SVTsmaExpWndItem items[]; +} SVClrTsmaExpWndsReq; + +typedef struct { + int64_t indexUid; + int32_t code; +} SVClrTsmaExpWndsRsp; + +int32_t tEncodeSVClrTsmaExpWndsReq(SEncoder* pCoder, const SVClrTsmaExpWndsReq* pReq); +int32_t tDecodeSVClrTsmaExpWndsReq(SDecoder* pCoder, SVClrTsmaExpWndsReq* pReq); +int32_t tEncodeSVClrTsmaExpWndsRsp(SEncoder* pCoder, const SVClrTsmaExpWndsRsp* pReq); +int32_t tDecodeSVClrTsmaExpWndsRsp(SDecoder* pCoder, SVClrTsmaExpWndsRsp* pReq); + typedef struct { int idx; } SMCreateFullTextReq; diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index e922df64b3..81ddc68d5d 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -190,6 +190,7 @@ enum { TD_DEF_MSG_TYPE(TDMT_VND_DROP_SMA, "vnode-drop-sma", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_SUBMIT_RSMA, "vnode-submit-rsma", SSubmitReq, SSubmitRsp) TD_DEF_MSG_TYPE(TDMT_VND_GET_TSMA_EXP_WNDS, "vnode-get-tsma-expired-windows", SVGetTsmaExpWndsReq, SVGetTsmaExpWndsRsp) + TD_DEF_MSG_TYPE(TDMT_VND_CLR_TSMA_EXP_WNDS, "vnode-clr-tsma-expired-windows", SVClrTsmaExpWndsReq, SVClrTsmaExpWndsRsp) TD_DEF_MSG_TYPE(TDMT_VND_DELETE, "delete-data", SVDeleteReq, SVDeleteRsp) TD_DEF_MSG_TYPE(TDMT_VND_ALTER_CONFIG, "alter-config", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_ALTER_REPLICA, "alter-replica", NULL, NULL) diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index bbd7c2e7a0..d7ebf03fe1 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -2419,7 +2419,7 @@ int32_t tDeserializeSTableIndexReq(void *buf, int32_t bufLen, STableIndexReq *pR return 0; } -int32_t tSerializeSTableIndexInfo(SEncoder *pEncoder, STableIndexInfo* pInfo) { +int32_t tSerializeSTableIndexInfo(SEncoder *pEncoder, STableIndexInfo *pInfo) { if (tEncodeI8(pEncoder, pInfo->intervalUnit) < 0) return -1; if (tEncodeI8(pEncoder, pInfo->slidingUnit) < 0) return -1; if (tEncodeI64(pEncoder, pInfo->interval) < 0) return -1; @@ -2440,7 +2440,7 @@ int32_t tSerializeSTableIndexRsp(void *buf, int32_t bufLen, const STableIndexRsp if (tEncodeI32(&encoder, num) < 0) return -1; if (num > 0) { for (int32_t i = 0; i < num; ++i) { - STableIndexInfo* pInfo = (STableIndexInfo*)taosArrayGet(pRsp->pIndex, i); + STableIndexInfo *pInfo = (STableIndexInfo *)taosArrayGet(pRsp->pIndex, i); if (tSerializeSTableIndexInfo(&encoder, pInfo) < 0) return -1; } } @@ -2489,7 +2489,6 @@ int32_t tDeserializeSTableIndexRsp(void *buf, int32_t bufLen, STableIndexRsp *pR return 0; } - int32_t tSerializeSShowReq(void *buf, int32_t bufLen, SShowReq *pReq) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); @@ -3856,7 +3855,7 @@ int32_t tEncodeTSma(SEncoder *pCoder, const STSma *pSma) { if (tEncodeCStr(pCoder, pSma->tagsFilter) < 0) return -1; } - if (pSma->numOfVgroups) { // only needed in dstVgroup + if (pSma->numOfVgroups) { // only needed in dstVgroup for (int32_t v = 0; v < pSma->numOfVgroups; ++v) { if (tEncodeI32(pCoder, pSma->pVgEpSet[v].vgId) < 0) return -1; if (tEncodeI8(pCoder, pSma->pVgEpSet[v].epSet.inUse) < 0) return -1; @@ -3903,7 +3902,7 @@ int32_t tDecodeTSma(SDecoder *pCoder, STSma *pSma) { } else { pSma->tagsFilter = NULL; } - if (pSma->numOfVgroups > 0) { // only needed in dstVgroup + if (pSma->numOfVgroups > 0) { // only needed in dstVgroup pSma->pVgEpSet = (SVgEpSet *)tDecoderMalloc(pCoder, pSma->numOfVgroups * sizeof(SVgEpSet)); if (!pSma->pVgEpSet) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -4018,6 +4017,49 @@ int32_t tDecodeSVGetTsmaExpWndsRsp(SDecoder *pCoder, SVGetTsmaExpWndsRsp *pReq) return 0; } +int32_t tEncodeSVClrTsmaExpWndsReq(SEncoder *pCoder, const SVClrTsmaExpWndsReq *pReq) { + if (tStartEncode(pCoder) < 0) return -1; + if (tEncodeI64(pCoder, pReq->indexUid) < 0) return -1; + if (tEncodeI64(pCoder, pReq->version) < 0) return -1; + if (tEncodeI64v(pCoder, pReq->nItems) < 0) return -1; + for (int64_t n = 0; pReq->nItems; ++n) { + if (tEncodeI64v(pCoder, pReq->items[n].nKeys) < 0) return -1; + if (tEncodeI64(pCoder, pReq->items[n].skey) < 0) return -1; + } + tEndEncode(pCoder); + return 0; +} + +int32_t tDecodeSVClrTsmaExpWndsReq(SDecoder *pCoder, SVClrTsmaExpWndsReq *pReq) { + if (tStartDecode(pCoder) < 0) return -1; + if (tDecodeI64(pCoder, &pReq->indexUid) < 0) return -1; + if (tDecodeI64(pCoder, &pReq->version) < 0) return -1; + if (tDecodeI64v(pCoder, &pReq->nItems) < 0) return -1; + + for (int64_t i = 0; i < pReq->nItems; ++i) { + if (tDecodeI64v(pCoder, &pReq->items[i].nKeys) < 0) return -1; + if (tDecodeI64(pCoder, &pReq->items[i].skey) < 0) return -1; + } + tEndDecode(pCoder); + return 0; +} + +int32_t tEncodeSVClrTsmaExpWndsRsp(SEncoder *pCoder, const SVClrTsmaExpWndsRsp *pReq) { + if (tStartEncode(pCoder) < 0) return -1; + if (tEncodeI64(pCoder, pReq->indexUid) < 0) return -1; + if (tEncodeI32(pCoder, pReq->code) < 0) return -1; + tEndEncode(pCoder); + return 0; +} + +int32_t tDecodeSVClrTsmaExpWndsRsp(SDecoder *pCoder, SVClrTsmaExpWndsRsp *pReq) { + if (tStartDecode(pCoder) < 0) return -1; + if (tDecodeI64(pCoder, &pReq->indexUid) < 0) return -1; + if (tDecodeI32(pCoder, &pReq->code) < 0) return -1; + tEndDecode(pCoder); + return 0; +} + int32_t tSerializeSVDeleteReq(void *buf, int32_t bufLen, SVDeleteReq *pReq) { int32_t headLen = sizeof(SMsgHead); if (buf != NULL) { diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 4c5a32536f..9c3c0af986 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -348,6 +348,7 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_CONSUME, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_DELETE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_QUERY_HEARTBEAT, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_CLR_TSMA_EXP_WNDS, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TRIGGER, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DEPLOY, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index dd705011c4..157162161e 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -25,6 +25,7 @@ static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t version, void *pReq static int32_t vnodeProcessCreateTSmaReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessWriteMsg(SVnode *pVnode, int64_t version, SRpcMsg *pMsg, SRpcMsg *pRsp); +static int32_t vnodeProcessExpWndsClrReq(SVnode *pVnode, void *pReq, int32_t len, SRpcMsg *pRsp); int32_t vnodePreprocessReq(SVnode *pVnode, SRpcMsg *pMsg) { int32_t code = 0; @@ -174,7 +175,8 @@ int32_t vnodeProcessWriteReq(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp } vTrace("vgId:%d, process %s request success, version: %" PRId64, TD_VID(pVnode), TMSG_INFO(pMsg->msgType), version); -#if 1 + +#if 0 if (tqPushMsg(pVnode->pTq, pMsg->pCont, pMsg->contLen, pMsg->msgType, version) < 0) { vError("vgId:%d, failed to push msg to TQ since %s", TD_VID(pVnode), tstrerror(terrno)); return -1; @@ -225,6 +227,7 @@ int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { vTrace("message in fetch queue is processing"); char *msgstr = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); + switch (pMsg->msgType) { case TDMT_VND_FETCH: return qWorkerProcessFetchMsg(pVnode, pVnode->pQuery, pMsg, 0); @@ -236,13 +239,10 @@ int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { return qWorkerProcessDropMsg(pVnode, pVnode->pQuery, pMsg, 0); case TDMT_VND_QUERY_HEARTBEAT: return qWorkerProcessHbMsg(pVnode, pVnode->pQuery, pMsg, 0); - case TDMT_VND_TABLE_META: return vnodeGetTableMeta(pVnode, pMsg); - case TDMT_VND_CONSUME: return tqProcessPollReq(pVnode->pTq, pMsg, pInfo->workerId); - case TDMT_STREAM_TASK_RUN: return tqProcessTaskRunReq(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_DISPATCH: @@ -253,6 +253,8 @@ int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { return tqProcessTaskDispatchRsp(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_RECOVER_RSP: return tqProcessTaskRecoverRsp(pVnode->pTq, pMsg); + case TDMT_VND_CLR_TSMA_EXP_WNDS: + return vnodeProcessExpWndsClrReq(pVnode, pMsg, msgLen, NULL); default: vError("unknown msg type:%d in fetch queue", pMsg->msgType); return TSDB_CODE_VND_APP_ERROR; @@ -896,3 +898,42 @@ static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t version, void return 0; } + +static int32_t vnodeProcessExpWndsClrReq(SVnode *pVnode, void *pReq, int32_t len, SRpcMsg *pRsp) { + SVClrTsmaExpWndsReq req = {0}; + SDecoder coder = {0}; + + if (pRsp) { + pRsp->msgType = TDMT_VND_CLR_TSMA_EXP_WNDS_RSP; + pRsp->code = TSDB_CODE_SUCCESS; + pRsp->pCont = NULL; + pRsp->contLen = 0; + } + + // decode and process + tDecoderInit(&coder, pReq, len); + + if (tDecodeSVClrTsmaExpWndsReq(&coder, &req) < 0) { + terrno = TSDB_CODE_MSG_DECODE_ERROR; + if (pRsp) pRsp->code = terrno; + goto _err; + } + + ASSERT(0); + + // if (tdProcess(pVnode->pSma, version, (const char *)&req) < 0) { + // if (pRsp) pRsp->code = terrno; + // goto _err; + // } + + tDecoderClear(&coder); + vDebug("vgId:%d, success to process expWnds clear for tsma %" PRIi64 " version %" PRIi64, TD_VID(pVnode), + req.indexUid, req.version); + return 0; + +_err: + tDecoderClear(&coder); + vError("vgId:%d, success to process expWnds clear for tsma %" PRIi64 " version %" PRIi64 " since %s", TD_VID(pVnode), + req.indexUid, req.version, terrstr()); + return -1; +} \ No newline at end of file From 2b2485d175db7509dc0bc4a1c8333a1c4e6d1b86 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Thu, 9 Jun 2022 20:02:55 +0800 Subject: [PATCH 03/16] feat: tsma exp wnds process --- include/util/taoserror.h | 16 +- source/common/src/tmsg.c | 4 +- source/dnode/vnode/src/inc/sma.h | 7 +- source/dnode/vnode/src/inc/vnodeInt.h | 3 +- source/dnode/vnode/src/meta/metaSma.c | 2 +- source/dnode/vnode/src/sma/sma.c | 11 +- source/dnode/vnode/src/sma/smaEnv.c | 6 +- source/dnode/vnode/src/sma/smaRollup.c | 6 +- source/dnode/vnode/src/sma/smaTimeRange.c | 65 ++++--- source/dnode/vnode/src/sma/smaTimeRange2.c | 205 ++++++++++++++------- source/dnode/vnode/src/tsdb/tsdbCommit.c | 2 +- source/dnode/vnode/src/tsdb/tsdbMemTable.c | 4 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 8 +- source/dnode/vnode/test/tsdbSmaTest.cpp | 2 +- source/util/src/terror.c | 46 +++-- 15 files changed, 244 insertions(+), 143 deletions(-) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index ae0191e6d2..cac5c0758d 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -351,9 +351,6 @@ int32_t* taosGetErrno(); #define TSDB_CODE_TDB_NO_CACHE_LAST_ROW TAOS_DEF_ERROR_CODE(0, 0x0619) #define TSDB_CODE_TDB_TABLE_RECREATED TAOS_DEF_ERROR_CODE(0, 0x061A) #define TSDB_CODE_TDB_TDB_ENV_OPEN_ERROR TAOS_DEF_ERROR_CODE(0, 0x061B) -#define TSDB_CODE_TDB_NO_SMA_INDEX_IN_META TAOS_DEF_ERROR_CODE(0, 0x061C) -#define TSDB_CODE_TDB_INVALID_SMA_STAT TAOS_DEF_ERROR_CODE(0, 0x061D) -#define TSDB_CODE_TDB_TSMA_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x061E) // query #define TSDB_CODE_QRY_INVALID_QHANDLE TAOS_DEF_ERROR_CODE(0, 0x0700) @@ -684,6 +681,19 @@ int32_t* taosGetErrno(); #define TSDB_CODE_SML_INVALID_DATA TAOS_DEF_ERROR_CODE(0, 0x3002) #define TSDB_CODE_SML_INVALID_DB_CONF TAOS_DEF_ERROR_CODE(0, 0x3003) +//tsma +#define TSDB_CODE_TSMA_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x3100) +#define TSDB_CODE_TSMA_NO_INDEX_IN_META TAOS_DEF_ERROR_CODE(0, 0x3101) +#define TSDB_CODE_TSMA_INVALID_ENV TAOS_DEF_ERROR_CODE(0, 0x3102) +#define TSDB_CODE_TSMA_INVALID_STAT TAOS_DEF_ERROR_CODE(0, 0x3103) +#define TSDB_CODE_TSMA_NO_INDEX_IN_CACHE TAOS_DEF_ERROR_CODE(0, 0x3104) +#define TSDB_CODE_TSMA_RM_SKEY_IN_HASH TAOS_DEF_ERROR_CODE(0, 0x3105) + +//rsma +#define TSDB_CODE_RSMA_INVALID_ENV TAOS_DEF_ERROR_CODE(0, 0x3150) +#define TSDB_CODE_RSMA_INVALID_STAT TAOS_DEF_ERROR_CODE(0, 0x3151) + + #ifdef __cplusplus } #endif diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index fd99953e7a..42f004ebf5 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -4049,7 +4049,7 @@ int32_t tDecodeSVClrTsmaExpWndsReq(SDecoder *pCoder, SVClrTsmaExpWndsReq *pReq) int32_t tEncodeSVClrTsmaExpWndsRsp(SEncoder *pCoder, const SVClrTsmaExpWndsRsp *pReq) { if (tStartEncode(pCoder) < 0) return -1; if (tEncodeI64(pCoder, pReq->indexUid) < 0) return -1; - if (tEncodeI32(pCoder, pReq->code) < 0) return -1; + if (tEncodeI32v(pCoder, pReq->code) < 0) return -1; tEndEncode(pCoder); return 0; } @@ -4057,7 +4057,7 @@ int32_t tEncodeSVClrTsmaExpWndsRsp(SEncoder *pCoder, const SVClrTsmaExpWndsRsp * int32_t tDecodeSVClrTsmaExpWndsRsp(SDecoder *pCoder, SVClrTsmaExpWndsRsp *pReq) { if (tStartDecode(pCoder) < 0) return -1; if (tDecodeI64(pCoder, &pReq->indexUid) < 0) return -1; - if (tDecodeI32(pCoder, &pReq->code) < 0) return -1; + if (tDecodeI32v(pCoder, &pReq->code) < 0) return -1; tEndDecode(pCoder); return 0; } diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index 4ca62f1de9..7eb5c34e5d 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -70,7 +70,7 @@ struct SSmaStatItem { * N.B. only applicable to tsma */ int8_t state; // ETsdbSmaStat - SHashObj *expiredWindows; // key: skey of time window, value: version + SHashObj *expireWindows; // key: skey of time window, value: version STSma *pTSma; // cache schema }; @@ -128,7 +128,7 @@ int32_t tdInsertRSmaData(SSma *pSma, char *msg); int32_t tdRefSmaStat(SSma *pSma, SSmaStat *pStat); int32_t tdUnRefSmaStat(SSma *pSma, SSmaStat *pStat); -int32_t tdCheckAndInitSmaEnv(SSma *pSma, int8_t smaType); +int32_t tdCheckAndInitSmaEnv(SSma *pSma, int8_t smaType, bool onlyCheck); int32_t tdLockSma(SSma *pSma); int32_t tdUnLockSma(SSma *pSma); @@ -219,7 +219,8 @@ static int32_t tdInitSmaEnv(SSma *pSma, int8_t smaType, const char *path, SDisk void *tdFreeRSmaInfo(SRSmaInfo *pInfo); int32_t tdProcessTSmaCreateImpl(SSma *pSma, int64_t version, const char *pMsg); -int32_t tdUpdateExpiredWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t version); +int32_t tdUpdateExpireWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t version); +int32_t tdClearExpireWindowImpl(SSma *pSma, const SVClrTsmaExpWndsReq *pMsg); // TODO: This is the basic params, and should wrap the params to a queryHandle. int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY querySKey, int32_t nMaxResult); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 5532f202fc..944a03f70a 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -151,6 +151,7 @@ int32_t smaOpen(SVnode* pVnode); int32_t smaClose(SSma* pSma); int32_t tdUpdateExpireWindow(SSma* pSma, const SSubmitReq* pMsg, int64_t version); +int32_t tdClearExpireWindow(SSma* pSma, const SVClrTsmaExpWndsReq* pMsg); int32_t tdProcessTSmaCreate(SSma* pSma, int64_t version, const char* msg); int32_t tdProcessTSmaInsert(SSma* pSma, int64_t indexUid, const char* msg); @@ -227,7 +228,7 @@ struct SVnode { SQHandle* pQuery; }; -#define TD_VID(PVNODE) (PVNODE)->config.vgId +#define TD_VID(PVNODE) ((PVNODE)->config.vgId) #define VND_TSDB(vnd) ((vnd)->pTsdb) #define VND_RSMA0(vnd) ((vnd)->pTsdb) diff --git a/source/dnode/vnode/src/meta/metaSma.c b/source/dnode/vnode/src/meta/metaSma.c index 910a0835bb..0b6a526d8c 100644 --- a/source/dnode/vnode/src/meta/metaSma.c +++ b/source/dnode/vnode/src/meta/metaSma.c @@ -38,7 +38,7 @@ int32_t metaCreateTSma(SMeta *pMeta, int64_t version, SSmaCfg *pCfg) { metaReaderInit(&mr, pMeta, 0); if (metaGetTableEntryByUid(&mr, pCfg->indexUid) == 0) { #if 1 - terrno = TSDB_CODE_TDB_TSMA_ALREADY_EXIST; + terrno = TSDB_CODE_TSMA_ALREADY_EXIST; metaReaderClear(&mr); return -1; // don't goto _err; #else diff --git a/source/dnode/vnode/src/sma/sma.c b/source/dnode/vnode/src/sma/sma.c index fd5dd080ca..5782318006 100644 --- a/source/dnode/vnode/src/sma/sma.c +++ b/source/dnode/vnode/src/sma/sma.c @@ -38,8 +38,15 @@ int32_t tdProcessTSmaCreate(SSma* pSma, int64_t version, const char* msg) { int32_t tdUpdateExpireWindow(SSma* pSma, const SSubmitReq* pMsg, int64_t version) { int32_t code = TSDB_CODE_SUCCESS; - if ((code = tdUpdateExpiredWindowImpl(pSma, pMsg, version)) < 0) { - smaWarn("vgId:%d, update expired sma window failed since %s", SMA_VID(pSma), tstrerror(terrno)); + if ((code = tdUpdateExpireWindowImpl(pSma, pMsg, version)) < 0) { + smaWarn("vgId:%d, update expire window failed since %s", SMA_VID(pSma), tstrerror(terrno)); + } + return code; +} +int32_t tdClearExpireWindow(SSma* pSma, const SVClrTsmaExpWndsReq* pMsg) { + int32_t code = TSDB_CODE_SUCCESS; + if ((code = tdClearExpireWindowImpl(pSma, pMsg)) < 0) { + smaWarn("vgId:%d, update expire window failed since %s", SMA_VID(pSma), tstrerror(terrno)); } return code; } diff --git a/source/dnode/vnode/src/sma/smaEnv.c b/source/dnode/vnode/src/sma/smaEnv.c index 179f573e8d..d15769e28e 100644 --- a/source/dnode/vnode/src/sma/smaEnv.c +++ b/source/dnode/vnode/src/sma/smaEnv.c @@ -242,7 +242,7 @@ static int32_t tdInitSmaStat(SSmaStat **pSmaStat, int8_t smaType) { } /** - * 1. Lazy mode utilized when init SSmaStat to update expired window(or hungry mode when tdNew). + * 1. Lazy mode utilized when init SSmaStat to update expire window(or hungry mode when tdNew). * 2. Currently, there is mutex lock when init SSmaEnv, thus no need add lock on SSmaStat, and please add lock if * tdInitSmaStat invoked in other multithread environment later. */ @@ -280,7 +280,7 @@ void *tdFreeSmaStatItem(SSmaStatItem *pSmaStatItem) { if (pSmaStatItem) { tDestroyTSma(pSmaStatItem->pTSma); taosMemoryFreeClear(pSmaStatItem->pTSma); - taosHashCleanup(pSmaStatItem->expiredWindows); + taosHashCleanup(pSmaStatItem->expireWindows); taosMemoryFreeClear(pSmaStatItem); } return NULL; @@ -341,7 +341,7 @@ int32_t tdUnLockSma(SSma *pSma) { return 0; } -int32_t tdCheckAndInitSmaEnv(SSma *pSma, int8_t smaType) { +int32_t tdCheckAndInitSmaEnv(SSma *pSma, int8_t smaType, bool onlyCheck) { SSmaEnv *pEnv = NULL; // return if already init diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 05ad5c1cb2..1b34529506 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -65,7 +65,7 @@ static FORCE_INLINE int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SA pRSmaInfo = taosHashGet(SMA_STAT_INFO_HASH(pStat), suid, sizeof(tb_uid_t)); if (!pRSmaInfo || !(pRSmaInfo = *(SRSmaInfo **)pRSmaInfo)) { smaError("vgId:%d, failed to get rsma info for uid:%" PRIi64, SMA_VID(pSma), *suid); - terrno = TSDB_CODE_TDB_INVALID_SMA_STAT; + terrno = TSDB_CODE_RSMA_INVALID_STAT; return TSDB_CODE_FAILED; } @@ -132,7 +132,7 @@ int32_t tdFetchTbUidList(SSma *pSma, STbUidStore **ppStore, tb_uid_t suid, tb_ui SSmaStat *pStat = SMA_ENV_STAT(pEnv); SHashObj *infoHash = NULL; if (!pStat || !(infoHash = SMA_STAT_INFO_HASH(pStat))) { - terrno = TSDB_CODE_TDB_INVALID_SMA_STAT; + terrno = TSDB_CODE_RSMA_INVALID_STAT; return TSDB_CODE_FAILED; } @@ -181,7 +181,7 @@ int32_t tdProcessRSmaCreate(SVnode *pVnode, SVCreateStbReq *pReq) { return TSDB_CODE_SUCCESS; } - if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_ROLLUP) != TSDB_CODE_SUCCESS) { + if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_ROLLUP, false) != TSDB_CODE_SUCCESS) { terrno = TSDB_CODE_TDB_INIT_FAILED; return TSDB_CODE_FAILED; } diff --git a/source/dnode/vnode/src/sma/smaTimeRange.c b/source/dnode/vnode/src/sma/smaTimeRange.c index b72be06455..4cc9703531 100644 --- a/source/dnode/vnode/src/sma/smaTimeRange.c +++ b/source/dnode/vnode/src/sma/smaTimeRange.c @@ -69,10 +69,10 @@ static int32_t tdInitTSmaFile(STSmaReadH *pSmaH, int64_t indexUid, TSKEY skey); static bool tdSetAndOpenTSmaFile(STSmaReadH *pReadH, TSKEY *queryKey); static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyLen, void *pData, int32_t dataLen, TXN *txn); -// expired window +// expire window -static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, int64_t version); -static int32_t tdResetExpiredWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey); +static int32_t tdSetExpireWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, int64_t version); +static int32_t tdResetExpireWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey); static int32_t tdDropTSmaDataImpl(SSma *pSma, int64_t indexUid); // read data @@ -319,7 +319,7 @@ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { // For super table aggregation, the sma data is stored in vgroup calculated from the hash value of stable name. Thus // the sma data would arrive ahead of the update-expired-window msg. - if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE) != TSDB_CODE_SUCCESS) { + if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE, false) != TSDB_CODE_SUCCESS) { terrno = TSDB_CODE_TDB_INIT_FAILED; return TSDB_CODE_FAILED; } @@ -347,7 +347,7 @@ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { } if (!pItem || !(pItem = *(SSmaStatItem **)pItem) || tdSmaStatIsDropped(pItem)) { - terrno = TSDB_CODE_TDB_INVALID_SMA_STAT; + terrno = TSDB_CODE_TSMA_INVALID_STAT; tdUnRefSmaStat(pSma, pStat); return TSDB_CODE_FAILED; } @@ -515,7 +515,7 @@ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { // TODO:tsdbEndTSmaCommit(); // Step 3: reset the SSmaStat - tdResetExpiredWindow(pSma, pStat, indexUid, skey); + tdResetExpireWindow(pSma, pStat, indexUid, skey); } else { smaWarn("vgId:%d, invalid data skey:%" PRIi64 ", tlen %" PRIi32 " during insert tSma data for %" PRIi64, SMA_VID(pSma), skey, tlen, indexUid); @@ -572,7 +572,7 @@ static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyL } /** - * @brief When sma data received from stream computing, make the relative expired window valid. + * @brief When sma data received from stream computing, make the relative expire window valid. * * @param pSma * @param pStat @@ -580,7 +580,7 @@ static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyL * @param skey * @return int32_t */ -static int32_t tdResetExpiredWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey) { +static int32_t tdResetExpireWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey) { SSmaStatItem *pItem = NULL; tdRefSmaStat(pSma, pStat); @@ -591,14 +591,14 @@ static int32_t tdResetExpiredWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUi if ((pItem) && ((pItem = *(SSmaStatItem **)pItem))) { // pItem resides in hash buffer all the time unless drop sma index // TODO: multithread protect - if (taosHashRemove(pItem->expiredWindows, &skey, sizeof(TSKEY)) != 0) { + if (taosHashRemove(pItem->expireWindows, &skey, sizeof(TSKEY)) != 0) { // error handling tdUnRefSmaStat(pSma, pStat); - smaWarn("vgId:%d, remove skey %" PRIi64 " from expired window for sma index %" PRIi64 " fail", SMA_VID(pSma), skey, + smaWarn("vgId:%d, remove skey %" PRIi64 " from expire window for sma index %" PRIi64 " fail", SMA_VID(pSma), skey, indexUid); return TSDB_CODE_FAILED; } - smaDebug("vgId:%d, remove skey %" PRIi64 " from expired window for sma index %" PRIi64 " succeed", SMA_VID(pSma), + smaDebug("vgId:%d, remove skey %" PRIi64 " from expire window for sma index %" PRIi64 " succeed", SMA_VID(pSma), skey, indexUid); // TODO: use a standalone interface to received state upate notification from stream computing module. /** @@ -612,7 +612,7 @@ static int32_t tdResetExpiredWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUi } else { // error handling tdUnRefSmaStat(pSma, pStat); - smaWarn("vgId:%d, expired window %" PRIi64 " not exists for sma index %" PRIi64, SMA_VID(pSma), skey, indexUid); + smaWarn("vgId:%d, expire window %" PRIi64 " not exists for sma index %" PRIi64, SMA_VID(pSma), skey, indexUid); return TSDB_CODE_FAILED; } @@ -711,7 +711,7 @@ int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY query int32_t nQueryWin = taosArrayGetSize(pQuerySKey); for (int32_t n = 0; n < nQueryWin; ++n) { TSKEY skey = taosArrayGet(pQuerySKey, n); - if (taosHashGet(pItem->expiredWindows, &skey, sizeof(TSKEY))) { + if (taosHashGet(pItem->expireWindows, &skey, sizeof(TSKEY))) { // TODO: mark this window as expired. } } @@ -721,18 +721,18 @@ int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY query int8_t smaStat = 0; if (!tdSmaStatIsOK(pItem, &smaStat)) { // TODO: multiple check for large scale sma query tdUnRefSmaStat(pSma, pStat); - terrno = TSDB_CODE_TDB_INVALID_SMA_STAT; + terrno = TSDB_CODE_TSMA_INVALID_STAT; smaWarn("vgId:%d, getTSmaDataImpl failed from index %" PRIi64 " since %s %" PRIi8, SMA_VID(pSma), indexUid, tstrerror(terrno), smaStat); return TSDB_CODE_FAILED; } - if (taosHashGet(pItem->expiredWindows, &querySKey, sizeof(TSKEY))) { + if (taosHashGet(pItem->expireWindows, &querySKey, sizeof(TSKEY))) { // TODO: mark this window as expired. - smaDebug("vgId:%d, skey %" PRIi64 " of window exists in expired window for index %" PRIi64, SMA_VID(pSma), querySKey, + smaDebug("vgId:%d, skey %" PRIi64 " of window exists in expire window for index %" PRIi64, SMA_VID(pSma), querySKey, indexUid); } else { - smaDebug("vgId:%d, skey %" PRIi64 " of window not in expired window for index %" PRIi64, SMA_VID(pSma), querySKey, + smaDebug("vgId:%d, skey %" PRIi64 " of window not in expire window for index %" PRIi64, SMA_VID(pSma), querySKey, indexUid); } @@ -747,7 +747,7 @@ int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY query tdUnRefSmaStat(pSma, pStat); tdInitTSmaFile(&tReadH, indexUid, querySKey); - smaDebug("### vgId:%d read from DBF %s days:%d, interval:%" PRIi64 ", storageLevel:%" PRIi8 " queryKey:%" PRIi64, + smaDebug("### vgId:%d, read from DBF %s days:%d, interval:%" PRIi64 ", storageLevel:%" PRIi8 " queryKey:%" PRIi64, SMA_VID(pSma), tReadH.dFile.path, tReadH.days, tReadH.interval, tReadH.storageLevel, querySKey); if (smaOpenDBF(pEnv->dbEnv, &tReadH.dFile) != 0) { smaWarn("vgId:%d, open DBF %s failed since %s", SMA_VID(pSma), tReadH.dFile.path, tstrerror(terrno)); @@ -860,9 +860,9 @@ static SSmaStatItem *tdNewSmaStatItem(int8_t state) { } pItem->state = state; - pItem->expiredWindows = taosHashInit(SMA_STATE_ITEM_HASH_SLOT, taosGetDefaultHashFunction(TSDB_DATA_TYPE_TIMESTAMP), - true, HASH_ENTRY_LOCK); - if (!pItem->expiredWindows) { + pItem->expireWindows = taosHashInit(SMA_STATE_ITEM_HASH_SLOT, taosGetDefaultHashFunction(TSDB_DATA_TYPE_TIMESTAMP), + true, HASH_ENTRY_LOCK); + if (!pItem->expireWindows) { taosMemoryFreeClear(pItem); return NULL; } @@ -870,8 +870,7 @@ static SSmaStatItem *tdNewSmaStatItem(int8_t state) { return pItem; } -static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, - int64_t version) { +static int32_t tdSetExpireWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, int64_t version) { SSmaStatItem *pItem = taosHashGet(pItemsHash, &indexUid, sizeof(indexUid)); if (!pItem) { // TODO: use TSDB_SMA_STAT_EXPIRED and update by stream computing later @@ -885,8 +884,8 @@ static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t inde // cache smaMeta STSma *pTSma = metaGetSmaInfoByIndex(SMA_META(pSma), indexUid); if (!pTSma) { - terrno = TSDB_CODE_TDB_NO_SMA_INDEX_IN_META; - taosHashCleanup(pItem->expiredWindows); + terrno = TSDB_CODET_TSMA_NO_INDEX_IN_META; + taosHashCleanup(pItem->expireWindows); taosMemoryFree(pItem); smaWarn("vgId:%d, set expire window, get tsma meta failed for smaIndex %" PRIi64 " since %s", SMA_VID(pSma), indexUid, tstrerror(terrno)); @@ -896,7 +895,7 @@ static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t inde if (taosHashPut(pItemsHash, &indexUid, sizeof(indexUid), &pItem, sizeof(pItem)) != 0) { // If error occurs during put smaStatItem, free the resources of pItem - taosHashCleanup(pItem->expiredWindows); + taosHashCleanup(pItem->expireWindows); taosMemoryFree(pItem); return TSDB_CODE_FAILED; } @@ -905,14 +904,14 @@ static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t inde return TSDB_CODE_FAILED; } - if (taosHashPut(pItem->expiredWindows, &winSKey, sizeof(TSKEY), &version, sizeof(version)) != 0) { - // If error occurs during taosHashPut expired windows, remove the smaIndex from pSma->pSmaStat, thus TSDB would + if (taosHashPut(pItem->expireWindows, &winSKey, sizeof(TSKEY), &version, sizeof(version)) != 0) { + // If error occurs during taosHashPut expire windows, remove the smaIndex from pSma->pSmaStat, thus TSDB would // tell query module to query raw TS data. // N.B. // 1) It is assumed to be extemely little probability event of fail to taosHashPut. // 2) This would solve the inconsistency to some extent, but not completely, unless we record all expired // windows failed to put into hash table. - taosHashCleanup(pItem->expiredWindows); + taosHashCleanup(pItem->expireWindows); taosMemoryFreeClear(pItem->pTSma); taosHashRemove(pItemsHash, &indexUid, sizeof(indexUid)); smaWarn("vgId:%d, smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window fail", SMA_VID(pSma), indexUid, @@ -926,13 +925,13 @@ static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t inde } /** - * @brief Update expired window according to msg from stream computing module. + * @brief Update expire window according to msg from stream computing module. * * @param pSma * @param msg SSubmitReq * @return int32_t */ -int32_t tdUpdateExpiredWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t version) { +int32_t tdUpdateExpireWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t version) { // no time-range-sma, just return success if (atomic_load_16(&SMA_TSMA_NUM(pSma)) <= 0) { smaTrace("vgId:%d, not update expire window since no tSma", SMA_VID(pSma)); @@ -945,7 +944,7 @@ int32_t tdUpdateExpiredWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t ve return TSDB_CODE_FAILED; } - if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE) < 0) { + if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE, false) < 0) { smaError("vgId:%d, init sma env failed since %s", SMA_VID(pSma), terrstr(terrno)); terrno = TSDB_CODE_TDB_INIT_FAILED; return TSDB_CODE_FAILED; @@ -1019,7 +1018,7 @@ int32_t tdUpdateExpiredWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t ve if (lastWinSKey != winSKey) { lastWinSKey = winSKey; - if (tdSetExpiredWindow(pSma, pItemsHash, pTSma->indexUid, winSKey, version) < 0) { + if (tdSetExpireWindow(pSma, pItemsHash, pTSma->indexUid, winSKey, version) < 0) { pSW = tFreeTSmaWrapper(pSW, false); tdUnRefSmaStat(pSma, pStat); return TSDB_CODE_FAILED; diff --git a/source/dnode/vnode/src/sma/smaTimeRange2.c b/source/dnode/vnode/src/sma/smaTimeRange2.c index df89ec1795..760eb808bb 100644 --- a/source/dnode/vnode/src/sma/smaTimeRange2.c +++ b/source/dnode/vnode/src/sma/smaTimeRange2.c @@ -72,10 +72,10 @@ static int32_t tdInitTSmaFile(STSmaReadH *pSmaH, int64_t indexUid, TSKEY skey); static bool tdSetAndOpenTSmaFile(STSmaReadH *pReadH, TSKEY *queryKey); static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyLen, void *pData, int32_t dataLen, TXN *txn); -// expired window +// expire window -static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, int64_t version); -static int32_t tdResetExpiredWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey); +static int32_t tdSetExpireWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, int64_t version); +static int32_t tdResetExpireWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey); static int32_t tdDropTSmaDataImpl(SSma *pSma, int64_t indexUid); /** @@ -149,7 +149,7 @@ static int32_t tdInitTSmaReadH(STSmaReadH *pSmaH, SSma *pSma, int64_t interval, } /** - * @brief Init of tSma FS + * @brief Init of tsma FS * * @param pReadH * @param indexUid @@ -169,7 +169,7 @@ static int32_t tdInitTSmaFile(STSmaReadH *pSmaH, int64_t indexUid, TSKEY skey) { } /** - * @brief Set and open tSma file if it has key locates in queryWin. + * @brief Set and open tsma file if it has key locates in queryWin. * * @param pReadH * @param param @@ -335,7 +335,7 @@ static int32_t tdGetTSmaDays(SSma *pSma, int64_t interval, int32_t storageLevel) } /** - * @brief Judge the tSma storage level + * @brief Judge the tsma storage level * * @param pCfg * @param interval @@ -362,7 +362,7 @@ static int32_t tdGetSmaStorageLevel(STSmaKeepCfg *pCfg, int64_t interval) { * @return int32_t */ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { - STsdbCfg *pCfg = SMA_TSDB_CFG(pSma); + STsdbCfg *pCfg = SMA_TSDB_CFG(pSma); #if 0 const SArray *pDataBlocks = (const SArray *)msg; @@ -370,20 +370,20 @@ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { // For super table aggregation, the sma data is stored in vgroup calculated from the hash value of stable name. Thus // the sma data would arrive ahead of the update-expired-window msg. - if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE) != TSDB_CODE_SUCCESS) { + if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE, false) != TSDB_CODE_SUCCESS) { terrno = TSDB_CODE_TDB_INIT_FAILED; return TSDB_CODE_FAILED; } if (!pDataBlocks) { terrno = TSDB_CODE_INVALID_PTR; - smaWarn("vgId:%d insert tSma data failed since pDataBlocks is NULL", SMA_VID(pSma)); + smaWarn("vgId:%d, insert tsma data failed since pDataBlocks is NULL", SMA_VID(pSma)); return terrno; } if (taosArrayGetSize(pDataBlocks) <= 0) { terrno = TSDB_CODE_INVALID_PARA; - smaWarn("vgId:%d insert tSma data failed since pDataBlocks is empty", SMA_VID(pSma)); + smaWarn("vgId:%d, insert tsma data failed since pDataBlocks is empty", SMA_VID(pSma)); return TSDB_CODE_FAILED; } #endif @@ -399,7 +399,7 @@ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { } if (!pItem || !(pItem = *(SSmaStatItem **)pItem) || tdSmaStatIsDropped(pItem)) { - terrno = TSDB_CODE_TDB_INVALID_SMA_STAT; + terrno = TSDB_CODE_TSMA_INVALID_STAT; tdUnRefSmaStat(pSma, pStat); return TSDB_CODE_FAILED; } @@ -414,7 +414,7 @@ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { int32_t tdDropTSmaData(SSma *pSma, int64_t indexUid) { int32_t code = TSDB_CODE_SUCCESS; if ((code = tdDropTSmaDataImpl(pSma, indexUid)) < 0) { - smaWarn("vgId:%d drop tSma data failed since %s", SMA_VID(pSma), tstrerror(terrno)); + smaWarn("vgId:%d, drop tsma data failed since %s", SMA_VID(pSma), tstrerror(terrno)); } return code; } @@ -435,11 +435,11 @@ static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyL // TODO: insert tsma data blocks into B+Tree(TTB) if (smaSaveSmaToDB(pDBFile, smaKey, keyLen, pData, dataLen, txn) != 0) { - smaWarn("vgId:%d insert tsma data blocks into %s: smaKey %" PRIx64 "-%" PRIx64 ", dataLen %" PRIu32 " fail", + smaWarn("vgId:%d, insert tsma data blocks into %s: smaKey %" PRIx64 "-%" PRIx64 ", dataLen %" PRIu32 " fail", SMA_VID(pSmaH->pSma), pDBFile->path, *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), dataLen); return TSDB_CODE_FAILED; } - smaDebug("vgId:%d insert tsma data blocks into %s: smaKey %" PRIx64 "-%" PRIx64 ", dataLen %" PRIu32 " succeed", + smaDebug("vgId:%d, insert tsma data blocks into %s: smaKey %" PRIx64 "-%" PRIx64 ", dataLen %" PRIu32 " succeed", SMA_VID(pSmaH->pSma), pDBFile->path, *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), dataLen); #ifdef _TEST_SMA_PRINT_DEBUG_LOG_ @@ -447,14 +447,14 @@ static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyL void *data = tdGetSmaDataByKey(pDBFile, smaKey, keyLen, &valueSize); ASSERT(data != NULL); for (uint32_t v = 0; v < valueSize; v += 8) { - smaWarn("vgId:%d insert sma data val[%d] %" PRIi64, REPO_ID(pSmaH->pTsdb), v, *(int64_t *)POINTER_SHIFT(data, v)); + smaWarn("vgId:%d, insert sma data val[%d] %" PRIi64, REPO_ID(pSmaH->pTsdb), v, *(int64_t *)POINTER_SHIFT(data, v)); } #endif return TSDB_CODE_SUCCESS; } /** - * @brief When sma data received from stream computing, make the relative expired window valid. + * @brief When sma data received from stream computing, make the relative expire window valid. * * @param pSma * @param pStat @@ -462,7 +462,7 @@ static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyL * @param skey * @return int32_t */ -static int32_t tdResetExpiredWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey) { +static int32_t tdResetExpireWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey) { SSmaStatItem *pItem = NULL; tdRefSmaStat(pSma, pStat); @@ -473,14 +473,14 @@ static int32_t tdResetExpiredWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUi if ((pItem) && ((pItem = *(SSmaStatItem **)pItem))) { // pItem resides in hash buffer all the time unless drop sma index // TODO: multithread protect - if (taosHashRemove(pItem->expiredWindows, &skey, sizeof(TSKEY)) != 0) { + if (taosHashRemove(pItem->expireWindows, &skey, sizeof(TSKEY)) != 0) { // error handling tdUnRefSmaStat(pSma, pStat); - smaWarn("vgId:%d remove skey %" PRIi64 " from expired window for sma index %" PRIi64 " fail", SMA_VID(pSma), skey, + smaWarn("vgId:%d, remove skey %" PRIi64 " from expire window for sma index %" PRIi64 " fail", SMA_VID(pSma), skey, indexUid); return TSDB_CODE_FAILED; } - smaDebug("vgId:%d remove skey %" PRIi64 " from expired window for sma index %" PRIi64 " succeed", SMA_VID(pSma), + smaDebug("vgId:%d, remove skey %" PRIi64 " from expire window for sma index %" PRIi64 " succeed", SMA_VID(pSma), skey, indexUid); // TODO: use a standalone interface to received state upate notification from stream computing module. /** @@ -494,7 +494,7 @@ static int32_t tdResetExpiredWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUi } else { // error handling tdUnRefSmaStat(pSma, pStat); - smaWarn("vgId:%d expired window %" PRIi64 " not exists for sma index %" PRIi64, SMA_VID(pSma), skey, indexUid); + smaWarn("vgId:%d, expire window %" PRIi64 " not exists for sma index %" PRIi64, SMA_VID(pSma), skey, indexUid); return TSDB_CODE_FAILED; } @@ -503,7 +503,7 @@ static int32_t tdResetExpiredWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUi } /** - * @brief Drop tSma data and local cache + * @brief Drop tsma data and local cache * - insert/query reference * @param pSma * @param msg @@ -514,19 +514,19 @@ static int32_t tdDropTSmaDataImpl(SSma *pSma, int64_t indexUid) { // clear local cache if (pEnv) { - smaDebug("vgId:%d drop tSma local cache for %" PRIi64, SMA_VID(pSma), indexUid); + smaDebug("vgId:%d, drop tsma local cache for %" PRIi64, SMA_VID(pSma), indexUid); SSmaStatItem *pItem = taosHashGet(SMA_ENV_STAT_ITEMS(pEnv), &indexUid, sizeof(indexUid)); if ((pItem) || ((pItem = *(SSmaStatItem **)pItem))) { if (tdSmaStatIsDropped(pItem)) { - smaDebug("vgId:%d tSma stat is already dropped for %" PRIi64, SMA_VID(pSma), indexUid); + smaDebug("vgId:%d, tsma stat is already dropped for %" PRIi64, SMA_VID(pSma), indexUid); return TSDB_CODE_TDB_INVALID_ACTION; // TODO: duplicate drop msg would be intercepted by mnode } tdWLockSmaEnv(pEnv); if (tdSmaStatIsDropped(pItem)) { tdUnLockSmaEnv(pEnv); - smaDebug("vgId:%d tSma stat is already dropped for %" PRIi64, SMA_VID(pSma), indexUid); + smaDebug("vgId:%d, tsma stat is already dropped for %" PRIi64, SMA_VID(pSma), indexUid); return TSDB_CODE_TDB_INVALID_ACTION; // TODO: duplicate drop msg would be intercepted by mnode } tdSmaStatSetDropped(pItem); @@ -536,19 +536,20 @@ static int32_t tdDropTSmaDataImpl(SSma *pSma, int64_t indexUid) { int32_t refVal = INT32_MAX; while (true) { if ((refVal = T_REF_VAL_GET(SMA_ENV_STAT(pEnv))) <= 0) { - smaDebug("vgId:%d drop index %" PRIi64 " since refVal=%d", SMA_VID(pSma), indexUid, refVal); + smaDebug("vgId:%d, drop index %" PRIi64 " since refVal=%d", SMA_VID(pSma), indexUid, refVal); break; } - smaDebug("vgId:%d wait 1s to drop index %" PRIi64 " since refVal=%d", SMA_VID(pSma), indexUid, refVal); + smaDebug("vgId:%d, wait 1s to drop index %" PRIi64 " since refVal=%d", SMA_VID(pSma), indexUid, refVal); taosSsleep(1); if (++nSleep > SMA_DROP_EXPIRED_TIME) { - smaDebug("vgId:%d drop index %" PRIi64 " after wait %d (refVal=%d)", SMA_VID(pSma), indexUid, nSleep, refVal); + smaDebug("vgId:%d, drop index %" PRIi64 " after wait %d (refVal=%d)", SMA_VID(pSma), indexUid, nSleep, + refVal); break; }; } tdFreeSmaStatItem(pItem); - smaDebug("vgId:%d getTSmaDataImpl failed since no index %" PRIi64 " in local cache", SMA_VID(pSma), indexUid); + smaDebug("vgId:%d, getTSmaDataImpl failed since no index %" PRIi64 " in local cache", SMA_VID(pSma), indexUid); } } // clear sma data files @@ -572,7 +573,7 @@ int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY query if (!pEnv) { terrno = TSDB_CODE_INVALID_PTR; - smaWarn("vgId:%d getTSmaDataImpl failed since pTSmaEnv is NULL", SMA_VID(pSma)); + smaWarn("vgId:%d, getTSmaDataImpl failed since pTSmaEnv is NULL", SMA_VID(pSma)); return TSDB_CODE_FAILED; } @@ -585,7 +586,7 @@ int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY query // it's NULL. tdUnRefSmaStat(pSma, pStat); terrno = TSDB_CODE_TDB_INVALID_ACTION; - smaDebug("vgId:%d getTSmaDataImpl failed since no index %" PRIi64, SMA_VID(pSma), indexUid); + smaDebug("vgId:%d, getTSmaDataImpl failed since no index %" PRIi64, SMA_VID(pSma), indexUid); return TSDB_CODE_FAILED; } @@ -593,7 +594,7 @@ int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY query int32_t nQueryWin = taosArrayGetSize(pQuerySKey); for (int32_t n = 0; n < nQueryWin; ++n) { TSKEY skey = taosArrayGet(pQuerySKey, n); - if (taosHashGet(pItem->expiredWindows, &skey, sizeof(TSKEY))) { + if (taosHashGet(pItem->expireWindows, &skey, sizeof(TSKEY))) { // TODO: mark this window as expired. } } @@ -603,18 +604,18 @@ int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY query int8_t smaStat = 0; if (!tdSmaStatIsOK(pItem, &smaStat)) { // TODO: multiple check for large scale sma query tdUnRefSmaStat(pSma, pStat); - terrno = TSDB_CODE_TDB_INVALID_SMA_STAT; - smaWarn("vgId:%d getTSmaDataImpl failed from index %" PRIi64 " since %s %" PRIi8, SMA_VID(pSma), indexUid, + terrno = TSDB_CODE_TSMA_INVALID_STAT; + smaWarn("vgId:%d, getTSmaDataImpl failed from index %" PRIi64 " since %s %" PRIi8, SMA_VID(pSma), indexUid, tstrerror(terrno), smaStat); return TSDB_CODE_FAILED; } - if (taosHashGet(pItem->expiredWindows, &querySKey, sizeof(TSKEY))) { + if (taosHashGet(pItem->expireWindows, &querySKey, sizeof(TSKEY))) { // TODO: mark this window as expired. - smaDebug("vgId:%d skey %" PRIi64 " of window exists in expired window for index %" PRIi64, SMA_VID(pSma), querySKey, + smaDebug("vgId:%d, skey %" PRIi64 " of window exists in expire window for index %" PRIi64, SMA_VID(pSma), querySKey, indexUid); } else { - smaDebug("vgId:%d skey %" PRIi64 " of window not in expired window for index %" PRIi64, SMA_VID(pSma), querySKey, + smaDebug("vgId:%d, skey %" PRIi64 " of window not in expire window for index %" PRIi64, SMA_VID(pSma), querySKey, indexUid); } @@ -629,10 +630,10 @@ int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY query tdUnRefSmaStat(pSma, pStat); tdInitTSmaFile(&tReadH, indexUid, querySKey); - smaDebug("### vgId:%d read from DBF %s days:%d, interval:%" PRIi64 ", storageLevel:%" PRIi8 " queryKey:%" PRIi64, + smaDebug("### vgId:%d, read from DBF %s days:%d, interval:%" PRIi64 ", storageLevel:%" PRIi8 " queryKey:%" PRIi64, SMA_VID(pSma), tReadH.dFile.path, tReadH.days, tReadH.interval, tReadH.storageLevel, querySKey); if (smaOpenDBF(pEnv->dbEnv, &tReadH.dFile) != 0) { - smaWarn("vgId:%d open DBF %s failed since %s", SMA_VID(pSma), tReadH.dFile.path, tstrerror(terrno)); + smaWarn("vgId:%d, open DBF %s failed since %s", SMA_VID(pSma), tReadH.dFile.path, tstrerror(terrno)); return TSDB_CODE_FAILED; } @@ -641,13 +642,13 @@ int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY query int64_t queryGroupId = 0; tdEncodeTSmaKey(queryGroupId, querySKey, (void **)&pSmaKey); - smaDebug("vgId:%d get sma data from %s: smaKey %" PRIx64 "-%" PRIx64 ", keyLen %d", SMA_VID(pSma), tReadH.dFile.path, + smaDebug("vgId:%d, get sma data from %s: smaKey %" PRIx64 "-%" PRIx64 ", keyLen %d", SMA_VID(pSma), tReadH.dFile.path, *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), SMA_KEY_LEN); void *result = NULL; int32_t valueSize = 0; if (!(result = smaGetSmaDataByKey(&tReadH.dFile, smaKey, SMA_KEY_LEN, &valueSize))) { - smaWarn("vgId:%d get sma data failed from smaIndex %" PRIi64 ", smaKey %" PRIx64 "-%" PRIx64 " since %s", + smaWarn("vgId:%d, get sma data failed from smaIndex %" PRIi64 ", smaKey %" PRIx64 "-%" PRIx64 " since %s", SMA_VID(pSma), indexUid, *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), tstrerror(terrno)); smaCloseDBF(&tReadH.dFile); return TSDB_CODE_FAILED; @@ -656,7 +657,7 @@ int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY query #ifdef _TEST_SMA_PRINT_DEBUG_LOG_ for (uint32_t v = 0; v < valueSize; v += 8) { - smaWarn("vgId:%d get sma data v[%d]=%" PRIi64, SMA_VID(pSma), v, *(int64_t *)POINTER_SHIFT(result, v)); + smaWarn("vgId:%d, get sma data v[%d]=%" PRIi64, SMA_VID(pSma), v, *(int64_t *)POINTER_SHIFT(result, v)); } #endif taosMemoryFreeClear(result); // TODO: fill the result to output @@ -721,7 +722,7 @@ int32_t tdDropTSma(SSma *pSma, char *pMsg) { return -1; } - // TODO: send msg to stream computing to drop tSma + // TODO: send msg to stream computing to drop tsma // if ((send msg to stream computing) < 0) { // tDestroyTSma(&vCreateSmaReq); // return -1; @@ -755,9 +756,9 @@ static SSmaStatItem *tdNewSmaStatItem(int8_t state) { } pItem->state = state; - pItem->expiredWindows = taosHashInit(SMA_STATE_ITEM_HASH_SLOT, taosGetDefaultHashFunction(TSDB_DATA_TYPE_TIMESTAMP), - true, HASH_ENTRY_LOCK); - if (!pItem->expiredWindows) { + pItem->expireWindows = taosHashInit(SMA_STATE_ITEM_HASH_SLOT, taosGetDefaultHashFunction(TSDB_DATA_TYPE_TIMESTAMP), + true, HASH_ENTRY_LOCK); + if (!pItem->expireWindows) { taosMemoryFreeClear(pItem); return NULL; } @@ -765,8 +766,7 @@ static SSmaStatItem *tdNewSmaStatItem(int8_t state) { return pItem; } -static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, - int64_t version) { +static int32_t tdSetExpireWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, int64_t version) { SSmaStatItem *pItem = taosHashGet(pItemsHash, &indexUid, sizeof(indexUid)); if (!pItem) { // TODO: use TSDB_SMA_STAT_EXPIRED and update by stream computing later @@ -780,10 +780,10 @@ static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t inde // cache smaMeta STSma *pTSma = metaGetSmaInfoByIndex(SMA_META(pSma), indexUid); if (!pTSma) { - terrno = TSDB_CODE_TDB_NO_SMA_INDEX_IN_META; - taosHashCleanup(pItem->expiredWindows); + terrno = TSDB_CODE_TSMA_NO_INDEX_IN_META; + taosHashCleanup(pItem->expireWindows); taosMemoryFree(pItem); - smaWarn("vgId:%d set expire window, get tsma meta failed for smaIndex %" PRIi64 " since %s", SMA_VID(pSma), + smaWarn("vgId:%d, set expire window, get tsma meta failed for smaIndex %" PRIi64 " since %s", SMA_VID(pSma), indexUid, tstrerror(terrno)); return TSDB_CODE_FAILED; } @@ -791,7 +791,7 @@ static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t inde if (taosHashPut(pItemsHash, &indexUid, sizeof(indexUid), &pItem, sizeof(pItem)) != 0) { // If error occurs during put smaStatItem, free the resources of pItem - taosHashCleanup(pItem->expiredWindows); + taosHashCleanup(pItem->expireWindows); taosMemoryFree(pItem); return TSDB_CODE_FAILED; } @@ -800,53 +800,53 @@ static int32_t tdSetExpiredWindow(SSma *pSma, SHashObj *pItemsHash, int64_t inde return TSDB_CODE_FAILED; } - if (taosHashPut(pItem->expiredWindows, &winSKey, sizeof(TSKEY), &version, sizeof(version)) != 0) { - // If error occurs during taosHashPut expired windows, remove the smaIndex from pSma->pSmaStat, thus TSDB would + if (taosHashPut(pItem->expireWindows, &winSKey, sizeof(TSKEY), &version, sizeof(version)) != 0) { + // If error occurs during taosHashPut expire windows, remove the smaIndex from pSma->pSmaStat, thus TSDB would // tell query module to query raw TS data. // N.B. // 1) It is assumed to be extemely little probability event of fail to taosHashPut. // 2) This would solve the inconsistency to some extent, but not completely, unless we record all expired // windows failed to put into hash table. - taosHashCleanup(pItem->expiredWindows); + taosHashCleanup(pItem->expireWindows); taosMemoryFreeClear(pItem->pTSma); taosHashRemove(pItemsHash, &indexUid, sizeof(indexUid)); - smaWarn("vgId:%d smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window fail", SMA_VID(pSma), indexUid, + smaWarn("vgId:%d, smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window fail", SMA_VID(pSma), indexUid, winSKey); return TSDB_CODE_FAILED; } - smaDebug("vgId:%d smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window succeed", SMA_VID(pSma), indexUid, + smaDebug("vgId:%d, smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window succeed", SMA_VID(pSma), indexUid, winSKey); return TSDB_CODE_SUCCESS; } /** - * @brief Update expired window according to msg from stream computing module. + * @brief Update expire window according to msg from stream computing module. * * @param pSma * @param msg SSubmitReq * @return int32_t */ -int32_t tdUpdateExpiredWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t version) { +int32_t tdUpdateExpireWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t version) { // no time-range-sma, just return success if (atomic_load_16(&SMA_TSMA_NUM(pSma)) <= 0) { - smaTrace("vgId:%d not update expire window since no tSma", SMA_VID(pSma)); + smaTrace("vgId:%d, not update expire window since no tsma", SMA_VID(pSma)); return TSDB_CODE_SUCCESS; } if (!SMA_META(pSma)) { terrno = TSDB_CODE_INVALID_PTR; - smaError("vgId:%d update expire window failed since no meta ptr", SMA_VID(pSma)); + smaError("vgId:%d, update expire window failed since no meta ptr", SMA_VID(pSma)); return TSDB_CODE_FAILED; } - if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE) < 0) { - smaError("vgId:%d init sma env failed since %s", SMA_VID(pSma), terrstr(terrno)); + if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE, false) < 0) { + smaError("vgId:%d, init tsma env failed since %s", SMA_VID(pSma), terrstr(terrno)); terrno = TSDB_CODE_TDB_INIT_FAILED; return TSDB_CODE_FAILED; } - // Firstly, assume that tSma can only be created on super table/normal table. + // Firstly, assume that tsma can only be created on super table/normal table. // getActiveTimeWindow SSmaEnv *pEnv = SMA_TSMA_ENV(pSma); @@ -914,13 +914,13 @@ int32_t tdUpdateExpiredWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t ve if (lastWinSKey != winSKey) { lastWinSKey = winSKey; - if (tdSetExpiredWindow(pSma, pItemsHash, pTSma->indexUid, winSKey, version) < 0) { + if (tdSetExpireWindow(pSma, pItemsHash, pTSma->indexUid, winSKey, version) < 0) { pSW = tFreeTSmaWrapper(pSW, false); tdUnRefSmaStat(pSma, pStat); return TSDB_CODE_FAILED; } } else { - smaDebug("vgId:%d smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window ignore as duplicated", + smaDebug("vgId:%d, smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window ignore as duplicated", SMA_VID(pSma), pTSma->indexUid, winSKey); } } @@ -928,5 +928,78 @@ int32_t tdUpdateExpiredWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t ve tdUnRefSmaStat(pSma, pStat); + return TSDB_CODE_SUCCESS; +} + +/** + * @brief Clear skeys from tsma dstVgroups in expire window. + * + * @param pSma + * @param pMsg + * @return int32_t + */ +int32_t tdClearExpireWindowImpl(SSma *pSma, const SVClrTsmaExpWndsReq *pMsg) { + int64_t indexUid = pMsg->indexUid; + + if (atomic_load_16(&SMA_TSMA_NUM(pSma)) <= 0) { + smaWarn("vgId:%d, not clear expire window since no tsma for smaIndex %" PRIi64, SMA_VID(pSma), indexUid); + terrno = TSDB_CODE_TSMA_INVALID_ENV; + return TSDB_CODE_FAILED; + } + + if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE, true) < 0) { + smaWarn("vgId:%d, not clear expire window since no tsma env", SMA_VID(pSma)); + terrno = TSDB_CODE_TSMA_INVALID_ENV; + return TSDB_CODE_FAILED; + } + + // Firstly, assume that tsma can only be created on super table/normal table. + // getActiveTimeWindow + + SSmaEnv *pEnv = SMA_TSMA_ENV(pSma); + SSmaStat *pStat = SMA_ENV_STAT(pEnv); + SHashObj *pItemsHash = SMA_ENV_STAT_ITEMS(pEnv); + + ASSERT(pEnv && pStat && pItemsHash); + + // basic procedure + // TODO: optimization + tdRefSmaStat(pSma, pStat); + + SSmaStatItem *pItem = taosHashGet(pItemsHash, &indexUid, sizeof(indexUid)); + if (!pItem || !(pItem = *(SSmaStatItem **)pItem)) { + smaWarn("vgId:%d, no sma item to clear expire window for smaIndex %" PRIi64, SMA_VID(pSma), indexUid); + terrno = TSDB_CODE_TSMA_NO_INDEX_IN_CACHE; + tdUnRefSmaStat(pSma, pStat); + return TSDB_CODE_FAILED; + } + + for (int64_t i = 0; i < pMsg->nItems; ++i) { + const SVTsmaExpWndItem *pWndItem = &pMsg->items[i]; + int64_t winSKey = pWndItem->skey; + for (int64_t j = 0; j < pWndItem->nKeys; ++j) { + winSKey += pItem->pTSma->interval; + if (taosHashRemove(pItem->expireWindows, &winSKey, sizeof(winSKey)) != 0) { + // If error occurs during taosHashRemove expire windows, remove the smaIndex from pSma->pSmaStat, thus TSDB + // would tell query module to query raw TS data. N.B. + // 1) It is assumed to be extemely little probability event of fail to taosHashPut. + // 2) This would solve the inconsistency to some extent, but not completely, unless we record all expired + // windows failed to put into hash table. + taosHashCleanup(pItem->expireWindows); + taosMemoryFreeClear(pItem->pTSma); + taosHashRemove(pItemsHash, &indexUid, sizeof(indexUid)); + smaWarn("vgId:%d, rm skey %" PRIi64 " in expire window for smaIndex %" PRIi64 " fail", SMA_VID(pSma), winSKey, + indexUid); + terrno = TSDB_CODE_TSMA_RM_SKEY_IN_HASH; + tdUnRefSmaStat(pSma, pStat); + return TSDB_CODE_FAILED; + } + smaDebug("vgId:%d, rm skey %" PRIi64 " in expire window for smaIndex %" PRIi64 " success", SMA_VID(pSma), winSKey, + indexUid); + } + } + + tdUnRefSmaStat(pSma, pStat); + return TSDB_CODE_SUCCESS; } \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit.c b/source/dnode/vnode/src/tsdb/tsdbCommit.c index 17b8afda4b..90093f2510 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit.c @@ -150,7 +150,7 @@ int32_t tsdbCommit(STsdb *pTsdb) { return code; _err: - tsdbError("vgId:%d failed to commit since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); + tsdbError("vgId:%d, failed to commit since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); return code; } diff --git a/source/dnode/vnode/src/tsdb/tsdbMemTable.c b/source/dnode/vnode/src/tsdb/tsdbMemTable.c index ffbef4e765..d8bc87d471 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMemTable.c +++ b/source/dnode/vnode/src/tsdb/tsdbMemTable.c @@ -176,13 +176,13 @@ int32_t tsdbDeleteTableData(STsdb *pTsdb, int64_t version, tb_uid_t suid, tb_uid pMemTable->nDelOp++; - tsdbError("vgId:%d delete data from table suid:%" PRId64 " uid:%" PRId64 " skey:%" PRId64 " eKey:%" PRId64 + tsdbError("vgId:%d, delete data from table suid:%" PRId64 " uid:%" PRId64 " skey:%" PRId64 " eKey:%" PRId64 " since %s", TD_VID(pTsdb->pVnode), suid, uid, sKey, eKey, tstrerror(code)); return code; _err: - tsdbError("vgId:%d failed to delete data from table suid:%" PRId64 " uid:%" PRId64 " skey:%" PRId64 " eKey:%" PRId64 + tsdbError("vgId:%d, failed to delete data from table suid:%" PRId64 " uid:%" PRId64 " skey:%" PRId64 " eKey:%" PRId64 " since %s", TD_VID(pTsdb->pVnode), suid, uid, sKey, eKey, tstrerror(code)); return code; diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 3a47a54637..55ab5f5729 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -918,10 +918,10 @@ static int32_t vnodeProcessExpWndsClrReq(SVnode *pVnode, void *pReq, int32_t len ASSERT(0); - // if (tdProcess(pVnode->pSma, version, (const char *)&req) < 0) { - // if (pRsp) pRsp->code = terrno; - // goto _err; - // } + if (tdClearExpireWindow(pVnode->pSma, (const SVClrTsmaExpWndsReq *)&req) < 0) { + if (pRsp) pRsp->code = terrno; + goto _err; + } tDecoderClear(&coder); vDebug("vgId:%d, success to process expWnds clear for tsma %" PRIi64 " version %" PRIi64, TD_VID(pVnode), diff --git a/source/dnode/vnode/test/tsdbSmaTest.cpp b/source/dnode/vnode/test/tsdbSmaTest.cpp index 3b8c94e413..0161fac9b5 100644 --- a/source/dnode/vnode/test/tsdbSmaTest.cpp +++ b/source/dnode/vnode/test/tsdbSmaTest.cpp @@ -373,7 +373,7 @@ TEST(testCase, tSma_Data_Insert_Query_Test) { pTsdb->pTfs = tfsOpen(&pDisks, numOfDisks); EXPECT_NE(pTsdb->pTfs, nullptr); - // generate SSubmitReq msg and update expired window + // generate SSubmitReq msg and update expire window int16_t schemaVer = 0; uint32_t mockRowLen = sizeof(STSRow); uint32_t mockRowNum = 2; diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 71c348f810..7b9c254a20 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -352,9 +352,6 @@ TAOS_DEFINE_ERROR(TSDB_CODE_TDB_IVLD_TAG_VAL, "TSDB invalid tag valu TAOS_DEFINE_ERROR(TSDB_CODE_TDB_NO_CACHE_LAST_ROW, "TSDB no cache last row data") TAOS_DEFINE_ERROR(TSDB_CODE_TDB_TABLE_RECREATED, "Table re-created") TAOS_DEFINE_ERROR(TSDB_CODE_TDB_TDB_ENV_OPEN_ERROR, "TDB env open error") -TAOS_DEFINE_ERROR(TSDB_CODE_TDB_NO_SMA_INDEX_IN_META, "No sma index in meta") -TAOS_DEFINE_ERROR(TSDB_CODE_TDB_INVALID_SMA_STAT, "Invalid sma state") -TAOS_DEFINE_ERROR(TSDB_CODE_TDB_TSMA_ALREADY_EXIST, "TSMA already exists") // query @@ -536,25 +533,38 @@ TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INVALID_DELETE_WHERE, "The DELETE statemen TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INVALID_REDISTRIBUTE_VG, "The REDISTRIBUTE VGROUP statement only support 1 to 3 dnodes") //planner -TAOS_DEFINE_ERROR(TSDB_CODE_PLAN_INTERNAL_ERROR, "Planner internal error") +TAOS_DEFINE_ERROR(TSDB_CODE_PLAN_INTERNAL_ERROR, "Planner internal error") //udf -TAOS_DEFINE_ERROR(TSDB_CODE_UDF_STOPPING, "udf is stopping") -TAOS_DEFINE_ERROR(TSDB_CODE_UDF_PIPE_READ_ERR, "udf pipe read error") -TAOS_DEFINE_ERROR(TSDB_CODE_UDF_PIPE_CONNECT_ERR, "udf pipe connect error") -TAOS_DEFINE_ERROR(TSDB_CODE_UDF_PIPE_NO_PIPE, "udf no pipe") -TAOS_DEFINE_ERROR(TSDB_CODE_UDF_LOAD_UDF_FAILURE, "udf load failure") -TAOS_DEFINE_ERROR(TSDB_CODE_UDF_INVALID_STATE, "udf invalid state") -TAOS_DEFINE_ERROR(TSDB_CODE_UDF_INVALID_INPUT, "udf invalid function input") -TAOS_DEFINE_ERROR(TSDB_CODE_UDF_NO_FUNC_HANDLE, "udf no function handle") -TAOS_DEFINE_ERROR(TSDB_CODE_UDF_INVALID_BUFSIZE, "udf invalid bufsize") -TAOS_DEFINE_ERROR(TSDB_CODE_UDF_INVALID_OUTPUT_TYPE, "udf invalid output type") +TAOS_DEFINE_ERROR(TSDB_CODE_UDF_STOPPING, "udf is stopping") +TAOS_DEFINE_ERROR(TSDB_CODE_UDF_PIPE_READ_ERR, "udf pipe read error") +TAOS_DEFINE_ERROR(TSDB_CODE_UDF_PIPE_CONNECT_ERR, "udf pipe connect error") +TAOS_DEFINE_ERROR(TSDB_CODE_UDF_PIPE_NO_PIPE, "udf no pipe") +TAOS_DEFINE_ERROR(TSDB_CODE_UDF_LOAD_UDF_FAILURE, "udf load failure") +TAOS_DEFINE_ERROR(TSDB_CODE_UDF_INVALID_STATE, "udf invalid state") +TAOS_DEFINE_ERROR(TSDB_CODE_UDF_INVALID_INPUT, "udf invalid function input") +TAOS_DEFINE_ERROR(TSDB_CODE_UDF_NO_FUNC_HANDLE, "udf no function handle") +TAOS_DEFINE_ERROR(TSDB_CODE_UDF_INVALID_BUFSIZE, "udf invalid bufsize") +TAOS_DEFINE_ERROR(TSDB_CODE_UDF_INVALID_OUTPUT_TYPE, "udf invalid output type") //schemaless -TAOS_DEFINE_ERROR(TSDB_CODE_SML_INVALID_PROTOCOL_TYPE, "Invalid line protocol type") -TAOS_DEFINE_ERROR(TSDB_CODE_SML_INVALID_PRECISION_TYPE, "Invalid timestamp precision type") -TAOS_DEFINE_ERROR(TSDB_CODE_SML_INVALID_DATA, "Invalid data type") -TAOS_DEFINE_ERROR(TSDB_CODE_SML_INVALID_DB_CONF, "Invalid schemaless db config") +TAOS_DEFINE_ERROR(TSDB_CODE_SML_INVALID_PROTOCOL_TYPE, "Invalid line protocol type") +TAOS_DEFINE_ERROR(TSDB_CODE_SML_INVALID_PRECISION_TYPE, "Invalid timestamp precision type") +TAOS_DEFINE_ERROR(TSDB_CODE_SML_INVALID_DATA, "Invalid data type") +TAOS_DEFINE_ERROR(TSDB_CODE_SML_INVALID_DB_CONF, "Invalid schemaless db config") + +//tsma +TAOS_DEFINE_ERROR(TSDB_CODE_TSMA_ALREADY_EXIST, "Tsma already exists") +TAOS_DEFINE_ERROR(TSDB_CODE_TSMA_NO_INDEX_IN_META, "No tsma index in meta") +TAOS_DEFINE_ERROR(TSDB_CODE_TSMA_INVALID_ENV, "Invalid tsma env") +TAOS_DEFINE_ERROR(TSDB_CODE_TSMA_INVALID_STAT, "Invalid tsma state") +TAOS_DEFINE_ERROR(TSDB_CODE_TSMA_NO_INDEX_IN_CACHE, "No tsma index in cache") +TAOS_DEFINE_ERROR(TSDB_CODE_TSMA_RM_SKEY_IN_HASH, "Rm tsma skey in cache") + +//rsma +TAOS_DEFINE_ERROR(TSDB_CODE_RSMA_INVALID_ENV, "Invalid rsma env") +TAOS_DEFINE_ERROR(TSDB_CODE_RSMA_INVALID_STAT, "Invalid rsma state") + #ifdef TAOS_ERROR_C }; From 2160a112f52d916c4942a5844cc928d33004a73d Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Fri, 10 Jun 2022 09:59:47 +0800 Subject: [PATCH 04/16] other: merge 3.0 --- include/common/tmsgdef.h | 3 + include/libs/stream/tstream.h | 2 + include/libs/sync/sync.h | 27 +- include/libs/sync/syncTools.h | 94 +++ include/libs/wal/wal.h | 14 +- source/common/src/tdatablock.c | 1 + source/dnode/mgmt/mgmt_mnode/src/mmHandle.c | 9 +- source/dnode/mgmt/mgmt_mnode/src/mmWorker.c | 2 +- source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 1 + source/dnode/mnode/impl/inc/mndInt.h | 3 +- source/dnode/mnode/impl/inc/mndVgroup.h | 1 + source/dnode/mnode/impl/src/mndDb.c | 51 -- source/dnode/mnode/impl/src/mndDnode.c | 2 + source/dnode/mnode/impl/src/mndMain.c | 154 +++-- source/dnode/mnode/impl/src/mndSync.c | 64 +- source/dnode/mnode/impl/src/mndTrans.c | 8 +- source/dnode/mnode/impl/src/mndVgroup.c | 151 ++++- source/dnode/mnode/sdb/inc/sdb.h | 10 +- source/dnode/mnode/sdb/src/sdb.c | 14 +- source/dnode/mnode/sdb/src/sdbFile.c | 38 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 14 + source/dnode/vnode/src/vnd/vnodeSync.c | 2 + source/libs/index/inc/indexCache.h | 22 +- source/libs/index/inc/indexComm.h | 4 +- source/libs/index/inc/indexInt.h | 24 +- source/libs/index/inc/indexTfile.h | 8 +- source/libs/index/src/index.c | 68 +- source/libs/index/src/indexCache.c | 104 +-- source/libs/index/src/indexComm.c | 22 +- source/libs/index/src/indexFilter.c | 2 +- source/libs/index/src/indexJson.c | 4 +- source/libs/index/src/indexTfile.c | 28 +- source/libs/index/test/indexTests.cc | 10 +- source/libs/sync/inc/syncAppendEntries.h | 1 + source/libs/sync/inc/syncAppendEntriesReply.h | 1 + source/libs/sync/inc/syncElection.h | 2 + source/libs/sync/inc/syncIO.h | 3 + source/libs/sync/inc/syncIndexMgr.h | 4 + source/libs/sync/inc/syncInt.h | 53 +- source/libs/sync/inc/syncRaftCfg.h | 8 +- source/libs/sync/inc/syncRaftLog.h | 10 +- source/libs/sync/inc/syncRaftStore.h | 4 +- source/libs/sync/inc/syncReplication.h | 1 + source/libs/sync/inc/syncRequestVote.h | 1 + source/libs/sync/inc/syncRequestVoteReply.h | 1 + source/libs/sync/inc/syncSnapshot.h | 60 +- source/libs/sync/inc/syncUtil.h | 1 + source/libs/sync/src/syncAppendEntries.c | 337 +++++++++- source/libs/sync/src/syncAppendEntriesReply.c | 115 ++++ source/libs/sync/src/syncCommit.c | 105 +--- source/libs/sync/src/syncElection.c | 28 +- source/libs/sync/src/syncIO.c | 30 +- source/libs/sync/src/syncIndexMgr.c | 62 +- source/libs/sync/src/syncMain.c | 431 ++++++++++++- source/libs/sync/src/syncMessage.c | 424 ++++++++++++- source/libs/sync/src/syncRaftCfg.c | 9 +- source/libs/sync/src/syncRaftLog.c | 304 ++++++++- source/libs/sync/src/syncReplication.c | 83 ++- source/libs/sync/src/syncRequestVote.c | 65 ++ source/libs/sync/src/syncRequestVoteReply.c | 66 ++ source/libs/sync/src/syncSnapshot.c | 595 +++++++++++++++++- source/libs/sync/src/syncUtil.c | 22 + source/libs/sync/test/CMakeLists.txt | 126 ++++ .../sync/test/syncAppendEntriesReplyTest.cpp | 2 + .../libs/sync/test/syncAppendEntriesTest.cpp | 1 + .../test/syncConfigChangeSnapshotTest.cpp | 366 +++++++++++ .../libs/sync/test/syncConfigChangeTest.cpp | 1 - source/libs/sync/test/syncIndexMgrTest.cpp | 82 +-- source/libs/sync/test/syncRaftCfgTest.cpp | 6 +- source/libs/sync/test/syncRaftLogTest.cpp | 172 +++++ source/libs/sync/test/syncRaftLogTest2.cpp | 437 +++++++++++++ source/libs/sync/test/syncRaftLogTest3.cpp | 388 ++++++++++++ .../sync/test/syncSnapshotReceiverTest.cpp | 63 ++ source/libs/sync/test/syncSnapshotRspTest.cpp | 101 +++ .../libs/sync/test/syncSnapshotSendTest.cpp | 101 +++ .../libs/sync/test/syncSnapshotSenderTest.cpp | 72 +++ source/libs/sync/test/syncTest.cpp | 7 +- source/libs/sync/test/syncTestTool.cpp | 399 ++++++++++++ source/libs/sync/test/syncTimeoutTest.cpp | 21 + source/libs/wal/inc/walInt.h | 1 + source/libs/wal/src/walMeta.c | 9 + source/libs/wal/src/walMgmt.c | 14 +- source/libs/wal/src/walWrite.c | 39 +- tests/script/jenkins/basic.txt | 3 +- tests/script/tsim/dnode/drop_dnode_mnode.sim | 52 ++ tests/script/tsim/mnode/basic3.sim | 5 +- 86 files changed, 5670 insertions(+), 585 deletions(-) create mode 100644 source/libs/sync/test/syncConfigChangeSnapshotTest.cpp create mode 100644 source/libs/sync/test/syncRaftLogTest.cpp create mode 100644 source/libs/sync/test/syncRaftLogTest2.cpp create mode 100644 source/libs/sync/test/syncRaftLogTest3.cpp create mode 100644 source/libs/sync/test/syncSnapshotReceiverTest.cpp create mode 100644 source/libs/sync/test/syncSnapshotRspTest.cpp create mode 100644 source/libs/sync/test/syncSnapshotSendTest.cpp create mode 100644 source/libs/sync/test/syncSnapshotSenderTest.cpp create mode 100644 source/libs/sync/test/syncTestTool.cpp create mode 100644 tests/script/tsim/dnode/drop_dnode_mnode.sim diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 81ddc68d5d..dba80f5397 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -195,6 +195,7 @@ enum { TD_DEF_MSG_TYPE(TDMT_VND_ALTER_CONFIG, "alter-config", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_ALTER_REPLICA, "alter-replica", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_ALTER_CONFIRM, "alter-confirm", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_VND_ALTER_HASHRANGE, "alter-hashrange", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_COMPACT, "compact", NULL, NULL) TD_NEW_MSG_SEG(TDMT_QND_MSG) @@ -234,6 +235,8 @@ enum { TD_DEF_MSG_TYPE(TDMT_SYNC_COMMON_RESPONSE, "sync-common-response", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_APPLY_MSG, "sync-apply-msg", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_CONFIG_CHANGE, "sync-config-change", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_SYNC_SNAPSHOT_SEND, "sync-snapshot-send", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_SYNC_SNAPSHOT_RSP, "sync-snapshot-rsp", NULL, NULL) #if defined(TD_MSG_NUMBER_) TDMT_MAX diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 6b5eb3b491..31d13d6cf4 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -308,9 +308,11 @@ static FORCE_INLINE int32_t streamTaskOutput(SStreamTask* pTask, SStreamDataBloc if (pTask->sinkType == TASK_SINK__TABLE) { ASSERT(pTask->dispatchType == TASK_DISPATCH__NONE); pTask->tbSink.tbSinkFunc(pTask, pTask->tbSink.vnode, 0, pBlock->blocks); + taosFreeQitem(pBlock); } else if (pTask->sinkType == TASK_SINK__SMA) { ASSERT(pTask->dispatchType == TASK_DISPATCH__NONE); pTask->smaSink.smaSink(pTask->smaSink.vnode, pTask->smaSink.smaId, pBlock->blocks); + taosFreeQitem(pBlock); } else { ASSERT(pTask->dispatchType != TASK_DISPATCH__NONE); taosWriteQitem(pTask->outputQueue->queue, pBlock); diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index a587ad6ef2..10ece0b219 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -88,11 +88,16 @@ typedef struct SReConfigCbMeta { } SReConfigCbMeta; typedef struct SSnapshot { - void *data; + void* data; SyncIndex lastApplyIndex; SyncTerm lastApplyTerm; + SyncIndex lastConfigIndex; } SSnapshot; +typedef struct SSnapshotMeta { + SyncIndex lastConfigIndex; +} SSnapshotMeta; + typedef struct SSyncFSM { void* data; @@ -141,10 +146,28 @@ typedef struct SSyncLogStore { // return commit index of log SyncIndex (*getCommitIndex)(struct SSyncLogStore* pLogStore); + // refactor, log[0 .. n] ==> log[m .. n] + int32_t (*syncLogSetBeginIndex)(struct SSyncLogStore* pLogStore, SyncIndex beginIndex); + int32_t (*syncLogResetBeginIndex)(struct SSyncLogStore* pLogStore); + SyncIndex (*syncLogBeginIndex)(struct SSyncLogStore* pLogStore); + SyncIndex (*syncLogEndIndex)(struct SSyncLogStore* pLogStore); + bool (*syncLogIsEmpty)(struct SSyncLogStore* pLogStore); + int32_t (*syncLogEntryCount)(struct SSyncLogStore* pLogStore); + bool (*syncLogInRange)(struct SSyncLogStore* pLogStore, SyncIndex index); + + SyncIndex (*syncLogWriteIndex)(struct SSyncLogStore* pLogStore); + SyncIndex (*syncLogLastIndex)(struct SSyncLogStore* pLogStore); + SyncTerm (*syncLogLastTerm)(struct SSyncLogStore* pLogStore); + + int32_t (*syncLogAppendEntry)(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry); + int32_t (*syncLogGetEntry)(struct SSyncLogStore* pLogStore, SyncIndex index, SSyncRaftEntry** ppEntry); + int32_t (*syncLogTruncate)(struct SSyncLogStore* pLogStore, SyncIndex fromIndex); + } SSyncLogStore; typedef struct SSyncInfo { bool isStandBy; + bool snapshotEnable; SyncGroupId vgId; SSyncCfg syncCfg; char path[TSDB_FILENAME_LEN]; @@ -172,6 +195,8 @@ bool syncEnvIsStart(); const char* syncStr(ESyncState state); bool syncIsRestoreFinish(int64_t rid); +int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta); + // to be moved to static void syncStartNormal(int64_t rid); void syncStartStandBy(int64_t rid); diff --git a/include/libs/sync/syncTools.h b/include/libs/sync/syncTools.h index bd396edf55..bb50fc141c 100644 --- a/include/libs/sync/syncTools.h +++ b/include/libs/sync/syncTools.h @@ -301,6 +301,7 @@ typedef struct SyncAppendEntries { SyncIndex prevLogIndex; SyncTerm prevLogTerm; SyncIndex commitIndex; + SyncTerm privateTerm; uint32_t dataLen; char data[]; } SyncAppendEntries; @@ -332,6 +333,7 @@ typedef struct SyncAppendEntriesReply { SRaftId destId; // private data SyncTerm term; + SyncTerm privateTerm; bool success; SyncIndex matchIndex; } SyncAppendEntriesReply; @@ -385,6 +387,75 @@ void syncApplyMsgPrint2(char* s, const SyncApplyMsg* pMsg); void syncApplyMsgLog(const SyncApplyMsg* pMsg); void syncApplyMsgLog2(char* s, const SyncApplyMsg* pMsg); +// --------------------------------------------- +typedef struct SyncSnapshotSend { + uint32_t bytes; + int32_t vgId; + uint32_t msgType; + SRaftId srcId; + SRaftId destId; + + SyncTerm term; + SyncIndex lastIndex; // lastIndex of snapshot + SyncTerm lastTerm; // lastTerm of snapshot + SyncTerm privateTerm; + int32_t seq; + uint32_t dataLen; + char data[]; +} SyncSnapshotSend; + +SyncSnapshotSend* syncSnapshotSendBuild(uint32_t dataLen, int32_t vgId); +void syncSnapshotSendDestroy(SyncSnapshotSend* pMsg); +void syncSnapshotSendSerialize(const SyncSnapshotSend* pMsg, char* buf, uint32_t bufLen); +void syncSnapshotSendDeserialize(const char* buf, uint32_t len, SyncSnapshotSend* pMsg); +char* syncSnapshotSendSerialize2(const SyncSnapshotSend* pMsg, uint32_t* len); +SyncSnapshotSend* syncSnapshotSendDeserialize2(const char* buf, uint32_t len); +void syncSnapshotSend2RpcMsg(const SyncSnapshotSend* pMsg, SRpcMsg* pRpcMsg); +void syncSnapshotSendFromRpcMsg(const SRpcMsg* pRpcMsg, SyncSnapshotSend* pMsg); +SyncSnapshotSend* syncSnapshotSendFromRpcMsg2(const SRpcMsg* pRpcMsg); +cJSON* syncSnapshotSend2Json(const SyncSnapshotSend* pMsg); +char* syncSnapshotSend2Str(const SyncSnapshotSend* pMsg); + +// for debug ---------------------- +void syncSnapshotSendPrint(const SyncSnapshotSend* pMsg); +void syncSnapshotSendPrint2(char* s, const SyncSnapshotSend* pMsg); +void syncSnapshotSendLog(const SyncSnapshotSend* pMsg); +void syncSnapshotSendLog2(char* s, const SyncSnapshotSend* pMsg); + +// --------------------------------------------- +typedef struct SyncSnapshotRsp { + uint32_t bytes; + int32_t vgId; + uint32_t msgType; + SRaftId srcId; + SRaftId destId; + + SyncTerm term; + SyncIndex lastIndex; + SyncTerm lastTerm; + SyncTerm privateTerm; + int32_t ack; + int32_t code; +} SyncSnapshotRsp; + +SyncSnapshotRsp* syncSnapshotRspBuild(int32_t vgId); +void syncSnapshotRspDestroy(SyncSnapshotRsp* pMsg); +void syncSnapshotRspSerialize(const SyncSnapshotRsp* pMsg, char* buf, uint32_t bufLen); +void syncSnapshotRspDeserialize(const char* buf, uint32_t len, SyncSnapshotRsp* pMsg); +char* syncSnapshotRspSerialize2(const SyncSnapshotRsp* pMsg, uint32_t* len); +SyncSnapshotRsp* syncSnapshotRspDeserialize2(const char* buf, uint32_t len); +void syncSnapshotRsp2RpcMsg(const SyncSnapshotRsp* pMsg, SRpcMsg* pRpcMsg); +void syncSnapshotRspFromRpcMsg(const SRpcMsg* pRpcMsg, SyncSnapshotRsp* pMsg); +SyncSnapshotRsp* syncSnapshotRspFromRpcMsg2(const SRpcMsg* pRpcMsg); +cJSON* syncSnapshotRsp2Json(const SyncSnapshotRsp* pMsg); +char* syncSnapshotRsp2Str(const SyncSnapshotRsp* pMsg); + +// for debug ---------------------- +void syncSnapshotRspPrint(const SyncSnapshotRsp* pMsg); +void syncSnapshotRspPrint2(char* s, const SyncSnapshotRsp* pMsg); +void syncSnapshotRspLog(const SyncSnapshotRsp* pMsg); +void syncSnapshotRspLog2(char* s, const SyncSnapshotRsp* pMsg); + // on message ---------------------- int32_t syncNodeOnPingCb(SSyncNode* ths, SyncPing* pMsg); int32_t syncNodeOnPingReplyCb(SSyncNode* ths, SyncPingReply* pMsg); @@ -395,6 +466,29 @@ int32_t syncNodeOnRequestVoteReplyCb(SSyncNode* ths, SyncRequestVoteReply* pMsg) int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg); int32_t syncNodeOnAppendEntriesReplyCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg); +int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg); +int32_t syncNodeOnRequestVoteReplySnapshotCb(SSyncNode* ths, SyncRequestVoteReply* pMsg); +int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMsg); +int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg); + +int32_t syncNodeOnSnapshotSendCb(SSyncNode* ths, SyncSnapshotSend* pMsg); +int32_t syncNodeOnSnapshotRspCb(SSyncNode* ths, SyncSnapshotRsp* pMsg); + +// ----------------------------------------- +typedef int32_t (*FpOnPingCb)(SSyncNode* ths, SyncPing* pMsg); +typedef int32_t (*FpOnPingReplyCb)(SSyncNode* ths, SyncPingReply* pMsg); +typedef int32_t (*FpOnClientRequestCb)(SSyncNode* ths, SyncClientRequest* pMsg); +typedef int32_t (*FpOnRequestVoteCb)(SSyncNode* ths, SyncRequestVote* pMsg); +typedef int32_t (*FpOnRequestVoteReplyCb)(SSyncNode* ths, SyncRequestVoteReply* pMsg); +typedef int32_t (*FpOnAppendEntriesCb)(SSyncNode* ths, SyncAppendEntries* pMsg); +typedef int32_t (*FpOnAppendEntriesReplyCb)(SSyncNode* ths, SyncAppendEntriesReply* pMsg); +typedef int32_t (*FpOnTimeoutCb)(SSyncNode* pSyncNode, SyncTimeout* pMsg); +typedef int32_t (*FpOnSnapshotSendCb)(SSyncNode* ths, SyncSnapshotSend* pMsg); +typedef int32_t (*FpOnSnapshotRspCb)(SSyncNode* ths, SyncSnapshotRsp* pMsg); + +// option ---------------------------------- +bool syncNodeSnapshotEnable(SSyncNode* pSyncNode); + // --------------------------------------------- #ifdef __cplusplus diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h index 95af8ac306..c7d1ccd3de 100644 --- a/include/libs/wal/wal.h +++ b/include/libs/wal/wal.h @@ -141,6 +141,8 @@ typedef struct SWal { // ctl int64_t refId; TdThreadMutex mutex; + // ref + SHashObj *pRefHash; // ref -> SWalRef // path char path[WAL_PATH_LEN]; // reusable write head @@ -184,7 +186,7 @@ int32_t walRollback(SWal *, int64_t ver); // notify that previous logs can be pruned safely int32_t walBeginSnapshot(SWal *, int64_t ver); int32_t walEndSnapshot(SWal *); -void walRestoreFromSnapshot(SWal *, int64_t ver); +int32_t walRestoreFromSnapshot(SWal *, int64_t ver); // int32_t walDataCorrupted(SWal*); // read @@ -199,6 +201,16 @@ int32_t walFetchHead(SWalReadHandle *pRead, int64_t ver, SWalHead *pHead); int32_t walFetchBody(SWalReadHandle *pRead, SWalHead **ppHead); int32_t walSkipFetchBody(SWalReadHandle *pRead, const SWalHead *pHead); +typedef struct { + int64_t refId; + int64_t ver; +} SWalRef; + +SWalRef *walOpenRef(SWal *); +void walCloseRef(SWalRef *); +int32_t walRefVer(SWalRef *, int64_t ver); +int32_t walUnrefVer(SWal *); + // deprecated #if 0 int32_t walRead(SWal *, SWalHead **, int64_t ver); diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 5d6425264d..aa679b9414 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -1779,6 +1779,7 @@ SSubmitReq* tdBlockToSubmit(const SArray* pBlocks, const STSchema* pTSchema, boo } // assign data + // TODO ret = taosMemoryCalloc(1, cap + 46); ret = POINTER_SHIFT(ret, 46); ret->header.vgId = vgId; diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c index 6c2783cb5c..264dc74e36 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c @@ -215,8 +215,12 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIG_RSP, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_REPLICA_RSP, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIRM_RSP, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_HASHRANGE_RSP, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_COMPACT_RSP, mmPutNodeMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_MON_MM_INFO, mmPutNodeMsgToMonitorQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_MON_MM_LOAD, mmPutNodeMsgToMonitorQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SYNC_TIMEOUT, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_PING, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_PING_REPLY, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; @@ -226,9 +230,8 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_SYNC_REQUEST_VOTE_REPLY, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_APPEND_ENTRIES, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_APPEND_ENTRIES_REPLY, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; - - if (dmSetMgmtHandle(pArray, TDMT_MON_MM_INFO, mmPutNodeMsgToMonitorQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_MON_MM_LOAD, mmPutNodeMsgToMonitorQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SYNC_SNAPSHOT_SEND, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SYNC_SNAPSHOT_RSP, mmPutNodeMsgToSyncQueue, 1) == NULL) goto _OVER; code = 0; diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c b/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c index 42fa7b718e..53943b61b0 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmWorker.c @@ -71,7 +71,7 @@ static void mmProcessSyncQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { } static int32_t mmPutNodeMsgToWorker(SSingleWorker *pWorker, SRpcMsg *pMsg) { - dTrace("msg:%p, put into worker %s, type:%s", pMsg, pWorker->name, TMSG_INFO(pMsg->msgType)); + dTrace("msg:%p, put into %s queue, type:%s", pMsg, pWorker->name, TMSG_INFO(pMsg->msgType)); taosWriteQitem(pWorker->queue, pMsg); return 0; } diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 7cd4ddd713..1540f10ba4 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -360,6 +360,7 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_REPLICA, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIG, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIRM, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_HASHRANGE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_COMPACT, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_DND_CREATE_VNODE, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_DND_DROP_VNODE, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mnode/impl/inc/mndInt.h b/source/dnode/mnode/impl/inc/mndInt.h index 6661347e42..4869a19856 100644 --- a/source/dnode/mnode/impl/inc/mndInt.h +++ b/source/dnode/mnode/impl/inc/mndInt.h @@ -19,6 +19,7 @@ #include "mndDef.h" #include "sdb.h" +#include "sync.h" #include "syncTools.h" #include "tcache.h" #include "tdatablock.h" @@ -75,7 +76,6 @@ typedef struct { } STelemMgmt; typedef struct { - SWal *pWal; sem_t syncSem; int64_t sync; bool standby; @@ -108,6 +108,7 @@ typedef struct SMnode { SQHandle *pQuery; SHashObj *infosMeta; SHashObj *perfsMeta; + SWal *pWal; SShowMgmt showMgmt; SProfileMgmt profileMgmt; STelemMgmt telemMgmt; diff --git a/source/dnode/mnode/impl/inc/mndVgroup.h b/source/dnode/mnode/impl/inc/mndVgroup.h index 89f93d30b5..c50279889e 100644 --- a/source/dnode/mnode/impl/inc/mndVgroup.h +++ b/source/dnode/mnode/impl/inc/mndVgroup.h @@ -41,6 +41,7 @@ int32_t mndAddAlterVnodeAction(SMnode *, STrans *pTrans, SDbObj *pDb, SVgObj *pV int32_t mndAddDropVnodeAction(SMnode *, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, SVnodeGid *pVgid, bool isRedo); int32_t mndSetMoveVgroupInfoToTrans(SMnode *, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, int32_t vn, SArray *pArray); int32_t mndSetMoveVgroupsInfoToTrans(SMnode *, STrans *pTrans, int32_t dropDnodeId); +int32_t mndBuildAlterVgroupAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, SArray *pArray); void *mndBuildCreateVnodeReq(SMnode *, SDnodeObj *pDnode, SDbObj *pDb, SVgObj *pVgroup, int32_t *cntlen, bool standby); void *mndBuildDropVnodeReq(SMnode *, SDnodeObj *pDnode, SDbObj *pDb, SVgObj *pVgroup, int32_t *pContLen); diff --git a/source/dnode/mnode/impl/src/mndDb.c b/source/dnode/mnode/impl/src/mndDb.c index 61d6e7be6c..e6c93a9bfd 100644 --- a/source/dnode/mnode/impl/src/mndDb.c +++ b/source/dnode/mnode/impl/src/mndDb.c @@ -636,57 +636,6 @@ static int32_t mndSetAlterDbCommitLogs(SMnode *pMnode, STrans *pTrans, SDbObj *p return 0; } -static int32_t mndBuildAlterVgroupAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, SArray *pArray) { - if (pVgroup->replica <= 0 || pVgroup->replica == pDb->cfg.replications) { - if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, pVgroup, TDMT_VND_ALTER_CONFIG) != 0) { - return -1; - } - } else { - SVgObj newVgroup = {0}; - memcpy(&newVgroup, pVgroup, sizeof(SVgObj)); - mndTransSetSerial(pTrans); - - if (newVgroup.replica < pDb->cfg.replications) { - mInfo("db:%s, vgId:%d, vn:0 dnode:%d, will add 2 vnodes", pVgroup->dbName, pVgroup->vgId, - pVgroup->vnodeGid[0].dnodeId); - - if (mndAddVnodeToVgroup(pMnode, &newVgroup, pArray) != 0) return -1; - if (mndAddCreateVnodeAction(pMnode, pTrans, pDb, &newVgroup, &newVgroup.vnodeGid[1], true) != 0) return -1; - if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVgroup, TDMT_VND_ALTER_REPLICA) != 0) return -1; - if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVgroup) != 0) return -1; - - if (mndAddVnodeToVgroup(pMnode, &newVgroup, pArray) != 0) return -1; - if (mndAddCreateVnodeAction(pMnode, pTrans, pDb, &newVgroup, &newVgroup.vnodeGid[2], true) != 0) return -1; - if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVgroup, TDMT_VND_ALTER_REPLICA) != 0) return -1; - if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVgroup) != 0) return -1; - } else { - mInfo("db:%s, vgId:%d, will remove 2 vnodes", pVgroup->dbName, pVgroup->vgId); - - SVnodeGid del1 = {0}; - if (mndRemoveVnodeFromVgroup(pMnode, &newVgroup, pArray, &del1) != 0) return -1; - if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVgroup, TDMT_VND_ALTER_REPLICA) != 0) return -1; - if (mndAddDropVnodeAction(pMnode, pTrans, pDb, &newVgroup, &del1, true) != 0) return -1; - if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVgroup) != 0) return -1; - - SVnodeGid del2 = {0}; - if (mndRemoveVnodeFromVgroup(pMnode, &newVgroup, pArray, &del2) != 0) return -1; - if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVgroup, TDMT_VND_ALTER_REPLICA) != 0) return -1; - if (mndAddDropVnodeAction(pMnode, pTrans, pDb, &newVgroup, &del2, true) != 0) return -1; - if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVgroup) != 0) return -1; - } - - SSdbRaw *pVgRaw = mndVgroupActionEncode(&newVgroup); - if (pVgRaw == NULL) return -1; - if (mndTransAppendCommitlog(pTrans, pVgRaw) != 0) { - sdbFreeRaw(pVgRaw); - return -1; - } - sdbSetRawStatus(pVgRaw, SDB_STATUS_READY); - } - - return 0; -} - static int32_t mndSetAlterDbRedoActions(SMnode *pMnode, STrans *pTrans, SDbObj *pOld, SDbObj *pNew) { SSdb *pSdb = pMnode->pSdb; void *pIter = NULL; diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index 7be4939337..3fab870277 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -566,9 +566,11 @@ static int32_t mndDropDnode(SMnode *pMnode, SRpcMsg *pReq, SDnodeObj *pDnode, SM pRaw = NULL; if (pMObj != NULL) { + mDebug("trans:%d, mnode on dnode:%d will be dropped", pTrans->id, pDnode->id); if (mndSetDropMnodeInfoToTrans(pMnode, pTrans, pMObj) != 0) goto _OVER; } if (numOfVnodes > 0) { + mDebug("trans:%d, %d vnodes on dnode:%d will be dropped", pTrans->id, numOfVnodes, pDnode->id); if (mndSetMoveVgroupsInfoToTrans(pMnode, pTrans, pDnode->id) != 0) goto _OVER; } if (mndTransPrepare(pMnode, pTrans) != 0) goto _OVER; diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 69300c0889..813e4c30b5 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -139,10 +139,40 @@ static int32_t mndCreateDir(SMnode *pMnode, const char *path) { return 0; } +static int32_t mndInitWal(SMnode *pMnode) { + char path[PATH_MAX + 20] = {0}; + snprintf(path, sizeof(path), "%s%swal", pMnode->path, TD_DIRSEP); + SWalCfg cfg = { + .vgId = 1, + .fsyncPeriod = 0, + .rollPeriod = -1, + .segSize = -1, + .retentionPeriod = -1, + .retentionSize = -1, + .level = TAOS_WAL_FSYNC, + }; + + pMnode->pWal = walOpen(path, &cfg); + if (pMnode->pWal == NULL) { + mError("failed to open wal since %s", terrstr()); + return -1; + } + + return 0; +} + +static void mndCloseWal(SMnode *pMnode) { + if (pMnode->pWal != NULL) { + walClose(pMnode->pWal); + pMnode->pWal = NULL; + } +} + static int32_t mndInitSdb(SMnode *pMnode) { SSdbOpt opt = {0}; opt.path = pMnode->path; opt.pMnode = pMnode; + opt.pWal = pMnode->pWal; pMnode->pSdb = sdbInit(&opt); if (pMnode->pSdb == NULL) { @@ -156,7 +186,6 @@ static int32_t mndOpenSdb(SMnode *pMnode) { if (!pMnode->deploy) { return sdbReadFile(pMnode->pSdb); } else { - // return sdbDeploy(pMnode->pSdb);; return 0; } } @@ -182,6 +211,7 @@ static int32_t mndAllocStep(SMnode *pMnode, char *name, MndInitFp initFp, MndCle } static int32_t mndInitSteps(SMnode *pMnode) { + if (mndAllocStep(pMnode, "mnode-wal", mndInitWal, mndCloseWal) != 0) return -1; if (mndAllocStep(pMnode, "mnode-sdb", mndInitSdb, mndCleanupSdb) != 0) return -1; if (mndAllocStep(pMnode, "mnode-trans", mndInitTrans, mndCleanupTrans) != 0) return -1; if (mndAllocStep(pMnode, "mnode-cluster", mndInitCluster, mndCleanupCluster) != 0) return -1; @@ -201,7 +231,7 @@ static int32_t mndInitSteps(SMnode *pMnode) { if (mndAllocStep(pMnode, "mnode-offset", mndInitOffset, mndCleanupOffset) != 0) return -1; if (mndAllocStep(pMnode, "mnode-vgroup", mndInitVgroup, mndCleanupVgroup) != 0) return -1; if (mndAllocStep(pMnode, "mnode-stb", mndInitStb, mndCleanupStb) != 0) return -1; - if (mndAllocStep(pMnode, "mnode-stb", mndInitSma, mndCleanupSma) != 0) return -1; + if (mndAllocStep(pMnode, "mnode-sma", mndInitSma, mndCleanupSma) != 0) return -1; if (mndAllocStep(pMnode, "mnode-infos", mndInitInfos, mndCleanupInfos) != 0) return -1; if (mndAllocStep(pMnode, "mnode-perfs", mndInitPerfs, mndCleanupPerfs) != 0) return -1; if (mndAllocStep(pMnode, "mnode-db", mndInitDb, mndCleanupDb) != 0) return -1; @@ -376,41 +406,93 @@ int32_t mndProcessSyncMsg(SRpcMsg *pMsg) { syncRpcMsgLog2(logBuf, pMsg); taosMemoryFree(syncNodeStr); - if (pMsg->msgType == TDMT_SYNC_TIMEOUT) { - SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pMsg); - code = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg); - syncTimeoutDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_PING) { - SyncPing *pSyncMsg = syncPingFromRpcMsg2(pMsg); - code = syncNodeOnPingCb(pSyncNode, pSyncMsg); - syncPingDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_PING_REPLY) { - SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pMsg); - code = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg); - syncPingReplyDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) { - SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pMsg); - code = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg); - syncClientRequestDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE) { - SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pMsg); - code = syncNodeOnRequestVoteCb(pSyncNode, pSyncMsg); - syncRequestVoteDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) { - SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pMsg); - code = syncNodeOnRequestVoteReplyCb(pSyncNode, pSyncMsg); - syncRequestVoteReplyDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES) { - SyncAppendEntries *pSyncMsg = syncAppendEntriesFromRpcMsg2(pMsg); - code = syncNodeOnAppendEntriesCb(pSyncNode, pSyncMsg); - syncAppendEntriesDestroy(pSyncMsg); - } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) { - SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pMsg); - code = syncNodeOnAppendEntriesReplyCb(pSyncNode, pSyncMsg); - syncAppendEntriesReplyDestroy(pSyncMsg); + // ToDo: ugly! use function pointer + if (syncNodeSnapshotEnable(pSyncNode)) { + if (pMsg->msgType == TDMT_SYNC_TIMEOUT) { + SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pMsg); + code = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg); + syncTimeoutDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_PING) { + SyncPing *pSyncMsg = syncPingFromRpcMsg2(pMsg); + code = syncNodeOnPingCb(pSyncNode, pSyncMsg); + syncPingDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_PING_REPLY) { + SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pMsg); + code = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg); + syncPingReplyDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) { + SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pMsg); + code = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg); + syncClientRequestDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE) { + SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pMsg); + code = syncNodeOnRequestVoteSnapshotCb(pSyncNode, pSyncMsg); + syncRequestVoteDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) { + SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pMsg); + code = syncNodeOnRequestVoteReplySnapshotCb(pSyncNode, pSyncMsg); + syncRequestVoteReplyDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES) { + SyncAppendEntries *pSyncMsg = syncAppendEntriesFromRpcMsg2(pMsg); + code = syncNodeOnAppendEntriesSnapshotCb(pSyncNode, pSyncMsg); + syncAppendEntriesDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) { + SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pMsg); + code = syncNodeOnAppendEntriesReplySnapshotCb(pSyncNode, pSyncMsg); + syncAppendEntriesReplyDestroy(pSyncMsg); + + } else if (pMsg->msgType == TDMT_SYNC_SNAPSHOT_SEND) { + SyncSnapshotSend *pSyncMsg = syncSnapshotSendFromRpcMsg2(pMsg); + code = syncNodeOnSnapshotSendCb(pSyncNode, pSyncMsg); + syncSnapshotSendDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_SNAPSHOT_RSP) { + SyncSnapshotRsp *pSyncMsg = syncSnapshotRspFromRpcMsg2(pMsg); + code = syncNodeOnSnapshotRspCb(pSyncNode, pSyncMsg); + syncSnapshotRspDestroy(pSyncMsg); + + } else { + mError("failed to process msg:%p since invalid type:%s", pMsg, TMSG_INFO(pMsg->msgType)); + code = TAOS_SYNC_PROPOSE_OTHER_ERROR; + } + } else { - mError("failed to process msg:%p since invalid type:%s", pMsg, TMSG_INFO(pMsg->msgType)); - code = TAOS_SYNC_PROPOSE_OTHER_ERROR; + if (pMsg->msgType == TDMT_SYNC_TIMEOUT) { + SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pMsg); + code = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg); + syncTimeoutDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_PING) { + SyncPing *pSyncMsg = syncPingFromRpcMsg2(pMsg); + code = syncNodeOnPingCb(pSyncNode, pSyncMsg); + syncPingDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_PING_REPLY) { + SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pMsg); + code = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg); + syncPingReplyDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) { + SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pMsg); + code = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg); + syncClientRequestDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE) { + SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pMsg); + code = syncNodeOnRequestVoteCb(pSyncNode, pSyncMsg); + syncRequestVoteDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) { + SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pMsg); + code = syncNodeOnRequestVoteReplyCb(pSyncNode, pSyncMsg); + syncRequestVoteReplyDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES) { + SyncAppendEntries *pSyncMsg = syncAppendEntriesFromRpcMsg2(pMsg); + code = syncNodeOnAppendEntriesCb(pSyncNode, pSyncMsg); + syncAppendEntriesDestroy(pSyncMsg); + } else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) { + SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pMsg); + code = syncNodeOnAppendEntriesReplyCb(pSyncNode, pSyncMsg); + syncAppendEntriesReplyDestroy(pSyncMsg); + } else { + mError("failed to process msg:%p since invalid type:%s", pMsg, TMSG_INFO(pMsg->msgType)); + code = TAOS_SYNC_PROPOSE_OTHER_ERROR; + } } mndReleaseSyncRef(pMnode); diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index ce025d5547..a0daa72d9a 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -17,15 +17,27 @@ #include "mndSync.h" #include "mndTrans.h" -int32_t mndSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) { +static int32_t mndSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) { SMsgHead *pHead = pMsg->pCont; pHead->contLen = htonl(pHead->contLen); pHead->vgId = htonl(pHead->vgId); - return tmsgPutToQueue(msgcb, SYNC_QUEUE, pMsg); + int32_t code = tmsgPutToQueue(msgcb, SYNC_QUEUE, pMsg); + if (code != 0) { + rpcFreeCont(pMsg->pCont); + pMsg->pCont = NULL; + } + return code; } -int32_t mndSyncSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { return tmsgSendReq(pEpSet, pMsg); } +static int32_t mndSyncSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { + int32_t code = tmsgSendReq(pEpSet, pMsg); + if (code != 0) { + rpcFreeCont(pMsg->pCont); + pMsg->pCont = NULL; + } + return code; +} void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { SMnode *pMnode = pFsm->data; @@ -34,7 +46,7 @@ void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbM int32_t transId = sdbGetIdFromRaw(pMnode->pSdb, pRaw); pMgmt->errCode = cbMeta.code; - mTrace("trans:%d, is proposed, savedTransId:%d code:0x%x, ver:%" PRId64 " term:%" PRId64 " role:%s raw:%p", transId, + mDebug("trans:%d, is proposed, saved:%d code:0x%x, index:%" PRId64 " term:%" PRId64 " role:%s raw:%p", transId, pMgmt->transId, cbMeta.code, cbMeta.index, cbMeta.term, syncStr(cbMeta.state), pRaw); if (pMgmt->errCode == 0) { @@ -50,6 +62,10 @@ void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbM tsem_post(&pMgmt->syncSem); } else { if (cbMeta.index - sdbGetApplyIndex(pMnode->pSdb) > 100) { + SSnapshotMeta sMeta = {0}; + if (syncGetSnapshotMeta(pMnode->syncMgmt.sync, &sMeta) == 0) { + sdbSetCurConfig(pMnode->pSdb, sMeta.lastConfigIndex); + } sdbWriteFile(pMnode->pSdb); } } @@ -57,13 +73,20 @@ void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbM int32_t mndSyncGetSnapshot(struct SSyncFSM *pFsm, SSnapshot *pSnapshot) { SMnode *pMnode = pFsm->data; - pSnapshot->lastApplyIndex = sdbGetApplyIndex(pMnode->pSdb); - pSnapshot->lastApplyTerm = sdbGetApplyTerm(pMnode->pSdb); + pSnapshot->lastApplyIndex = sdbGetCommitIndex(pMnode->pSdb); + pSnapshot->lastApplyTerm = sdbGetCommitTerm(pMnode->pSdb); + pSnapshot->lastConfigIndex = sdbGetCurConfig(pMnode->pSdb); return 0; } void mndRestoreFinish(struct SSyncFSM *pFsm) { SMnode *pMnode = pFsm->data; + + SSnapshotMeta sMeta = {0}; + if (syncGetSnapshotMeta(pMnode->syncMgmt.sync, &sMeta) == 0) { + sdbSetCurConfig(pMnode->pSdb, sMeta.lastConfigIndex); + } + if (!pMnode->deploy) { mInfo("mnode sync restore finished, and will handle outstanding transactions"); mndTransPullup(pMnode); @@ -78,8 +101,8 @@ void mndReConfig(struct SSyncFSM *pFsm, SSyncCfg newCfg, SReConfigCbMeta cbMeta) SSyncMgmt *pMgmt = &pMnode->syncMgmt; pMgmt->errCode = cbMeta.code; - mInfo("trans:-1, sync reconfig is proposed, savedTransId:%d code:0x%x, curTerm:%" PRId64 " term:%" PRId64, - pMgmt->transId, cbMeta.code, cbMeta.index, cbMeta.term); + mInfo("trans:-1, sync reconfig is proposed, saved:%d code:0x%x, index:%" PRId64 " term:%" PRId64, pMgmt->transId, + cbMeta.code, cbMeta.index, cbMeta.term); if (pMgmt->transId == -1) { if (pMgmt->errCode != 0) { @@ -144,29 +167,12 @@ SSyncFSM *mndSyncMakeFsm(SMnode *pMnode) { int32_t mndInitSync(SMnode *pMnode) { SSyncMgmt *pMgmt = &pMnode->syncMgmt; - char path[PATH_MAX + 20] = {0}; - snprintf(path, sizeof(path), "%s%swal", pMnode->path, TD_DIRSEP); - SWalCfg cfg = { - .vgId = 1, - .fsyncPeriod = 0, - .rollPeriod = -1, - .segSize = -1, - .retentionPeriod = -1, - .retentionSize = -1, - .level = TAOS_WAL_FSYNC, - }; - - pMgmt->pWal = walOpen(path, &cfg); - if (pMgmt->pWal == NULL) { - mError("failed to open wal since %s", terrstr()); - return -1; - } - SSyncInfo syncInfo = {.vgId = 1, .FpSendMsg = mndSyncSendMsg, .FpEqMsg = mndSyncEqMsg}; snprintf(syncInfo.path, sizeof(syncInfo.path), "%s%ssync", pMnode->path, TD_DIRSEP); - syncInfo.pWal = pMgmt->pWal; + syncInfo.pWal = pMnode->pWal; syncInfo.pFsm = mndSyncMakeFsm(pMnode); syncInfo.isStandBy = pMgmt->standby; + syncInfo.snapshotEnable = true; SSyncCfg *pCfg = &syncInfo.syncCfg; pCfg->replicaNum = pMnode->replica; @@ -196,10 +202,6 @@ void mndCleanupSync(SMnode *pMnode) { mDebug("mnode sync is stopped, id:%" PRId64, pMgmt->sync); tsem_destroy(&pMgmt->syncSem); - if (pMgmt->pWal != NULL) { - walClose(pMgmt->pWal); - } - memset(pMgmt, 0, sizeof(SSyncMgmt)); } diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index c2097c069b..1e98a3bbf9 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -922,7 +922,7 @@ static int32_t mndTransSendSingleMsg(SMnode *pMnode, STrans *pTrans, STransActio char detail[1024] = {0}; int32_t len = snprintf(detail, sizeof(detail), "msgType:%s numOfEps:%d inUse:%d", TMSG_INFO(pAction->msgType), pAction->epSet.numOfEps, pAction->epSet.inUse); - for (int32_t i = 0; i < pTrans->lastErrorEpset.numOfEps; ++i) { + for (int32_t i = 0; i < pAction->epSet.numOfEps; ++i) { len += snprintf(detail + len, sizeof(detail) - len, " ep:%d-%s:%u", i, pAction->epSet.eps[i].fqdn, pAction->epSet.eps[i].port); } @@ -1085,6 +1085,8 @@ static int32_t mndTransExecuteRedoActionsSerial(SMnode *pMnode, STrans *pTrans) } if (code == 0) { + if (!pMnode->deploy && !mndIsMaster(pMnode)) break; + pTrans->code = 0; pTrans->redoActionPos++; mDebug("trans:%d, %s:%d is executed and need sync to other mnodes", pTrans->id, mndTransStr(pAction->stage), @@ -1386,6 +1388,10 @@ void mndTransPullup(SMnode *pMnode) { mndReleaseTrans(pMnode, pTrans); } + SSnapshotMeta sMeta = {0}; + if (syncGetSnapshotMeta(pMnode->syncMgmt.sync, &sMeta) == 0) { + sdbSetCurConfig(pMnode->pSdb, sMeta.lastConfigIndex); + } sdbWriteFile(pMnode->pSdb); taosArrayDestroy(pArray); } diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index 5244bc657b..76e65ddd92 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -55,6 +55,7 @@ int32_t mndInitVgroup(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_VND_ALTER_REPLICA_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_VND_ALTER_CONFIG_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_VND_ALTER_CONFIRM_RSP, mndTransProcessRsp); + mndSetMsgHandle(pMnode, TDMT_VND_ALTER_HASHRANGE_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_DND_DROP_VNODE_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_VND_COMPACT_RSP, mndTransProcessRsp); @@ -1166,7 +1167,155 @@ _OVER: return code; } -static int32_t mndProcessSplitVgroupMsg(SRpcMsg *pReq) { return 0; } +int32_t mndBuildAlterVgroupAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, SArray *pArray) { + if (pVgroup->replica <= 0 || pVgroup->replica == pDb->cfg.replications) { + if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, pVgroup, TDMT_VND_ALTER_CONFIG) != 0) { + return -1; + } + } else { + SVgObj newVgroup = {0}; + memcpy(&newVgroup, pVgroup, sizeof(SVgObj)); + mndTransSetSerial(pTrans); + + if (newVgroup.replica < pDb->cfg.replications) { + mInfo("db:%s, vgId:%d, vn:0 dnode:%d, will add 2 vnodes", pVgroup->dbName, pVgroup->vgId, + pVgroup->vnodeGid[0].dnodeId); + + if (mndAddVnodeToVgroup(pMnode, &newVgroup, pArray) != 0) return -1; + if (mndAddCreateVnodeAction(pMnode, pTrans, pDb, &newVgroup, &newVgroup.vnodeGid[1], true) != 0) return -1; + if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVgroup, TDMT_VND_ALTER_REPLICA) != 0) return -1; + if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVgroup) != 0) return -1; + + if (mndAddVnodeToVgroup(pMnode, &newVgroup, pArray) != 0) return -1; + if (mndAddCreateVnodeAction(pMnode, pTrans, pDb, &newVgroup, &newVgroup.vnodeGid[2], true) != 0) return -1; + if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVgroup, TDMT_VND_ALTER_REPLICA) != 0) return -1; + if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVgroup) != 0) return -1; + } else if (newVgroup.replica > pDb->cfg.replications) { + mInfo("db:%s, vgId:%d, will remove 2 vnodes", pVgroup->dbName, pVgroup->vgId); + + SVnodeGid del1 = {0}; + if (mndRemoveVnodeFromVgroup(pMnode, &newVgroup, pArray, &del1) != 0) return -1; + if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVgroup, TDMT_VND_ALTER_REPLICA) != 0) return -1; + if (mndAddDropVnodeAction(pMnode, pTrans, pDb, &newVgroup, &del1, true) != 0) return -1; + if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVgroup) != 0) return -1; + + SVnodeGid del2 = {0}; + if (mndRemoveVnodeFromVgroup(pMnode, &newVgroup, pArray, &del2) != 0) return -1; + if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVgroup, TDMT_VND_ALTER_REPLICA) != 0) return -1; + if (mndAddDropVnodeAction(pMnode, pTrans, pDb, &newVgroup, &del2, true) != 0) return -1; + if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVgroup) != 0) return -1; + } else { + } + + SSdbRaw *pVgRaw = mndVgroupActionEncode(&newVgroup); + if (pVgRaw == NULL) return -1; + if (mndTransAppendCommitlog(pTrans, pVgRaw) != 0) { + sdbFreeRaw(pVgRaw); + return -1; + } + sdbSetRawStatus(pVgRaw, SDB_STATUS_READY); + } + + return 0; +} + +static int32_t mndAddAdjustVnodeHashRangeAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup) { + return 0; +} + +static int32_t mndSplitVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj *pVgroup) { + int32_t code = -1; + SSdbRaw *pRaw = NULL; + STrans *pTrans = NULL; + SArray *pArray = mndBuildDnodesArray(pMnode, 0); + + pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_GLOBAL, pReq); + if (pTrans == NULL) goto _OVER; + mndTransSetSerial(pTrans); + mDebug("trans:%d, used to split vgroup, vgId:%d", pTrans->id, pVgroup->vgId); + + SVgObj newVg1 = {0}; + memcpy(&newVg1, pVgroup, sizeof(SVgObj)); + mInfo("vgId:%d, vgroup info before split, replica:%d hashBegin:%u hashEnd:%u", newVg1.vgId, newVg1.replica, + newVg1.hashBegin, newVg1.hashEnd); + for (int32_t i = 0; i < newVg1.replica; ++i) { + mInfo("vgId:%d, vnode:%d dnode:%d", newVg1.vgId, i, newVg1.vnodeGid[i].dnodeId); + } + + if (newVg1.replica == 1) { + if (mndAddVnodeToVgroup(pMnode, &newVg1, pArray) != 0) goto _OVER; + if (mndAddCreateVnodeAction(pMnode, pTrans, pDb, &newVg1, &newVg1.vnodeGid[1], true) != 0) goto _OVER; + if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVg1, TDMT_VND_ALTER_REPLICA) != 0) goto _OVER; + if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg1) != 0) goto _OVER; + } else if (newVg1.replica == 3) { + SVnodeGid del1 = {0}; + if (mndRemoveVnodeFromVgroup(pMnode, &newVg1, pArray, &del1) != 0) goto _OVER; + if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVg1, TDMT_VND_ALTER_REPLICA) != 0) goto _OVER; + if (mndAddDropVnodeAction(pMnode, pTrans, pDb, &newVg1, &del1, true) != 0) goto _OVER; + if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg1) != 0) goto _OVER; + } else { + goto _OVER; + } + + SVgObj newVg2 = {0}; + memcpy(&newVg1, &newVg2, sizeof(SVgObj)); + newVg1.replica = 1; + newVg1.hashEnd = (newVg1.hashBegin + newVg1.hashEnd) / 2; + memset(&newVg1.vnodeGid[1], 0, sizeof(SVnodeGid)); + + newVg2.replica = 1; + newVg2.hashBegin = newVg1.hashEnd + 1; + memcpy(&newVg2.vnodeGid[0], &newVg2.vnodeGid[1], sizeof(SVnodeGid)); + memset(&newVg1.vnodeGid[1], 0, sizeof(SVnodeGid)); + + if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVg1, TDMT_VND_ALTER_HASHRANGE) != 0) goto _OVER; + if (mndAddAlterVnodeAction(pMnode, pTrans, pDb, &newVg2, TDMT_VND_ALTER_HASHRANGE) != 0) goto _OVER; + + // adjust vgroup + if (mndBuildAlterVgroupAction(pMnode, pTrans, pDb, &newVg1, pArray) != 0) goto _OVER; + if (mndBuildAlterVgroupAction(pMnode, pTrans, pDb, &newVg2, pArray) != 0) goto _OVER; + +_OVER: + mndTransDrop(pTrans); + sdbFreeRaw(pRaw); + return code; +} + +static int32_t mndProcessSplitVgroupMsg(SRpcMsg *pReq) { + SMnode *pMnode = pReq->info.node; + int32_t code = -1; + int32_t vgId = 2; + SUserObj *pUser = NULL; + SVgObj *pVgroup = NULL; + SDbObj *pDb = NULL; + + mDebug("vgId:%d, start to split", vgId); + + pVgroup = mndAcquireVgroup(pMnode, vgId); + if (pVgroup == NULL) goto _OVER; + + pDb = mndAcquireDb(pMnode, pVgroup->dbName); + if (pDb == NULL) goto _OVER; + + pUser = mndAcquireUser(pMnode, pReq->conn.user); + if (pUser == NULL) { + terrno = TSDB_CODE_MND_NO_USER_FROM_CONN; + goto _OVER; + } + + if (mndCheckNodeAuth(pUser) != 0) { + goto _OVER; + } + + code = mndSplitVgroup(pMnode, pReq, pDb, pVgroup); + if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; + +_OVER: + mndReleaseUser(pMnode, pUser); + mndReleaseVgroup(pMnode, pVgroup); + mndReleaseDb(pMnode, pDb); + return code; +} static int32_t mndSetBalanceVgroupInfoToTrans(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, SDnodeObj *pSrc, SDnodeObj *pDst) { diff --git a/source/dnode/mnode/sdb/inc/sdb.h b/source/dnode/mnode/sdb/inc/sdb.h index 9444543804..8536c451b7 100644 --- a/source/dnode/mnode/sdb/inc/sdb.h +++ b/source/dnode/mnode/sdb/inc/sdb.h @@ -22,6 +22,7 @@ #include "tlockfree.h" #include "tlog.h" #include "tmsg.h" +#include "wal.h" #ifdef __cplusplus extern "C" { @@ -165,12 +166,14 @@ typedef struct SSdbRow { typedef struct SSdb { SMnode *pMnode; + SWal *pWal; char *currDir; char *tmpDir; int64_t lastCommitVer; int64_t lastCommitTerm; int64_t curVer; int64_t curTerm; + int64_t curConfig; int64_t tableVer[SDB_MAX]; int64_t maxId[SDB_MAX]; EKeyType keyTypes[SDB_MAX]; @@ -205,6 +208,7 @@ typedef struct { typedef struct SSdbOpt { const char *path; SMnode *pMnode; + SWal *pWal; } SSdbOpt; /** @@ -358,9 +362,13 @@ int64_t sdbGetTableVer(SSdb *pSdb, ESdbType type); * @return int32_t The current index of sdb */ void sdbSetApplyIndex(SSdb *pSdb, int64_t index); -int64_t sdbGetApplyIndex(SSdb *pSdb); void sdbSetApplyTerm(SSdb *pSdb, int64_t term); +void sdbSetCurConfig(SSdb *pSdb, int64_t config); +int64_t sdbGetApplyIndex(SSdb *pSdb); int64_t sdbGetApplyTerm(SSdb *pSdb); +int64_t sdbGetCommitIndex(SSdb *pSdb); +int64_t sdbGetCommitTerm(SSdb *pSdb); +int64_t sdbGetCurConfig(SSdb *pSdb); SSdbRaw *sdbAllocRaw(ESdbType type, int8_t sver, int32_t dataLen); void sdbFreeRaw(SSdbRaw *pRaw); diff --git a/source/dnode/mnode/sdb/src/sdb.c b/source/dnode/mnode/sdb/src/sdb.c index 0526ea5c2d..c11aaaaa8a 100644 --- a/source/dnode/mnode/sdb/src/sdb.c +++ b/source/dnode/mnode/sdb/src/sdb.c @@ -52,10 +52,12 @@ SSdb *sdbInit(SSdbOpt *pOption) { pSdb->keyTypes[i] = SDB_KEY_INT32; } + pSdb->pWal = pOption->pWal; pSdb->curVer = -1; pSdb->curTerm = -1; pSdb->lastCommitVer = -1; pSdb->lastCommitTerm = -1; + pSdb->curConfig = -1; pSdb->pMnode = pOption->pMnode; taosThreadMutexInit(&pSdb->filelock, NULL); mDebug("sdb init successfully"); @@ -159,8 +161,16 @@ static int32_t sdbCreateDir(SSdb *pSdb) { void sdbSetApplyIndex(SSdb *pSdb, int64_t index) { pSdb->curVer = index; } -int64_t sdbGetApplyIndex(SSdb *pSdb) { return pSdb->curVer; } - void sdbSetApplyTerm(SSdb *pSdb, int64_t term) { pSdb->curTerm = term; } +void sdbSetCurConfig(SSdb *pSdb, int64_t config) { pSdb->curConfig = config; } + +int64_t sdbGetApplyIndex(SSdb *pSdb) { return pSdb->curVer; } + int64_t sdbGetApplyTerm(SSdb *pSdb) { return pSdb->curTerm; } + +int64_t sdbGetCommitIndex(SSdb *pSdb) { return pSdb->lastCommitVer; } + +int64_t sdbGetCommitTerm(SSdb *pSdb) { return pSdb->lastCommitTerm; } + +int64_t sdbGetCurConfig(SSdb *pSdb) { return pSdb->curConfig; } \ No newline at end of file diff --git a/source/dnode/mnode/sdb/src/sdbFile.c b/source/dnode/mnode/sdb/src/sdbFile.c index 83135491a9..f98ecf5343 100644 --- a/source/dnode/mnode/sdb/src/sdbFile.c +++ b/source/dnode/mnode/sdb/src/sdbFile.c @@ -110,6 +110,16 @@ static int32_t sdbReadFileHead(SSdb *pSdb, TdFilePtr pFile) { return -1; } + ret = taosReadFile(pFile, &pSdb->curConfig, sizeof(int64_t)); + if (ret < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + if (ret != sizeof(int64_t)) { + terrno = TSDB_CODE_FILE_CORRUPTED; + return -1; + } + for (int32_t i = 0; i < SDB_TABLE_SIZE; ++i) { int64_t maxId = 0; ret = taosReadFile(pFile, &maxId, sizeof(int64_t)); @@ -173,6 +183,11 @@ static int32_t sdbWriteFileHead(SSdb *pSdb, TdFilePtr pFile) { return -1; } + if (taosWriteFile(pFile, &pSdb->curConfig, sizeof(int64_t)) != sizeof(int64_t)) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + for (int32_t i = 0; i < SDB_TABLE_SIZE; ++i) { int64_t maxId = 0; if (i < SDB_MAX) { @@ -288,8 +303,8 @@ static int32_t sdbReadFileImp(SSdb *pSdb) { pSdb->lastCommitVer = pSdb->curVer; pSdb->lastCommitTerm = pSdb->curTerm; memcpy(pSdb->tableVer, tableVer, sizeof(tableVer)); - mDebug("read sdb file:%s successfully, ver:%" PRId64 " term:%" PRId64, file, pSdb->lastCommitVer, - pSdb->lastCommitTerm); + mDebug("read sdb file:%s successfully, index:%" PRId64 " term:%" PRId64 " config:%" PRId64, file, pSdb->lastCommitVer, + pSdb->lastCommitTerm, pSdb->curConfig); _OVER: taosCloseFile(&pFile); @@ -426,12 +441,23 @@ static int32_t sdbWriteFileImp(SSdb *pSdb) { } int32_t sdbWriteFile(SSdb *pSdb) { + int32_t code = 0; if (pSdb->curVer == pSdb->lastCommitVer) { return 0; } taosThreadMutexLock(&pSdb->filelock); - int32_t code = sdbWriteFileImp(pSdb); + if (pSdb->pWal != NULL) { + code = walBeginSnapshot(pSdb->pWal, pSdb->curVer); + } + if (code == 0) { + code = sdbWriteFileImp(pSdb); + } + if (code == 0) { + if (pSdb->pWal != NULL) { + code = walEndSnapshot(pSdb->pWal); + } + } if (code != 0) { mError("failed to write sdb file since %s", terrstr()); } @@ -496,6 +522,9 @@ int32_t sdbStartRead(SSdb *pSdb, SSdbIter **ppIter) { snprintf(datafile, sizeof(datafile), "%s%ssdb.data", pSdb->currDir, TD_DIRSEP); taosThreadMutexLock(&pSdb->filelock); + int64_t commitIndex = pSdb->lastCommitVer; + int64_t commitTerm = pSdb->lastCommitTerm; + int64_t curConfig = pSdb->curConfig; if (taosCopyFile(datafile, pIter->name) < 0) { taosThreadMutexUnlock(&pSdb->filelock); terrno = TAOS_SYSTEM_ERROR(errno); @@ -514,7 +543,8 @@ int32_t sdbStartRead(SSdb *pSdb, SSdbIter **ppIter) { } *ppIter = pIter; - mInfo("sdbiter:%p, is created to read snapshot, file:%s", pIter, pIter->name); + mInfo("sdbiter:%p, is created to read snapshot, index:%" PRId64 " term:%" PRId64 " config:%" PRId64 " file:%s", pIter, + commitIndex, commitTerm, curConfig, pIter->name); return 0; } diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 55ab5f5729..eb489a6d81 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -24,6 +24,7 @@ static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t version, void *pReq static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessCreateTSmaReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp); +static int32_t vnodeProcessAlterHasnRangeReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessWriteMsg(SVnode *pVnode, int64_t version, SRpcMsg *pMsg, SRpcMsg *pRsp); static int32_t vnodeProcessExpWndsClrReq(SVnode *pVnode, void *pReq, int32_t len, SRpcMsg *pRsp); @@ -164,6 +165,9 @@ int32_t vnodeProcessWriteReq(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp case TDMT_VND_ALTER_CONFIRM: vnodeProcessAlterConfirmReq(pVnode, version, pReq, len, pRsp); break; + case TDMT_VND_ALTER_HASHRANGE: + vnodeProcessAlterHasnRangeReq(pVnode, version, pReq, len, pRsp); + break; case TDMT_VND_ALTER_CONFIG: break; default: @@ -933,4 +937,14 @@ _err: vError("vgId:%d, success to process expWnds clear for tsma %" PRIi64 " version %" PRIi64 " since %s", TD_VID(pVnode), req.indexUid, req.version, terrstr()); return -1; +} + +static int32_t vnodeProcessAlterHasnRangeReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp) { + vInfo("vgId:%d, alter hashrange msg will be processed", TD_VID(pVnode)); + + // todo + // 1. stop work + // 2. adjust hash range / compact / remove wals / rename vgroups + // 3. reload sync + return 0; } \ No newline at end of file diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 203eecd8ab..816c0cfac9 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -252,6 +252,8 @@ static SSyncFSM *vnodeSyncMakeFsm(SVnode *pVnode) { int32_t vnodeSyncOpen(SVnode *pVnode, char *path) { SSyncInfo syncInfo = { + .isStandBy = false, + .snapshotEnable = false, .vgId = pVnode->config.vgId, .isStandBy = pVnode->config.standby, .syncCfg = pVnode->config.syncCfg, diff --git a/source/libs/index/inc/indexCache.h b/source/libs/index/inc/indexCache.h index 6c95eb987b..8b5885d58b 100644 --- a/source/libs/index/inc/indexCache.h +++ b/source/libs/index/inc/indexCache.h @@ -62,25 +62,25 @@ typedef struct CacheTerm { } CacheTerm; // -IndexCache* indexCacheCreate(SIndex* idx, uint64_t suid, const char* colName, int8_t type); +IndexCache* idxCacheCreate(SIndex* idx, uint64_t suid, const char* colName, int8_t type); -void indexCacheForceToMerge(void* cache); -void indexCacheDestroy(void* cache); -void indexCacheBroadcast(void* cache); -void indexCacheWait(void* cache); +void idxCacheForceToMerge(void* cache); +void idxCacheDestroy(void* cache); +void idxCacheBroadcast(void* cache); +void idxCacheWait(void* cache); -Iterate* indexCacheIteratorCreate(IndexCache* cache); +Iterate* idxCacheIteratorCreate(IndexCache* cache); void idxCacheIteratorDestroy(Iterate* iiter); -int indexCachePut(void* cache, SIndexTerm* term, uint64_t uid); +int idxCachePut(void* cache, SIndexTerm* term, uint64_t uid); // int indexCacheGet(void *cache, uint64_t *rst); -int indexCacheSearch(void* cache, SIndexTermQuery* query, SIdxTRslt* tr, STermValueType* s); +int idxCacheSearch(void* cache, SIndexTermQuery* query, SIdxTRslt* tr, STermValueType* s); -void indexCacheRef(IndexCache* cache); -void indexCacheUnRef(IndexCache* cache); +void idxCacheRef(IndexCache* cache); +void idxCacheUnRef(IndexCache* cache); -void indexCacheDebug(IndexCache* cache); +void idxCacheDebug(IndexCache* cache); void idxCacheDestroyImm(IndexCache* cache); #ifdef __cplusplus diff --git a/source/libs/index/inc/indexComm.h b/source/libs/index/inc/indexComm.h index bccba98116..09fd2f3555 100644 --- a/source/libs/index/inc/indexComm.h +++ b/source/libs/index/inc/indexComm.h @@ -34,11 +34,11 @@ typedef enum { MATCH, CONTINUE, BREAK } TExeCond; typedef TExeCond (*_cache_range_compare)(void* a, void* b, int8_t type); -__compar_fn_t indexGetCompar(int8_t type); +__compar_fn_t idxGetCompar(int8_t type); TExeCond tCompare(__compar_fn_t func, int8_t cmpType, void* a, void* b, int8_t dType); TExeCond tDoCompare(__compar_fn_t func, int8_t cmpType, void* a, void* b); -_cache_range_compare indexGetCompare(RangeType ty); +_cache_range_compare idxGetCompare(RangeType ty); int32_t idxConvertData(void* src, int8_t type, void** dst); int32_t idxConvertDataToStr(void* src, int8_t type, void** dst); diff --git a/source/libs/index/inc/indexInt.h b/source/libs/index/inc/indexInt.h index 47f7260d3a..906cbb6a20 100644 --- a/source/libs/index/inc/indexInt.h +++ b/source/libs/index/inc/indexInt.h @@ -133,24 +133,24 @@ typedef struct TFileCacheKey { } ICacheKey; int idxFlushCacheToTFile(SIndex* sIdx, void*, bool quit); -int64_t indexAddRef(void* p); -int32_t indexRemoveRef(int64_t ref); -void indexAcquireRef(int64_t ref); -void indexReleaseRef(int64_t ref); +int64_t idxAddRef(void* p); +int32_t idxRemoveRef(int64_t ref); +void idxAcquireRef(int64_t ref); +void idxReleaseRef(int64_t ref); -int32_t indexSerialCacheKey(ICacheKey* key, char* buf); +int32_t idxSerialCacheKey(ICacheKey* key, char* buf); // int32_t indexSerialKey(ICacheKey* key, char* buf); // int32_t indexSerialTermKey(SIndexTerm* itm, char* buf); -#define INDEX_TYPE_CONTAIN_EXTERN_TYPE(ty, exTy) (((ty >> 4) & (exTy)) != 0) +#define IDX_TYPE_CONTAIN_EXTERN_TYPE(ty, exTy) (((ty >> 4) & (exTy)) != 0) -#define INDEX_TYPE_GET_TYPE(ty) (ty & 0x0F) +#define IDX_TYPE_GET_TYPE(ty) (ty & 0x0F) -#define INDEX_TYPE_ADD_EXTERN_TYPE(ty, exTy) \ - do { \ - uint8_t oldTy = ty; \ - ty = (ty >> 4) | exTy; \ - ty = (ty << 4) | oldTy; \ +#define IDX_TYPE_ADD_EXTERN_TYPE(ty, exTy) \ + do { \ + uint8_t oldTy = ty; \ + ty = (ty >> 4) | exTy; \ + ty = (ty << 4) | oldTy; \ } while (0) #ifdef __cplusplus diff --git a/source/libs/index/inc/indexTfile.h b/source/libs/index/inc/indexTfile.h index ca55aa93da..6cfea5bc0b 100644 --- a/source/libs/index/inc/indexTfile.h +++ b/source/libs/index/inc/indexTfile.h @@ -117,10 +117,10 @@ int tfileWriterPut(TFileWriter* tw, void* data, bool order); int tfileWriterFinish(TFileWriter* tw); // -IndexTFile* indexTFileCreate(const char* path); -void indexTFileDestroy(IndexTFile* tfile); -int indexTFilePut(void* tfile, SIndexTerm* term, uint64_t uid); -int indexTFileSearch(void* tfile, SIndexTermQuery* query, SIdxTRslt* tr); +IndexTFile* idxTFileCreate(const char* path); +void idxTFileDestroy(IndexTFile* tfile); +int idxTFilePut(void* tfile, SIndexTerm* term, uint64_t uid); +int idxTFileSearch(void* tfile, SIndexTermQuery* query, SIdxTRslt* tr); Iterate* tfileIteratorCreate(TFileReader* reader); void tfileIteratorDestroy(Iterate* iterator); diff --git a/source/libs/index/src/index.c b/source/libs/index/src/index.c index 9b8bee5623..04d7e04b30 100644 --- a/source/libs/index/src/index.c +++ b/source/libs/index/src/index.c @@ -90,7 +90,7 @@ static void idxMergeCacheAndTFile(SArray* result, IterateValue* icache, IterateV // static int32_t indexSerialTermKey(SIndexTerm* itm, char* buf); // int32_t indexSerialKey(ICacheKey* key, char* buf); -static void indexPost(void* idx) { +static void idxPost(void* idx) { SIndex* pIdx = idx; tsem_post(&pIdx->sem); } @@ -106,8 +106,8 @@ int indexOpen(SIndexOpts* opts, const char* path, SIndex** index) { return -1; } - // sIdx->cache = (void*)indexCacheCreate(sIdx); - sIdx->tindex = indexTFileCreate(path); + // sIdx->cache = (void*)idxCacheCreate(sIdx); + sIdx->tindex = idxTFileCreate(path); if (sIdx->tindex == NULL) { goto END; } @@ -118,8 +118,8 @@ int indexOpen(SIndexOpts* opts, const char* path, SIndex** index) { taosThreadMutexInit(&sIdx->mtx, NULL); tsem_init(&sIdx->sem, 0, 0); - sIdx->refId = indexAddRef(sIdx); - indexAcquireRef(sIdx->refId); + sIdx->refId = idxAddRef(sIdx); + idxAcquireRef(sIdx->refId); *index = sIdx; return 0; @@ -136,7 +136,7 @@ void indexDestroy(void* handle) { SIndex* sIdx = handle; taosThreadMutexDestroy(&sIdx->mtx); tsem_destroy(&sIdx->sem); - indexTFileDestroy(sIdx->tindex); + idxTFileDestroy(sIdx->tindex); taosMemoryFree(sIdx->path); taosMemoryFree(sIdx); return; @@ -147,33 +147,33 @@ void indexClose(SIndex* sIdx) { void* iter = taosHashIterate(sIdx->colObj, NULL); while (iter) { IndexCache** pCache = iter; - indexCacheForceToMerge((void*)(*pCache)); + idxCacheForceToMerge((void*)(*pCache)); indexInfo("%s wait to merge", (*pCache)->colName); indexWait((void*)(sIdx)); indexInfo("%s finish to wait", (*pCache)->colName); iter = taosHashIterate(sIdx->colObj, iter); - indexCacheUnRef(*pCache); + idxCacheUnRef(*pCache); } taosHashCleanup(sIdx->colObj); sIdx->colObj = NULL; } - indexReleaseRef(sIdx->refId); - indexRemoveRef(sIdx->refId); + idxReleaseRef(sIdx->refId); + idxRemoveRef(sIdx->refId); } -int64_t indexAddRef(void* p) { +int64_t idxAddRef(void* p) { // impl return taosAddRef(indexRefMgt, p); } -int32_t indexRemoveRef(int64_t ref) { +int32_t idxRemoveRef(int64_t ref) { // impl later return taosRemoveRef(indexRefMgt, ref); } -void indexAcquireRef(int64_t ref) { +void idxAcquireRef(int64_t ref) { // impl taosAcquireRef(indexRefMgt, ref); } -void indexReleaseRef(int64_t ref) { +void idxReleaseRef(int64_t ref) { // impl taosReleaseRef(indexRefMgt, ref); } @@ -186,11 +186,11 @@ int indexPut(SIndex* index, SIndexMultiTerm* fVals, uint64_t uid) { char buf[128] = {0}; ICacheKey key = {.suid = p->suid, .colName = p->colName, .nColName = strlen(p->colName), .colType = p->colType}; - int32_t sz = indexSerialCacheKey(&key, buf); + int32_t sz = idxSerialCacheKey(&key, buf); IndexCache** cache = taosHashGet(index->colObj, buf, sz); if (cache == NULL) { - IndexCache* pCache = indexCacheCreate(index, p->suid, p->colName, p->colType); + IndexCache* pCache = idxCacheCreate(index, p->suid, p->colName, p->colType); taosHashPut(index->colObj, buf, sz, &pCache, sizeof(void*)); } } @@ -201,12 +201,12 @@ int indexPut(SIndex* index, SIndexMultiTerm* fVals, uint64_t uid) { char buf[128] = {0}; ICacheKey key = {.suid = p->suid, .colName = p->colName, .nColName = strlen(p->colName), .colType = p->colType}; - int32_t sz = indexSerialCacheKey(&key, buf); + int32_t sz = idxSerialCacheKey(&key, buf); indexDebug("w suid: %" PRIu64 ", colName: %s, colType: %d", key.suid, key.colName, key.colType); IndexCache** cache = taosHashGet(index->colObj, buf, sz); assert(*cache != NULL); - int ret = indexCachePut(*cache, p, uid); + int ret = idxCachePut(*cache, p, uid); if (ret != 0) { return ret; } @@ -289,7 +289,7 @@ SIndexTerm* indexTermCreate(int64_t suid, SIndexOperOnColumn oper, uint8_t colTy tm->nColName = nColName; char* buf = NULL; - int32_t len = idxConvertDataToStr((void*)colVal, INDEX_TYPE_GET_TYPE(colType), (void**)&buf); + int32_t len = idxConvertDataToStr((void*)colVal, IDX_TYPE_GET_TYPE(colType), (void**)&buf); assert(len != -1); tm->colVal = buf; @@ -331,7 +331,7 @@ static int idxTermSearch(SIndex* sIdx, SIndexTermQuery* query, SArray** result) ICacheKey key = { .suid = term->suid, .colName = term->colName, .nColName = strlen(term->colName), .colType = term->colType}; indexDebug("r suid: %" PRIu64 ", colName: %s, colType: %d", key.suid, key.colName, key.colType); - int32_t sz = indexSerialCacheKey(&key, buf); + int32_t sz = idxSerialCacheKey(&key, buf); taosThreadMutexLock(&sIdx->mtx); IndexCache** pCache = taosHashGet(sIdx->colObj, buf, sz); @@ -345,14 +345,14 @@ static int idxTermSearch(SIndex* sIdx, SIndexTermQuery* query, SArray** result) int64_t st = taosGetTimestampUs(); SIdxTRslt* tr = idxTRsltCreate(); - if (0 == indexCacheSearch(cache, query, tr, &s)) { + if (0 == idxCacheSearch(cache, query, tr, &s)) { if (s == kTypeDeletion) { indexInfo("col: %s already drop by", term->colName); // coloum already drop by other oper, no need to query tindex return 0; } else { st = taosGetTimestampUs(); - if (0 != indexTFileSearch(sIdx->tindex, query, tr)) { + if (0 != idxTFileSearch(sIdx->tindex, query, tr)) { indexError("corrupt at index(TFile) col:%s val: %s", term->colName, term->colVal); goto END; } @@ -465,23 +465,23 @@ int idxFlushCacheToTFile(SIndex* sIdx, void* cache, bool quit) { IndexCache* pCache = (IndexCache*)cache; - while (quit && atomic_load_32(&pCache->merging) == 1) { - } + while (quit && atomic_load_32(&pCache->merging) == 1) + ; TFileReader* pReader = tfileGetReaderByCol(sIdx->tindex, pCache->suid, pCache->colName); if (pReader == NULL) { indexWarn("empty tfile reader found"); } // handle flush - Iterate* cacheIter = indexCacheIteratorCreate(pCache); + Iterate* cacheIter = idxCacheIteratorCreate(pCache); if (cacheIter == NULL) { indexError("%p immtable is empty, ignore merge opera", pCache); idxCacheDestroyImm(pCache); tfileReaderUnRef(pReader); atomic_store_32(&pCache->merging, 0); if (quit) { - indexPost(sIdx); + idxPost(sIdx); } - indexReleaseRef(sIdx->refId); + idxReleaseRef(sIdx->refId); return 0; } @@ -532,7 +532,7 @@ int idxFlushCacheToTFile(SIndex* sIdx, void* cache, bool quit) { tfileIteratorDestroy(tfileIter); tfileReaderUnRef(pReader); - indexCacheUnRef(pCache); + idxCacheUnRef(pCache); int64_t cost = taosGetTimestampUs() - st; if (ret != 0) { @@ -542,9 +542,9 @@ int idxFlushCacheToTFile(SIndex* sIdx, void* cache, bool quit) { } atomic_store_32(&pCache->merging, 0); if (quit) { - indexPost(sIdx); + idxPost(sIdx); } - indexReleaseRef(sIdx->refId); + idxReleaseRef(sIdx->refId); return ret; } @@ -561,7 +561,7 @@ void iterateValueDestroy(IterateValue* value, bool destroy) { value->colVal = NULL; } -static int64_t indexGetAvaialbleVer(SIndex* sIdx, IndexCache* cache) { +static int64_t idxGetAvailableVer(SIndex* sIdx, IndexCache* cache) { ICacheKey key = {.suid = cache->suid, .colName = cache->colName, .nColName = strlen(cache->colName)}; int64_t ver = CACHE_VERSION(cache); @@ -579,7 +579,7 @@ static int64_t indexGetAvaialbleVer(SIndex* sIdx, IndexCache* cache) { return ver; } static int idxGenTFile(SIndex* sIdx, IndexCache* cache, SArray* batch) { - int64_t version = indexGetAvaialbleVer(sIdx, cache); + int64_t version = idxGetAvailableVer(sIdx, cache); indexInfo("file name version: %" PRId64 "", version); uint8_t colType = cache->type; @@ -620,8 +620,8 @@ END: return -1; } -int32_t indexSerialCacheKey(ICacheKey* key, char* buf) { - bool hasJson = INDEX_TYPE_CONTAIN_EXTERN_TYPE(key->colType, TSDB_DATA_TYPE_JSON); +int32_t idxSerialCacheKey(ICacheKey* key, char* buf) { + bool hasJson = IDX_TYPE_CONTAIN_EXTERN_TYPE(key->colType, TSDB_DATA_TYPE_JSON); char* p = buf; char tbuf[65] = {0}; diff --git a/source/libs/index/src/indexCache.c b/source/libs/index/src/indexCache.c index 20cd9c8b4c..040e8ed830 100644 --- a/source/libs/index/src/indexCache.c +++ b/source/libs/index/src/indexCache.c @@ -68,7 +68,7 @@ static int32_t (*cacheSearch[][QUERY_MAX])(void* cache, SIndexTerm* ct, SIdxTRsl cacheSearchLessThan_JSON, cacheSearchLessEqual_JSON, cacheSearchGreaterThan_JSON, cacheSearchGreaterEqual_JSON, cacheSearchRange_JSON}}; -static void doMergeWork(SSchedMsg* msg); +static void idxDoMergeWork(SSchedMsg* msg); static bool idxCacheIteratorNext(Iterate* itera); static int32_t cacheSearchTerm(void* cache, SIndexTerm* term, SIdxTRslt* tr, STermValueType* s) { @@ -127,7 +127,7 @@ static int32_t cacheSearchCompareFunc(void* cache, SIndexTerm* term, SIdxTRslt* MemTable* mem = cache; IndexCache* pCache = mem->pCache; - _cache_range_compare cmpFn = indexGetCompare(type); + _cache_range_compare cmpFn = idxGetCompare(type); CacheTerm* pCt = taosMemoryCalloc(1, sizeof(CacheTerm)); pCt->colVal = term->colVal; @@ -187,7 +187,7 @@ static int32_t cacheSearchTerm_JSON(void* cache, SIndexTerm* term, SIdxTRslt* tr pCt->version = atomic_load_64(&pCache->version); char* exBuf = NULL; - if (INDEX_TYPE_CONTAIN_EXTERN_TYPE(term->colType, TSDB_DATA_TYPE_JSON)) { + if (IDX_TYPE_CONTAIN_EXTERN_TYPE(term->colType, TSDB_DATA_TYPE_JSON)) { exBuf = idxPackJsonData(term); pCt->colVal = exBuf; } @@ -257,7 +257,7 @@ static int32_t cacheSearchCompareFunc_JSON(void* cache, SIndexTerm* term, SIdxTR if (cache == NULL) { return 0; } - _cache_range_compare cmpFn = indexGetCompare(type); + _cache_range_compare cmpFn = idxGetCompare(type); MemTable* mem = cache; IndexCache* pCache = mem->pCache; @@ -266,7 +266,7 @@ static int32_t cacheSearchCompareFunc_JSON(void* cache, SIndexTerm* term, SIdxTR pCt->colVal = term->colVal; pCt->version = atomic_load_64(&pCache->version); - int8_t dType = INDEX_TYPE_GET_TYPE(term->colType); + int8_t dType = IDX_TYPE_GET_TYPE(term->colType); int skip = 0; char* exBuf = NULL; if (type == CONTAINS) { @@ -331,9 +331,9 @@ static int32_t cacheSearchRange(void* cache, SIndexTerm* term, SIdxTRslt* tr, ST // impl later return 0; } -static IterateValue* indexCacheIteratorGetValue(Iterate* iter); +static IterateValue* idxCacheIteratorGetValue(Iterate* iter); -IndexCache* indexCacheCreate(SIndex* idx, uint64_t suid, const char* colName, int8_t type) { +IndexCache* idxCacheCreate(SIndex* idx, uint64_t suid, const char* colName, int8_t type) { IndexCache* cache = taosMemoryCalloc(1, sizeof(IndexCache)); if (cache == NULL) { indexError("failed to create index cache"); @@ -342,7 +342,7 @@ IndexCache* indexCacheCreate(SIndex* idx, uint64_t suid, const char* colName, in cache->mem = idxInternalCacheCreate(type); cache->mem->pCache = cache; - cache->colName = INDEX_TYPE_CONTAIN_EXTERN_TYPE(type, TSDB_DATA_TYPE_JSON) ? tstrdup(JSON_COLUMN) : tstrdup(colName); + cache->colName = IDX_TYPE_CONTAIN_EXTERN_TYPE(type, TSDB_DATA_TYPE_JSON) ? tstrdup(JSON_COLUMN) : tstrdup(colName); cache->type = type; cache->index = idx; cache->version = 0; @@ -352,13 +352,13 @@ IndexCache* indexCacheCreate(SIndex* idx, uint64_t suid, const char* colName, in taosThreadMutexInit(&cache->mtx, NULL); taosThreadCondInit(&cache->finished, NULL); - indexCacheRef(cache); + idxCacheRef(cache); if (idx != NULL) { - indexAcquireRef(idx->refId); + idxAcquireRef(idx->refId); } return cache; } -void indexCacheDebug(IndexCache* cache) { +void idxCacheDebug(IndexCache* cache) { MemTable* tbl = NULL; taosThreadMutexLock(&cache->mtx); @@ -405,7 +405,7 @@ void indexCacheDebug(IndexCache* cache) { } } -void indexCacheDestroySkiplist(SSkipList* slt) { +void idxCacheDestroySkiplist(SSkipList* slt) { SSkipListIterator* iter = tSkipListCreateIter(slt); while (iter != NULL && tSkipListIterNext(iter)) { SSkipListNode* node = tSkipListIterGet(iter); @@ -418,11 +418,11 @@ void indexCacheDestroySkiplist(SSkipList* slt) { tSkipListDestroyIter(iter); tSkipListDestroy(slt); } -void indexCacheBroadcast(void* cache) { +void idxCacheBroadcast(void* cache) { IndexCache* pCache = cache; taosThreadCondBroadcast(&pCache->finished); } -void indexCacheWait(void* cache) { +void idxCacheWait(void* cache) { IndexCache* pCache = cache; taosThreadCondWait(&pCache->finished, &pCache->mtx); } @@ -435,14 +435,14 @@ void idxCacheDestroyImm(IndexCache* cache) { tbl = cache->imm; cache->imm = NULL; // or throw int bg thread - indexCacheBroadcast(cache); + idxCacheBroadcast(cache); taosThreadMutexUnlock(&cache->mtx); idxMemUnRef(tbl); idxMemUnRef(tbl); } -void indexCacheDestroy(void* cache) { +void idxCacheDestroy(void* cache) { IndexCache* pCache = cache; if (pCache == NULL) { return; @@ -455,12 +455,12 @@ void indexCacheDestroy(void* cache) { taosThreadMutexDestroy(&pCache->mtx); taosThreadCondDestroy(&pCache->finished); if (pCache->index != NULL) { - indexReleaseRef(((SIndex*)pCache->index)->refId); + idxReleaseRef(((SIndex*)pCache->index)->refId); } taosMemoryFree(pCache); } -Iterate* indexCacheIteratorCreate(IndexCache* cache) { +Iterate* idxCacheIteratorCreate(IndexCache* cache) { if (cache->imm == NULL) { return NULL; } @@ -477,7 +477,7 @@ Iterate* indexCacheIteratorCreate(IndexCache* cache) { iiter->val.colVal = NULL; iiter->iter = tbl != NULL ? tSkipListCreateIter(tbl->mem) : NULL; iiter->next = idxCacheIteratorNext; - iiter->getValue = indexCacheIteratorGetValue; + iiter->getValue = idxCacheIteratorGetValue; taosThreadMutexUnlock(&cache->mtx); @@ -492,30 +492,30 @@ void idxCacheIteratorDestroy(Iterate* iter) { taosMemoryFree(iter); } -int indexCacheSchedToMerge(IndexCache* pCache, bool notify) { +int idxCacheSchedToMerge(IndexCache* pCache, bool notify) { SSchedMsg schedMsg = {0}; - schedMsg.fp = doMergeWork; + schedMsg.fp = idxDoMergeWork; schedMsg.ahandle = pCache; if (notify) { schedMsg.thandle = taosMemoryMalloc(1); } schedMsg.msg = NULL; - indexAcquireRef(pCache->index->refId); + idxAcquireRef(pCache->index->refId); taosScheduleTask(indexQhandle, &schedMsg); return 0; } -static void indexCacheMakeRoomForWrite(IndexCache* cache) { +static void idxCacheMakeRoomForWrite(IndexCache* cache) { while (true) { if (cache->occupiedMem * MEM_ESTIMATE_RADIO < MEM_THRESHOLD) { break; } else if (cache->imm != NULL) { // TODO: wake up by condition variable - indexCacheWait(cache); + idxCacheWait(cache); } else { bool quit = cache->occupiedMem >= MEM_SIGNAL_QUIT ? true : false; - indexCacheRef(cache); + idxCacheRef(cache); cache->imm = cache->mem; cache->mem = idxInternalCacheCreate(cache->type); cache->mem->pCache = cache; @@ -525,18 +525,18 @@ static void indexCacheMakeRoomForWrite(IndexCache* cache) { } // sched to merge // unref cache in bgwork - indexCacheSchedToMerge(cache, quit); + idxCacheSchedToMerge(cache, quit); } } } -int indexCachePut(void* cache, SIndexTerm* term, uint64_t uid) { +int idxCachePut(void* cache, SIndexTerm* term, uint64_t uid) { if (cache == NULL) { return -1; } - bool hasJson = INDEX_TYPE_CONTAIN_EXTERN_TYPE(term->colType, TSDB_DATA_TYPE_JSON); + bool hasJson = IDX_TYPE_CONTAIN_EXTERN_TYPE(term->colType, TSDB_DATA_TYPE_JSON); IndexCache* pCache = cache; - indexCacheRef(pCache); + idxCacheRef(pCache); // encode data CacheTerm* ct = taosMemoryCalloc(1, sizeof(CacheTerm)); if (cache == NULL) { @@ -559,7 +559,7 @@ int indexCachePut(void* cache, SIndexTerm* term, uint64_t uid) { taosThreadMutexLock(&pCache->mtx); pCache->occupiedMem += estimate; - indexCacheMakeRoomForWrite(pCache); + idxCacheMakeRoomForWrite(pCache); MemTable* tbl = pCache->mem; idxMemRef(tbl); tSkipListPut(tbl->mem, (char*)ct); @@ -567,29 +567,29 @@ int indexCachePut(void* cache, SIndexTerm* term, uint64_t uid) { taosThreadMutexUnlock(&pCache->mtx); - indexCacheUnRef(pCache); + idxCacheUnRef(pCache); return 0; // encode end } -void indexCacheForceToMerge(void* cache) { +void idxCacheForceToMerge(void* cache) { IndexCache* pCache = cache; - indexCacheRef(pCache); + idxCacheRef(pCache); taosThreadMutexLock(&pCache->mtx); indexInfo("%p is forced to merge into tfile", pCache); pCache->occupiedMem += MEM_SIGNAL_QUIT; - indexCacheMakeRoomForWrite(pCache); + idxCacheMakeRoomForWrite(pCache); taosThreadMutexUnlock(&pCache->mtx); - indexCacheUnRef(pCache); + idxCacheUnRef(pCache); return; } -int indexCacheDel(void* cache, const char* fieldValue, int32_t fvlen, uint64_t uid, int8_t operType) { +int idxCacheDel(void* cache, const char* fieldValue, int32_t fvlen, uint64_t uid, int8_t operType) { IndexCache* pCache = cache; return 0; } -static int32_t indexQueryMem(MemTable* mem, SIndexTermQuery* query, SIdxTRslt* tr, STermValueType* s) { +static int32_t idxQueryMem(MemTable* mem, SIndexTermQuery* query, SIdxTRslt* tr, STermValueType* s) { if (mem == NULL) { return 0; } @@ -597,13 +597,13 @@ static int32_t indexQueryMem(MemTable* mem, SIndexTermQuery* query, SIdxTRslt* t SIndexTerm* term = query->term; EIndexQueryType qtype = query->qType; - if (INDEX_TYPE_CONTAIN_EXTERN_TYPE(term->colType, TSDB_DATA_TYPE_JSON)) { + if (IDX_TYPE_CONTAIN_EXTERN_TYPE(term->colType, TSDB_DATA_TYPE_JSON)) { return cacheSearch[1][qtype](mem, term, tr, s); } else { return cacheSearch[0][qtype](mem, term, tr, s); } } -int indexCacheSearch(void* cache, SIndexTermQuery* query, SIdxTRslt* result, STermValueType* s) { +int idxCacheSearch(void* cache, SIndexTermQuery* query, SIdxTRslt* result, STermValueType* s) { int64_t st = taosGetTimestampUs(); if (cache == NULL) { return 0; @@ -618,10 +618,10 @@ int indexCacheSearch(void* cache, SIndexTermQuery* query, SIdxTRslt* result, STe idxMemRef(imm); taosThreadMutexUnlock(&pCache->mtx); - int ret = (mem && mem->mem) ? indexQueryMem(mem, query, result, s) : 0; + int ret = (mem && mem->mem) ? idxQueryMem(mem, query, result, s) : 0; if (ret == 0 && *s != kTypeDeletion) { // continue search in imm - ret = (imm && imm->mem) ? indexQueryMem(imm, query, result, s) : 0; + ret = (imm && imm->mem) ? idxQueryMem(imm, query, result, s) : 0; } idxMemUnRef(mem); @@ -631,20 +631,20 @@ int indexCacheSearch(void* cache, SIndexTermQuery* query, SIdxTRslt* result, STe return ret; } -void indexCacheRef(IndexCache* cache) { +void idxCacheRef(IndexCache* cache) { if (cache == NULL) { return; } int ref = T_REF_INC(cache); UNUSED(ref); } -void indexCacheUnRef(IndexCache* cache) { +void idxCacheUnRef(IndexCache* cache) { if (cache == NULL) { return; } int ref = T_REF_DEC(cache); if (ref == 0) { - indexCacheDestroy(cache); + idxCacheDestroy(cache); } } @@ -662,7 +662,7 @@ void idxMemUnRef(MemTable* tbl) { int ref = T_REF_DEC(tbl); if (ref == 0) { SSkipList* slt = tbl->mem; - indexCacheDestroySkiplist(slt); + idxCacheDestroySkiplist(slt); taosMemoryFree(tbl); } } @@ -693,15 +693,15 @@ static int32_t idxCacheTermCompare(const void* l, const void* r) { return cmp; } -static int indexFindCh(char* a, char c) { +static int idxFindCh(char* a, char c) { char* p = a; while (*p != 0 && *p++ != c) { } return p - a; } static int idxCacheJsonTermCompareImpl(char* a, char* b) { - // int alen = indexFindCh(a, '&'); - // int blen = indexFindCh(b, '&'); + // int alen = idxFindCh(a, '&'); + // int blen = idxFindCh(b, '&'); // int cmp = strncmp(a, b, MIN(alen, blen)); // if (cmp == 0) { @@ -730,9 +730,9 @@ static int32_t idxCacheJsonTermCompare(const void* l, const void* r) { return cmp; } static MemTable* idxInternalCacheCreate(int8_t type) { - int ttype = INDEX_TYPE_CONTAIN_EXTERN_TYPE(type, TSDB_DATA_TYPE_JSON) ? TSDB_DATA_TYPE_BINARY : TSDB_DATA_TYPE_BINARY; + int ttype = IDX_TYPE_CONTAIN_EXTERN_TYPE(type, TSDB_DATA_TYPE_JSON) ? TSDB_DATA_TYPE_BINARY : TSDB_DATA_TYPE_BINARY; int32_t (*cmpFn)(const void* l, const void* r) = - INDEX_TYPE_CONTAIN_EXTERN_TYPE(type, TSDB_DATA_TYPE_JSON) ? idxCacheJsonTermCompare : idxCacheTermCompare; + IDX_TYPE_CONTAIN_EXTERN_TYPE(type, TSDB_DATA_TYPE_JSON) ? idxCacheJsonTermCompare : idxCacheTermCompare; MemTable* tbl = taosMemoryCalloc(1, sizeof(MemTable)); idxMemRef(tbl); @@ -742,7 +742,7 @@ static MemTable* idxInternalCacheCreate(int8_t type) { return tbl; } -static void doMergeWork(SSchedMsg* msg) { +static void idxDoMergeWork(SSchedMsg* msg) { IndexCache* pCache = msg->ahandle; SIndex* sidx = (SIndex*)pCache->index; @@ -771,7 +771,7 @@ static bool idxCacheIteratorNext(Iterate* itera) { return next; } -static IterateValue* indexCacheIteratorGetValue(Iterate* iter) { +static IterateValue* idxCacheIteratorGetValue(Iterate* iter) { // opt later return &iter->val; } diff --git a/source/libs/index/src/indexComm.c b/source/libs/index/src/indexComm.c index be9243b8dd..99b49f97bd 100644 --- a/source/libs/index/src/indexComm.c +++ b/source/libs/index/src/indexComm.c @@ -75,35 +75,35 @@ char* idxInt2str(int64_t val, char* dst, int radix) { ; return dst - 1; } -__compar_fn_t indexGetCompar(int8_t type) { +__compar_fn_t idxGetCompar(int8_t type) { if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) { return (__compar_fn_t)strcmp; } return getComparFunc(type, 0); } static TExeCond tCompareLessThan(void* a, void* b, int8_t type) { - __compar_fn_t func = indexGetCompar(type); + __compar_fn_t func = idxGetCompar(type); return tCompare(func, QUERY_LESS_THAN, a, b, type); } static TExeCond tCompareLessEqual(void* a, void* b, int8_t type) { - __compar_fn_t func = indexGetCompar(type); + __compar_fn_t func = idxGetCompar(type); return tCompare(func, QUERY_LESS_EQUAL, a, b, type); } static TExeCond tCompareGreaterThan(void* a, void* b, int8_t type) { - __compar_fn_t func = indexGetCompar(type); + __compar_fn_t func = idxGetCompar(type); return tCompare(func, QUERY_GREATER_THAN, a, b, type); } static TExeCond tCompareGreaterEqual(void* a, void* b, int8_t type) { - __compar_fn_t func = indexGetCompar(type); + __compar_fn_t func = idxGetCompar(type); return tCompare(func, QUERY_GREATER_EQUAL, a, b, type); } static TExeCond tCompareContains(void* a, void* b, int8_t type) { - __compar_fn_t func = indexGetCompar(type); + __compar_fn_t func = idxGetCompar(type); return tCompare(func, QUERY_TERM, a, b, type); } static TExeCond tCompareEqual(void* a, void* b, int8_t type) { - __compar_fn_t func = indexGetCompar(type); + __compar_fn_t func = idxGetCompar(type); return tCompare(func, QUERY_TERM, a, b, type); } TExeCond tCompare(__compar_fn_t func, int8_t cmptype, void* a, void* b, int8_t dtype) { @@ -205,14 +205,14 @@ TExeCond tDoCompare(__compar_fn_t func, int8_t comparType, void* a, void* b) { static TExeCond (*rangeCompare[])(void* a, void* b, int8_t type) = { tCompareLessThan, tCompareLessEqual, tCompareGreaterThan, tCompareGreaterEqual, tCompareContains, tCompareEqual}; -_cache_range_compare indexGetCompare(RangeType ty) { return rangeCompare[ty]; } +_cache_range_compare idxGetCompare(RangeType ty) { return rangeCompare[ty]; } char* idxPackJsonData(SIndexTerm* itm) { /* * |<-----colname---->|<-----dataType---->|<--------colVal---------->| * |<-----string----->|<-----uint8_t----->|<----depend on dataType-->| */ - uint8_t ty = INDEX_TYPE_GET_TYPE(itm->colType); + uint8_t ty = IDX_TYPE_GET_TYPE(itm->colType); int32_t sz = itm->nColName + itm->nColVal + sizeof(uint8_t) + sizeof(JSON_VALUE_DELIM) * 2 + 1; char* buf = (char*)taosMemoryCalloc(1, sz); @@ -240,7 +240,7 @@ char* idxPackJsonDataPrefix(SIndexTerm* itm, int32_t* skip) { * |<-----colname---->|<-----dataType---->|<--------colVal---------->| * |<-----string----->|<-----uint8_t----->|<----depend on dataType-->| */ - uint8_t ty = INDEX_TYPE_GET_TYPE(itm->colType); + uint8_t ty = IDX_TYPE_GET_TYPE(itm->colType); int32_t sz = itm->nColName + itm->nColVal + sizeof(uint8_t) + sizeof(JSON_VALUE_DELIM) * 2 + 1; char* buf = (char*)taosMemoryCalloc(1, sz); @@ -267,7 +267,7 @@ char* idxPackJsonDataPrefixNoType(SIndexTerm* itm, int32_t* skip) { * |<-----colname---->|<-----dataType---->|<--------colVal---------->| * |<-----string----->|<-----uint8_t----->|<----depend on dataType-->| */ - uint8_t ty = INDEX_TYPE_GET_TYPE(itm->colType); + uint8_t ty = IDX_TYPE_GET_TYPE(itm->colType); int32_t sz = itm->nColName + itm->nColVal + sizeof(uint8_t) + sizeof(JSON_VALUE_DELIM) * 2 + 1; char* buf = (char*)taosMemoryCalloc(1, sz); diff --git a/source/libs/index/src/indexFilter.c b/source/libs/index/src/indexFilter.c index e4af4a7a3f..bd78ec574a 100644 --- a/source/libs/index/src/indexFilter.c +++ b/source/libs/index/src/indexFilter.c @@ -318,7 +318,7 @@ int sifLessThan(void *a, void *b, int16_t dtype) { } int sifEqual(void *a, void *b, int16_t dtype) { __compar_fn_t func = getComparFunc(dtype, 0); - //__compar_fn_t func = indexGetCompar(dtype); + //__compar_fn_t func = idxGetCompar(dtype); return (int)tDoCompare(func, QUERY_TERM, a, b); } static Filter sifGetFilterFunc(EIndexQueryType type, bool *reverse) { diff --git a/source/libs/index/src/indexJson.c b/source/libs/index/src/indexJson.c index 88b3d907bb..8ce625dfb9 100644 --- a/source/libs/index/src/indexJson.c +++ b/source/libs/index/src/indexJson.c @@ -30,7 +30,7 @@ int indexJsonPut(SIndexJson *index, SIndexJsonMultiTerm *terms, uint64_t uid) { } else { p->colType = TSDB_DATA_TYPE_DOUBLE; } - INDEX_TYPE_ADD_EXTERN_TYPE(p->colType, TSDB_DATA_TYPE_JSON); + IDX_TYPE_ADD_EXTERN_TYPE(p->colType, TSDB_DATA_TYPE_JSON); } // handle put return indexPut(index, terms, uid); @@ -48,7 +48,7 @@ int indexJsonSearch(SIndexJson *index, SIndexJsonMultiTermQuery *tq, SArray *res } else { p->colType = TSDB_DATA_TYPE_DOUBLE; } - INDEX_TYPE_ADD_EXTERN_TYPE(p->colType, TSDB_DATA_TYPE_JSON); + IDX_TYPE_ADD_EXTERN_TYPE(p->colType, TSDB_DATA_TYPE_JSON); } // handle search return indexSearch(index, tq, result); diff --git a/source/libs/index/src/indexTfile.c b/source/libs/index/src/indexTfile.c index b64db1dde4..d632540ee1 100644 --- a/source/libs/index/src/indexTfile.c +++ b/source/libs/index/src/indexTfile.c @@ -118,7 +118,7 @@ TFileCache* tfileCacheCreate(const char* path) { ICacheKey key = {.suid = header->suid, .colName = header->colName, .nColName = (int32_t)strlen(header->colName)}; char buf[128] = {0}; - int32_t sz = indexSerialCacheKey(&key, buf); + int32_t sz = idxSerialCacheKey(&key, buf); assert(sz < sizeof(buf)); taosHashPut(tcache->tableCache, buf, sz, &reader, sizeof(void*)); tfileReaderRef(reader); @@ -149,7 +149,7 @@ void tfileCacheDestroy(TFileCache* tcache) { TFileReader* tfileCacheGet(TFileCache* tcache, ICacheKey* key) { char buf[128] = {0}; - int32_t sz = indexSerialCacheKey(key, buf); + int32_t sz = idxSerialCacheKey(key, buf); assert(sz < sizeof(buf)); TFileReader** reader = taosHashGet(tcache->tableCache, buf, sz); if (reader == NULL || *reader == NULL) { @@ -161,7 +161,7 @@ TFileReader* tfileCacheGet(TFileCache* tcache, ICacheKey* key) { } void tfileCachePut(TFileCache* tcache, ICacheKey* key, TFileReader* reader) { char buf[128] = {0}; - int32_t sz = indexSerialCacheKey(key, buf); + int32_t sz = idxSerialCacheKey(key, buf); // remove last version index reader TFileReader** p = taosHashGet(tcache->tableCache, buf, sz); if (p != NULL && *p != NULL) { @@ -281,7 +281,7 @@ static int32_t tfSearchSuffix(void* reader, SIndexTerm* tem, SIdxTRslt* tr) { return 0; } static int32_t tfSearchRegex(void* reader, SIndexTerm* tem, SIdxTRslt* tr) { - bool hasJson = INDEX_TYPE_CONTAIN_EXTERN_TYPE(tem->colType, TSDB_DATA_TYPE_JSON); + bool hasJson = IDX_TYPE_CONTAIN_EXTERN_TYPE(tem->colType, TSDB_DATA_TYPE_JSON); int ret = 0; char* p = tem->colVal; @@ -305,7 +305,7 @@ static int32_t tfSearchCompareFunc(void* reader, SIndexTerm* tem, SIdxTRslt* tr, int ret = 0; char* p = tem->colVal; int skip = 0; - _cache_range_compare cmpFn = indexGetCompare(type); + _cache_range_compare cmpFn = idxGetCompare(type); SArray* offsets = taosArrayInit(16, sizeof(uint64_t)); @@ -431,7 +431,7 @@ static int32_t tfSearchCompareFunc_JSON(void* reader, SIndexTerm* tem, SIdxTRslt p = idxPackJsonDataPrefix(tem, &skip); } - _cache_range_compare cmpFn = indexGetCompare(ctype); + _cache_range_compare cmpFn = idxGetCompare(ctype); SArray* offsets = taosArrayInit(16, sizeof(uint64_t)); @@ -457,7 +457,7 @@ static int32_t tfSearchCompareFunc_JSON(void* reader, SIndexTerm* tem, SIdxTRslt } else if (0 != strncmp(ch, p, skip)) { continue; } - cond = cmpFn(ch + skip, tem->colVal, INDEX_TYPE_GET_TYPE(tem->colType)); + cond = cmpFn(ch + skip, tem->colVal, IDX_TYPE_GET_TYPE(tem->colType)); } if (MATCH == cond) { tfileReaderLoadTableIds((TFileReader*)reader, rt->out.out, tr->total); @@ -476,7 +476,7 @@ int tfileReaderSearch(TFileReader* reader, SIndexTermQuery* query, SIdxTRslt* tr SIndexTerm* term = query->term; EIndexQueryType qtype = query->qType; int ret = 0; - if (INDEX_TYPE_CONTAIN_EXTERN_TYPE(term->colType, TSDB_DATA_TYPE_JSON)) { + if (IDX_TYPE_CONTAIN_EXTERN_TYPE(term->colType, TSDB_DATA_TYPE_JSON)) { ret = tfSearch[1][qtype](reader, term, tr); } else { ret = tfSearch[0][qtype](reader, term, tr); @@ -536,7 +536,7 @@ int tfileWriterPut(TFileWriter* tw, void* data, bool order) { __compar_fn_t fn; int8_t colType = tw->header.colType; - colType = INDEX_TYPE_GET_TYPE(colType); + colType = IDX_TYPE_GET_TYPE(colType); if (colType == TSDB_DATA_TYPE_BINARY || colType == TSDB_DATA_TYPE_NCHAR) { fn = tfileStrCompare; } else { @@ -620,7 +620,7 @@ void tfileWriterDestroy(TFileWriter* tw) { taosMemoryFree(tw); } -IndexTFile* indexTFileCreate(const char* path) { +IndexTFile* idxTFileCreate(const char* path) { TFileCache* cache = tfileCacheCreate(path); if (cache == NULL) { return NULL; @@ -635,7 +635,7 @@ IndexTFile* indexTFileCreate(const char* path) { tfile->cache = cache; return tfile; } -void indexTFileDestroy(IndexTFile* tfile) { +void idxTFileDestroy(IndexTFile* tfile) { if (tfile == NULL) { return; } @@ -644,7 +644,7 @@ void indexTFileDestroy(IndexTFile* tfile) { taosMemoryFree(tfile); } -int indexTFileSearch(void* tfile, SIndexTermQuery* query, SIdxTRslt* result) { +int idxTFileSearch(void* tfile, SIndexTermQuery* query, SIdxTRslt* result) { int ret = -1; if (tfile == NULL) { return ret; @@ -667,7 +667,7 @@ int indexTFileSearch(void* tfile, SIndexTermQuery* query, SIdxTRslt* result) { return tfileReaderSearch(reader, query, result); } -int indexTFilePut(void* tfile, SIndexTerm* term, uint64_t uid) { +int idxTFilePut(void* tfile, SIndexTerm* term, uint64_t uid) { // TFileWriterOpt wOpt = {.suid = term->suid, .colType = term->colType, .colName = term->colName, .nColName = // term->nColName, .version = 1}; @@ -845,7 +845,7 @@ static int tfileWriteData(TFileWriter* write, TFileValue* tval) { TFileHeader* header = &write->header; uint8_t colType = header->colType; - colType = INDEX_TYPE_GET_TYPE(colType); + colType = IDX_TYPE_GET_TYPE(colType); FstSlice key = fstSliceCreate((uint8_t*)(tval->colVal), (size_t)strlen(tval->colVal)); if (fstBuilderInsert(write->fb, key, tval->offset)) { fstSliceDestroy(&key); diff --git a/source/libs/index/test/indexTests.cc b/source/libs/index/test/indexTests.cc index 90dea3a377..e18297cd25 100644 --- a/source/libs/index/test/indexTests.cc +++ b/source/libs/index/test/indexTests.cc @@ -521,10 +521,10 @@ class CacheObj { public: CacheObj() { // TODO - cache = indexCacheCreate(NULL, 0, "voltage", TSDB_DATA_TYPE_BINARY); + cache = idxCacheCreate(NULL, 0, "voltage", TSDB_DATA_TYPE_BINARY); } int Put(SIndexTerm* term, int16_t colId, int32_t version, uint64_t uid) { - int ret = indexCachePut(cache, term, uid); + int ret = idxCachePut(cache, term, uid); if (ret != 0) { // std::cout << "failed to put into cache: " << ret << std::endl; @@ -533,12 +533,12 @@ class CacheObj { } void Debug() { // - indexCacheDebug(cache); + idxCacheDebug(cache); } int Get(SIndexTermQuery* query, int16_t colId, int32_t version, SArray* result, STermValueType* s) { SIdxTRslt* tr = idxTRsltCreate(); - int ret = indexCacheSearch(cache, query, tr, s); + int ret = idxCacheSearch(cache, query, tr, s); idxTRsltMergeTo(tr, result); idxTRsltDestroy(tr); @@ -549,7 +549,7 @@ class CacheObj { } ~CacheObj() { // TODO - indexCacheDestroy(cache); + idxCacheDestroy(cache); } private: diff --git a/source/libs/sync/inc/syncAppendEntries.h b/source/libs/sync/inc/syncAppendEntries.h index 5999ef8300..98df22d51b 100644 --- a/source/libs/sync/inc/syncAppendEntries.h +++ b/source/libs/sync/inc/syncAppendEntries.h @@ -93,6 +93,7 @@ extern "C" { // /\ UNCHANGED <> // int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg); +int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMsg); #ifdef __cplusplus } diff --git a/source/libs/sync/inc/syncAppendEntriesReply.h b/source/libs/sync/inc/syncAppendEntriesReply.h index c0c1f76707..e509a50dc4 100644 --- a/source/libs/sync/inc/syncAppendEntriesReply.h +++ b/source/libs/sync/inc/syncAppendEntriesReply.h @@ -41,6 +41,7 @@ extern "C" { // /\ UNCHANGED <> // int32_t syncNodeOnAppendEntriesReplyCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg); +int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg); #ifdef __cplusplus } diff --git a/source/libs/sync/inc/syncElection.h b/source/libs/sync/inc/syncElection.h index 85a82dcfb7..128dbf4050 100644 --- a/source/libs/sync/inc/syncElection.h +++ b/source/libs/sync/inc/syncElection.h @@ -39,6 +39,8 @@ extern "C" { // /\ UNCHANGED <> // int32_t syncNodeRequestVotePeers(SSyncNode* pSyncNode); +int32_t syncNodeRequestVotePeersSnapshot(SSyncNode* pSyncNode); + int32_t syncNodeElect(SSyncNode* pSyncNode); int32_t syncNodeRequestVote(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncRequestVote* pMsg); diff --git a/source/libs/sync/inc/syncIO.h b/source/libs/sync/inc/syncIO.h index b69c087b5f..88d3065375 100644 --- a/source/libs/sync/inc/syncIO.h +++ b/source/libs/sync/inc/syncIO.h @@ -57,6 +57,9 @@ typedef struct SSyncIO { int32_t (*FpOnSyncAppendEntriesReply)(SSyncNode *pSyncNode, SyncAppendEntriesReply *pMsg); int32_t (*FpOnSyncTimeout)(SSyncNode *pSyncNode, SyncTimeout *pMsg); + int32_t (*FpOnSyncSnapshotSend)(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg); + int32_t (*FpOnSyncSnapshotRsp)(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg); + int8_t isStart; } SSyncIO; diff --git a/source/libs/sync/inc/syncIndexMgr.h b/source/libs/sync/inc/syncIndexMgr.h index 0a6e2428fe..1f60a9d57e 100644 --- a/source/libs/sync/inc/syncIndexMgr.h +++ b/source/libs/sync/inc/syncIndexMgr.h @@ -30,6 +30,7 @@ extern "C" { typedef struct SSyncIndexMgr { SRaftId (*replicas)[TSDB_MAX_REPLICA]; SyncIndex index[TSDB_MAX_REPLICA]; + SyncTerm privateTerm[TSDB_MAX_REPLICA]; // for advanced function int32_t replicaNum; SSyncNode *pSyncNode; } SSyncIndexMgr; @@ -43,6 +44,9 @@ SyncIndex syncIndexMgrGetIndex(SSyncIndexMgr *pSyncIndexMgr, const SRaftId cJSON * syncIndexMgr2Json(SSyncIndexMgr *pSyncIndexMgr); char * syncIndexMgr2Str(SSyncIndexMgr *pSyncIndexMgr); +// void syncIndexMgrSetTerm(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId, SyncTerm term); +// SyncTerm syncIndexMgrGetTerm(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId); + // for debug ------------------- void syncIndexMgrPrint(SSyncIndexMgr *pObj); void syncIndexMgrPrint2(char *s, SSyncIndexMgr *pObj); diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index 4100aa0216..10218f69e6 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -58,6 +58,8 @@ typedef struct SSyncRespMgr SSyncRespMgr; typedef struct SSyncSnapshotSender SSyncSnapshotSender; typedef struct SSyncSnapshotReceiver SSyncSnapshotReceiver; +extern bool gRaftDetailLog; + typedef struct SSyncNode { // init by SSyncInfo SyncGroupId vgId; @@ -137,24 +139,27 @@ typedef struct SSyncNode { uint64_t heartbeatTimerCounter; // callback - int32_t (*FpOnPing)(SSyncNode* ths, SyncPing* pMsg); - int32_t (*FpOnPingReply)(SSyncNode* ths, SyncPingReply* pMsg); - int32_t (*FpOnClientRequest)(SSyncNode* ths, SyncClientRequest* pMsg); - int32_t (*FpOnRequestVote)(SSyncNode* ths, SyncRequestVote* pMsg); - int32_t (*FpOnRequestVoteReply)(SSyncNode* ths, SyncRequestVoteReply* pMsg); - int32_t (*FpOnAppendEntries)(SSyncNode* ths, SyncAppendEntries* pMsg); - int32_t (*FpOnAppendEntriesReply)(SSyncNode* ths, SyncAppendEntriesReply* pMsg); - int32_t (*FpOnTimeout)(SSyncNode* pSyncNode, SyncTimeout* pMsg); + FpOnPingCb FpOnPing; + FpOnPingReplyCb FpOnPingReply; + FpOnClientRequestCb FpOnClientRequest; + FpOnTimeoutCb FpOnTimeout; + FpOnRequestVoteCb FpOnRequestVote; + FpOnRequestVoteReplyCb FpOnRequestVoteReply; + FpOnAppendEntriesCb FpOnAppendEntries; + FpOnAppendEntriesReplyCb FpOnAppendEntriesReply; + FpOnSnapshotSendCb FpOnSnapshotSend; + FpOnSnapshotRspCb FpOnSnapshotRsp; // tools SSyncRespMgr* pSyncRespMgr; // restore state - // sem_t restoreSem; - bool restoreFinish; - SSnapshot* pSnapshot; - SSyncSnapshotSender* pSender; - SSyncSnapshotReceiver* pReceiver; + bool restoreFinish; + // SSnapshot* pSnapshot; + SSyncSnapshotSender* senders[TSDB_MAX_REPLICA]; + SSyncSnapshotReceiver* pNewNodeReceiver; + + SSnapshotMeta sMeta; } SSyncNode; @@ -164,6 +169,9 @@ void syncNodeStart(SSyncNode* pSyncNode); void syncNodeStartStandBy(SSyncNode* pSyncNode); void syncNodeClose(SSyncNode* pSyncNode); +// option +bool syncNodeSnapshotEnable(SSyncNode* pSyncNode); + // ping -------------- int32_t syncNodePing(SSyncNode* pSyncNode, const SRaftId* destRaftId, SyncPing* pMsg); int32_t syncNodePingSelf(SSyncNode* pSyncNode); @@ -205,6 +213,25 @@ void syncNodeCandidate2Follower(SSyncNode* pSyncNode); void syncNodeVoteForTerm(SSyncNode* pSyncNode, SyncTerm term, SRaftId* pRaftId); void syncNodeVoteForSelf(SSyncNode* pSyncNode); +// snapshot -------------- +bool syncNodeHasSnapshot(SSyncNode* pSyncNode); +bool syncNodeIsIndexInSnapshot(SSyncNode* pSyncNode, SyncIndex index); + +SyncIndex syncNodeGetLastIndex(SSyncNode* pSyncNode); +SyncTerm syncNodeGetLastTerm(SSyncNode* pSyncNode); +int32_t syncNodeGetLastIndexTerm(SSyncNode* pSyncNode, SyncIndex* pLastIndex, SyncTerm* pLastTerm); + +SyncIndex syncNodeSyncStartIndex(SSyncNode* pSyncNode); + +SyncIndex syncNodeGetPreIndex(SSyncNode* pSyncNode, SyncIndex index); +SyncTerm syncNodeGetPreTerm(SSyncNode* pSyncNode, SyncIndex index); +int32_t syncNodeGetPreIndexTerm(SSyncNode* pSyncNode, SyncIndex index, SyncIndex* pPreIndex, SyncTerm* pPreTerm); + +int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag); + +bool syncNodeInRaftGroup(SSyncNode* ths, SRaftId* pRaftId); +SSyncSnapshotSender* syncNodeGetSnapshotSender(SSyncNode* ths, SRaftId* pDestId); + // for debug -------------- void syncNodePrint(SSyncNode* pObj); void syncNodePrint2(char* s, SSyncNode* pObj); diff --git a/source/libs/sync/inc/syncRaftCfg.h b/source/libs/sync/inc/syncRaftCfg.h index 1061e8bdc4..86c5fab87c 100644 --- a/source/libs/sync/inc/syncRaftCfg.h +++ b/source/libs/sync/inc/syncRaftCfg.h @@ -34,6 +34,7 @@ typedef struct SRaftCfg { TdFilePtr pFile; char path[TSDB_FILENAME_LEN * 2]; int8_t isStandBy; + int8_t snapshotEnable; } SRaftCfg; SRaftCfg *raftCfgOpen(const char *path); @@ -50,7 +51,12 @@ char * raftCfg2Str(SRaftCfg *pRaftCfg); int32_t raftCfgFromJson(const cJSON *pRoot, SRaftCfg *pRaftCfg); int32_t raftCfgFromStr(const char *s, SRaftCfg *pRaftCfg); -int32_t raftCfgCreateFile(SSyncCfg *pCfg, int8_t isStandBy, const char *path); +typedef struct SRaftCfgMeta { + int8_t isStandBy; + int8_t snapshotEnable; +} SRaftCfgMeta; + +int32_t raftCfgCreateFile(SSyncCfg *pCfg, SRaftCfgMeta meta, const char *path); // for debug ---------------------- void syncCfgPrint(SSyncCfg *pCfg); diff --git a/source/libs/sync/inc/syncRaftLog.h b/source/libs/sync/inc/syncRaftLog.h index df5cd3f36c..aec1f77b42 100644 --- a/source/libs/sync/inc/syncRaftLog.h +++ b/source/libs/sync/inc/syncRaftLog.h @@ -30,6 +30,7 @@ extern "C" { typedef struct SSyncLogStoreData { SSyncNode* pSyncNode; SWal* pWal; + SyncIndex beginIndex; // valid begin index, default 0, may be set beginIndex > 0 } SSyncLogStoreData; SSyncLogStore* logStoreCreate(SSyncNode* pSyncNode); @@ -39,14 +40,7 @@ char* logStore2Str(SSyncLogStore* pLogStore); cJSON* logStoreSimple2Json(SSyncLogStore* pLogStore); char* logStoreSimple2Str(SSyncLogStore* pLogStore); -// SSyncRaftEntry* logStoreGetLastEntry(SSyncLogStore* pLogStore); -// SyncIndex logStoreLastIndex(SSyncLogStore* pLogStore); -// SyncTerm logStoreLastTerm(SSyncLogStore* pLogStore); -// SSyncRaftEntry* logStoreGetEntry(SSyncLogStore* pLogStore, SyncIndex index); -// int32_t logStoreAppendEntry(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry); -// int32_t logStoreTruncate(SSyncLogStore* pLogStore, SyncIndex fromIndex); -// int32_t logStoreUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index); -// SyncIndex logStoreGetCommitIndex(SSyncLogStore* pLogStore); +SyncIndex logStoreFirstIndex(SSyncLogStore* pLogStore); // for debug void logStorePrint(SSyncLogStore* pLogStore); diff --git a/source/libs/sync/inc/syncRaftStore.h b/source/libs/sync/inc/syncRaftStore.h index e0cbcf0744..9f03ac3e55 100644 --- a/source/libs/sync/inc/syncRaftStore.h +++ b/source/libs/sync/inc/syncRaftStore.h @@ -49,8 +49,8 @@ void raftStoreClearVote(SRaftStore *pRaftStore); void raftStoreNextTerm(SRaftStore *pRaftStore); void raftStoreSetTerm(SRaftStore *pRaftStore, SyncTerm term); int32_t raftStoreFromJson(SRaftStore *pRaftStore, cJSON *pJson); -cJSON * raftStore2Json(SRaftStore *pRaftStore); -char * raftStore2Str(SRaftStore *pRaftStore); +cJSON *raftStore2Json(SRaftStore *pRaftStore); +char *raftStore2Str(SRaftStore *pRaftStore); // for debug ------------------- void raftStorePrint(SRaftStore *pObj); diff --git a/source/libs/sync/inc/syncReplication.h b/source/libs/sync/inc/syncReplication.h index 6fe18dae38..4b1f5b4638 100644 --- a/source/libs/sync/inc/syncReplication.h +++ b/source/libs/sync/inc/syncReplication.h @@ -52,6 +52,7 @@ extern "C" { // /\ UNCHANGED <> // int32_t syncNodeAppendEntriesPeers(SSyncNode* pSyncNode); +int32_t syncNodeAppendEntriesPeersSnapshot(SSyncNode* pSyncNode); int32_t syncNodeReplicate(SSyncNode* pSyncNode); int32_t syncNodeAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncAppendEntries* pMsg); diff --git a/source/libs/sync/inc/syncRequestVote.h b/source/libs/sync/inc/syncRequestVote.h index fd4ccd5371..3fe8dc0237 100644 --- a/source/libs/sync/inc/syncRequestVote.h +++ b/source/libs/sync/inc/syncRequestVote.h @@ -50,6 +50,7 @@ extern "C" { // /\ UNCHANGED <> // int32_t syncNodeOnRequestVoteCb(SSyncNode* ths, SyncRequestVote* pMsg); +int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg); #ifdef __cplusplus } diff --git a/source/libs/sync/inc/syncRequestVoteReply.h b/source/libs/sync/inc/syncRequestVoteReply.h index bcaf71a541..ac47a8d026 100644 --- a/source/libs/sync/inc/syncRequestVoteReply.h +++ b/source/libs/sync/inc/syncRequestVoteReply.h @@ -45,6 +45,7 @@ extern "C" { // /\ UNCHANGED <> // int32_t syncNodeOnRequestVoteReplyCb(SSyncNode* ths, SyncRequestVoteReply* pMsg); +int32_t syncNodeOnRequestVoteReplySnapshotCb(SSyncNode* ths, SyncRequestVoteReply* pMsg); #ifdef __cplusplus } diff --git a/source/libs/sync/inc/syncSnapshot.h b/source/libs/sync/inc/syncSnapshot.h index 43d1c0c0c3..b16e47b51e 100644 --- a/source/libs/sync/inc/syncSnapshot.h +++ b/source/libs/sync/inc/syncSnapshot.h @@ -25,40 +25,64 @@ extern "C" { #include #include "cJSON.h" #include "syncInt.h" +#include "syncMessage.h" #include "taosdef.h" +#define SYNC_SNAPSHOT_SEQ_INVALID -1 +#define SYNC_SNAPSHOT_SEQ_FORCE_CLOSE -2 +#define SYNC_SNAPSHOT_SEQ_BEGIN 0 +#define SYNC_SNAPSHOT_SEQ_END 0x7FFFFFFF + +#define SYNC_SNAPSHOT_RETRY_MS 5000 + typedef struct SSyncSnapshotSender { - int32_t sending; - int32_t received; - bool finish; - void * pCurrentBlock; + bool start; + int32_t seq; + int32_t ack; + void *pReader; + void *pCurrentBlock; int32_t blockLen; + SSnapshot snapshot; int64_t sendingMS; - SSnapshot *pSnapshot; SSyncNode *pSyncNode; + int32_t replicaIndex; + SyncTerm term; + SyncTerm privateTerm; + bool finish; } SSyncSnapshotSender; -SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode); +SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaIndex); void snapshotSenderDestroy(SSyncSnapshotSender *pSender); +bool snapshotSenderIsStart(SSyncSnapshotSender *pSender); +void snapshotSenderStart(SSyncSnapshotSender *pSender); +void snapshotSenderStop(SSyncSnapshotSender *pSender); int32_t snapshotSend(SSyncSnapshotSender *pSender); -cJSON * snapshotSender2Json(SSyncSnapshotSender *pSender); -char * snapshotSender2Str(SSyncSnapshotSender *pSender); +int32_t snapshotReSend(SSyncSnapshotSender *pSender); +cJSON *snapshotSender2Json(SSyncSnapshotSender *pSender); +char *snapshotSender2Str(SSyncSnapshotSender *pSender); typedef struct SSyncSnapshotReceiver { - bool start; - int32_t received; - int32_t progressIndex; - void * pCurrentBlock; - int32_t len; - SSnapshot *pSnapshot; + bool start; + + int32_t ack; + void *pWriter; + SyncTerm term; + SyncTerm privateTerm; + SSyncNode *pSyncNode; + int32_t replicaIndex; } SSyncSnapshotReceiver; -SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode); +SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, int32_t replicaIndex); void snapshotReceiverDestroy(SSyncSnapshotReceiver *pReceiver); -int32_t snapshotReceive(SSyncSnapshotReceiver *pReceiver); -cJSON * snapshotReceiver2Json(SSyncSnapshotReceiver *pReceiver); -char * snapshotReceiver2Str(SSyncSnapshotReceiver *pReceiver); +void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm); +bool snapshotReceiverIsStart(SSyncSnapshotReceiver *pReceiver); +void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver, bool apply); +cJSON *snapshotReceiver2Json(SSyncSnapshotReceiver *pReceiver); +char *snapshotReceiver2Str(SSyncSnapshotReceiver *pReceiver); + +int32_t syncNodeOnSnapshotSendCb(SSyncNode *ths, SyncSnapshotSend *pMsg); +int32_t syncNodeOnSnapshotRspCb(SSyncNode *ths, SyncSnapshotRsp *pMsg); #ifdef __cplusplus } diff --git a/source/libs/sync/inc/syncUtil.h b/source/libs/sync/inc/syncUtil.h index 1b08d3f7a1..7ecff7ae97 100644 --- a/source/libs/sync/inc/syncUtil.h +++ b/source/libs/sync/inc/syncUtil.h @@ -61,6 +61,7 @@ bool syncUtilIsData(tmsg_t msgType); bool syncUtilUserPreCommit(tmsg_t msgType); bool syncUtilUserCommit(tmsg_t msgType); bool syncUtilUserRollback(tmsg_t msgType); +void syncUtilJson2Line(char* jsonStr); #ifdef __cplusplus } diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 89dcd8a476..3c558b60c8 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -18,8 +18,10 @@ #include "syncRaftCfg.h" #include "syncRaftLog.h" #include "syncRaftStore.h" +#include "syncSnapshot.h" #include "syncUtil.h" #include "syncVoteMgr.h" +#include "wal.h" // TLA+ Spec // HandleAppendEntriesRequest(i, j, m) == @@ -335,8 +337,12 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { cbMeta.currentTerm = ths->pRaftStore->currentTerm; cbMeta.flag = 0x11; + SSnapshot snapshot; + ASSERT(ths->pFsm->FpGetSnapshot != NULL); + ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot); + bool needExecute = true; - if (ths->pSnapshot != NULL && cbMeta.index <= ths->pSnapshot->lastApplyIndex) { + if (cbMeta.index <= snapshot.lastApplyIndex) { needExecute = false; } @@ -427,3 +433,332 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { return ret; } + +static int32_t syncNodeMakeLogSame(SSyncNode* ths, SyncAppendEntries* pMsg) { + int32_t code; + + SyncIndex delBegin = pMsg->prevLogIndex + 1; + SyncIndex delEnd = ths->pLogStore->syncLogLastIndex(ths->pLogStore); + + // invert roll back! + for (SyncIndex index = delEnd; index >= delBegin; --index) { + if (ths->pFsm->FpRollBackCb != NULL) { + SSyncRaftEntry* pRollBackEntry; + code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, index, &pRollBackEntry); + ASSERT(code == 0); + ASSERT(pRollBackEntry != NULL); + + if (syncUtilUserRollback(pRollBackEntry->msgType)) { + SRpcMsg rpcMsg; + syncEntry2OriginalRpc(pRollBackEntry, &rpcMsg); + + SFsmCbMeta cbMeta; + cbMeta.index = pRollBackEntry->index; + cbMeta.isWeak = pRollBackEntry->isWeak; + cbMeta.code = 0; + cbMeta.state = ths->state; + cbMeta.seqNum = pRollBackEntry->seqNum; + ths->pFsm->FpRollBackCb(ths->pFsm, &rpcMsg, cbMeta); + rpcFreeCont(rpcMsg.pCont); + } + + syncEntryDestory(pRollBackEntry); + } + } + + // delete confict entries + code = ths->pLogStore->syncLogTruncate(ths->pLogStore, delBegin); + ASSERT(code == 0); + sInfo("sync event log truncate, from %ld to %ld", delBegin, delEnd); + logStoreSimpleLog2("after syncNodeMakeLogSame", ths->pLogStore); + + return code; +} + +static int32_t syncNodePreCommit(SSyncNode* ths, SSyncRaftEntry* pEntry) { + SRpcMsg rpcMsg; + syncEntry2OriginalRpc(pEntry, &rpcMsg); + if (ths->pFsm != NULL) { + if (ths->pFsm->FpPreCommitCb != NULL && syncUtilUserPreCommit(pEntry->originalRpcType)) { + SFsmCbMeta cbMeta; + cbMeta.index = pEntry->index; + cbMeta.isWeak = pEntry->isWeak; + cbMeta.code = 2; + cbMeta.state = ths->state; + cbMeta.seqNum = pEntry->seqNum; + ths->pFsm->FpPreCommitCb(ths->pFsm, &rpcMsg, cbMeta); + } + } + rpcFreeCont(rpcMsg.pCont); + return 0; +} + +// really pre log match +// prevLogIndex == -1 +static bool syncNodeOnAppendEntriesLogOK(SSyncNode* pSyncNode, SyncAppendEntries* pMsg) { + if (pMsg->prevLogIndex == SYNC_INDEX_INVALID) { + if (gRaftDetailLog) { + sTrace("syncNodeOnAppendEntriesLogOK true, pMsg->prevLogIndex:%ld", pMsg->prevLogIndex); + } + return true; + } + + SyncIndex myLastIndex = syncNodeGetLastIndex(pSyncNode); + if (pMsg->prevLogIndex > myLastIndex) { + if (gRaftDetailLog) { + sTrace("syncNodeOnAppendEntriesLogOK false, pMsg->prevLogIndex:%ld, myLastIndex:%ld", pMsg->prevLogIndex, + myLastIndex); + } + return false; + } + + SyncTerm myPreLogTerm = syncNodeGetPreTerm(pSyncNode, pMsg->prevLogIndex + 1); + if (pMsg->prevLogIndex <= myLastIndex && pMsg->prevLogTerm == myPreLogTerm) { + if (gRaftDetailLog) { + sTrace( + "syncNodeOnAppendEntriesLogOK true, pMsg->prevLogIndex:%ld, myLastIndex:%ld, pMsg->prevLogTerm:%lu, " + "myPreLogTerm:%lu", + pMsg->prevLogIndex, myLastIndex, pMsg->prevLogTerm, myPreLogTerm); + } + return true; + } + + if (gRaftDetailLog) { + sTrace( + "syncNodeOnAppendEntriesLogOK false, pMsg->prevLogIndex:%ld, myLastIndex:%ld, pMsg->prevLogTerm:%lu, " + "myPreLogTerm:%lu", + pMsg->prevLogIndex, myLastIndex, pMsg->prevLogTerm, myPreLogTerm); + } + + return false; +} + +int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMsg) { + int32_t ret = 0; + int32_t code = 0; + + // print log + char logBuf[128] = {0}; + snprintf(logBuf, sizeof(logBuf), "recv SyncAppendEntries, vgId:%d, term:%lu", ths->vgId, + ths->pRaftStore->currentTerm); + syncAppendEntriesLog2(logBuf, pMsg); + + // if already drop replica, do not process + if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { + sInfo("recv SyncAppendEntries maybe replica already dropped"); + return ret; + } + + // maybe update term + if (pMsg->term > ths->pRaftStore->currentTerm) { + syncNodeUpdateTerm(ths, pMsg->term); + } + ASSERT(pMsg->term <= ths->pRaftStore->currentTerm); + + // reset elect timer + if (pMsg->term == ths->pRaftStore->currentTerm) { + ths->leaderCache = pMsg->srcId; + syncNodeResetElectTimer(ths); + } + ASSERT(pMsg->dataLen >= 0); + + // candidate to follower + // + // operation: + // to follower + do { + bool condition = pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_CANDIDATE; + if (condition) { + sTrace("recv SyncAppendEntries, candidate to follower"); + + syncNodeBecomeFollower(ths); + // do not reply? + return ret; + } + } while (0); + + // fake match + // + // condition1: + // I have snapshot, no log, preIndex > myLastIndex + // + // condition2: + // I have snapshot, have log, log <= snapshot, preIndex > myLastIndex + // + // condition3: + // I have snapshot, preIndex < snapshot.lastApplyIndex + // + // condition4: + // I have snapshot, preIndex == snapshot.lastApplyIndex, no data + // + // operation: + // match snapshot.lastApplyIndex - 1; + // no operation on log + do { + SyncIndex myLastIndex = syncNodeGetLastIndex(ths); + SSnapshot snapshot; + ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot); + + bool condition0 = (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && + syncNodeHasSnapshot(ths); + bool condition1 = + condition0 && (ths->pLogStore->syncLogEntryCount(ths->pLogStore) == 0) && (pMsg->prevLogIndex > myLastIndex); + bool condition2 = condition0 && (ths->pLogStore->syncLogLastIndex(ths->pLogStore) <= snapshot.lastApplyIndex) && + (pMsg->prevLogIndex > myLastIndex); + bool condition3 = condition0 && (pMsg->prevLogIndex < snapshot.lastApplyIndex); + bool condition4 = condition0 && (pMsg->prevLogIndex == snapshot.lastApplyIndex) && (pMsg->dataLen == 0); + bool condition = condition1 || condition2 || condition3 || condition4; + + if (condition) { + sTrace( + "recv SyncAppendEntries, fake match, myLastIndex:%ld, syncLogBeginIndex:%ld, syncLogEndIndex:%ld, " + "condition1:%d, condition2:%d, condition3:%d, condition4:%d", + myLastIndex, ths->pLogStore->syncLogBeginIndex(ths->pLogStore), + ths->pLogStore->syncLogEndIndex(ths->pLogStore), condition1, condition2, condition3, condition4); + + // prepare response msg + SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId); + pReply->srcId = ths->myRaftId; + pReply->destId = pMsg->srcId; + pReply->term = ths->pRaftStore->currentTerm; + pReply->privateTerm = ths->pNewNodeReceiver->privateTerm; + pReply->success = true; + pReply->matchIndex = snapshot.lastApplyIndex; + + // send response + SRpcMsg rpcMsg; + syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg); + syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); + syncAppendEntriesReplyDestroy(pReply); + + return ret; + } + } while (0); + + // calculate logOK here, before will coredump, due to fake match + bool logOK = syncNodeOnAppendEntriesLogOK(ths, pMsg); + + // not match + // + // condition1: + // term < myTerm + // + // condition2: + // !logOK + // + // operation: + // not match + // no operation on log + do { + bool condition1 = pMsg->term < ths->pRaftStore->currentTerm; + bool condition2 = + (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && !logOK; + bool condition = condition1 || condition2; + + if (condition) { + sTrace( + "recv SyncAppendEntries, not match, syncLogBeginIndex:%ld, syncLogEndIndex:%ld, condition1:%d, " + "condition2:%d, logOK:%d", + ths->pLogStore->syncLogBeginIndex(ths->pLogStore), ths->pLogStore->syncLogEndIndex(ths->pLogStore), + condition1, condition2, logOK); + + // prepare response msg + SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId); + pReply->srcId = ths->myRaftId; + pReply->destId = pMsg->srcId; + pReply->term = ths->pRaftStore->currentTerm; + pReply->privateTerm = ths->pNewNodeReceiver->privateTerm; + pReply->success = false; + pReply->matchIndex = SYNC_INDEX_INVALID; + + // send response + SRpcMsg rpcMsg; + syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg); + syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); + syncAppendEntriesReplyDestroy(pReply); + + return ret; + } + } while (0); + + // really match + // + // condition: + // logOK + // + // operation: + // match + // make log same + do { + bool condition = (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && logOK; + if (condition) { + // has extra entries (> preIndex) in local log + SyncIndex myLastIndex = syncNodeGetLastIndex(ths); + bool hasExtraEntries = myLastIndex > pMsg->prevLogIndex; + + // has entries in SyncAppendEntries msg + bool hasAppendEntries = pMsg->dataLen > 0; + + sTrace("recv SyncAppendEntries, match, myLastIndex:%ld, hasExtraEntries:%d, hasAppendEntries:%d", myLastIndex, + hasExtraEntries, hasAppendEntries); + + if (hasExtraEntries) { + // make log same, rollback deleted entries + code = syncNodeMakeLogSame(ths, pMsg); + ASSERT(code == 0); + } + + if (hasAppendEntries) { + // append entry + SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen); + ASSERT(pAppendEntry != NULL); + + code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry); + ASSERT(code == 0); + + // pre commit + code = syncNodePreCommit(ths, pAppendEntry); + ASSERT(code == 0); + + syncEntryDestory(pAppendEntry); + } + + // prepare response msg + SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId); + pReply->srcId = ths->myRaftId; + pReply->destId = pMsg->srcId; + pReply->term = ths->pRaftStore->currentTerm; + pReply->privateTerm = ths->pNewNodeReceiver->privateTerm; + pReply->success = true; + pReply->matchIndex = hasAppendEntries ? pMsg->prevLogIndex + 1 : pMsg->prevLogIndex; + + // send response + SRpcMsg rpcMsg; + syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg); + syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); + syncAppendEntriesReplyDestroy(pReply); + + // maybe update commit index, leader notice me + if (pMsg->commitIndex > ths->commitIndex) { + // has commit entry in local + if (pMsg->commitIndex <= ths->pLogStore->syncLogLastIndex(ths->pLogStore)) { + SyncIndex beginIndex = ths->commitIndex + 1; + SyncIndex endIndex = pMsg->commitIndex; + + // update commit index + ths->commitIndex = pMsg->commitIndex; + + // call back Wal + code = ths->pLogStore->updateCommitIndex(ths->pLogStore, ths->commitIndex); + ASSERT(code == 0); + + code = syncNodeCommit(ths, beginIndex, endIndex, ths->state); + ASSERT(code == 0); + } + } + return ret; + } + } while (0); + + return ret; +} diff --git a/source/libs/sync/src/syncAppendEntriesReply.c b/source/libs/sync/src/syncAppendEntriesReply.c index 4e6d870e19..5a543e1605 100644 --- a/source/libs/sync/src/syncAppendEntriesReply.c +++ b/source/libs/sync/src/syncAppendEntriesReply.c @@ -17,8 +17,10 @@ #include "syncCommit.h" #include "syncIndexMgr.h" #include "syncInt.h" +#include "syncRaftCfg.h" #include "syncRaftLog.h" #include "syncRaftStore.h" +#include "syncSnapshot.h" #include "syncUtil.h" #include "syncVoteMgr.h" @@ -94,3 +96,116 @@ int32_t syncNodeOnAppendEntriesReplyCb(SSyncNode* ths, SyncAppendEntriesReply* p return ret; } + +int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg) { + int32_t ret = 0; + + // print log + char logBuf[128] = {0}; + snprintf(logBuf, sizeof(logBuf), "recv SyncAppendEntriesReply, vgId:%d, term:%lu", ths->vgId, + ths->pRaftStore->currentTerm); + syncAppendEntriesReplyLog2(logBuf, pMsg); + + // if already drop replica, do not process + if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { + sInfo("recv SyncAppendEntriesReply, maybe replica already dropped"); + return ret; + } + + // drop stale response + if (pMsg->term < ths->pRaftStore->currentTerm) { + sTrace("recv SyncAppendEntriesReply, drop stale response, receive_term:%lu current_term:%lu", pMsg->term, + ths->pRaftStore->currentTerm); + return ret; + } + + syncIndexMgrLog2("recv SyncAppendEntriesReply, before pNextIndex:", ths->pNextIndex); + syncIndexMgrLog2("recv SyncAppendEntriesReply, before pMatchIndex:", ths->pMatchIndex); + { + SSnapshot snapshot; + ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot); + sTrace("recv SyncAppendEntriesReply, before snapshot.lastApplyIndex:%ld, snapshot.lastApplyTerm:%lu", + snapshot.lastApplyIndex, snapshot.lastApplyTerm); + } + + // no need this code, because if I receive reply.term, then I must have sent for that term. + // if (pMsg->term > ths->pRaftStore->currentTerm) { + // syncNodeUpdateTerm(ths, pMsg->term); + // } + + if (pMsg->term > ths->pRaftStore->currentTerm) { + char logBuf[128] = {0}; + snprintf(logBuf, sizeof(logBuf), "recv SyncAppendEntriesReply, error term, receive_term:%lu current_term:%lu", + pMsg->term, ths->pRaftStore->currentTerm); + syncNodeLog2(logBuf, ths); + sError("%s", logBuf); + return ret; + } + + ASSERT(pMsg->term == ths->pRaftStore->currentTerm); + + if (pMsg->success) { + // nextIndex' = [nextIndex EXCEPT ![i][j] = m.mmatchIndex + 1] + syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), pMsg->matchIndex + 1); + sTrace("update next match, index:%ld, success:%d", pMsg->matchIndex + 1, pMsg->success); + + // matchIndex' = [matchIndex EXCEPT ![i][j] = m.mmatchIndex] + syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), pMsg->matchIndex); + + // maybe commit + if (ths->state == TAOS_SYNC_STATE_LEADER) { + syncMaybeAdvanceCommitIndex(ths); + } + + } else { + SyncIndex nextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId)); + sTrace("update next not match, begin, index:%ld, success:%d", nextIndex, pMsg->success); + + // notice! int64, uint64 + if (nextIndex > SYNC_INDEX_BEGIN) { + --nextIndex; + + // get sender + SSyncSnapshotSender* pSender = syncNodeGetSnapshotSender(ths, &(pMsg->srcId)); + ASSERT(pSender != NULL); + bool hasSnapshot = syncNodeHasSnapshot(ths); + SSnapshot snapshot; + ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot); + + // start sending snapshot first time + // start here, stop by receiver + if (hasSnapshot && nextIndex <= snapshot.lastApplyIndex + 1 && !snapshotSenderIsStart(pSender) && + pMsg->privateTerm < pSender->privateTerm) { + snapshotSenderStart(pSender); + + char* s = snapshotSender2Str(pSender); + sInfo("sync event snapshot send start sender first time, sender:%s", s); + taosMemoryFree(s); + } + + SyncIndex sentryIndex = pSender->snapshot.lastApplyIndex + 1; + + // update nextIndex to sentryIndex + if (nextIndex <= sentryIndex) { + nextIndex = sentryIndex; + } + + } else { + nextIndex = SYNC_INDEX_BEGIN; + } + + syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex); + sTrace("update next not match, end, index:%ld, success:%d", nextIndex, pMsg->success); + } + + syncIndexMgrLog2("recv SyncAppendEntriesReply, after pNextIndex:", ths->pNextIndex); + syncIndexMgrLog2("recv SyncAppendEntriesReply, after pMatchIndex:", ths->pMatchIndex); + { + SSnapshot snapshot; + ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot); + sTrace("recv SyncAppendEntriesReply, after snapshot.lastApplyIndex:%ld, snapshot.lastApplyTerm:%lu", + snapshot.lastApplyIndex, snapshot.lastApplyTerm); + } + + return ret; +} \ No newline at end of file diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index c6376495a4..c092b31adf 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -94,109 +94,8 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { // execute fsm if (pSyncNode->pFsm != NULL) { - for (SyncIndex i = beginIndex; i <= endIndex; ++i) { - if (i != SYNC_INDEX_INVALID) { - SSyncRaftEntry* pEntry = pSyncNode->pLogStore->getEntry(pSyncNode->pLogStore, i); - assert(pEntry != NULL); - - SRpcMsg rpcMsg; - syncEntry2OriginalRpc(pEntry, &rpcMsg); - - if (pSyncNode->pFsm->FpCommitCb != NULL && syncUtilUserCommit(pEntry->originalRpcType)) { - SFsmCbMeta cbMeta; - cbMeta.index = pEntry->index; - cbMeta.isWeak = pEntry->isWeak; - cbMeta.code = 0; - cbMeta.state = pSyncNode->state; - cbMeta.seqNum = pEntry->seqNum; - cbMeta.term = pEntry->term; - cbMeta.currentTerm = pSyncNode->pRaftStore->currentTerm; - cbMeta.flag = 0x1; - - bool needExecute = true; - if (pSyncNode->pSnapshot != NULL && cbMeta.index <= pSyncNode->pSnapshot->lastApplyIndex) { - needExecute = false; - } - - if (needExecute) { - pSyncNode->pFsm->FpCommitCb(pSyncNode->pFsm, &rpcMsg, cbMeta); - } - } - - // config change - if (pEntry->originalRpcType == TDMT_SYNC_CONFIG_CHANGE) { - SSyncCfg oldSyncCfg = pSyncNode->pRaftCfg->cfg; - - SSyncCfg newSyncCfg; - int32_t ret = syncCfgFromStr(rpcMsg.pCont, &newSyncCfg); - ASSERT(ret == 0); - - // update new config myIndex - bool hit = false; - for (int i = 0; i < newSyncCfg.replicaNum; ++i) { - if (strcmp(pSyncNode->myNodeInfo.nodeFqdn, (newSyncCfg.nodeInfo)[i].nodeFqdn) == 0 && - pSyncNode->myNodeInfo.nodePort == (newSyncCfg.nodeInfo)[i].nodePort) { - newSyncCfg.myIndex = i; - hit = true; - break; - } - } - - if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { - ASSERT(hit == true); - } - - bool isDrop; - syncNodeUpdateConfig(pSyncNode, &newSyncCfg, &isDrop); - - // change isStandBy to normal - if (!isDrop) { - if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { - syncNodeBecomeLeader(pSyncNode); - } else { - syncNodeBecomeFollower(pSyncNode); - } - } - - char* sOld = syncCfg2Str(&oldSyncCfg); - char* sNew = syncCfg2Str(&newSyncCfg); - sInfo("==config change== 0x1 old:%s new:%s isDrop:%d \n", sOld, sNew, isDrop); - taosMemoryFree(sOld); - taosMemoryFree(sNew); - - if (pSyncNode->pFsm->FpReConfigCb != NULL) { - SReConfigCbMeta cbMeta = {0}; - cbMeta.code = 0; - cbMeta.currentTerm = pSyncNode->pRaftStore->currentTerm; - cbMeta.index = pEntry->index; - cbMeta.term = pEntry->term; - cbMeta.oldCfg = oldSyncCfg; - cbMeta.flag = 0x1; - cbMeta.isDrop = isDrop; - pSyncNode->pFsm->FpReConfigCb(pSyncNode->pFsm, newSyncCfg, cbMeta); - } - } - - // restore finish - if (pEntry->index == pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore)) { - if (pSyncNode->restoreFinish == false) { - if (pSyncNode->pFsm->FpRestoreFinishCb != NULL) { - pSyncNode->pFsm->FpRestoreFinishCb(pSyncNode->pFsm); - } - pSyncNode->restoreFinish = true; - sInfo("==syncMaybeAdvanceCommitIndex== restoreFinish set true %p vgId:%d", pSyncNode, pSyncNode->vgId); - - /* - tsem_post(&pSyncNode->restoreSem); - sInfo("==syncMaybeAdvanceCommitIndex== RestoreFinish tsem_post %p", pSyncNode); - */ - } - } - - rpcFreeCont(rpcMsg.pCont); - syncEntryDestory(pEntry); - } - } + int32_t code = syncNodeCommit(pSyncNode, beginIndex, endIndex, pSyncNode->state); + ASSERT(code == 0); } } } diff --git a/source/libs/sync/src/syncElection.c b/source/libs/sync/src/syncElection.c index 5101344b84..fdebbe3990 100644 --- a/source/libs/sync/src/syncElection.c +++ b/source/libs/sync/src/syncElection.c @@ -15,6 +15,7 @@ #include "syncElection.h" #include "syncMessage.h" +#include "syncRaftCfg.h" #include "syncRaftStore.h" #include "syncVoteMgr.h" @@ -49,6 +50,26 @@ int32_t syncNodeRequestVotePeers(SSyncNode* pSyncNode) { return ret; } +int32_t syncNodeRequestVotePeersSnapshot(SSyncNode* pSyncNode) { + ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE); + + int32_t ret = 0; + for (int i = 0; i < pSyncNode->peersNum; ++i) { + SyncRequestVote* pMsg = syncRequestVoteBuild(pSyncNode->vgId); + pMsg->srcId = pSyncNode->myRaftId; + pMsg->destId = pSyncNode->peersId[i]; + pMsg->term = pSyncNode->pRaftStore->currentTerm; + + ret = syncNodeGetLastIndexTerm(pSyncNode, &(pMsg->lastLogIndex), &(pMsg->lastLogTerm)); + ASSERT(ret == 0); + + ret = syncNodeRequestVote(pSyncNode, &pSyncNode->peersId[i], pMsg); + ASSERT(ret == 0); + syncRequestVoteDestroy(pMsg); + } + return ret; +} + int32_t syncNodeElect(SSyncNode* pSyncNode) { int32_t ret = 0; if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER) { @@ -71,7 +92,12 @@ int32_t syncNodeElect(SSyncNode* pSyncNode) { return ret; } - ret = syncNodeRequestVotePeers(pSyncNode); + if (pSyncNode->pRaftCfg->snapshotEnable) { + ret = syncNodeRequestVotePeersSnapshot(pSyncNode); + } else { + ret = syncNodeRequestVotePeers(pSyncNode); + } + assert(ret == 0); syncNodeResetElectTimer(pSyncNode); diff --git a/source/libs/sync/src/syncIO.c b/source/libs/sync/src/syncIO.c index aa8484de99..0b5a9685c0 100644 --- a/source/libs/sync/src/syncIO.c +++ b/source/libs/sync/src/syncIO.c @@ -75,7 +75,8 @@ int32_t syncIOSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { syncUtilMsgNtoH(pMsg->pCont); char logBuf[256] = {0}; - snprintf(logBuf, sizeof(logBuf), "==syncIOSendMsg== %s:%d", pEpSet->eps[0].fqdn, pEpSet->eps[0].port); + snprintf(logBuf, sizeof(logBuf), "==syncIOSendMsg== %s:%d msgType:%d", pEpSet->eps[0].fqdn, pEpSet->eps[0].port, + pMsg->msgType); syncRpcMsgLog2(logBuf, pMsg); syncUtilMsgHtoN(pMsg->pCont); @@ -89,8 +90,10 @@ int32_t syncIOSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { int32_t syncIOEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) { int32_t ret = 0; - char logBuf[128] = {0}; - syncRpcMsgLog2((char *)"==syncIOEqMsg==", pMsg); + + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), "==syncIOEqMsg== msgType:%d", pMsg->msgType); + syncRpcMsgLog2(logBuf, pMsg); SRpcMsg *pTemp; pTemp = taosAllocateQitem(sizeof(SRpcMsg), DEF_QITEM); @@ -253,7 +256,9 @@ static void *syncIOConsumerFunc(void *param) { for (int i = 0; i < numOfMsgs; ++i) { taosGetQitem(qall, (void **)&pRpcMsg); - syncRpcMsgLog2((char *)"==syncIOConsumerFunc==", pRpcMsg); + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "==syncIOConsumMsg== msgType:%d", pRpcMsg->msgType); + syncRpcMsgLog2(logBuf, pRpcMsg); // use switch case instead of if else if (pRpcMsg->msgType == TDMT_SYNC_PING) { @@ -319,6 +324,23 @@ static void *syncIOConsumerFunc(void *param) { io->FpOnSyncTimeout(io->pSyncNode, pSyncMsg); syncTimeoutDestroy(pSyncMsg); } + + } else if (pRpcMsg->msgType == TDMT_SYNC_SNAPSHOT_SEND) { + if (io->FpOnSyncSnapshotSend != NULL) { + SyncSnapshotSend *pSyncMsg = syncSnapshotSendFromRpcMsg2(pRpcMsg); + assert(pSyncMsg != NULL); + io->FpOnSyncSnapshotSend(io->pSyncNode, pSyncMsg); + syncSnapshotSendDestroy(pSyncMsg); + } + + } else if (pRpcMsg->msgType == TDMT_SYNC_SNAPSHOT_RSP) { + if (io->FpOnSyncSnapshotRsp != NULL) { + SyncSnapshotRsp *pSyncMsg = syncSnapshotRspFromRpcMsg2(pRpcMsg); + assert(pSyncMsg != NULL); + io->FpOnSyncSnapshotRsp(io->pSyncNode, pSyncMsg); + syncSnapshotRspDestroy(pSyncMsg); + } + } else { sTrace("unknown msgType:%d, no operator", pRpcMsg->msgType); } diff --git a/source/libs/sync/src/syncIndexMgr.c b/source/libs/sync/src/syncIndexMgr.c index 4d556d21dd..ecc1c8f1e2 100644 --- a/source/libs/sync/src/syncIndexMgr.c +++ b/source/libs/sync/src/syncIndexMgr.c @@ -46,6 +46,7 @@ void syncIndexMgrDestroy(SSyncIndexMgr *pSyncIndexMgr) { void syncIndexMgrClear(SSyncIndexMgr *pSyncIndexMgr) { memset(pSyncIndexMgr->index, 0, sizeof(pSyncIndexMgr->index)); + memset(pSyncIndexMgr->privateTerm, 0, sizeof(pSyncIndexMgr->privateTerm)); /* for (int i = 0; i < pSyncIndexMgr->replicaNum; ++i) { pSyncIndexMgr->index[i] = 0; @@ -62,7 +63,7 @@ void syncIndexMgrSetIndex(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId, } // maybe config change - // assert(0); + assert(0); } SyncIndex syncIndexMgrGetIndex(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId) { @@ -86,14 +87,27 @@ cJSON *syncIndexMgr2Json(SSyncIndexMgr *pSyncIndexMgr) { for (int i = 0; i < pSyncIndexMgr->replicaNum; ++i) { cJSON_AddItemToArray(pReplicas, syncUtilRaftId2Json(&(*(pSyncIndexMgr->replicas))[i])); } - int respondNum = 0; - int *arr = (int *)taosMemoryMalloc(sizeof(int) * pSyncIndexMgr->replicaNum); - for (int i = 0; i < pSyncIndexMgr->replicaNum; ++i) { - arr[i] = pSyncIndexMgr->index[i]; + + { + int *arr = (int *)taosMemoryMalloc(sizeof(int) * pSyncIndexMgr->replicaNum); + for (int i = 0; i < pSyncIndexMgr->replicaNum; ++i) { + arr[i] = pSyncIndexMgr->index[i]; + } + cJSON *pIndex = cJSON_CreateIntArray(arr, pSyncIndexMgr->replicaNum); + taosMemoryFree(arr); + cJSON_AddItemToObject(pRoot, "index", pIndex); } - cJSON *pIndex = cJSON_CreateIntArray(arr, pSyncIndexMgr->replicaNum); - taosMemoryFree(arr); - cJSON_AddItemToObject(pRoot, "index", pIndex); + + { + int *arr = (int *)taosMemoryMalloc(sizeof(int) * pSyncIndexMgr->replicaNum); + for (int i = 0; i < pSyncIndexMgr->replicaNum; ++i) { + arr[i] = pSyncIndexMgr->privateTerm[i]; + } + cJSON *pIndex = cJSON_CreateIntArray(arr, pSyncIndexMgr->replicaNum); + taosMemoryFree(arr); + cJSON_AddItemToObject(pRoot, "privateTerm", pIndex); + } + snprintf(u64buf, sizeof(u64buf), "%p", pSyncIndexMgr->pSyncNode); cJSON_AddStringToObject(pRoot, "pSyncNode", u64buf); } @@ -105,7 +119,7 @@ cJSON *syncIndexMgr2Json(SSyncIndexMgr *pSyncIndexMgr) { char *syncIndexMgr2Str(SSyncIndexMgr *pSyncIndexMgr) { cJSON *pJson = syncIndexMgr2Json(pSyncIndexMgr); - char * serialized = cJSON_Print(pJson); + char *serialized = cJSON_Print(pJson); cJSON_Delete(pJson); return serialized; } @@ -132,7 +146,31 @@ void syncIndexMgrLog(SSyncIndexMgr *pObj) { } void syncIndexMgrLog2(char *s, SSyncIndexMgr *pObj) { - char *serialized = syncIndexMgr2Str(pObj); - sTrace("syncIndexMgrLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char *serialized = syncIndexMgr2Str(pObj); + sTrace("syncIndexMgrLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } +} + +void syncIndexMgrSetTerm(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId, SyncTerm term) { + for (int i = 0; i < pSyncIndexMgr->replicaNum; ++i) { + if (syncUtilSameId(&((*(pSyncIndexMgr->replicas))[i]), pRaftId)) { + (pSyncIndexMgr->privateTerm)[i] = term; + return; + } + } + + // maybe config change + assert(0); +} + +SyncTerm syncIndexMgrGetTerm(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId) { + for (int i = 0; i < pSyncIndexMgr->replicaNum; ++i) { + if (syncUtilSameId(&((*(pSyncIndexMgr->replicas))[i]), pRaftId)) { + SyncTerm term = (pSyncIndexMgr->privateTerm)[i]; + return term; + } + } + assert(0); } \ No newline at end of file diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 795d3e3c27..9516df64da 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -29,11 +29,14 @@ #include "syncRequestVote.h" #include "syncRequestVoteReply.h" #include "syncRespMgr.h" +#include "syncSnapshot.h" #include "syncTimeout.h" #include "syncUtil.h" #include "syncVoteMgr.h" #include "tref.h" +bool gRaftDetailLog = false; + static int32_t tsNodeRefId = -1; // ------ local funciton --------- @@ -213,6 +216,18 @@ bool syncIsRestoreFinish(int64_t rid) { return b; } +int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta) { + SSyncNode* pSyncNode = (SSyncNode*)taosAcquireRef(tsNodeRefId, rid); + if (pSyncNode == NULL) { + return -1; + } + assert(rid == pSyncNode->rid); + *sMeta = pSyncNode->sMeta; + + taosReleaseRef(tsNodeRefId, pSyncNode->rid); + return 0; +} + const char* syncGetMyRoleStr(int64_t rid) { const char* s = syncUtilState2String(syncGetMyRole(rid)); return s; @@ -411,8 +426,11 @@ SSyncNode* syncNodeOpen(const SSyncInfo* pOldSyncInfo) { snprintf(pSyncNode->configPath, sizeof(pSyncNode->configPath), "%s/raft_config.json", pSyncInfo->path); if (!taosCheckExistFile(pSyncNode->configPath)) { - // create raft config file - ret = raftCfgCreateFile((SSyncCfg*)&(pSyncInfo->syncCfg), pSyncInfo->isStandBy, pSyncNode->configPath); + // create a new raft config file + SRaftCfgMeta meta; + meta.isStandBy = pSyncInfo->isStandBy; + meta.snapshotEnable = pSyncInfo->snapshotEnable; + ret = raftCfgCreateFile((SSyncCfg*)&(pSyncInfo->syncCfg), meta, pSyncNode->configPath); assert(ret == 0); } else { @@ -552,35 +570,64 @@ SSyncNode* syncNodeOpen(const SSyncInfo* pOldSyncInfo) { pSyncNode->FpOnPing = syncNodeOnPingCb; pSyncNode->FpOnPingReply = syncNodeOnPingReplyCb; pSyncNode->FpOnClientRequest = syncNodeOnClientRequestCb; - pSyncNode->FpOnRequestVote = syncNodeOnRequestVoteCb; - pSyncNode->FpOnRequestVoteReply = syncNodeOnRequestVoteReplyCb; - pSyncNode->FpOnAppendEntries = syncNodeOnAppendEntriesCb; - pSyncNode->FpOnAppendEntriesReply = syncNodeOnAppendEntriesReplyCb; pSyncNode->FpOnTimeout = syncNodeOnTimeoutCb; + pSyncNode->FpOnSnapshotSend = syncNodeOnSnapshotSendCb; + pSyncNode->FpOnSnapshotRsp = syncNodeOnSnapshotRspCb; + + if (pSyncNode->pRaftCfg->snapshotEnable) { + sInfo("sync node use snapshot"); + pSyncNode->FpOnRequestVote = syncNodeOnRequestVoteSnapshotCb; + pSyncNode->FpOnRequestVoteReply = syncNodeOnRequestVoteReplySnapshotCb; + pSyncNode->FpOnAppendEntries = syncNodeOnAppendEntriesSnapshotCb; + pSyncNode->FpOnAppendEntriesReply = syncNodeOnAppendEntriesReplySnapshotCb; + + } else { + sInfo("sync node do not use snapshot"); + pSyncNode->FpOnRequestVote = syncNodeOnRequestVoteCb; + pSyncNode->FpOnRequestVoteReply = syncNodeOnRequestVoteReplyCb; + pSyncNode->FpOnAppendEntries = syncNodeOnAppendEntriesCb; + pSyncNode->FpOnAppendEntriesReply = syncNodeOnAppendEntriesReplyCb; + } + // tools pSyncNode->pSyncRespMgr = syncRespMgrCreate(NULL, 0); assert(pSyncNode->pSyncRespMgr != NULL); // restore state pSyncNode->restoreFinish = false; - pSyncNode->pSnapshot = NULL; - if (pSyncNode->pFsm->FpGetSnapshot != NULL) { - pSyncNode->pSnapshot = taosMemoryMalloc(sizeof(SSnapshot)); - pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, pSyncNode->pSnapshot); - } + + // pSyncNode->pSnapshot = NULL; + // if (pSyncNode->pFsm->FpGetSnapshot != NULL) { + // pSyncNode->pSnapshot = taosMemoryMalloc(sizeof(SSnapshot)); + // pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, pSyncNode->pSnapshot); + // } // tsem_init(&(pSyncNode->restoreSem), 0, 0); + // snapshot senders + for (int i = 0; i < TSDB_MAX_REPLICA; ++i) { + SSyncSnapshotSender* pSender = snapshotSenderCreate(pSyncNode, i); + // ASSERT(pSender != NULL); + (pSyncNode->senders)[i] = pSender; + } + + // snapshot receivers + pSyncNode->pNewNodeReceiver = snapshotReceiverCreate(pSyncNode, 100); + // start in syncNodeStart // start raft // syncNodeBecomeFollower(pSyncNode); + // snapshot meta + pSyncNode->sMeta.lastConfigIndex = -1; + return pSyncNode; } void syncNodeStart(SSyncNode* pSyncNode) { // start raft if (pSyncNode->replicaNum == 1) { + raftStoreNextTerm(pSyncNode->pRaftStore); syncNodeBecomeLeader(pSyncNode); syncNodeLog2("==state change become leader immediately==", pSyncNode); @@ -662,9 +709,23 @@ void syncNodeClose(SSyncNode* pSyncNode) { taosMemoryFree(pSyncNode->pFsm); } + for (int i = 0; i < TSDB_MAX_REPLICA; ++i) { + if ((pSyncNode->senders)[i] != NULL) { + snapshotSenderDestroy((pSyncNode->senders)[i]); + (pSyncNode->senders)[i] = NULL; + } + } + + if (pSyncNode->pNewNodeReceiver != NULL) { + snapshotReceiverDestroy(pSyncNode->pNewNodeReceiver); + pSyncNode->pNewNodeReceiver = NULL; + } + + /* if (pSyncNode->pSnapshot != NULL) { taosMemoryFree(pSyncNode->pSnapshot); } + */ // tsem_destroy(&pSyncNode->restoreSem); @@ -672,6 +733,9 @@ void syncNodeClose(SSyncNode* pSyncNode) { // taosMemoryFree(pSyncNode); } +// option +bool syncNodeSnapshotEnable(SSyncNode* pSyncNode) { return pSyncNode->pRaftCfg->snapshotEnable; } + // ping -------------- int32_t syncNodePing(SSyncNode* pSyncNode, const SRaftId* destRaftId, SyncPing* pMsg) { syncPingLog2((char*)"==syncNodePing==", pMsg); @@ -762,7 +826,13 @@ int32_t syncNodeRestartElectTimer(SSyncNode* pSyncNode, int32_t ms) { int32_t syncNodeResetElectTimer(SSyncNode* pSyncNode) { int32_t ret = 0; - int32_t electMS = syncUtilElectRandomMS(pSyncNode->electBaseLine, 2 * pSyncNode->electBaseLine); + int32_t electMS; + + if (pSyncNode->pRaftCfg->isStandBy) { + electMS = TIMER_MAX_MS; + } else { + electMS = syncUtilElectRandomMS(pSyncNode->electBaseLine, 2 * pSyncNode->electBaseLine); + } ret = syncNodeRestartElectTimer(pSyncNode, electMS); return ret; } @@ -788,6 +858,13 @@ int32_t syncNodeSendMsgById(const SRaftId* destRaftId, SSyncNode* pSyncNode, SRp SEpSet epSet; syncUtilraftId2EpSet(destRaftId, &epSet); if (pSyncNode->FpSendMsg != NULL) { + if (gRaftDetailLog) { + char* JsonStr = syncRpcMsg2Str(pMsg); + syncUtilJson2Line(JsonStr); + sTrace("sync send msg, vgId:%d, type:%d, msg:%s", pSyncNode->vgId, pMsg->msgType, JsonStr); + taosMemoryFree(JsonStr); + } + // htonl syncUtilMsgHtoN(pMsg->pCont); @@ -952,6 +1029,20 @@ cJSON* syncNode2Json(const SSyncNode* pSyncNode) { cJSON_AddStringToObject(pRoot, "FpOnAppendEntriesReply", u64buf); snprintf(u64buf, sizeof(u64buf), "%p", pSyncNode->FpOnTimeout); cJSON_AddStringToObject(pRoot, "FpOnTimeout", u64buf); + + // restoreFinish + cJSON_AddNumberToObject(pRoot, "restoreFinish", pSyncNode->restoreFinish); + + // snapshot senders + cJSON* pSenders = cJSON_CreateArray(); + cJSON_AddItemToObject(pRoot, "senders", pSenders); + for (int i = 0; i < TSDB_MAX_REPLICA; ++i) { + cJSON_AddItemToArray(pSenders, snapshotSender2Json((pSyncNode->senders)[i])); + } + + // snapshot receivers + cJSON* pReceivers = cJSON_CreateArray(); + cJSON_AddItemToObject(pRoot, "receiver", snapshotReceiver2Json(pSyncNode->pNewNodeReceiver)); } cJSON* pJson = cJSON_CreateObject(); @@ -973,10 +1064,10 @@ char* syncNode2SimpleStr(const SSyncNode* pSyncNode) { "syncNode2SimpleStr vgId:%d currentTerm:%lu, commitIndex:%ld, state:%d %s, isStandBy:%d, " "electTimerLogicClock:%lu, " "electTimerLogicClockUser:%lu, " - "electTimerMS:%d", + "electTimerMS:%d, replicaNum:%d", pSyncNode->vgId, pSyncNode->pRaftStore->currentTerm, pSyncNode->commitIndex, pSyncNode->state, syncUtilState2String(pSyncNode->state), pSyncNode->pRaftCfg->isStandBy, pSyncNode->electTimerLogicClock, - pSyncNode->electTimerLogicClockUser, pSyncNode->electTimerMS); + pSyncNode->electTimerLogicClockUser, pSyncNode->electTimerMS, pSyncNode->replicaNum); return s; } @@ -1013,6 +1104,8 @@ void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, bool* isDro voteGrantedUpdate(pSyncNode->pVotesGranted, pSyncNode); votesRespondUpdate(pSyncNode->pVotesRespond, pSyncNode); + pSyncNode->quorum = syncUtilQuorum(pSyncNode->pRaftCfg->cfg.replicaNum); + // isDrop *isDrop = true; bool IamInOld, IamInNew; @@ -1103,7 +1196,15 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode) { for (int i = 0; i < pSyncNode->pNextIndex->replicaNum; ++i) { // maybe overwrite myself, no harm // just do it! - pSyncNode->pNextIndex->index[i] = pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore) + 1; + + // pSyncNode->pNextIndex->index[i] = pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore) + 1; + + // maybe wal is deleted + SyncIndex lastIndex; + SyncTerm lastTerm; + int32_t code = syncNodeGetLastIndexTerm(pSyncNode, &lastIndex, &lastTerm); + ASSERT(code == 0); + pSyncNode->pNextIndex->index[i] = lastIndex + 1; } for (int i = 0; i < pSyncNode->pMatchIndex->replicaNum; ++i) { @@ -1112,6 +1213,17 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode) { pSyncNode->pMatchIndex->index[i] = SYNC_INDEX_INVALID; } + // update sender private term + SSyncSnapshotSender* pMySender = syncNodeGetSnapshotSender(pSyncNode, &(pSyncNode->myRaftId)); + if (pMySender != NULL) { + for (int i = 0; i < pSyncNode->pMatchIndex->replicaNum; ++i) { + if ((pSyncNode->senders)[i]->privateTerm > pMySender->privateTerm) { + pMySender->privateTerm = (pSyncNode->senders)[i]->privateTerm; + } + } + (pMySender->privateTerm) += 100; + } + // stop elect timer syncNodeStopElectTimer(pSyncNode); @@ -1186,6 +1298,153 @@ void syncNodeVoteForSelf(SSyncNode* pSyncNode) { syncRequestVoteReplyDestroy(pMsg); } +// snapshot -------------- +bool syncNodeHasSnapshot(SSyncNode* pSyncNode) { + bool ret = false; + SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0}; + if (pSyncNode->pFsm->FpGetSnapshot != NULL) { + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + if (snapshot.lastApplyIndex >= SYNC_INDEX_BEGIN) { + ret = true; + } + } + return ret; +} + +bool syncNodeIsIndexInSnapshot(SSyncNode* pSyncNode, SyncIndex index) { + ASSERT(syncNodeHasSnapshot(pSyncNode)); + ASSERT(pSyncNode->pFsm->FpGetSnapshot != NULL); + ASSERT(index >= SYNC_INDEX_BEGIN); + + SSnapshot snapshot; + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + bool b = (index <= snapshot.lastApplyIndex); + return b; +} + +SyncIndex syncNodeGetLastIndex(SSyncNode* pSyncNode) { + SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0}; + if (pSyncNode->pFsm->FpGetSnapshot != NULL) { + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + } + SyncIndex logLastIndex = pSyncNode->pLogStore->syncLogLastIndex(pSyncNode->pLogStore); + + SyncIndex lastIndex = logLastIndex > snapshot.lastApplyIndex ? logLastIndex : snapshot.lastApplyIndex; + return lastIndex; +} + +SyncTerm syncNodeGetLastTerm(SSyncNode* pSyncNode) { + SyncTerm lastTerm = 0; + if (syncNodeHasSnapshot(pSyncNode)) { + // has snapshot + SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0}; + if (pSyncNode->pFsm->FpGetSnapshot != NULL) { + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + } + + SyncIndex logLastIndex = pSyncNode->pLogStore->syncLogLastIndex(pSyncNode->pLogStore); + if (logLastIndex > snapshot.lastApplyIndex) { + lastTerm = pSyncNode->pLogStore->syncLogLastTerm(pSyncNode->pLogStore); + } else { + lastTerm = snapshot.lastApplyTerm; + } + + } else { + // no snapshot + lastTerm = pSyncNode->pLogStore->syncLogLastTerm(pSyncNode->pLogStore); + } + + return lastTerm; +} + +// get last index and term along with snapshot +int32_t syncNodeGetLastIndexTerm(SSyncNode* pSyncNode, SyncIndex* pLastIndex, SyncTerm* pLastTerm) { + *pLastIndex = syncNodeGetLastIndex(pSyncNode); + *pLastTerm = syncNodeGetLastTerm(pSyncNode); + return 0; +} + +SyncIndex syncNodeSyncStartIndex(SSyncNode* pSyncNode) { + SyncIndex syncStartIndex = syncNodeGetLastIndex(pSyncNode) + 1; + return syncStartIndex; +} + +SyncIndex syncNodeGetPreIndex(SSyncNode* pSyncNode, SyncIndex index) { + ASSERT(index >= SYNC_INDEX_BEGIN); + SyncIndex syncStartIndex = syncNodeSyncStartIndex(pSyncNode); + ASSERT(index <= syncStartIndex); + + SyncIndex preIndex = index - 1; + return preIndex; +} + +SyncTerm syncNodeGetPreTerm(SSyncNode* pSyncNode, SyncIndex index) { + ASSERT(index >= SYNC_INDEX_BEGIN); + SyncIndex syncStartIndex = syncNodeSyncStartIndex(pSyncNode); + ASSERT(index <= syncStartIndex); + + if (index == SYNC_INDEX_BEGIN) { + return 0; + } + + SyncTerm preTerm = 0; + if (syncNodeHasSnapshot(pSyncNode)) { + // has snapshot + SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0}; + if (pSyncNode->pFsm->FpGetSnapshot != NULL) { + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + } + + if (index > snapshot.lastApplyIndex + 1) { + // should be log preTerm + SSyncRaftEntry* pPreEntry = NULL; + int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index - 1, &pPreEntry); + ASSERT(code == 0); + ASSERT(pPreEntry != NULL); + + preTerm = pPreEntry->term; + taosMemoryFree(pPreEntry); + + } else if (index == snapshot.lastApplyIndex + 1) { + preTerm = snapshot.lastApplyTerm; + + } else { + // maybe snapshot change + sError("sync get pre term, bad scene. index:%ld", index); + logStoreLog2("sync get pre term, bad scene", pSyncNode->pLogStore); + + SSyncRaftEntry* pPreEntry = NULL; + int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index - 1, &pPreEntry); + ASSERT(code == 0); + ASSERT(pPreEntry != NULL); + + preTerm = pPreEntry->term; + taosMemoryFree(pPreEntry); + } + + } else { + // no snapshot + ASSERT(index > SYNC_INDEX_BEGIN); + + SSyncRaftEntry* pPreEntry = NULL; + int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index - 1, &pPreEntry); + ASSERT(code == 0); + ASSERT(pPreEntry != NULL); + + preTerm = pPreEntry->term; + taosMemoryFree(pPreEntry); + } + + return preTerm; +} + +// get pre index and term of "index" +int32_t syncNodeGetPreIndexTerm(SSyncNode* pSyncNode, SyncIndex index, SyncIndex* pPreIndex, SyncTerm* pPreTerm) { + *pPreIndex = syncNodeGetPreIndex(pSyncNode, index); + *pPreTerm = syncNodeGetPreTerm(pSyncNode, index); + return 0; +} + // for debug -------------- void syncNodePrint(SSyncNode* pObj) { char* serialized = syncNode2Str(pObj); @@ -1327,7 +1586,8 @@ static int32_t syncNodeAppendNoop(SSyncNode* ths) { assert(pEntry != NULL); if (ths->state == TAOS_SYNC_STATE_LEADER) { - ths->pLogStore->appendEntry(ths->pLogStore, pEntry); + // ths->pLogStore->appendEntry(ths->pLogStore, pEntry); + ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry); syncNodeReplicate(ths); } @@ -1383,13 +1643,14 @@ int32_t syncNodeOnClientRequestCb(SSyncNode* ths, SyncClientRequest* pMsg) { int32_t ret = 0; syncClientRequestLog2("==syncNodeOnClientRequestCb==", pMsg); - SyncIndex index = ths->pLogStore->getLastIndex(ths->pLogStore) + 1; + SyncIndex index = ths->pLogStore->syncLogWriteIndex(ths->pLogStore); SyncTerm term = ths->pRaftStore->currentTerm; SSyncRaftEntry* pEntry = syncEntryBuild2((SyncClientRequest*)pMsg, term, index); assert(pEntry != NULL); if (ths->state == TAOS_SYNC_STATE_LEADER) { - ths->pLogStore->appendEntry(ths->pLogStore, pEntry); + // ths->pLogStore->appendEntry(ths->pLogStore, pEntry); + ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry); // start replicate right now! syncNodeReplicate(ths); @@ -1459,3 +1720,137 @@ const char* syncStr(ESyncState state) { return "error"; } } + +int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag) { + int32_t code = 0; + ESyncState state = flag; + sInfo("sync event commit from index:%" PRId64 " to index:%" PRId64 ", %s", beginIndex, endIndex, + syncUtilState2String(state)); + + // maybe execute by leader, skip snapshot + SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0}; + if (ths->pFsm->FpGetSnapshot != NULL) { + ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot); + } + if (beginIndex <= snapshot.lastApplyIndex) { + beginIndex = snapshot.lastApplyIndex + 1; + } + + // execute fsm + if (ths->pFsm != NULL) { + for (SyncIndex i = beginIndex; i <= endIndex; ++i) { + if (i != SYNC_INDEX_INVALID) { + SSyncRaftEntry* pEntry; + code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, i, &pEntry); + ASSERT(code == 0); + ASSERT(pEntry != NULL); + + SRpcMsg rpcMsg; + syncEntry2OriginalRpc(pEntry, &rpcMsg); + + if (ths->pFsm->FpCommitCb != NULL && syncUtilUserCommit(pEntry->originalRpcType)) { + SFsmCbMeta cbMeta; + cbMeta.index = pEntry->index; + cbMeta.isWeak = pEntry->isWeak; + cbMeta.code = 0; + cbMeta.state = ths->state; + cbMeta.seqNum = pEntry->seqNum; + cbMeta.term = pEntry->term; + cbMeta.currentTerm = ths->pRaftStore->currentTerm; + cbMeta.flag = flag; + + ths->pFsm->FpCommitCb(ths->pFsm, &rpcMsg, cbMeta); + } + + // config change + if (pEntry->originalRpcType == TDMT_SYNC_CONFIG_CHANGE) { + SSyncCfg oldSyncCfg = ths->pRaftCfg->cfg; + + SSyncCfg newSyncCfg; + int32_t ret = syncCfgFromStr(rpcMsg.pCont, &newSyncCfg); + ASSERT(ret == 0); + + // update new config myIndex + bool hit = false; + for (int i = 0; i < newSyncCfg.replicaNum; ++i) { + if (strcmp(ths->myNodeInfo.nodeFqdn, (newSyncCfg.nodeInfo)[i].nodeFqdn) == 0 && + ths->myNodeInfo.nodePort == (newSyncCfg.nodeInfo)[i].nodePort) { + newSyncCfg.myIndex = i; + hit = true; + break; + } + } + + SReConfigCbMeta cbMeta = {0}; + bool isDrop; + + // I am in newConfig + if (hit) { + syncNodeUpdateConfig(ths, &newSyncCfg, &isDrop); + + // change isStandBy to normal + if (!isDrop) { + if (ths->state == TAOS_SYNC_STATE_LEADER) { + syncNodeBecomeLeader(ths); + } else { + syncNodeBecomeFollower(ths); + } + } + + char* sOld = syncCfg2Str(&oldSyncCfg); + char* sNew = syncCfg2Str(&newSyncCfg); + sInfo("==config change== 0x11 old:%s new:%s isDrop:%d \n", sOld, sNew, isDrop); + taosMemoryFree(sOld); + taosMemoryFree(sNew); + } + + // always call FpReConfigCb + if (ths->pFsm->FpReConfigCb != NULL) { + cbMeta.code = 0; + cbMeta.currentTerm = ths->pRaftStore->currentTerm; + cbMeta.index = pEntry->index; + cbMeta.term = pEntry->term; + cbMeta.oldCfg = oldSyncCfg; + cbMeta.flag = 0x11; + cbMeta.isDrop = isDrop; + ths->pFsm->FpReConfigCb(ths->pFsm, newSyncCfg, cbMeta); + } + } + + // restore finish + if (pEntry->index == ths->pLogStore->syncLogLastIndex(ths->pLogStore)) { + if (ths->restoreFinish == false) { + if (ths->pFsm->FpRestoreFinishCb != NULL) { + ths->pFsm->FpRestoreFinishCb(ths->pFsm); + } + ths->restoreFinish = true; + sInfo("restore finish %p vgId:%d", ths, ths->vgId); + } + } + + rpcFreeCont(rpcMsg.pCont); + syncEntryDestory(pEntry); + } + } + } + return 0; +} + +bool syncNodeInRaftGroup(SSyncNode* ths, SRaftId* pRaftId) { + for (int i = 0; i < ths->replicaNum; ++i) { + if (syncUtilSameId(&((ths->replicasId)[i]), pRaftId)) { + return true; + } + } + return false; +} + +SSyncSnapshotSender* syncNodeGetSnapshotSender(SSyncNode* ths, SRaftId* pDestId) { + SSyncSnapshotSender* pSender = NULL; + for (int i = 0; i < ths->replicaNum; ++i) { + if (syncUtilSameId(pDestId, &((ths->replicasId)[i]))) { + pSender = (ths->senders)[i]; + } + } + return pSender; +} \ No newline at end of file diff --git a/source/libs/sync/src/syncMessage.c b/source/libs/sync/src/syncMessage.c index 6871e6b3ed..af04a0f649 100644 --- a/source/libs/sync/src/syncMessage.c +++ b/source/libs/sync/src/syncMessage.c @@ -65,6 +65,16 @@ cJSON* syncRpcMsg2Json(SRpcMsg* pRpcMsg) { pRoot = syncAppendEntriesReply2Json(pSyncMsg); syncAppendEntriesReplyDestroy(pSyncMsg); + } else if (pRpcMsg->msgType == TDMT_SYNC_SNAPSHOT_SEND) { + SyncSnapshotSend* pSyncMsg = syncSnapshotSendDeserialize2(pRpcMsg->pCont, pRpcMsg->contLen); + pRoot = syncSnapshotSend2Json(pSyncMsg); + syncSnapshotSendDestroy(pSyncMsg); + + } else if (pRpcMsg->msgType == TDMT_SYNC_SNAPSHOT_RSP) { + SyncSnapshotRsp* pSyncMsg = syncSnapshotRspDeserialize2(pRpcMsg->pCont, pRpcMsg->contLen); + pRoot = syncSnapshotRsp2Json(pSyncMsg); + syncSnapshotRspDestroy(pSyncMsg); + } else if (pRpcMsg->msgType == TDMT_SYNC_COMMON_RESPONSE) { pRoot = cJSON_CreateObject(); char* s; @@ -135,9 +145,11 @@ void syncRpcMsgLog(SRpcMsg* pMsg) { } void syncRpcMsgLog2(char* s, SRpcMsg* pMsg) { - char* serialized = syncRpcMsg2Str(pMsg); - sTrace("syncRpcMsgLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = syncRpcMsg2Str(pMsg); + sTrace("syncRpcMsgLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } // ---- message process SyncTimeout---- @@ -264,9 +276,11 @@ void syncTimeoutLog(const SyncTimeout* pMsg) { } void syncTimeoutLog2(char* s, const SyncTimeout* pMsg) { - char* serialized = syncTimeout2Str(pMsg); - sTrace("syncTimeoutLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = syncTimeout2Str(pMsg); + sTrace("syncTimeoutLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } // ---- message process SyncPing---- @@ -524,9 +538,11 @@ void syncPingLog(const SyncPing* pMsg) { } void syncPingLog2(char* s, const SyncPing* pMsg) { - char* serialized = syncPing2Str(pMsg); - sTrace("syncPingLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = syncPing2Str(pMsg); + sTrace("syncPingLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } // ---- message process SyncPingReply---- @@ -784,9 +800,11 @@ void syncPingReplyLog(const SyncPingReply* pMsg) { } void syncPingReplyLog2(char* s, const SyncPingReply* pMsg) { - char* serialized = syncPingReply2Str(pMsg); - sTrace("syncPingReplyLog2 | len:%zu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = syncPingReply2Str(pMsg); + sTrace("syncPingReplyLog2 | len:%zu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } // ---- message process SyncClientRequest---- @@ -925,9 +943,11 @@ void syncClientRequestLog(const SyncClientRequest* pMsg) { } void syncClientRequestLog2(char* s, const SyncClientRequest* pMsg) { - char* serialized = syncClientRequest2Str(pMsg); - sTrace("syncClientRequestLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = syncClientRequest2Str(pMsg); + sTrace("syncClientRequestLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } // ---- message process SyncRequestVote---- @@ -1074,9 +1094,11 @@ void syncRequestVoteLog(const SyncRequestVote* pMsg) { } void syncRequestVoteLog2(char* s, const SyncRequestVote* pMsg) { - char* serialized = syncRequestVote2Str(pMsg); - sTrace("syncRequestVoteLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = syncRequestVote2Str(pMsg); + sTrace("syncRequestVoteLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } // ---- message process SyncRequestVoteReply---- @@ -1220,9 +1242,11 @@ void syncRequestVoteReplyLog(const SyncRequestVoteReply* pMsg) { } void syncRequestVoteReplyLog2(char* s, const SyncRequestVoteReply* pMsg) { - char* serialized = syncRequestVoteReply2Str(pMsg); - sTrace("syncRequestVoteReplyLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = syncRequestVoteReply2Str(pMsg); + sTrace("syncRequestVoteReplyLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } // ---- message process SyncAppendEntries---- @@ -1333,6 +1357,9 @@ cJSON* syncAppendEntries2Json(const SyncAppendEntries* pMsg) { snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->term); cJSON_AddStringToObject(pRoot, "term", u64buf); + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->privateTerm); + cJSON_AddStringToObject(pRoot, "privateTerm", u64buf); + snprintf(u64buf, sizeof(u64buf), "%ld", pMsg->prevLogIndex); cJSON_AddStringToObject(pRoot, "prevLogIndex", u64buf); @@ -1386,9 +1413,11 @@ void syncAppendEntriesLog(const SyncAppendEntries* pMsg) { } void syncAppendEntriesLog2(char* s, const SyncAppendEntries* pMsg) { - char* serialized = syncAppendEntries2Str(pMsg); - sTrace("syncAppendEntriesLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = syncAppendEntries2Str(pMsg); + sTrace("syncAppendEntriesLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } // ---- message process SyncAppendEntriesReply---- @@ -1494,6 +1523,9 @@ cJSON* syncAppendEntriesReply2Json(const SyncAppendEntriesReply* pMsg) { cJSON_AddNumberToObject(pDestId, "vgId", pMsg->destId.vgId); cJSON_AddItemToObject(pRoot, "destId", pDestId); + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->privateTerm); + cJSON_AddStringToObject(pRoot, "privateTerm", u64buf); + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->term); cJSON_AddStringToObject(pRoot, "term", u64buf); cJSON_AddNumberToObject(pRoot, "success", pMsg->success); @@ -1535,9 +1567,11 @@ void syncAppendEntriesReplyLog(const SyncAppendEntriesReply* pMsg) { } void syncAppendEntriesReplyLog2(char* s, const SyncAppendEntriesReply* pMsg) { - char* serialized = syncAppendEntriesReply2Str(pMsg); - sTrace("syncAppendEntriesReplyLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = syncAppendEntriesReply2Str(pMsg); + sTrace("syncAppendEntriesReplyLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } // ---- message process SyncApplyMsg---- @@ -1686,7 +1720,339 @@ void syncApplyMsgLog(const SyncApplyMsg* pMsg) { } void syncApplyMsgLog2(char* s, const SyncApplyMsg* pMsg) { - char* serialized = syncApplyMsg2Str(pMsg); - sTrace("syncApplyMsgLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + if (gRaftDetailLog) { + char* serialized = syncApplyMsg2Str(pMsg); + sTrace("syncApplyMsgLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } +} + +// --------------------------------------------- +SyncSnapshotSend* syncSnapshotSendBuild(uint32_t dataLen, int32_t vgId) { + uint32_t bytes = sizeof(SyncSnapshotSend) + dataLen; + SyncSnapshotSend* pMsg = taosMemoryMalloc(bytes); + memset(pMsg, 0, bytes); + pMsg->bytes = bytes; + pMsg->vgId = vgId; + pMsg->msgType = TDMT_SYNC_SNAPSHOT_SEND; + pMsg->dataLen = dataLen; + return pMsg; +} + +void syncSnapshotSendDestroy(SyncSnapshotSend* pMsg) { + if (pMsg != NULL) { + taosMemoryFree(pMsg); + } +} + +void syncSnapshotSendSerialize(const SyncSnapshotSend* pMsg, char* buf, uint32_t bufLen) { + assert(pMsg->bytes <= bufLen); + memcpy(buf, pMsg, pMsg->bytes); +} + +void syncSnapshotSendDeserialize(const char* buf, uint32_t len, SyncSnapshotSend* pMsg) { + memcpy(pMsg, buf, len); + assert(len == pMsg->bytes); + assert(pMsg->bytes == sizeof(SyncSnapshotSend) + pMsg->dataLen); +} + +char* syncSnapshotSendSerialize2(const SyncSnapshotSend* pMsg, uint32_t* len) { + char* buf = taosMemoryMalloc(pMsg->bytes); + assert(buf != NULL); + syncSnapshotSendSerialize(pMsg, buf, pMsg->bytes); + if (len != NULL) { + *len = pMsg->bytes; + } + return buf; +} + +SyncSnapshotSend* syncSnapshotSendDeserialize2(const char* buf, uint32_t len) { + uint32_t bytes = *((uint32_t*)buf); + SyncSnapshotSend* pMsg = taosMemoryMalloc(bytes); + assert(pMsg != NULL); + syncSnapshotSendDeserialize(buf, len, pMsg); + assert(len == pMsg->bytes); + return pMsg; +} + +void syncSnapshotSend2RpcMsg(const SyncSnapshotSend* pMsg, SRpcMsg* pRpcMsg) { + memset(pRpcMsg, 0, sizeof(*pRpcMsg)); + pRpcMsg->msgType = pMsg->msgType; + pRpcMsg->contLen = pMsg->bytes; + pRpcMsg->pCont = rpcMallocCont(pRpcMsg->contLen); + syncSnapshotSendSerialize(pMsg, pRpcMsg->pCont, pRpcMsg->contLen); +} + +void syncSnapshotSendFromRpcMsg(const SRpcMsg* pRpcMsg, SyncSnapshotSend* pMsg) { + syncSnapshotSendDeserialize(pRpcMsg->pCont, pRpcMsg->contLen, pMsg); +} + +SyncSnapshotSend* syncSnapshotSendFromRpcMsg2(const SRpcMsg* pRpcMsg) { + SyncSnapshotSend* pMsg = syncSnapshotSendDeserialize2(pRpcMsg->pCont, pRpcMsg->contLen); + assert(pMsg != NULL); + return pMsg; +} + +cJSON* syncSnapshotSend2Json(const SyncSnapshotSend* pMsg) { + char u64buf[128]; + cJSON* pRoot = cJSON_CreateObject(); + + if (pMsg != NULL) { + cJSON_AddNumberToObject(pRoot, "bytes", pMsg->bytes); + cJSON_AddNumberToObject(pRoot, "vgId", pMsg->vgId); + cJSON_AddNumberToObject(pRoot, "msgType", pMsg->msgType); + + cJSON* pSrcId = cJSON_CreateObject(); + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->srcId.addr); + cJSON_AddStringToObject(pSrcId, "addr", u64buf); + { + uint64_t u64 = pMsg->srcId.addr; + cJSON* pTmp = pSrcId; + char host[128]; + uint16_t port; + syncUtilU642Addr(u64, host, sizeof(host), &port); + cJSON_AddStringToObject(pTmp, "addr_host", host); + cJSON_AddNumberToObject(pTmp, "addr_port", port); + } + cJSON_AddNumberToObject(pSrcId, "vgId", pMsg->srcId.vgId); + cJSON_AddItemToObject(pRoot, "srcId", pSrcId); + + cJSON* pDestId = cJSON_CreateObject(); + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->destId.addr); + cJSON_AddStringToObject(pDestId, "addr", u64buf); + { + uint64_t u64 = pMsg->destId.addr; + cJSON* pTmp = pDestId; + char host[128]; + uint16_t port; + syncUtilU642Addr(u64, host, sizeof(host), &port); + cJSON_AddStringToObject(pTmp, "addr_host", host); + cJSON_AddNumberToObject(pTmp, "addr_port", port); + } + cJSON_AddNumberToObject(pDestId, "vgId", pMsg->destId.vgId); + cJSON_AddItemToObject(pRoot, "destId", pDestId); + + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->term); + cJSON_AddStringToObject(pRoot, "term", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->privateTerm); + cJSON_AddStringToObject(pRoot, "privateTerm", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%ld", pMsg->lastIndex); + cJSON_AddStringToObject(pRoot, "lastIndex", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->lastTerm); + cJSON_AddStringToObject(pRoot, "lastTerm", u64buf); + + cJSON_AddNumberToObject(pRoot, "seq", pMsg->seq); + + cJSON_AddNumberToObject(pRoot, "dataLen", pMsg->dataLen); + char* s; + s = syncUtilprintBin((char*)(pMsg->data), pMsg->dataLen); + cJSON_AddStringToObject(pRoot, "data", s); + taosMemoryFree(s); + s = syncUtilprintBin2((char*)(pMsg->data), pMsg->dataLen); + cJSON_AddStringToObject(pRoot, "data2", s); + taosMemoryFree(s); + } + + cJSON* pJson = cJSON_CreateObject(); + cJSON_AddItemToObject(pJson, "SyncSnapshotSend", pRoot); + return pJson; +} + +char* syncSnapshotSend2Str(const SyncSnapshotSend* pMsg) { + cJSON* pJson = syncSnapshotSend2Json(pMsg); + char* serialized = cJSON_Print(pJson); + cJSON_Delete(pJson); + return serialized; +} + +// for debug ---------------------- +void syncSnapshotSendPrint(const SyncSnapshotSend* pMsg) { + char* serialized = syncSnapshotSend2Str(pMsg); + printf("syncSnapshotSendPrint | len:%lu | %s \n", strlen(serialized), serialized); + fflush(NULL); taosMemoryFree(serialized); } + +void syncSnapshotSendPrint2(char* s, const SyncSnapshotSend* pMsg) { + char* serialized = syncSnapshotSend2Str(pMsg); + printf("syncSnapshotSendPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + fflush(NULL); + taosMemoryFree(serialized); +} + +void syncSnapshotSendLog(const SyncSnapshotSend* pMsg) { + char* serialized = syncSnapshotSend2Str(pMsg); + sTrace("syncSnapshotSendLog | len:%lu | %s", strlen(serialized), serialized); + taosMemoryFree(serialized); +} + +void syncSnapshotSendLog2(char* s, const SyncSnapshotSend* pMsg) { + if (gRaftDetailLog) { + char* serialized = syncSnapshotSend2Str(pMsg); + sTrace("syncSnapshotSendLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } +} + +// --------------------------------------------- +SyncSnapshotRsp* syncSnapshotRspBuild(int32_t vgId) { + uint32_t bytes = sizeof(SyncSnapshotRsp); + SyncSnapshotRsp* pMsg = taosMemoryMalloc(bytes); + memset(pMsg, 0, bytes); + pMsg->bytes = bytes; + pMsg->vgId = vgId; + pMsg->msgType = TDMT_SYNC_SNAPSHOT_RSP; + return pMsg; +} + +void syncSnapshotRspDestroy(SyncSnapshotRsp* pMsg) { + if (pMsg != NULL) { + taosMemoryFree(pMsg); + } +} + +void syncSnapshotRspSerialize(const SyncSnapshotRsp* pMsg, char* buf, uint32_t bufLen) { + assert(pMsg->bytes <= bufLen); + memcpy(buf, pMsg, pMsg->bytes); +} + +void syncSnapshotRspDeserialize(const char* buf, uint32_t len, SyncSnapshotRsp* pMsg) { + memcpy(pMsg, buf, len); + assert(len == pMsg->bytes); +} + +char* syncSnapshotRspSerialize2(const SyncSnapshotRsp* pMsg, uint32_t* len) { + char* buf = taosMemoryMalloc(pMsg->bytes); + assert(buf != NULL); + syncSnapshotRspSerialize(pMsg, buf, pMsg->bytes); + if (len != NULL) { + *len = pMsg->bytes; + } + return buf; +} + +SyncSnapshotRsp* syncSnapshotRspDeserialize2(const char* buf, uint32_t len) { + uint32_t bytes = *((uint32_t*)buf); + SyncSnapshotRsp* pMsg = taosMemoryMalloc(bytes); + assert(pMsg != NULL); + syncSnapshotRspDeserialize(buf, len, pMsg); + assert(len == pMsg->bytes); + return pMsg; +} + +void syncSnapshotRsp2RpcMsg(const SyncSnapshotRsp* pMsg, SRpcMsg* pRpcMsg) { + memset(pRpcMsg, 0, sizeof(*pRpcMsg)); + pRpcMsg->msgType = pMsg->msgType; + pRpcMsg->contLen = pMsg->bytes; + pRpcMsg->pCont = rpcMallocCont(pRpcMsg->contLen); + syncSnapshotRspSerialize(pMsg, pRpcMsg->pCont, pRpcMsg->contLen); +} + +void syncSnapshotRspFromRpcMsg(const SRpcMsg* pRpcMsg, SyncSnapshotRsp* pMsg) { + syncSnapshotRspDeserialize(pRpcMsg->pCont, pRpcMsg->contLen, pMsg); +} + +SyncSnapshotRsp* syncSnapshotRspFromRpcMsg2(const SRpcMsg* pRpcMsg) { + SyncSnapshotRsp* pMsg = syncSnapshotRspDeserialize2(pRpcMsg->pCont, pRpcMsg->contLen); + assert(pMsg != NULL); + return pMsg; +} + +cJSON* syncSnapshotRsp2Json(const SyncSnapshotRsp* pMsg) { + char u64buf[128]; + cJSON* pRoot = cJSON_CreateObject(); + + if (pMsg != NULL) { + cJSON_AddNumberToObject(pRoot, "bytes", pMsg->bytes); + cJSON_AddNumberToObject(pRoot, "vgId", pMsg->vgId); + cJSON_AddNumberToObject(pRoot, "msgType", pMsg->msgType); + + cJSON* pSrcId = cJSON_CreateObject(); + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->srcId.addr); + cJSON_AddStringToObject(pSrcId, "addr", u64buf); + { + uint64_t u64 = pMsg->srcId.addr; + cJSON* pTmp = pSrcId; + char host[128]; + uint16_t port; + syncUtilU642Addr(u64, host, sizeof(host), &port); + cJSON_AddStringToObject(pTmp, "addr_host", host); + cJSON_AddNumberToObject(pTmp, "addr_port", port); + } + cJSON_AddNumberToObject(pSrcId, "vgId", pMsg->srcId.vgId); + cJSON_AddItemToObject(pRoot, "srcId", pSrcId); + + cJSON* pDestId = cJSON_CreateObject(); + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->destId.addr); + cJSON_AddStringToObject(pDestId, "addr", u64buf); + { + uint64_t u64 = pMsg->destId.addr; + cJSON* pTmp = pDestId; + char host[128]; + uint16_t port; + syncUtilU642Addr(u64, host, sizeof(host), &port); + cJSON_AddStringToObject(pTmp, "addr_host", host); + cJSON_AddNumberToObject(pTmp, "addr_port", port); + } + cJSON_AddNumberToObject(pDestId, "vgId", pMsg->destId.vgId); + cJSON_AddItemToObject(pRoot, "destId", pDestId); + + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->term); + cJSON_AddStringToObject(pRoot, "term", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->privateTerm); + cJSON_AddStringToObject(pRoot, "privateTerm", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%ld", pMsg->lastIndex); + cJSON_AddStringToObject(pRoot, "lastIndex", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->lastTerm); + cJSON_AddStringToObject(pRoot, "lastTerm", u64buf); + + cJSON_AddNumberToObject(pRoot, "ack", pMsg->ack); + cJSON_AddNumberToObject(pRoot, "code", pMsg->code); + } + + cJSON* pJson = cJSON_CreateObject(); + cJSON_AddItemToObject(pJson, "SyncSnapshotRsp", pRoot); + return pJson; +} + +char* syncSnapshotRsp2Str(const SyncSnapshotRsp* pMsg) { + cJSON* pJson = syncSnapshotRsp2Json(pMsg); + char* serialized = cJSON_Print(pJson); + cJSON_Delete(pJson); + return serialized; +} + +// for debug ---------------------- +void syncSnapshotRspPrint(const SyncSnapshotRsp* pMsg) { + char* serialized = syncSnapshotRsp2Str(pMsg); + printf("syncSnapshotRspPrint | len:%lu | %s \n", strlen(serialized), serialized); + fflush(NULL); + taosMemoryFree(serialized); +} + +void syncSnapshotRspPrint2(char* s, const SyncSnapshotRsp* pMsg) { + char* serialized = syncSnapshotRsp2Str(pMsg); + printf("syncSnapshotRspPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + fflush(NULL); + taosMemoryFree(serialized); +} + +void syncSnapshotRspLog(const SyncSnapshotRsp* pMsg) { + char* serialized = syncSnapshotRsp2Str(pMsg); + sTrace("syncSnapshotRspLog | len:%lu | %s", strlen(serialized), serialized); + taosMemoryFree(serialized); +} + +void syncSnapshotRspLog2(char* s, const SyncSnapshotRsp* pMsg) { + if (gRaftDetailLog) { + char* serialized = syncSnapshotRsp2Str(pMsg); + sTrace("syncSnapshotRspLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } +} \ No newline at end of file diff --git a/source/libs/sync/src/syncRaftCfg.c b/source/libs/sync/src/syncRaftCfg.c index 3e1931e2c3..95eec5d98f 100644 --- a/source/libs/sync/src/syncRaftCfg.c +++ b/source/libs/sync/src/syncRaftCfg.c @@ -148,6 +148,7 @@ cJSON *raftCfg2Json(SRaftCfg *pRaftCfg) { cJSON *pRoot = cJSON_CreateObject(); cJSON_AddItemToObject(pRoot, "SSyncCfg", syncCfg2Json(&(pRaftCfg->cfg))); cJSON_AddNumberToObject(pRoot, "isStandBy", pRaftCfg->isStandBy); + cJSON_AddNumberToObject(pRoot, "snapshotEnable", pRaftCfg->snapshotEnable); cJSON *pJson = cJSON_CreateObject(); cJSON_AddItemToObject(pJson, "RaftCfg", pRoot); @@ -161,7 +162,7 @@ char *raftCfg2Str(SRaftCfg *pRaftCfg) { return serialized; } -int32_t raftCfgCreateFile(SSyncCfg *pCfg, int8_t isStandBy, const char *path) { +int32_t raftCfgCreateFile(SSyncCfg *pCfg, SRaftCfgMeta meta, const char *path) { assert(pCfg != NULL); TdFilePtr pFile = taosOpenFile(path, TD_FILE_CREATE | TD_FILE_WRITE); @@ -169,7 +170,8 @@ int32_t raftCfgCreateFile(SSyncCfg *pCfg, int8_t isStandBy, const char *path) { SRaftCfg raftCfg; raftCfg.cfg = *pCfg; - raftCfg.isStandBy = isStandBy; + raftCfg.isStandBy = meta.isStandBy; + raftCfg.snapshotEnable = meta.snapshotEnable; char *s = raftCfg2Str(&raftCfg); char buf[CONFIG_FILE_LEN] = {0}; @@ -194,6 +196,9 @@ int32_t raftCfgFromJson(const cJSON *pRoot, SRaftCfg *pRaftCfg) { cJSON *pJsonIsStandBy = cJSON_GetObjectItem(pJson, "isStandBy"); pRaftCfg->isStandBy = cJSON_GetNumberValue(pJsonIsStandBy); + cJSON *pJsonSnapshotEnable = cJSON_GetObjectItem(pJson, "snapshotEnable"); + pRaftCfg->snapshotEnable = cJSON_GetNumberValue(pJsonSnapshotEnable); + cJSON * pJsonSyncCfg = cJSON_GetObjectItem(pJson, "SSyncCfg"); int32_t code = syncCfgFromJson(pJsonSyncCfg, &(pRaftCfg->cfg)); ASSERT(code == 0); diff --git a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index b353ed85db..49509ae979 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -16,6 +16,23 @@ #include "syncRaftLog.h" #include "wal.h" +// refactor, log[0 .. n] ==> log[m .. n] +static int32_t raftLogSetBeginIndex(struct SSyncLogStore* pLogStore, SyncIndex beginIndex); +static SyncIndex raftLogBeginIndex(struct SSyncLogStore* pLogStore); +static SyncIndex raftLogEndIndex(struct SSyncLogStore* pLogStore); +static SyncIndex raftLogWriteIndex(struct SSyncLogStore* pLogStore); +static bool raftLogIsEmpty(struct SSyncLogStore* pLogStore); +static int32_t raftLogEntryCount(struct SSyncLogStore* pLogStore); +static bool raftLogInRange(struct SSyncLogStore* pLogStore, SyncIndex index); +static SyncIndex raftLogLastIndex(struct SSyncLogStore* pLogStore); +static SyncTerm raftLogLastTerm(struct SSyncLogStore* pLogStore); +static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry); +static int32_t raftLogGetEntry(struct SSyncLogStore* pLogStore, SyncIndex index, SSyncRaftEntry** ppEntry); +static int32_t raftLogTruncate(struct SSyncLogStore* pLogStore, SyncIndex fromIndex); + +static int32_t raftLogGetLastEntry(SSyncLogStore* pLogStore, SSyncRaftEntry** ppLastEntry); + +//------------------------------- static SSyncRaftEntry* logStoreGetLastEntry(SSyncLogStore* pLogStore); static SyncIndex logStoreLastIndex(SSyncLogStore* pLogStore); static SyncTerm logStoreLastTerm(SSyncLogStore* pLogStore); @@ -25,6 +42,202 @@ static int32_t logStoreTruncate(SSyncLogStore* pLogStore, SyncIndex from static int32_t logStoreUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index); static SyncIndex logStoreGetCommitIndex(SSyncLogStore* pLogStore); +// refactor, log[0 .. n] ==> log[m .. n] +static int32_t raftLogSetBeginIndex(struct SSyncLogStore* pLogStore, SyncIndex beginIndex) { + sTrace("raftLogSetBeginIndex beginIndex:%ld", beginIndex); + + // if beginIndex == 0, donot need call this funciton + ASSERT(beginIndex > 0); + + SSyncLogStoreData* pData = pLogStore->data; + SWal* pWal = pData->pWal; + pData->beginIndex = beginIndex; + walRestoreFromSnapshot(pWal, beginIndex - 1); + return 0; +} + +int32_t raftLogResetBeginIndex(struct SSyncLogStore* pLogStore) { return 0; } + +static SyncIndex raftLogBeginIndex(struct SSyncLogStore* pLogStore) { + SSyncLogStoreData* pData = pLogStore->data; + SWal* pWal = pData->pWal; + return pData->beginIndex; +} + +static SyncIndex raftLogEndIndex(struct SSyncLogStore* pLogStore) { return raftLogLastIndex(pLogStore); } + +static bool raftLogIsEmpty(struct SSyncLogStore* pLogStore) { + SyncIndex beginIndex = raftLogBeginIndex(pLogStore); + SyncIndex endIndex = raftLogEndIndex(pLogStore); + return (endIndex < beginIndex); +} + +static int32_t raftLogEntryCount(struct SSyncLogStore* pLogStore) { + SyncIndex beginIndex = raftLogBeginIndex(pLogStore); + SyncIndex endIndex = raftLogEndIndex(pLogStore); + int32_t count = endIndex - beginIndex + 1; + return count > 0 ? count : 0; +} + +static bool raftLogInRange(struct SSyncLogStore* pLogStore, SyncIndex index) { + SyncIndex beginIndex = raftLogBeginIndex(pLogStore); + SyncIndex endIndex = raftLogEndIndex(pLogStore); + if (index >= beginIndex && index <= endIndex) { + return true; + } else { + return false; + } +} + +static SyncIndex raftLogLastIndex(struct SSyncLogStore* pLogStore) { + SyncIndex lastIndex; + SSyncLogStoreData* pData = pLogStore->data; + SWal* pWal = pData->pWal; + SyncIndex lastVer = walGetLastVer(pWal); + SyncIndex firstVer = walGetFirstVer(pWal); + + if (lastVer < firstVer) { + // no record + lastIndex = -1; + + } else { + if (firstVer >= 0) { + lastIndex = lastVer; + } else if (firstVer == -1) { + lastIndex = -1; + } else { + ASSERT(0); + } + } + + return lastIndex; +} + +static SyncIndex raftLogWriteIndex(struct SSyncLogStore* pLogStore) { + SSyncLogStoreData* pData = pLogStore->data; + SWal* pWal = pData->pWal; + SyncIndex lastVer = walGetLastVer(pWal); + return lastVer + 1; +} + +static SyncTerm raftLogLastTerm(struct SSyncLogStore* pLogStore) { + SyncTerm lastTerm = 0; + if (raftLogEntryCount(pLogStore) == 0) { + lastTerm = 0; + } else { + SSyncRaftEntry* pLastEntry; + int32_t code = raftLogGetLastEntry(pLogStore, &pLastEntry); + ASSERT(code == 0); + if (pLastEntry != NULL) { + lastTerm = pLastEntry->term; + taosMemoryFree(pLastEntry); + } + } + return lastTerm; +} + +static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry) { + SSyncLogStoreData* pData = pLogStore->data; + SWal* pWal = pData->pWal; + + SyncIndex writeIndex = raftLogWriteIndex(pLogStore); + ASSERT(pEntry->index == writeIndex); + + int code = 0; + SSyncLogMeta syncMeta; + syncMeta.isWeek = pEntry->isWeak; + syncMeta.seqNum = pEntry->seqNum; + syncMeta.term = pEntry->term; + code = walWriteWithSyncInfo(pWal, pEntry->index, pEntry->originalRpcType, syncMeta, pEntry->data, pEntry->dataLen); + if (code != 0) { + int32_t err = terrno; + const char* errStr = tstrerror(err); + int32_t linuxErr = errno; + const char* linuxErrMsg = strerror(errno); + sError("raftLogAppendEntry error, err:%d %X, msg:%s, linuxErr:%d, linuxErrMsg:%s", err, err, errStr, linuxErr, + linuxErrMsg); + ASSERT(0); + } + + walFsync(pWal, true); + + sTrace("sync event write index:%" PRId64, pEntry->index); + + return code; +} + +static int32_t raftLogGetEntry(struct SSyncLogStore* pLogStore, SyncIndex index, SSyncRaftEntry** ppEntry) { + SSyncLogStoreData* pData = pLogStore->data; + SWal* pWal = pData->pWal; + int32_t code; + + *ppEntry = NULL; + if (raftLogInRange(pLogStore, index)) { + SWalReadHandle* pWalHandle = walOpenReadHandle(pWal); + ASSERT(pWalHandle != NULL); + + code = walReadWithHandle(pWalHandle, index); + if (code != 0) { + int32_t err = terrno; + const char* errStr = tstrerror(err); + int32_t linuxErr = errno; + const char* linuxErrMsg = strerror(errno); + sError("raftLogGetEntry error, err:%d %X, msg:%s, linuxErr:%d, linuxErrMsg:%s", err, err, errStr, linuxErr, + linuxErrMsg); + ASSERT(0); + walCloseReadHandle(pWalHandle); + return code; + } + + *ppEntry = syncEntryBuild(pWalHandle->pHead->head.bodyLen); + ASSERT(*ppEntry != NULL); + (*ppEntry)->msgType = TDMT_SYNC_CLIENT_REQUEST; + (*ppEntry)->originalRpcType = pWalHandle->pHead->head.msgType; + (*ppEntry)->seqNum = pWalHandle->pHead->head.syncMeta.seqNum; + (*ppEntry)->isWeak = pWalHandle->pHead->head.syncMeta.isWeek; + (*ppEntry)->term = pWalHandle->pHead->head.syncMeta.term; + (*ppEntry)->index = index; + ASSERT((*ppEntry)->dataLen == pWalHandle->pHead->head.bodyLen); + memcpy((*ppEntry)->data, pWalHandle->pHead->head.body, pWalHandle->pHead->head.bodyLen); + + // need to hold, do not new every time!! + walCloseReadHandle(pWalHandle); + + } else { + // index not in range + code = 0; + } + + return code; +} + +static int32_t raftLogTruncate(struct SSyncLogStore* pLogStore, SyncIndex fromIndex) { + SSyncLogStoreData* pData = pLogStore->data; + SWal* pWal = pData->pWal; + int32_t code = walRollback(pWal, fromIndex); + if (code != 0) { + int32_t err = terrno; + const char* errStr = tstrerror(err); + int32_t linuxErr = errno; + const char* linuxErrMsg = strerror(errno); + sError("raftLogTruncate error, err:%d %X, msg:%s, linuxErr:%d, linuxErrMsg:%s", err, err, errStr, linuxErr, + linuxErrMsg); + ASSERT(0); + } + return code; +} + +static int32_t raftLogGetLastEntry(SSyncLogStore* pLogStore, SSyncRaftEntry** ppLastEntry) { + *ppLastEntry = NULL; + if (raftLogEntryCount(pLogStore) == 0) { + return 0; + } + SyncIndex lastIndex = raftLogLastIndex(pLogStore); + int32_t code = raftLogGetEntry(pLogStore, lastIndex, ppLastEntry); + return code; +} + +//------------------------------- SSyncLogStore* logStoreCreate(SSyncNode* pSyncNode) { SSyncLogStore* pLogStore = taosMemoryMalloc(sizeof(SSyncLogStore)); assert(pLogStore != NULL); @@ -36,6 +249,16 @@ SSyncLogStore* logStoreCreate(SSyncNode* pSyncNode) { pData->pSyncNode = pSyncNode; pData->pWal = pSyncNode->pWal; + SyncIndex firstVer = walGetFirstVer(pData->pWal); + SyncIndex lastVer = walGetLastVer(pData->pWal); + if (firstVer >= 0) { + pData->beginIndex = firstVer; + } else if (firstVer == -1) { + pData->beginIndex = lastVer + 1; + } else { + ASSERT(0); + } + pLogStore->appendEntry = logStoreAppendEntry; pLogStore->getEntry = logStoreGetEntry; pLogStore->truncate = logStoreTruncate; @@ -43,6 +266,20 @@ SSyncLogStore* logStoreCreate(SSyncNode* pSyncNode) { pLogStore->getLastTerm = logStoreLastTerm; pLogStore->updateCommitIndex = logStoreUpdateCommitIndex; pLogStore->getCommitIndex = logStoreGetCommitIndex; + + pLogStore->syncLogSetBeginIndex = raftLogSetBeginIndex; + pLogStore->syncLogBeginIndex = raftLogBeginIndex; + pLogStore->syncLogEndIndex = raftLogEndIndex; + pLogStore->syncLogIsEmpty = raftLogIsEmpty; + pLogStore->syncLogEntryCount = raftLogEntryCount; + pLogStore->syncLogInRange = raftLogInRange; + pLogStore->syncLogLastIndex = raftLogLastIndex; + pLogStore->syncLogLastTerm = raftLogLastTerm; + pLogStore->syncLogAppendEntry = raftLogAppendEntry; + pLogStore->syncLogGetEntry = raftLogGetEntry; + pLogStore->syncLogTruncate = raftLogTruncate; + pLogStore->syncLogWriteIndex = raftLogWriteIndex; + return pLogStore; } @@ -53,6 +290,7 @@ void logStoreDestory(SSyncLogStore* pLogStore) { } } +//------------------------------- int32_t logStoreAppendEntry(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry) { SSyncLogStoreData* pData = pLogStore->data; SWal* pWal = pData->pWal; @@ -78,6 +316,8 @@ int32_t logStoreAppendEntry(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry) { // assert(code == 0); walFsync(pWal, true); + + sTrace("sync event old write wal: %ld", pEntry->index); return code; } @@ -136,7 +376,7 @@ int32_t logStoreTruncate(SSyncLogStore* pLogStore, SyncIndex fromIndex) { linuxErrMsg); ASSERT(0); } - return 0; // to avoid compiler error + return 0; } SyncIndex logStoreLastIndex(SSyncLogStore* pLogStore) { @@ -169,7 +409,7 @@ int32_t logStoreUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index) { sError("walCommit error, err:%d %X, msg:%s, linuxErr:%d, linuxErrMsg:%s", err, err, errStr, linuxErr, linuxErrMsg); ASSERT(0); } - return 0; // to avoid compiler error + return 0; } SyncIndex logStoreGetCommitIndex(SSyncLogStore* pLogStore) { @@ -199,15 +439,32 @@ cJSON* logStore2Json(SSyncLogStore* pLogStore) { cJSON_AddStringToObject(pRoot, "pSyncNode", u64buf); snprintf(u64buf, sizeof(u64buf), "%p", pData->pWal); cJSON_AddStringToObject(pRoot, "pWal", u64buf); - snprintf(u64buf, sizeof(u64buf), "%ld", logStoreLastIndex(pLogStore)); + + snprintf(u64buf, sizeof(u64buf), "%ld", pData->beginIndex); + cJSON_AddStringToObject(pRoot, "beginIndex", u64buf); + + SyncIndex endIndex = raftLogEndIndex(pLogStore); + snprintf(u64buf, sizeof(u64buf), "%ld", endIndex); + cJSON_AddStringToObject(pRoot, "endIndex", u64buf); + + int32_t count = raftLogEntryCount(pLogStore); + cJSON_AddNumberToObject(pRoot, "entryCount", count); + + snprintf(u64buf, sizeof(u64buf), "%ld", raftLogWriteIndex(pLogStore)); + cJSON_AddStringToObject(pRoot, "WriteIndex", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%d", raftLogIsEmpty(pLogStore)); + cJSON_AddStringToObject(pRoot, "IsEmpty", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%ld", raftLogLastIndex(pLogStore)); cJSON_AddStringToObject(pRoot, "LastIndex", u64buf); - snprintf(u64buf, sizeof(u64buf), "%lu", logStoreLastTerm(pLogStore)); + snprintf(u64buf, sizeof(u64buf), "%lu", raftLogLastTerm(pLogStore)); cJSON_AddStringToObject(pRoot, "LastTerm", u64buf); cJSON* pEntries = cJSON_CreateArray(); cJSON_AddItemToObject(pRoot, "pEntries", pEntries); - SyncIndex lastIndex = logStoreLastIndex(pLogStore); - for (SyncIndex i = 0; i <= lastIndex; ++i) { + + for (SyncIndex i = pData->beginIndex; i <= endIndex; ++i) { SSyncRaftEntry* pEntry = logStoreGetEntry(pLogStore, i); cJSON_AddItemToArray(pEntries, syncEntry2Json(pEntry)); syncEntryDestory(pEntry); @@ -236,9 +493,26 @@ cJSON* logStoreSimple2Json(SSyncLogStore* pLogStore) { cJSON_AddStringToObject(pRoot, "pSyncNode", u64buf); snprintf(u64buf, sizeof(u64buf), "%p", pData->pWal); cJSON_AddStringToObject(pRoot, "pWal", u64buf); - snprintf(u64buf, sizeof(u64buf), "%ld", logStoreLastIndex(pLogStore)); + + snprintf(u64buf, sizeof(u64buf), "%ld", pData->beginIndex); + cJSON_AddStringToObject(pRoot, "beginIndex", u64buf); + + SyncIndex endIndex = raftLogEndIndex(pLogStore); + snprintf(u64buf, sizeof(u64buf), "%ld", endIndex); + cJSON_AddStringToObject(pRoot, "endIndex", u64buf); + + int32_t count = raftLogEntryCount(pLogStore); + cJSON_AddNumberToObject(pRoot, "entryCount", count); + + snprintf(u64buf, sizeof(u64buf), "%ld", raftLogWriteIndex(pLogStore)); + cJSON_AddStringToObject(pRoot, "WriteIndex", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%d", raftLogIsEmpty(pLogStore)); + cJSON_AddStringToObject(pRoot, "IsEmpty", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%ld", raftLogLastIndex(pLogStore)); cJSON_AddStringToObject(pRoot, "LastIndex", u64buf); - snprintf(u64buf, sizeof(u64buf), "%lu", logStoreLastTerm(pLogStore)); + snprintf(u64buf, sizeof(u64buf), "%lu", raftLogLastTerm(pLogStore)); cJSON_AddStringToObject(pRoot, "LastTerm", u64buf); } @@ -254,6 +528,12 @@ char* logStoreSimple2Str(SSyncLogStore* pLogStore) { return serialized; } +SyncIndex logStoreFirstIndex(SSyncLogStore* pLogStore) { + SSyncLogStoreData* pData = pLogStore->data; + SWal* pWal = pData->pWal; + return walGetFirstVer(pWal); +} + // for debug ----------------- void logStorePrint(SSyncLogStore* pLogStore) { char* serialized = logStore2Str(pLogStore); @@ -303,7 +583,9 @@ void logStoreSimpleLog(SSyncLogStore* pLogStore) { } void logStoreSimpleLog2(char* s, SSyncLogStore* pLogStore) { - char* serialized = logStoreSimple2Str(pLogStore); - sTrace("logStoreSimpleLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); - taosMemoryFree(serialized); + if (gRaftDetailLog) { + char* serialized = logStoreSimple2Str(pLogStore); + sTrace("logStoreSimpleLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } diff --git a/source/libs/sync/src/syncReplication.c b/source/libs/sync/src/syncReplication.c index d17e64d936..ff39b0b13d 100644 --- a/source/libs/sync/src/syncReplication.c +++ b/source/libs/sync/src/syncReplication.c @@ -16,9 +16,11 @@ #include "syncReplication.h" #include "syncIndexMgr.h" #include "syncMessage.h" +#include "syncRaftCfg.h" #include "syncRaftEntry.h" #include "syncRaftLog.h" #include "syncRaftStore.h" +#include "syncSnapshot.h" #include "syncUtil.h" // TLA+ Spec @@ -59,6 +61,7 @@ int32_t syncNodeAppendEntriesPeers(SSyncNode* pSyncNode) { // set prevLogIndex SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId); + SyncIndex preLogIndex = nextIndex - 1; // set preLogTerm @@ -113,9 +116,87 @@ int32_t syncNodeAppendEntriesPeers(SSyncNode* pSyncNode) { return ret; } +int32_t syncNodeAppendEntriesPeersSnapshot(SSyncNode* pSyncNode) { + ASSERT(pSyncNode->state == TAOS_SYNC_STATE_LEADER); + + syncIndexMgrLog2("begin append entries peers pNextIndex:", pSyncNode->pNextIndex); + syncIndexMgrLog2("begin append entries peers pMatchIndex:", pSyncNode->pMatchIndex); + logStoreSimpleLog2("begin append entries peers LogStore:", pSyncNode->pLogStore); + { + SSnapshot snapshot; + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + sTrace("begin append entries peers, snapshot.lastApplyIndex:%ld, snapshot.lastApplyTerm:%lu", + snapshot.lastApplyIndex, snapshot.lastApplyTerm); + } + + int32_t ret = 0; + for (int i = 0; i < pSyncNode->peersNum; ++i) { + SRaftId* pDestId = &(pSyncNode->peersId[i]); + + // next index + SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId); + + // pre index, pre term + SyncIndex preLogIndex = syncNodeGetPreIndex(pSyncNode, nextIndex); + SyncTerm preLogTerm = syncNodeGetPreTerm(pSyncNode, nextIndex); + + // batch optimized + // SyncIndex lastIndex = syncUtilMinIndex(pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore), nextIndex); + + // prepare entry + SyncAppendEntries* pMsg = NULL; + + SSyncRaftEntry* pEntry; + int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, nextIndex, &pEntry); + ASSERT(code == 0); + + if (pEntry != NULL) { + pMsg = syncAppendEntriesBuild(pEntry->bytes, pSyncNode->vgId); + ASSERT(pMsg != NULL); + + // add pEntry into msg + uint32_t len; + char* serialized = syncEntrySerialize(pEntry, &len); + assert(len == pEntry->bytes); + memcpy(pMsg->data, serialized, len); + + taosMemoryFree(serialized); + syncEntryDestory(pEntry); + + } else { + // no entry in log + pMsg = syncAppendEntriesBuild(0, pSyncNode->vgId); + ASSERT(pMsg != NULL); + } + + // prepare msg + ASSERT(pMsg != NULL); + pMsg->srcId = pSyncNode->myRaftId; + pMsg->destId = *pDestId; + pMsg->term = pSyncNode->pRaftStore->currentTerm; + pMsg->prevLogIndex = preLogIndex; + pMsg->prevLogTerm = preLogTerm; + pMsg->commitIndex = pSyncNode->commitIndex; + pMsg->privateTerm = 0; + // pMsg->privateTerm = syncIndexMgrGetTerm(pSyncNode->pNextIndex, pDestId); + + // send msg + syncNodeAppendEntries(pSyncNode, pDestId, pMsg); + syncAppendEntriesDestroy(pMsg); + } + + return ret; +} + int32_t syncNodeReplicate(SSyncNode* pSyncNode) { // start replicate - int32_t ret = syncNodeAppendEntriesPeers(pSyncNode); + int32_t ret = 0; + + if (pSyncNode->pRaftCfg->snapshotEnable) { + ret = syncNodeAppendEntriesPeersSnapshot(pSyncNode); + } else { + ret = syncNodeAppendEntriesPeers(pSyncNode); + } return ret; } diff --git a/source/libs/sync/src/syncRequestVote.c b/source/libs/sync/src/syncRequestVote.c index 2656771292..9ed7f00982 100644 --- a/source/libs/sync/src/syncRequestVote.c +++ b/source/libs/sync/src/syncRequestVote.c @@ -15,6 +15,7 @@ #include "syncRequestVote.h" #include "syncInt.h" +#include "syncRaftCfg.h" #include "syncRaftStore.h" #include "syncUtil.h" #include "syncVoteMgr.h" @@ -62,6 +63,9 @@ int32_t syncNodeOnRequestVoteCb(SSyncNode* ths, SyncRequestVote* pMsg) { // maybe has already voted for pMsg->srcId // vote again, no harm raftStoreVote(ths->pRaftStore, &(pMsg->srcId)); + + // forbid elect for this round + syncNodeResetElectTimer(ths); } SyncRequestVoteReply* pReply = syncRequestVoteReplyBuild(ths->vgId); @@ -77,3 +81,64 @@ int32_t syncNodeOnRequestVoteCb(SSyncNode* ths, SyncRequestVote* pMsg) { return ret; } + +static bool syncNodeOnRequestVoteLogOK(SSyncNode* pSyncNode, SyncRequestVote* pMsg) { + SyncTerm myLastTerm = syncNodeGetLastTerm(pSyncNode); + SyncIndex myLastIndex = syncNodeGetLastIndex(pSyncNode); + + if (pMsg->lastLogTerm > myLastTerm) { + return true; + } + if (pMsg->lastLogTerm == myLastTerm && pMsg->lastLogIndex >= myLastIndex) { + return true; + } + + return false; +} + +int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg) { + int32_t ret = 0; + + // print log + char logBuf[128] = {0}; + snprintf(logBuf, sizeof(logBuf), "recv SyncRequestVote, currentTerm:%lu", ths->pRaftStore->currentTerm); + syncRequestVoteLog2(logBuf, pMsg); + + // if already drop replica, do not process + if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { + sInfo("recv SyncRequestVote maybe replica already dropped"); + return ret; + } + + // maybe update term + if (pMsg->term > ths->pRaftStore->currentTerm) { + syncNodeUpdateTerm(ths, pMsg->term); + } + ASSERT(pMsg->term <= ths->pRaftStore->currentTerm); + + bool logOK = syncNodeOnRequestVoteLogOK(ths, pMsg); + bool grant = (pMsg->term == ths->pRaftStore->currentTerm) && logOK && + ((!raftStoreHasVoted(ths->pRaftStore)) || (syncUtilSameId(&(ths->pRaftStore->voteFor), &(pMsg->srcId)))); + if (grant) { + // maybe has already voted for pMsg->srcId + // vote again, no harm + raftStoreVote(ths->pRaftStore, &(pMsg->srcId)); + + // forbid elect for this round + syncNodeResetElectTimer(ths); + } + + // send msg + SyncRequestVoteReply* pReply = syncRequestVoteReplyBuild(ths->vgId); + pReply->srcId = ths->myRaftId; + pReply->destId = pMsg->srcId; + pReply->term = ths->pRaftStore->currentTerm; + pReply->voteGranted = grant; + + SRpcMsg rpcMsg; + syncRequestVoteReply2RpcMsg(pReply, &rpcMsg); + syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg); + syncRequestVoteReplyDestroy(pReply); + + return ret; +} \ No newline at end of file diff --git a/source/libs/sync/src/syncRequestVoteReply.c b/source/libs/sync/src/syncRequestVoteReply.c index 75236aee2b..5d041cefcd 100644 --- a/source/libs/sync/src/syncRequestVoteReply.c +++ b/source/libs/sync/src/syncRequestVoteReply.c @@ -15,6 +15,7 @@ #include "syncRequestVoteReply.h" #include "syncInt.h" +#include "syncRaftCfg.h" #include "syncRaftStore.h" #include "syncUtil.h" #include "syncVoteMgr.h" @@ -92,3 +93,68 @@ int32_t syncNodeOnRequestVoteReplyCb(SSyncNode* ths, SyncRequestVoteReply* pMsg) return ret; } + +int32_t syncNodeOnRequestVoteReplySnapshotCb(SSyncNode* ths, SyncRequestVoteReply* pMsg) { + int32_t ret = 0; + + // print log + char logBuf[128] = {0}; + snprintf(logBuf, sizeof(logBuf), "recv SyncRequestVoteReply, term:%lu", ths->pRaftStore->currentTerm); + syncRequestVoteReplyLog2(logBuf, pMsg); + + // if already drop replica, do not process + if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { + sInfo("recv SyncRequestVoteReply, maybe replica already dropped"); + return ret; + } + + // drop stale response + if (pMsg->term < ths->pRaftStore->currentTerm) { + sTrace("recv SyncRequestVoteReply, drop stale response, receive_term:%lu current_term:%lu", pMsg->term, + ths->pRaftStore->currentTerm); + return ret; + } + + // assert(!(pMsg->term > ths->pRaftStore->currentTerm)); + // no need this code, because if I receive reply.term, then I must have sent for that term. + // if (pMsg->term > ths->pRaftStore->currentTerm) { + // syncNodeUpdateTerm(ths, pMsg->term); + // } + + if (pMsg->term > ths->pRaftStore->currentTerm) { + char logBuf[128] = {0}; + snprintf(logBuf, sizeof(logBuf), "recv SyncRequestVoteReply, error term, receive_term:%lu current_term:%lu", + pMsg->term, ths->pRaftStore->currentTerm); + syncNodePrint2(logBuf, ths); + sError("%s", logBuf); + return ret; + } + + ASSERT(pMsg->term == ths->pRaftStore->currentTerm); + + // This tallies votes even when the current state is not Candidate, + // but they won't be looked at, so it doesn't matter. + if (ths->state == TAOS_SYNC_STATE_CANDIDATE) { + votesRespondAdd(ths->pVotesRespond, pMsg); + if (pMsg->voteGranted) { + // add vote + voteGrantedVote(ths->pVotesGranted, pMsg); + + // maybe to leader + if (voteGrantedMajority(ths->pVotesGranted)) { + if (!ths->pVotesGranted->toLeader) { + syncNodeCandidate2Leader(ths); + + // prevent to leader again! + ths->pVotesGranted->toLeader = true; + } + } + } else { + ; + // do nothing + // UNCHANGED <> + } + } + + return ret; +} \ No newline at end of file diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index ccb0e6071b..a68312d07f 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -14,23 +14,598 @@ */ #include "syncSnapshot.h" +#include "syncIndexMgr.h" +#include "syncRaftLog.h" +#include "syncRaftStore.h" +#include "syncUtil.h" +#include "wal.h" -SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode) { return NULL; } +static void snapshotReceiverDoStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm); -void snapshotSenderDestroy(SSyncSnapshotSender *pSender) {} +//---------------------------------- +SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaIndex) { + bool condition = (pSyncNode->pFsm->FpSnapshotStartRead != NULL) && (pSyncNode->pFsm->FpSnapshotStopRead != NULL) && + (pSyncNode->pFsm->FpSnapshotDoRead != NULL); -int32_t snapshotSend(SSyncSnapshotSender *pSender) { return 0; } + SSyncSnapshotSender *pSender = NULL; + if (condition) { + pSender = taosMemoryMalloc(sizeof(SSyncSnapshotSender)); + ASSERT(pSender != NULL); + memset(pSender, 0, sizeof(*pSender)); -cJSON *snapshotSender2Json(SSyncSnapshotSender *pSender) { return NULL; } + pSender->start = false; + pSender->seq = SYNC_SNAPSHOT_SEQ_INVALID; + pSender->ack = SYNC_SNAPSHOT_SEQ_INVALID; + pSender->pReader = NULL; + pSender->pCurrentBlock = NULL; + pSender->blockLen = 0; + pSender->sendingMS = SYNC_SNAPSHOT_RETRY_MS; + pSender->pSyncNode = pSyncNode; + pSender->replicaIndex = replicaIndex; + pSender->term = pSyncNode->pRaftStore->currentTerm; + pSender->privateTerm = taosGetTimestampMs() + 100; + pSender->pSyncNode->pFsm->FpGetSnapshot(pSender->pSyncNode->pFsm, &(pSender->snapshot)); + pSender->finish = false; + } else { + sError("snapshotSenderCreate cannot create sender"); + } + return pSender; +} -char *snapshotSender2Str(SSyncSnapshotSender *pSender) { return NULL; } +void snapshotSenderDestroy(SSyncSnapshotSender *pSender) { + if (pSender != NULL) { + if (pSender->pCurrentBlock != NULL) { + taosMemoryFree(pSender->pCurrentBlock); + } + taosMemoryFree(pSender); + } +} -SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode) { return NULL; } +bool snapshotSenderIsStart(SSyncSnapshotSender *pSender) { return pSender->start; } -void snapshotReceiverDestroy(SSyncSnapshotReceiver *pReceiver) {} +// begin send snapshot (current term, seq begin) +void snapshotSenderStart(SSyncSnapshotSender *pSender) { + ASSERT(!snapshotSenderIsStart(pSender)); -int32_t snapshotReceive(SSyncSnapshotReceiver *pReceiver) { return 0; } + pSender->seq = SYNC_SNAPSHOT_SEQ_BEGIN; + pSender->ack = SYNC_SNAPSHOT_SEQ_INVALID; -cJSON *snapshotReceiver2Json(SSyncSnapshotReceiver *pReceiver) { return NULL; } + // open snapshot reader + ASSERT(pSender->pReader == NULL); + int32_t ret = pSender->pSyncNode->pFsm->FpSnapshotStartRead(pSender->pSyncNode->pFsm, &(pSender->pReader)); + ASSERT(ret == 0); -char *snapshotReceiver2Str(SSyncSnapshotReceiver *pReceiver) { return NULL; } + if (pSender->pCurrentBlock != NULL) { + taosMemoryFree(pSender->pCurrentBlock); + } + + pSender->blockLen = 0; + + // get current snapshot info + pSender->pSyncNode->pFsm->FpGetSnapshot(pSender->pSyncNode->pFsm, &(pSender->snapshot)); + + pSender->sendingMS = SYNC_SNAPSHOT_RETRY_MS; + pSender->term = pSender->pSyncNode->pRaftStore->currentTerm; + ++(pSender->privateTerm); + pSender->finish = false; + pSender->start = true; + + // build begin msg + SyncSnapshotSend *pMsg = syncSnapshotSendBuild(0, pSender->pSyncNode->vgId); + pMsg->srcId = pSender->pSyncNode->myRaftId; + pMsg->destId = (pSender->pSyncNode->replicasId)[pSender->replicaIndex]; + pMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; + pMsg->lastIndex = pSender->snapshot.lastApplyIndex; + pMsg->lastTerm = pSender->snapshot.lastApplyTerm; + pMsg->seq = pSender->seq; // SYNC_SNAPSHOT_SEQ_BEGIN + pMsg->privateTerm = pSender->privateTerm; + + // send msg + SRpcMsg rpcMsg; + syncSnapshotSend2RpcMsg(pMsg, &rpcMsg); + syncNodeSendMsgById(&(pMsg->destId), pSender->pSyncNode, &rpcMsg); + + char *msgStr = syncSnapshotSend2Str(pMsg); + char host[128]; + uint16_t port; + syncUtilU642Addr(pSender->pSyncNode->replicasId[pSender->replicaIndex].addr, host, sizeof(host), &port); + sTrace("sync event snapshot send to %s:%d begin seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu send msg:%s", host, + port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, msgStr); + taosMemoryFree(msgStr); + + syncSnapshotSendDestroy(pMsg); +} + +#if 0 +// when entry in snapshot, start sender +void snapshotSenderStart(SSyncSnapshotSender *pSender) { + if (!(pSender->start)) { + // start + snapshotSenderDoStart(pSender); + pSender->start = true; + } else { + // already start + ASSERT(pSender->pSyncNode->pRaftStore->currentTerm >= pSender->term); + + // if current term is higher, need start again + if (pSender->pSyncNode->pRaftStore->currentTerm > pSender->term) { + // force peer rollback + SyncSnapshotSend *pMsg = syncSnapshotSendBuild(0, pSender->pSyncNode->vgId); + pMsg->srcId = pSender->pSyncNode->myRaftId; + pMsg->destId = (pSender->pSyncNode->replicasId)[pSender->replicaIndex]; + pMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; + pMsg->lastIndex = pSender->snapshot.lastApplyIndex; + pMsg->lastTerm = pSender->snapshot.lastApplyTerm; + pMsg->seq = SYNC_SNAPSHOT_SEQ_FORCE_CLOSE; + + SRpcMsg rpcMsg; + syncSnapshotSend2RpcMsg(pMsg, &rpcMsg); + syncNodeSendMsgById(&(pMsg->destId), pSender->pSyncNode, &rpcMsg); + + char *msgStr = syncSnapshotSend2Str(pMsg); + sTrace("snapshot send force close seq:%d ack:%d send msg:%s", pSender->seq, pSender->ack, msgStr); + taosMemoryFree(msgStr); + + syncSnapshotSendDestroy(pMsg); + + // close reader + int32_t ret = pSender->pSyncNode->pFsm->FpSnapshotStopRead(pSender->pSyncNode->pFsm, pSender->pReader); + ASSERT(ret == 0); + pSender->pReader = NULL; + + // start again + snapshotSenderDoStart(pSender); + pSender->start = true; + } else { + // current term, do nothing + ASSERT(pSender->pSyncNode->pRaftStore->currentTerm == pSender->term); + } + } + + char *s = snapshotSender2Str(pSender); + sInfo("snapshotSenderStart %s", s); + taosMemoryFree(s); +} +#endif + +void snapshotSenderStop(SSyncSnapshotSender *pSender) { + if (pSender->pReader != NULL) { + int32_t ret = pSender->pSyncNode->pFsm->FpSnapshotStopRead(pSender->pSyncNode->pFsm, pSender->pReader); + ASSERT(ret == 0); + pSender->pReader = NULL; + } + + if (pSender->pCurrentBlock != NULL) { + taosMemoryFree(pSender->pCurrentBlock); + pSender->pCurrentBlock = NULL; + pSender->blockLen = 0; + } + + pSender->start = false; + + char *s = snapshotSender2Str(pSender); + sInfo("snapshotSenderStop %s", s); + taosMemoryFree(s); +} + +// when sender receiver ack, call this function to send msg from seq +// seq = ack + 1, already updated +int32_t snapshotSend(SSyncSnapshotSender *pSender) { + // free memory last time (seq - 1) + if (pSender->pCurrentBlock != NULL) { + taosMemoryFree(pSender->pCurrentBlock); + pSender->pCurrentBlock = NULL; + pSender->blockLen = 0; + } + + // read data + int32_t ret = pSender->pSyncNode->pFsm->FpSnapshotDoRead(pSender->pSyncNode->pFsm, pSender->pReader, + &(pSender->pCurrentBlock), &(pSender->blockLen)); + ASSERT(ret == 0); + if (pSender->blockLen > 0) { + // has read data + } else { + // read finish + pSender->seq = SYNC_SNAPSHOT_SEQ_END; + } + + // build msg + SyncSnapshotSend *pMsg = syncSnapshotSendBuild(pSender->blockLen, pSender->pSyncNode->vgId); + pMsg->srcId = pSender->pSyncNode->myRaftId; + pMsg->destId = (pSender->pSyncNode->replicasId)[pSender->replicaIndex]; + pMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; + pMsg->lastIndex = pSender->snapshot.lastApplyIndex; + pMsg->lastTerm = pSender->snapshot.lastApplyTerm; + pMsg->seq = pSender->seq; + pMsg->privateTerm = pSender->privateTerm; + memcpy(pMsg->data, pSender->pCurrentBlock, pSender->blockLen); + + // send msg + SRpcMsg rpcMsg; + syncSnapshotSend2RpcMsg(pMsg, &rpcMsg); + syncNodeSendMsgById(&(pMsg->destId), pSender->pSyncNode, &rpcMsg); + + char *msgStr = syncSnapshotSend2Str(pMsg); + char host[128]; + uint16_t port; + syncUtilU642Addr(pSender->pSyncNode->replicasId[pSender->replicaIndex].addr, host, sizeof(host), &port); + if (pSender->seq == SYNC_SNAPSHOT_SEQ_END) { + sTrace("sync event snapshot send to %s:%d finish seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu send msg:%s", + host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, + msgStr); + } else { + sTrace("sync event snapshot send to %s:%d sending seq:%d ack:%d lastApplyIndex:%ld lastApplyTerm:%lu send msg:%s", + host, port, pSender->seq, pSender->ack, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, + msgStr); + } + taosMemoryFree(msgStr); + + syncSnapshotSendDestroy(pMsg); + return 0; +} + +// send snapshot data from cache +int32_t snapshotReSend(SSyncSnapshotSender *pSender) { + if (pSender->pCurrentBlock != NULL) { + SyncSnapshotSend *pMsg = syncSnapshotSendBuild(pSender->blockLen, pSender->pSyncNode->vgId); + pMsg->srcId = pSender->pSyncNode->myRaftId; + pMsg->destId = (pSender->pSyncNode->replicasId)[pSender->replicaIndex]; + pMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; + pMsg->lastIndex = pSender->snapshot.lastApplyIndex; + pMsg->lastTerm = pSender->snapshot.lastApplyTerm; + pMsg->seq = pSender->seq; + memcpy(pMsg->data, pSender->pCurrentBlock, pSender->blockLen); + + SRpcMsg rpcMsg; + syncSnapshotSend2RpcMsg(pMsg, &rpcMsg); + syncNodeSendMsgById(&(pMsg->destId), pSender->pSyncNode, &rpcMsg); + + char *msgStr = syncSnapshotSend2Str(pMsg); + char host[128]; + uint16_t port; + syncUtilU642Addr(pSender->pSyncNode->replicasId[pSender->replicaIndex].addr, host, sizeof(host), &port); + sTrace("sync event snapshot send to %s:%d resend seq:%d ack:%d send msg:%s", host, port, pSender->seq, pSender->ack, + msgStr); + taosMemoryFree(msgStr); + + syncSnapshotSendDestroy(pMsg); + } + return 0; +} + +cJSON *snapshotSender2Json(SSyncSnapshotSender *pSender) { + char u64buf[128]; + cJSON *pRoot = cJSON_CreateObject(); + + if (pSender != NULL) { + cJSON_AddNumberToObject(pRoot, "start", pSender->start); + cJSON_AddNumberToObject(pRoot, "seq", pSender->seq); + cJSON_AddNumberToObject(pRoot, "ack", pSender->ack); + + snprintf(u64buf, sizeof(u64buf), "%p", pSender->pReader); + cJSON_AddStringToObject(pRoot, "pReader", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%p", pSender->pCurrentBlock); + cJSON_AddStringToObject(pRoot, "pCurrentBlock", u64buf); + cJSON_AddNumberToObject(pRoot, "blockLen", pSender->blockLen); + + if (pSender->pCurrentBlock != NULL) { + char *s; + s = syncUtilprintBin((char *)(pSender->pCurrentBlock), pSender->blockLen); + cJSON_AddStringToObject(pRoot, "pCurrentBlock", s); + taosMemoryFree(s); + s = syncUtilprintBin2((char *)(pSender->pCurrentBlock), pSender->blockLen); + cJSON_AddStringToObject(pRoot, "pCurrentBlock2", s); + taosMemoryFree(s); + } + + cJSON *pSnapshot = cJSON_CreateObject(); + snprintf(u64buf, sizeof(u64buf), "%lu", pSender->snapshot.lastApplyIndex); + cJSON_AddStringToObject(pSnapshot, "lastApplyIndex", u64buf); + snprintf(u64buf, sizeof(u64buf), "%lu", pSender->snapshot.lastApplyTerm); + cJSON_AddStringToObject(pSnapshot, "lastApplyTerm", u64buf); + cJSON_AddItemToObject(pRoot, "snapshot", pSnapshot); + + snprintf(u64buf, sizeof(u64buf), "%lu", pSender->sendingMS); + cJSON_AddStringToObject(pRoot, "sendingMS", u64buf); + snprintf(u64buf, sizeof(u64buf), "%p", pSender->pSyncNode); + cJSON_AddStringToObject(pRoot, "pSyncNode", u64buf); + cJSON_AddNumberToObject(pRoot, "replicaIndex", pSender->replicaIndex); + snprintf(u64buf, sizeof(u64buf), "%lu", pSender->term); + cJSON_AddStringToObject(pRoot, "term", u64buf); + snprintf(u64buf, sizeof(u64buf), "%lu", pSender->privateTerm); + cJSON_AddStringToObject(pRoot, "privateTerm", u64buf); + cJSON_AddNumberToObject(pRoot, "finish", pSender->finish); + } + + cJSON *pJson = cJSON_CreateObject(); + cJSON_AddItemToObject(pJson, "SSyncSnapshotSender", pRoot); + return pJson; +} + +char *snapshotSender2Str(SSyncSnapshotSender *pSender) { + cJSON *pJson = snapshotSender2Json(pSender); + char *serialized = cJSON_Print(pJson); + cJSON_Delete(pJson); + return serialized; +} + +// ------------------------------------- +SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, int32_t replicaIndex) { + bool condition = (pSyncNode->pFsm->FpSnapshotStartWrite != NULL) && (pSyncNode->pFsm->FpSnapshotStopWrite != NULL) && + (pSyncNode->pFsm->FpSnapshotDoWrite != NULL); + + SSyncSnapshotReceiver *pReceiver = NULL; + if (condition) { + pReceiver = taosMemoryMalloc(sizeof(SSyncSnapshotReceiver)); + ASSERT(pReceiver != NULL); + memset(pReceiver, 0, sizeof(*pReceiver)); + + pReceiver->start = false; + pReceiver->ack = SYNC_SNAPSHOT_SEQ_BEGIN; + pReceiver->pWriter = NULL; + pReceiver->pSyncNode = pSyncNode; + pReceiver->replicaIndex = replicaIndex; + pReceiver->term = pSyncNode->pRaftStore->currentTerm; + pReceiver->privateTerm = 0; + + } else { + sInfo("snapshotReceiverCreate cannot create receiver"); + } + + return pReceiver; +} + +void snapshotReceiverDestroy(SSyncSnapshotReceiver *pReceiver) { + if (pReceiver != NULL) { + taosMemoryFree(pReceiver); + } +} + +bool snapshotReceiverIsStart(SSyncSnapshotReceiver *pReceiver) { return pReceiver->start; } + +// begin receive snapshot msg (current term, seq begin) +static void snapshotReceiverDoStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm) { + pReceiver->term = pReceiver->pSyncNode->pRaftStore->currentTerm; + pReceiver->privateTerm = privateTerm; + pReceiver->ack = SYNC_SNAPSHOT_SEQ_BEGIN; + + ASSERT(pReceiver->pWriter == NULL); + int32_t ret = pReceiver->pSyncNode->pFsm->FpSnapshotStartWrite(pReceiver->pSyncNode->pFsm, &(pReceiver->pWriter)); + ASSERT(ret == 0); +} + +// if receiver receive msg from seq = SYNC_SNAPSHOT_SEQ_BEGIN, start receiver +// if already start, force close, start again +void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm) { + if (!snapshotReceiverIsStart(pReceiver)) { + // start + snapshotReceiverDoStart(pReceiver, privateTerm); + pReceiver->start = true; + + } else { + // already start + + // force close, abandon incomplete data + int32_t ret = + pReceiver->pSyncNode->pFsm->FpSnapshotStopWrite(pReceiver->pSyncNode->pFsm, pReceiver->pWriter, false); + ASSERT(ret == 0); + pReceiver->pWriter = NULL; + + // start again + snapshotReceiverDoStart(pReceiver, privateTerm); + pReceiver->start = true; + + ASSERT(0); + } + + char *s = snapshotReceiver2Str(pReceiver); + sInfo("snapshotReceiverStart %s", s); + taosMemoryFree(s); +} + +void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver, bool apply) { + if (pReceiver->pWriter != NULL) { + int32_t ret = + pReceiver->pSyncNode->pFsm->FpSnapshotStopWrite(pReceiver->pSyncNode->pFsm, pReceiver->pWriter, false); + ASSERT(ret == 0); + pReceiver->pWriter = NULL; + } + + pReceiver->start = false; + + if (apply) { + ++(pReceiver->privateTerm); + } + + char *s = snapshotReceiver2Str(pReceiver); + sInfo("snapshotReceiverStop %s", s); + taosMemoryFree(s); +} + +cJSON *snapshotReceiver2Json(SSyncSnapshotReceiver *pReceiver) { + char u64buf[128]; + cJSON *pRoot = cJSON_CreateObject(); + + if (pReceiver != NULL) { + cJSON_AddNumberToObject(pRoot, "start", pReceiver->start); + cJSON_AddNumberToObject(pRoot, "ack", pReceiver->ack); + + snprintf(u64buf, sizeof(u64buf), "%p", pReceiver->pWriter); + cJSON_AddStringToObject(pRoot, "pWriter", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%p", pReceiver->pSyncNode); + cJSON_AddStringToObject(pRoot, "pSyncNode", u64buf); + cJSON_AddNumberToObject(pRoot, "replicaIndex", pReceiver->replicaIndex); + snprintf(u64buf, sizeof(u64buf), "%lu", pReceiver->term); + cJSON_AddStringToObject(pRoot, "term", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%lu", pReceiver->privateTerm); + cJSON_AddStringToObject(pRoot, "privateTerm", u64buf); + } + + cJSON *pJson = cJSON_CreateObject(); + cJSON_AddItemToObject(pJson, "SSyncSnapshotReceiver", pRoot); + return pJson; +} + +char *snapshotReceiver2Str(SSyncSnapshotReceiver *pReceiver) { + cJSON *pJson = snapshotReceiver2Json(pReceiver); + char *serialized = cJSON_Print(pJson); + cJSON_Delete(pJson); + return serialized; +} + +// receiver do something +int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { + // get receiver + SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver; + bool needRsp = false; + int32_t writeCode = 0; + + // state, term, seq/ack + if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER) { + if (pMsg->term == pSyncNode->pRaftStore->currentTerm) { + if (pMsg->seq == SYNC_SNAPSHOT_SEQ_BEGIN) { + // begin + snapshotReceiverStart(pReceiver, pMsg->privateTerm); + pReceiver->ack = pMsg->seq; + needRsp = true; + + char *msgStr = syncSnapshotSend2Str(pMsg); + char host[128]; + uint16_t port; + syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); + sTrace("sync event snapshot recv from %s:%d begin ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", host, port, + pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); + taosMemoryFree(msgStr); + + } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_END) { + // end, finish FSM + writeCode = pSyncNode->pFsm->FpSnapshotDoWrite(pSyncNode->pFsm, pReceiver->pWriter, pMsg->data, pMsg->dataLen); + ASSERT(writeCode == 0); + + pSyncNode->pFsm->FpSnapshotStopWrite(pSyncNode->pFsm, pReceiver->pWriter, true); + + pSyncNode->pLogStore->syncLogSetBeginIndex(pSyncNode->pLogStore, pMsg->lastIndex + 1); + char *logSimpleStr = logStoreSimple2Str(pSyncNode->pLogStore); + SSnapshot snapshot; + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + char host[128]; + uint16_t port; + syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); + sInfo( + "sync event snapshot recv from %s:%d finish, update log begin index:%ld, snapshot.lastApplyIndex:%ld, " + "snapshot.lastApplyTerm:%lu, raft log:%s", + host, port, pMsg->lastIndex + 1, snapshot.lastApplyIndex, snapshot.lastApplyTerm, logSimpleStr); + taosMemoryFree(logSimpleStr); + + pReceiver->pWriter = NULL; + snapshotReceiverStop(pReceiver, true); + pReceiver->ack = pMsg->seq; + needRsp = true; + + char *msgStr = syncSnapshotSend2Str(pMsg); + sTrace("sync event snapshot recv from %s:%d end ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", host, port, + pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); + taosMemoryFree(msgStr); + + } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_FORCE_CLOSE) { + pSyncNode->pFsm->FpSnapshotStopWrite(pSyncNode->pFsm, pReceiver->pWriter, false); + snapshotReceiverStop(pReceiver, false); + needRsp = false; + + char host[128]; + uint16_t port; + syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); + + char *msgStr = syncSnapshotSend2Str(pMsg); + sTrace("sync event snapshot recv from %s:%d force close ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", host, + port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); + + taosMemoryFree(msgStr); + + } else if (pMsg->seq > SYNC_SNAPSHOT_SEQ_BEGIN && pMsg->seq < SYNC_SNAPSHOT_SEQ_END) { + // transfering + if (pMsg->seq == pReceiver->ack + 1) { + writeCode = + pSyncNode->pFsm->FpSnapshotDoWrite(pSyncNode->pFsm, pReceiver->pWriter, pMsg->data, pMsg->dataLen); + ASSERT(writeCode == 0); + pReceiver->ack = pMsg->seq; + } + needRsp = true; + + char *msgStr = syncSnapshotSend2Str(pMsg); + char host[128]; + uint16_t port; + syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); + sTrace("sync event snapshot recv from %s:%d receiving ack:%d, lastIndex:%ld, lastTerm:%lu, recv msg:%s", host, + port, pReceiver->ack, pMsg->lastIndex, pMsg->lastTerm, msgStr); + taosMemoryFree(msgStr); + + } else { + ASSERT(0); + } + + if (needRsp) { + SyncSnapshotRsp *pRspMsg = syncSnapshotRspBuild(pSyncNode->vgId); + pRspMsg->srcId = pSyncNode->myRaftId; + pRspMsg->destId = pMsg->srcId; + pRspMsg->term = pSyncNode->pRaftStore->currentTerm; + pRspMsg->lastIndex = pMsg->lastIndex; + pRspMsg->lastTerm = pMsg->lastTerm; + pRspMsg->ack = pReceiver->ack; + pRspMsg->code = writeCode; + pRspMsg->privateTerm = pReceiver->privateTerm; + + SRpcMsg rpcMsg; + syncSnapshotRsp2RpcMsg(pRspMsg, &rpcMsg); + syncNodeSendMsgById(&(pRspMsg->destId), pSyncNode, &rpcMsg); + + syncSnapshotRspDestroy(pRspMsg); + } + } + } else { + syncNodeLog2("syncNodeOnSnapshotSendCb not follower", pSyncNode); + } + + return 0; +} + +// sender receives ack, set seq = ack + 1, send msg from seq +// if ack == SYNC_SNAPSHOT_SEQ_END, stop sender +int32_t syncNodeOnSnapshotRspCb(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg) { + // get sender + SSyncSnapshotSender *pSender = syncNodeGetSnapshotSender(pSyncNode, &(pMsg->srcId)); + ASSERT(pSender != NULL); + + // state, term, seq/ack + if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { + if (pMsg->term == pSyncNode->pRaftStore->currentTerm) { + // receiver ack is finish, close sender + if (pMsg->ack == SYNC_SNAPSHOT_SEQ_END) { + pSender->finish = true; + snapshotSenderStop(pSender); + return 0; + } + + // send next msg + if (pMsg->ack == pSender->seq) { + // update sender ack + pSender->ack = pMsg->ack; + (pSender->seq)++; + snapshotSend(pSender); + + } else if (pMsg->ack == pSender->seq - 1) { + snapshotReSend(pSender); + + } else { + ASSERT(0); + } + } + } else { + syncNodeLog2("syncNodeOnSnapshotRspCb not leader", pSyncNode); + } + + return 0; +} \ No newline at end of file diff --git a/source/libs/sync/src/syncUtil.c b/source/libs/sync/src/syncUtil.c index 48567b75c2..f6ff521e01 100644 --- a/source/libs/sync/src/syncUtil.c +++ b/source/libs/sync/src/syncUtil.c @@ -240,4 +240,26 @@ bool syncUtilUserRollback(tmsg_t msgType) { return true; } return false; +} + +void syncUtilJson2Line(char* jsonStr) { + int p, q, len; + p = 0; + q = 1; + len = strlen(jsonStr); + while (1) { + if (jsonStr[q] == '\0') { + jsonStr[p + 1] = '\0'; + break; + } + + if (jsonStr[q] == '\n' || jsonStr[q] == ' ' || jsonStr[q] == '\t') { + q++; + continue; + } else { + jsonStr[p + 1] = jsonStr[q]; + p++; + q++; + } + } } \ No newline at end of file diff --git a/source/libs/sync/test/CMakeLists.txt b/source/libs/sync/test/CMakeLists.txt index cfbdf0e961..c68c6349fb 100644 --- a/source/libs/sync/test/CMakeLists.txt +++ b/source/libs/sync/test/CMakeLists.txt @@ -38,6 +38,15 @@ add_executable(syncRespMgrTest "") add_executable(syncSnapshotTest "") add_executable(syncApplyMsgTest "") add_executable(syncConfigChangeTest "") +add_executable(syncConfigChangeSnapshotTest "") +add_executable(syncSnapshotSendTest "") +add_executable(syncSnapshotRspTest "") +add_executable(syncSnapshotSenderTest "") +add_executable(syncSnapshotReceiverTest "") +add_executable(syncTestTool "") +add_executable(syncRaftLogTest "") +add_executable(syncRaftLogTest2 "") +add_executable(syncRaftLogTest3 "") target_sources(syncTest @@ -200,6 +209,42 @@ target_sources(syncConfigChangeTest PRIVATE "syncConfigChangeTest.cpp" ) +target_sources(syncConfigChangeSnapshotTest + PRIVATE + "syncConfigChangeSnapshotTest.cpp" +) +target_sources(syncSnapshotSendTest + PRIVATE + "syncSnapshotSendTest.cpp" +) +target_sources(syncSnapshotRspTest + PRIVATE + "syncSnapshotRspTest.cpp" +) +target_sources(syncSnapshotSenderTest + PRIVATE + "syncSnapshotSenderTest.cpp" +) +target_sources(syncSnapshotReceiverTest + PRIVATE + "syncSnapshotReceiverTest.cpp" +) +target_sources(syncTestTool + PRIVATE + "syncTestTool.cpp" +) +target_sources(syncRaftLogTest + PRIVATE + "syncRaftLogTest.cpp" +) +target_sources(syncRaftLogTest2 + PRIVATE + "syncRaftLogTest2.cpp" +) +target_sources(syncRaftLogTest3 + PRIVATE + "syncRaftLogTest3.cpp" +) target_include_directories(syncTest @@ -402,6 +447,51 @@ target_include_directories(syncConfigChangeTest "${TD_SOURCE_DIR}/include/libs/sync" "${CMAKE_CURRENT_SOURCE_DIR}/../inc" ) +target_include_directories(syncConfigChangeSnapshotTest + PUBLIC + "${TD_SOURCE_DIR}/include/libs/sync" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) +target_include_directories(syncSnapshotSendTest + PUBLIC + "${TD_SOURCE_DIR}/include/libs/sync" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) +target_include_directories(syncSnapshotRspTest + PUBLIC + "${TD_SOURCE_DIR}/include/libs/sync" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) +target_include_directories(syncSnapshotSenderTest + PUBLIC + "${TD_SOURCE_DIR}/include/libs/sync" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) +target_include_directories(syncSnapshotReceiverTest + PUBLIC + "${TD_SOURCE_DIR}/include/libs/sync" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) +target_include_directories(syncTestTool + PUBLIC + "${TD_SOURCE_DIR}/include/libs/sync" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) +target_include_directories(syncRaftLogTest + PUBLIC + "${TD_SOURCE_DIR}/include/libs/sync" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) +target_include_directories(syncRaftLogTest2 + PUBLIC + "${TD_SOURCE_DIR}/include/libs/sync" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) +target_include_directories(syncRaftLogTest3 + PUBLIC + "${TD_SOURCE_DIR}/include/libs/sync" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) target_link_libraries(syncTest @@ -564,6 +654,42 @@ target_link_libraries(syncConfigChangeTest sync gtest_main ) +target_link_libraries(syncConfigChangeSnapshotTest + sync + gtest_main +) +target_link_libraries(syncSnapshotSendTest + sync + gtest_main +) +target_link_libraries(syncSnapshotRspTest + sync + gtest_main +) +target_link_libraries(syncSnapshotSenderTest + sync + gtest_main +) +target_link_libraries(syncSnapshotReceiverTest + sync + gtest_main +) +target_link_libraries(syncTestTool + sync + gtest_main +) +target_link_libraries(syncRaftLogTest + sync + gtest_main +) +target_link_libraries(syncRaftLogTest2 + sync + gtest_main +) +target_link_libraries(syncRaftLogTest3 + sync + gtest_main +) enable_testing() diff --git a/source/libs/sync/test/syncAppendEntriesReplyTest.cpp b/source/libs/sync/test/syncAppendEntriesReplyTest.cpp index a90259bc3a..d41e99a3cd 100644 --- a/source/libs/sync/test/syncAppendEntriesReplyTest.cpp +++ b/source/libs/sync/test/syncAppendEntriesReplyTest.cpp @@ -22,6 +22,8 @@ SyncAppendEntriesReply *createMsg() { pMsg->destId.vgId = 100; pMsg->success = true; pMsg->matchIndex = 77; + pMsg->term = 33; + pMsg->privateTerm = 44; return pMsg; } diff --git a/source/libs/sync/test/syncAppendEntriesTest.cpp b/source/libs/sync/test/syncAppendEntriesTest.cpp index bb9f306a1c..98b392274e 100644 --- a/source/libs/sync/test/syncAppendEntriesTest.cpp +++ b/source/libs/sync/test/syncAppendEntriesTest.cpp @@ -23,6 +23,7 @@ SyncAppendEntries *createMsg() { pMsg->prevLogIndex = 11; pMsg->prevLogTerm = 22; pMsg->commitIndex = 33; + pMsg->privateTerm = 44; strcpy(pMsg->data, "hello world"); return pMsg; } diff --git a/source/libs/sync/test/syncConfigChangeSnapshotTest.cpp b/source/libs/sync/test/syncConfigChangeSnapshotTest.cpp new file mode 100644 index 0000000000..781c168da9 --- /dev/null +++ b/source/libs/sync/test/syncConfigChangeSnapshotTest.cpp @@ -0,0 +1,366 @@ +#include +#include +#include "os.h" +#include "syncEnv.h" +#include "syncIO.h" +#include "syncInt.h" +#include "syncUtil.h" +#include "wal.h" + +void logTest() { + sTrace("--- sync log test: trace"); + sDebug("--- sync log test: debug"); + sInfo("--- sync log test: info"); + sWarn("--- sync log test: warn"); + sError("--- sync log test: error"); + sFatal("--- sync log test: fatal"); +} + +uint16_t gPorts[] = {7010, 7110, 7210, 7310, 7410}; +const char* gDir = "./syncReplicateTest"; +int32_t gVgId = 1234; +SyncIndex gSnapshotLastApplyIndex; +SyncIndex gSnapshotLastApplyTerm; + +void init() { + int code = walInit(); + assert(code == 0); + + code = syncInit(); + assert(code == 0); + + sprintf(tsTempDir, "%s", "."); +} + +void cleanup() { walCleanUp(); } + +void CommitCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) { + SyncIndex beginIndex = SYNC_INDEX_INVALID; + if (pFsm->FpGetSnapshot != NULL) { + SSnapshot snapshot; + pFsm->FpGetSnapshot(pFsm, &snapshot); + beginIndex = snapshot.lastApplyIndex; + } + + if (cbMeta.index > beginIndex) { + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), + "==callback== ==CommitCb== pFsm:%p, index:%ld, isWeak:%d, code:%d, state:%d %s, flag:%lu, term:%lu \n", + pFsm, cbMeta.index, cbMeta.isWeak, cbMeta.code, cbMeta.state, syncUtilState2String(cbMeta.state), + cbMeta.flag, cbMeta.term); + syncRpcMsgLog2(logBuf, (SRpcMsg*)pMsg); + } else { + sTrace("==callback== ==CommitCb== do not apply again %ld", cbMeta.index); + } +} + +void PreCommitCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) { + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), + "==callback== ==PreCommitCb== pFsm:%p, index:%ld, isWeak:%d, code:%d, state:%d %s flag:%lu\n", pFsm, + cbMeta.index, cbMeta.isWeak, cbMeta.code, cbMeta.state, syncUtilState2String(cbMeta.state), cbMeta.flag); + syncRpcMsgLog2(logBuf, (SRpcMsg*)pMsg); +} + +void RollBackCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) { + char logBuf[256]; + snprintf(logBuf, sizeof(logBuf), + "==callback== ==RollBackCb== pFsm:%p, index:%ld, isWeak:%d, code:%d, state:%d %s flag:%lu\n", pFsm, + cbMeta.index, cbMeta.isWeak, cbMeta.code, cbMeta.state, syncUtilState2String(cbMeta.state), cbMeta.flag); + syncRpcMsgLog2(logBuf, (SRpcMsg*)pMsg); +} + +int32_t GetSnapshotCb(struct SSyncFSM* pFsm, SSnapshot* pSnapshot) { + pSnapshot->data = NULL; + pSnapshot->lastApplyIndex = gSnapshotLastApplyIndex; + pSnapshot->lastApplyTerm = gSnapshotLastApplyTerm; + return 0; +} + +int32_t SnapshotStartRead(struct SSyncFSM* pFsm, void** ppReader) { + *ppReader = (void*)0xABCD; + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), "==callback== ==SnapshotStartRead== pFsm:%p, *ppReader:%p", pFsm, *ppReader); + sTrace("%s", logBuf); + return 0; +} + +int32_t SnapshotStopRead(struct SSyncFSM* pFsm, void* pReader) { + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), "==callback== ==SnapshotStopRead== pFsm:%p, pReader:%p", pFsm, pReader); + sTrace("%s", logBuf); + return 0; +} + +int32_t SnapshotDoRead(struct SSyncFSM* pFsm, void* pReader, void** ppBuf, int32_t* len) { + static int readIter = 0; + + if (readIter == 5) { + *len = 0; + *ppBuf = NULL; + } else if (readIter < 5) { + *len = 20; + *ppBuf = taosMemoryMalloc(*len); + snprintf((char*)*ppBuf, *len, "data iter:%d", readIter); + } + + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), + "==callback== ==SnapshotDoRead== pFsm:%p, pReader:%p, *len:%d, *ppBuf:%s, readIter:%d", pFsm, pReader, *len, + (char*)(*ppBuf), readIter); + sTrace("%s", logBuf); + + readIter++; + return 0; +} + +int32_t SnapshotStartWrite(struct SSyncFSM* pFsm, void** ppWriter) { + *ppWriter = (void*)0xCDEF; + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), "==callback== ==SnapshotStartWrite== pFsm:%p, *ppWriter:%p", pFsm, *ppWriter); + sTrace("%s", logBuf); + return 0; +} + +int32_t SnapshotStopWrite(struct SSyncFSM* pFsm, void* pWriter, bool isApply) { + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), "==callback== ==SnapshotStopWrite== pFsm:%p, pWriter:%p, isApply:%d", pFsm, pWriter, + isApply); + sTrace("%s", logBuf); + + if (isApply) { + gSnapshotLastApplyIndex = 10; + gSnapshotLastApplyTerm = 1; + } + + return 0; +} + +int32_t SnapshotDoWrite(struct SSyncFSM* pFsm, void* pWriter, void* pBuf, int32_t len) { + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), "==callback== ==SnapshotDoWrite== pFsm:%p, pWriter:%p, len:%d pBuf:%s", pFsm, + pWriter, len, (char*)pBuf); + sTrace("%s", logBuf); + return 0; +} + +void RestoreFinishCb(struct SSyncFSM* pFsm) { sTrace("==callback== ==RestoreFinishCb=="); } + +void ReConfigCb(struct SSyncFSM* pFsm, SSyncCfg newCfg, SReConfigCbMeta cbMeta) { + sTrace("==callback== ==ReConfigCb== flag:0x%lX, isDrop:%d, index:%ld, code:%d, currentTerm:%lu, term:%lu", + cbMeta.flag, cbMeta.isDrop, cbMeta.index, cbMeta.code, cbMeta.currentTerm, cbMeta.term); +} + +SSyncFSM* createFsm() { + SSyncFSM* pFsm = (SSyncFSM*)taosMemoryMalloc(sizeof(SSyncFSM)); + memset(pFsm, 0, sizeof(*pFsm)); + + pFsm->FpCommitCb = CommitCb; + pFsm->FpPreCommitCb = PreCommitCb; + pFsm->FpRollBackCb = RollBackCb; + + pFsm->FpGetSnapshot = GetSnapshotCb; + pFsm->FpRestoreFinishCb = RestoreFinishCb; + pFsm->FpSnapshotStartRead = SnapshotStartRead; + pFsm->FpSnapshotStopRead = SnapshotStopRead; + pFsm->FpSnapshotDoRead = SnapshotDoRead; + pFsm->FpSnapshotStartWrite = SnapshotStartWrite; + pFsm->FpSnapshotStopWrite = SnapshotStopWrite; + pFsm->FpSnapshotDoWrite = SnapshotDoWrite; + + pFsm->FpReConfigCb = ReConfigCb; + + return pFsm; +} + +SWal* createWal(char* path, int32_t vgId) { + SWalCfg walCfg; + memset(&walCfg, 0, sizeof(SWalCfg)); + walCfg.vgId = vgId; + walCfg.fsyncPeriod = 1000; + walCfg.retentionPeriod = 1000; + walCfg.rollPeriod = 1000; + walCfg.retentionSize = 1000; + walCfg.segSize = 1000; + walCfg.level = TAOS_WAL_FSYNC; + SWal* pWal = walOpen(path, &walCfg); + assert(pWal != NULL); + return pWal; +} + +int64_t createSyncNode(int32_t replicaNum, int32_t myIndex, int32_t vgId, SWal* pWal, char* path, bool isStandBy) { + SSyncInfo syncInfo; + syncInfo.vgId = vgId; + syncInfo.msgcb = &gSyncIO->msgcb; + syncInfo.FpSendMsg = syncIOSendMsg; + syncInfo.FpEqMsg = syncIOEqMsg; + syncInfo.pFsm = createFsm(); + snprintf(syncInfo.path, sizeof(syncInfo.path), "%s_sync_replica%d_index%d", path, replicaNum, myIndex); + syncInfo.pWal = pWal; + syncInfo.isStandBy = isStandBy; + syncInfo.snapshotEnable = true; + + SSyncCfg* pCfg = &syncInfo.syncCfg; + + if (isStandBy) { + pCfg->myIndex = 0; + pCfg->replicaNum = 1; + pCfg->nodeInfo[0].nodePort = gPorts[myIndex]; + taosGetFqdn(pCfg->nodeInfo[0].nodeFqdn); + + } else { + pCfg->myIndex = myIndex; + pCfg->replicaNum = replicaNum; + + for (int i = 0; i < replicaNum; ++i) { + pCfg->nodeInfo[i].nodePort = gPorts[i]; + taosGetFqdn(pCfg->nodeInfo[i].nodeFqdn); + // snprintf(pCfg->nodeInfo[i].nodeFqdn, sizeof(pCfg->nodeInfo[i].nodeFqdn), "%s", "127.0.0.1"); + } + } + + int64_t rid = syncOpen(&syncInfo); + assert(rid > 0); + + SSyncNode* pSyncNode = (SSyncNode*)syncNodeAcquire(rid); + assert(pSyncNode != NULL); + gSyncIO->FpOnSyncPing = pSyncNode->FpOnPing; + gSyncIO->FpOnSyncPingReply = pSyncNode->FpOnPingReply; + gSyncIO->FpOnSyncTimeout = pSyncNode->FpOnTimeout; + gSyncIO->FpOnSyncClientRequest = pSyncNode->FpOnClientRequest; + + gSyncIO->FpOnSyncRequestVote = pSyncNode->FpOnRequestVote; + gSyncIO->FpOnSyncRequestVoteReply = pSyncNode->FpOnRequestVoteReply; + gSyncIO->FpOnSyncAppendEntries = pSyncNode->FpOnAppendEntries; + gSyncIO->FpOnSyncAppendEntriesReply = pSyncNode->FpOnAppendEntriesReply; + + gSyncIO->FpOnSyncSnapshotSend = pSyncNode->FpOnSnapshotSend; + gSyncIO->FpOnSyncSnapshotRsp = pSyncNode->FpOnSnapshotRsp; + + gSyncIO->pSyncNode = pSyncNode; + syncNodeRelease(pSyncNode); + + return rid; +} + +void configChange(int64_t rid, int32_t replicaNum, int32_t myIndex) { + SSyncCfg syncCfg; + + syncCfg.myIndex = myIndex; + syncCfg.replicaNum = replicaNum; + + for (int i = 0; i < replicaNum; ++i) { + syncCfg.nodeInfo[i].nodePort = gPorts[i]; + taosGetFqdn(syncCfg.nodeInfo[i].nodeFqdn); + } + + syncReconfig(rid, &syncCfg); +} + +void usage(char* exe) { + printf("usage: %s replicaNum myIndex lastApplyIndex writeRecordNum isStandBy isConfigChange lastApplyTerm \n", exe); +} + +SRpcMsg* createRpcMsg(int i, int count, int myIndex) { + SRpcMsg* pMsg = (SRpcMsg*)taosMemoryMalloc(sizeof(SRpcMsg)); + memset(pMsg, 0, sizeof(SRpcMsg)); + pMsg->msgType = 9999; + pMsg->contLen = 256; + pMsg->pCont = rpcMallocCont(pMsg->contLen); + snprintf((char*)(pMsg->pCont), pMsg->contLen, "value-myIndex:%u-%d-%d-%ld", myIndex, i, count, taosGetTimestampMs()); + return pMsg; +} + +int main(int argc, char** argv) { + tsAsyncLog = 0; + sDebugFlag = DEBUG_TRACE + DEBUG_SCREEN + DEBUG_FILE + DEBUG_INFO; + if (argc != 8) { + usage(argv[0]); + exit(-1); + } + + int32_t replicaNum = atoi(argv[1]); + int32_t myIndex = atoi(argv[2]); + int32_t lastApplyIndex = atoi(argv[3]); + int32_t writeRecordNum = atoi(argv[4]); + bool isStandBy = atoi(argv[5]); + bool isConfigChange = atoi(argv[6]); + int32_t lastApplyTerm = atoi(argv[7]); + + sTrace( + "args: replicaNum:%d, myIndex:%d, lastApplyIndex:%d, writeRecordNum:%d, isStandBy:%d, isConfigChange:%d, " + "lastApplyTerm:%d", + replicaNum, myIndex, lastApplyIndex, writeRecordNum, isStandBy, isConfigChange, lastApplyTerm); + + gSnapshotLastApplyIndex = lastApplyIndex; + gSnapshotLastApplyTerm = lastApplyTerm; + + if (!isStandBy) { + assert(replicaNum >= 1 && replicaNum <= 5); + assert(myIndex >= 0 && myIndex < replicaNum); + assert(lastApplyIndex >= -1); + assert(writeRecordNum >= 0); + } + + init(); + int32_t ret = syncIOStart((char*)"127.0.0.1", gPorts[myIndex]); + assert(ret == 0); + + char walPath[128]; + snprintf(walPath, sizeof(walPath), "%s_wal_replica%d_index%d", gDir, replicaNum, myIndex); + SWal* pWal = createWal(walPath, gVgId); + + int64_t rid = createSyncNode(replicaNum, myIndex, gVgId, pWal, (char*)gDir, isStandBy); + assert(rid > 0); + + syncStart(rid); + + /* + if (isStandBy) { + syncStartStandBy(rid); + } else { + syncStart(rid); + } + */ + + SSyncNode* pSyncNode = (SSyncNode*)syncNodeAcquire(rid); + assert(pSyncNode != NULL); + + if (isConfigChange) { + configChange(rid, 2, myIndex); + } + + //--------------------------- + int32_t alreadySend = 0; + while (1) { + char* s = syncNode2SimpleStr(pSyncNode); + + if (alreadySend < writeRecordNum) { + SRpcMsg* pRpcMsg = createRpcMsg(alreadySend, writeRecordNum, myIndex); + int32_t ret = syncPropose(rid, pRpcMsg, false); + if (ret == TAOS_SYNC_PROPOSE_NOT_LEADER) { + sTrace("%s value%d write not leader", s, alreadySend); + } else { + assert(ret == 0); + sTrace("%s value%d write ok", s, alreadySend); + } + alreadySend++; + + rpcFreeCont(pRpcMsg->pCont); + taosMemoryFree(pRpcMsg); + } else { + sTrace("%s", s); + } + + taosMsleep(1000); + taosMemoryFree(s); + taosMsleep(1000); + } + + syncNodeRelease(pSyncNode); + syncStop(rid); + walClose(pWal); + syncIOStop(); + cleanup(); + return 0; +} diff --git a/source/libs/sync/test/syncConfigChangeTest.cpp b/source/libs/sync/test/syncConfigChangeTest.cpp index 1ab3ce203a..c9d9ca48aa 100644 --- a/source/libs/sync/test/syncConfigChangeTest.cpp +++ b/source/libs/sync/test/syncConfigChangeTest.cpp @@ -93,7 +93,6 @@ SSyncFSM* createFsm() { pFsm->FpGetSnapshot = GetSnapshotCb; pFsm->FpRestoreFinishCb = RestoreFinishCb; - pFsm->FpReConfigCb = ReConfigCb; return pFsm; diff --git a/source/libs/sync/test/syncIndexMgrTest.cpp b/source/libs/sync/test/syncIndexMgrTest.cpp index 7fcce2bc4f..0ad69f0f51 100644 --- a/source/libs/sync/test/syncIndexMgrTest.cpp +++ b/source/libs/sync/test/syncIndexMgrTest.cpp @@ -22,55 +22,23 @@ int32_t replicaNum = 3; int32_t myIndex = 0; SRaftId ids[TSDB_MAX_REPLICA]; -SSyncInfo syncInfo; -SSyncFSM* pFsm; SSyncNode* pSyncNode; SSyncNode* syncNodeInit() { - syncInfo.vgId = 1234; - syncInfo.msgcb = &gSyncIO->msgcb; - syncInfo.FpSendMsg = syncIOSendMsg; - syncInfo.FpEqMsg = syncIOEqMsg; - syncInfo.pFsm = pFsm; - snprintf(syncInfo.path, sizeof(syncInfo.path), "%s", "./"); - - SSyncCfg* pCfg = &syncInfo.syncCfg; - pCfg->myIndex = myIndex; - pCfg->replicaNum = replicaNum; - + pSyncNode = (SSyncNode*)taosMemoryMalloc(sizeof(SSyncNode)); + memset(pSyncNode, 0, sizeof(SSyncNode)); + pSyncNode->replicaNum = replicaNum; for (int i = 0; i < replicaNum; ++i) { - pCfg->nodeInfo[i].nodePort = ports[i]; - snprintf(pCfg->nodeInfo[i].nodeFqdn, sizeof(pCfg->nodeInfo[i].nodeFqdn), "%s", "127.0.0.1"); - // taosGetFqdn(pCfg->nodeInfo[0].nodeFqdn); + pSyncNode->replicasId[i].addr = syncUtilAddr2U64("127.0.0.1", ports[i]); + pSyncNode->replicasId[i].vgId = 1234; + + ids[i].addr = pSyncNode->replicasId[i].addr; + ids[i].vgId = pSyncNode->replicasId[i].vgId; } - pSyncNode = syncNodeOpen(&syncInfo); - assert(pSyncNode != NULL); - - gSyncIO->FpOnSyncPing = pSyncNode->FpOnPing; - gSyncIO->FpOnSyncPingReply = pSyncNode->FpOnPingReply; - gSyncIO->FpOnSyncRequestVote = pSyncNode->FpOnRequestVote; - gSyncIO->FpOnSyncRequestVoteReply = pSyncNode->FpOnRequestVoteReply; - gSyncIO->FpOnSyncAppendEntries = pSyncNode->FpOnAppendEntries; - gSyncIO->FpOnSyncAppendEntriesReply = pSyncNode->FpOnAppendEntriesReply; - gSyncIO->FpOnSyncPing = pSyncNode->FpOnPing; - gSyncIO->FpOnSyncPingReply = pSyncNode->FpOnPingReply; - gSyncIO->pSyncNode = pSyncNode; - return pSyncNode; } -SSyncNode* syncInitTest() { return syncNodeInit(); } - -void initRaftId(SSyncNode* pSyncNode) { - for (int i = 0; i < replicaNum; ++i) { - ids[i] = pSyncNode->replicasId[i]; - char* s = syncUtilRaftId2Str(&ids[i]); - printf("raftId[%d] : %s\n", i, s); - taosMemoryFree(s); - } -} - int main(int argc, char** argv) { tsAsyncLog = 0; sDebugFlag = DEBUG_TRACE + DEBUG_SCREEN + DEBUG_FILE; @@ -80,58 +48,52 @@ int main(int argc, char** argv) { myIndex = atoi(argv[1]); } - int32_t ret = syncIOStart((char*)"127.0.0.1", ports[myIndex]); - assert(ret == 0); - - ret = syncEnvStart(); - assert(ret == 0); - - SSyncNode* pSyncNode = syncInitTest(); + SSyncNode* pSyncNode = syncNodeInit(); assert(pSyncNode != NULL); - char* serialized = syncNode2Str(pSyncNode); - printf("%s\n", serialized); - taosMemoryFree(serialized); - - initRaftId(pSyncNode); - + printf("---------------------------------------\n"); SSyncIndexMgr* pSyncIndexMgr = syncIndexMgrCreate(pSyncNode); assert(pSyncIndexMgr != NULL); - - printf("---------------------------------------\n"); { char* serialized = syncIndexMgr2Str(pSyncIndexMgr); assert(serialized != NULL); printf("%s\n", serialized); taosMemoryFree(serialized); } + printf("---------------------------------------\n"); + printf("---------------------------------------\n"); syncIndexMgrSetIndex(pSyncIndexMgr, &ids[0], 100); syncIndexMgrSetIndex(pSyncIndexMgr, &ids[1], 200); syncIndexMgrSetIndex(pSyncIndexMgr, &ids[2], 300); - - printf("---------------------------------------\n"); + // syncIndexMgrSetTerm(pSyncIndexMgr, &ids[0], 700); + // syncIndexMgrSetTerm(pSyncIndexMgr, &ids[1], 800); + // syncIndexMgrSetTerm(pSyncIndexMgr, &ids[2], 900); { char* serialized = syncIndexMgr2Str(pSyncIndexMgr); assert(serialized != NULL); printf("%s\n", serialized); taosMemoryFree(serialized); } + printf("---------------------------------------\n"); printf("---------------------------------------\n"); for (int i = 0; i < pSyncIndexMgr->replicaNum; ++i) { SyncIndex idx = syncIndexMgrGetIndex(pSyncIndexMgr, &ids[i]); - printf("index %d : %lu \n", i, idx); + // SyncTerm term = syncIndexMgrGetTerm(pSyncIndexMgr, &ids[i]); + // printf("%d: index:%ld term:%lu \n", i, idx, term); } - - syncIndexMgrClear(pSyncIndexMgr); printf("---------------------------------------\n"); + + printf("---------------------------------------\n"); + syncIndexMgrClear(pSyncIndexMgr); { char* serialized = syncIndexMgr2Str(pSyncIndexMgr); assert(serialized != NULL); printf("%s\n", serialized); taosMemoryFree(serialized); } + printf("---------------------------------------\n"); syncIndexMgrDestroy(pSyncIndexMgr); return 0; diff --git a/source/libs/sync/test/syncRaftCfgTest.cpp b/source/libs/sync/test/syncRaftCfgTest.cpp index f5b24db651..564cbdb69a 100644 --- a/source/libs/sync/test/syncRaftCfgTest.cpp +++ b/source/libs/sync/test/syncRaftCfgTest.cpp @@ -71,7 +71,10 @@ void test3() { if (taosCheckExistFile(s)) { printf("%s file: %s already exist! \n", (char*)__FUNCTION__, s); } else { - raftCfgCreateFile(pCfg, 7, s); + SRaftCfgMeta meta; + meta.isStandBy = 7; + meta.snapshotEnable = 9; + raftCfgCreateFile(pCfg, meta, s); printf("%s create json file: %s \n", (char*)__FUNCTION__, s); } @@ -94,6 +97,7 @@ void test5() { pCfg->cfg.myIndex = taosGetTimestampSec(); pCfg->isStandBy += 2; + pCfg->snapshotEnable += 3; raftCfgPersist(pCfg); printf("%s update json file: %s myIndex->%d \n", (char*)__FUNCTION__, "./test3_raft_cfg.json", pCfg->cfg.myIndex); diff --git a/source/libs/sync/test/syncRaftLogTest.cpp b/source/libs/sync/test/syncRaftLogTest.cpp new file mode 100644 index 0000000000..7903e86749 --- /dev/null +++ b/source/libs/sync/test/syncRaftLogTest.cpp @@ -0,0 +1,172 @@ +#include "syncRaftLog.h" +//#include +#include +#include "syncEnv.h" +#include "syncIO.h" +#include "syncInt.h" +#include "syncRaftStore.h" +#include "syncUtil.h" +#include "wal.h" + +void logTest() { + sTrace("--- sync log test: trace"); + sDebug("--- sync log test: debug"); + sInfo("--- sync log test: info"); + sWarn("--- sync log test: warn"); + sError("--- sync log test: error"); + sFatal("--- sync log test: fatal"); +} + +const char *gWalPath = "./syncLogStoreTest_wal"; + +void init() { walInit(); } + +void test1() { + taosRemoveDir(gWalPath); + + SWalCfg walCfg; + memset(&walCfg, 0, sizeof(SWalCfg)); + walCfg.vgId = 1000; + walCfg.fsyncPeriod = 1000; + walCfg.retentionPeriod = 1000; + walCfg.rollPeriod = 1000; + walCfg.retentionSize = 1000; + walCfg.segSize = 1000; + walCfg.level = TAOS_WAL_FSYNC; + SWal *pWal = walOpen(gWalPath, &walCfg); + assert(pWal != NULL); + + int64_t firstVer = walGetFirstVer(pWal); + int64_t lastVer = walGetLastVer(pWal); + printf("firstVer:%ld lastVer:%ld \n", firstVer, lastVer); + + walClose(pWal); +} + +void test2() { + taosRemoveDir(gWalPath); + + SWalCfg walCfg; + memset(&walCfg, 0, sizeof(SWalCfg)); + walCfg.vgId = 1000; + walCfg.fsyncPeriod = 1000; + walCfg.retentionPeriod = 1000; + walCfg.rollPeriod = 1000; + walCfg.retentionSize = 1000; + walCfg.segSize = 1000; + walCfg.level = TAOS_WAL_FSYNC; + SWal *pWal = walOpen(gWalPath, &walCfg); + assert(pWal != NULL); + + for (int i = 0; i < 5; ++i) { + int code = walWrite(pWal, i, 100, "aa", 3); + if (code != 0) { + printf("code:%d terror:%d msg:%s i:%d \n", code, terrno, tstrerror(terrno), i); + assert(0); + } + } + + int64_t firstVer = walGetFirstVer(pWal); + int64_t lastVer = walGetLastVer(pWal); + printf("firstVer:%ld lastVer:%ld \n", firstVer, lastVer); + + walClose(pWal); +} + +void test3() { + taosRemoveDir(gWalPath); + + SWalCfg walCfg; + memset(&walCfg, 0, sizeof(SWalCfg)); + walCfg.vgId = 1000; + walCfg.fsyncPeriod = 1000; + walCfg.retentionPeriod = 1000; + walCfg.rollPeriod = 1000; + walCfg.retentionSize = 1000; + walCfg.segSize = 1000; + walCfg.level = TAOS_WAL_FSYNC; + SWal *pWal = walOpen(gWalPath, &walCfg); + assert(pWal != NULL); + + walRestoreFromSnapshot(pWal, 5); + + int64_t firstVer = walGetFirstVer(pWal); + int64_t lastVer = walGetLastVer(pWal); + printf("firstVer:%ld lastVer:%ld \n", firstVer, lastVer); + + walClose(pWal); +} + +void test4() { + taosRemoveDir(gWalPath); + + SWalCfg walCfg; + memset(&walCfg, 0, sizeof(SWalCfg)); + walCfg.vgId = 1000; + walCfg.fsyncPeriod = 1000; + walCfg.retentionPeriod = 1000; + walCfg.rollPeriod = 1000; + walCfg.retentionSize = 1000; + walCfg.segSize = 1000; + walCfg.level = TAOS_WAL_FSYNC; + SWal *pWal = walOpen(gWalPath, &walCfg); + assert(pWal != NULL); + + walRestoreFromSnapshot(pWal, 5); + + for (int i = 6; i < 10; ++i) { + int code = walWrite(pWal, i, 100, "aa", 3); + if (code != 0) { + printf("code:%d terror:%d msg:%s i:%d \n", code, terrno, tstrerror(terrno), i); + assert(0); + } + } + + int64_t firstVer = walGetFirstVer(pWal); + int64_t lastVer = walGetLastVer(pWal); + printf("firstVer:%ld lastVer:%ld \n", firstVer, lastVer); + + walClose(pWal); +} + +void test5() { + taosRemoveDir(gWalPath); + + SWalCfg walCfg; + memset(&walCfg, 0, sizeof(SWalCfg)); + walCfg.vgId = 1000; + walCfg.fsyncPeriod = 1000; + walCfg.retentionPeriod = 1000; + walCfg.rollPeriod = 1000; + walCfg.retentionSize = 1000; + walCfg.segSize = 1000; + walCfg.level = TAOS_WAL_FSYNC; + SWal *pWal = walOpen(gWalPath, &walCfg); + assert(pWal != NULL); + + walRestoreFromSnapshot(pWal, 5); + walRestoreFromSnapshot(pWal, 7); + + int64_t firstVer = walGetFirstVer(pWal); + int64_t lastVer = walGetLastVer(pWal); + printf("firstVer:%ld lastVer:%ld \n", firstVer, lastVer); + + walClose(pWal); +} + +void cleanup() { walCleanUp(); } + +int main(int argc, char **argv) { + tsAsyncLog = 0; + sDebugFlag = DEBUG_TRACE + DEBUG_SCREEN + DEBUG_FILE; + init(); + + test1(); + test2(); + test3(); + test4(); + test5(); + + cleanup(); + return 0; +} diff --git a/source/libs/sync/test/syncRaftLogTest2.cpp b/source/libs/sync/test/syncRaftLogTest2.cpp new file mode 100644 index 0000000000..64e1da51a1 --- /dev/null +++ b/source/libs/sync/test/syncRaftLogTest2.cpp @@ -0,0 +1,437 @@ +#include +#include +#include "syncEnv.h" +#include "syncIO.h" +#include "syncInt.h" +#include "syncRaftLog.h" +#include "syncRaftStore.h" +#include "syncUtil.h" +#include "wal.h" + +void logTest() { + sTrace("--- sync log test: trace"); + sDebug("--- sync log test: debug"); + sInfo("--- sync log test: info"); + sWarn("--- sync log test: warn"); + sError("--- sync log test: error"); + sFatal("--- sync log test: fatal"); +} + +SSyncNode* pSyncNode; +SWal* pWal; +SSyncLogStore* pLogStore; +const char* pWalPath = "./syncLogStoreTest_wal"; + +SyncIndex gSnapshotLastApplyIndex; +SyncIndex gSnapshotLastApplyTerm; + +int32_t GetSnapshotCb(struct SSyncFSM* pFsm, SSnapshot* pSnapshot) { + pSnapshot->data = NULL; + pSnapshot->lastApplyIndex = gSnapshotLastApplyIndex; + pSnapshot->lastApplyTerm = gSnapshotLastApplyTerm; + return 0; +} + +bool gAssert = true; + +void init() { + walInit(); + + SWalCfg walCfg; + memset(&walCfg, 0, sizeof(SWalCfg)); + walCfg.vgId = 1000; + walCfg.fsyncPeriod = 1000; + walCfg.retentionPeriod = 1000; + walCfg.rollPeriod = 1000; + walCfg.retentionSize = 1000; + walCfg.segSize = 1000; + walCfg.level = TAOS_WAL_FSYNC; + pWal = walOpen(pWalPath, &walCfg); + assert(pWal != NULL); + + pSyncNode = (SSyncNode*)taosMemoryMalloc(sizeof(SSyncNode)); + memset(pSyncNode, 0, sizeof(SSyncNode)); + pSyncNode->pWal = pWal; + + pSyncNode->pFsm = (SSyncFSM*)taosMemoryMalloc(sizeof(SSyncFSM)); + pSyncNode->pFsm->FpGetSnapshot = GetSnapshotCb; +} + +void cleanup() { + walClose(pWal); + walCleanUp(); + taosMemoryFree(pSyncNode); +} + +void test1() { + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest1 ----- ", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 0); + assert(pLogStore->syncLogEndIndex(pLogStore) == -1); + assert(pLogStore->syncLogEntryCount(pLogStore) == 0); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 0); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 1); + assert(pLogStore->syncLogLastIndex(pLogStore) == -1); + assert(pLogStore->syncLogLastTerm(pLogStore) == 0); + } + + logStoreDestory(pLogStore); + cleanup(); + + // restart + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest1 restart ----- ", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 0); + assert(pLogStore->syncLogEndIndex(pLogStore) == -1); + assert(pLogStore->syncLogEntryCount(pLogStore) == 0); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 0); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 1); + assert(pLogStore->syncLogLastIndex(pLogStore) == -1); + assert(pLogStore->syncLogLastTerm(pLogStore) == 0); + } + + logStoreDestory(pLogStore); + cleanup(); +} + +void test2() { + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + pLogStore->syncLogSetBeginIndex(pLogStore, 5); + logStoreLog2((char*)"\n\n\ntest2 ----- ", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 5); + assert(pLogStore->syncLogEndIndex(pLogStore) == -1); + assert(pLogStore->syncLogEntryCount(pLogStore) == 0); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 5); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 1); + assert(pLogStore->syncLogLastIndex(pLogStore) == -1); + assert(pLogStore->syncLogLastTerm(pLogStore) == 0); + } + + logStoreDestory(pLogStore); + cleanup(); + + // restart + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest2 restart ----- ", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 5); + assert(pLogStore->syncLogEndIndex(pLogStore) == -1); + assert(pLogStore->syncLogEntryCount(pLogStore) == 0); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 5); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 1); + assert(pLogStore->syncLogLastIndex(pLogStore) == -1); + assert(pLogStore->syncLogLastTerm(pLogStore) == 0); + } + + logStoreDestory(pLogStore); + cleanup(); +} + +void test3() { + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest3 ----- ", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 0); + assert(pLogStore->syncLogEndIndex(pLogStore) == -1); + assert(pLogStore->syncLogEntryCount(pLogStore) == 0); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 0); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 1); + assert(pLogStore->syncLogLastIndex(pLogStore) == -1); + assert(pLogStore->syncLogLastTerm(pLogStore) == 0); + } + + for (int i = 0; i <= 4; ++i) { + int32_t dataLen = 10; + SSyncRaftEntry* pEntry = syncEntryBuild(dataLen); + assert(pEntry != NULL); + pEntry->msgType = 1; + pEntry->originalRpcType = 2; + pEntry->seqNum = 3; + pEntry->isWeak = true; + pEntry->term = 100 + i; + pEntry->index = pLogStore->syncLogWriteIndex(pLogStore); + snprintf(pEntry->data, dataLen, "value%d", i); + + pLogStore->syncLogAppendEntry(pLogStore, pEntry); + syncEntryDestory(pEntry); + } + logStoreLog2((char*)"test3 after appendEntry", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 0); + assert(pLogStore->syncLogEndIndex(pLogStore) == 4); + assert(pLogStore->syncLogEntryCount(pLogStore) == 5); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 5); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 0); + assert(pLogStore->syncLogLastIndex(pLogStore) == 4); + assert(pLogStore->syncLogLastTerm(pLogStore) == 104); + } + + logStoreDestory(pLogStore); + cleanup(); + + // restart + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest3 restart ----- ", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 0); + assert(pLogStore->syncLogEndIndex(pLogStore) == 4); + assert(pLogStore->syncLogEntryCount(pLogStore) == 5); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 5); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 0); + assert(pLogStore->syncLogLastIndex(pLogStore) == 4); + assert(pLogStore->syncLogLastTerm(pLogStore) == 104); + } + + logStoreDestory(pLogStore); + cleanup(); +} + +void test4() { + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest4 ----- ", pLogStore); + pLogStore->syncLogSetBeginIndex(pLogStore, 5); + + for (int i = 5; i <= 9; ++i) { + int32_t dataLen = 10; + SSyncRaftEntry* pEntry = syncEntryBuild(dataLen); + assert(pEntry != NULL); + pEntry->msgType = 1; + pEntry->originalRpcType = 2; + pEntry->seqNum = 3; + pEntry->isWeak = true; + pEntry->term = 100 + i; + pEntry->index = pLogStore->syncLogWriteIndex(pLogStore); + snprintf(pEntry->data, dataLen, "value%d", i); + + pLogStore->syncLogAppendEntry(pLogStore, pEntry); + syncEntryDestory(pEntry); + } + logStoreLog2((char*)"test4 after appendEntry", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 5); + assert(pLogStore->syncLogEndIndex(pLogStore) == 9); + assert(pLogStore->syncLogEntryCount(pLogStore) == 5); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 10); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 0); + assert(pLogStore->syncLogLastIndex(pLogStore) == 9); + assert(pLogStore->syncLogLastTerm(pLogStore) == 109); + } + + logStoreDestory(pLogStore); + cleanup(); + + // restart + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest4 restart ----- ", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 5); + assert(pLogStore->syncLogEndIndex(pLogStore) == 9); + assert(pLogStore->syncLogEntryCount(pLogStore) == 5); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 10); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 0); + assert(pLogStore->syncLogLastIndex(pLogStore) == 9); + assert(pLogStore->syncLogLastTerm(pLogStore) == 109); + } + + logStoreDestory(pLogStore); + cleanup(); +} + +void test5() { + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest5 ----- ", pLogStore); + pLogStore->syncLogSetBeginIndex(pLogStore, 5); + + for (int i = 5; i <= 9; ++i) { + int32_t dataLen = 10; + SSyncRaftEntry* pEntry = syncEntryBuild(dataLen); + assert(pEntry != NULL); + pEntry->msgType = 1; + pEntry->originalRpcType = 2; + pEntry->seqNum = 3; + pEntry->isWeak = true; + pEntry->term = 100 + i; + pEntry->index = pLogStore->syncLogWriteIndex(pLogStore); + snprintf(pEntry->data, dataLen, "value%d", i); + + pLogStore->syncLogAppendEntry(pLogStore, pEntry); + syncEntryDestory(pEntry); + } + logStoreLog2((char*)"test5 after appendEntry", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 5); + assert(pLogStore->syncLogEndIndex(pLogStore) == 9); + assert(pLogStore->syncLogEntryCount(pLogStore) == 5); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 10); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 0); + assert(pLogStore->syncLogLastIndex(pLogStore) == 9); + assert(pLogStore->syncLogLastTerm(pLogStore) == 109); + } + + pLogStore->syncLogTruncate(pLogStore, 7); + logStoreLog2((char*)"after truncate 7", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 5); + assert(pLogStore->syncLogEndIndex(pLogStore) == 6); + assert(pLogStore->syncLogEntryCount(pLogStore) == 2); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 7); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 0); + assert(pLogStore->syncLogLastIndex(pLogStore) == 6); + assert(pLogStore->syncLogLastTerm(pLogStore) == 106); + } + + logStoreDestory(pLogStore); + cleanup(); + + // restart + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest5 restart ----- ", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 5); + assert(pLogStore->syncLogEndIndex(pLogStore) == 6); + assert(pLogStore->syncLogEntryCount(pLogStore) == 2); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 7); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 0); + assert(pLogStore->syncLogLastIndex(pLogStore) == 6); + assert(pLogStore->syncLogLastTerm(pLogStore) == 106); + } + + logStoreDestory(pLogStore); + cleanup(); +} + +void test6() { + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest6 ----- ", pLogStore); + pLogStore->syncLogSetBeginIndex(pLogStore, 5); + + for (int i = 5; i <= 9; ++i) { + int32_t dataLen = 10; + SSyncRaftEntry* pEntry = syncEntryBuild(dataLen); + assert(pEntry != NULL); + pEntry->msgType = 1; + pEntry->originalRpcType = 2; + pEntry->seqNum = 3; + pEntry->isWeak = true; + pEntry->term = 100 + i; + pEntry->index = pLogStore->syncLogWriteIndex(pLogStore); + snprintf(pEntry->data, dataLen, "value%d", i); + + pLogStore->syncLogAppendEntry(pLogStore, pEntry); + syncEntryDestory(pEntry); + } + logStoreLog2((char*)"test6 after appendEntry", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 5); + assert(pLogStore->syncLogEndIndex(pLogStore) == 9); + assert(pLogStore->syncLogEntryCount(pLogStore) == 5); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 10); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 0); + assert(pLogStore->syncLogLastIndex(pLogStore) == 9); + assert(pLogStore->syncLogLastTerm(pLogStore) == 109); + } + + pLogStore->syncLogTruncate(pLogStore, 5); + logStoreLog2((char*)"after truncate 5", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 5); + assert(pLogStore->syncLogEndIndex(pLogStore) == -1); + assert(pLogStore->syncLogEntryCount(pLogStore) == 0); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 5); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 1); + assert(pLogStore->syncLogLastIndex(pLogStore) == -1); + assert(pLogStore->syncLogLastTerm(pLogStore) == 0); + } + + logStoreDestory(pLogStore); + cleanup(); + + // restart + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + logStoreLog2((char*)"\n\n\ntest6 restart ----- ", pLogStore); + + if (gAssert) { + assert(pLogStore->syncLogBeginIndex(pLogStore) == 5); + assert(pLogStore->syncLogEndIndex(pLogStore) == -1); + assert(pLogStore->syncLogEntryCount(pLogStore) == 0); + assert(pLogStore->syncLogWriteIndex(pLogStore) == 5); + assert(pLogStore->syncLogIsEmpty(pLogStore) == 1); + assert(pLogStore->syncLogLastIndex(pLogStore) == -1); + assert(pLogStore->syncLogLastTerm(pLogStore) == 0); + } + + logStoreDestory(pLogStore); + cleanup(); +} + +int main(int argc, char** argv) { + tsAsyncLog = 0; + sDebugFlag = DEBUG_TRACE + DEBUG_INFO + DEBUG_SCREEN + DEBUG_FILE; + + if (argc == 2) { + gAssert = atoi(argv[1]); + } + sTrace("gAssert : %d", gAssert); + + test1(); + test2(); + test3(); + test4(); + test5(); + test6(); + + return 0; +} diff --git a/source/libs/sync/test/syncRaftLogTest3.cpp b/source/libs/sync/test/syncRaftLogTest3.cpp new file mode 100644 index 0000000000..b47f8c96c5 --- /dev/null +++ b/source/libs/sync/test/syncRaftLogTest3.cpp @@ -0,0 +1,388 @@ +#include +#include +#include "syncEnv.h" +#include "syncIO.h" +#include "syncInt.h" +#include "syncRaftLog.h" +#include "syncRaftStore.h" +#include "syncUtil.h" +#include "wal.h" + +void logTest() { + sTrace("--- sync log test: trace"); + sDebug("--- sync log test: debug"); + sInfo("--- sync log test: info"); + sWarn("--- sync log test: warn"); + sError("--- sync log test: error"); + sFatal("--- sync log test: fatal"); +} + +bool gAssert = true; + +SSyncNode* pSyncNode; +SWal* pWal; +SSyncLogStore* pLogStore; +const char* pWalPath = "./syncLogStoreTest_wal"; + +SyncIndex gSnapshotLastApplyIndex; +SyncIndex gSnapshotLastApplyTerm; + +int32_t GetSnapshotCb(struct SSyncFSM* pFsm, SSnapshot* pSnapshot) { + pSnapshot->data = NULL; + pSnapshot->lastApplyIndex = gSnapshotLastApplyIndex; + pSnapshot->lastApplyTerm = gSnapshotLastApplyTerm; + return 0; +} + +void init() { + walInit(); + + SWalCfg walCfg; + memset(&walCfg, 0, sizeof(SWalCfg)); + walCfg.vgId = 1000; + walCfg.fsyncPeriod = 1000; + walCfg.retentionPeriod = 1000; + walCfg.rollPeriod = 1000; + walCfg.retentionSize = 1000; + walCfg.segSize = 1000; + walCfg.level = TAOS_WAL_FSYNC; + pWal = walOpen(pWalPath, &walCfg); + assert(pWal != NULL); + + pSyncNode = (SSyncNode*)taosMemoryMalloc(sizeof(SSyncNode)); + memset(pSyncNode, 0, sizeof(SSyncNode)); + pSyncNode->pWal = pWal; + + pSyncNode->pFsm = (SSyncFSM*)taosMemoryMalloc(sizeof(SSyncFSM)); + pSyncNode->pFsm->FpGetSnapshot = GetSnapshotCb; +} + +void cleanup() { + walClose(pWal); + walCleanUp(); + taosMemoryFree(pSyncNode); +} + +void test1() { + // no snapshot + // no log + + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + pSyncNode->pLogStore = pLogStore; + logStoreLog2((char*)"\n\n\ntest1 ----- ", pLogStore); + + gSnapshotLastApplyIndex = -1; + gSnapshotLastApplyTerm = 0; + + bool hasSnapshot = syncNodeHasSnapshot(pSyncNode); + SSnapshot snapshot; + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + + SyncIndex lastIndex = syncNodeGetLastIndex(pSyncNode); + SyncTerm lastTerm = syncNodeGetLastTerm(pSyncNode); + + SyncIndex testIndex = 0; + SyncIndex preIndex = syncNodeGetPreIndex(pSyncNode, testIndex); + SyncTerm preTerm = syncNodeGetPreTerm(pSyncNode, testIndex); + + SyncIndex syncStartIndex = syncNodeSyncStartIndex(pSyncNode); + + sTrace("test1"); + sTrace("hasSnapshot:%d, lastApplyIndex:%ld, lastApplyTerm:%lu", hasSnapshot, snapshot.lastApplyIndex, + snapshot.lastApplyTerm); + sTrace("lastIndex: %ld", lastIndex); + sTrace("lastTerm: %lu", lastTerm); + sTrace("syncStartIndex: %ld", syncStartIndex); + sTrace("%ld's preIndex: %ld", testIndex, preIndex); + sTrace("%ld's preTerm: %lu", testIndex, preTerm); + + if (gAssert) { + assert(lastIndex == -1); + assert(lastTerm == 0); + assert(syncStartIndex == 0); + assert(preIndex == -1); + assert(preTerm == 0); + } + + logStoreDestory(pLogStore); + cleanup(); +} + +void test2() { + // no snapshot + // whole log + + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + pSyncNode->pLogStore = pLogStore; + logStoreLog2((char*)"\n\n\ntest2 ----- ", pLogStore); + + for (int i = 0; i <= 10; ++i) { + int32_t dataLen = 10; + SSyncRaftEntry* pEntry = syncEntryBuild(dataLen); + assert(pEntry != NULL); + pEntry->msgType = 1; + pEntry->originalRpcType = 2; + pEntry->seqNum = 3; + pEntry->isWeak = true; + pEntry->term = 100 + i; + pEntry->index = pLogStore->syncLogWriteIndex(pLogStore); + snprintf(pEntry->data, dataLen, "value%d", i); + + pLogStore->syncLogAppendEntry(pLogStore, pEntry); + syncEntryDestory(pEntry); + } + logStoreLog2((char*)"test2 after appendEntry", pLogStore); + + gSnapshotLastApplyIndex = -1; + gSnapshotLastApplyTerm = 0; + + bool hasSnapshot = syncNodeHasSnapshot(pSyncNode); + SSnapshot snapshot; + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + + SyncIndex lastIndex = syncNodeGetLastIndex(pSyncNode); + SyncTerm lastTerm = syncNodeGetLastTerm(pSyncNode); + + SyncIndex syncStartIndex = syncNodeSyncStartIndex(pSyncNode); + + sTrace("test2"); + sTrace("hasSnapshot:%d, lastApplyIndex:%ld, lastApplyTerm:%lu", hasSnapshot, snapshot.lastApplyIndex, + snapshot.lastApplyTerm); + sTrace("lastIndex: %ld", lastIndex); + sTrace("lastTerm: %lu", lastTerm); + sTrace("syncStartIndex: %ld", syncStartIndex); + + if (gAssert) { + assert(lastIndex == 10); + assert(lastTerm == 110); + assert(syncStartIndex == 11); + } + + for (SyncIndex i = 11; i >= 0; --i) { + SyncIndex preIndex = syncNodeGetPreIndex(pSyncNode, i); + SyncTerm preTerm = syncNodeGetPreTerm(pSyncNode, i); + + sTrace("%ld's preIndex: %ld", i, preIndex); + sTrace("%ld's preTerm: %lu", i, preTerm); + + if (gAssert) { + SyncIndex preIndexArr[12] = {-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + SyncTerm preTermArr[12] = {0, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110}; + + assert(preIndex == preIndexArr[i]); + assert(preTerm == preTermArr[i]); + } + } + + logStoreDestory(pLogStore); + cleanup(); +} + +void test3() { + // has snapshot + // no log + + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + pSyncNode->pLogStore = pLogStore; + logStoreLog2((char*)"\n\n\ntest3 ----- ", pLogStore); + + gSnapshotLastApplyIndex = 5; + gSnapshotLastApplyTerm = 100; + + bool hasSnapshot = syncNodeHasSnapshot(pSyncNode); + SSnapshot snapshot; + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + + SyncIndex lastIndex = syncNodeGetLastIndex(pSyncNode); + SyncTerm lastTerm = syncNodeGetLastTerm(pSyncNode); + + SyncIndex preIndex = syncNodeGetPreIndex(pSyncNode, 6); + SyncTerm preTerm = syncNodeGetPreTerm(pSyncNode, 6); + + SyncIndex syncStartIndex = syncNodeSyncStartIndex(pSyncNode); + + sTrace("test3"); + sTrace("hasSnapshot:%d, lastApplyIndex:%ld, lastApplyTerm:%lu", hasSnapshot, snapshot.lastApplyIndex, + snapshot.lastApplyTerm); + sTrace("lastIndex: %ld", lastIndex); + sTrace("lastTerm: %lu", lastTerm); + sTrace("syncStartIndex: %ld", syncStartIndex); + sTrace("%d's preIndex: %ld", 6, preIndex); + sTrace("%d's preTerm: %lu", 6, preTerm); + + if (gAssert) { + assert(lastIndex == 5); + assert(lastTerm == 100); + assert(syncStartIndex == 6); + assert(preIndex == 5); + assert(preTerm == 100); + } + + logStoreDestory(pLogStore); + cleanup(); +} + +void test4() { + // has snapshot + // whole log + + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + pSyncNode->pLogStore = pLogStore; + logStoreLog2((char*)"\n\n\ntest4 ----- ", pLogStore); + + for (int i = 0; i <= 10; ++i) { + int32_t dataLen = 10; + SSyncRaftEntry* pEntry = syncEntryBuild(dataLen); + assert(pEntry != NULL); + pEntry->msgType = 1; + pEntry->originalRpcType = 2; + pEntry->seqNum = 3; + pEntry->isWeak = true; + pEntry->term = 100 + i; + pEntry->index = pLogStore->syncLogWriteIndex(pLogStore); + snprintf(pEntry->data, dataLen, "value%d", i); + + pLogStore->syncLogAppendEntry(pLogStore, pEntry); + syncEntryDestory(pEntry); + } + logStoreLog2((char*)"test4 after appendEntry", pLogStore); + + gSnapshotLastApplyIndex = 5; + gSnapshotLastApplyTerm = 100; + + bool hasSnapshot = syncNodeHasSnapshot(pSyncNode); + SSnapshot snapshot; + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + + SyncIndex lastIndex = syncNodeGetLastIndex(pSyncNode); + SyncTerm lastTerm = syncNodeGetLastTerm(pSyncNode); + + SyncIndex syncStartIndex = syncNodeSyncStartIndex(pSyncNode); + + sTrace("test4"); + sTrace("hasSnapshot:%d, lastApplyIndex:%ld, lastApplyTerm:%lu", hasSnapshot, snapshot.lastApplyIndex, + snapshot.lastApplyTerm); + sTrace("lastIndex: %ld", lastIndex); + sTrace("lastTerm: %lu", lastTerm); + sTrace("syncStartIndex: %ld", syncStartIndex); + + if (gAssert) { + assert(lastIndex == 10); + assert(lastTerm == 110); + assert(syncStartIndex == 11); + } + + for (SyncIndex i = 11; i >= 6; --i) { + SyncIndex preIndex = syncNodeGetPreIndex(pSyncNode, i); + SyncTerm preTerm = syncNodeGetPreTerm(pSyncNode, i); + + sTrace("%ld's preIndex: %ld", i, preIndex); + sTrace("%ld's preTerm: %lu", i, preTerm); + } + + logStoreDestory(pLogStore); + cleanup(); +} + +void test5() { + // has snapshot + // partial log + + taosRemoveDir(pWalPath); + + init(); + pLogStore = logStoreCreate(pSyncNode); + assert(pLogStore); + pSyncNode->pLogStore = pLogStore; + logStoreLog2((char*)"\n\n\ntest5 ----- ", pLogStore); + + pSyncNode->pLogStore->syncLogSetBeginIndex(pSyncNode->pLogStore, 6); + for (int i = 6; i <= 10; ++i) { + int32_t dataLen = 10; + SSyncRaftEntry* pEntry = syncEntryBuild(dataLen); + assert(pEntry != NULL); + pEntry->msgType = 1; + pEntry->originalRpcType = 2; + pEntry->seqNum = 3; + pEntry->isWeak = true; + pEntry->term = 100 + i; + pEntry->index = pLogStore->syncLogWriteIndex(pLogStore); + snprintf(pEntry->data, dataLen, "value%d", i); + + pLogStore->syncLogAppendEntry(pLogStore, pEntry); + syncEntryDestory(pEntry); + } + logStoreLog2((char*)"test5 after appendEntry", pLogStore); + + gSnapshotLastApplyIndex = 5; + gSnapshotLastApplyTerm = 100; + + bool hasSnapshot = syncNodeHasSnapshot(pSyncNode); + SSnapshot snapshot; + pSyncNode->pFsm->FpGetSnapshot(pSyncNode->pFsm, &snapshot); + + SyncIndex lastIndex = syncNodeGetLastIndex(pSyncNode); + SyncTerm lastTerm = syncNodeGetLastTerm(pSyncNode); + + SyncIndex syncStartIndex = syncNodeSyncStartIndex(pSyncNode); + + sTrace("test5"); + sTrace("hasSnapshot:%d, lastApplyIndex:%ld, lastApplyTerm:%lu", hasSnapshot, snapshot.lastApplyIndex, + snapshot.lastApplyTerm); + sTrace("lastIndex: %ld", lastIndex); + sTrace("lastTerm: %lu", lastTerm); + sTrace("syncStartIndex: %ld", syncStartIndex); + + for (SyncIndex i = 11; i >= 6; --i) { + SyncIndex preIndex = syncNodeGetPreIndex(pSyncNode, i); + SyncTerm preTerm = syncNodeGetPreTerm(pSyncNode, i); + + sTrace("%ld's preIndex: %ld", i, preIndex); + sTrace("%ld's preTerm: %lu", i, preTerm); + + if (gAssert) { + SyncIndex preIndexArr[12] = {9999, 9999, 9999, 9999, 9999, 9999, 5, 6, 7, 8, 9, 10}; + SyncTerm preTermArr[12] = {9999, 9999, 9999, 9999, 9999, 9999, 100, 106, 107, 108, 109, 110}; + + assert(preIndex == preIndexArr[i]); + assert(preTerm == preTermArr[i]); + } + } + + logStoreDestory(pLogStore); + cleanup(); +} + +int main(int argc, char** argv) { + tsAsyncLog = 0; + sDebugFlag = DEBUG_TRACE + DEBUG_INFO + DEBUG_SCREEN + DEBUG_FILE; + + if (argc == 2) { + gAssert = atoi(argv[1]); + } + sTrace("gAssert : %d", gAssert); + + test1(); + test2(); + test3(); + test4(); + test5(); + + return 0; +} diff --git a/source/libs/sync/test/syncSnapshotReceiverTest.cpp b/source/libs/sync/test/syncSnapshotReceiverTest.cpp new file mode 100644 index 0000000000..69670f09a6 --- /dev/null +++ b/source/libs/sync/test/syncSnapshotReceiverTest.cpp @@ -0,0 +1,63 @@ +#include +#include +#include "syncIO.h" +#include "syncInt.h" +#include "syncMessage.h" +#include "syncRaftStore.h" +#include "syncSnapshot.h" +#include "syncUtil.h" + +void logTest() { + sTrace("--- sync log test: trace"); + sDebug("--- sync log test: debug"); + sInfo("--- sync log test: info"); + sWarn("--- sync log test: warn"); + sError("--- sync log test: error"); + sFatal("--- sync log test: fatal"); +} + +void CommitCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) {} +void PreCommitCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) {} +void RollBackCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) {} + +void RestoreFinishCb(struct SSyncFSM* pFsm) {} +void ReConfigCb(struct SSyncFSM* pFsm, SSyncCfg newCfg, SReConfigCbMeta cbMeta) {} + +int32_t GetSnapshot(struct SSyncFSM* pFsm, SSnapshot* pSnapshot) { return 0; } + +int32_t SnapshotStartRead(struct SSyncFSM* pFsm, void** ppReader) { return 0; } +int32_t SnapshotStopRead(struct SSyncFSM* pFsm, void* pReader) { return 0; } +int32_t SnapshotDoRead(struct SSyncFSM* pFsm, void* pReader, void** ppBuf, int32_t* len) { return 0; } + +int32_t SnapshotStartWrite(struct SSyncFSM* pFsm, void** ppWriter) { return 0; } +int32_t SnapshotStopWrite(struct SSyncFSM* pFsm, void* pWriter, bool isApply) { return 0; } +int32_t SnapshotDoWrite(struct SSyncFSM* pFsm, void* pWriter, void* pBuf, int32_t len) { return 0; } + +SSyncSnapshotReceiver* createReceiver() { + SSyncNode* pSyncNode = (SSyncNode*)taosMemoryMalloc(sizeof(*pSyncNode)); + pSyncNode->pRaftStore = (SRaftStore*)taosMemoryMalloc(sizeof(*(pSyncNode->pRaftStore))); + pSyncNode->pFsm = (SSyncFSM*)taosMemoryMalloc(sizeof(*(pSyncNode->pFsm))); + pSyncNode->pFsm->FpSnapshotStartWrite = SnapshotStartWrite; + pSyncNode->pFsm->FpSnapshotStopWrite = SnapshotStopWrite; + pSyncNode->pFsm->FpSnapshotDoWrite = SnapshotDoWrite; + + SSyncSnapshotReceiver* pReceiver = snapshotReceiverCreate(pSyncNode, 2); + pReceiver->start = true; + pReceiver->ack = 20; + pReceiver->pWriter = (void*)0x11; + pReceiver->term = 66; + pReceiver->privateTerm = 99; + + return pReceiver; +} + +int main() { + tsAsyncLog = 0; + sDebugFlag = DEBUG_TRACE + DEBUG_SCREEN + DEBUG_FILE; + logTest(); + + SSyncSnapshotReceiver* pReceiver = createReceiver(); + sTrace("%s", snapshotReceiver2Str(pReceiver)); + + return 0; +} diff --git a/source/libs/sync/test/syncSnapshotRspTest.cpp b/source/libs/sync/test/syncSnapshotRspTest.cpp new file mode 100644 index 0000000000..f689d47aaf --- /dev/null +++ b/source/libs/sync/test/syncSnapshotRspTest.cpp @@ -0,0 +1,101 @@ +#include +#include +#include "syncIO.h" +#include "syncInt.h" +#include "syncMessage.h" +#include "syncUtil.h" + +void logTest() { + sTrace("--- sync log test: trace"); + sDebug("--- sync log test: debug"); + sInfo("--- sync log test: info"); + sWarn("--- sync log test: warn"); + sError("--- sync log test: error"); + sFatal("--- sync log test: fatal"); +} + +SyncSnapshotRsp *createMsg() { + SyncSnapshotRsp *pMsg = syncSnapshotRspBuild(1000); + pMsg->srcId.addr = syncUtilAddr2U64("127.0.0.1", 1234); + pMsg->srcId.vgId = 100; + pMsg->destId.addr = syncUtilAddr2U64("127.0.0.1", 5678); + pMsg->destId.vgId = 100; + pMsg->term = 11; + pMsg->privateTerm = 99; + pMsg->lastIndex = 22; + pMsg->lastTerm = 33; + pMsg->ack = 44; + pMsg->code = 55; + return pMsg; +} + +void test1() { + SyncSnapshotRsp *pMsg = createMsg(); + syncSnapshotRspLog2((char *)"test1:", pMsg); + syncSnapshotRspDestroy(pMsg); +} + +void test2() { + SyncSnapshotRsp *pMsg = createMsg(); + uint32_t len = pMsg->bytes; + char * serialized = (char *)taosMemoryMalloc(len); + syncSnapshotRspSerialize(pMsg, serialized, len); + SyncSnapshotRsp *pMsg2 = syncSnapshotRspBuild(1000); + syncSnapshotRspDeserialize(serialized, len, pMsg2); + syncSnapshotRspLog2((char *)"test2: syncSnapshotRspSerialize -> syncSnapshotRspDeserialize ", pMsg2); + + taosMemoryFree(serialized); + syncSnapshotRspDestroy(pMsg); + syncSnapshotRspDestroy(pMsg2); +} + +void test3() { + SyncSnapshotRsp *pMsg = createMsg(); + uint32_t len; + char * serialized = syncSnapshotRspSerialize2(pMsg, &len); + SyncSnapshotRsp *pMsg2 = syncSnapshotRspDeserialize2(serialized, len); + syncSnapshotRspLog2((char *)"test3: syncSnapshotRspSerialize2 -> syncSnapshotRspDeserialize2 ", pMsg2); + + taosMemoryFree(serialized); + syncSnapshotRspDestroy(pMsg); + syncSnapshotRspDestroy(pMsg2); +} + +void test4() { + SyncSnapshotRsp *pMsg = createMsg(); + SRpcMsg rpcMsg; + syncSnapshotRsp2RpcMsg(pMsg, &rpcMsg); + SyncSnapshotRsp *pMsg2 = (SyncSnapshotRsp *)taosMemoryMalloc(rpcMsg.contLen); + syncSnapshotRspFromRpcMsg(&rpcMsg, pMsg2); + syncSnapshotRspLog2((char *)"test4: syncSnapshotRsp2RpcMsg -> syncSnapshotRspFromRpcMsg ", pMsg2); + + rpcFreeCont(rpcMsg.pCont); + syncSnapshotRspDestroy(pMsg); + syncSnapshotRspDestroy(pMsg2); +} + +void test5() { + SyncSnapshotRsp *pMsg = createMsg(); + SRpcMsg rpcMsg; + syncSnapshotRsp2RpcMsg(pMsg, &rpcMsg); + SyncSnapshotRsp *pMsg2 = syncSnapshotRspFromRpcMsg2(&rpcMsg); + syncSnapshotRspLog2((char *)"test5: syncSnapshotRsp2RpcMsg -> syncSnapshotRspFromRpcMsg2 ", pMsg2); + + rpcFreeCont(rpcMsg.pCont); + syncSnapshotRspDestroy(pMsg); + syncSnapshotRspDestroy(pMsg2); +} + +int main() { + tsAsyncLog = 0; + sDebugFlag = DEBUG_TRACE + DEBUG_SCREEN + DEBUG_FILE; + logTest(); + + test1(); + test2(); + test3(); + test4(); + test5(); + + return 0; +} diff --git a/source/libs/sync/test/syncSnapshotSendTest.cpp b/source/libs/sync/test/syncSnapshotSendTest.cpp new file mode 100644 index 0000000000..01d3264693 --- /dev/null +++ b/source/libs/sync/test/syncSnapshotSendTest.cpp @@ -0,0 +1,101 @@ +#include +#include +#include "syncIO.h" +#include "syncInt.h" +#include "syncMessage.h" +#include "syncUtil.h" + +void logTest() { + sTrace("--- sync log test: trace"); + sDebug("--- sync log test: debug"); + sInfo("--- sync log test: info"); + sWarn("--- sync log test: warn"); + sError("--- sync log test: error"); + sFatal("--- sync log test: fatal"); +} + +SyncSnapshotSend *createMsg() { + SyncSnapshotSend *pMsg = syncSnapshotSendBuild(20, 1000); + pMsg->srcId.addr = syncUtilAddr2U64("127.0.0.1", 1234); + pMsg->srcId.vgId = 100; + pMsg->destId.addr = syncUtilAddr2U64("127.0.0.1", 5678); + pMsg->destId.vgId = 100; + pMsg->term = 11; + pMsg->privateTerm = 99; + pMsg->lastIndex = 22; + pMsg->lastTerm = 33; + pMsg->seq = 44; + strcpy(pMsg->data, "hello world"); + return pMsg; +} + +void test1() { + SyncSnapshotSend *pMsg = createMsg(); + syncSnapshotSendLog2((char *)"test1:", pMsg); + syncSnapshotSendDestroy(pMsg); +} + +void test2() { + SyncSnapshotSend *pMsg = createMsg(); + uint32_t len = pMsg->bytes; + char * serialized = (char *)taosMemoryMalloc(len); + syncSnapshotSendSerialize(pMsg, serialized, len); + SyncSnapshotSend *pMsg2 = syncSnapshotSendBuild(pMsg->dataLen, 1000); + syncSnapshotSendDeserialize(serialized, len, pMsg2); + syncSnapshotSendLog2((char *)"test2: syncSnapshotSendSerialize -> syncSnapshotSendDeserialize ", pMsg2); + + taosMemoryFree(serialized); + syncSnapshotSendDestroy(pMsg); + syncSnapshotSendDestroy(pMsg2); +} + +void test3() { + SyncSnapshotSend *pMsg = createMsg(); + uint32_t len; + char * serialized = syncSnapshotSendSerialize2(pMsg, &len); + SyncSnapshotSend *pMsg2 = syncSnapshotSendDeserialize2(serialized, len); + syncSnapshotSendLog2((char *)"test3: syncSnapshotSendSerialize2 -> syncSnapshotSendDeserialize2 ", pMsg2); + + taosMemoryFree(serialized); + syncSnapshotSendDestroy(pMsg); + syncSnapshotSendDestroy(pMsg2); +} + +void test4() { + SyncSnapshotSend *pMsg = createMsg(); + SRpcMsg rpcMsg; + syncSnapshotSend2RpcMsg(pMsg, &rpcMsg); + SyncSnapshotSend *pMsg2 = (SyncSnapshotSend *)taosMemoryMalloc(rpcMsg.contLen); + syncSnapshotSendFromRpcMsg(&rpcMsg, pMsg2); + syncSnapshotSendLog2((char *)"test4: syncSnapshotSend2RpcMsg -> syncSnapshotSendFromRpcMsg ", pMsg2); + + rpcFreeCont(rpcMsg.pCont); + syncSnapshotSendDestroy(pMsg); + syncSnapshotSendDestroy(pMsg2); +} + +void test5() { + SyncSnapshotSend *pMsg = createMsg(); + SRpcMsg rpcMsg; + syncSnapshotSend2RpcMsg(pMsg, &rpcMsg); + SyncSnapshotSend *pMsg2 = syncSnapshotSendFromRpcMsg2(&rpcMsg); + syncSnapshotSendLog2((char *)"test5: syncSnapshotSend2RpcMsg -> syncSnapshotSendFromRpcMsg2 ", pMsg2); + + rpcFreeCont(rpcMsg.pCont); + syncSnapshotSendDestroy(pMsg); + syncSnapshotSendDestroy(pMsg2); +} + +int main() { + tsAsyncLog = 0; + sDebugFlag = DEBUG_TRACE + DEBUG_SCREEN + DEBUG_FILE; + logTest(); + + test1(); + test2(); + test3(); + test4(); + test5(); + + return 0; +} diff --git a/source/libs/sync/test/syncSnapshotSenderTest.cpp b/source/libs/sync/test/syncSnapshotSenderTest.cpp new file mode 100644 index 0000000000..404ba2acae --- /dev/null +++ b/source/libs/sync/test/syncSnapshotSenderTest.cpp @@ -0,0 +1,72 @@ +#include +#include +#include "syncIO.h" +#include "syncInt.h" +#include "syncMessage.h" +#include "syncRaftStore.h" +#include "syncSnapshot.h" +#include "syncUtil.h" + +void logTest() { + sTrace("--- sync log test: trace"); + sDebug("--- sync log test: debug"); + sInfo("--- sync log test: info"); + sWarn("--- sync log test: warn"); + sError("--- sync log test: error"); + sFatal("--- sync log test: fatal"); +} + +void CommitCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) {} +void PreCommitCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) {} +void RollBackCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) {} + +void RestoreFinishCb(struct SSyncFSM* pFsm) {} +void ReConfigCb(struct SSyncFSM* pFsm, SSyncCfg newCfg, SReConfigCbMeta cbMeta) {} + +int32_t GetSnapshot(struct SSyncFSM* pFsm, SSnapshot* pSnapshot) { return 0; } + +int32_t SnapshotStartRead(struct SSyncFSM* pFsm, void** ppReader) { return 0; } +int32_t SnapshotStopRead(struct SSyncFSM* pFsm, void* pReader) { return 0; } +int32_t SnapshotDoRead(struct SSyncFSM* pFsm, void* pReader, void** ppBuf, int32_t* len) { return 0; } + +int32_t SnapshotStartWrite(struct SSyncFSM* pFsm, void** ppWriter) { return 0; } +int32_t SnapshotStopWrite(struct SSyncFSM* pFsm, void* pWriter, bool isApply) { return 0; } +int32_t SnapshotDoWrite(struct SSyncFSM* pFsm, void* pWriter, void* pBuf, int32_t len) { return 0; } + +SSyncSnapshotSender* createSender() { + SSyncNode* pSyncNode = (SSyncNode*)taosMemoryMalloc(sizeof(*pSyncNode)); + pSyncNode->pRaftStore = (SRaftStore*)taosMemoryMalloc(sizeof(*(pSyncNode->pRaftStore))); + pSyncNode->pFsm = (SSyncFSM*)taosMemoryMalloc(sizeof(*(pSyncNode->pFsm))); + pSyncNode->pFsm->FpSnapshotStartRead = SnapshotStartRead; + pSyncNode->pFsm->FpSnapshotStopRead = SnapshotStopRead; + pSyncNode->pFsm->FpSnapshotDoRead = SnapshotDoRead; + pSyncNode->pFsm->FpGetSnapshot = GetSnapshot; + + SSyncSnapshotSender* pSender = snapshotSenderCreate(pSyncNode, 2); + pSender->start = true; + pSender->seq = 10; + pSender->ack = 20; + pSender->pReader = (void*)0x11; + pSender->blockLen = 20; + pSender->pCurrentBlock = taosMemoryMalloc(pSender->blockLen); + snprintf((char*)(pSender->pCurrentBlock), pSender->blockLen, "%s", "hello"); + + pSender->snapshot.lastApplyIndex = 99; + pSender->snapshot.lastApplyTerm = 88; + pSender->sendingMS = 77; + pSender->term = 66; + pSender->privateTerm = 99; + + return pSender; +} + +int main() { + tsAsyncLog = 0; + sDebugFlag = DEBUG_TRACE + DEBUG_SCREEN + DEBUG_FILE; + logTest(); + + SSyncSnapshotSender* pSender = createSender(); + sTrace("%s", snapshotSender2Str(pSender)); + + return 0; +} diff --git a/source/libs/sync/test/syncTest.cpp b/source/libs/sync/test/syncTest.cpp index ffe8b81571..97de81572a 100644 --- a/source/libs/sync/test/syncTest.cpp +++ b/source/libs/sync/test/syncTest.cpp @@ -50,14 +50,16 @@ void test4() { } int main(int argc, char** argv) { - // taosInitLog("tmp/syncTest.log", 100); + taosInitLog("/tmp/syncTest.log", 100); tsAsyncLog = 0; + sDebugFlag = DEBUG_SCREEN + DEBUG_FILE + DEBUG_TRACE + DEBUG_INFO + DEBUG_ERROR; test1(); test2(); test3(); test4(); + /* if (argc == 2) { bool bTaosDirExist = taosDirExist(argv[1]); printf("%s bTaosDirExist:%d \n", argv[1], bTaosDirExist); @@ -65,7 +67,8 @@ int main(int argc, char** argv) { bool bTaosCheckExistFile = taosCheckExistFile(argv[1]); printf("%s bTaosCheckExistFile:%d \n", argv[1], bTaosCheckExistFile); } + */ - // taosCloseLog(); + taosCloseLog(); return 0; } diff --git a/source/libs/sync/test/syncTestTool.cpp b/source/libs/sync/test/syncTestTool.cpp new file mode 100644 index 0000000000..782baf3c97 --- /dev/null +++ b/source/libs/sync/test/syncTestTool.cpp @@ -0,0 +1,399 @@ +#include +#include +#include "os.h" +#include "syncEnv.h" +#include "syncIO.h" +#include "syncInt.h" +#include "syncRaftCfg.h" +#include "syncUtil.h" +#include "wal.h" + +void logTest() { + sTrace("--- sync log test: trace"); + sDebug("--- sync log test: debug"); + sInfo("--- sync log test: info"); + sWarn("--- sync log test: warn"); + sError("--- sync log test: error"); + sFatal("--- sync log test: fatal"); +} + +uint16_t gPorts[] = {7000, 7001, 7002, 7003, 7004}; +const char* gDir = "./syncTestTool"; +int32_t gVgId = 1234; +SyncIndex gSnapshotLastApplyIndex; +SyncIndex gSnapshotLastApplyTerm; +int gIterTimes = 0; + +SyncIndex gFinishLastApplyIndex; +SyncIndex gFinishLastApplyTerm; + +void init() { + int code = walInit(); + assert(code == 0); + + code = syncInit(); + assert(code == 0); +} + +void cleanup() { walCleanUp(); } + +void CommitCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) { + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), + "==callback== ==CommitCb== pFsm:%p, index:%ld, isWeak:%d, code:%d, state:%d %s, flag:%lu, term:%lu " + "currentTerm:%lu \n", + pFsm, cbMeta.index, cbMeta.isWeak, cbMeta.code, cbMeta.state, syncUtilState2String(cbMeta.state), + cbMeta.flag, cbMeta.term, cbMeta.currentTerm); + syncRpcMsgLog2(logBuf, (SRpcMsg*)pMsg); +} + +void PreCommitCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) { + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), + "==callback== ==PreCommitCb== pFsm:%p, index:%ld, isWeak:%d, code:%d, state:%d %s, flag:%lu, term:%lu " + "currentTerm:%lu \n", + pFsm, cbMeta.index, cbMeta.isWeak, cbMeta.code, cbMeta.state, syncUtilState2String(cbMeta.state), + cbMeta.flag, cbMeta.term, cbMeta.currentTerm); + syncRpcMsgLog2(logBuf, (SRpcMsg*)pMsg); +} + +void RollBackCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta) { + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), + "==callback== ==RollBackCb== pFsm:%p, index:%ld, isWeak:%d, code:%d, state:%d %s, flag:%lu, term:%lu " + "currentTerm:%lu \n", + pFsm, cbMeta.index, cbMeta.isWeak, cbMeta.code, cbMeta.state, syncUtilState2String(cbMeta.state), + cbMeta.flag, cbMeta.term, cbMeta.currentTerm); + syncRpcMsgLog2(logBuf, (SRpcMsg*)pMsg); +} + +int32_t GetSnapshotCb(struct SSyncFSM* pFsm, SSnapshot* pSnapshot) { + pSnapshot->data = NULL; + pSnapshot->lastApplyIndex = gSnapshotLastApplyIndex; + pSnapshot->lastApplyTerm = gSnapshotLastApplyTerm; + return 0; +} + +int32_t SnapshotStartRead(struct SSyncFSM* pFsm, void** ppReader) { + *ppReader = (void*)0xABCD; + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), "==callback== ==SnapshotStartRead== pFsm:%p, *ppReader:%p", pFsm, *ppReader); + sTrace("%s", logBuf); + return 0; +} + +int32_t SnapshotStopRead(struct SSyncFSM* pFsm, void* pReader) { + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), "==callback== ==SnapshotStopRead== pFsm:%p, pReader:%p", pFsm, pReader); + sTrace("%s", logBuf); + return 0; +} + +int32_t SnapshotDoRead(struct SSyncFSM* pFsm, void* pReader, void** ppBuf, int32_t* len) { + static int readIter = 0; + + if (readIter == gIterTimes) { + *len = 0; + *ppBuf = NULL; + } else if (readIter < gIterTimes) { + *len = 20; + *ppBuf = taosMemoryMalloc(*len); + snprintf((char*)*ppBuf, *len, "data iter:%d", readIter); + } + + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), + "==callback== ==SnapshotDoRead== pFsm:%p, pReader:%p, *len:%d, *ppBuf:[%s], readIter:%d", pFsm, pReader, + *len, (char*)(*ppBuf), readIter); + sTrace("%s", logBuf); + + readIter++; + return 0; +} + +int32_t SnapshotStartWrite(struct SSyncFSM* pFsm, void** ppWriter) { + *ppWriter = (void*)0xCDEF; + char logBuf[256] = {0}; + + snprintf(logBuf, sizeof(logBuf), "==callback== ==SnapshotStartWrite== pFsm:%p, *ppWriter:%p", pFsm, *ppWriter); + sTrace("%s", logBuf); + return 0; +} + +int32_t SnapshotStopWrite(struct SSyncFSM* pFsm, void* pWriter, bool isApply) { + if (isApply) { + gSnapshotLastApplyIndex = gFinishLastApplyIndex; + gSnapshotLastApplyTerm = gFinishLastApplyTerm; + } + + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), + "==callback== ==SnapshotStopWrite== pFsm:%p, pWriter:%p, isApply:%d, gSnapshotLastApplyIndex:%ld, " + "gSnapshotLastApplyTerm:%ld", + pFsm, pWriter, isApply, gSnapshotLastApplyIndex, gSnapshotLastApplyTerm); + sTrace("%s", logBuf); + + return 0; +} + +int32_t SnapshotDoWrite(struct SSyncFSM* pFsm, void* pWriter, void* pBuf, int32_t len) { + char logBuf[256] = {0}; + snprintf(logBuf, sizeof(logBuf), "==callback== ==SnapshotDoWrite== pFsm:%p, pWriter:%p, len:%d pBuf:[%s]", pFsm, + pWriter, len, (char*)pBuf); + sTrace("%s", logBuf); + return 0; +} + +void RestoreFinishCb(struct SSyncFSM* pFsm) { sTrace("==callback== ==RestoreFinishCb== pFsm:%p", pFsm); } + +void ReConfigCb(struct SSyncFSM* pFsm, SSyncCfg newCfg, SReConfigCbMeta cbMeta) { + char* s = syncCfg2Str(&newCfg); + sTrace("==callback== ==ReConfigCb== flag:0x%lX, isDrop:%d, index:%ld, code:%d, currentTerm:%lu, term:%lu, newCfg:%s", + cbMeta.flag, cbMeta.isDrop, cbMeta.index, cbMeta.code, cbMeta.currentTerm, cbMeta.term, s); + taosMemoryFree(s); +} + +SSyncFSM* createFsm() { + SSyncFSM* pFsm = (SSyncFSM*)taosMemoryMalloc(sizeof(SSyncFSM)); + memset(pFsm, 0, sizeof(*pFsm)); + + pFsm->FpCommitCb = CommitCb; + pFsm->FpPreCommitCb = PreCommitCb; + pFsm->FpRollBackCb = RollBackCb; + + pFsm->FpReConfigCb = ReConfigCb; + pFsm->FpGetSnapshot = GetSnapshotCb; + pFsm->FpRestoreFinishCb = RestoreFinishCb; + + pFsm->FpSnapshotStartRead = SnapshotStartRead; + pFsm->FpSnapshotStopRead = SnapshotStopRead; + pFsm->FpSnapshotDoRead = SnapshotDoRead; + pFsm->FpSnapshotStartWrite = SnapshotStartWrite; + pFsm->FpSnapshotStopWrite = SnapshotStopWrite; + pFsm->FpSnapshotDoWrite = SnapshotDoWrite; + + return pFsm; +} + +SWal* createWal(char* path, int32_t vgId) { + SWalCfg walCfg; + memset(&walCfg, 0, sizeof(SWalCfg)); + walCfg.vgId = vgId; + walCfg.fsyncPeriod = 1000; + walCfg.retentionPeriod = 1000; + walCfg.rollPeriod = 1000; + walCfg.retentionSize = 1000; + walCfg.segSize = 1000; + walCfg.level = TAOS_WAL_FSYNC; + SWal* pWal = walOpen(path, &walCfg); + assert(pWal != NULL); + return pWal; +} + +int64_t createSyncNode(int32_t replicaNum, int32_t myIndex, int32_t vgId, SWal* pWal, char* path, bool isStandBy, + bool enableSnapshot) { + SSyncInfo syncInfo; + syncInfo.vgId = vgId; + syncInfo.msgcb = &gSyncIO->msgcb; + syncInfo.FpSendMsg = syncIOSendMsg; + syncInfo.FpEqMsg = syncIOEqMsg; + syncInfo.pFsm = createFsm(); + snprintf(syncInfo.path, sizeof(syncInfo.path), "%s_sync_replica%d_index%d", path, replicaNum, myIndex); + syncInfo.pWal = pWal; + syncInfo.isStandBy = isStandBy; + syncInfo.snapshotEnable = enableSnapshot; + + SSyncCfg* pCfg = &syncInfo.syncCfg; + +#if 0 + { + pCfg->myIndex = myIndex; + pCfg->replicaNum = replicaNum; + + for (int i = 0; i < replicaNum; ++i) { + pCfg->nodeInfo[i].nodePort = gPorts[i]; + taosGetFqdn(pCfg->nodeInfo[i].nodeFqdn); + // snprintf(pCfg->nodeInfo[i].nodeFqdn, sizeof(pCfg->nodeInfo[i].nodeFqdn), "%s", "127.0.0.1"); + } + } +#endif + + if (isStandBy) { + pCfg->myIndex = 0; + pCfg->replicaNum = 1; + pCfg->nodeInfo[0].nodePort = gPorts[myIndex]; + taosGetFqdn(pCfg->nodeInfo[0].nodeFqdn); + + } else { + pCfg->myIndex = myIndex; + pCfg->replicaNum = replicaNum; + + for (int i = 0; i < replicaNum; ++i) { + pCfg->nodeInfo[i].nodePort = gPorts[i]; + taosGetFqdn(pCfg->nodeInfo[i].nodeFqdn); + // snprintf(pCfg->nodeInfo[i].nodeFqdn, sizeof(pCfg->nodeInfo[i].nodeFqdn), "%s", "127.0.0.1"); + } + } + + + int64_t rid = syncOpen(&syncInfo); + assert(rid > 0); + + SSyncNode* pSyncNode = (SSyncNode*)syncNodeAcquire(rid); + assert(pSyncNode != NULL); + gSyncIO->FpOnSyncPing = pSyncNode->FpOnPing; + gSyncIO->FpOnSyncPingReply = pSyncNode->FpOnPingReply; + gSyncIO->FpOnSyncTimeout = pSyncNode->FpOnTimeout; + gSyncIO->FpOnSyncClientRequest = pSyncNode->FpOnClientRequest; + + gSyncIO->FpOnSyncRequestVote = pSyncNode->FpOnRequestVote; + gSyncIO->FpOnSyncRequestVoteReply = pSyncNode->FpOnRequestVoteReply; + gSyncIO->FpOnSyncAppendEntries = pSyncNode->FpOnAppendEntries; + gSyncIO->FpOnSyncAppendEntriesReply = pSyncNode->FpOnAppendEntriesReply; + + gSyncIO->FpOnSyncSnapshotSend = pSyncNode->FpOnSnapshotSend; + gSyncIO->FpOnSyncSnapshotRsp = pSyncNode->FpOnSnapshotRsp; + + gSyncIO->pSyncNode = pSyncNode; + syncNodeRelease(pSyncNode); + + return rid; +} + +void configChange(int64_t rid, int32_t newReplicaNum, int32_t myIndex) { + SSyncCfg syncCfg; + + syncCfg.myIndex = myIndex; + syncCfg.replicaNum = newReplicaNum; + + for (int i = 0; i < newReplicaNum; ++i) { + syncCfg.nodeInfo[i].nodePort = gPorts[i]; + taosGetFqdn(syncCfg.nodeInfo[i].nodeFqdn); + } + + syncReconfig(rid, &syncCfg); +} + +void usage(char* exe) { + printf( + "usage: %s replicaNum(1-5) myIndex(0-..) enableSnapshot(0/1) lastApplyIndex(>=-1) lastApplyTerm(>=0) " + "writeRecordNum(>=0) " + "isStandBy(0/1) isConfigChange(0-5) iterTimes(>=0) finishLastApplyIndex(>=-1) finishLastApplyTerm(>=0) \n", + exe); +} + +SRpcMsg* createRpcMsg(int i, int count, int myIndex) { + SRpcMsg* pMsg = (SRpcMsg*)taosMemoryMalloc(sizeof(SRpcMsg)); + memset(pMsg, 0, sizeof(SRpcMsg)); + pMsg->msgType = 9999; + pMsg->contLen = 256; + pMsg->pCont = rpcMallocCont(pMsg->contLen); + snprintf((char*)(pMsg->pCont), pMsg->contLen, "value-myIndex:%u-%d-%d-%ld", myIndex, i, count, taosGetTimestampMs()); + return pMsg; +} + +int main(int argc, char** argv) { + sprintf(tsTempDir, "%s", "."); + tsAsyncLog = 0; + sDebugFlag = DEBUG_SCREEN + DEBUG_FILE + DEBUG_TRACE + DEBUG_INFO + DEBUG_ERROR; + + if (argc != 12) { + usage(argv[0]); + exit(-1); + } + + int32_t replicaNum = atoi(argv[1]); + int32_t myIndex = atoi(argv[2]); + bool enableSnapshot = atoi(argv[3]); + int32_t lastApplyIndex = atoi(argv[4]); + int32_t lastApplyTerm = atoi(argv[5]); + int32_t writeRecordNum = atoi(argv[6]); + bool isStandBy = atoi(argv[7]); + int32_t isConfigChange = atoi(argv[8]); + int32_t iterTimes = atoi(argv[9]); + int32_t finishLastApplyIndex = atoi(argv[10]); + int32_t finishLastApplyTerm = atoi(argv[11]); + + sTrace( + "args: replicaNum:%d, myIndex:%d, enableSnapshot:%d, lastApplyIndex:%d, lastApplyTerm:%d, writeRecordNum:%d, " + "isStandBy:%d, isConfigChange:%d, iterTimes:%d, finishLastApplyIndex:%d, finishLastApplyTerm:%d", + replicaNum, myIndex, enableSnapshot, lastApplyIndex, lastApplyTerm, writeRecordNum, isStandBy, isConfigChange, + iterTimes, finishLastApplyIndex, finishLastApplyTerm); + + // check parameter + assert(replicaNum >= 1 && replicaNum <= 5); + // assert(myIndex >= 0 && myIndex < replicaNum); + assert(lastApplyIndex >= -1); + assert(lastApplyTerm >= 0); + assert(writeRecordNum >= 0); + assert(isConfigChange >= 0 && isConfigChange <= 5); + assert(iterTimes >= 0); + assert(finishLastApplyIndex >= -1); + assert(finishLastApplyTerm >= 0); + + char logFile[256]; + snprintf(logFile, sizeof(logFile), "/tmp/%s-replicaNum%d-myIndex%d.log", gDir, replicaNum, myIndex); + taosInitLog(logFile, 100); + sTrace("logFile : %s", logFile); + + gSnapshotLastApplyIndex = lastApplyIndex; + gSnapshotLastApplyTerm = lastApplyTerm; + gIterTimes = iterTimes; + + gFinishLastApplyIndex = finishLastApplyIndex; + gFinishLastApplyTerm = finishLastApplyTerm; + + init(); + int32_t ret = syncIOStart((char*)"127.0.0.1", gPorts[myIndex]); + assert(ret == 0); + + char walPath[128]; + snprintf(walPath, sizeof(walPath), "%s_wal_replica%d_index%d", gDir, replicaNum, myIndex); + SWal* pWal = createWal(walPath, gVgId); + + int64_t rid = createSyncNode(replicaNum, myIndex, gVgId, pWal, (char*)gDir, isStandBy, enableSnapshot); + assert(rid > 0); + syncStart(rid); + + SSyncNode* pSyncNode = (SSyncNode*)syncNodeAcquire(rid); + assert(pSyncNode != NULL); + + if (isConfigChange > 0) { + configChange(rid, isConfigChange, myIndex); + } + + //--------------------------- + int32_t alreadySend = 0; + while (1) { + char* simpleStr = syncNode2SimpleStr(pSyncNode); + + if (alreadySend < writeRecordNum) { + SRpcMsg* pRpcMsg = createRpcMsg(alreadySend, writeRecordNum, myIndex); + int32_t ret = syncPropose(rid, pRpcMsg, false); + if (ret == TAOS_SYNC_PROPOSE_NOT_LEADER) { + sTrace("%s value%d write not leader", simpleStr, alreadySend); + } else { + assert(ret == 0); + sTrace("%s value%d write ok", simpleStr, alreadySend); + } + alreadySend++; + + rpcFreeCont(pRpcMsg->pCont); + taosMemoryFree(pRpcMsg); + } else { + sTrace("%s", simpleStr); + } + + taosMsleep(1000); + taosMemoryFree(simpleStr); + taosMsleep(1000); + } + + syncNodeRelease(pSyncNode); + syncStop(rid); + walClose(pWal); + syncIOStop(); + cleanup(); + taosCloseLog(); + return 0; +} diff --git a/source/libs/sync/test/syncTimeoutTest.cpp b/source/libs/sync/test/syncTimeoutTest.cpp index 30f25bd1d8..e60fabe38b 100644 --- a/source/libs/sync/test/syncTimeoutTest.cpp +++ b/source/libs/sync/test/syncTimeoutTest.cpp @@ -78,6 +78,26 @@ void test5() { syncTimeoutDestroy(pMsg2); } +void test6() { + SyncTimeout *pMsg = createMsg(); + char * jsonStr = syncTimeout2Str(pMsg); + sTrace("jsonStr: %s", jsonStr); + + syncUtilJson2Line(jsonStr); + sTrace("jsonStr: %s", jsonStr); + + char str[10]; + snprintf(str, sizeof(str), "%s", "{}"); + sTrace("str: %s", str); + syncUtilJson2Line(str); + sTrace("str: %s", str); + + snprintf(str, sizeof(str), "%s", ""); + sTrace("str: %s", str); + syncUtilJson2Line(str); + sTrace("str: %s", str); +} + int main() { tsAsyncLog = 0; sDebugFlag = DEBUG_TRACE + DEBUG_SCREEN + DEBUG_FILE; @@ -88,6 +108,7 @@ int main() { test3(); test4(); test5(); + test6(); return 0; } diff --git a/source/libs/wal/inc/walInt.h b/source/libs/wal/inc/walInt.h index 84fe2814ff..7ca105ff2b 100644 --- a/source/libs/wal/inc/walInt.h +++ b/source/libs/wal/inc/walInt.h @@ -132,6 +132,7 @@ static inline void walResetVer(SWalVer* pVer) { int walLoadMeta(SWal* pWal); int walSaveMeta(SWal* pWal); +int walRemoveMeta(SWal* pWal); int walRollFileInfo(SWal* pWal); int walCheckAndRepairMeta(SWal* pWal); diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c index 9aa848a7bb..8e9cb3a84b 100644 --- a/source/libs/wal/src/walMeta.c +++ b/source/libs/wal/src/walMeta.c @@ -419,3 +419,12 @@ int walLoadMeta(SWal* pWal) { taosMemoryFree(buf); return code; } + +int walRemoveMeta(SWal* pWal) { + int metaVer = walFindCurMetaVer(pWal); + if (metaVer == -1) return 0; + char fnameStr[WAL_FILE_LEN]; + walBuildMetaName(pWal, metaVer, fnameStr); + taosRemoveFile(fnameStr); + return 0; +} diff --git a/source/libs/wal/src/walMgmt.c b/source/libs/wal/src/walMgmt.c index 71cd6de73f..9505b02806 100644 --- a/source/libs/wal/src/walMgmt.c +++ b/source/libs/wal/src/walMgmt.c @@ -75,7 +75,7 @@ void walCleanUp() { } SWal *walOpen(const char *path, SWalCfg *pCfg) { - SWal *pWal = taosMemoryMalloc(sizeof(SWal)); + SWal *pWal = taosMemoryCalloc(1, sizeof(SWal)); if (pWal == NULL) { terrno = TAOS_SYSTEM_ERROR(errno); return NULL; @@ -92,6 +92,13 @@ SWal *walOpen(const char *path, SWalCfg *pCfg) { return NULL; } + // init ref + pWal->pRefHash = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), true, HASH_ENTRY_LOCK); + if (pWal->pRefHash == NULL) { + taosMemoryFree(pWal); + return NULL; + } + // open meta walResetVer(&pWal->vers); pWal->pWriteLogTFile = NULL; @@ -100,6 +107,7 @@ SWal *walOpen(const char *path, SWalCfg *pCfg) { pWal->fileInfoSet = taosArrayInit(8, sizeof(SWalFileInfo)); if (pWal->fileInfoSet == NULL) { wError("vgId:%d, path:%s, failed to init taosArray %s", pWal->cfg.vgId, pWal->path, strerror(errno)); + taosHashCleanup(pWal->pRefHash); taosMemoryFree(pWal); return NULL; } @@ -115,12 +123,14 @@ SWal *walOpen(const char *path, SWalCfg *pCfg) { if (taosThreadMutexInit(&pWal->mutex, NULL) < 0) { taosArrayDestroy(pWal->fileInfoSet); + taosHashCleanup(pWal->pRefHash); taosMemoryFree(pWal); return NULL; } pWal->refId = taosAddRef(tsWal.refSetId, pWal); if (pWal->refId < 0) { + taosHashCleanup(pWal->pRefHash); taosThreadMutexDestroy(&pWal->mutex); taosArrayDestroy(pWal->fileInfoSet); taosMemoryFree(pWal); @@ -130,6 +140,7 @@ SWal *walOpen(const char *path, SWalCfg *pCfg) { walLoadMeta(pWal); if (walCheckAndRepairMeta(pWal) < 0) { + taosHashCleanup(pWal->pRefHash); taosRemoveRef(tsWal.refSetId, pWal->refId); taosThreadMutexDestroy(&pWal->mutex); taosArrayDestroy(pWal->fileInfoSet); @@ -175,6 +186,7 @@ void walClose(SWal *pWal) { walSaveMeta(pWal); taosArrayDestroy(pWal->fileInfoSet); pWal->fileInfoSet = NULL; + taosHashCleanup(pWal->pRefHash); taosThreadMutexUnlock(&pWal->mutex); taosRemoveRef(tsWal.refSetId, pWal->refId); diff --git a/source/libs/wal/src/walWrite.c b/source/libs/wal/src/walWrite.c index 793ab8b2fb..d30e0b6844 100644 --- a/source/libs/wal/src/walWrite.c +++ b/source/libs/wal/src/walWrite.c @@ -18,12 +18,47 @@ #include "tchecksum.h" #include "walInt.h" -void walRestoreFromSnapshot(SWal *pWal, int64_t ver) { - /*pWal->vers.firstVer = -1;*/ +int32_t walRestoreFromSnapshot(SWal *pWal, int64_t ver) { + taosThreadMutexLock(&pWal->mutex); + + void *pIter = NULL; + while (1) { + taosHashIterate(pWal->pRefHash, pIter); + if (pIter == NULL) break; + SWalRef *pRef = (SWalRef *)pIter; + if (pRef->ver != -1) { + taosHashCancelIterate(pWal->pRefHash, pIter); + return -1; + } + } + + taosCloseFile(&pWal->pWriteLogTFile); + taosCloseFile(&pWal->pWriteIdxTFile); + + if (pWal->vers.firstVer != -1) { + int32_t fileSetSize = taosArrayGetSize(pWal->fileInfoSet); + for (int32_t i = 0; i < fileSetSize; i++) { + SWalFileInfo *pFileInfo = taosArrayGet(pWal->fileInfoSet, i); + char fnameStr[WAL_FILE_LEN]; + walBuildLogName(pWal, pFileInfo->firstVer, fnameStr); + taosRemoveFile(fnameStr); + } + } + walRemoveMeta(pWal); + + pWal->writeCur = -1; + pWal->totSize = 0; + pWal->lastRollSeq = -1; + + taosArrayClear(pWal->fileInfoSet); + pWal->vers.firstVer = -1; pWal->vers.lastVer = ver; pWal->vers.commitVer = ver - 1; pWal->vers.snapshotVer = ver - 1; pWal->vers.verInSnapshotting = -1; + + taosThreadMutexUnlock(&pWal->mutex); + return 0; } int32_t walCommit(SWal *pWal, int64_t ver) { diff --git a/tests/script/jenkins/basic.txt b/tests/script/jenkins/basic.txt index 28bc98a972..5a8cf562a0 100644 --- a/tests/script/jenkins/basic.txt +++ b/tests/script/jenkins/basic.txt @@ -22,6 +22,7 @@ # ---- dnode ./test.sh -f tsim/dnode/create_dnode.sim +./test.sh -f tsim/dnode/drop_dnode_mnode.sim # ---- insert ./test.sh -f tsim/insert/basic0.sim @@ -56,7 +57,7 @@ # ---- mnode ./test.sh -f tsim/mnode/basic1.sim -./test.sh -f tsim/mnode/basic2.sim +#./test.sh -f tsim/mnode/basic2.sim ./test.sh -f tsim/mnode/basic3.sim ./test.sh -f tsim/mnode/basic4.sim diff --git a/tests/script/tsim/dnode/drop_dnode_mnode.sim b/tests/script/tsim/dnode/drop_dnode_mnode.sim new file mode 100644 index 0000000000..e0a85b9803 --- /dev/null +++ b/tests/script/tsim/dnode/drop_dnode_mnode.sim @@ -0,0 +1,52 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/deploy.sh -n dnode2 -i 2 +system sh/exec.sh -n dnode1 -s start +system sh/exec.sh -n dnode2 -s start +sql connect + +print =============== step1 create dnode2 +sql create dnode $hostname port 7200 + +$x = 0 +step1: + $ = $x + 1 + sleep 1000 + if $x == 10 then + print ====> dnode not ready! + return -1 + endi +sql show dnodes +print ===> $data00 $data01 $data02 $data03 $data04 $data05 +print ===> $data10 $data11 $data12 $data13 $data14 $data15 +if $rows != 2 then + return -1 +endi +if $data(1)[4] != ready then + goto step1 +endi +if $data(2)[4] != ready then + goto step1 +endi + +sql create dnode $hostname port 7300 +sql drop dnode 3 +sql_error drop dnode 1 + +print =============== step2: create mnode +sql create mnode on dnode 2 + +print =============== step3: drop dnode 3 +sql drop dnode 2 +sql show dnodes; +if $rows != 1 then + return -1 +endi + +if $data00 != 1 then + return -1 +endi + +return +system sh/exec.sh -n dnode1 -s stop -x SIGINT +system sh/exec.sh -n dnode2 -s stop -x SIGINT diff --git a/tests/script/tsim/mnode/basic3.sim b/tests/script/tsim/mnode/basic3.sim index bc70cd7a85..dec036faaf 100644 --- a/tests/script/tsim/mnode/basic3.sim +++ b/tests/script/tsim/mnode/basic3.sim @@ -39,8 +39,11 @@ endi print =============== step2: create mnode 2 sql create mnode on dnode 2 sql create mnode on dnode 3 +return +system sh/exec.sh -n dnode1 -s stop -x SIGKILL sql_error create mnode on dnode 4 + $x = 0 step2: $x = $x + 1 @@ -147,4 +150,4 @@ endi system sh/exec.sh -n dnode1 -s stop system sh/exec.sh -n dnode2 -s stop system sh/exec.sh -n dnode3 -s stop -system sh/exec.sh -n dnode4 -s stop \ No newline at end of file +system sh/exec.sh -n dnode4 -s stop From 63269c19de77c016adbf5d36f71b8e6d3724f0bb Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Jun 2022 16:45:26 +0800 Subject: [PATCH 05/16] fix(query): fix invalid write. --- source/dnode/vnode/src/tsdb/tsdbRead.c | 5 +++++ source/libs/executor/src/executorimpl.c | 25 ++++++------------------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 4a433a557b..d2c60047d7 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -3163,6 +3163,11 @@ bool tsdbNextDataBlock(tsdbReaderT pHandle) { size_t numOfCols = taosArrayGetSize(pTsdbReadHandle->pColumns); for (int32_t i = 0; i < numOfCols; ++i) { SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i); + int32_t code = colInfoDataEnsureCapacity(pColInfo, 0, pTsdbReadHandle->outputCapacity); + if (code != TSDB_CODE_SUCCESS) { + // todo handle error + ASSERT(0); + } colInfoDataCleanup(pColInfo, pTsdbReadHandle->outputCapacity); } diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 13f1b976dc..9a002d2d2f 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1819,7 +1819,7 @@ void setResultRowInitCtx(SResultRow* pResult, SqlFunctionCtx* pCtx, int32_t numO } } -static void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const int8_t* rowRes, bool keep, bool needFree); +static void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const int8_t* rowRes, bool keep); void doFilter(const SNode* pFilterNode, SSDataBlock* pBlock, bool needFree) { if (pFilterNode == NULL) { @@ -1840,11 +1840,11 @@ void doFilter(const SNode* pFilterNode, SSDataBlock* pBlock, bool needFree) { bool keep = filterExecute(filter, pBlock, &rowRes, NULL, param1.numOfCols); filterFreeInfo(filter); - extractQualifiedTupleByFilterResult(pBlock, rowRes, keep, needFree); + extractQualifiedTupleByFilterResult(pBlock, rowRes, keep); blockDataUpdateTsWindow(pBlock, 0); } -void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const int8_t* rowRes, bool keep, bool needFree) { +void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const int8_t* rowRes, bool keep) { if (keep) { return; } @@ -1884,17 +1884,9 @@ void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const int8_t* rowR ASSERT(pBlock->info.rows == numOfRows); } - SColumnInfoData tmp = *pSrc; - *pSrc = *pDst; - *pDst = tmp; - - if (!needFree) { - if (IS_VAR_DATA_TYPE(pDst->info.type)) { // this elements do not need free - pDst->varmeta.offset = NULL; - } else { - pDst->nullbitmap = NULL; - } - pDst->pData = NULL; + // write back + if (pBlock->info.rows > 0) { + colDataAssign(pSrc, pDst, pBlock->info.rows); } } blockDataDestroy(px); // fix memory leak @@ -2086,11 +2078,6 @@ static int32_t compressQueryColData(SColumnInfoData* pColRes, int32_t numOfRows, } int32_t doFillTimeIntervalGapsInResults(struct SFillInfo* pFillInfo, SSDataBlock* pBlock, int32_t capacity) { - // for(int32_t i = 0; i < pFillInfo->numOfCols; ++i) { - // SColumnInfoData* pColInfoData = taosArrayGet(pOutput->pDataBlock, i); - // p[i] = pColInfoData->pData + (pColInfoData->info.bytes * pOutput->info.rows); - // } - int32_t numOfRows = (int32_t)taosFillResultDataBlock(pFillInfo, pBlock, capacity - pBlock->info.rows); pBlock->info.rows += numOfRows; From 516750bd42c4b374fbdd4b8f35905adf784560d4 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Jun 2022 23:58:17 +0800 Subject: [PATCH 06/16] refactor: remove unnecessary check. --- source/libs/executor/src/executorimpl.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 9a002d2d2f..368f6c3aba 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1885,9 +1885,7 @@ void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const int8_t* rowR } // write back - if (pBlock->info.rows > 0) { - colDataAssign(pSrc, pDst, pBlock->info.rows); - } + colDataAssign(pSrc, pDst, pBlock->info.rows); } blockDataDestroy(px); // fix memory leak } else { From afe612e369a2f6fd9544a1d34b23d4d10ab8322c Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 Jun 2022 00:51:59 +0800 Subject: [PATCH 07/16] refactor: remove unnecessary check. --- source/common/src/tdatablock.c | 3 ++- source/dnode/vnode/src/tsdb/tsdbRead.c | 5 ----- source/libs/executor/src/executorimpl.c | 10 ++++------ 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 66b12678ab..38be0b8f0a 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -294,7 +294,7 @@ int32_t colDataMergeCol(SColumnInfoData* pColumnInfoData, uint32_t numOfRow1, in int32_t colDataAssign(SColumnInfoData* pColumnInfoData, const SColumnInfoData* pSource, int32_t numOfRows) { ASSERT(pColumnInfoData != NULL && pSource != NULL && pColumnInfoData->info.type == pSource->info.type); - if (numOfRows == 0) { + if (numOfRows <= 0) { return numOfRows; } @@ -322,6 +322,7 @@ int32_t colDataAssign(SColumnInfoData* pColumnInfoData, const SColumnInfoData* p pColumnInfoData->varmeta.length = pSource->varmeta.length; } else { char* tmp = taosMemoryRealloc(pColumnInfoData->nullbitmap, BitmapLen(numOfRows)); + printf("----------------%d\n", BitmapLen(numOfRows)); if (tmp == NULL) { return TSDB_CODE_OUT_OF_MEMORY; } diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index d2c60047d7..4a433a557b 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -3163,11 +3163,6 @@ bool tsdbNextDataBlock(tsdbReaderT pHandle) { size_t numOfCols = taosArrayGetSize(pTsdbReadHandle->pColumns); for (int32_t i = 0; i < numOfCols; ++i) { SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i); - int32_t code = colInfoDataEnsureCapacity(pColInfo, 0, pTsdbReadHandle->outputCapacity); - if (code != TSDB_CODE_SUCCESS) { - // todo handle error - ASSERT(0); - } colInfoDataCleanup(pColInfo, pTsdbReadHandle->outputCapacity); } diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 368f6c3aba..69a61b413d 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1850,14 +1850,12 @@ void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const int8_t* rowR } if (rowRes != NULL) { - SSDataBlock* px = createOneDataBlock(pBlock, false); - blockDataEnsureCapacity(px, pBlock->info.rows); + SSDataBlock* px = createOneDataBlock(pBlock, true); int32_t totalRows = pBlock->info.rows; - for (int32_t i = 0; i < pBlock->info.numOfCols; ++i) { - SColumnInfoData* pDst = taosArrayGet(px->pDataBlock, i); - SColumnInfoData* pSrc = taosArrayGet(pBlock->pDataBlock, i); + SColumnInfoData* pSrc = taosArrayGet(px->pDataBlock, i); + SColumnInfoData* pDst = taosArrayGet(pBlock->pDataBlock, i); // it is a reserved column for scalar function, and no data in this column yet. if (pSrc->pData == NULL) { @@ -1885,7 +1883,7 @@ void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const int8_t* rowR } // write back - colDataAssign(pSrc, pDst, pBlock->info.rows); +// colDataAssign(pSrc, pDst, pBlock->info.rows); } blockDataDestroy(px); // fix memory leak } else { From 514e86ff26dde68298747e3d49f79d6829f3397a Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 Jun 2022 11:28:59 +0800 Subject: [PATCH 08/16] fix(query): clear flag before assign data. --- source/libs/executor/src/executorimpl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 69a61b413d..eda24cb0d6 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1856,12 +1856,13 @@ void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const int8_t* rowR for (int32_t i = 0; i < pBlock->info.numOfCols; ++i) { SColumnInfoData* pSrc = taosArrayGet(px->pDataBlock, i); SColumnInfoData* pDst = taosArrayGet(pBlock->pDataBlock, i); - // it is a reserved column for scalar function, and no data in this column yet. if (pSrc->pData == NULL) { continue; } + colInfoDataCleanup(pDst, pBlock->info.rows); + int32_t numOfRows = 0; for (int32_t j = 0; j < totalRows; ++j) { if (rowRes[j] == 0) { From 79552ed65d0d7533c48f37ac6312733fc65f11e8 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 Jun 2022 11:35:42 +0800 Subject: [PATCH 09/16] fix(query): add null pointer check. --- source/common/src/tdatablock.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 38be0b8f0a..61b79bb1df 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -1233,6 +1233,10 @@ SSDataBlock* createOneDataBlock(const SSDataBlock* pDataBlock, bool copyData) { SColumnInfoData* pDst = taosArrayGet(pBlock->pDataBlock, i); SColumnInfoData* pSrc = taosArrayGet(pDataBlock->pDataBlock, i); + if (pSrc->pData== NULL) { + continue; + } + int32_t code = colInfoDataEnsureCapacity(pDst, 0, pDataBlock->info.rows); if (code != TSDB_CODE_SUCCESS) { return NULL; From f30ae95b9dcf1f8a6da1618e03424f95792e9326 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 Jun 2022 11:36:13 +0800 Subject: [PATCH 10/16] fix(query): add null pointer check. --- source/common/src/tdatablock.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 61b79bb1df..04de2738e1 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -1233,15 +1233,14 @@ SSDataBlock* createOneDataBlock(const SSDataBlock* pDataBlock, bool copyData) { SColumnInfoData* pDst = taosArrayGet(pBlock->pDataBlock, i); SColumnInfoData* pSrc = taosArrayGet(pDataBlock->pDataBlock, i); - if (pSrc->pData== NULL) { - continue; - } - int32_t code = colInfoDataEnsureCapacity(pDst, 0, pDataBlock->info.rows); if (code != TSDB_CODE_SUCCESS) { return NULL; } + if (pSrc->pData== NULL) { + continue; + } colDataAssign(pDst, pSrc, pDataBlock->info.rows); } From 80b7ba902ced89ecfca85dcffd38793e55040d47 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 Jun 2022 13:48:18 +0800 Subject: [PATCH 11/16] fix(query): add null pointer check. --- source/common/src/tdatablock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 04de2738e1..c1f2726629 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -1238,7 +1238,7 @@ SSDataBlock* createOneDataBlock(const SSDataBlock* pDataBlock, bool copyData) { return NULL; } - if (pSrc->pData== NULL) { + if (pSrc->pData== NULL || pDst->pData == NULL) { continue; } colDataAssign(pDst, pSrc, pDataBlock->info.rows); From 94c8ca1e068bc8aa26baa70dce61c29be35fa15b Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 Jun 2022 14:10:00 +0800 Subject: [PATCH 12/16] fix(query): add null pointer check. --- source/common/src/tdatablock.c | 2 +- source/libs/executor/src/executorimpl.c | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index c1f2726629..5b71578190 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -1238,7 +1238,7 @@ SSDataBlock* createOneDataBlock(const SSDataBlock* pDataBlock, bool copyData) { return NULL; } - if (pSrc->pData== NULL || pDst->pData == NULL) { + if (pSrc->pData == NULL) { continue; } colDataAssign(pDst, pSrc, pDataBlock->info.rows); diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index eda24cb0d6..516824a446 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1857,7 +1857,7 @@ void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const int8_t* rowR SColumnInfoData* pSrc = taosArrayGet(px->pDataBlock, i); SColumnInfoData* pDst = taosArrayGet(pBlock->pDataBlock, i); // it is a reserved column for scalar function, and no data in this column yet. - if (pSrc->pData == NULL) { + if (pDst->pData == NULL) { continue; } @@ -1882,10 +1882,8 @@ void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const int8_t* rowR } else { ASSERT(pBlock->info.rows == numOfRows); } - - // write back -// colDataAssign(pSrc, pDst, pBlock->info.rows); } + blockDataDestroy(px); // fix memory leak } else { // do nothing From d210ef1c0eb9e64f63559833270b1395b61f37d3 Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao@163.com> Date: Sat, 11 Jun 2022 14:27:45 +0800 Subject: [PATCH 13/16] feat(stream): stream max delay --- include/common/tcommon.h | 1 + include/common/tmsg.h | 1 + source/dnode/mnode/impl/src/mndScheduler.c | 20 ++--- source/libs/executor/src/timewindowoperator.c | 80 +++++++++++-------- 4 files changed, 59 insertions(+), 43 deletions(-) diff --git a/include/common/tcommon.h b/include/common/tcommon.h index 362f56c865..cc51f96f6c 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -50,6 +50,7 @@ typedef enum EStreamType { STREAM_INVERT, STREAM_REPROCESS, STREAM_INVALID, + STREAM_GET_ALL, } EStreamType; typedef struct { diff --git a/include/common/tmsg.h b/include/common/tmsg.h index d5946516e8..e051a43f21 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -1496,6 +1496,7 @@ typedef struct { #define STREAM_TRIGGER_AT_ONCE 1 #define STREAM_TRIGGER_WINDOW_CLOSE 2 +#define STREAM_TRIGGER_MAX_DELAY 3 typedef struct { char name[TSDB_TABLE_FNAME_LEN]; diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index deb7382183..d41928e909 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -167,17 +167,17 @@ int32_t mndAddDispatcherToInnerTask(SMnode* pMnode, STrans* pTrans, SStreamObj* } } } - } else { - pTask->dispatchType = TASK_DISPATCH__FIXED; - pTask->dispatchMsgType = TDMT_STREAM_TASK_DISPATCH; - SArray* pArray = taosArrayGetP(pStream->tasks, 0); - // one sink only - ASSERT(taosArrayGetSize(pArray) == 1); - SStreamTask* lastLevelTask = taosArrayGetP(pArray, 0); - pTask->fixedEpDispatcher.taskId = lastLevelTask->taskId; - pTask->fixedEpDispatcher.nodeId = lastLevelTask->nodeId; - pTask->fixedEpDispatcher.epSet = lastLevelTask->epSet; } + } else { + pTask->dispatchType = TASK_DISPATCH__FIXED; + pTask->dispatchMsgType = TDMT_STREAM_TASK_DISPATCH; + SArray* pArray = taosArrayGetP(pStream->tasks, 0); + // one sink only + ASSERT(taosArrayGetSize(pArray) == 1); + SStreamTask* lastLevelTask = taosArrayGetP(pArray, 0); + pTask->fixedEpDispatcher.taskId = lastLevelTask->taskId; + pTask->fixedEpDispatcher.nodeId = lastLevelTask->nodeId; + pTask->fixedEpDispatcher.epSet = lastLevelTask->epSet; } return 0; } diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 71e6a3ad09..ab595a3e34 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -750,14 +750,15 @@ int64_t getReskey(void* data, int32_t index) { return *(int64_t*)pos->key; } -static int32_t saveResult(SResultRow* result, uint64_t groupId, SArray* pUpdated) { +static int32_t saveResult(int64_t ts, int32_t pageId, int32_t offset, uint64_t groupId, + SArray* pUpdated) { int32_t size = taosArrayGetSize(pUpdated); - int32_t index = binarySearch(pUpdated, size, result->win.skey, TSDB_ORDER_DESC, getReskey); + int32_t index = binarySearch(pUpdated, size, ts, TSDB_ORDER_DESC, getReskey); if (index == -1) { index = 0; } else { TSKEY resTs = getReskey(pUpdated, index); - if (resTs < result->win.skey) { + if (resTs < ts) { index++; } else { return TSDB_CODE_SUCCESS; @@ -769,14 +770,18 @@ static int32_t saveResult(SResultRow* result, uint64_t groupId, SArray* pUpdated return TSDB_CODE_OUT_OF_MEMORY; } newPos->groupId = groupId; - newPos->pos = (SResultRowPosition){.pageId = result->pageId, .offset = result->offset}; - *(int64_t*)newPos->key = result->win.skey; + newPos->pos = (SResultRowPosition){.pageId = pageId, .offset = offset}; + *(int64_t*)newPos->key = ts; if (taosArrayInsert(pUpdated, index, &newPos) == NULL) { return TSDB_CODE_OUT_OF_MEMORY; } return TSDB_CODE_SUCCESS; } +static int32_t saveResultRow(SResultRow* result, uint64_t groupId, SArray* pUpdated) { + return saveResult(result->win.skey, result->pageId, result->offset, groupId, pUpdated); +} + static void removeResult(SArray* pUpdated, TSKEY key) { int32_t size = taosArrayGetSize(pUpdated); int32_t index = binarySearch(pUpdated, size, key, TSDB_ORDER_DESC, getReskey); @@ -819,7 +824,7 @@ static void hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResul if (pInfo->execModel == OPTR_EXEC_MODEL_STREAM) { if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE) { - saveResult(pResult, tableGroupId, pUpdated); + saveResultRow(pResult, tableGroupId, pUpdated); } } @@ -869,7 +874,7 @@ static void hashIntervalAgg(SOperatorInfo* pOperatorInfo, SResultRowInfo* pResul if (pInfo->execModel == OPTR_EXEC_MODEL_STREAM) { if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE) { - saveResult(pResult, tableGroupId, pUpdated); + saveResultRow(pResult, tableGroupId, pUpdated); } } @@ -1243,6 +1248,23 @@ static void doClearWindows(SAggSupporter* pSup, SOptrBasicInfo* pBinfo, SInterva } } +static int32_t getAllIntervalWindow(SHashObj* pHashMap, SArray* resWins) { + void* pIte = NULL; + size_t keyLen = 0; + while ((pIte = taosHashIterate(pHashMap, pIte)) != NULL) { + void* key = taosHashGetKey(pIte, &keyLen); + uint64_t groupId = *(uint64_t*)key; + ASSERT(keyLen == GET_RES_WINDOW_KEY_LEN(sizeof(TSKEY))); + TSKEY ts = *(int64_t*)((char*)key + sizeof(uint64_t)); + SResultRowPosition* pPos = (SResultRowPosition*)pIte; + int32_t code = saveResult(ts, pPos->pageId, pPos->offset, groupId, resWins); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + return TSDB_CODE_SUCCESS; +} + static int32_t closeIntervalWindow(SHashObj* pHashMap, STimeWindowAggSupp* pSup, SInterval* pInterval, SArray* closeWins) { void* pIte = NULL; @@ -1259,16 +1281,12 @@ static int32_t closeIntervalWindow(SHashObj* pHashMap, STimeWindowAggSupp* pSup, char keyBuf[GET_RES_WINDOW_KEY_LEN(sizeof(TSKEY))]; SET_RES_WINDOW_KEY(keyBuf, &ts, sizeof(TSKEY), groupId); taosHashRemove(pHashMap, keyBuf, keyLen); - SResKeyPos* pos = taosMemoryMalloc(sizeof(SResKeyPos) + sizeof(uint64_t)); - if (pos == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - pos->groupId = groupId; - pos->pos = *(SResultRowPosition*)pIte; - *(int64_t*)pos->key = ts; - if (pSup->calTrigger == STREAM_TRIGGER_WINDOW_CLOSE && !taosArrayPush(closeWins, &pos)) { - taosMemoryFree(pos); - return TSDB_CODE_OUT_OF_MEMORY; + SResultRowPosition* pPos = (SResultRowPosition*)pIte; + if (pSup->calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { + int32_t code = saveResult(ts, pPos->pageId, pPos->offset, groupId, closeWins); + if (code != TSDB_CODE_SUCCESS) { + return code; + } } } } @@ -1296,8 +1314,6 @@ static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) { SOperatorInfo* downstream = pOperator->pDownstream[0]; SArray* pUpdated = taosArrayInit(4, POINTER_BYTES); - SArray* pClosed = taosArrayInit(4, POINTER_BYTES); - while (1) { SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); if (pBlock == NULL) { @@ -1316,19 +1332,18 @@ static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) { doClearWindows(&pInfo->aggSup, &pInfo->binfo, &pInfo->interval, 0, pOperator->numOfExprs, pBlock, NULL); qDebug("%s clear existed time window results for updates checked", GET_TASKID(pTaskInfo)); continue; + } else if (pBlock->info.type == STREAM_GET_ALL && + pInfo->twAggSup.calTrigger == STREAM_TRIGGER_MAX_DELAY) { + getAllIntervalWindow(pInfo->aggSup.pResultRowHashTable, pUpdated); + continue; } pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, pBlock->info.window.ekey); hashIntervalAgg(pOperator, &pInfo->binfo.resultRowInfo, pBlock, MAIN_SCAN, pUpdated); } + closeIntervalWindow(pInfo->aggSup.pResultRowHashTable, &pInfo->twAggSup, &pInfo->interval, pUpdated); - closeIntervalWindow(pInfo->aggSup.pResultRowHashTable, &pInfo->twAggSup, &pInfo->interval, pClosed); - finalizeUpdatedResult(pOperator->numOfExprs, pInfo->aggSup.pResultBuf, pClosed, pInfo->binfo.rowCellInfoOffset); - taosArrayAddAll(pUpdated, pClosed); - - taosArrayDestroy(pClosed); finalizeUpdatedResult(pOperator->numOfExprs, pInfo->aggSup.pResultBuf, pUpdated, pInfo->binfo.rowCellInfoOffset); - initMultiResInfoFromArrayList(&pInfo->groupResInfo, pUpdated); blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); doBuildResultDatablock(pOperator, &pInfo->binfo, &pInfo->groupResInfo, pInfo->aggSup.pResultBuf); @@ -1898,7 +1913,7 @@ static void doHashInterval(SOperatorInfo* pOperatorInfo, SSDataBlock* pSDataBloc forwardRows = getNumOfRowsInTimeWindow(&pSDataBlock->info, tsCols, startPos, nextWin.ekey, binarySearchForKey, NULL, TSDB_ORDER_ASC); if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE && pUpdated) { - saveResult(pResult, tableGroupId, pUpdated); + saveResultRow(pResult, tableGroupId, pUpdated); } // window start(end) key interpolation // doWindowBorderInterpolation(pInfo, pSDataBlock, numOfOutput, pInfo->binfo.pCtx, pResult, &nextWin, startPos, @@ -1993,7 +2008,6 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { SStreamFinalIntervalOperatorInfo* pInfo = pOperator->info; SOperatorInfo* downstream = pOperator->pDownstream[0]; SArray* pUpdated = taosArrayInit(4, POINTER_BYTES); - SArray* pClosed = taosArrayInit(4, POINTER_BYTES); if (pOperator->status == OP_EXEC_DONE) { return NULL; @@ -2042,7 +2056,12 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { copyUpdateDataBlock(pInfo->pUpdateRes, pBlock, pInfo->primaryTsIndex); taosArrayDestroy(pUpWins); break; + } else if (pBlock->info.type == STREAM_GET_ALL && isFinalInterval(pInfo) && + pInfo->twAggSup.calTrigger == STREAM_TRIGGER_MAX_DELAY) { + getAllIntervalWindow(pInfo->aggSup.pResultRowHashTable, pUpdated); + continue; } + if (isFinalInterval(pInfo)) { int32_t chIndex = getChildIndex(pBlock); int32_t size = taosArrayGetSize(pInfo->pChildren); @@ -2064,13 +2083,8 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { } if (isFinalInterval(pInfo)) { - closeIntervalWindow(pInfo->aggSup.pResultRowHashTable, &pInfo->twAggSup, &pInfo->interval, pClosed); - finalizeUpdatedResult(pOperator->numOfExprs, pInfo->aggSup.pResultBuf, pClosed, pInfo->binfo.rowCellInfoOffset); - if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { - taosArrayAddAll(pUpdated, pClosed); - } + closeIntervalWindow(pInfo->aggSup.pResultRowHashTable, &pInfo->twAggSup, &pInfo->interval, pUpdated); } - taosArrayDestroy(pClosed); finalizeUpdatedResult(pOperator->numOfExprs, pInfo->aggSup.pResultBuf, pUpdated, pInfo->binfo.rowCellInfoOffset); initMultiResInfoFromArrayList(&pInfo->groupResInfo, pUpdated); From 0fbf30ce6d105fc7ba88a9faf1a09ea0b39b20d2 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 Jun 2022 14:51:54 +0800 Subject: [PATCH 14/16] refactor: do some internal refactor, and add some check before create exchange operator. --- source/client/src/clientImpl.c | 3 +-- source/client/test/clientTests.cpp | 4 ++-- source/common/src/tdatablock.c | 1 - source/libs/executor/inc/executorimpl.h | 2 +- source/libs/executor/src/executorimpl.c | 19 +++++++++++-------- source/libs/executor/src/groupoperator.c | 2 +- source/libs/executor/src/scanoperator.c | 6 +++--- source/libs/executor/src/sortoperator.c | 9 +++++++-- source/libs/executor/src/tsort.c | 14 ++++++++++++-- 9 files changed, 38 insertions(+), 22 deletions(-) diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 65a91fe426..dc83f7bc43 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -689,16 +689,15 @@ void launchAsyncQuery(SRequestObj* pRequest, SQuery* pQuery) { .msgLen = ERROR_MSG_BUF_DEFAULT_SIZE}; SAppInstInfo* pAppInfo = getAppInfo(pRequest); - if (TSDB_CODE_SUCCESS == code) { code = qCreateQueryPlan(&cxt, &pRequest->body.pDag, pNodeList); - tscError("0x%"PRIx64" failed to create query plan, code:%s 0x%"PRIx64, pRequest->self, tstrerror(code), pRequest->requestId); } if (TSDB_CODE_SUCCESS == code) { schedulerAsyncExecJob(pAppInfo->pTransporter, pNodeList, pRequest->body.pDag, &pRequest->body.queryJob, pRequest->sqlstr, pRequest->metric.start, schedulerExecCb, pRequest); } else { + tscError("0x%"PRIx64" failed to create query plan, code:%s 0x%"PRIx64, pRequest->self, tstrerror(code), pRequest->requestId); pRequest->body.queryFp(pRequest->body.param, pRequest, code); } diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index f9825c50ff..dc15c0b13d 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -778,9 +778,9 @@ TEST(testCase, async_api_test) { TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); ASSERT_NE(pConn, nullptr); - taos_query(pConn, "use test"); + taos_query(pConn, "use nest"); - TAOS_RES* pRes = taos_query(pConn, "desc abc1.tu"); + TAOS_RES* pRes = taos_query(pConn, "select NOW() from (select * from regular_table_2 where tbname in ('regular_table_2_1') and q_bigint <= 9223372036854775807 and q_tinyint <= 127 and q_bool in ( true , false) ) order by ts;"); if (taos_errno(pRes) != 0) { printf("failed, reason:%s\n", taos_errstr(pRes)); } diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 38be0b8f0a..d8897c3b9c 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -322,7 +322,6 @@ int32_t colDataAssign(SColumnInfoData* pColumnInfoData, const SColumnInfoData* p pColumnInfoData->varmeta.length = pSource->varmeta.length; } else { char* tmp = taosMemoryRealloc(pColumnInfoData->nullbitmap, BitmapLen(numOfRows)); - printf("----------------%d\n", BitmapLen(numOfRows)); if (tmp == NULL) { return TSDB_CODE_OUT_OF_MEMORY; } diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index a8a95b513a..da681bd13b 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -760,7 +760,7 @@ int32_t getTableScanInfo(SOperatorInfo* pOperator, int32_t *order, int32_t* scan int32_t getBufferPgSize(int32_t rowSize, uint32_t* defaultPgsz, uint32_t* defaultBufsz); void doSetOperatorCompleted(SOperatorInfo* pOperator); -void doFilter(const SNode* pFilterNode, SSDataBlock* pBlock, bool needFree); +void doFilter(const SNode* pFilterNode, SSDataBlock* pBlock); SqlFunctionCtx* createSqlFunctionCtx(SExprInfo* pExprInfo, int32_t numOfOutput, int32_t** rowCellInfoOffset); void relocateColumnData(SSDataBlock* pBlock, const SArray* pColMatchInfo, SArray* pCols); void initExecTimeWindowInfo(SColumnInfoData* pColData, STimeWindow* pQueryWindow); diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index eda24cb0d6..b1ad7a1cf2 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1821,7 +1821,7 @@ void setResultRowInitCtx(SResultRow* pResult, SqlFunctionCtx* pCtx, int32_t numO static void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const int8_t* rowRes, bool keep); -void doFilter(const SNode* pFilterNode, SSDataBlock* pBlock, bool needFree) { +void doFilter(const SNode* pFilterNode, SSDataBlock* pBlock) { if (pFilterNode == NULL) { return; } @@ -2829,7 +2829,6 @@ static SSDataBlock* doLoadRemoteData(SOperatorInfo* pOperator) { return seqLoadRemoteData(pOperator); } else { return concurrentlyLoadRemoteDataImpl(pOperator, pExchangeInfo, pTaskInfo); - // return concurrentlyLoadRemoteData(pOperator); } } @@ -2855,9 +2854,14 @@ static int32_t initDataSource(int32_t numOfSources, SExchangeInfo* pInfo) { return TSDB_CODE_SUCCESS; } -static int32_t initExchangeOperator(SExchangePhysiNode* pExNode, SExchangeInfo* pInfo) { +static int32_t initExchangeOperator(SExchangePhysiNode* pExNode, SExchangeInfo* pInfo, const char* id) { size_t numOfSources = LIST_LENGTH(pExNode->pSrcEndPoints); + if (numOfSources == 0) { + qError("%s invalid number: %d of sources in exchange operator", id, (int32_t) numOfSources); + return TSDB_CODE_INVALID_PARA; + } + pInfo->pSources = taosArrayInit(numOfSources, sizeof(SDownstreamSourceNode)); pInfo->pSourceDataInfo = taosArrayInit(numOfSources, sizeof(SSourceDataInfo)); if (pInfo->pSourceDataInfo == NULL || pInfo->pSources == NULL) { @@ -2879,7 +2883,7 @@ SOperatorInfo* createExchangeOperatorInfo(void* pTransporter, SExchangePhysiNode goto _error; } - int32_t code = initExchangeOperator(pExNode, pInfo); + int32_t code = initExchangeOperator(pExNode, pInfo, GET_TASKID(pTaskInfo)); if (code != TSDB_CODE_SUCCESS) { goto _error; } @@ -2908,7 +2912,7 @@ _error: taosMemoryFreeClear(pInfo); taosMemoryFreeClear(pOperator); - pTaskInfo->code = TSDB_CODE_OUT_OF_MEMORY; + pTaskInfo->code = code; return NULL; } @@ -3677,7 +3681,7 @@ static SSDataBlock* doProjectOperation(SOperatorInfo* pOperator) { longjmp(pTaskInfo->env, code); } - doFilter(pProjectInfo->pFilterNode, pBlock, true); + doFilter(pProjectInfo->pFilterNode, pBlock); setInputDataBlock(pOperator, pInfo->pCtx, pBlock, order, scanFlag, false); blockDataEnsureCapacity(pInfo->pRes, pInfo->pRes->info.rows + pBlock->info.rows); @@ -5189,7 +5193,7 @@ int32_t createExecTaskInfoImpl(SSubplan* pPlan, SExecTaskInfo** pTaskInfo, SRead (*pTaskInfo)->pRoot = createOperatorTree(pPlan->pNode, *pTaskInfo, pHandle, queryId, taskId, &(*pTaskInfo)->tableqinfoList, pPlan->pTagCond); if (NULL == (*pTaskInfo)->pRoot) { - code = terrno; + code = (*pTaskInfo)->code; goto _complete; } @@ -5202,7 +5206,6 @@ int32_t createExecTaskInfoImpl(SSubplan* pPlan, SExecTaskInfo** pTaskInfo, SRead _complete: taosMemoryFreeClear(*pTaskInfo); - terrno = code; return code; } diff --git a/source/libs/executor/src/groupoperator.c b/source/libs/executor/src/groupoperator.c index 5965f5d7ad..132f93a6a5 100644 --- a/source/libs/executor/src/groupoperator.c +++ b/source/libs/executor/src/groupoperator.c @@ -359,7 +359,7 @@ static SSDataBlock* hashGroupbyAggregate(SOperatorInfo* pOperator) { while(1) { doBuildResultDatablock(pOperator, &pInfo->binfo, &pInfo->groupResInfo, pInfo->aggSup.pResultBuf); - doFilter(pInfo->pCondition, pRes, true); + doFilter(pInfo->pCondition, pRes); bool hasRemain = hashRemainDataInGroupInfo(&pInfo->groupResInfo); if (!hasRemain) { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 6c513a05b7..637534ad96 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -267,7 +267,7 @@ static int32_t loadDataBlock(SOperatorInfo* pOperator, STableScanInfo* pTableSca } int64_t st = taosGetTimestampMs(); - doFilter(pTableScanInfo->pFilterNode, pBlock, false); + doFilter(pTableScanInfo->pFilterNode, pBlock); int64_t et = taosGetTimestampMs(); pTableScanInfo->readRecorder.filterTime += (et - st); @@ -939,7 +939,7 @@ static SSDataBlock* doStreamBlockScan(SOperatorInfo* pOperator) { addTagPseudoColumnData(&pInfo->readHandle, pInfo->pPseudoExpr, pInfo->numOfPseudoExpr, pInfo->pRes); } - doFilter(pInfo->pCondition, pInfo->pRes, false); + doFilter(pInfo->pCondition, pInfo->pRes); blockDataUpdateTsWindow(pInfo->pRes, 0); break; } @@ -1711,7 +1711,7 @@ static SSDataBlock* doTagScan(SOperatorInfo* pOperator) { } pRes->info.rows = count; - doFilter(pInfo->pFilterNode, pRes, true); + doFilter(pInfo->pFilterNode, pRes); pOperator->resultInfo.totalRows += pRes->info.rows; diff --git a/source/libs/executor/src/sortoperator.c b/source/libs/executor/src/sortoperator.c index dcaa95e28a..ff093741fa 100644 --- a/source/libs/executor/src/sortoperator.c +++ b/source/libs/executor/src/sortoperator.c @@ -296,7 +296,10 @@ int32_t doOpenMultiwaySortMergeOperator(SOperatorInfo* pOperator) { } SSDataBlock* getMultiwaySortedBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlock, int32_t capacity, - SArray* pColMatchInfo, SMultiwaySortMergeOperatorInfo* pInfo) { + SArray* pColMatchInfo, SOperatorInfo* pOperator) { + SMultiwaySortMergeOperatorInfo* pInfo = pOperator->info; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + blockDataCleanup(pDataBlock); SSDataBlock* p = tsortGetSortedDataBlock(pHandle); @@ -354,6 +357,8 @@ SSDataBlock* getMultiwaySortedBlockData(SSortHandle* pHandle, SSDataBlock* pData } blockDataDestroy(p); + + qDebug("%s get sorted row blocks, rows:%d", GET_TASKID(pTaskInfo), pDataBlock->info.rows); return (pDataBlock->info.rows > 0) ? pDataBlock : NULL; } @@ -371,7 +376,7 @@ SSDataBlock* doMultiwaySortMerge(SOperatorInfo* pOperator) { } SSDataBlock* pBlock = getMultiwaySortedBlockData(pInfo->pSortHandle, pInfo->binfo.pRes, - pOperator->resultInfo.capacity, pInfo->pColMatchInfo, pInfo); + pOperator->resultInfo.capacity, pInfo->pColMatchInfo, pOperator); if (pBlock != NULL) { pOperator->resultInfo.totalRows += pBlock->info.rows; diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index ea8ded2c30..f63a9351c6 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -227,6 +227,8 @@ static int32_t sortComparInit(SMsortComparParam* cmpParam, SArray* pSources, int for (int32_t i = 0; i < cmpParam->numOfSources; ++i) { SSortSource* pSource = cmpParam->pSources[i]; pSource->src.pBlock = pHandle->fetchfp(pSource->param); + + // set current source id done if (pSource->src.pBlock == NULL) { pSource->src.rowIndex = -1; ++pHandle->numOfCompletedSources; @@ -426,8 +428,16 @@ static int32_t doInternalMergeSort(SSortHandle* pHandle) { double sortPass = floorl(log2(numOfSources) / log2(pHandle->numOfPages)); pHandle->totalElapsed = taosGetTimestampUs() - pHandle->startTs; - qDebug("%s %d rounds mergesort required to complete the sort, first-round sorted data size:%"PRIzu", sort elapsed:%"PRId64", total elapsed:%"PRId64, - pHandle->idStr, (int32_t) (sortPass + 1), pHandle->pBuf ? getTotalBufSize(pHandle->pBuf) : 0, pHandle->sortElapsed, pHandle->totalElapsed); + + if (sortPass > 0) { + size_t s = pHandle->pBuf ? getTotalBufSize(pHandle->pBuf) : 0; + qDebug("%s %d rounds mergesort required to complete the sort, first-round sorted data size:%" PRIzu + ", sort elapsed:%" PRId64 ", total elapsed:%" PRId64, + pHandle->idStr, (int32_t)(sortPass + 1), s, pHandle->sortElapsed, pHandle->totalElapsed); + } else { + qDebug("%s ordered source:%"PRIzu", available buf:%d, no need internal sort", pHandle->idStr, numOfSources, + pHandle->numOfPages); + } int32_t numOfRows = blockDataGetCapacityInRow(pHandle->pDataBlock, pHandle->pageSize); blockDataEnsureCapacity(pHandle->pDataBlock, numOfRows); From ed829455a9c8cbc889fe35ddbfff7ab8a0983c79 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Sat, 11 Jun 2022 15:15:55 +0800 Subject: [PATCH 15/16] feat: tsma refactor --- include/common/taosdef.h | 1 - include/common/tmsg.h | 45 - include/common/tmsgdef.h | 2 - source/common/src/tmsg.c | 139 +-- source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 1 - source/dnode/mnode/impl/inc/mndDef.h | 2 - source/dnode/mnode/impl/src/mndSma.c | 105 +- source/dnode/vnode/CMakeLists.txt | 1 - source/dnode/vnode/src/inc/sma.h | 62 +- source/dnode/vnode/src/inc/vnodeInt.h | 2 - source/dnode/vnode/src/sma/sma.c | 25 +- source/dnode/vnode/src/sma/smaEnv.c | 84 -- source/dnode/vnode/src/sma/smaTDBImpl.c | 130 --- source/dnode/vnode/src/sma/smaTimeRange.c | 1036 ------------------- source/dnode/vnode/src/sma/smaTimeRange2.c | 839 +-------------- source/dnode/vnode/src/tq/tqPush.c | 13 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 47 - 17 files changed, 18 insertions(+), 2516 deletions(-) delete mode 100644 source/dnode/vnode/src/sma/smaTDBImpl.c delete mode 100644 source/dnode/vnode/src/sma/smaTimeRange.c diff --git a/include/common/taosdef.h b/include/common/taosdef.h index 60b1dc6c10..516df71b0b 100644 --- a/include/common/taosdef.h +++ b/include/common/taosdef.h @@ -98,7 +98,6 @@ extern char *qtypeStr[]; #undef TD_DEBUG_PRINT_ROW #undef TD_DEBUG_PRINT_TSDB_LOAD_DCOLS #undef TD_DEBUG_PRINT_TAG -#define TD_DEBUG_SMA_ID 123456 #ifdef __cplusplus } diff --git a/include/common/tmsg.h b/include/common/tmsg.h index e7d9bc762a..e4e132a532 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -2345,7 +2345,6 @@ typedef struct { char indexName[TSDB_INDEX_NAME_LEN]; int32_t exprLen; int32_t tagsFilterLen; - int32_t numOfVgroups; // for dstVgroup int64_t indexUid; tb_uid_t tableUid; // super/child/common table uid tb_uid_t dstTbUid; // for dstVgroup @@ -2355,7 +2354,6 @@ typedef struct { char* dstTbName; // for dstVgroup char* expr; // sma expression char* tagsFilter; - SVgEpSet* pVgEpSet; // for dstVgroup SSchemaWrapper schemaRow; // for dstVgroup SSchemaWrapper schemaTag; // for dstVgroup } STSma; // Time-range-wise SMA @@ -2442,49 +2440,6 @@ static int32_t tDecodeTSmaWrapper(SDecoder* pDecoder, STSmaWrapper* pReq) { return 0; } -typedef struct { - int64_t indexUid; - STimeWindow queryWindow; -} SVGetTsmaExpWndsReq; - -#define SMA_WNDS_EXPIRE_FLAG (0x1) -#define SMA_WNDS_IS_EXPIRE(flag) (((flag)&SMA_WNDS_EXPIRE_FLAG) != 0) -#define SMA_WNDS_SET_EXPIRE(flag) ((flag) |= SMA_WNDS_EXPIRE_FLAG) - -typedef struct { - int64_t indexUid; - int8_t flags; // 0x1 all window expired - int32_t numExpWnds; - TSKEY wndSKeys[]; -} SVGetTsmaExpWndsRsp; - -int32_t tEncodeSVGetTSmaExpWndsReq(SEncoder* pCoder, const SVGetTsmaExpWndsReq* pReq); -int32_t tDecodeSVGetTsmaExpWndsReq(SDecoder* pCoder, SVGetTsmaExpWndsReq* pReq); -int32_t tEncodeSVGetTSmaExpWndsRsp(SEncoder* pCoder, const SVGetTsmaExpWndsRsp* pReq); -int32_t tDecodeSVGetTsmaExpWndsRsp(SDecoder* pCoder, SVGetTsmaExpWndsRsp* pReq); - -typedef struct { - int64_t nKeys; // n consecutive keys since skey - int64_t skey; -} SVTsmaExpWndItem; - -typedef struct { - int64_t indexUid; - int64_t version; // tsma result version - int64_t nItems; - SVTsmaExpWndItem items[]; -} SVClrTsmaExpWndsReq; - -typedef struct { - int64_t indexUid; - int32_t code; -} SVClrTsmaExpWndsRsp; - -int32_t tEncodeSVClrTsmaExpWndsReq(SEncoder* pCoder, const SVClrTsmaExpWndsReq* pReq); -int32_t tDecodeSVClrTsmaExpWndsReq(SDecoder* pCoder, SVClrTsmaExpWndsReq* pReq); -int32_t tEncodeSVClrTsmaExpWndsRsp(SEncoder* pCoder, const SVClrTsmaExpWndsRsp* pReq); -int32_t tDecodeSVClrTsmaExpWndsRsp(SDecoder* pCoder, SVClrTsmaExpWndsRsp* pReq); - typedef struct { int idx; } SMCreateFullTextReq; diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 7e31076528..a9c816707e 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -190,8 +190,6 @@ enum { TD_DEF_MSG_TYPE(TDMT_VND_CANCEL_SMA, "vnode-cancel-sma", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_DROP_SMA, "vnode-drop-sma", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_SUBMIT_RSMA, "vnode-submit-rsma", SSubmitReq, SSubmitRsp) - TD_DEF_MSG_TYPE(TDMT_VND_GET_TSMA_EXP_WNDS, "vnode-get-tsma-expired-windows", SVGetTsmaExpWndsReq, SVGetTsmaExpWndsRsp) - TD_DEF_MSG_TYPE(TDMT_VND_CLR_TSMA_EXP_WNDS, "vnode-clr-tsma-expired-windows", SVClrTsmaExpWndsReq, SVClrTsmaExpWndsRsp) TD_DEF_MSG_TYPE(TDMT_VND_DELETE, "delete-data", SVDeleteReq, SVDeleteRsp) TD_DEF_MSG_TYPE(TDMT_VND_ALTER_CONFIG, "alter-config", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_ALTER_REPLICA, "alter-replica", NULL, NULL) diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 3fb60f3ff5..d16ab57ea9 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -3877,7 +3877,6 @@ int32_t tEncodeTSma(SEncoder *pCoder, const STSma *pSma) { if (tEncodeCStr(pCoder, pSma->indexName) < 0) return -1; if (tEncodeI32(pCoder, pSma->exprLen) < 0) return -1; if (tEncodeI32(pCoder, pSma->tagsFilterLen) < 0) return -1; - if (tEncodeI32(pCoder, pSma->numOfVgroups) < 0) return -1; if (tEncodeI64(pCoder, pSma->indexUid) < 0) return -1; if (tEncodeI64(pCoder, pSma->tableUid) < 0) return -1; if (tEncodeI64(pCoder, pSma->dstTbUid) < 0) return -1; @@ -3892,22 +3891,8 @@ int32_t tEncodeTSma(SEncoder *pCoder, const STSma *pSma) { if (tEncodeCStr(pCoder, pSma->tagsFilter) < 0) return -1; } - if (pSma->numOfVgroups) { // only needed in dstVgroup - for (int32_t v = 0; v < pSma->numOfVgroups; ++v) { - if (tEncodeI32(pCoder, pSma->pVgEpSet[v].vgId) < 0) return -1; - if (tEncodeI8(pCoder, pSma->pVgEpSet[v].epSet.inUse) < 0) return -1; - int8_t numOfEps = pSma->pVgEpSet[v].epSet.numOfEps; - if (tEncodeI8(pCoder, numOfEps) < 0) return -1; - for (int32_t n = 0; n < numOfEps; ++n) { - const SEp *pEp = &pSma->pVgEpSet[v].epSet.eps[n]; - if (tEncodeCStr(pCoder, pEp->fqdn) < 0) return -1; - if (tEncodeU16(pCoder, pEp->port) < 0) return -1; - } - } - - tEncodeSSchemaWrapper(pCoder, &pSma->schemaRow); - tEncodeSSchemaWrapper(pCoder, &pSma->schemaTag); - } + tEncodeSSchemaWrapper(pCoder, &pSma->schemaRow); + tEncodeSSchemaWrapper(pCoder, &pSma->schemaTag); return 0; } @@ -3921,7 +3906,6 @@ int32_t tDecodeTSma(SDecoder *pCoder, STSma *pSma) { if (tDecodeCStrTo(pCoder, pSma->indexName) < 0) return -1; if (tDecodeI32(pCoder, &pSma->exprLen) < 0) return -1; if (tDecodeI32(pCoder, &pSma->tagsFilterLen) < 0) return -1; - if (tDecodeI32(pCoder, &pSma->numOfVgroups) < 0) return -1; if (tDecodeI64(pCoder, &pSma->indexUid) < 0) return -1; if (tDecodeI64(pCoder, &pSma->tableUid) < 0) return -1; if (tDecodeI64(pCoder, &pSma->dstTbUid) < 0) return -1; @@ -3939,30 +3923,9 @@ int32_t tDecodeTSma(SDecoder *pCoder, STSma *pSma) { } else { pSma->tagsFilter = NULL; } - if (pSma->numOfVgroups > 0) { // only needed in dstVgroup - pSma->pVgEpSet = (SVgEpSet *)tDecoderMalloc(pCoder, pSma->numOfVgroups * sizeof(SVgEpSet)); - if (!pSma->pVgEpSet) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - memset(pSma->pVgEpSet, 0, pSma->numOfVgroups * sizeof(SVgEpSet)); - - for (int32_t v = 0; v < pSma->numOfVgroups; ++v) { - if (tDecodeI32(pCoder, &pSma->pVgEpSet[v].vgId) < 0) return -1; - if (tDecodeI8(pCoder, &pSma->pVgEpSet[v].epSet.inUse) < 0) return -1; - if (tDecodeI8(pCoder, &pSma->pVgEpSet[v].epSet.numOfEps) < 0) return -1; - int8_t numOfEps = pSma->pVgEpSet[v].epSet.numOfEps; - for (int32_t n = 0; n < numOfEps; ++n) { - SEp *pEp = &pSma->pVgEpSet[v].epSet.eps[n]; - if (tDecodeCStrTo(pCoder, pEp->fqdn) < 0) return -1; - if (tDecodeU16(pCoder, &pEp->port) < 0) return -1; - } - } - - tDecodeSSchemaWrapperEx(pCoder, &pSma->schemaRow); - tDecodeSSchemaWrapperEx(pCoder, &pSma->schemaTag); - } + // only needed in dstVgroup + tDecodeSSchemaWrapperEx(pCoder, &pSma->schemaRow); + tDecodeSSchemaWrapperEx(pCoder, &pSma->schemaTag); return 0; } @@ -4005,98 +3968,6 @@ int32_t tDecodeSVDropTSmaReq(SDecoder *pCoder, SVDropTSmaReq *pReq) { return 0; } -int32_t tEncodeSVGetTSmaExpWndsReq(SEncoder *pCoder, const SVGetTsmaExpWndsReq *pReq) { - if (tStartEncode(pCoder) < 0) return -1; - - if (tEncodeI64(pCoder, pReq->indexUid) < 0) return -1; - if (tEncodeI64(pCoder, pReq->queryWindow.skey) < 0) return -1; - if (tEncodeI64(pCoder, pReq->queryWindow.ekey) < 0) return -1; - - tEndEncode(pCoder); - return 0; -} - -int32_t tDecodeSVGetTsmaExpWndsReq(SDecoder *pCoder, SVGetTsmaExpWndsReq *pReq) { - if (tStartDecode(pCoder) < 0) return -1; - - if (tDecodeI64(pCoder, &pReq->indexUid) < 0) return -1; - if (tDecodeI64(pCoder, &pReq->queryWindow.skey) < 0) return -1; - if (tDecodeI64(pCoder, &pReq->queryWindow.ekey) < 0) return -1; - - tEndDecode(pCoder); - return 0; -} - -int32_t tEncodeSVGetTSmaExpWndsRsp(SEncoder *pCoder, const SVGetTsmaExpWndsRsp *pReq) { - if (tStartEncode(pCoder) < 0) return -1; - - if (tEncodeI64(pCoder, pReq->indexUid) < 0) return -1; - if (tEncodeI8(pCoder, pReq->flags) < 0) return -1; - if (tEncodeI32(pCoder, pReq->numExpWnds) < 0) return -1; - for (int32_t i = 0; i < pReq->numExpWnds; ++i) { - if (tEncodeI64(pCoder, pReq->wndSKeys[i]) < 0) return -1; - } - tEndEncode(pCoder); - return 0; -} - -int32_t tDecodeSVGetTsmaExpWndsRsp(SDecoder *pCoder, SVGetTsmaExpWndsRsp *pReq) { - if (tStartDecode(pCoder) < 0) return -1; - - if (tDecodeI64(pCoder, &pReq->indexUid) < 0) return -1; - if (tDecodeI8(pCoder, &pReq->flags) < 0) return -1; - if (tDecodeI32(pCoder, &pReq->numExpWnds) < 0) return -1; - for (int32_t i = 0; i < pReq->numExpWnds; ++i) { - if (tDecodeI64(pCoder, &pReq->wndSKeys[i]) < 0) return -1; - } - - tEndDecode(pCoder); - return 0; -} - -int32_t tEncodeSVClrTsmaExpWndsReq(SEncoder *pCoder, const SVClrTsmaExpWndsReq *pReq) { - if (tStartEncode(pCoder) < 0) return -1; - if (tEncodeI64(pCoder, pReq->indexUid) < 0) return -1; - if (tEncodeI64(pCoder, pReq->version) < 0) return -1; - if (tEncodeI64v(pCoder, pReq->nItems) < 0) return -1; - for (int64_t n = 0; pReq->nItems; ++n) { - if (tEncodeI64v(pCoder, pReq->items[n].nKeys) < 0) return -1; - if (tEncodeI64(pCoder, pReq->items[n].skey) < 0) return -1; - } - tEndEncode(pCoder); - return 0; -} - -int32_t tDecodeSVClrTsmaExpWndsReq(SDecoder *pCoder, SVClrTsmaExpWndsReq *pReq) { - if (tStartDecode(pCoder) < 0) return -1; - if (tDecodeI64(pCoder, &pReq->indexUid) < 0) return -1; - if (tDecodeI64(pCoder, &pReq->version) < 0) return -1; - if (tDecodeI64v(pCoder, &pReq->nItems) < 0) return -1; - - for (int64_t i = 0; i < pReq->nItems; ++i) { - if (tDecodeI64v(pCoder, &pReq->items[i].nKeys) < 0) return -1; - if (tDecodeI64(pCoder, &pReq->items[i].skey) < 0) return -1; - } - tEndDecode(pCoder); - return 0; -} - -int32_t tEncodeSVClrTsmaExpWndsRsp(SEncoder *pCoder, const SVClrTsmaExpWndsRsp *pReq) { - if (tStartEncode(pCoder) < 0) return -1; - if (tEncodeI64(pCoder, pReq->indexUid) < 0) return -1; - if (tEncodeI32v(pCoder, pReq->code) < 0) return -1; - tEndEncode(pCoder); - return 0; -} - -int32_t tDecodeSVClrTsmaExpWndsRsp(SDecoder *pCoder, SVClrTsmaExpWndsRsp *pReq) { - if (tStartDecode(pCoder) < 0) return -1; - if (tDecodeI64(pCoder, &pReq->indexUid) < 0) return -1; - if (tDecodeI32v(pCoder, &pReq->code) < 0) return -1; - tEndDecode(pCoder); - return 0; -} - int32_t tSerializeSVDeleteReq(void *buf, int32_t bufLen, SVDeleteReq *pReq) { int32_t headLen = sizeof(SMsgHead); if (buf != NULL) { diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 1540f10ba4..ee120576c3 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -347,7 +347,6 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_CONSUME, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_DELETE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_QUERY_HEARTBEAT, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_CLR_TSMA_EXP_WNDS, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TRIGGER, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DEPLOY, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index d21af87067..ae92497b8a 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -318,12 +318,10 @@ typedef struct { int32_t tagsFilterLen; int32_t sqlLen; int32_t astLen; - int32_t numOfVgroups; // for dstVgroup char* expr; char* tagsFilter; char* sql; char* ast; - SVgEpSet* pVgEpSet; // for dstVgroup SSchemaWrapper schemaRow; // for dstVgroup SSchemaWrapper schemaTag; // for dstVgroup } SSmaObj; diff --git a/source/dnode/mnode/impl/src/mndSma.c b/source/dnode/mnode/impl/src/mndSma.c index a643402739..c19b558f19 100644 --- a/source/dnode/mnode/impl/src/mndSma.c +++ b/source/dnode/mnode/impl/src/mndSma.c @@ -272,8 +272,6 @@ static void *mndBuildVCreateSmaReq(SMnode *pMnode, SVgObj *pVgroup, SSmaObj *pSm req.sliding = pSma->sliding; req.expr = pSma->expr; req.tagsFilter = pSma->tagsFilter; - req.numOfVgroups = pSma->numOfVgroups; - req.pVgEpSet = pSma->pVgEpSet; req.schemaRow = pSma->schemaRow; req.schemaTag = pSma->schemaTag; req.dstTbName = pSma->dstTbName; @@ -435,7 +433,6 @@ static int32_t mndSetCreateSmaVgroupRedoActions(SMnode *pMnode, STrans *pTrans, mndReleaseDnode(pMnode, pDnode); // todo add sma info here -#if 1 SNode *pAst = NULL; if (nodesStringToNode(pSma->ast, &pAst) < 0) { return -1; @@ -452,7 +449,6 @@ static int32_t mndSetCreateSmaVgroupRedoActions(SMnode *pMnode, STrans *pTrans, pSma->schemaTag.version = 1; pSma->schemaTag.pSchema = taosMemoryCalloc(1, sizeof(SSchema)); if (!pSma->schemaTag.pSchema) { - nodesDestroyNode(pAst); return -1; } pSma->schemaTag.pSchema[0].type = TSDB_DATA_TYPE_BIGINT; @@ -461,17 +457,7 @@ static int32_t mndSetCreateSmaVgroupRedoActions(SMnode *pMnode, STrans *pTrans, pSma->schemaTag.pSchema[0].flags = 0; snprintf(pSma->schemaTag.pSchema[0].name, TSDB_COL_NAME_LEN, "groupId"); - SVgEpSet *pVgEpSet = NULL; - int32_t numOfVgroups = 0; - if (mndSmaGetVgEpSet(pMnode, pDb, &pVgEpSet, &numOfVgroups) != 0) { - nodesDestroyNode(pAst); - return -1; - } - nodesDestroyNode(pAst); - pSma->pVgEpSet = pVgEpSet; - pSma->numOfVgroups = numOfVgroups; -#endif int32_t smaContLen = 0; void *pSmaReq = mndBuildVCreateSmaReq(pMnode, pVgroup, pSma, &smaContLen); if (pSmaReq == NULL) return -1; @@ -501,10 +487,7 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea memcpy(smaObj.stb, pStb->name, TSDB_TABLE_FNAME_LEN); memcpy(smaObj.db, pDb->name, TSDB_DB_FNAME_LEN); smaObj.createdTime = taosGetTimestampMs(); -#if 0 smaObj.uid = mndGenerateUid(pCreate->name, TSDB_TABLE_FNAME_LEN); -#endif - smaObj.uid = TD_DEBUG_SMA_ID; char resultTbName[TSDB_TABLE_FNAME_LEN + 16] = {0}; snprintf(resultTbName, TSDB_TABLE_FNAME_LEN + 16, "td.tsma.rst.tb.%s", pCreate->name); memcpy(smaObj.dstTbName, resultTbName, TSDB_TABLE_FNAME_LEN); @@ -514,7 +497,6 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea smaObj.intervalUnit = pCreate->intervalUnit; smaObj.slidingUnit = pCreate->slidingUnit; smaObj.timezone = pCreate->timezone; - // smaObj.dstVgId = pCreate->dstVgId; smaObj.interval = pCreate->interval; smaObj.offset = pCreate->offset; smaObj.sliding = pCreate->sliding; @@ -547,42 +529,6 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea memcpy(smaObj.ast, pCreate->ast, smaObj.astLen); } -#if 1 // only for debugging, not needed in common vgroups, only needed in dstVgroup. - SNode *pAst = NULL; - if (nodesStringToNode(smaObj.ast, &pAst) < 0) { - return -1; - } - if (qExtractResultSchema(pAst, &smaObj.schemaRow.nCols, &smaObj.schemaRow.pSchema) != 0) { - nodesDestroyNode(pAst); - return -1; - } - smaObj.schemaRow.version = 1; - - smaObj.schemaTag.nCols = 1; - smaObj.schemaTag.version = 1; - smaObj.schemaTag.pSchema = taosMemoryCalloc(1, sizeof(SSchema)); - if (!smaObj.schemaTag.pSchema) { - nodesDestroyNode(pAst); - return -1; - } - smaObj.schemaTag.pSchema[0].type = TSDB_DATA_TYPE_BIGINT; - smaObj.schemaTag.pSchema[0].bytes = TYPE_BYTES[TSDB_DATA_TYPE_BIGINT]; - smaObj.schemaTag.pSchema[0].colId = smaObj.schemaRow.nCols + PRIMARYKEY_TIMESTAMP_COL_ID; - smaObj.schemaTag.pSchema[0].flags = 0; - snprintf(smaObj.schemaTag.pSchema[0].name, TSDB_COL_NAME_LEN, "groupId"); - - nodesDestroyNode(pAst); - - SVgEpSet *pVgEpSet = NULL; - int32_t numOfVgroups = 0; - if (mndSmaGetVgEpSet(pMnode, pDb, &pVgEpSet, &numOfVgroups) != 0) { - return -1; - } - - smaObj.pVgEpSet = pVgEpSet; - smaObj.numOfVgroups = numOfVgroups; -#endif - SStreamObj streamObj = {0}; tstrncpy(streamObj.name, pCreate->name, TSDB_STREAM_FNAME_LEN); tstrncpy(streamObj.sourceDb, pDb->name, TSDB_DB_FNAME_LEN); @@ -1168,53 +1114,4 @@ static int32_t mndRetrieveSma(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBloc static void mndCancelGetNextSma(SMnode *pMnode, void *pIter) { SSdb *pSdb = pMnode->pSdb; sdbCancelFetch(pSdb, pIter); -} - -static int32_t mndSmaGetVgEpSet(SMnode *pMnode, SDbObj *pDb, SVgEpSet **ppVgEpSet, int32_t *numOfVgroups) { - SSdb *pSdb = pMnode->pSdb; - SVgObj *pVgroup = NULL; - void *pIter = NULL; - SVgEpSet *pVgEpSet = NULL; - int32_t nAllocVgs = 16; - int32_t nVgs = 0; - - pVgEpSet = taosMemoryCalloc(nAllocVgs, sizeof(SVgEpSet)); - if (!pVgEpSet) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - while (1) { - pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup); - if (pIter == NULL) break; - if (pVgroup->dbUid != pDb->uid) { - sdbRelease(pSdb, pVgroup); - continue; - } - - if (nVgs >= nAllocVgs) { - void *p = taosMemoryRealloc(pVgEpSet, nAllocVgs * 2 * sizeof(SVgEpSet)); - if (!p) { - taosMemoryFree(pVgEpSet); - sdbCancelFetch(pSdb, pIter); - sdbRelease(pSdb, pVgroup); - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - pVgEpSet = (SVgEpSet *)p; - nAllocVgs *= 2; - } - - (pVgEpSet + nVgs)->vgId = pVgroup->vgId; - (pVgEpSet + nVgs)->epSet = mndGetVgroupEpset(pMnode, pVgroup); - - ++nVgs; - - sdbRelease(pSdb, pVgroup); - } - - *ppVgEpSet = pVgEpSet; - *numOfVgroups = nVgs; - - return 0; -} +} \ No newline at end of file diff --git a/source/dnode/vnode/CMakeLists.txt b/source/dnode/vnode/CMakeLists.txt index 978fd9013a..8dca589320 100644 --- a/source/dnode/vnode/CMakeLists.txt +++ b/source/dnode/vnode/CMakeLists.txt @@ -28,7 +28,6 @@ target_sources( # sma "src/sma/sma.c" - "src/sma/smaTDBImpl.c" "src/sma/smaEnv.c" "src/sma/smaOpen.c" "src/sma/smaRollup.c" diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index 7eb5c34e5d..e9da125841 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -43,35 +43,17 @@ typedef struct SRSmaInfo SRSmaInfo; struct SSmaEnv { TdThreadRwlock lock; int8_t type; - TXN txn; - void *pPool; // SPoolMem - SDiskID did; - TDB *dbEnv; // TODO: If it's better to put it in smaIndex level? - char *path; // relative path SSmaStat *pStat; }; #define SMA_ENV_LOCK(env) ((env)->lock) #define SMA_ENV_TYPE(env) ((env)->type) -#define SMA_ENV_DID(env) ((env)->did) -#define SMA_ENV_ENV(env) ((env)->dbEnv) -#define SMA_ENV_PATH(env) ((env)->path) #define SMA_ENV_STAT(env) ((env)->pStat) #define SMA_ENV_STAT_ITEMS(env) ((env)->pStat->smaStatItems) struct SSmaStatItem { - /** - * @brief The field 'state' is here to demonstrate if one smaIndex is ready to provide service. - * - TSDB_SMA_STAT_OK: 1) The sma calculation of history data is finished; 2) Or recevied information from - * Streaming Module or TSDB local persistence. - * - TSDB_SMA_STAT_EXPIRED: 1) If sma calculation of history TS data is not finished; 2) Or if the TSDB is open, - * without information about its previous state. - * - TSDB_SMA_STAT_DROPPED: 1)sma dropped - * N.B. only applicable to tsma - */ - int8_t state; // ETsdbSmaStat - SHashObj *expireWindows; // key: skey of time window, value: version - STSma *pTSma; // cache schema + int8_t state; // ETsdbSmaStat + STSma *pTSma; // cache schema }; struct SSmaStat { @@ -84,29 +66,6 @@ struct SSmaStat { #define SMA_STAT_ITEMS(s) ((s)->smaStatItems) #define SMA_STAT_INFO_HASH(s) ((s)->rsmaInfoHash) -struct SSmaKey { - TSKEY skey; - int64_t groupId; -}; - -typedef struct SDBFile SDBFile; - -struct SDBFile { - int32_t fid; - TTB *pDB; - char *path; -}; - -int32_t tdSmaBeginCommit(SSmaEnv *pEnv); -int32_t tdSmaEndCommit(SSmaEnv *pEnv); - -int32_t smaOpenDBEnv(TDB **ppEnv, const char *path); -int32_t smaCloseDBEnv(TDB *pEnv); -int32_t smaOpenDBF(TDB *pEnv, SDBFile *pDBF); -int32_t smaCloseDBF(SDBFile *pDBF); -int32_t smaSaveSmaToDB(SDBFile *pDBF, void *pKey, int32_t keyLen, void *pVal, int32_t valLen, TXN *txn); -void *smaGetSmaDataByKey(SDBFile *pDBF, const void *pKey, int32_t keyLen, int32_t *valLen); - void tdDestroySmaEnv(SSmaEnv *pSmaEnv); void *tdFreeSmaEnv(SSmaEnv *pSmaEnv); #if 0 @@ -114,13 +73,6 @@ int32_t tbGetTSmaStatus(SSma *pSma, STSma *param, void *result); int32_t tbRemoveTSmaData(SSma *pSma, STSma *param, STimeWindow *pWin); #endif -static FORCE_INLINE int32_t tdEncodeTSmaKey(int64_t groupId, TSKEY tsKey, void **pData) { - int32_t len = 0; - len += taosEncodeFixedI64(pData, tsKey); - len += taosEncodeFixedI64(pData, groupId); - return len; -} - int32_t tdInitSma(SSma *pSma); int32_t tdDropTSma(SSma *pSma, char *pMsg); int32_t tdDropTSmaData(SSma *pSma, int64_t indexUid); @@ -133,8 +85,6 @@ int32_t tdCheckAndInitSmaEnv(SSma *pSma, int8_t smaType, bool onlyCheck); int32_t tdLockSma(SSma *pSma); int32_t tdUnLockSma(SSma *pSma); -int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg); - static FORCE_INLINE int16_t tdTSmaAdd(SSma *pSma, int16_t n) { return atomic_add_fetch_16(&SMA_TSMA_NUM(pSma), n); } static FORCE_INLINE int16_t tdTSmaSub(SSma *pSma, int16_t n) { return atomic_sub_fetch_16(&SMA_TSMA_NUM(pSma), n); } @@ -219,12 +169,8 @@ static int32_t tdInitSmaEnv(SSma *pSma, int8_t smaType, const char *path, SDisk void *tdFreeRSmaInfo(SRSmaInfo *pInfo); int32_t tdProcessTSmaCreateImpl(SSma *pSma, int64_t version, const char *pMsg); -int32_t tdUpdateExpireWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t version); -int32_t tdClearExpireWindowImpl(SSma *pSma, const SVClrTsmaExpWndsReq *pMsg); -// TODO: This is the basic params, and should wrap the params to a queryHandle. -int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY querySKey, int32_t nMaxResult); - -int32_t tdGetTSmaDaysImpl(SVnodeCfg *pCfg, void *pCont, uint32_t contLen, int32_t *days); +int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg); +int32_t tdProcessTSmaGetDaysImpl(SVnodeCfg *pCfg, void *pCont, uint32_t contLen, int32_t *days); #ifdef __cplusplus } diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 944a03f70a..e374a64bf3 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -150,8 +150,6 @@ int32_t tqProcessTaskRecoverRsp(STQ* pTq, SRpcMsg* pMsg); int32_t smaOpen(SVnode* pVnode); int32_t smaClose(SSma* pSma); -int32_t tdUpdateExpireWindow(SSma* pSma, const SSubmitReq* pMsg, int64_t version); -int32_t tdClearExpireWindow(SSma* pSma, const SVClrTsmaExpWndsReq* pMsg); int32_t tdProcessTSmaCreate(SSma* pSma, int64_t version, const char* msg); int32_t tdProcessTSmaInsert(SSma* pSma, int64_t indexUid, const char* msg); diff --git a/source/dnode/vnode/src/sma/sma.c b/source/dnode/vnode/src/sma/sma.c index 5782318006..98e5d7c66d 100644 --- a/source/dnode/vnode/src/sma/sma.c +++ b/source/dnode/vnode/src/sma/sma.c @@ -36,32 +36,9 @@ int32_t tdProcessTSmaCreate(SSma* pSma, int64_t version, const char* msg) { return code; } -int32_t tdUpdateExpireWindow(SSma* pSma, const SSubmitReq* pMsg, int64_t version) { - int32_t code = TSDB_CODE_SUCCESS; - if ((code = tdUpdateExpireWindowImpl(pSma, pMsg, version)) < 0) { - smaWarn("vgId:%d, update expire window failed since %s", SMA_VID(pSma), tstrerror(terrno)); - } - return code; -} -int32_t tdClearExpireWindow(SSma* pSma, const SVClrTsmaExpWndsReq* pMsg) { - int32_t code = TSDB_CODE_SUCCESS; - if ((code = tdClearExpireWindowImpl(pSma, pMsg)) < 0) { - smaWarn("vgId:%d, update expire window failed since %s", SMA_VID(pSma), tstrerror(terrno)); - } - return code; -} - -int32_t tdGetTSmaData(SSma* pSma, char* pData, int64_t indexUid, TSKEY querySKey, int32_t nMaxResult) { - int32_t code = TSDB_CODE_SUCCESS; - if ((code = tdGetTSmaDataImpl(pSma, pData, indexUid, querySKey, nMaxResult)) < 0) { - smaWarn("vgId:%d, get tsma data failed since %s", SMA_VID(pSma), tstrerror(terrno)); - } - return code; -} - int32_t smaGetTSmaDays(SVnodeCfg* pCfg, void* pCont, uint32_t contLen, int32_t* days) { int32_t code = TSDB_CODE_SUCCESS; - if ((code = tdGetTSmaDaysImpl(pCfg, pCont, contLen, days)) < 0) { + if ((code = tdProcessTSmaGetDaysImpl(pCfg, pCont, contLen, days)) < 0) { smaWarn("vgId:%d, get tsma days failed since %s", pCfg->vgId, tstrerror(terrno)); } smaDebug("vgId:%d, get tsma days %d", pCfg->vgId, *days); diff --git a/source/dnode/vnode/src/sma/smaEnv.c b/source/dnode/vnode/src/sma/smaEnv.c index d15769e28e..5eec5076e8 100644 --- a/source/dnode/vnode/src/sma/smaEnv.c +++ b/source/dnode/vnode/src/sma/smaEnv.c @@ -151,31 +151,11 @@ static SSmaEnv *tdNewSmaEnv(const SSma *pSma, int8_t smaType, const char *path, return NULL; } - ASSERT(path && (strlen(path) > 0)); - SMA_ENV_PATH(pEnv) = strdup(path); - if (!SMA_ENV_PATH(pEnv)) { - tdFreeSmaEnv(pEnv); - return NULL; - } - - SMA_ENV_DID(pEnv) = did; - if (tdInitSmaStat(&SMA_ENV_STAT(pEnv), smaType) != TSDB_CODE_SUCCESS) { tdFreeSmaEnv(pEnv); return NULL; } - char aname[TSDB_FILENAME_LEN] = {0}; - tfsAbsoluteName(SMA_TFS(pSma), did, path, aname); - if (smaOpenDBEnv(&pEnv->dbEnv, aname) != TSDB_CODE_SUCCESS) { - tdFreeSmaEnv(pEnv); - return NULL; - } - - if (!(pEnv->pPool = openPool())) { - tdFreeSmaEnv(pEnv); - return NULL; - } return pEnv; } @@ -205,10 +185,7 @@ void tdDestroySmaEnv(SSmaEnv *pSmaEnv) { if (pSmaEnv) { tdDestroySmaState(pSmaEnv->pStat, SMA_ENV_TYPE(pSmaEnv)); taosMemoryFreeClear(pSmaEnv->pStat); - taosMemoryFreeClear(pSmaEnv->path); taosThreadRwlockDestroy(&(pSmaEnv->lock)); - smaCloseDBEnv(pSmaEnv->dbEnv); - closePool(pSmaEnv->pPool); } } @@ -280,7 +257,6 @@ void *tdFreeSmaStatItem(SSmaStatItem *pSmaStatItem) { if (pSmaStatItem) { tDestroyTSma(pSmaStatItem->pTSma); taosMemoryFreeClear(pSmaStatItem->pTSma); - taosHashCleanup(pSmaStatItem->expireWindows); taosMemoryFreeClear(pSmaStatItem); } return NULL; @@ -399,63 +375,3 @@ int32_t tdCheckAndInitSmaEnv(SSma *pSma, int8_t smaType, bool onlyCheck) { return TSDB_CODE_SUCCESS; }; - -int32_t tdSmaBeginCommit(SSmaEnv *pEnv) { - TXN *pTxn = &pEnv->txn; - // start a new txn - tdbTxnOpen(pTxn, 0, poolMalloc, poolFree, pEnv->pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); - if (tdbBegin(pEnv->dbEnv, pTxn) != 0) { - smaWarn("tdSma tdb begin commit fail"); - return -1; - } - return 0; -} - -int32_t tdSmaEndCommit(SSmaEnv *pEnv) { - TXN *pTxn = &pEnv->txn; - - // Commit current txn - if (tdbCommit(pEnv->dbEnv, pTxn) != 0) { - smaWarn("tdSma tdb end commit fail"); - return -1; - } - tdbTxnClose(pTxn); - clearPool(pEnv->pPool); - return 0; -} - -#if 0 -/** - * @brief Get the start TS key of the last data block of one interval/sliding. - * - * @param pSma - * @param param - * @param result - * @return int32_t - * 1) Return 0 and fill the result if the check procedure is normal; - * 2) Return -1 if error occurs during the check procedure. - */ -int32_t tdGetTSmaStatus(SSma *pSma, void *smaIndex, void *result) { - const char *procedure = ""; - if (strncmp(procedure, "get the start TS key of the last data block", 100) != 0) { - return -1; - } - // fill the result - return TSDB_CODE_SUCCESS; -} - -/** - * @brief Remove the tSma data files related to param between pWin. - * - * @param pSma - * @param param - * @param pWin - * @return int32_t - */ -int32_t tdRemoveTSmaData(SSma *pSma, void *smaIndex, STimeWindow *pWin) { - // for ("tSmaFiles of param-interval-sliding between pWin") { - // // remove the tSmaFile - // } - return TSDB_CODE_SUCCESS; -} -#endif diff --git a/source/dnode/vnode/src/sma/smaTDBImpl.c b/source/dnode/vnode/src/sma/smaTDBImpl.c deleted file mode 100644 index cac986d053..0000000000 --- a/source/dnode/vnode/src/sma/smaTDBImpl.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#define ALLOW_FORBID_FUNC - -#include "sma.h" - -int32_t smaOpenDBEnv(TDB **ppEnv, const char *path) { - int ret = 0; - - if (path == NULL) return -1; - - ret = tdbOpen(path, 4096, 256, ppEnv); // use as param - - if (ret != 0) { - smaError("failed to create tsdb db env, ret = %d", ret); - return -1; - } - - return 0; -} - -int32_t smaCloseDBEnv(TDB *pEnv) { return tdbClose(pEnv); } - -static inline int tdSmaKeyCmpr(const void *arg1, int len1, const void *arg2, int len2) { - const SSmaKey *pKey1 = (const SSmaKey *)arg1; - const SSmaKey *pKey2 = (const SSmaKey *)arg2; - - ASSERT(len1 == len2 && len1 == sizeof(SSmaKey)); - - if (pKey1->skey < pKey2->skey) { - return -1; - } else if (pKey1->skey > pKey2->skey) { - return 1; - } - if (pKey1->groupId < pKey2->groupId) { - return -1; - } else if (pKey1->groupId > pKey2->groupId) { - return 1; - } - - return 0; -} - -static int32_t smaOpenDBDb(TTB **ppDB, TDB *pEnv, const char *pFName) { - tdb_cmpr_fn_t compFunc; - - // Create a database - compFunc = tdSmaKeyCmpr; - if (tdbTbOpen(pFName, -1, -1, compFunc, pEnv, ppDB) < 0) { - return -1; - } - - return 0; -} - -static int32_t smaCloseDBDb(TTB *pDB) { return tdbTbClose(pDB); } - -int32_t smaOpenDBF(TDB *pEnv, SDBFile *pDBF) { - // TEnv is shared by a group of SDBFile - if (!pEnv || !pDBF) { - terrno = TSDB_CODE_INVALID_PTR; - return -1; - } - - // Open DBF - if (smaOpenDBDb(&(pDBF->pDB), pEnv, pDBF->path) < 0) { - smaError("failed to open DBF: %s", pDBF->path); - smaCloseDBDb(pDBF->pDB); - return -1; - } - - return 0; -} - -int32_t smaCloseDBF(SDBFile *pDBF) { - int32_t ret = 0; - if (pDBF->pDB) { - ret = smaCloseDBDb(pDBF->pDB); - pDBF->pDB = NULL; - } - taosMemoryFreeClear(pDBF->path); - return ret; -} - -int32_t smaSaveSmaToDB(SDBFile *pDBF, void *pKey, int32_t keyLen, void *pVal, int32_t valLen, TXN *txn) { - int32_t ret; - - printf("save tsma data into %s, keyLen:%d valLen:%d txn:%p\n", pDBF->path, keyLen, valLen, txn); - ret = tdbTbUpsert(pDBF->pDB, pKey, keyLen, pVal, valLen, txn); - if (ret < 0) { - smaError("failed to upsert tsma data into db, ret = %d", ret); - return -1; - } - - return 0; -} - -void *smaGetSmaDataByKey(SDBFile *pDBF, const void *pKey, int32_t keyLen, int32_t *valLen) { - void *pVal = NULL; - int ret; - - ret = tdbTbGet(pDBF->pDB, pKey, keyLen, &pVal, valLen); - - if (ret < 0) { - smaError("failed to get tsma data from db, ret = %d", ret); - return NULL; - } - - ASSERT(*valLen >= 0); - - // TODO: lock? - // TODO: Would the key/value be destoryed during return the data? - // TODO: How about the key is updated while value length is changed? The original value buffer would be freed - // automatically? - - return pVal; -} \ No newline at end of file diff --git a/source/dnode/vnode/src/sma/smaTimeRange.c b/source/dnode/vnode/src/sma/smaTimeRange.c deleted file mode 100644 index 4cc9703531..0000000000 --- a/source/dnode/vnode/src/sma/smaTimeRange.c +++ /dev/null @@ -1,1036 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "sma.h" -#include "tsdb.h" - -typedef STsdbCfg STSmaKeepCfg; - -#undef _TEST_SMA_PRINT_DEBUG_LOG_ -#define SMA_STORAGE_TSDB_MINUTES 86400 -#define SMA_STORAGE_TSDB_TIMES 10 -#define SMA_STORAGE_SPLIT_FACTOR 144 // least records in tsma file -#define SMA_KEY_LEN 16 // TSKEY+groupId 8+8 -#define SMA_DROP_EXPIRED_TIME 10 // default is 10 seconds - -#define SMA_STATE_ITEM_HASH_SLOT 32 - -typedef struct { - SSma *pSma; - SDBFile dFile; - const SArray *pDataBlocks; // sma data - int64_t interval; // interval with the precision of DB -} STSmaWriteH; - -typedef struct { - int32_t iter; - int32_t fid; -} SmaFsIter; - -typedef struct { - STsdb *pTsdb; - SSma *pSma; - SDBFile dFile; - int64_t interval; // interval with the precision of DB - int32_t blockSize; // size of SMA block item - int32_t days; - int8_t storageLevel; - SmaFsIter smaFsIter; -} STSmaReadH; - -typedef enum { - SMA_STORAGE_LEVEL_TSDB = 0, // use days of self-defined e.g. vnode${N}/tsdb/tsma/sma_index_uid/v2f200.tsma - SMA_STORAGE_LEVEL_DFILESET = 1 // use days of TS data e.g. vnode${N}/tsdb/tsma/sma_index_uid/v2f1906.tsma -} ESmaStorageLevel; - -// static func - -static int64_t tdGetIntervalByPrecision(int64_t interval, uint8_t intervalUnit, int8_t precision, bool adjusted); -static int32_t tdGetSmaStorageLevel(STSmaKeepCfg *pCfg, int64_t interval); -static int32_t tdInitTSmaWriteH(STSmaWriteH *pSmaH, SSma *pSma, const SArray *pDataBlocks, int64_t interval, - int8_t intervalUnit); -static int32_t tdInitTSmaReadH(STSmaReadH *pSmaH, SSma *pSma, int64_t interval, int8_t intervalUnit); -static void tdDestroyTSmaWriteH(STSmaWriteH *pSmaH); -static int32_t tdGetTSmaDays(SSma *pSma, int64_t interval, int32_t storageLevel); -static int32_t tdSetTSmaDataFile(STSmaWriteH *pSmaH, int64_t indexUid, int32_t fid); -static int32_t tdInitTSmaFile(STSmaReadH *pSmaH, int64_t indexUid, TSKEY skey); -static bool tdSetAndOpenTSmaFile(STSmaReadH *pReadH, TSKEY *queryKey); -static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyLen, void *pData, int32_t dataLen, - TXN *txn); -// expire window - -static int32_t tdSetExpireWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, int64_t version); -static int32_t tdResetExpireWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey); -static int32_t tdDropTSmaDataImpl(SSma *pSma, int64_t indexUid); - -// read data - -// implementation - -/** - * @brief - * - * @param pSmaH - * @param pSma - * @param interval - * @param intervalUnit - * @return int32_t - */ -static int32_t tdInitTSmaReadH(STSmaReadH *pSmaH, SSma *pSma, int64_t interval, int8_t intervalUnit) { - STSmaKeepCfg *pCfg = SMA_TSDB_CFG(pSma); - pSmaH->pSma = pSma; - pSmaH->interval = tdGetIntervalByPrecision(interval, intervalUnit, SMA_TSDB_CFG(pSma)->precision, true); - pSmaH->storageLevel = tdGetSmaStorageLevel(pCfg, interval); - pSmaH->days = tdGetTSmaDays(pSma, pSmaH->interval, pSmaH->storageLevel); - return TSDB_CODE_SUCCESS; -} - -/** - * @brief Init of tSma FS - * - * @param pReadH - * @param indexUid - * @param skey - * @return int32_t - */ -static int32_t tdInitTSmaFile(STSmaReadH *pSmaH, int64_t indexUid, TSKEY skey) { - SSma *pSma = pSmaH->pSma; - - int32_t fid = (int32_t)(TSDB_KEY_FID(skey, pSmaH->days, SMA_TSDB_CFG(pSma)->precision)); - char tSmaFile[TSDB_FILENAME_LEN] = {0}; - snprintf(tSmaFile, TSDB_FILENAME_LEN, "%" PRIi64 "%sv%df%d.tsma", indexUid, TD_DIRSEP, SMA_VID(pSma), fid); - pSmaH->dFile.path = strdup(tSmaFile); - pSmaH->smaFsIter.iter = 0; - pSmaH->smaFsIter.fid = fid; - return TSDB_CODE_SUCCESS; -} - -/** - * @brief Set and open tSma file if it has key locates in queryWin. - * - * @param pReadH - * @param param - * @param queryWin - * @return true - * @return false - */ -static bool tdSetAndOpenTSmaFile(STSmaReadH *pReadH, TSKEY *queryKey) { - // SArray *smaFs = pReadH->pTsdb->fs->cstatus->sf; - // int32_t nSmaFs = taosArrayGetSize(smaFs); - - smaCloseDBF(&pReadH->dFile); - -#if 0 - while (pReadH->smaFsIter.iter < nSmaFs) { - void *pSmaFile = taosArrayGet(smaFs, pReadH->smaFsIter.iter); - if (pSmaFile) { // match(indexName, queryWindow) - // TODO: select the file by index_name ... - pReadH->dFile = pSmaFile; - ++pReadH->smaFsIter.iter; - break; - } - ++pReadH->smaFsIter.iter; - } - - if (pReadH->pDFile) { - tdDebug("vg%d: smaFile %s matched", REPO_ID(pReadH->pTsdb), "[pSmaFile dir]"); - return true; - } -#endif - - return false; -} - -/** - * @brief Approximate value for week/month/year. - * - * @param interval - * @param intervalUnit - * @param precision - * @param adjusted Interval already adjusted according to DB precision - * @return int64_t - */ -static int64_t tdGetIntervalByPrecision(int64_t interval, uint8_t intervalUnit, int8_t precision, bool adjusted) { - if (adjusted) { - return interval; - } - - switch (intervalUnit) { - case TIME_UNIT_YEAR: // approximate value - interval *= 365 * 86400 * 1e3; - break; - case TIME_UNIT_MONTH: // approximate value - interval *= 30 * 86400 * 1e3; - break; - case TIME_UNIT_WEEK: // approximate value - interval *= 7 * 86400 * 1e3; - break; - case TIME_UNIT_DAY: // the interval for tSma calculation must <= day - interval *= 86400 * 1e3; - break; - case TIME_UNIT_HOUR: - interval *= 3600 * 1e3; - break; - case TIME_UNIT_MINUTE: - interval *= 60 * 1e3; - break; - case TIME_UNIT_SECOND: - interval *= 1e3; - break; - default: - break; - } - - switch (precision) { - case TSDB_TIME_PRECISION_MILLI: - if (TIME_UNIT_MICROSECOND == intervalUnit) { // us - return interval / 1e3; - } else if (TIME_UNIT_NANOSECOND == intervalUnit) { // nano second - return interval / 1e6; - } else { // ms - return interval; - } - break; - case TSDB_TIME_PRECISION_MICRO: - if (TIME_UNIT_MICROSECOND == intervalUnit) { // us - return interval; - } else if (TIME_UNIT_NANOSECOND == intervalUnit) { // ns - return interval / 1e3; - } else { // ms - return interval * 1e3; - } - break; - case TSDB_TIME_PRECISION_NANO: - if (TIME_UNIT_MICROSECOND == intervalUnit) { // us - return interval * 1e3; - } else if (TIME_UNIT_NANOSECOND == intervalUnit) { // ns - return interval; - } else { // ms - return interval * 1e6; - } - break; - default: // ms - if (TIME_UNIT_MICROSECOND == intervalUnit) { // us - return interval / 1e3; - } else if (TIME_UNIT_NANOSECOND == intervalUnit) { // ns - return interval / 1e6; - } else { // ms - return interval; - } - break; - } - return interval; -} - -static int32_t tdInitTSmaWriteH(STSmaWriteH *pSmaH, SSma *pSma, const SArray *pDataBlocks, int64_t interval, - int8_t intervalUnit) { - pSmaH->pSma = pSma; - pSmaH->interval = tdGetIntervalByPrecision(interval, intervalUnit, SMA_TSDB_CFG(pSma)->precision, true); - pSmaH->pDataBlocks = pDataBlocks; - pSmaH->dFile.fid = SMA_IVLD_FID; - return TSDB_CODE_SUCCESS; -} - -static void tdDestroyTSmaWriteH(STSmaWriteH *pSmaH) { - if (pSmaH) { - smaCloseDBF(&pSmaH->dFile); - } -} - -static int32_t tdSetTSmaDataFile(STSmaWriteH *pSmaH, int64_t indexUid, int32_t fid) { - SSma *pSma = pSmaH->pSma; - ASSERT(!pSmaH->dFile.path && !pSmaH->dFile.pDB); - - pSmaH->dFile.fid = fid; - char tSmaFile[TSDB_FILENAME_LEN] = {0}; - snprintf(tSmaFile, TSDB_FILENAME_LEN, "%" PRIi64 "%sv%df%d.tsma", indexUid, TD_DIRSEP, SMA_VID(pSma), fid); - pSmaH->dFile.path = strdup(tSmaFile); - - return TSDB_CODE_SUCCESS; -} - -/** - * @brief - * - * @param pSma - * @param interval Interval calculated by DB's precision - * @param storageLevel - * @return int32_t - */ -static int32_t tdGetTSmaDays(SSma *pSma, int64_t interval, int32_t storageLevel) { - STsdbCfg *pCfg = SMA_TSDB_CFG(pSma); - int32_t daysPerFile = pCfg->days; // unit is minute - - if (storageLevel == SMA_STORAGE_LEVEL_TSDB) { - int32_t minutes = SMA_STORAGE_TSDB_TIMES * (interval / tsTickPerMin[pCfg->precision]); - if (minutes > SMA_STORAGE_TSDB_MINUTES) { - daysPerFile = SMA_STORAGE_TSDB_MINUTES; - } - } - - return daysPerFile; -} - -/** - * @brief Judge the tSma storage level - * - * @param pCfg - * @param interval - * @return int32_t - */ -static int32_t tdGetSmaStorageLevel(STSmaKeepCfg *pCfg, int64_t interval) { - int64_t mInterval = convertTimeFromPrecisionToUnit(interval, pCfg->precision, TIME_UNIT_MINUTE); - if (pCfg->days / mInterval >= SMA_STORAGE_SPLIT_FACTOR) { - return SMA_STORAGE_LEVEL_DFILESET; - } - return SMA_STORAGE_LEVEL_TSDB; -} - -/** - * @brief Insert/Update Time-range-wise SMA data. - * - If interval < SMA_STORAGE_SPLIT_HOURS(e.g. 24), save the SMA data as a part of DFileSet to e.g. - * v3f1900.tsma.${sma_index_name}. The days is the same with that for TS data files. - * - If interval >= SMA_STORAGE_SPLIT_HOURS, save the SMA data to e.g. vnode3/tsma/v3f632.tsma.${sma_index_name}. The - * days is 30 times of the interval, and the minimum days is SMA_STORAGE_TSDB_DAYS(30d). - * - The destination file of one data block for some interval is determined by its start TS key. - * - * @param pSma - * @param msg - * @return int32_t - */ -int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { - STsdbCfg *pCfg = SMA_TSDB_CFG(pSma); - const SArray *pDataBlocks = (const SArray *)msg; - int64_t testSkey = TSKEY_INITIAL_VAL; - - // TODO: destroy SSDataBlocks(msg) - - // For super table aggregation, the sma data is stored in vgroup calculated from the hash value of stable name. Thus - // the sma data would arrive ahead of the update-expired-window msg. - if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE, false) != TSDB_CODE_SUCCESS) { - terrno = TSDB_CODE_TDB_INIT_FAILED; - return TSDB_CODE_FAILED; - } - - if (!pDataBlocks) { - terrno = TSDB_CODE_INVALID_PTR; - smaWarn("vgId:%d, insert tSma data failed since pDataBlocks is NULL", SMA_VID(pSma)); - return terrno; - } - - if (taosArrayGetSize(pDataBlocks) <= 0) { - terrno = TSDB_CODE_INVALID_PARA; - smaWarn("vgId:%d, insert tSma data failed since pDataBlocks is empty", SMA_VID(pSma)); - return TSDB_CODE_FAILED; - } - - SSmaEnv *pEnv = SMA_TSMA_ENV(pSma); - SSmaStat *pStat = SMA_ENV_STAT(pEnv); - SSmaStatItem *pItem = NULL; - - tdRefSmaStat(pSma, pStat); - - if (pStat && SMA_STAT_ITEMS(pStat)) { - pItem = taosHashGet(SMA_STAT_ITEMS(pStat), &indexUid, sizeof(indexUid)); - } - - if (!pItem || !(pItem = *(SSmaStatItem **)pItem) || tdSmaStatIsDropped(pItem)) { - terrno = TSDB_CODE_TSMA_INVALID_STAT; - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - - STSma *pTSma = pItem->pTSma; - STSmaWriteH tSmaH = {0}; - - if (tdInitTSmaWriteH(&tSmaH, pSma, pDataBlocks, pTSma->interval, pTSma->intervalUnit) != 0) { - return TSDB_CODE_FAILED; - } - - char rPath[TSDB_FILENAME_LEN] = {0}; - char aPath[TSDB_FILENAME_LEN] = {0}; - snprintf(rPath, TSDB_FILENAME_LEN, "%s%s%" PRIi64, SMA_ENV_PATH(pEnv), TD_DIRSEP, indexUid); - tfsAbsoluteName(SMA_TFS(pSma), SMA_ENV_DID(pEnv), rPath, aPath); - if (!taosCheckExistFile(aPath)) { - if (tfsMkdirRecurAt(SMA_TFS(pSma), rPath, SMA_ENV_DID(pEnv)) != TSDB_CODE_SUCCESS) { - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - } - - // Step 1: Judge the storage level and days - int32_t storageLevel = tdGetSmaStorageLevel(pCfg, tSmaH.interval); - int32_t minutePerFile = tdGetTSmaDays(pSma, tSmaH.interval, storageLevel); - - char smaKey[SMA_KEY_LEN] = {0}; // key: skey + groupId - char dataBuf[512] = {0}; // val: aggr data // TODO: handle 512 buffer? - void *pDataBuf = NULL; - int32_t sz = taosArrayGetSize(pDataBlocks); - for (int32_t i = 0; i < sz; ++i) { - SSDataBlock *pDataBlock = taosArrayGet(pDataBlocks, i); - int32_t colNum = pDataBlock->info.numOfCols; - int32_t rows = pDataBlock->info.rows; - int32_t rowSize = pDataBlock->info.rowSize; - int64_t groupId = pDataBlock->info.groupId; - for (int32_t j = 0; j < rows; ++j) { - printf("|"); - TSKEY skey = TSKEY_INITIAL_VAL; // the start key of TS window by interval - void *pSmaKey = &smaKey; - bool isStartKey = false; - - int32_t tlen = 0; // reset the len - pDataBuf = &dataBuf; // reset the buf - for (int32_t k = 0; k < colNum; ++k) { - SColumnInfoData *pColInfoData = taosArrayGet(pDataBlock->pDataBlock, k); - void *var = POINTER_SHIFT(pColInfoData->pData, j * pColInfoData->info.bytes); - switch (pColInfoData->info.type) { - case TSDB_DATA_TYPE_TIMESTAMP: - if (!isStartKey) { - isStartKey = true; - skey = *(TSKEY *)var; - testSkey = skey; - printf("= skey %" PRIi64 " groupId = %" PRIi64 "|", skey, groupId); - tdEncodeTSmaKey(groupId, skey, &pSmaKey); - } else { - printf(" %" PRIi64 " |", *(int64_t *)var); - tlen += taosEncodeFixedI64(&pDataBuf, *(int64_t *)var); - break; - } - break; - case TSDB_DATA_TYPE_BOOL: - case TSDB_DATA_TYPE_UTINYINT: - printf(" %15d |", *(uint8_t *)var); - tlen += taosEncodeFixedU8(&pDataBuf, *(uint8_t *)var); - break; - case TSDB_DATA_TYPE_TINYINT: - printf(" %15d |", *(int8_t *)var); - tlen += taosEncodeFixedI8(&pDataBuf, *(int8_t *)var); - break; - case TSDB_DATA_TYPE_SMALLINT: - printf(" %15d |", *(int16_t *)var); - tlen += taosEncodeFixedI16(&pDataBuf, *(int16_t *)var); - break; - case TSDB_DATA_TYPE_USMALLINT: - printf(" %15d |", *(uint16_t *)var); - tlen += taosEncodeFixedU16(&pDataBuf, *(uint16_t *)var); - break; - case TSDB_DATA_TYPE_INT: - printf(" %15d |", *(int32_t *)var); - tlen += taosEncodeFixedI32(&pDataBuf, *(int32_t *)var); - break; - case TSDB_DATA_TYPE_FLOAT: - printf(" %15f |", *(float *)var); - tlen += taosEncodeBinary(&pDataBuf, var, sizeof(float)); - break; - case TSDB_DATA_TYPE_UINT: - printf(" %15u |", *(uint32_t *)var); - tlen += taosEncodeFixedU32(&pDataBuf, *(uint32_t *)var); - break; - case TSDB_DATA_TYPE_BIGINT: - printf(" %15ld |", *(int64_t *)var); - tlen += taosEncodeFixedI64(&pDataBuf, *(int64_t *)var); - break; - case TSDB_DATA_TYPE_DOUBLE: - printf(" %15lf |", *(double *)var); - tlen += taosEncodeBinary(&pDataBuf, var, sizeof(double)); - case TSDB_DATA_TYPE_UBIGINT: - printf(" %15lu |", *(uint64_t *)var); - tlen += taosEncodeFixedU64(&pDataBuf, *(uint64_t *)var); - break; - case TSDB_DATA_TYPE_NCHAR: { - char tmpChar[100] = {0}; - strncpy(tmpChar, varDataVal(var), varDataLen(var)); - printf(" %s |", tmpChar); - tlen += taosEncodeBinary(&pDataBuf, varDataVal(var), varDataLen(var)); - break; - } - case TSDB_DATA_TYPE_VARCHAR: { // TSDB_DATA_TYPE_BINARY - char tmpChar[100] = {0}; - strncpy(tmpChar, varDataVal(var), varDataLen(var)); - printf(" %s |", tmpChar); - tlen += taosEncodeBinary(&pDataBuf, varDataVal(var), varDataLen(var)); - break; - } - case TSDB_DATA_TYPE_VARBINARY: - // TODO: add binary/varbinary - TASSERT(0); - default: - printf("the column type %" PRIi16 " is undefined\n", pColInfoData->info.type); - TASSERT(0); - break; - } - } - printf("\n"); - // if ((tlen > 0) && (skey != TSKEY_INITIAL_VAL)) { - if (tlen > 0) { - int32_t fid = (int32_t)(TSDB_KEY_FID(skey, minutePerFile, pCfg->precision)); - - // Step 2: Set the DFile for storage of SMA index, and iterate/split the TSma data and store to B+Tree index - // file - // - Set and open the DFile or the B+Tree file - // TODO: tsdbStartTSmaCommit(); - if (fid != tSmaH.dFile.fid) { - if (tSmaH.dFile.fid != SMA_IVLD_FID) { - tdSmaEndCommit(pEnv); - smaCloseDBF(&tSmaH.dFile); - } - tdSetTSmaDataFile(&tSmaH, indexUid, fid); - smaDebug("vgId:%d, write to DBF %s, days:%d, interval:%" PRIi64 ", storageLevel:%" PRIi32 - " queryKey:%" PRIi64, - SMA_VID(pSma), tSmaH.dFile.path, minutePerFile, tSmaH.interval, storageLevel, testSkey); - if (smaOpenDBF(pEnv->dbEnv, &tSmaH.dFile) != 0) { - smaWarn("vgId:%d, open DB file %s failed since %s", SMA_VID(pSma), - tSmaH.dFile.path ? tSmaH.dFile.path : "path is NULL", tstrerror(terrno)); - tdDestroyTSmaWriteH(&tSmaH); - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - tdSmaBeginCommit(pEnv); - } - - if (tdInsertTSmaBlocks(&tSmaH, &smaKey, SMA_KEY_LEN, dataBuf, tlen, &pEnv->txn) != 0) { - smaWarn("vgId:%d, insert tsma data blocks fail for index %" PRIi64 ", skey %" PRIi64 ", groupId %" PRIi64 - " since %s", - SMA_VID(pSma), indexUid, skey, groupId, tstrerror(terrno)); - tdSmaEndCommit(pEnv); - tdDestroyTSmaWriteH(&tSmaH); - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - - smaDebug("vgId:%d, insert tsma data blocks success for index %" PRIi64 ", skey %" PRIi64 ", groupId %" PRIi64, - SMA_VID(pSma), indexUid, skey, groupId); - // TODO:tsdbEndTSmaCommit(); - - // Step 3: reset the SSmaStat - tdResetExpireWindow(pSma, pStat, indexUid, skey); - } else { - smaWarn("vgId:%d, invalid data skey:%" PRIi64 ", tlen %" PRIi32 " during insert tSma data for %" PRIi64, - SMA_VID(pSma), skey, tlen, indexUid); - } - } - } - tdSmaEndCommit(pEnv); // TODO: not commit for every insert - tdDestroyTSmaWriteH(&tSmaH); - tdUnRefSmaStat(pSma, pStat); - - return TSDB_CODE_SUCCESS; -} - -int32_t tdDropTSmaData(SSma *pSma, int64_t indexUid) { - int32_t code = TSDB_CODE_SUCCESS; - if ((code = tdDropTSmaDataImpl(pSma, indexUid)) < 0) { - smaWarn("vgId:%d, drop tSma data failed since %s", SMA_VID(pSma), tstrerror(terrno)); - } - return code; -} - -/** - * @brief Insert TSma data blocks to DB File build by B+Tree - * - * @param pSmaH - * @param smaKey tableUid-colId-skeyOfWindow(8-2-8) - * @param keyLen - * @param pData - * @param dataLen - * @return int32_t - */ -static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyLen, void *pData, int32_t dataLen, - TXN *txn) { - SDBFile *pDBFile = &pSmaH->dFile; - - // TODO: insert tsma data blocks into B+Tree(TTB) - if (smaSaveSmaToDB(pDBFile, smaKey, keyLen, pData, dataLen, txn) != 0) { - smaWarn("vgId:%d, insert tsma data blocks into %s: smaKey %" PRIx64 "-%" PRIx64 ", dataLen %" PRIu32 " fail", - SMA_VID(pSmaH->pSma), pDBFile->path, *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), dataLen); - return TSDB_CODE_FAILED; - } - smaDebug("vgId:%d, insert tsma data blocks into %s: smaKey %" PRIx64 "-%" PRIx64 ", dataLen %" PRIu32 " succeed", - SMA_VID(pSmaH->pSma), pDBFile->path, *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), dataLen); - -#ifdef _TEST_SMA_PRINT_DEBUG_LOG_ - uint32_t valueSize = 0; - void *data = tdGetSmaDataByKey(pDBFile, smaKey, keyLen, &valueSize); - ASSERT(data != NULL); - for (uint32_t v = 0; v < valueSize; v += 8) { - smaWarn("vgId:%d, insert sma data val[%d] %" PRIi64, REPO_ID(pSmaH->pTsdb), v, *(int64_t *)POINTER_SHIFT(data, v)); - } -#endif - return TSDB_CODE_SUCCESS; -} - -/** - * @brief When sma data received from stream computing, make the relative expire window valid. - * - * @param pSma - * @param pStat - * @param indexUid - * @param skey - * @return int32_t - */ -static int32_t tdResetExpireWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey) { - SSmaStatItem *pItem = NULL; - - tdRefSmaStat(pSma, pStat); - - if (pStat && SMA_STAT_ITEMS(pStat)) { - pItem = taosHashGet(SMA_STAT_ITEMS(pStat), &indexUid, sizeof(indexUid)); - } - if ((pItem) && ((pItem = *(SSmaStatItem **)pItem))) { - // pItem resides in hash buffer all the time unless drop sma index - // TODO: multithread protect - if (taosHashRemove(pItem->expireWindows, &skey, sizeof(TSKEY)) != 0) { - // error handling - tdUnRefSmaStat(pSma, pStat); - smaWarn("vgId:%d, remove skey %" PRIi64 " from expire window for sma index %" PRIi64 " fail", SMA_VID(pSma), skey, - indexUid); - return TSDB_CODE_FAILED; - } - smaDebug("vgId:%d, remove skey %" PRIi64 " from expire window for sma index %" PRIi64 " succeed", SMA_VID(pSma), - skey, indexUid); - // TODO: use a standalone interface to received state upate notification from stream computing module. - /** - * @brief state - * - When SMA env init in TSDB, its status is TSDB_SMA_STAT_OK. - * - In startup phase of stream computing module, it should notify the SMA env in TSDB to expired if needed(e.g. - * when batch data caculation not finised) - * - When TSDB_SMA_STAT_OK, the stream computing module should also notify that to the SMA env in TSDB. - */ - pItem->state = TSDB_SMA_STAT_OK; - } else { - // error handling - tdUnRefSmaStat(pSma, pStat); - smaWarn("vgId:%d, expire window %" PRIi64 " not exists for sma index %" PRIi64, SMA_VID(pSma), skey, indexUid); - return TSDB_CODE_FAILED; - } - - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_SUCCESS; -} - -/** - * @brief Drop tSma data and local cache - * - insert/query reference - * @param pSma - * @param msg - * @return int32_t - */ -static int32_t tdDropTSmaDataImpl(SSma *pSma, int64_t indexUid) { - SSmaEnv *pEnv = atomic_load_ptr(&SMA_TSMA_ENV(pSma)); - - // clear local cache - if (pEnv) { - smaDebug("vgId:%d, drop tSma local cache for %" PRIi64, SMA_VID(pSma), indexUid); - - SSmaStatItem *pItem = taosHashGet(SMA_ENV_STAT_ITEMS(pEnv), &indexUid, sizeof(indexUid)); - if ((pItem) || ((pItem = *(SSmaStatItem **)pItem))) { - if (tdSmaStatIsDropped(pItem)) { - smaDebug("vgId:%d, tSma stat is already dropped for %" PRIi64, SMA_VID(pSma), indexUid); - return TSDB_CODE_TDB_INVALID_ACTION; // TODO: duplicate drop msg would be intercepted by mnode - } - - tdWLockSmaEnv(pEnv); - if (tdSmaStatIsDropped(pItem)) { - tdUnLockSmaEnv(pEnv); - smaDebug("vgId:%d, tSma stat is already dropped for %" PRIi64, SMA_VID(pSma), indexUid); - return TSDB_CODE_TDB_INVALID_ACTION; // TODO: duplicate drop msg would be intercepted by mnode - } - tdSmaStatSetDropped(pItem); - tdUnLockSmaEnv(pEnv); - - int32_t nSleep = 0; - int32_t refVal = INT32_MAX; - while (true) { - if ((refVal = T_REF_VAL_GET(SMA_ENV_STAT(pEnv))) <= 0) { - smaDebug("vgId:%d, drop index %" PRIi64 " since refVal=%d", SMA_VID(pSma), indexUid, refVal); - break; - } - smaDebug("vgId:%d, wait 1s to drop index %" PRIi64 " since refVal=%d", SMA_VID(pSma), indexUid, refVal); - taosSsleep(1); - if (++nSleep > SMA_DROP_EXPIRED_TIME) { - smaDebug("vgId:%d, drop index %" PRIi64 " after wait %d (refVal=%d)", SMA_VID(pSma), indexUid, nSleep, refVal); - break; - }; - } - - tdFreeSmaStatItem(pItem); - smaDebug("vgId:%d, getTSmaDataImpl failed since no index %" PRIi64 " in local cache", SMA_VID(pSma), indexUid); - } - } - // clear sma data files - // TODO: - return TSDB_CODE_SUCCESS; -} - -/** - * @brief - * - * @param pSma Return the data between queryWin and fill the pData. - * @param pData - * @param indexUid - * @param pQuerySKey - * @param nMaxResult The query invoker should control the nMaxResult need to return to avoid OOM. - * @return int32_t - */ -int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY querySKey, int32_t nMaxResult) { - SSmaEnv *pEnv = atomic_load_ptr(&SMA_TSMA_ENV(pSma)); - SSmaStat *pStat = NULL; - - if (!pEnv) { - terrno = TSDB_CODE_INVALID_PTR; - smaWarn("vgId:%d, getTSmaDataImpl failed since pTSmaEnv is NULL", SMA_VID(pSma)); - return TSDB_CODE_FAILED; - } - - pStat = SMA_ENV_STAT(pEnv); - - tdRefSmaStat(pSma, pStat); - SSmaStatItem *pItem = taosHashGet(SMA_ENV_STAT_ITEMS(pEnv), &indexUid, sizeof(indexUid)); - if (!pItem || !(pItem = *(SSmaStatItem **)pItem)) { - // Normally pItem should not be NULL, mark all windows as expired and notify query module to fetch raw TS data if - // it's NULL. - tdUnRefSmaStat(pSma, pStat); - terrno = TSDB_CODE_TDB_INVALID_ACTION; - smaDebug("vgId:%d, getTSmaDataImpl failed since no index %" PRIi64, SMA_VID(pSma), indexUid); - return TSDB_CODE_FAILED; - } - -#if 0 - int32_t nQueryWin = taosArrayGetSize(pQuerySKey); - for (int32_t n = 0; n < nQueryWin; ++n) { - TSKEY skey = taosArrayGet(pQuerySKey, n); - if (taosHashGet(pItem->expireWindows, &skey, sizeof(TSKEY))) { - // TODO: mark this window as expired. - } - } -#endif - -#if 1 - int8_t smaStat = 0; - if (!tdSmaStatIsOK(pItem, &smaStat)) { // TODO: multiple check for large scale sma query - tdUnRefSmaStat(pSma, pStat); - terrno = TSDB_CODE_TSMA_INVALID_STAT; - smaWarn("vgId:%d, getTSmaDataImpl failed from index %" PRIi64 " since %s %" PRIi8, SMA_VID(pSma), indexUid, - tstrerror(terrno), smaStat); - return TSDB_CODE_FAILED; - } - - if (taosHashGet(pItem->expireWindows, &querySKey, sizeof(TSKEY))) { - // TODO: mark this window as expired. - smaDebug("vgId:%d, skey %" PRIi64 " of window exists in expire window for index %" PRIi64, SMA_VID(pSma), querySKey, - indexUid); - } else { - smaDebug("vgId:%d, skey %" PRIi64 " of window not in expire window for index %" PRIi64, SMA_VID(pSma), querySKey, - indexUid); - } - - STSma *pTSma = pItem->pTSma; -#endif - -#if 1 - STSmaReadH tReadH = {0}; - tdInitTSmaReadH(&tReadH, pSma, pTSma->interval, pTSma->intervalUnit); - smaCloseDBF(&tReadH.dFile); - - tdUnRefSmaStat(pSma, pStat); - - tdInitTSmaFile(&tReadH, indexUid, querySKey); - smaDebug("### vgId:%d, read from DBF %s days:%d, interval:%" PRIi64 ", storageLevel:%" PRIi8 " queryKey:%" PRIi64, - SMA_VID(pSma), tReadH.dFile.path, tReadH.days, tReadH.interval, tReadH.storageLevel, querySKey); - if (smaOpenDBF(pEnv->dbEnv, &tReadH.dFile) != 0) { - smaWarn("vgId:%d, open DBF %s failed since %s", SMA_VID(pSma), tReadH.dFile.path, tstrerror(terrno)); - return TSDB_CODE_FAILED; - } - - char smaKey[SMA_KEY_LEN] = {0}; - void *pSmaKey = &smaKey; - int64_t queryGroupId = 0; - tdEncodeTSmaKey(queryGroupId, querySKey, (void **)&pSmaKey); - - smaDebug("vgId:%d, get sma data from %s: smaKey %" PRIx64 "-%" PRIx64 ", keyLen %d", SMA_VID(pSma), tReadH.dFile.path, - *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), SMA_KEY_LEN); - - void *result = NULL; - int32_t valueSize = 0; - if (!(result = smaGetSmaDataByKey(&tReadH.dFile, smaKey, SMA_KEY_LEN, &valueSize))) { - smaWarn("vgId:%d, get sma data failed from smaIndex %" PRIi64 ", smaKey %" PRIx64 "-%" PRIx64 " since %s", - SMA_VID(pSma), indexUid, *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), tstrerror(terrno)); - smaCloseDBF(&tReadH.dFile); - return TSDB_CODE_FAILED; - } -#endif - -#ifdef _TEST_SMA_PRINT_DEBUG_LOG_ - for (uint32_t v = 0; v < valueSize; v += 8) { - smaWarn("vgId:%d, get sma data v[%d]=%" PRIi64, SMA_VID(pSma), v, *(int64_t *)POINTER_SHIFT(result, v)); - } -#endif - taosMemoryFreeClear(result); // TODO: fill the result to output - -#if 0 - int32_t nResult = 0; - int64_t lastKey = 0; - - while (true) { - if (nResult >= nMaxResult) { - break; - } - - // set and open the file according to the STSma param - if (tdSetAndOpenTSmaFile(&tReadH, queryWin)) { - char bTree[100] = "\0"; - while (strncmp(bTree, "has more nodes", 100) == 0) { - if (nResult >= nMaxResult) { - break; - } - // tdGetDataFromBTree(bTree, queryWin, lastKey) - // fill the pData - ++nResult; - } - } - } -#endif - // read data from file and fill the result - smaCloseDBF(&tReadH.dFile); - return TSDB_CODE_SUCCESS; -} - -int32_t tdProcessTSmaCreateImpl(SSma *pSma, int64_t version, const char *pMsg) { - SSmaCfg *pCfg = (SSmaCfg *)pMsg; - - if (metaCreateTSma(SMA_META(pSma), version, pCfg) < 0) { - return -1; - } - - tdTSmaAdd(pSma, 1); - return 0; -} - -int32_t tdDropTSma(SSma *pSma, char *pMsg) { -#if 0 - SVDropTSmaReq vDropSmaReq = {0}; - if (!tDeserializeSVDropTSmaReq(pMsg, &vDropSmaReq)) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - // TODO: send msg to stream computing to drop tSma - // if ((send msg to stream computing) < 0) { - // tDestroyTSma(&vCreateSmaReq); - // return -1; - // } - // - - if (metaDropTSma(SMA_META(pSma), vDropSmaReq.indexUid) < 0) { - // TODO: handle error - return -1; - } - - if (tdDropTSmaData(pSma, vDropSmaReq.indexUid) < 0) { - // TODO: handle error - return -1; - } - - tdTSmaSub(pSma, 1); -#endif - - // TODO: return directly or go on follow steps? - return TSDB_CODE_SUCCESS; -} - -static SSmaStatItem *tdNewSmaStatItem(int8_t state) { - SSmaStatItem *pItem = NULL; - - pItem = (SSmaStatItem *)taosMemoryCalloc(1, sizeof(SSmaStatItem)); - if (!pItem) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; - } - - pItem->state = state; - pItem->expireWindows = taosHashInit(SMA_STATE_ITEM_HASH_SLOT, taosGetDefaultHashFunction(TSDB_DATA_TYPE_TIMESTAMP), - true, HASH_ENTRY_LOCK); - if (!pItem->expireWindows) { - taosMemoryFreeClear(pItem); - return NULL; - } - - return pItem; -} - -static int32_t tdSetExpireWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, int64_t version) { - SSmaStatItem *pItem = taosHashGet(pItemsHash, &indexUid, sizeof(indexUid)); - if (!pItem) { - // TODO: use TSDB_SMA_STAT_EXPIRED and update by stream computing later - pItem = tdNewSmaStatItem(TSDB_SMA_STAT_OK); // TODO use the real state - if (!pItem) { - // Response to stream computing: OOM - // For query, if the indexUid not found, the TSDB should tell query module to query raw TS data. - return TSDB_CODE_FAILED; - } - - // cache smaMeta - STSma *pTSma = metaGetSmaInfoByIndex(SMA_META(pSma), indexUid); - if (!pTSma) { - terrno = TSDB_CODET_TSMA_NO_INDEX_IN_META; - taosHashCleanup(pItem->expireWindows); - taosMemoryFree(pItem); - smaWarn("vgId:%d, set expire window, get tsma meta failed for smaIndex %" PRIi64 " since %s", SMA_VID(pSma), - indexUid, tstrerror(terrno)); - return TSDB_CODE_FAILED; - } - pItem->pTSma = pTSma; - - if (taosHashPut(pItemsHash, &indexUid, sizeof(indexUid), &pItem, sizeof(pItem)) != 0) { - // If error occurs during put smaStatItem, free the resources of pItem - taosHashCleanup(pItem->expireWindows); - taosMemoryFree(pItem); - return TSDB_CODE_FAILED; - } - } else if (!(pItem = *(SSmaStatItem **)pItem)) { - terrno = TSDB_CODE_INVALID_PTR; - return TSDB_CODE_FAILED; - } - - if (taosHashPut(pItem->expireWindows, &winSKey, sizeof(TSKEY), &version, sizeof(version)) != 0) { - // If error occurs during taosHashPut expire windows, remove the smaIndex from pSma->pSmaStat, thus TSDB would - // tell query module to query raw TS data. - // N.B. - // 1) It is assumed to be extemely little probability event of fail to taosHashPut. - // 2) This would solve the inconsistency to some extent, but not completely, unless we record all expired - // windows failed to put into hash table. - taosHashCleanup(pItem->expireWindows); - taosMemoryFreeClear(pItem->pTSma); - taosHashRemove(pItemsHash, &indexUid, sizeof(indexUid)); - smaWarn("vgId:%d, smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window fail", SMA_VID(pSma), indexUid, - winSKey); - return TSDB_CODE_FAILED; - } - - smaDebug("vgId:%d, smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window succeed", SMA_VID(pSma), indexUid, - winSKey); - return TSDB_CODE_SUCCESS; -} - -/** - * @brief Update expire window according to msg from stream computing module. - * - * @param pSma - * @param msg SSubmitReq - * @return int32_t - */ -int32_t tdUpdateExpireWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t version) { - // no time-range-sma, just return success - if (atomic_load_16(&SMA_TSMA_NUM(pSma)) <= 0) { - smaTrace("vgId:%d, not update expire window since no tSma", SMA_VID(pSma)); - return TSDB_CODE_SUCCESS; - } - - if (!SMA_META(pSma)) { - terrno = TSDB_CODE_INVALID_PTR; - smaError("vgId:%d, update expire window failed since no meta ptr", SMA_VID(pSma)); - return TSDB_CODE_FAILED; - } - - if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE, false) < 0) { - smaError("vgId:%d, init sma env failed since %s", SMA_VID(pSma), terrstr(terrno)); - terrno = TSDB_CODE_TDB_INIT_FAILED; - return TSDB_CODE_FAILED; - } - - // Firstly, assume that tSma can only be created on super table/normal table. - // getActiveTimeWindow - - SSmaEnv *pEnv = SMA_TSMA_ENV(pSma); - SSmaStat *pStat = SMA_ENV_STAT(pEnv); - SHashObj *pItemsHash = SMA_ENV_STAT_ITEMS(pEnv); - - TASSERT(pEnv && pStat && pItemsHash); - - // basic procedure - // TODO: optimization - tdRefSmaStat(pSma, pStat); - - SSubmitMsgIter msgIter = {0}; - SSubmitBlk *pBlock = NULL; - SInterval interval = {0}; - TSKEY lastWinSKey = INT64_MIN; - - if (tInitSubmitMsgIter(pMsg, &msgIter) < 0) { - return TSDB_CODE_FAILED; - } - - while (true) { - tGetSubmitMsgNext(&msgIter, &pBlock); - if (!pBlock) break; - - STSmaWrapper *pSW = NULL; - STSma *pTSma = NULL; - - SSubmitBlkIter blkIter = {0}; - if (tInitSubmitBlkIter(&msgIter, pBlock, &blkIter) < 0) { - pSW = tFreeTSmaWrapper(pSW, false); - break; - } - - while (true) { - STSRow *row = tGetSubmitBlkNext(&blkIter); - if (!row) { - pSW = tFreeTSmaWrapper(pSW, false); - break; - } - if (!pSW || (pTSma && (pTSma->tableUid != msgIter.suid))) { - if (pSW) { - pSW = tFreeTSmaWrapper(pSW, false); - } - if (!(pSW = metaGetSmaInfoByTable(SMA_META(pSma), msgIter.suid, false))) { - break; - } - if ((pSW->number) <= 0 || !pSW->tSma) { - pSW = tFreeTSmaWrapper(pSW, false); - break; - } - - pTSma = pSW->tSma; - - interval.interval = pTSma->interval; - interval.intervalUnit = pTSma->intervalUnit; - interval.offset = pTSma->offset; - interval.precision = SMA_TSDB_CFG(pSma)->precision; - interval.sliding = pTSma->sliding; - interval.slidingUnit = pTSma->slidingUnit; - } - - // TODO: process multiple tsma for one table uid - TSKEY winSKey = taosTimeTruncate(TD_ROW_KEY(row), &interval, interval.precision); - - if (lastWinSKey != winSKey) { - lastWinSKey = winSKey; - if (tdSetExpireWindow(pSma, pItemsHash, pTSma->indexUid, winSKey, version) < 0) { - pSW = tFreeTSmaWrapper(pSW, false); - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - } else { - smaDebug("vgId:%d, smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window ignore as duplicated", - SMA_VID(pSma), pTSma->indexUid, winSKey); - } - } - } - - tdUnRefSmaStat(pSma, pStat); - - return TSDB_CODE_SUCCESS; -} diff --git a/source/dnode/vnode/src/sma/smaTimeRange2.c b/source/dnode/vnode/src/sma/smaTimeRange2.c index 760eb808bb..9c613873ab 100644 --- a/source/dnode/vnode/src/sma/smaTimeRange2.c +++ b/source/dnode/vnode/src/sma/smaTimeRange2.c @@ -30,54 +30,8 @@ typedef STsdbCfg STSmaKeepCfg; #define SMA_STATE_ITEM_HASH_SLOT 32 -typedef struct { - SSma *pSma; - SDBFile dFile; - const SArray *pDataBlocks; // sma data - int64_t interval; // interval with the precision of DB -} STSmaWriteH; - -typedef struct { - int32_t iter; - int32_t fid; -} SmaFsIter; - -typedef struct { - STsdb *pTsdb; - SSma *pSma; - SDBFile dFile; - int64_t interval; // interval with the precision of DB - int32_t blockSize; // size of SMA block item - int32_t days; - int8_t storageLevel; - SmaFsIter smaFsIter; -} STSmaReadH; - -typedef enum { - SMA_STORAGE_LEVEL_TSDB = 0, // use days of self-defined e.g. vnode${N}/tsdb/tsma/sma_index_uid/v2f200.tsma - SMA_STORAGE_LEVEL_DFILESET = 1 // use days of TS data e.g. vnode${N}/tsdb/tsma/sma_index_uid/v2f1906.tsma -} ESmaStorageLevel; - // static func -static int64_t tdGetIntervalByPrecision(int64_t interval, uint8_t intervalUnit, int8_t precision, bool adjusted); -static int32_t tdGetSmaStorageLevel(STSmaKeepCfg *pCfg, int64_t interval); -static int32_t tdInitTSmaWriteH(STSmaWriteH *pSmaH, SSma *pSma, const SArray *pDataBlocks, int64_t interval, - int8_t intervalUnit); -static int32_t tdInitTSmaReadH(STSmaReadH *pSmaH, SSma *pSma, int64_t interval, int8_t intervalUnit); -static void tdDestroyTSmaWriteH(STSmaWriteH *pSmaH); -static int32_t tdGetTSmaDays(SSma *pSma, int64_t interval, int32_t storageLevel); -static int32_t tdSetTSmaDataFile(STSmaWriteH *pSmaH, int64_t indexUid, int32_t fid); -static int32_t tdInitTSmaFile(STSmaReadH *pSmaH, int64_t indexUid, TSKEY skey); -static bool tdSetAndOpenTSmaFile(STSmaReadH *pReadH, TSKEY *queryKey); -static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyLen, void *pData, int32_t dataLen, - TXN *txn); -// expire window - -static int32_t tdSetExpireWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, int64_t version); -static int32_t tdResetExpireWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey); -static int32_t tdDropTSmaDataImpl(SSma *pSma, int64_t indexUid); - /** * @brief Judge the tsma file split days * @@ -87,7 +41,7 @@ static int32_t tdDropTSmaDataImpl(SSma *pSma, int64_t indexUid); * @param days unit is minute * @return int32_t */ -int32_t tdGetTSmaDaysImpl(SVnodeCfg *pCfg, void *pCont, uint32_t contLen, int32_t *days) { +int32_t tdProcessTSmaGetDaysImpl(SVnodeCfg *pCfg, void *pCont, uint32_t contLen, int32_t *days) { SDecoder coder = {0}; tDecoderInit(&coder, pCont, contLen); @@ -130,225 +84,6 @@ _err: // implementation -/** - * @brief - * - * @param pSmaH - * @param pSma - * @param interval - * @param intervalUnit - * @return int32_t - */ -static int32_t tdInitTSmaReadH(STSmaReadH *pSmaH, SSma *pSma, int64_t interval, int8_t intervalUnit) { - STSmaKeepCfg *pCfg = SMA_TSDB_CFG(pSma); - pSmaH->pSma = pSma; - pSmaH->interval = tdGetIntervalByPrecision(interval, intervalUnit, SMA_TSDB_CFG(pSma)->precision, true); - pSmaH->storageLevel = tdGetSmaStorageLevel(pCfg, interval); - pSmaH->days = tdGetTSmaDays(pSma, pSmaH->interval, pSmaH->storageLevel); - return TSDB_CODE_SUCCESS; -} - -/** - * @brief Init of tsma FS - * - * @param pReadH - * @param indexUid - * @param skey - * @return int32_t - */ -static int32_t tdInitTSmaFile(STSmaReadH *pSmaH, int64_t indexUid, TSKEY skey) { - SSma *pSma = pSmaH->pSma; - - int32_t fid = (int32_t)(TSDB_KEY_FID(skey, pSmaH->days, SMA_TSDB_CFG(pSma)->precision)); - char tSmaFile[TSDB_FILENAME_LEN] = {0}; - snprintf(tSmaFile, TSDB_FILENAME_LEN, "%" PRIi64 "%sv%df%d.tsma", indexUid, TD_DIRSEP, SMA_VID(pSma), fid); - pSmaH->dFile.path = strdup(tSmaFile); - pSmaH->smaFsIter.iter = 0; - pSmaH->smaFsIter.fid = fid; - return TSDB_CODE_SUCCESS; -} - -/** - * @brief Set and open tsma file if it has key locates in queryWin. - * - * @param pReadH - * @param param - * @param queryWin - * @return true - * @return false - */ -static bool tdSetAndOpenTSmaFile(STSmaReadH *pReadH, TSKEY *queryKey) { - // SArray *smaFs = pReadH->pTsdb->fs->cstatus->sf; - // int32_t nSmaFs = taosArrayGetSize(smaFs); - - smaCloseDBF(&pReadH->dFile); - -#if 0 - while (pReadH->smaFsIter.iter < nSmaFs) { - void *pSmaFile = taosArrayGet(smaFs, pReadH->smaFsIter.iter); - if (pSmaFile) { // match(indexName, queryWindow) - // TODO: select the file by index_name ... - pReadH->dFile = pSmaFile; - ++pReadH->smaFsIter.iter; - break; - } - ++pReadH->smaFsIter.iter; - } - - if (pReadH->pDFile) { - tdDebug("vg%d: smaFile %s matched", REPO_ID(pReadH->pTsdb), "[pSmaFile dir]"); - return true; - } -#endif - - return false; -} - -/** - * @brief Approximate value for week/month/year. - * - * @param interval - * @param intervalUnit - * @param precision - * @param adjusted Interval already adjusted according to DB precision - * @return int64_t - */ -static int64_t tdGetIntervalByPrecision(int64_t interval, uint8_t intervalUnit, int8_t precision, bool adjusted) { - if (adjusted) { - return interval; - } - - switch (intervalUnit) { - case TIME_UNIT_YEAR: // approximate value - interval *= 365 * 86400 * 1e3; - break; - case TIME_UNIT_MONTH: // approximate value - interval *= 30 * 86400 * 1e3; - break; - case TIME_UNIT_WEEK: // approximate value - interval *= 7 * 86400 * 1e3; - break; - case TIME_UNIT_DAY: // the interval for tSma calculation must <= day - interval *= 86400 * 1e3; - break; - case TIME_UNIT_HOUR: - interval *= 3600 * 1e3; - break; - case TIME_UNIT_MINUTE: - interval *= 60 * 1e3; - break; - case TIME_UNIT_SECOND: - interval *= 1e3; - break; - default: - break; - } - - switch (precision) { - case TSDB_TIME_PRECISION_MILLI: - if (TIME_UNIT_MICROSECOND == intervalUnit) { // us - return interval / 1e3; - } else if (TIME_UNIT_NANOSECOND == intervalUnit) { // nano second - return interval / 1e6; - } else { // ms - return interval; - } - break; - case TSDB_TIME_PRECISION_MICRO: - if (TIME_UNIT_MICROSECOND == intervalUnit) { // us - return interval; - } else if (TIME_UNIT_NANOSECOND == intervalUnit) { // ns - return interval / 1e3; - } else { // ms - return interval * 1e3; - } - break; - case TSDB_TIME_PRECISION_NANO: - if (TIME_UNIT_MICROSECOND == intervalUnit) { // us - return interval * 1e3; - } else if (TIME_UNIT_NANOSECOND == intervalUnit) { // ns - return interval; - } else { // ms - return interval * 1e6; - } - break; - default: // ms - if (TIME_UNIT_MICROSECOND == intervalUnit) { // us - return interval / 1e3; - } else if (TIME_UNIT_NANOSECOND == intervalUnit) { // ns - return interval / 1e6; - } else { // ms - return interval; - } - break; - } - return interval; -} - -static int32_t tdInitTSmaWriteH(STSmaWriteH *pSmaH, SSma *pSma, const SArray *pDataBlocks, int64_t interval, - int8_t intervalUnit) { - pSmaH->pSma = pSma; - pSmaH->interval = tdGetIntervalByPrecision(interval, intervalUnit, SMA_TSDB_CFG(pSma)->precision, true); - pSmaH->pDataBlocks = pDataBlocks; - pSmaH->dFile.fid = SMA_IVLD_FID; - return TSDB_CODE_SUCCESS; -} - -static void tdDestroyTSmaWriteH(STSmaWriteH *pSmaH) { - if (pSmaH) { - smaCloseDBF(&pSmaH->dFile); - } -} - -static int32_t tdSetTSmaDataFile(STSmaWriteH *pSmaH, int64_t indexUid, int32_t fid) { - SSma *pSma = pSmaH->pSma; - ASSERT(!pSmaH->dFile.path && !pSmaH->dFile.pDB); - - pSmaH->dFile.fid = fid; - char tSmaFile[TSDB_FILENAME_LEN] = {0}; - snprintf(tSmaFile, TSDB_FILENAME_LEN, "%" PRIi64 "%sv%df%d.tsma", indexUid, TD_DIRSEP, SMA_VID(pSma), fid); - pSmaH->dFile.path = strdup(tSmaFile); - - return TSDB_CODE_SUCCESS; -} - -/** - * @brief - * - * @param pSma - * @param interval Interval calculated by DB's precision - * @param storageLevel - * @return int32_t - */ -static int32_t tdGetTSmaDays(SSma *pSma, int64_t interval, int32_t storageLevel) { - STsdbCfg *pCfg = SMA_TSDB_CFG(pSma); - int32_t daysPerFile = pCfg->days; // unit is minute - - if (storageLevel == SMA_STORAGE_LEVEL_TSDB) { - int32_t minutes = SMA_STORAGE_TSDB_TIMES * (interval / tsTickPerMin[pCfg->precision]); - if (minutes > SMA_STORAGE_TSDB_MINUTES) { - daysPerFile = SMA_STORAGE_TSDB_MINUTES; - } - } - - return daysPerFile; -} - -/** - * @brief Judge the tsma storage level - * - * @param pCfg - * @param interval - * @return int32_t - */ -static int32_t tdGetSmaStorageLevel(STSmaKeepCfg *pCfg, int64_t interval) { - int64_t mInterval = convertTimeFromPrecisionToUnit(interval, pCfg->precision, TIME_UNIT_MINUTE); - if (pCfg->days / mInterval >= SMA_STORAGE_SPLIT_FACTOR) { - return SMA_STORAGE_LEVEL_DFILESET; - } - return SMA_STORAGE_LEVEL_TSDB; -} - /** * @brief Insert/Update Time-range-wise SMA data. * - If interval < SMA_STORAGE_SPLIT_HOURS(e.g. 24), save the SMA data as a part of DFileSet to e.g. @@ -363,7 +98,7 @@ static int32_t tdGetSmaStorageLevel(STSmaKeepCfg *pCfg, int64_t interval) { */ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { STsdbCfg *pCfg = SMA_TSDB_CFG(pSma); -#if 0 + const SArray *pDataBlocks = (const SArray *)msg; // TODO: destroy SSDataBlocks(msg) @@ -386,7 +121,6 @@ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { smaWarn("vgId:%d, insert tsma data failed since pDataBlocks is empty", SMA_VID(pSma)); return TSDB_CODE_FAILED; } -#endif SSmaEnv *pEnv = SMA_TSMA_ENV(pSma); SSmaStat *pStat = SMA_ENV_STAT(pEnv); @@ -411,285 +145,6 @@ int32_t tdProcessTSmaInsertImpl(SSma *pSma, int64_t indexUid, const char *msg) { return TSDB_CODE_SUCCESS; } -int32_t tdDropTSmaData(SSma *pSma, int64_t indexUid) { - int32_t code = TSDB_CODE_SUCCESS; - if ((code = tdDropTSmaDataImpl(pSma, indexUid)) < 0) { - smaWarn("vgId:%d, drop tsma data failed since %s", SMA_VID(pSma), tstrerror(terrno)); - } - return code; -} - -/** - * @brief Insert TSma data blocks to DB File build by B+Tree - * - * @param pSmaH - * @param smaKey tableUid-colId-skeyOfWindow(8-2-8) - * @param keyLen - * @param pData - * @param dataLen - * @return int32_t - */ -static int32_t tdInsertTSmaBlocks(STSmaWriteH *pSmaH, void *smaKey, int32_t keyLen, void *pData, int32_t dataLen, - TXN *txn) { - SDBFile *pDBFile = &pSmaH->dFile; - - // TODO: insert tsma data blocks into B+Tree(TTB) - if (smaSaveSmaToDB(pDBFile, smaKey, keyLen, pData, dataLen, txn) != 0) { - smaWarn("vgId:%d, insert tsma data blocks into %s: smaKey %" PRIx64 "-%" PRIx64 ", dataLen %" PRIu32 " fail", - SMA_VID(pSmaH->pSma), pDBFile->path, *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), dataLen); - return TSDB_CODE_FAILED; - } - smaDebug("vgId:%d, insert tsma data blocks into %s: smaKey %" PRIx64 "-%" PRIx64 ", dataLen %" PRIu32 " succeed", - SMA_VID(pSmaH->pSma), pDBFile->path, *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), dataLen); - -#ifdef _TEST_SMA_PRINT_DEBUG_LOG_ - uint32_t valueSize = 0; - void *data = tdGetSmaDataByKey(pDBFile, smaKey, keyLen, &valueSize); - ASSERT(data != NULL); - for (uint32_t v = 0; v < valueSize; v += 8) { - smaWarn("vgId:%d, insert sma data val[%d] %" PRIi64, REPO_ID(pSmaH->pTsdb), v, *(int64_t *)POINTER_SHIFT(data, v)); - } -#endif - return TSDB_CODE_SUCCESS; -} - -/** - * @brief When sma data received from stream computing, make the relative expire window valid. - * - * @param pSma - * @param pStat - * @param indexUid - * @param skey - * @return int32_t - */ -static int32_t tdResetExpireWindow(SSma *pSma, SSmaStat *pStat, int64_t indexUid, TSKEY skey) { - SSmaStatItem *pItem = NULL; - - tdRefSmaStat(pSma, pStat); - - if (pStat && SMA_STAT_ITEMS(pStat)) { - pItem = taosHashGet(SMA_STAT_ITEMS(pStat), &indexUid, sizeof(indexUid)); - } - if ((pItem) && ((pItem = *(SSmaStatItem **)pItem))) { - // pItem resides in hash buffer all the time unless drop sma index - // TODO: multithread protect - if (taosHashRemove(pItem->expireWindows, &skey, sizeof(TSKEY)) != 0) { - // error handling - tdUnRefSmaStat(pSma, pStat); - smaWarn("vgId:%d, remove skey %" PRIi64 " from expire window for sma index %" PRIi64 " fail", SMA_VID(pSma), skey, - indexUid); - return TSDB_CODE_FAILED; - } - smaDebug("vgId:%d, remove skey %" PRIi64 " from expire window for sma index %" PRIi64 " succeed", SMA_VID(pSma), - skey, indexUid); - // TODO: use a standalone interface to received state upate notification from stream computing module. - /** - * @brief state - * - When SMA env init in TSDB, its status is TSDB_SMA_STAT_OK. - * - In startup phase of stream computing module, it should notify the SMA env in TSDB to expired if needed(e.g. - * when batch data caculation not finised) - * - When TSDB_SMA_STAT_OK, the stream computing module should also notify that to the SMA env in TSDB. - */ - pItem->state = TSDB_SMA_STAT_OK; - } else { - // error handling - tdUnRefSmaStat(pSma, pStat); - smaWarn("vgId:%d, expire window %" PRIi64 " not exists for sma index %" PRIi64, SMA_VID(pSma), skey, indexUid); - return TSDB_CODE_FAILED; - } - - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_SUCCESS; -} - -/** - * @brief Drop tsma data and local cache - * - insert/query reference - * @param pSma - * @param msg - * @return int32_t - */ -static int32_t tdDropTSmaDataImpl(SSma *pSma, int64_t indexUid) { - SSmaEnv *pEnv = atomic_load_ptr(&SMA_TSMA_ENV(pSma)); - - // clear local cache - if (pEnv) { - smaDebug("vgId:%d, drop tsma local cache for %" PRIi64, SMA_VID(pSma), indexUid); - - SSmaStatItem *pItem = taosHashGet(SMA_ENV_STAT_ITEMS(pEnv), &indexUid, sizeof(indexUid)); - if ((pItem) || ((pItem = *(SSmaStatItem **)pItem))) { - if (tdSmaStatIsDropped(pItem)) { - smaDebug("vgId:%d, tsma stat is already dropped for %" PRIi64, SMA_VID(pSma), indexUid); - return TSDB_CODE_TDB_INVALID_ACTION; // TODO: duplicate drop msg would be intercepted by mnode - } - - tdWLockSmaEnv(pEnv); - if (tdSmaStatIsDropped(pItem)) { - tdUnLockSmaEnv(pEnv); - smaDebug("vgId:%d, tsma stat is already dropped for %" PRIi64, SMA_VID(pSma), indexUid); - return TSDB_CODE_TDB_INVALID_ACTION; // TODO: duplicate drop msg would be intercepted by mnode - } - tdSmaStatSetDropped(pItem); - tdUnLockSmaEnv(pEnv); - - int32_t nSleep = 0; - int32_t refVal = INT32_MAX; - while (true) { - if ((refVal = T_REF_VAL_GET(SMA_ENV_STAT(pEnv))) <= 0) { - smaDebug("vgId:%d, drop index %" PRIi64 " since refVal=%d", SMA_VID(pSma), indexUid, refVal); - break; - } - smaDebug("vgId:%d, wait 1s to drop index %" PRIi64 " since refVal=%d", SMA_VID(pSma), indexUid, refVal); - taosSsleep(1); - if (++nSleep > SMA_DROP_EXPIRED_TIME) { - smaDebug("vgId:%d, drop index %" PRIi64 " after wait %d (refVal=%d)", SMA_VID(pSma), indexUid, nSleep, - refVal); - break; - }; - } - - tdFreeSmaStatItem(pItem); - smaDebug("vgId:%d, getTSmaDataImpl failed since no index %" PRIi64 " in local cache", SMA_VID(pSma), indexUid); - } - } - // clear sma data files - // TODO: - return TSDB_CODE_SUCCESS; -} - -/** - * @brief - * - * @param pSma Return the data between queryWin and fill the pData. - * @param pData - * @param indexUid - * @param pQuerySKey - * @param nMaxResult The query invoker should control the nMaxResult need to return to avoid OOM. - * @return int32_t - */ -int32_t tdGetTSmaDataImpl(SSma *pSma, char *pData, int64_t indexUid, TSKEY querySKey, int32_t nMaxResult) { - SSmaEnv *pEnv = atomic_load_ptr(&SMA_TSMA_ENV(pSma)); - SSmaStat *pStat = NULL; - - if (!pEnv) { - terrno = TSDB_CODE_INVALID_PTR; - smaWarn("vgId:%d, getTSmaDataImpl failed since pTSmaEnv is NULL", SMA_VID(pSma)); - return TSDB_CODE_FAILED; - } - - pStat = SMA_ENV_STAT(pEnv); - - tdRefSmaStat(pSma, pStat); - SSmaStatItem *pItem = taosHashGet(SMA_ENV_STAT_ITEMS(pEnv), &indexUid, sizeof(indexUid)); - if (!pItem || !(pItem = *(SSmaStatItem **)pItem)) { - // Normally pItem should not be NULL, mark all windows as expired and notify query module to fetch raw TS data if - // it's NULL. - tdUnRefSmaStat(pSma, pStat); - terrno = TSDB_CODE_TDB_INVALID_ACTION; - smaDebug("vgId:%d, getTSmaDataImpl failed since no index %" PRIi64, SMA_VID(pSma), indexUid); - return TSDB_CODE_FAILED; - } - -#if 0 - int32_t nQueryWin = taosArrayGetSize(pQuerySKey); - for (int32_t n = 0; n < nQueryWin; ++n) { - TSKEY skey = taosArrayGet(pQuerySKey, n); - if (taosHashGet(pItem->expireWindows, &skey, sizeof(TSKEY))) { - // TODO: mark this window as expired. - } - } -#endif - -#if 1 - int8_t smaStat = 0; - if (!tdSmaStatIsOK(pItem, &smaStat)) { // TODO: multiple check for large scale sma query - tdUnRefSmaStat(pSma, pStat); - terrno = TSDB_CODE_TSMA_INVALID_STAT; - smaWarn("vgId:%d, getTSmaDataImpl failed from index %" PRIi64 " since %s %" PRIi8, SMA_VID(pSma), indexUid, - tstrerror(terrno), smaStat); - return TSDB_CODE_FAILED; - } - - if (taosHashGet(pItem->expireWindows, &querySKey, sizeof(TSKEY))) { - // TODO: mark this window as expired. - smaDebug("vgId:%d, skey %" PRIi64 " of window exists in expire window for index %" PRIi64, SMA_VID(pSma), querySKey, - indexUid); - } else { - smaDebug("vgId:%d, skey %" PRIi64 " of window not in expire window for index %" PRIi64, SMA_VID(pSma), querySKey, - indexUid); - } - - STSma *pTSma = pItem->pTSma; -#endif - -#if 1 - STSmaReadH tReadH = {0}; - tdInitTSmaReadH(&tReadH, pSma, pTSma->interval, pTSma->intervalUnit); - smaCloseDBF(&tReadH.dFile); - - tdUnRefSmaStat(pSma, pStat); - - tdInitTSmaFile(&tReadH, indexUid, querySKey); - smaDebug("### vgId:%d, read from DBF %s days:%d, interval:%" PRIi64 ", storageLevel:%" PRIi8 " queryKey:%" PRIi64, - SMA_VID(pSma), tReadH.dFile.path, tReadH.days, tReadH.interval, tReadH.storageLevel, querySKey); - if (smaOpenDBF(pEnv->dbEnv, &tReadH.dFile) != 0) { - smaWarn("vgId:%d, open DBF %s failed since %s", SMA_VID(pSma), tReadH.dFile.path, tstrerror(terrno)); - return TSDB_CODE_FAILED; - } - - char smaKey[SMA_KEY_LEN] = {0}; - void *pSmaKey = &smaKey; - int64_t queryGroupId = 0; - tdEncodeTSmaKey(queryGroupId, querySKey, (void **)&pSmaKey); - - smaDebug("vgId:%d, get sma data from %s: smaKey %" PRIx64 "-%" PRIx64 ", keyLen %d", SMA_VID(pSma), tReadH.dFile.path, - *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), SMA_KEY_LEN); - - void *result = NULL; - int32_t valueSize = 0; - if (!(result = smaGetSmaDataByKey(&tReadH.dFile, smaKey, SMA_KEY_LEN, &valueSize))) { - smaWarn("vgId:%d, get sma data failed from smaIndex %" PRIi64 ", smaKey %" PRIx64 "-%" PRIx64 " since %s", - SMA_VID(pSma), indexUid, *(int64_t *)smaKey, *(int64_t *)POINTER_SHIFT(smaKey, 8), tstrerror(terrno)); - smaCloseDBF(&tReadH.dFile); - return TSDB_CODE_FAILED; - } -#endif - -#ifdef _TEST_SMA_PRINT_DEBUG_LOG_ - for (uint32_t v = 0; v < valueSize; v += 8) { - smaWarn("vgId:%d, get sma data v[%d]=%" PRIi64, SMA_VID(pSma), v, *(int64_t *)POINTER_SHIFT(result, v)); - } -#endif - taosMemoryFreeClear(result); // TODO: fill the result to output - -#if 0 - int32_t nResult = 0; - int64_t lastKey = 0; - - while (true) { - if (nResult >= nMaxResult) { - break; - } - - // set and open the file according to the STSma param - if (tdSetAndOpenTSmaFile(&tReadH, queryWin)) { - char bTree[100] = "\0"; - while (strncmp(bTree, "has more nodes", 100) == 0) { - if (nResult >= nMaxResult) { - break; - } - // tdGetDataFromBTree(bTree, queryWin, lastKey) - // fill the pData - ++nResult; - } - } - } -#endif - // read data from file and fill the result - smaCloseDBF(&tReadH.dFile); - return TSDB_CODE_SUCCESS; -} - int32_t tdProcessTSmaCreateImpl(SSma *pSma, int64_t version, const char *pMsg) { SSmaCfg *pCfg = (SSmaCfg *)pMsg; @@ -712,294 +167,4 @@ int32_t tdProcessTSmaCreateImpl(SSma *pSma, int64_t version, const char *pMsg) { tdTSmaAdd(pSma, 1); return 0; -} - -int32_t tdDropTSma(SSma *pSma, char *pMsg) { -#if 0 - SVDropTSmaReq vDropSmaReq = {0}; - if (!tDeserializeSVDropTSmaReq(pMsg, &vDropSmaReq)) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - // TODO: send msg to stream computing to drop tsma - // if ((send msg to stream computing) < 0) { - // tDestroyTSma(&vCreateSmaReq); - // return -1; - // } - // - - if (metaDropTSma(SMA_META(pSma), vDropSmaReq.indexUid) < 0) { - // TODO: handle error - return -1; - } - - if (tdDropTSmaData(pSma, vDropSmaReq.indexUid) < 0) { - // TODO: handle error - return -1; - } - - tdTSmaSub(pSma, 1); -#endif - - // TODO: return directly or go on follow steps? - return TSDB_CODE_SUCCESS; -} - -static SSmaStatItem *tdNewSmaStatItem(int8_t state) { - SSmaStatItem *pItem = NULL; - - pItem = (SSmaStatItem *)taosMemoryCalloc(1, sizeof(SSmaStatItem)); - if (!pItem) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; - } - - pItem->state = state; - pItem->expireWindows = taosHashInit(SMA_STATE_ITEM_HASH_SLOT, taosGetDefaultHashFunction(TSDB_DATA_TYPE_TIMESTAMP), - true, HASH_ENTRY_LOCK); - if (!pItem->expireWindows) { - taosMemoryFreeClear(pItem); - return NULL; - } - - return pItem; -} - -static int32_t tdSetExpireWindow(SSma *pSma, SHashObj *pItemsHash, int64_t indexUid, int64_t winSKey, int64_t version) { - SSmaStatItem *pItem = taosHashGet(pItemsHash, &indexUid, sizeof(indexUid)); - if (!pItem) { - // TODO: use TSDB_SMA_STAT_EXPIRED and update by stream computing later - pItem = tdNewSmaStatItem(TSDB_SMA_STAT_OK); // TODO use the real state - if (!pItem) { - // Response to stream computing: OOM - // For query, if the indexUid not found, the TSDB should tell query module to query raw TS data. - return TSDB_CODE_FAILED; - } - - // cache smaMeta - STSma *pTSma = metaGetSmaInfoByIndex(SMA_META(pSma), indexUid); - if (!pTSma) { - terrno = TSDB_CODE_TSMA_NO_INDEX_IN_META; - taosHashCleanup(pItem->expireWindows); - taosMemoryFree(pItem); - smaWarn("vgId:%d, set expire window, get tsma meta failed for smaIndex %" PRIi64 " since %s", SMA_VID(pSma), - indexUid, tstrerror(terrno)); - return TSDB_CODE_FAILED; - } - pItem->pTSma = pTSma; - - if (taosHashPut(pItemsHash, &indexUid, sizeof(indexUid), &pItem, sizeof(pItem)) != 0) { - // If error occurs during put smaStatItem, free the resources of pItem - taosHashCleanup(pItem->expireWindows); - taosMemoryFree(pItem); - return TSDB_CODE_FAILED; - } - } else if (!(pItem = *(SSmaStatItem **)pItem)) { - terrno = TSDB_CODE_INVALID_PTR; - return TSDB_CODE_FAILED; - } - - if (taosHashPut(pItem->expireWindows, &winSKey, sizeof(TSKEY), &version, sizeof(version)) != 0) { - // If error occurs during taosHashPut expire windows, remove the smaIndex from pSma->pSmaStat, thus TSDB would - // tell query module to query raw TS data. - // N.B. - // 1) It is assumed to be extemely little probability event of fail to taosHashPut. - // 2) This would solve the inconsistency to some extent, but not completely, unless we record all expired - // windows failed to put into hash table. - taosHashCleanup(pItem->expireWindows); - taosMemoryFreeClear(pItem->pTSma); - taosHashRemove(pItemsHash, &indexUid, sizeof(indexUid)); - smaWarn("vgId:%d, smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window fail", SMA_VID(pSma), indexUid, - winSKey); - return TSDB_CODE_FAILED; - } - - smaDebug("vgId:%d, smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window succeed", SMA_VID(pSma), indexUid, - winSKey); - return TSDB_CODE_SUCCESS; -} - -/** - * @brief Update expire window according to msg from stream computing module. - * - * @param pSma - * @param msg SSubmitReq - * @return int32_t - */ -int32_t tdUpdateExpireWindowImpl(SSma *pSma, const SSubmitReq *pMsg, int64_t version) { - // no time-range-sma, just return success - if (atomic_load_16(&SMA_TSMA_NUM(pSma)) <= 0) { - smaTrace("vgId:%d, not update expire window since no tsma", SMA_VID(pSma)); - return TSDB_CODE_SUCCESS; - } - - if (!SMA_META(pSma)) { - terrno = TSDB_CODE_INVALID_PTR; - smaError("vgId:%d, update expire window failed since no meta ptr", SMA_VID(pSma)); - return TSDB_CODE_FAILED; - } - - if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE, false) < 0) { - smaError("vgId:%d, init tsma env failed since %s", SMA_VID(pSma), terrstr(terrno)); - terrno = TSDB_CODE_TDB_INIT_FAILED; - return TSDB_CODE_FAILED; - } - - // Firstly, assume that tsma can only be created on super table/normal table. - // getActiveTimeWindow - - SSmaEnv *pEnv = SMA_TSMA_ENV(pSma); - SSmaStat *pStat = SMA_ENV_STAT(pEnv); - SHashObj *pItemsHash = SMA_ENV_STAT_ITEMS(pEnv); - - TASSERT(pEnv && pStat && pItemsHash); - - // basic procedure - // TODO: optimization - tdRefSmaStat(pSma, pStat); - - SSubmitMsgIter msgIter = {0}; - SSubmitBlk *pBlock = NULL; - SInterval interval = {0}; - TSKEY lastWinSKey = INT64_MIN; - - if (tInitSubmitMsgIter(pMsg, &msgIter) < 0) { - return TSDB_CODE_FAILED; - } - - while (true) { - tGetSubmitMsgNext(&msgIter, &pBlock); - if (!pBlock) break; - - STSmaWrapper *pSW = NULL; - STSma *pTSma = NULL; - - SSubmitBlkIter blkIter = {0}; - if (tInitSubmitBlkIter(&msgIter, pBlock, &blkIter) < 0) { - pSW = tFreeTSmaWrapper(pSW, false); - break; - } - - while (true) { - STSRow *row = tGetSubmitBlkNext(&blkIter); - if (!row) { - pSW = tFreeTSmaWrapper(pSW, false); - break; - } - if (!pSW || (pTSma && (pTSma->tableUid != msgIter.suid))) { - if (pSW) { - pSW = tFreeTSmaWrapper(pSW, false); - } - if (!(pSW = metaGetSmaInfoByTable(SMA_META(pSma), msgIter.suid, false))) { - break; - } - if ((pSW->number) <= 0 || !pSW->tSma) { - pSW = tFreeTSmaWrapper(pSW, false); - break; - } - - pTSma = pSW->tSma; - - interval.interval = pTSma->interval; - interval.intervalUnit = pTSma->intervalUnit; - interval.offset = pTSma->offset; - interval.precision = SMA_TSDB_CFG(pSma)->precision; - interval.sliding = pTSma->sliding; - interval.slidingUnit = pTSma->slidingUnit; - } - - // TODO: process multiple tsma for one table uid - TSKEY winSKey = taosTimeTruncate(TD_ROW_KEY(row), &interval, interval.precision); - - if (lastWinSKey != winSKey) { - lastWinSKey = winSKey; - if (tdSetExpireWindow(pSma, pItemsHash, pTSma->indexUid, winSKey, version) < 0) { - pSW = tFreeTSmaWrapper(pSW, false); - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - } else { - smaDebug("vgId:%d, smaIndex %" PRIi64 ", put skey %" PRIi64 " to expire window ignore as duplicated", - SMA_VID(pSma), pTSma->indexUid, winSKey); - } - } - } - - tdUnRefSmaStat(pSma, pStat); - - return TSDB_CODE_SUCCESS; -} - -/** - * @brief Clear skeys from tsma dstVgroups in expire window. - * - * @param pSma - * @param pMsg - * @return int32_t - */ -int32_t tdClearExpireWindowImpl(SSma *pSma, const SVClrTsmaExpWndsReq *pMsg) { - int64_t indexUid = pMsg->indexUid; - - if (atomic_load_16(&SMA_TSMA_NUM(pSma)) <= 0) { - smaWarn("vgId:%d, not clear expire window since no tsma for smaIndex %" PRIi64, SMA_VID(pSma), indexUid); - terrno = TSDB_CODE_TSMA_INVALID_ENV; - return TSDB_CODE_FAILED; - } - - if (tdCheckAndInitSmaEnv(pSma, TSDB_SMA_TYPE_TIME_RANGE, true) < 0) { - smaWarn("vgId:%d, not clear expire window since no tsma env", SMA_VID(pSma)); - terrno = TSDB_CODE_TSMA_INVALID_ENV; - return TSDB_CODE_FAILED; - } - - // Firstly, assume that tsma can only be created on super table/normal table. - // getActiveTimeWindow - - SSmaEnv *pEnv = SMA_TSMA_ENV(pSma); - SSmaStat *pStat = SMA_ENV_STAT(pEnv); - SHashObj *pItemsHash = SMA_ENV_STAT_ITEMS(pEnv); - - ASSERT(pEnv && pStat && pItemsHash); - - // basic procedure - // TODO: optimization - tdRefSmaStat(pSma, pStat); - - SSmaStatItem *pItem = taosHashGet(pItemsHash, &indexUid, sizeof(indexUid)); - if (!pItem || !(pItem = *(SSmaStatItem **)pItem)) { - smaWarn("vgId:%d, no sma item to clear expire window for smaIndex %" PRIi64, SMA_VID(pSma), indexUid); - terrno = TSDB_CODE_TSMA_NO_INDEX_IN_CACHE; - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - - for (int64_t i = 0; i < pMsg->nItems; ++i) { - const SVTsmaExpWndItem *pWndItem = &pMsg->items[i]; - int64_t winSKey = pWndItem->skey; - for (int64_t j = 0; j < pWndItem->nKeys; ++j) { - winSKey += pItem->pTSma->interval; - if (taosHashRemove(pItem->expireWindows, &winSKey, sizeof(winSKey)) != 0) { - // If error occurs during taosHashRemove expire windows, remove the smaIndex from pSma->pSmaStat, thus TSDB - // would tell query module to query raw TS data. N.B. - // 1) It is assumed to be extemely little probability event of fail to taosHashPut. - // 2) This would solve the inconsistency to some extent, but not completely, unless we record all expired - // windows failed to put into hash table. - taosHashCleanup(pItem->expireWindows); - taosMemoryFreeClear(pItem->pTSma); - taosHashRemove(pItemsHash, &indexUid, sizeof(indexUid)); - smaWarn("vgId:%d, rm skey %" PRIi64 " in expire window for smaIndex %" PRIi64 " fail", SMA_VID(pSma), winSKey, - indexUid); - terrno = TSDB_CODE_TSMA_RM_SKEY_IN_HASH; - tdUnRefSmaStat(pSma, pStat); - return TSDB_CODE_FAILED; - } - smaDebug("vgId:%d, rm skey %" PRIi64 " in expire window for smaIndex %" PRIi64 " success", SMA_VID(pSma), winSKey, - indexUid); - } - } - - tdUnRefSmaStat(pSma, pStat); - - return TSDB_CODE_SUCCESS; } \ No newline at end of file diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index 2f26fba50a..3af8901b2b 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -238,16 +238,13 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) if (msgType == TDMT_VND_SUBMIT) { if (taosHashGetSize(pTq->pStreamTasks) == 0) return 0; - if (tdUpdateExpireWindow(pTq->pVnode->pSma, msg, ver) != 0) { - // TODO handle sma error + void* data = taosMemoryMalloc(msgLen); + if (data == NULL) { + return -1; } - // void* data = taosMemoryMalloc(msgLen); - // if (data == NULL) { - // return -1; - // } - // memcpy(data, msg, msgLen); + memcpy(data, msg, msgLen); - // tqProcessStreamTrigger(pTq, data); + tqProcessStreamTrigger(pTq, data); } return 0; diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index eb489a6d81..671e181dac 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -26,7 +26,6 @@ static int32_t vnodeProcessCreateTSmaReq(SVnode *pVnode, int64_t version, void * static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessAlterHasnRangeReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessWriteMsg(SVnode *pVnode, int64_t version, SRpcMsg *pMsg, SRpcMsg *pRsp); -static int32_t vnodeProcessExpWndsClrReq(SVnode *pVnode, void *pReq, int32_t len, SRpcMsg *pRsp); int32_t vnodePreprocessReq(SVnode *pVnode, SRpcMsg *pMsg) { int32_t code = 0; @@ -177,12 +176,10 @@ int32_t vnodeProcessWriteReq(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp vTrace("vgId:%d, process %s request success, index:%" PRId64, TD_VID(pVnode), TMSG_INFO(pMsg->msgType), version); -#if 0 if (tqPushMsg(pVnode->pTq, pMsg->pCont, pMsg->contLen, pMsg->msgType, version) < 0) { vError("vgId:%d, failed to push msg to TQ since %s", TD_VID(pVnode), tstrerror(terrno)); return -1; } -#endif // commit if need if (vnodeShouldCommit(pVnode)) { @@ -254,8 +251,6 @@ int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { return tqProcessTaskDispatchRsp(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_RECOVER_RSP: return tqProcessTaskRecoverRsp(pVnode->pTq, pMsg); - case TDMT_VND_CLR_TSMA_EXP_WNDS: - return vnodeProcessExpWndsClrReq(pVnode, pMsg, msgLen, NULL); default: vError("unknown msg type:%d in fetch queue", pMsg->msgType); return TSDB_CODE_VND_APP_ERROR; @@ -283,10 +278,7 @@ void smaHandleRes(void *pVnode, int64_t smaId, const SArray *data) { // TODO // blockDebugShowData(data, __func__); -#if 0 tdProcessTSmaInsert(((SVnode *)pVnode)->pSma, smaId, (const char *)data); -#endif - tdProcessTSmaInsert(((SVnode *)pVnode)->pSma, TD_DEBUG_SMA_ID, NULL); } void vnodeUpdateMetaRsp(SVnode *pVnode, STableMetaRsp *pMetaRsp) { @@ -900,45 +892,6 @@ static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t version, void return 0; } -static int32_t vnodeProcessExpWndsClrReq(SVnode *pVnode, void *pReq, int32_t len, SRpcMsg *pRsp) { - SVClrTsmaExpWndsReq req = {0}; - SDecoder coder = {0}; - - if (pRsp) { - pRsp->msgType = TDMT_VND_CLR_TSMA_EXP_WNDS_RSP; - pRsp->code = TSDB_CODE_SUCCESS; - pRsp->pCont = NULL; - pRsp->contLen = 0; - } - - // decode and process - tDecoderInit(&coder, pReq, len); - - if (tDecodeSVClrTsmaExpWndsReq(&coder, &req) < 0) { - terrno = TSDB_CODE_MSG_DECODE_ERROR; - if (pRsp) pRsp->code = terrno; - goto _err; - } - - ASSERT(0); - - if (tdClearExpireWindow(pVnode->pSma, (const SVClrTsmaExpWndsReq *)&req) < 0) { - if (pRsp) pRsp->code = terrno; - goto _err; - } - - tDecoderClear(&coder); - vDebug("vgId:%d, success to process expWnds clear for tsma %" PRIi64 " version %" PRIi64, TD_VID(pVnode), - req.indexUid, req.version); - return 0; - -_err: - tDecoderClear(&coder); - vError("vgId:%d, success to process expWnds clear for tsma %" PRIi64 " version %" PRIi64 " since %s", TD_VID(pVnode), - req.indexUid, req.version, terrstr()); - return -1; -} - static int32_t vnodeProcessAlterHasnRangeReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp) { vInfo("vgId:%d, alter hashrange msg will be processed", TD_VID(pVnode)); From 767b768e5137e29bc1c14a850d52abbe13b16cad Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Sat, 11 Jun 2022 15:03:23 +0800 Subject: [PATCH 16/16] fix(stream): fix shuffle vg id not initialized --- source/dnode/mnode/impl/src/mndScheduler.c | 54 ++++++++++++---------- source/libs/stream/src/streamDispatch.c | 6 ++- 2 files changed, 34 insertions(+), 26 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index c04e416896..a4ddf96630 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -151,33 +151,36 @@ int32_t mndAddDispatcherToInnerTask(SMnode* pMnode, STrans* pTrans, SStreamObj* ASSERT(pDb); if (mndExtractDbInfo(pMnode, pDb, &pTask->shuffleDispatcher.dbInfo, NULL) < 0) { - sdbRelease(pMnode->pSdb, pDb); + ASSERT(0); + return -1; + } + sdbRelease(pMnode->pSdb, pDb); - SArray* pVgs = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; - int32_t sz = taosArrayGetSize(pVgs); - SArray* sinkLv = taosArrayGetP(pStream->tasks, 0); - int32_t sinkLvSize = taosArrayGetSize(sinkLv); - for (int32_t i = 0; i < sz; i++) { - SVgroupInfo* pVgInfo = taosArrayGet(pVgs, i); - for (int32_t j = 0; j < sinkLvSize; j++) { - SStreamTask* pLastLevelTask = taosArrayGetP(sinkLv, j); - if (pLastLevelTask->nodeId == pVgInfo->vgId) { - pVgInfo->taskId = pLastLevelTask->taskId; - break; - } + SArray* pVgs = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; + int32_t sz = taosArrayGetSize(pVgs); + SArray* sinkLv = taosArrayGetP(pStream->tasks, 0); + int32_t sinkLvSize = taosArrayGetSize(sinkLv); + for (int32_t i = 0; i < sz; i++) { + SVgroupInfo* pVgInfo = taosArrayGet(pVgs, i); + for (int32_t j = 0; j < sinkLvSize; j++) { + SStreamTask* pLastLevelTask = taosArrayGetP(sinkLv, j); + if (pLastLevelTask->nodeId == pVgInfo->vgId) { + pVgInfo->taskId = pLastLevelTask->taskId; + ASSERT(pVgInfo->taskId != 0); + break; } } - } else { - pTask->dispatchType = TASK_DISPATCH__FIXED; - pTask->dispatchMsgType = TDMT_STREAM_TASK_DISPATCH; - SArray* pArray = taosArrayGetP(pStream->tasks, 0); - // one sink only - ASSERT(taosArrayGetSize(pArray) == 1); - SStreamTask* lastLevelTask = taosArrayGetP(pArray, 0); - pTask->fixedEpDispatcher.taskId = lastLevelTask->taskId; - pTask->fixedEpDispatcher.nodeId = lastLevelTask->nodeId; - pTask->fixedEpDispatcher.epSet = lastLevelTask->epSet; } + } else { + pTask->dispatchType = TASK_DISPATCH__FIXED; + pTask->dispatchMsgType = TDMT_STREAM_TASK_DISPATCH; + SArray* pArray = taosArrayGetP(pStream->tasks, 0); + // one sink only + ASSERT(taosArrayGetSize(pArray) == 1); + SStreamTask* lastLevelTask = taosArrayGetP(pArray, 0); + pTask->fixedEpDispatcher.taskId = lastLevelTask->taskId; + pTask->fixedEpDispatcher.nodeId = lastLevelTask->nodeId; + pTask->fixedEpDispatcher.epSet = lastLevelTask->epSet; } return 0; } @@ -379,7 +382,10 @@ int32_t mndScheduleStream(SMnode* pMnode, STrans* pTrans, SStreamObj* pStream) { pFinalTask->inputType = TASK_INPUT_TYPE__DATA_BLOCK; // dispatch - mndAddDispatcherToInnerTask(pMnode, pTrans, pStream, pFinalTask); + if (mndAddDispatcherToInnerTask(pMnode, pTrans, pStream, pFinalTask) < 0) { + qDestroyQueryPlan(pPlan); + return -1; + } // exec pFinalTask->execType = TASK_EXEC__PIPE; diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 6fb5d75d86..d523374638 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -100,7 +100,6 @@ int32_t streamBuildDispatchMsg(SStreamTask* pTask, SStreamDataBlock* data, SRpcM .upstreamNodeId = pTask->nodeId, .blockNum = blockNum, }; - qInfo("dispatch from task %d (child id %d)", pTask->taskId, pTask->childId); req.data = taosArrayInit(blockNum, sizeof(void*)); req.dataLen = taosArrayInit(blockNum, sizeof(int32_t)); @@ -142,11 +141,14 @@ int32_t streamBuildDispatchMsg(SStreamTask* pTask, SStreamDataBlock* data, SRpcM break; } } - ASSERT(vgId != 0); } + ASSERT(vgId != 0); req.taskId = downstreamTaskId; + qInfo("dispatch from task %d (child id %d) to down stream task %d in vnode %d", pTask->taskId, pTask->childId, + downstreamTaskId, vgId); + // serialize int32_t tlen; tEncodeSize(tEncodeStreamDispatchReq, &req, tlen, code);